nir,ac/llvm,aco,radv,radeonsi: remove nir_export_vertex_amd
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20691>
This commit is contained in:
@@ -5297,8 +5297,6 @@ load_input_from_temps(isel_context* ctx, nir_intrinsic_instr* instr, Temp dst)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void export_vs_varying(isel_context* ctx, int slot, bool is_pos, int* next_pos);
|
|
||||||
|
|
||||||
void
|
void
|
||||||
visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
|
visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||||
{
|
{
|
||||||
@@ -8178,7 +8176,6 @@ emit_interp_center(isel_context* ctx, Temp dst, Temp bary, Temp pos1, Temp pos2)
|
|||||||
Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
|
Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
|
||||||
Temp lanecount_to_mask(isel_context* ctx, Temp count);
|
Temp lanecount_to_mask(isel_context* ctx, Temp count);
|
||||||
void ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt);
|
void ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt);
|
||||||
static void create_vs_exports(isel_context* ctx);
|
|
||||||
|
|
||||||
Temp
|
Temp
|
||||||
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
|
get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
|
||||||
@@ -9075,10 +9072,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||||||
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
|
bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_export_vertex_amd: {
|
|
||||||
create_vs_exports(ctx);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case nir_intrinsic_alloc_vertices_and_primitives_amd: {
|
case nir_intrinsic_alloc_vertices_and_primitives_amd: {
|
||||||
assert(ctx->stage.hw == HWStage::NGG);
|
assert(ctx->stage.hw == HWStage::NGG);
|
||||||
Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
|
Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||||
@@ -10825,137 +10818,6 @@ visit_cf_list(isel_context* ctx, struct exec_list* list)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
export_vs_varying(isel_context* ctx, int slot, bool is_pos, int* next_pos)
|
|
||||||
{
|
|
||||||
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG);
|
|
||||||
|
|
||||||
const uint8_t *vs_output_param_offset =
|
|
||||||
ctx->program->info.outinfo.vs_output_param_offset;
|
|
||||||
|
|
||||||
assert(vs_output_param_offset);
|
|
||||||
|
|
||||||
int offset = vs_output_param_offset[slot];
|
|
||||||
unsigned mask = ctx->outputs.mask[slot];
|
|
||||||
if (!is_pos && !mask)
|
|
||||||
return;
|
|
||||||
if (!is_pos && offset == AC_EXP_PARAM_UNDEFINED)
|
|
||||||
return;
|
|
||||||
aco_ptr<Export_instruction> exp{
|
|
||||||
create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
|
|
||||||
exp->enabled_mask = mask;
|
|
||||||
for (unsigned i = 0; i < 4; ++i) {
|
|
||||||
if (mask & (1 << i))
|
|
||||||
exp->operands[i] = Operand(ctx->outputs.temps[slot * 4u + i]);
|
|
||||||
else
|
|
||||||
exp->operands[i] = Operand(v1);
|
|
||||||
}
|
|
||||||
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
|
||||||
* Setting valid_mask=1 prevents it and has no other effect.
|
|
||||||
*/
|
|
||||||
exp->valid_mask = ctx->options->gfx_level == GFX10 && is_pos && *next_pos == 0;
|
|
||||||
exp->done = false;
|
|
||||||
exp->compressed = false;
|
|
||||||
if (is_pos)
|
|
||||||
exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++;
|
|
||||||
else
|
|
||||||
exp->dest = V_008DFC_SQ_EXP_PARAM + offset;
|
|
||||||
ctx->block->instructions.emplace_back(std::move(exp));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos,
|
|
||||||
const aco_vp_output_info* outinfo)
|
|
||||||
{
|
|
||||||
aco_ptr<Export_instruction> exp{
|
|
||||||
create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
|
|
||||||
exp->enabled_mask = 0;
|
|
||||||
for (unsigned i = 0; i < 4; ++i)
|
|
||||||
exp->operands[i] = Operand(v1);
|
|
||||||
if (ctx->outputs.mask[VARYING_SLOT_PSIZ]) {
|
|
||||||
exp->operands[0] = Operand(ctx->outputs.temps[VARYING_SLOT_PSIZ * 4u]);
|
|
||||||
exp->enabled_mask |= 0x1;
|
|
||||||
}
|
|
||||||
if (ctx->outputs.mask[VARYING_SLOT_LAYER] && !outinfo->writes_layer_per_primitive) {
|
|
||||||
exp->operands[2] = Operand(ctx->outputs.temps[VARYING_SLOT_LAYER * 4u]);
|
|
||||||
exp->enabled_mask |= 0x4;
|
|
||||||
}
|
|
||||||
if (ctx->outputs.mask[VARYING_SLOT_VIEWPORT] && !outinfo->writes_viewport_index_per_primitive) {
|
|
||||||
if (ctx->options->gfx_level < GFX9) {
|
|
||||||
exp->operands[3] = Operand(ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u]);
|
|
||||||
exp->enabled_mask |= 0x8;
|
|
||||||
} else {
|
|
||||||
Builder bld(ctx->program, ctx->block);
|
|
||||||
|
|
||||||
Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u),
|
|
||||||
Operand(ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u]));
|
|
||||||
if (exp->operands[2].isTemp())
|
|
||||||
out = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(out), exp->operands[2]);
|
|
||||||
|
|
||||||
exp->operands[2] = Operand(out);
|
|
||||||
exp->enabled_mask |= 0x4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (ctx->outputs.mask[VARYING_SLOT_PRIMITIVE_SHADING_RATE]) {
|
|
||||||
exp->operands[1] = Operand(ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_SHADING_RATE * 4u]);
|
|
||||||
exp->enabled_mask |= 0x2;
|
|
||||||
}
|
|
||||||
|
|
||||||
exp->valid_mask = ctx->options->gfx_level == GFX10 && *next_pos == 0;
|
|
||||||
exp->done = false;
|
|
||||||
exp->compressed = false;
|
|
||||||
exp->dest = V_008DFC_SQ_EXP_POS + (*next_pos)++;
|
|
||||||
ctx->block->instructions.emplace_back(std::move(exp));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
create_vs_exports(isel_context* ctx)
|
|
||||||
{
|
|
||||||
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG);
|
|
||||||
const aco_vp_output_info* outinfo = &ctx->program->info.outinfo;
|
|
||||||
|
|
||||||
assert(outinfo);
|
|
||||||
ctx->block->kind |= block_kind_export_end;
|
|
||||||
|
|
||||||
/* Hardware requires position data to always be exported, even if the
|
|
||||||
* application did not write gl_Position.
|
|
||||||
*/
|
|
||||||
ctx->outputs.mask[VARYING_SLOT_POS] = 0xf;
|
|
||||||
|
|
||||||
/* the order these position exports are created is important */
|
|
||||||
int next_pos = 0;
|
|
||||||
export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
|
|
||||||
|
|
||||||
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index ||
|
|
||||||
outinfo->writes_primitive_shading_rate) {
|
|
||||||
export_vs_psiz_layer_viewport_vrs(ctx, &next_pos, outinfo);
|
|
||||||
}
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos);
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
|
|
||||||
|
|
||||||
if (ctx->program->gfx_level >= GFX11)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (ctx->export_clip_dists) {
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
|
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, false, &next_pos);
|
|
||||||
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
|
|
||||||
export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, false, &next_pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
|
|
||||||
if (i < VARYING_SLOT_VAR0 && i != VARYING_SLOT_LAYER && i != VARYING_SLOT_PRIMITIVE_ID &&
|
|
||||||
i != VARYING_SLOT_VIEWPORT)
|
|
||||||
continue;
|
|
||||||
if (ctx->shader && ctx->shader->info.per_primitive_outputs & BITFIELD64_BIT(i))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
export_vs_varying(ctx, i, false, NULL);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
export_fs_mrt_z(isel_context* ctx)
|
export_fs_mrt_z(isel_context* ctx)
|
||||||
{
|
{
|
||||||
|
@@ -4166,9 +4166,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||||||
result = ac_build_intrinsic(&ctx->ac, name, return_type, args, 5, 0);
|
result = ac_build_intrinsic(&ctx->ac, name, return_type, args, 5, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case nir_intrinsic_export_vertex_amd:
|
|
||||||
ctx->abi->export_vertex(ctx->abi);
|
|
||||||
break;
|
|
||||||
case nir_intrinsic_elect:
|
case nir_intrinsic_elect:
|
||||||
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx),
|
result = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, visit_first_invocation(ctx),
|
||||||
ac_get_thread_id(&ctx->ac), "");
|
ac_get_thread_id(&ctx->ac), "");
|
||||||
|
@@ -60,8 +60,6 @@ struct ac_shader_abi {
|
|||||||
/* Varying -> attribute number mapping. Also NIR-only */
|
/* Varying -> attribute number mapping. Also NIR-only */
|
||||||
unsigned fs_input_attr_indices[MAX_VARYING];
|
unsigned fs_input_attr_indices[MAX_VARYING];
|
||||||
|
|
||||||
void (*export_vertex)(struct ac_shader_abi *abi);
|
|
||||||
|
|
||||||
void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
|
void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
|
||||||
|
|
||||||
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
|
||||||
|
@@ -71,13 +71,6 @@ struct radv_shader_context {
|
|||||||
uint64_t output_mask;
|
uint64_t output_mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct radv_shader_output_values {
|
|
||||||
LLVMValueRef values[4];
|
|
||||||
unsigned slot_name;
|
|
||||||
unsigned slot_index;
|
|
||||||
unsigned usage_mask;
|
|
||||||
};
|
|
||||||
|
|
||||||
static inline struct radv_shader_context *
|
static inline struct radv_shader_context *
|
||||||
radv_shader_context_from_abi(struct ac_shader_abi *abi)
|
radv_shader_context_from_abi(struct ac_shader_abi *abi)
|
||||||
{
|
{
|
||||||
@@ -667,16 +660,6 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
|
|||||||
args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
|
args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef *values,
|
|
||||||
unsigned enabled_channels)
|
|
||||||
{
|
|
||||||
struct ac_export_args args;
|
|
||||||
|
|
||||||
si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, 0, &args);
|
|
||||||
ac_build_export(&ctx->ac, &args);
|
|
||||||
}
|
|
||||||
|
|
||||||
static LLVMValueRef
|
static LLVMValueRef
|
||||||
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
|
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
|
||||||
{
|
{
|
||||||
@@ -686,211 +669,6 @@ radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
|
|||||||
return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
|
return LLVMBuildLoad2(ctx->ac.builder, type, output, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
radv_build_param_exports(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
|
|
||||||
unsigned noutput, const struct radv_vs_output_info *outinfo,
|
|
||||||
bool export_clip_dists)
|
|
||||||
{
|
|
||||||
for (unsigned i = 0; i < noutput; i++) {
|
|
||||||
unsigned slot_name = outputs[i].slot_name;
|
|
||||||
unsigned usage_mask = outputs[i].usage_mask;
|
|
||||||
|
|
||||||
if (slot_name != VARYING_SLOT_LAYER && slot_name != VARYING_SLOT_PRIMITIVE_ID &&
|
|
||||||
slot_name != VARYING_SLOT_VIEWPORT && slot_name != VARYING_SLOT_CLIP_DIST0 &&
|
|
||||||
slot_name != VARYING_SLOT_CLIP_DIST1 && slot_name < VARYING_SLOT_VAR0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if ((slot_name == VARYING_SLOT_CLIP_DIST0 || slot_name == VARYING_SLOT_CLIP_DIST1) &&
|
|
||||||
!export_clip_dists)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
radv_export_param(ctx, outinfo->vs_output_param_offset[slot_name], outputs[i].values,
|
|
||||||
usage_mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Generate export instructions for hardware VS shader stage or NGG GS stage
|
|
||||||
* (position and parameter data only).
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
|
|
||||||
unsigned noutput, const struct radv_vs_output_info *outinfo,
|
|
||||||
bool export_clip_dists)
|
|
||||||
{
|
|
||||||
LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL;
|
|
||||||
LLVMValueRef primitive_shading_rate = NULL;
|
|
||||||
struct ac_export_args pos_args[4] = {0};
|
|
||||||
unsigned pos_idx, index;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Build position exports */
|
|
||||||
for (i = 0; i < noutput; i++) {
|
|
||||||
switch (outputs[i].slot_name) {
|
|
||||||
case VARYING_SLOT_POS:
|
|
||||||
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, 0, &pos_args[0]);
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_PSIZ:
|
|
||||||
psize_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_LAYER:
|
|
||||||
layer_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_VIEWPORT:
|
|
||||||
viewport_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
|
|
||||||
primitive_shading_rate = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_CLIP_DIST0:
|
|
||||||
case VARYING_SLOT_CLIP_DIST1:
|
|
||||||
index = 2 + outputs[i].slot_index;
|
|
||||||
si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index, 0,
|
|
||||||
&pos_args[index]);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We need to add the position output manually if it's missing. */
|
|
||||||
if (!pos_args[0].out[0]) {
|
|
||||||
pos_args[0].enabled_channels = 0xf; /* writemask */
|
|
||||||
pos_args[0].valid_mask = 0; /* EXEC mask */
|
|
||||||
pos_args[0].done = 0; /* last export? */
|
|
||||||
pos_args[0].target = V_008DFC_SQ_EXP_POS;
|
|
||||||
pos_args[0].compr = 0; /* COMPR flag */
|
|
||||||
pos_args[0].out[0] = ctx->ac.f32_0; /* X */
|
|
||||||
pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
|
|
||||||
pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
|
|
||||||
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add clip distance outputs manually if they're missing. */
|
|
||||||
uint8_t clip_cull_mask = outinfo->clip_dist_mask | outinfo->cull_dist_mask;
|
|
||||||
for (i = 2; i < 4; i++) {
|
|
||||||
uint8_t mask = 0xf << (i * 4 - 8);
|
|
||||||
if ((clip_cull_mask & mask) && !pos_args[i].out[0]) {
|
|
||||||
pos_args[i].enabled_channels = 0x0;
|
|
||||||
pos_args[i].valid_mask = 0;
|
|
||||||
pos_args[i].done = 0;
|
|
||||||
pos_args[i].target = V_008DFC_SQ_EXP_POS + i;
|
|
||||||
pos_args[i].compr = 0;
|
|
||||||
pos_args[i].out[0] = ctx->ac.f32_0;
|
|
||||||
pos_args[i].out[1] = ctx->ac.f32_0;
|
|
||||||
pos_args[i].out[2] = ctx->ac.f32_0;
|
|
||||||
pos_args[i].out[3] = ctx->ac.f32_0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
|
|
||||||
outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
|
|
||||||
pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
|
|
||||||
(outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
|
|
||||||
(outinfo->writes_layer == true ? 4 : 0));
|
|
||||||
pos_args[1].valid_mask = 0;
|
|
||||||
pos_args[1].done = 0;
|
|
||||||
pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
|
|
||||||
pos_args[1].compr = 0;
|
|
||||||
pos_args[1].out[0] = ctx->ac.f32_0; /* X */
|
|
||||||
pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
|
|
||||||
pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
|
|
||||||
pos_args[1].out[3] = ctx->ac.f32_0; /* W */
|
|
||||||
|
|
||||||
if (outinfo->writes_pointsize == true)
|
|
||||||
pos_args[1].out[0] = psize_value;
|
|
||||||
if (outinfo->writes_layer == true)
|
|
||||||
pos_args[1].out[2] = layer_value;
|
|
||||||
if (outinfo->writes_viewport_index == true) {
|
|
||||||
if (ctx->options->gfx_level >= GFX9) {
|
|
||||||
/* GFX9 has the layer in out.z[10:0] and the viewport
|
|
||||||
* index in out.z[19:16].
|
|
||||||
*/
|
|
||||||
LLVMValueRef v = viewport_value;
|
|
||||||
v = ac_to_integer(&ctx->ac, v);
|
|
||||||
v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, false), "");
|
|
||||||
v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
|
|
||||||
|
|
||||||
pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
|
|
||||||
pos_args[1].enabled_channels |= 1 << 2;
|
|
||||||
} else {
|
|
||||||
pos_args[1].out[3] = viewport_value;
|
|
||||||
pos_args[1].enabled_channels |= 1 << 3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (outinfo->writes_primitive_shading_rate) {
|
|
||||||
pos_args[1].out[1] = primitive_shading_rate;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
|
||||||
* Setting valid_mask=1 prevents it and has no other effect.
|
|
||||||
*/
|
|
||||||
if (ctx->ac.gfx_level == GFX10)
|
|
||||||
pos_args[0].valid_mask = 1;
|
|
||||||
|
|
||||||
pos_idx = 0;
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
if (!pos_args[i].out[0])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Specify the target we are exporting */
|
|
||||||
pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
|
|
||||||
|
|
||||||
if (pos_idx == outinfo->pos_exports)
|
|
||||||
/* Specify that this is the last export */
|
|
||||||
pos_args[i].done = 1;
|
|
||||||
|
|
||||||
ac_build_export(&ctx->ac, &pos_args[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctx->options->gfx_level >= GFX11)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Build parameter exports */
|
|
||||||
radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
radv_llvm_visit_export_vertex(struct ac_shader_abi *abi)
|
|
||||||
{
|
|
||||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
|
||||||
const struct radv_vs_output_info *outinfo = &ctx->shader_info->outinfo;
|
|
||||||
const bool export_clip_dists = outinfo->export_clip_dists;
|
|
||||||
struct radv_shader_output_values *outputs;
|
|
||||||
unsigned noutput = 0;
|
|
||||||
|
|
||||||
/* Allocate a temporary array for the output values. */
|
|
||||||
unsigned num_outputs = util_bitcount64(ctx->output_mask);
|
|
||||||
outputs = malloc(num_outputs * sizeof(outputs[0]));
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
|
|
||||||
if (!(ctx->output_mask & (1ull << i)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
outputs[noutput].slot_name = i;
|
|
||||||
outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
|
|
||||||
|
|
||||||
if (ctx->stage == MESA_SHADER_VERTEX) {
|
|
||||||
outputs[noutput].usage_mask = ctx->shader_info->vs.output_usage_mask[i];
|
|
||||||
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
|
||||||
outputs[noutput].usage_mask = ctx->shader_info->tes.output_usage_mask[i];
|
|
||||||
} else if (ctx->stage == MESA_SHADER_GEOMETRY) {
|
|
||||||
outputs[noutput].usage_mask = ctx->shader_info->gs.output_usage_mask[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < 4; j++) {
|
|
||||||
outputs[noutput].values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
|
|
||||||
}
|
|
||||||
|
|
||||||
noutput++;
|
|
||||||
}
|
|
||||||
|
|
||||||
radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists);
|
|
||||||
|
|
||||||
free(outputs);
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned target,
|
si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned target,
|
||||||
unsigned index, struct ac_export_args *args)
|
unsigned index, struct ac_export_args *args)
|
||||||
@@ -1245,8 +1023,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||||||
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
|
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
|
||||||
|
|
||||||
if (is_ngg) {
|
if (is_ngg) {
|
||||||
ctx.abi.export_vertex = radv_llvm_visit_export_vertex;
|
|
||||||
|
|
||||||
if (!info->is_ngg_passthrough)
|
if (!info->is_ngg_passthrough)
|
||||||
declare_esgs_ring(&ctx);
|
declare_esgs_ring(&ctx);
|
||||||
|
|
||||||
@@ -1280,10 +1056,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
|||||||
if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) {
|
if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) {
|
||||||
ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
|
ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
|
||||||
ctx.abi.emit_primitive = visit_end_primitive;
|
ctx.abi.emit_primitive = visit_end_primitive;
|
||||||
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
|
|
||||||
ctx.abi.export_vertex = radv_llvm_visit_export_vertex;
|
|
||||||
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
|
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
|
||||||
ctx.abi.export_vertex = radv_llvm_visit_export_vertex;
|
|
||||||
ctx.abi.load_inputs = radv_load_vs_inputs;
|
ctx.abi.load_inputs = radv_load_vs_inputs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1423,8 +1423,6 @@ intrinsic("load_cull_any_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_EL
|
|||||||
intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
|
intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||||
# Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export.
|
# Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export.
|
||||||
intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[])
|
intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[])
|
||||||
# Exports the current invocation's vertex. This is a placeholder where all vertex attribute export instructions should be emitted.
|
|
||||||
intrinsic("export_vertex_amd", src_comp=[], indices=[])
|
|
||||||
# Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}.
|
# Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}.
|
||||||
intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[])
|
intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[])
|
||||||
# Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}.
|
# Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}.
|
||||||
|
@@ -30,12 +30,6 @@
|
|||||||
|
|
||||||
struct util_debug_callback;
|
struct util_debug_callback;
|
||||||
|
|
||||||
struct si_shader_output_values {
|
|
||||||
LLVMValueRef values[4];
|
|
||||||
ubyte vertex_streams;
|
|
||||||
ubyte semantic;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct si_shader_args {
|
struct si_shader_args {
|
||||||
struct ac_shader_args ac;
|
struct ac_shader_args ac;
|
||||||
|
|
||||||
@@ -257,10 +251,6 @@ void si_llvm_ps_build_end(struct si_shader_context *ctx);
|
|||||||
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
|
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx);
|
||||||
|
|
||||||
/* si_shader_llvm_vs.c */
|
/* si_shader_llvm_vs.c */
|
||||||
void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
|
|
||||||
struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4]);
|
|
||||||
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
|
|
||||||
struct si_shader_output_values *outputs, unsigned noutput);
|
|
||||||
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
|
void si_llvm_build_vs_prolog(struct si_shader_context *ctx, union si_shader_part_key *key);
|
||||||
void si_llvm_init_vs_callbacks(struct si_shader_context *ctx);
|
void si_llvm_init_vs_callbacks(struct si_shader_context *ctx);
|
||||||
|
|
||||||
|
@@ -774,35 +774,6 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal
|
|||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void si_llvm_export_vertex(struct ac_shader_abi *abi)
|
|
||||||
{
|
|
||||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
|
||||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
|
||||||
struct si_shader_output_values outputs[PIPE_MAX_SHADER_OUTPUTS];
|
|
||||||
LLVMValueRef *addrs = ctx->abi.outputs;
|
|
||||||
|
|
||||||
unsigned num_outputs = info->num_outputs;
|
|
||||||
/* if needed, nir lower will append primitive id export at last */
|
|
||||||
if (ctx->shader->key.ge.mono.u.vs_export_prim_id)
|
|
||||||
num_outputs++;
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < num_outputs; i++) {
|
|
||||||
if (i < info->num_outputs) {
|
|
||||||
outputs[i].semantic = info->output_semantic[i];
|
|
||||||
outputs[i].vertex_streams = info->output_streams[i];
|
|
||||||
} else {
|
|
||||||
outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
|
|
||||||
outputs[i].vertex_streams = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (unsigned j = 0; j < 4; j++)
|
|
||||||
outputs[i].values[j] =
|
|
||||||
LLVMBuildLoad2(ctx->ac.builder, ctx->ac.f32, addrs[4 * i + j], "");
|
|
||||||
}
|
|
||||||
|
|
||||||
si_llvm_build_vs_exports(ctx, outputs, num_outputs);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
|
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
|
||||||
struct nir_shader *nir, bool free_nir)
|
struct nir_shader *nir, bool free_nir)
|
||||||
{
|
{
|
||||||
@@ -819,7 +790,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||||||
ctx->num_images = info->base.num_images;
|
ctx->num_images = info->base.num_images;
|
||||||
|
|
||||||
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
|
ctx->abi.intrinsic_load = si_llvm_load_intrinsic;
|
||||||
ctx->abi.export_vertex = si_llvm_export_vertex;
|
|
||||||
ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc;
|
ctx->abi.load_sampler_desc = si_llvm_load_sampler_desc;
|
||||||
|
|
||||||
si_llvm_create_main_func(ctx);
|
si_llvm_create_main_func(ctx);
|
||||||
|
@@ -330,282 +330,6 @@ static LLVMValueRef si_load_vs_input(struct ac_shader_abi *abi, unsigned driver_
|
|||||||
return ac_build_varying_gather_values(&ctx->ac, values, num_components, component);
|
return ac_build_varying_gather_values(&ctx->ac, values, num_components, component);
|
||||||
}
|
}
|
||||||
|
|
||||||
void si_llvm_clipvertex_to_clipdist(struct si_shader_context *ctx,
|
|
||||||
struct ac_export_args clipdist[2], LLVMValueRef clipvertex[4])
|
|
||||||
{
|
|
||||||
unsigned reg_index;
|
|
||||||
unsigned chan;
|
|
||||||
unsigned const_chan;
|
|
||||||
LLVMValueRef base_elt;
|
|
||||||
LLVMValueRef constbuf_index = LLVMConstInt(ctx->ac.i32, SI_VS_CONST_CLIP_PLANES, 0);
|
|
||||||
LLVMValueRef const_resource = ac_build_load_to_sgpr(
|
|
||||||
&ctx->ac, ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->internal_bindings), constbuf_index);
|
|
||||||
unsigned clipdist_mask = ctx->shader->selector->info.clipdist_mask &
|
|
||||||
~ctx->shader->key.ge.opt.kill_clip_distances;
|
|
||||||
|
|
||||||
for (reg_index = 0; reg_index < 2; reg_index++) {
|
|
||||||
struct ac_export_args *args = &clipdist[reg_index];
|
|
||||||
|
|
||||||
if (!(clipdist_mask & BITFIELD_RANGE(reg_index * 4, 4)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
args->out[0] = args->out[1] = args->out[2] = args->out[3] = LLVMGetUndef(ctx->ac.f32);
|
|
||||||
|
|
||||||
/* Compute dot products of position and user clip plane vectors */
|
|
||||||
for (chan = 0; chan < 4; chan++) {
|
|
||||||
if (!(clipdist_mask & BITFIELD_BIT(reg_index * 4 + chan)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
for (const_chan = 0; const_chan < 4; const_chan++) {
|
|
||||||
LLVMValueRef addr =
|
|
||||||
LLVMConstInt(ctx->ac.i32, ((reg_index * 4 + chan) * 4 + const_chan) * 4, 0);
|
|
||||||
base_elt = si_buffer_load_const(ctx, const_resource, addr);
|
|
||||||
args->out[chan] =
|
|
||||||
ac_build_fmad(&ctx->ac, base_elt, clipvertex[const_chan],
|
|
||||||
const_chan == 0 ? ctx->ac.f32_0 : args->out[chan]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
args->enabled_channels = 0xf;
|
|
||||||
args->valid_mask = 0;
|
|
||||||
args->done = 0;
|
|
||||||
args->target = V_008DFC_SQ_EXP_POS + 2 + reg_index;
|
|
||||||
args->compr = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Initialize arguments for the shader export intrinsic */
|
|
||||||
static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLVMValueRef *values,
|
|
||||||
unsigned target, struct ac_export_args *args)
|
|
||||||
{
|
|
||||||
args->enabled_channels = 0xf; /* writemask - default is 0xf */
|
|
||||||
args->valid_mask = 0; /* Specify whether the EXEC mask represents the valid mask */
|
|
||||||
args->done = 0; /* Specify whether this is the last export */
|
|
||||||
args->target = target; /* Specify the target we are exporting */
|
|
||||||
args->compr = false;
|
|
||||||
|
|
||||||
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Generate export instructions for hardware VS shader stage or NGG GS stage
|
|
||||||
* (position and parameter data only).
|
|
||||||
*/
|
|
||||||
void si_llvm_build_vs_exports(struct si_shader_context *ctx,
|
|
||||||
struct si_shader_output_values *outputs, unsigned noutput)
|
|
||||||
{
|
|
||||||
struct si_shader *shader = ctx->shader;
|
|
||||||
struct ac_export_args pos_args[4] = {};
|
|
||||||
LLVMValueRef psize_value = NULL, edgeflag_value = NULL, layer_value = NULL,
|
|
||||||
viewport_index_value = NULL;
|
|
||||||
unsigned pos_idx, index;
|
|
||||||
unsigned clipdist_mask = (shader->selector->info.clipdist_mask &
|
|
||||||
~shader->key.ge.opt.kill_clip_distances) |
|
|
||||||
shader->selector->info.culldist_mask;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Build position exports. */
|
|
||||||
for (i = 0; i < noutput; i++) {
|
|
||||||
switch (outputs[i].semantic) {
|
|
||||||
case VARYING_SLOT_POS:
|
|
||||||
si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_PSIZ:
|
|
||||||
psize_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_LAYER:
|
|
||||||
layer_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_VIEWPORT:
|
|
||||||
viewport_index_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_EDGE:
|
|
||||||
edgeflag_value = outputs[i].values[0];
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_CLIP_DIST0:
|
|
||||||
case VARYING_SLOT_CLIP_DIST1:
|
|
||||||
index = outputs[i].semantic - VARYING_SLOT_CLIP_DIST0;
|
|
||||||
if (clipdist_mask & BITFIELD_RANGE(index * 4, 4)) {
|
|
||||||
si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_POS + 2 + index,
|
|
||||||
&pos_args[2 + index]);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case VARYING_SLOT_CLIP_VERTEX:
|
|
||||||
si_llvm_clipvertex_to_clipdist(ctx, pos_args + 2, outputs[i].values);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We need to add the position output manually if it's missing. */
|
|
||||||
if (!pos_args[0].out[0]) {
|
|
||||||
pos_args[0].enabled_channels = 0xf; /* writemask */
|
|
||||||
pos_args[0].valid_mask = 0; /* EXEC mask */
|
|
||||||
pos_args[0].done = 0; /* last export? */
|
|
||||||
pos_args[0].target = V_008DFC_SQ_EXP_POS;
|
|
||||||
pos_args[0].compr = 0; /* COMPR flag */
|
|
||||||
pos_args[0].out[0] = ctx->ac.f32_0; /* X */
|
|
||||||
pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
|
|
||||||
pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
|
|
||||||
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
|
|
||||||
}
|
|
||||||
|
|
||||||
bool writes_psize = shader->selector->info.writes_psize && !shader->key.ge.opt.kill_pointsize;
|
|
||||||
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.ge.as_ngg;
|
|
||||||
bool writes_vrs = ctx->screen->options.vrs2x2;
|
|
||||||
|
|
||||||
/* Write the misc vector (point size, edgeflag, layer, viewport). */
|
|
||||||
if (writes_psize || pos_writes_edgeflag || writes_vrs ||
|
|
||||||
shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
|
|
||||||
pos_args[1].enabled_channels = writes_psize |
|
|
||||||
((pos_writes_edgeflag | writes_vrs) << 1) |
|
|
||||||
(shader->selector->info.writes_layer << 2);
|
|
||||||
|
|
||||||
pos_args[1].valid_mask = 0; /* EXEC mask */
|
|
||||||
pos_args[1].done = 0; /* last export? */
|
|
||||||
pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
|
|
||||||
pos_args[1].compr = 0; /* COMPR flag */
|
|
||||||
pos_args[1].out[0] = ctx->ac.f32_0; /* X */
|
|
||||||
pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
|
|
||||||
pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
|
|
||||||
pos_args[1].out[3] = ctx->ac.f32_0; /* W */
|
|
||||||
|
|
||||||
if (writes_psize)
|
|
||||||
pos_args[1].out[0] = psize_value;
|
|
||||||
|
|
||||||
if (pos_writes_edgeflag) {
|
|
||||||
/* The output is a float, but the hw expects an integer
|
|
||||||
* with the first bit containing the edge flag. */
|
|
||||||
edgeflag_value = LLVMBuildFPToUI(ctx->ac.builder, edgeflag_value, ctx->ac.i32, "");
|
|
||||||
edgeflag_value = ac_build_umin(&ctx->ac, edgeflag_value, ctx->ac.i32_1);
|
|
||||||
|
|
||||||
/* The LLVM intrinsic expects a float. */
|
|
||||||
pos_args[1].out[1] = ac_to_float(&ctx->ac, edgeflag_value);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (writes_vrs) {
|
|
||||||
LLVMValueRef rates;
|
|
||||||
if (ctx->screen->info.gfx_level >= GFX11) {
|
|
||||||
/* Bits [2:5] = VRS rate
|
|
||||||
*
|
|
||||||
* The range is [0, 15].
|
|
||||||
*
|
|
||||||
* If the hw doesn't support VRS 4x4, it will silently use 2x2 instead.
|
|
||||||
*/
|
|
||||||
rates = LLVMConstInt(ctx->ac.i32, (V_0283D0_VRS_SHADING_RATE_4X4 << 2), 0);
|
|
||||||
} else {
|
|
||||||
/* Bits [2:3] = VRS rate X
|
|
||||||
* Bits [4:5] = VRS rate Y
|
|
||||||
*
|
|
||||||
* The range is [-2, 1]. Values:
|
|
||||||
* 1: 2x coarser shading rate in that direction.
|
|
||||||
* 0: normal shading rate
|
|
||||||
* -1: 2x finer shading rate (sample shading, not directional)
|
|
||||||
* -2: 4x finer shading rate (sample shading, not directional)
|
|
||||||
*
|
|
||||||
* Sample shading can't go above 8 samples, so both numbers can't be -2
|
|
||||||
* at the same time.
|
|
||||||
*/
|
|
||||||
rates = LLVMConstInt(ctx->ac.i32, (1 << 2) | (1 << 4), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */
|
|
||||||
rates = LLVMBuildSelect(ctx->ac.builder,
|
|
||||||
LLVMBuildFCmp(ctx->ac.builder, LLVMRealUNE,
|
|
||||||
pos_args[0].out[3], ctx->ac.f32_1, ""),
|
|
||||||
rates, ctx->ac.i32_0, "");
|
|
||||||
|
|
||||||
LLVMValueRef v = ac_to_integer(&ctx->ac, pos_args[1].out[1]);
|
|
||||||
v = LLVMBuildOr(ctx->ac.builder, v, rates, "");
|
|
||||||
pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctx->screen->info.gfx_level >= GFX9) {
|
|
||||||
/* GFX9 has the layer in out.z[10:0] and the viewport
|
|
||||||
* index in out.z[19:16].
|
|
||||||
*/
|
|
||||||
if (shader->selector->info.writes_layer)
|
|
||||||
pos_args[1].out[2] = layer_value;
|
|
||||||
|
|
||||||
if (shader->selector->info.writes_viewport_index) {
|
|
||||||
LLVMValueRef v = viewport_index_value;
|
|
||||||
|
|
||||||
v = ac_to_integer(&ctx->ac, v);
|
|
||||||
v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, 0), "");
|
|
||||||
v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
|
|
||||||
pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
|
|
||||||
pos_args[1].enabled_channels |= 1 << 2;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (shader->selector->info.writes_layer)
|
|
||||||
pos_args[1].out[2] = layer_value;
|
|
||||||
|
|
||||||
if (shader->selector->info.writes_viewport_index) {
|
|
||||||
pos_args[1].out[3] = viewport_index_value;
|
|
||||||
pos_args[1].enabled_channels |= 1 << 3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* GFX10 (Navi1x) skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
|
|
||||||
* Setting valid_mask=1 prevents it and has no other effect.
|
|
||||||
*/
|
|
||||||
if (ctx->screen->info.gfx_level == GFX10)
|
|
||||||
pos_args[0].valid_mask = 1;
|
|
||||||
|
|
||||||
pos_idx = 0;
|
|
||||||
for (i = 0; i < 4; i++) {
|
|
||||||
if (!pos_args[i].out[0])
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/* Specify the target we are exporting */
|
|
||||||
pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
|
|
||||||
|
|
||||||
if (pos_idx == shader->info.nr_pos_exports) {
|
|
||||||
/* Specify that this is the last export */
|
|
||||||
pos_args[i].done = 1;
|
|
||||||
|
|
||||||
/* If a shader has no param exports, rasterization can start before
|
|
||||||
* the shader finishes and thus memory stores might not finish before
|
|
||||||
* the pixel shader starts.
|
|
||||||
*
|
|
||||||
* VLOAD is for atomics with return.
|
|
||||||
*/
|
|
||||||
if (ctx->screen->info.gfx_level >= GFX10 &&
|
|
||||||
!shader->info.nr_param_exports &&
|
|
||||||
shader->selector->info.base.writes_memory)
|
|
||||||
ac_build_waitcnt(&ctx->ac, AC_WAIT_VLOAD | AC_WAIT_VSTORE);
|
|
||||||
}
|
|
||||||
|
|
||||||
ac_build_export(&ctx->ac, &pos_args[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!shader->info.nr_param_exports ||
|
|
||||||
/* GFX11 param export is handled in nir */
|
|
||||||
ctx->screen->info.gfx_level >= GFX11)
|
|
||||||
return;
|
|
||||||
|
|
||||||
/* Build parameter exports. Use 2 loops to export params in ascending order.
|
|
||||||
* 32 is the maximum number of parameter exports.
|
|
||||||
*/
|
|
||||||
struct ac_export_args param_exports[32] = {};
|
|
||||||
uint64_t vs_output_param_mask = shader->info.vs_output_param_mask;
|
|
||||||
|
|
||||||
while (vs_output_param_mask) {
|
|
||||||
unsigned i = u_bit_scan64(&vs_output_param_mask);
|
|
||||||
unsigned offset = shader->info.vs_output_param_offset[outputs[i].semantic];
|
|
||||||
|
|
||||||
assert(offset <= AC_EXP_PARAM_OFFSET_31);
|
|
||||||
assert(!param_exports[offset].enabled_channels);
|
|
||||||
|
|
||||||
si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + offset,
|
|
||||||
¶m_exports[offset]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Export attributes using parameter exports. */
|
|
||||||
for (unsigned i = 0; i < shader->info.nr_param_exports; i++)
|
|
||||||
ac_build_export(&ctx->ac, ¶m_exports[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build the vertex shader prolog function.
|
* Build the vertex shader prolog function.
|
||||||
*
|
*
|
||||||
|
Reference in New Issue
Block a user