ac: unify shader arguments that are duplicated
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Connor Abbott <cwabbott0@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7939>
This commit is contained in:
@@ -71,17 +71,51 @@ struct ac_shader_args {
|
||||
uint16_t num_sgprs_returned;
|
||||
uint16_t num_vgprs_returned;
|
||||
|
||||
/* VS */
|
||||
struct ac_arg base_vertex;
|
||||
struct ac_arg start_instance;
|
||||
struct ac_arg draw_id;
|
||||
struct ac_arg vertex_buffers;
|
||||
struct ac_arg vertex_id;
|
||||
struct ac_arg vs_rel_patch_id;
|
||||
struct ac_arg vs_prim_id;
|
||||
struct ac_arg instance_id;
|
||||
|
||||
/* Merged shaders */
|
||||
struct ac_arg tess_offchip_offset;
|
||||
struct ac_arg merged_wave_info;
|
||||
/* On gfx10:
|
||||
* - bits 0..11: ordered_wave_id
|
||||
* - bits 12..20: number of vertices in group
|
||||
* - bits 22..30: number of primitives in group
|
||||
*/
|
||||
struct ac_arg gs_tg_info;
|
||||
struct ac_arg scratch_offset;
|
||||
|
||||
/* TCS */
|
||||
struct ac_arg tcs_factor_offset;
|
||||
struct ac_arg tcs_patch_id;
|
||||
struct ac_arg tcs_rel_ids;
|
||||
|
||||
/* TES */
|
||||
struct ac_arg tes_u;
|
||||
struct ac_arg tes_v;
|
||||
struct ac_arg tes_rel_patch_id;
|
||||
struct ac_arg tes_patch_id;
|
||||
|
||||
/* GS */
|
||||
struct ac_arg es2gs_offset; /* separate legacy ES */
|
||||
struct ac_arg gs2vs_offset; /* legacy GS */
|
||||
struct ac_arg gs_wave_id; /* legacy GS */
|
||||
struct ac_arg gs_vtx_offset[6]; /* separate legacy GS */
|
||||
struct ac_arg gs_prim_id;
|
||||
struct ac_arg gs_invocation_id;
|
||||
|
||||
/* Streamout */
|
||||
struct ac_arg streamout_config;
|
||||
struct ac_arg streamout_write_index;
|
||||
struct ac_arg streamout_offset[4];
|
||||
|
||||
/* PS */
|
||||
struct ac_arg frag_pos[4];
|
||||
struct ac_arg front_face;
|
||||
|
@@ -4034,7 +4034,7 @@ Temp wave_id_in_threadgroup(isel_context *ctx)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand(24u | (4u << 16)));
|
||||
get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(24u | (4u << 16)));
|
||||
}
|
||||
|
||||
Temp thread_id_in_threadgroup(isel_context *ctx)
|
||||
@@ -4057,7 +4057,7 @@ Temp wave_count_in_threadgroup(isel_context *ctx)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand(28u | (4u << 16)));
|
||||
get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(28u | (4u << 16)));
|
||||
}
|
||||
|
||||
Temp ngg_gs_vertex_lds_addr(isel_context *ctx, Temp vertex_idx)
|
||||
@@ -4188,7 +4188,7 @@ Temp get_tess_rel_patch_id(isel_context *ctx)
|
||||
return bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffu),
|
||||
get_arg(ctx, ctx->args->ac.tcs_rel_ids));
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return get_arg(ctx, ctx->args->tes_rel_patch_id);
|
||||
return get_arg(ctx, ctx->args->ac.tes_rel_patch_id);
|
||||
default:
|
||||
unreachable("Unsupported stage in get_tess_rel_patch_id");
|
||||
}
|
||||
@@ -4384,7 +4384,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
if (ctx->stage.hw == HWStage::ES) {
|
||||
/* GFX6-8: ES stage is not merged into GS, data is passed from ES to GS in VMEM. */
|
||||
Temp esgs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_ESGS_VS * 16u));
|
||||
Temp es2gs_offset = get_arg(ctx, ctx->args->es2gs_offset);
|
||||
Temp es2gs_offset = get_arg(ctx, ctx->args->ac.es2gs_offset);
|
||||
store_vmem_mubuf(ctx, src, esgs_ring, offs.first, es2gs_offset, offs.second, elem_size_bytes, write_mask, false, memory_sync_info(), true);
|
||||
} else {
|
||||
Temp lds_base;
|
||||
@@ -4401,7 +4401,7 @@ void visit_store_ls_or_es_output(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
/* GFX6-8: VS runs on LS stage when tessellation is used, but LS shares LDS space with HS.
|
||||
* GFX9+: LS is merged into HS, but still uses the same LDS layout.
|
||||
*/
|
||||
Temp vertex_idx = get_arg(ctx, ctx->args->rel_auto_id);
|
||||
Temp vertex_idx = get_arg(ctx, ctx->args->ac.vs_rel_patch_id);
|
||||
lds_base = bld.v_mul24_imm(bld.def(v1), vertex_idx, ctx->tcs_num_inputs * 16u);
|
||||
} else {
|
||||
unreachable("Invalid LS or ES stage");
|
||||
@@ -4458,7 +4458,7 @@ void visit_store_tcs_output(isel_context *ctx, nir_intrinsic_instr *instr, bool
|
||||
: get_tcs_per_patch_output_vmem_offset(ctx, instr);
|
||||
|
||||
Temp hs_ring_tess_offchip = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u));
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->oc_lds);
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset);
|
||||
store_vmem_mubuf(ctx, store_val, hs_ring_tess_offchip, vmem_offs.first, oc_lds, vmem_offs.second, elem_size_bytes, write_mask, true, memory_sync_info(storage_vmem_output));
|
||||
}
|
||||
|
||||
@@ -4709,7 +4709,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
|
||||
isel_err(offset.ssa->parent_instr, "Unimplemented non-zero nir_intrinsic_load_input offset");
|
||||
|
||||
Temp vertex_buffers = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->vertex_buffers));
|
||||
Temp vertex_buffers = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.vertex_buffers));
|
||||
|
||||
unsigned location = nir_intrinsic_base(instr) - VERT_ATTRIB_GENERIC0;
|
||||
unsigned component = nir_intrinsic_component(instr);
|
||||
@@ -4951,7 +4951,7 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
|
||||
} else if (ctx->shader->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
Temp ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u));
|
||||
Temp soffset = get_arg(ctx, ctx->args->oc_lds);
|
||||
Temp soffset = get_arg(ctx, ctx->args->ac.tess_offchip_offset);
|
||||
std::pair<Temp, unsigned> offs = get_tcs_per_patch_output_vmem_offset(ctx, instr);
|
||||
unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8u;
|
||||
|
||||
@@ -4978,11 +4978,11 @@ std::pair<Temp, unsigned> get_gs_per_vertex_input_offset(isel_context *ctx, nir_
|
||||
Temp elem;
|
||||
|
||||
if (merged_esgs) {
|
||||
elem = get_arg(ctx, ctx->args->gs_vtx_offset[i / 2u * 2u]);
|
||||
elem = get_arg(ctx, ctx->args->ac.gs_vtx_offset[i / 2u * 2u]);
|
||||
if (i % 2u)
|
||||
elem = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(16u), elem);
|
||||
} else {
|
||||
elem = get_arg(ctx, ctx->args->gs_vtx_offset[i]);
|
||||
elem = get_arg(ctx, ctx->args->ac.gs_vtx_offset[i]);
|
||||
}
|
||||
|
||||
if (vertex_offset.id()) {
|
||||
@@ -5000,10 +5000,10 @@ std::pair<Temp, unsigned> get_gs_per_vertex_input_offset(isel_context *ctx, nir_
|
||||
unsigned vertex = nir_src_as_uint(*vertex_src);
|
||||
if (merged_esgs)
|
||||
vertex_offset = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->gs_vtx_offset[vertex / 2u * 2u]),
|
||||
get_arg(ctx, ctx->args->ac.gs_vtx_offset[vertex / 2u * 2u]),
|
||||
Operand((vertex % 2u) * 16u), Operand(16u));
|
||||
else
|
||||
vertex_offset = get_arg(ctx, ctx->args->gs_vtx_offset[vertex]);
|
||||
vertex_offset = get_arg(ctx, ctx->args->ac.gs_vtx_offset[vertex]);
|
||||
}
|
||||
|
||||
std::pair<Temp, unsigned> offs = get_intrinsic_io_basic_offset(ctx, instr, base_stride);
|
||||
@@ -5054,7 +5054,7 @@ void visit_load_tes_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *ins
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
Temp ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u));
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->oc_lds);
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
|
||||
unsigned elem_size_bytes = instr->dest.ssa.bit_size / 8;
|
||||
@@ -5100,8 +5100,8 @@ void visit_load_tess_coord(isel_context *ctx, nir_intrinsic_instr *instr)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
|
||||
|
||||
Operand tes_u(get_arg(ctx, ctx->args->tes_u));
|
||||
Operand tes_v(get_arg(ctx, ctx->args->tes_v));
|
||||
Operand tes_u(get_arg(ctx, ctx->args->ac.tes_u));
|
||||
Operand tes_v(get_arg(ctx, ctx->args->ac.tes_v));
|
||||
Operand tes_w(0u);
|
||||
|
||||
if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES) {
|
||||
@@ -7111,7 +7111,7 @@ void visit_emit_vertex_with_counter(isel_context *ctx, nir_intrinsic_instr *inst
|
||||
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
|
||||
mtbuf->operands[0] = Operand(gsvs_ring);
|
||||
mtbuf->operands[1] = vaddr_offset;
|
||||
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->gs2vs_offset));
|
||||
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->ac.gs2vs_offset));
|
||||
mtbuf->operands[3] = Operand(ctx->outputs.temps[i * 4u + j]);
|
||||
mtbuf->offen = !vaddr_offset.isUndefined();
|
||||
mtbuf->dfmt = V_008F0C_BUF_DATA_FORMAT_32;
|
||||
@@ -10251,7 +10251,7 @@ static void create_vs_exports(isel_context *ctx)
|
||||
if (ctx->stage.has(SWStage::TES))
|
||||
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.tes_patch_id);
|
||||
else
|
||||
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->vs_prim_id);
|
||||
ctx->outputs.temps[VARYING_SLOT_PRIMITIVE_ID * 4u] = get_arg(ctx, ctx->args->ac.vs_prim_id);
|
||||
}
|
||||
|
||||
if (ctx->options->key.has_multiview_view_index) {
|
||||
@@ -10647,7 +10647,7 @@ static void write_tcs_tess_factors(isel_context *ctx)
|
||||
}
|
||||
|
||||
Temp rel_patch_id = get_tess_rel_patch_id(ctx);
|
||||
Temp tf_base = get_arg(ctx, ctx->args->tess_factor_offset);
|
||||
Temp tf_base = get_arg(ctx, ctx->args->ac.tcs_factor_offset);
|
||||
Temp byte_offset = bld.v_mul24_imm(bld.def(v1), rel_patch_id, stride * 4u);
|
||||
unsigned tf_const_offset = 0;
|
||||
|
||||
@@ -10677,7 +10677,7 @@ static void write_tcs_tess_factors(isel_context *ctx)
|
||||
/* Store to offchip for TES to read - only if TES reads them */
|
||||
if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
|
||||
Temp hs_ring_tess_offchip = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u));
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->oc_lds);
|
||||
Temp oc_lds = get_arg(ctx, ctx->args->ac.tess_offchip_offset);
|
||||
|
||||
std::pair<Temp, unsigned> vmem_offs_outer = get_tcs_per_patch_output_vmem_offset(ctx, nullptr, ctx->tcs_tess_lvl_out_loc);
|
||||
store_vmem_mubuf(ctx, tf_outer_vec, hs_ring_tess_offchip, vmem_offs_outer.first, oc_lds, vmem_offs_outer.second, 4, (1 << outer_comps) - 1, true, memory_sync_info(storage_vmem_output));
|
||||
@@ -10790,7 +10790,7 @@ static void emit_streamout(isel_context *ctx, unsigned stream)
|
||||
}
|
||||
|
||||
Temp so_vtx_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->streamout_config), Operand(0x70010u));
|
||||
get_arg(ctx, ctx->args->ac.streamout_config), Operand(0x70010u));
|
||||
|
||||
Temp tid = emit_mbcnt(ctx, bld.tmp(v1));
|
||||
|
||||
@@ -10801,7 +10801,7 @@ static void emit_streamout(isel_context *ctx, unsigned stream)
|
||||
|
||||
bld.reset(ctx->block);
|
||||
|
||||
Temp so_write_index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->streamout_write_idx), tid);
|
||||
Temp so_write_index = bld.vadd32(bld.def(v1), get_arg(ctx, ctx->args->ac.streamout_write_index), tid);
|
||||
|
||||
Temp so_write_offset[4];
|
||||
|
||||
@@ -10812,15 +10812,15 @@ static void emit_streamout(isel_context *ctx, unsigned stream)
|
||||
|
||||
if (stride == 1) {
|
||||
Temp offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->streamout_write_idx),
|
||||
get_arg(ctx, ctx->args->streamout_offset[i]));
|
||||
get_arg(ctx, ctx->args->ac.streamout_write_index),
|
||||
get_arg(ctx, ctx->args->ac.streamout_offset[i]));
|
||||
Temp new_offset = bld.vadd32(bld.def(v1), offset, tid);
|
||||
|
||||
so_write_offset[i] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), new_offset);
|
||||
} else {
|
||||
Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u);
|
||||
Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(4u),
|
||||
get_arg(ctx, ctx->args->streamout_offset[i]));
|
||||
get_arg(ctx, ctx->args->ac.streamout_offset[i]));
|
||||
so_write_offset[i] = bld.vadd32(bld.def(v1), offset, offset2);
|
||||
}
|
||||
}
|
||||
@@ -10892,7 +10892,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
|
||||
* handling spilling.
|
||||
*/
|
||||
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
|
||||
ctx->program->scratch_offset = get_arg(ctx, ctx->args->scratch_offset);
|
||||
ctx->program->scratch_offset = get_arg(ctx, ctx->args->ac.scratch_offset);
|
||||
|
||||
return instr;
|
||||
}
|
||||
@@ -10903,19 +10903,19 @@ void fix_ls_vgpr_init_bug(isel_context *ctx, Pseudo_instruction *startpgm)
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
constexpr unsigned hs_idx = 1u;
|
||||
Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info),
|
||||
get_arg(ctx, ctx->args->ac.merged_wave_info),
|
||||
Operand((8u << 16) | (hs_idx * 8u)));
|
||||
Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
|
||||
|
||||
/* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
|
||||
|
||||
Temp instance_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->rel_auto_id),
|
||||
get_arg(ctx, ctx->args->ac.vs_rel_patch_id),
|
||||
get_arg(ctx, ctx->args->ac.instance_id),
|
||||
ls_has_nonzero_hs_threads);
|
||||
Temp rel_auto_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
Temp vs_rel_patch_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->ac.tcs_rel_ids),
|
||||
get_arg(ctx, ctx->args->rel_auto_id),
|
||||
get_arg(ctx, ctx->args->ac.vs_rel_patch_id),
|
||||
ls_has_nonzero_hs_threads);
|
||||
Temp vertex_id = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->ac.tcs_patch_id),
|
||||
@@ -10923,7 +10923,7 @@ void fix_ls_vgpr_init_bug(isel_context *ctx, Pseudo_instruction *startpgm)
|
||||
ls_has_nonzero_hs_threads);
|
||||
|
||||
ctx->arg_temps[ctx->args->ac.instance_id.arg_index] = instance_id;
|
||||
ctx->arg_temps[ctx->args->rel_auto_id.arg_index] = rel_auto_id;
|
||||
ctx->arg_temps[ctx->args->ac.vs_rel_patch_id.arg_index] = vs_rel_patch_id;
|
||||
ctx->arg_temps[ctx->args->ac.vertex_id.arg_index] = vertex_id;
|
||||
}
|
||||
|
||||
@@ -11075,9 +11075,9 @@ Temp merged_wave_info_to_mask(isel_context *ctx, unsigned i)
|
||||
|
||||
/* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */
|
||||
Temp count = i == 0
|
||||
? get_arg(ctx, ctx->args->merged_wave_info)
|
||||
? get_arg(ctx, ctx->args->ac.merged_wave_info)
|
||||
: bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand(i * 8u));
|
||||
get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(i * 8u));
|
||||
|
||||
return lanecount_to_mask(ctx, count);
|
||||
}
|
||||
@@ -11086,14 +11086,14 @@ Temp ngg_max_vertex_count(isel_context *ctx)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->gs_tg_info), Operand(12u | (9u << 16u)));
|
||||
get_arg(ctx, ctx->args->ac.gs_tg_info), Operand(12u | (9u << 16u)));
|
||||
}
|
||||
|
||||
Temp ngg_max_primitive_count(isel_context *ctx)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->gs_tg_info), Operand(22u | (9u << 16u)));
|
||||
get_arg(ctx, ctx->args->ac.gs_tg_info), Operand(22u | (9u << 16u)));
|
||||
}
|
||||
|
||||
void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Temp prm_cnt = Temp())
|
||||
@@ -11105,7 +11105,7 @@ void ngg_emit_sendmsg_gs_alloc_req(isel_context *ctx, Temp vtx_cnt = Temp(), Tem
|
||||
|
||||
/* Get the id of the current wave within the threadgroup (workgroup) */
|
||||
Builder::Result wave_id_in_tg = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(ctx, ctx->args->merged_wave_info), Operand(24u | (4u << 16)));
|
||||
get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(24u | (4u << 16)));
|
||||
|
||||
/* Execute the following code only on the first wave (wave id 0),
|
||||
* use the SCC def to tell if the wave id is zero or not.
|
||||
@@ -11216,7 +11216,7 @@ void ngg_emit_prim_export(isel_context *ctx, unsigned num_vertices_per_primitive
|
||||
Temp prim_exp_arg;
|
||||
|
||||
if (!ctx->stage.has(SWStage::GS) && ctx->args->options->key.vs_common_out.as_ngg_passthrough)
|
||||
prim_exp_arg = get_arg(ctx, ctx->args->gs_vtx_offset[0]);
|
||||
prim_exp_arg = get_arg(ctx, ctx->args->ac.gs_vtx_offset[0]);
|
||||
else
|
||||
prim_exp_arg = ngg_pack_prim_exp_arg(ctx, num_vertices_per_primitive, vtxindex, is_null);
|
||||
|
||||
@@ -11258,13 +11258,13 @@ void ngg_nogs_export_primitives(isel_context *ctx)
|
||||
Temp vtxindex[max_vertices_per_primitive];
|
||||
if (!ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
|
||||
vtxindex[0] = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu),
|
||||
get_arg(ctx, ctx->args->gs_vtx_offset[0]));
|
||||
get_arg(ctx, ctx->args->ac.gs_vtx_offset[0]));
|
||||
vtxindex[1] = num_vertices_per_primitive < 2 ? Temp(0, v1) :
|
||||
bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1),
|
||||
get_arg(ctx, ctx->args->gs_vtx_offset[0]), Operand(16u), Operand(16u));
|
||||
get_arg(ctx, ctx->args->ac.gs_vtx_offset[0]), Operand(16u), Operand(16u));
|
||||
vtxindex[2] = num_vertices_per_primitive < 3 ? Temp(0, v1) :
|
||||
bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xffffu),
|
||||
get_arg(ctx, ctx->args->gs_vtx_offset[2]));
|
||||
get_arg(ctx, ctx->args->ac.gs_vtx_offset[2]));
|
||||
}
|
||||
|
||||
/* Export primitive data to the index buffer. */
|
||||
@@ -11797,10 +11797,10 @@ void select_program(Program *program,
|
||||
create_workgroup_barrier(bld);
|
||||
|
||||
if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) {
|
||||
ctx.gs_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, m0), bld.def(s1, scc), get_arg(&ctx, args->merged_wave_info), Operand((8u << 16) | 16u));
|
||||
ctx.gs_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, m0), bld.def(s1, scc), get_arg(&ctx, args->ac.merged_wave_info), Operand((8u << 16) | 16u));
|
||||
}
|
||||
} else if (ctx.stage == geometry_gs)
|
||||
ctx.gs_wave_id = get_arg(&ctx, args->gs_wave_id);
|
||||
ctx.gs_wave_id = get_arg(&ctx, args->ac.gs_wave_id);
|
||||
|
||||
if (ctx.stage == fragment_fs)
|
||||
handle_bc_optimize(&ctx);
|
||||
@@ -11876,7 +11876,7 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
|
||||
Operand stream_id(0u);
|
||||
if (args->shader_info->so.num_outputs)
|
||||
stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
|
||||
get_arg(&ctx, ctx.args->streamout_config), Operand(0x20018u));
|
||||
get_arg(&ctx, ctx.args->ac.streamout_config), Operand(0x20018u));
|
||||
|
||||
Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), get_arg(&ctx, ctx.args->ac.vertex_id));
|
||||
|
||||
|
@@ -58,7 +58,7 @@ struct radv_shader_context {
|
||||
|
||||
LLVMValueRef ring_offsets;
|
||||
|
||||
LLVMValueRef rel_auto_id;
|
||||
LLVMValueRef vs_rel_patch_id;
|
||||
|
||||
LLVMValueRef gs_wave_id;
|
||||
LLVMValueRef gs_vtx_offset[6];
|
||||
@@ -108,7 +108,7 @@ static LLVMValueRef get_rel_patch_id(struct radv_shader_context *ctx)
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
|
||||
0, 8);
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return ac_get_arg(&ctx->ac, ctx->args->tes_rel_patch_id);
|
||||
return ac_get_arg(&ctx->ac, ctx->args->ac.tes_rel_patch_id);
|
||||
break;
|
||||
default:
|
||||
unreachable("Illegal stage");
|
||||
@@ -559,7 +559,7 @@ store_tcs_output(struct ac_shader_abi *abi,
|
||||
LLVMValueRef dw_addr;
|
||||
LLVMValueRef stride = NULL;
|
||||
LLVMValueRef buf_addr = NULL;
|
||||
LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->oc_lds);
|
||||
LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);
|
||||
unsigned param = driver_location;
|
||||
bool store_lds = true;
|
||||
|
||||
@@ -626,7 +626,7 @@ load_tes_input(struct ac_shader_abi *abi,
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
LLVMValueRef buf_addr;
|
||||
LLVMValueRef result;
|
||||
LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->oc_lds);
|
||||
LLVMValueRef oc_lds = ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset);
|
||||
unsigned param = driver_location;
|
||||
|
||||
buf_addr = get_tcs_tes_buffer_address_params(ctx, param, vertex_index, param_index);
|
||||
@@ -813,7 +813,7 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
|
||||
out_val, 1,
|
||||
voffset,
|
||||
ac_get_arg(&ctx->ac,
|
||||
ctx->args->gs2vs_offset),
|
||||
ctx->args->ac.gs2vs_offset),
|
||||
0, ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
@@ -842,8 +842,8 @@ load_tess_coord(struct ac_shader_abi *abi)
|
||||
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
|
||||
|
||||
LLVMValueRef coord[4] = {
|
||||
ac_get_arg(&ctx->ac, ctx->args->tes_u),
|
||||
ac_get_arg(&ctx->ac, ctx->args->tes_v),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tes_u),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tes_v),
|
||||
ctx->ac.f32_0,
|
||||
ctx->ac.f32_0,
|
||||
};
|
||||
@@ -1114,7 +1114,7 @@ static void
|
||||
handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||
struct nir_variable *variable)
|
||||
{
|
||||
LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->vertex_buffers);
|
||||
LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
|
||||
LLVMValueRef t_offset;
|
||||
LLVMValueRef t_list;
|
||||
LLVMValueRef input;
|
||||
@@ -1619,10 +1619,10 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
|
||||
int i;
|
||||
|
||||
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
|
||||
assert(ctx->args->streamout_config.used);
|
||||
assert(ctx->args->ac.streamout_config.used);
|
||||
LLVMValueRef so_vtx_count =
|
||||
ac_build_bfe(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->streamout_config),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config),
|
||||
LLVMConstInt(ctx->ac.i32, 16, false),
|
||||
LLVMConstInt(ctx->ac.i32, 7, false), false);
|
||||
|
||||
@@ -1644,7 +1644,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
|
||||
* attrib_offset
|
||||
*/
|
||||
LLVMValueRef so_write_index =
|
||||
ac_get_arg(&ctx->ac, ctx->args->streamout_write_idx);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
|
||||
|
||||
/* Compute (streamout_write_index + thread_id). */
|
||||
so_write_index =
|
||||
@@ -1670,7 +1670,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
|
||||
buf_ptr, offset);
|
||||
|
||||
LLVMValueRef so_offset =
|
||||
ac_get_arg(&ctx->ac, ctx->args->streamout_offset[i]);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
|
||||
|
||||
so_offset = LLVMBuildMul(ctx->ac.builder, so_offset,
|
||||
LLVMConstInt(ctx->ac.i32, 4, false), "");
|
||||
@@ -1938,7 +1938,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
|
||||
else
|
||||
outputs[noutput].values[0] =
|
||||
ac_get_arg(&ctx->ac, ctx->args->vs_prim_id);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
|
||||
for (unsigned j = 1; j < 4; j++)
|
||||
outputs[noutput].values[j] = ctx->ac.f32_0;
|
||||
noutput++;
|
||||
@@ -1961,7 +1961,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx =
|
||||
ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
|
||||
vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
||||
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
||||
LLVMConstInt(ctx->ac.i32,
|
||||
@@ -2013,7 +2013,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
||||
ctx->esgs_ring,
|
||||
out_val, 1,
|
||||
NULL,
|
||||
ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.es2gs_offset),
|
||||
(4 * i + j) * 4,
|
||||
ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
@@ -2024,7 +2024,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
|
||||
static void
|
||||
handle_ls_outputs_post(struct radv_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef vertex_id = ctx->rel_auto_id;
|
||||
LLVMValueRef vertex_id = ctx->vs_rel_patch_id;
|
||||
uint32_t num_tcs_inputs = ctx->args->shader_info->vs.num_linked_outputs;
|
||||
LLVMValueRef vertex_dw_stride = LLVMConstInt(ctx->ac.i32, num_tcs_inputs * 4, false);
|
||||
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id,
|
||||
@@ -2052,12 +2052,12 @@ handle_ls_outputs_post(struct radv_shader_context *ctx)
|
||||
static LLVMValueRef get_wave_id_in_tg(struct radv_shader_context *ctx)
|
||||
{
|
||||
return ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_tgsize(struct radv_shader_context *ctx)
|
||||
{
|
||||
return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 28, 4);
|
||||
return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
|
||||
@@ -2071,7 +2071,7 @@ static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx)
|
||||
{
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info),
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
|
||||
LLVMConstInt(ctx->ac.i32, 12, false),
|
||||
LLVMConstInt(ctx->ac.i32, 9, false),
|
||||
false);
|
||||
@@ -2079,7 +2079,7 @@ static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx)
|
||||
{
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info),
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
|
||||
LLVMConstInt(ctx->ac.i32, 22, false),
|
||||
LLVMConstInt(ctx->ac.i32, 9, false),
|
||||
false);
|
||||
@@ -2087,7 +2087,7 @@ static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef ngg_get_ordered_id(struct radv_shader_context *ctx)
|
||||
{
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info),
|
||||
return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
|
||||
ctx->ac.i32_0,
|
||||
LLVMConstInt(ctx->ac.i32, 12, false),
|
||||
false);
|
||||
@@ -2709,17 +2709,17 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
|
||||
ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader);
|
||||
|
||||
LLVMValueRef prims_in_wave = ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
|
||||
LLVMValueRef vtx_in_wave = ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 0, 8);
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8);
|
||||
LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), prims_in_wave, "");
|
||||
LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
|
||||
LLVMValueRef vtxindex[] = {
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]), 0, 16),
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]), 16, 16),
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[2]), 0, 16),
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16),
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16),
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16),
|
||||
};
|
||||
|
||||
/* Determine the number of vertices per primitive. */
|
||||
@@ -2802,7 +2802,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
|
||||
struct ac_ngg_prim prim = {0};
|
||||
|
||||
if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
|
||||
prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[0]);
|
||||
prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
|
||||
} else {
|
||||
prim.num_vertices = num_vertices;
|
||||
prim.isnull = ctx->ac.i1false;
|
||||
@@ -3390,7 +3390,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||
|
||||
|
||||
buffer = ctx->hs_ring_tess_factor;
|
||||
tf_base = ac_get_arg(&ctx->ac, ctx->args->tess_factor_offset);
|
||||
tf_base = ac_get_arg(&ctx->ac, ctx->args->ac.tcs_factor_offset);
|
||||
byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
|
||||
LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
|
||||
unsigned tf_offset = 0;
|
||||
@@ -3432,7 +3432,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
|
||||
outer_comps, tf_outer_offset,
|
||||
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset),
|
||||
0, ac_glc);
|
||||
if (inner_comps) {
|
||||
tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
|
||||
@@ -3442,7 +3442,7 @@ write_tess_factors(struct radv_shader_context *ctx)
|
||||
ac_build_gather_values(&ctx->ac, inner, inner_comps);
|
||||
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
|
||||
inner_comps, tf_inner_offset,
|
||||
ac_get_arg(&ctx->ac, ctx->args->oc_lds),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tess_offchip_offset),
|
||||
0, ac_glc);
|
||||
}
|
||||
}
|
||||
@@ -3757,15 +3757,15 @@ radv_nir_get_max_workgroup_size(enum chip_class chip_class,
|
||||
static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef count =
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 8, 8);
|
||||
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
|
||||
LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
|
||||
ctx->ac.i32_0, "");
|
||||
ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
|
||||
ac_get_arg(&ctx->ac, ctx->args->rel_auto_id),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.vs_rel_patch_id),
|
||||
ctx->abi.instance_id, "");
|
||||
ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
|
||||
ctx->vs_rel_patch_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
|
||||
ctx->rel_auto_id,
|
||||
ctx->vs_rel_patch_id,
|
||||
"");
|
||||
ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id),
|
||||
@@ -3778,17 +3778,17 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
|
||||
for(int i = 5; i >= 0; --i) {
|
||||
ctx->gs_vtx_offset[i] =
|
||||
ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[i & ~1]),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]),
|
||||
(i & 1) * 16, 16);
|
||||
}
|
||||
|
||||
ctx->gs_wave_id = ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac, ctx->args->merged_wave_info),
|
||||
ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info),
|
||||
16, 8);
|
||||
} else {
|
||||
for (int i = 0; i < 6; i++)
|
||||
ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[i]);
|
||||
ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->gs_wave_id);
|
||||
ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]);
|
||||
ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3866,8 +3866,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
|
||||
if (args->ac.vertex_id.used)
|
||||
ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
|
||||
if (args->rel_auto_id.used)
|
||||
ctx.rel_auto_id = ac_get_arg(&ctx.ac, args->rel_auto_id);
|
||||
if (args->ac.vs_rel_patch_id.used)
|
||||
ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id);
|
||||
if (args->ac.instance_id.used)
|
||||
ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
|
||||
|
||||
@@ -4016,7 +4016,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
|
||||
|
||||
LLVMValueRef count =
|
||||
ac_unpack_param(&ctx.ac,
|
||||
ac_get_arg(&ctx.ac, args->merged_wave_info),
|
||||
ac_get_arg(&ctx.ac, args->ac.merged_wave_info),
|
||||
8 * shader_idx, 8);
|
||||
LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
|
||||
LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
|
||||
@@ -4215,7 +4215,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
|
||||
stream_id =
|
||||
ac_unpack_param(&ctx->ac,
|
||||
ac_get_arg(&ctx->ac,
|
||||
ctx->args->streamout_config),
|
||||
ctx->args->ac.streamout_config),
|
||||
24, 2);
|
||||
} else {
|
||||
stream_id = ctx->ac.i32_0;
|
||||
|
@@ -279,7 +279,7 @@ declare_vs_specific_input_sgprs(struct radv_shader_args *args,
|
||||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
|
||||
if (args->shader_info->vs.has_vertex_buffers) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
|
||||
&args->vertex_buffers);
|
||||
&args->ac.vertex_buffers);
|
||||
}
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
|
||||
@@ -295,7 +295,7 @@ declare_vs_input_vgprs(struct radv_shader_args *args)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
|
||||
if (!args->is_gs_copy_shader) {
|
||||
if (args->options->key.vs_common_out.as_ls) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->rel_auto_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
|
||||
if (args->options->chip_class >= GFX10) {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
@@ -311,12 +311,12 @@ declare_vs_input_vgprs(struct radv_shader_args *args)
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
}
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->vs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
}
|
||||
}
|
||||
@@ -339,8 +339,8 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
|
||||
assert(stage == MESA_SHADER_VERTEX ||
|
||||
stage == MESA_SHADER_TESS_EVAL);
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_config);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_write_idx);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
|
||||
} else if (stage == MESA_SHADER_TESS_EVAL) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
}
|
||||
@@ -350,16 +350,16 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
|
||||
if (!args->shader_info->so.strides[i])
|
||||
continue;
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->streamout_offset[i]);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
declare_tes_input_vgprs(struct radv_shader_args *args)
|
||||
{
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_u);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->tes_v);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->tes_rel_patch_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
|
||||
}
|
||||
|
||||
@@ -482,7 +482,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
|
||||
@@ -501,7 +501,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
|
||||
if (args->options->key.vs_common_out.as_es) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->es2gs_offset);
|
||||
&args->ac.es2gs_offset);
|
||||
} else if (args->options->key.vs_common_out.as_ls) {
|
||||
/* no extra parameters */
|
||||
} else {
|
||||
@@ -510,7 +510,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
|
||||
declare_vs_input_vgprs(args);
|
||||
@@ -518,13 +518,13 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
if (has_previous_stage) {
|
||||
// First 6 system regs
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->merged_wave_info);
|
||||
&args->ac.merged_wave_info);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->tess_factor_offset);
|
||||
&args->ac.tcs_factor_offset);
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
|
||||
|
||||
@@ -553,12 +553,12 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
&args->ac.view_index);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->tess_factor_offset);
|
||||
&args->ac.tcs_factor_offset);
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->ac.tcs_patch_id);
|
||||
@@ -574,17 +574,17 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
&args->ac.view_index);
|
||||
|
||||
if (args->options->key.vs_common_out.as_es) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->es2gs_offset);
|
||||
&args->ac.es2gs_offset);
|
||||
} else {
|
||||
declare_streamout_sgprs(args, stage);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
|
||||
}
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
declare_tes_input_vgprs(args);
|
||||
break;
|
||||
@@ -593,17 +593,17 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
// First 6 system regs
|
||||
if (args->options->key.vs_common_out.as_ngg) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->gs_tg_info);
|
||||
&args->ac.gs_tg_info);
|
||||
} else {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->gs2vs_offset);
|
||||
&args->ac.gs2vs_offset);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->merged_wave_info);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
|
||||
&args->ac.merged_wave_info);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
|
||||
|
||||
@@ -626,15 +626,15 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[0]);
|
||||
&args->ac.gs_vtx_offset[0]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[2]);
|
||||
&args->ac.gs_vtx_offset[2]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->ac.gs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->ac.gs_invocation_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[4]);
|
||||
&args->ac.gs_vtx_offset[4]);
|
||||
|
||||
if (previous_stage == MESA_SHADER_VERTEX) {
|
||||
declare_vs_input_vgprs(args);
|
||||
@@ -649,26 +649,26 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
&args->ac.view_index);
|
||||
}
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[0]);
|
||||
&args->ac.gs_vtx_offset[0]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[1]);
|
||||
&args->ac.gs_vtx_offset[1]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->ac.gs_prim_id);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[2]);
|
||||
&args->ac.gs_vtx_offset[2]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[3]);
|
||||
&args->ac.gs_vtx_offset[3]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[4]);
|
||||
&args->ac.gs_vtx_offset[4]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->gs_vtx_offset[5]);
|
||||
&args->ac.gs_vtx_offset[5]);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&args->ac.gs_invocation_id);
|
||||
}
|
||||
@@ -679,7 +679,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
|
||||
if (args->options->explicit_scratch_args) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
&args->scratch_offset);
|
||||
&args->ac.scratch_offset);
|
||||
}
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
|
||||
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
|
||||
|
@@ -34,36 +34,9 @@ struct radv_shader_args {
|
||||
|
||||
struct ac_arg descriptor_sets[MAX_SETS];
|
||||
struct ac_arg ring_offsets;
|
||||
struct ac_arg scratch_offset;
|
||||
|
||||
struct ac_arg vertex_buffers;
|
||||
struct ac_arg rel_auto_id;
|
||||
struct ac_arg vs_prim_id;
|
||||
struct ac_arg es2gs_offset;
|
||||
|
||||
struct ac_arg oc_lds;
|
||||
struct ac_arg merged_wave_info;
|
||||
struct ac_arg tess_factor_offset;
|
||||
struct ac_arg tes_rel_patch_id;
|
||||
struct ac_arg tes_u;
|
||||
struct ac_arg tes_v;
|
||||
|
||||
/* HW GS */
|
||||
/* On gfx10:
|
||||
* - bits 0..11: ordered_wave_id
|
||||
* - bits 12..20: number of vertices in group
|
||||
* - bits 22..30: number of primitives in group
|
||||
*/
|
||||
struct ac_arg gs_tg_info;
|
||||
struct ac_arg gs2vs_offset;
|
||||
struct ac_arg gs_wave_id;
|
||||
struct ac_arg gs_vtx_offset[6];
|
||||
|
||||
/* Streamout */
|
||||
struct ac_arg streamout_buffers;
|
||||
struct ac_arg streamout_write_idx;
|
||||
struct ac_arg streamout_config;
|
||||
struct ac_arg streamout_offset[4];
|
||||
|
||||
/* NGG GS */
|
||||
struct ac_arg ngg_gs_state;
|
||||
|
@@ -30,12 +30,12 @@
|
||||
|
||||
static LLVMValueRef get_wave_id_in_tg(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->merged_wave_info, 24, 4);
|
||||
return si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_tgsize(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->merged_wave_info, 28, 4);
|
||||
return si_unpack_param(ctx, ctx->args.merged_wave_info, 28, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx)
|
||||
@@ -49,17 +49,17 @@ static LLVMValueRef get_thread_id_in_tg(struct si_shader_context *ctx)
|
||||
|
||||
static LLVMValueRef ngg_get_vtx_cnt(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->gs_tg_info, 12, 9);
|
||||
return si_unpack_param(ctx, ctx->args.gs_tg_info, 12, 9);
|
||||
}
|
||||
|
||||
static LLVMValueRef ngg_get_prim_cnt(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->gs_tg_info, 22, 9);
|
||||
return si_unpack_param(ctx, ctx->args.gs_tg_info, 22, 9);
|
||||
}
|
||||
|
||||
static LLVMValueRef ngg_get_ordered_id(struct si_shader_context *ctx)
|
||||
{
|
||||
return si_unpack_param(ctx, ctx->gs_tg_info, 0, 12);
|
||||
return si_unpack_param(ctx, ctx->args.gs_tg_info, 0, 12);
|
||||
}
|
||||
|
||||
static LLVMValueRef ngg_get_query_buf(struct si_shader_context *ctx)
|
||||
@@ -1011,11 +1011,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
||||
}
|
||||
} else {
|
||||
assert(ctx->stage == MESA_SHADER_TESS_EVAL);
|
||||
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_u)),
|
||||
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tes_u)),
|
||||
ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_tes_u, 0)));
|
||||
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->tes_v)),
|
||||
LLVMBuildStore(builder, ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tes_v)),
|
||||
ac_build_gep0(&ctx->ac, new_vtx, LLVMConstInt(ctx->ac.i32, lds_tes_v, 0)));
|
||||
LLVMBuildStore(builder, LLVMBuildTrunc(builder, ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id), ctx->ac.i8, ""),
|
||||
LLVMBuildStore(builder, LLVMBuildTrunc(builder, ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id), ctx->ac.i8, ""),
|
||||
si_build_gep_i8(ctx, new_vtx, lds_byte2_tes_rel_patch_id));
|
||||
if (uses_tes_prim_id) {
|
||||
LLVMBuildStore(
|
||||
@@ -1048,8 +1048,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
||||
ngg_get_prim_cnt(ctx));
|
||||
|
||||
/* Update thread counts in SGPRs. */
|
||||
LLVMValueRef new_gs_tg_info = ac_get_arg(&ctx->ac, ctx->gs_tg_info);
|
||||
LLVMValueRef new_merged_wave_info = ac_get_arg(&ctx->ac, ctx->merged_wave_info);
|
||||
LLVMValueRef new_gs_tg_info = ac_get_arg(&ctx->ac, ctx->args.gs_tg_info);
|
||||
LLVMValueRef new_merged_wave_info = ac_get_arg(&ctx->ac, ctx->args.merged_wave_info);
|
||||
|
||||
/* This also converts the thread count from the total count to the per-wave count. */
|
||||
update_thread_counts(ctx, &new_num_es_threads, &new_gs_tg_info, 9, 12, &new_merged_wave_info, 8,
|
||||
@@ -1128,7 +1128,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_gs_tg_info, 2, "");
|
||||
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, new_merged_wave_info, 3, "");
|
||||
if (ctx->stage == MESA_SHADER_TESS_EVAL)
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 4);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 4);
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
|
||||
@@ -1142,7 +1142,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->vertex_buffers, 8 + SI_VS_NUM_USER_SGPR);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + SI_VS_NUM_USER_SGPR);
|
||||
|
||||
for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) {
|
||||
ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i],
|
||||
|
@@ -174,8 +174,8 @@ static void declare_streamout_params(struct si_shader_context *ctx,
|
||||
|
||||
/* Streamout SGPRs. */
|
||||
if (so->num_outputs) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_config);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_write_index);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_config);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_write_index);
|
||||
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
}
|
||||
@@ -185,7 +185,7 @@ static void declare_streamout_params(struct si_shader_context *ctx,
|
||||
if (!so->stride[i])
|
||||
continue;
|
||||
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->streamout_offset[i]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.streamout_offset[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -269,7 +269,7 @@ static void declare_vs_specific_input_sgprs(struct si_shader_context *ctx)
|
||||
|
||||
static void declare_vb_descriptor_input_sgprs(struct si_shader_context *ctx)
|
||||
{
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->vertex_buffers);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->args.vertex_buffers);
|
||||
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
|
||||
if (num_vbos_in_user_sgprs) {
|
||||
@@ -295,7 +295,7 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_
|
||||
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vertex_id);
|
||||
if (shader->key.as_ls) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->rel_auto_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_rel_patch_id);
|
||||
if (ctx->screen->info.chip_class >= GFX10) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
|
||||
@@ -306,11 +306,11 @@ static void declare_vs_input_vgprs(struct si_shader_context *ctx, unsigned *num_
|
||||
} else if (ctx->screen->info.chip_class >= GFX10) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user VGPR */
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT,
|
||||
&ctx->vs_prim_id); /* user vgpr or PrimID (legacy) */
|
||||
&ctx->args.vs_prim_id); /* user vgpr or PrimID (legacy) */
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
|
||||
} else {
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.instance_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->vs_prim_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.vs_prim_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
}
|
||||
|
||||
@@ -348,9 +348,9 @@ static void declare_vs_blit_inputs(struct si_shader_context *ctx, unsigned vs_bl
|
||||
|
||||
static void declare_tes_input_vgprs(struct si_shader_context *ctx)
|
||||
{
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_u);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->tes_v);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->tes_rel_patch_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_u);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &ctx->args.tes_v);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_rel_patch_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tes_patch_id);
|
||||
}
|
||||
|
||||
@@ -403,7 +403,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
declare_vb_descriptor_input_sgprs(ctx);
|
||||
|
||||
if (shader->key.as_es) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
|
||||
} else if (shader->key.as_ls) {
|
||||
/* no extra parameters */
|
||||
} else {
|
||||
@@ -428,8 +428,8 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_offsets);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
|
||||
|
||||
/* VGPRs */
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.tcs_patch_id);
|
||||
@@ -448,10 +448,10 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
/* Merged stages have 8 system SGPRs at the beginning. */
|
||||
/* SPI_SHADER_USER_DATA_ADDR_LO/HI_HS */
|
||||
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_TESS_CTRL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
|
||||
|
||||
@@ -511,13 +511,13 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
declare_per_stage_desc_pointers(ctx, ctx->stage == MESA_SHADER_GEOMETRY);
|
||||
|
||||
if (ctx->shader->key.as_ngg)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_tg_info);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_tg_info);
|
||||
else
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
|
||||
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_wave_info);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->merged_scratch_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
|
||||
&ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
|
||||
@@ -599,12 +599,12 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
|
||||
|
||||
if (shader->key.as_es) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->es2gs_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
|
||||
} else {
|
||||
declare_streamout_params(ctx, &shader->selector->so);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
}
|
||||
|
||||
/* VGPRs */
|
||||
@@ -614,17 +614,17 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
declare_global_desc_pointers(ctx);
|
||||
declare_per_stage_desc_pointers(ctx, true);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs2vs_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->gs_wave_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs2vs_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.gs_wave_id);
|
||||
|
||||
/* VGPRs */
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[0]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[1]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_prim_id);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[2]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[3]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[4]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->gs_vtx_offset[5]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[2]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[3]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[4]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[5]);
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_invocation_id);
|
||||
break;
|
||||
|
||||
|
@@ -80,15 +80,9 @@ struct si_shader_context {
|
||||
|
||||
struct ac_arg rw_buffers;
|
||||
struct ac_arg bindless_samplers_and_images;
|
||||
/* Common inputs for merged shaders. */
|
||||
struct ac_arg merged_wave_info;
|
||||
struct ac_arg merged_scratch_offset;
|
||||
struct ac_arg small_prim_cull_info;
|
||||
/* API VS */
|
||||
struct ac_arg vertex_buffers;
|
||||
struct ac_arg vb_descriptors[5];
|
||||
struct ac_arg rel_auto_id;
|
||||
struct ac_arg vs_prim_id;
|
||||
struct ac_arg vertex_index0;
|
||||
/* VS states and layout of LS outputs / TCS inputs at the end
|
||||
* [0] = clamp vertex color
|
||||
@@ -110,10 +104,6 @@ struct si_shader_context {
|
||||
*/
|
||||
struct ac_arg vs_state_bits;
|
||||
struct ac_arg vs_blit_inputs;
|
||||
/* HW VS */
|
||||
struct ac_arg streamout_config;
|
||||
struct ac_arg streamout_write_index;
|
||||
struct ac_arg streamout_offset[4];
|
||||
|
||||
/* API TCS & TES */
|
||||
/* Layout of TCS outputs in the offchip buffer
|
||||
@@ -141,27 +131,10 @@ struct si_shader_context {
|
||||
* [19:31] = high 13 bits of the 32-bit address of tessellation ring buffers
|
||||
*/
|
||||
struct ac_arg tcs_out_lds_layout;
|
||||
struct ac_arg tcs_offchip_offset;
|
||||
struct ac_arg tcs_factor_offset;
|
||||
|
||||
/* API TES */
|
||||
struct ac_arg tes_offchip_addr;
|
||||
struct ac_arg tes_u;
|
||||
struct ac_arg tes_v;
|
||||
struct ac_arg tes_rel_patch_id;
|
||||
/* HW ES */
|
||||
struct ac_arg es2gs_offset;
|
||||
/* HW GS */
|
||||
/* On gfx10:
|
||||
* - bits 0..11: ordered_wave_id
|
||||
* - bits 12..20: number of vertices in group
|
||||
* - bits 22..30: number of primitives in group
|
||||
*/
|
||||
struct ac_arg gs_tg_info;
|
||||
/* API GS */
|
||||
struct ac_arg gs2vs_offset;
|
||||
struct ac_arg gs_wave_id; /* GFX6 */
|
||||
struct ac_arg gs_vtx_offset[6]; /* in dwords (GFX6) */
|
||||
struct ac_arg gs_vtx01_offset; /* in dwords (GFX9) */
|
||||
struct ac_arg gs_vtx23_offset; /* in dwords (GFX9) */
|
||||
struct ac_arg gs_vtx45_offset; /* in dwords (GFX9) */
|
||||
|
@@ -393,7 +393,7 @@ LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle
|
||||
|
||||
switch (ctx->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
return ac_get_arg(&ctx->ac, ctx->vs_prim_id);
|
||||
return ac_get_arg(&ctx->ac, ctx->args.vs_prim_id);
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
@@ -930,7 +930,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
||||
(ctx->stage == MESA_SHADER_TESS_EVAL ||
|
||||
(ctx->stage == MESA_SHADER_VERTEX &&
|
||||
!si_vs_needs_prolog(sel, &shader->key.part.vs.prolog, &shader->key, ngg_cull_shader)))) {
|
||||
si_init_exec_from_input(ctx, ctx->merged_wave_info, 0);
|
||||
si_init_exec_from_input(ctx, ctx->args.merged_wave_info, 0);
|
||||
} else if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_GEOMETRY ||
|
||||
(shader->key.as_ngg && !shader->key.as_es)) {
|
||||
LLVMValueRef thread_enabled = NULL;
|
||||
|
@@ -31,14 +31,14 @@ LLVMValueRef si_is_es_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute an ES thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 0, 8), "");
|
||||
si_unpack_param(ctx, ctx->args.merged_wave_info, 0, 8), "");
|
||||
}
|
||||
|
||||
LLVMValueRef si_is_gs_thread(struct si_shader_context *ctx)
|
||||
{
|
||||
/* Return true if the current thread should execute a GS thread. */
|
||||
return LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, ac_get_thread_id(&ctx->ac),
|
||||
si_unpack_param(ctx, ctx->merged_wave_info, 8, 8), "");
|
||||
si_unpack_param(ctx, ctx->args.merged_wave_info, 8, 8), "");
|
||||
}
|
||||
|
||||
static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned input_index,
|
||||
@@ -84,7 +84,7 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
|
||||
|
||||
/* GFX6: input load from the ESGS ring in memory. */
|
||||
/* Get the vertex offset parameter on GFX6. */
|
||||
LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->gs_vtx_offset[vtx_offset_param]);
|
||||
LLVMValueRef gs_vtx_offset = ac_get_arg(&ctx->ac, ctx->args.gs_vtx_offset[vtx_offset_param]);
|
||||
|
||||
vtx_offset = LLVMBuildMul(ctx->ac.builder, gs_vtx_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
|
||||
|
||||
@@ -119,11 +119,11 @@ static void si_set_es_return_value_for_gs(struct si_shader_context *ctx)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
||||
if (ctx->shader->key.as_ngg)
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->gs_tg_info, 2);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->args.gs_tg_info, 2);
|
||||
else
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->gs2vs_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.gs2vs_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
|
||||
@@ -158,7 +158,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L
|
||||
if (ctx->screen->info.chip_class >= GFX9 && info->num_outputs) {
|
||||
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->merged_wave_info, 24, 4);
|
||||
LLVMValueRef wave_idx = si_unpack_param(ctx, ctx->args.merged_wave_info, 24, 4);
|
||||
vertex_idx =
|
||||
LLVMBuildOr(ctx->ac.builder, vertex_idx,
|
||||
LLVMBuildMul(ctx->ac.builder, wave_idx,
|
||||
@@ -193,7 +193,7 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L
|
||||
}
|
||||
|
||||
ac_build_buffer_store_dword(&ctx->ac, ctx->esgs_ring, out_val, 1, NULL,
|
||||
ac_get_arg(&ctx->ac, ctx->es2gs_offset),
|
||||
ac_get_arg(&ctx->ac, ctx->args.es2gs_offset),
|
||||
(4 * param + chan) * 4, ac_glc | ac_slc | ac_swizzled);
|
||||
}
|
||||
}
|
||||
@@ -205,9 +205,9 @@ void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L
|
||||
static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
|
||||
{
|
||||
if (ctx->screen->info.chip_class >= GFX9)
|
||||
return si_unpack_param(ctx, ctx->merged_wave_info, 16, 8);
|
||||
return si_unpack_param(ctx, ctx->args.merged_wave_info, 16, 8);
|
||||
else
|
||||
return ac_get_arg(&ctx->ac, ctx->gs_wave_id);
|
||||
return ac_get_arg(&ctx->ac, ctx->args.gs_wave_id);
|
||||
}
|
||||
|
||||
static void emit_gs_epilogue(struct si_shader_context *ctx)
|
||||
@@ -249,7 +249,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVM
|
||||
|
||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
struct si_shader *shader = ctx->shader;
|
||||
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->gs2vs_offset);
|
||||
LLVMValueRef soffset = ac_get_arg(&ctx->ac, ctx->args.gs2vs_offset);
|
||||
LLVMValueRef gs_next_vertex;
|
||||
LLVMValueRef can_emit;
|
||||
unsigned chan, offset;
|
||||
@@ -464,7 +464,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
||||
LLVMValueRef stream_id;
|
||||
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
|
||||
stream_id = si_unpack_param(&ctx, ctx.streamout_config, 24, 2);
|
||||
stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2);
|
||||
else
|
||||
stream_id = ctx.ac.i32_0;
|
||||
|
||||
|
@@ -33,7 +33,7 @@ static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
|
||||
return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
|
||||
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id);
|
||||
return ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id);
|
||||
|
||||
default:
|
||||
assert(0);
|
||||
@@ -454,7 +454,7 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
|
||||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == (vertex_index == NULL));
|
||||
|
||||
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
||||
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
|
||||
addr =
|
||||
get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
|
||||
@@ -516,7 +516,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
|
||||
|
||||
buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
|
||||
|
||||
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
||||
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
|
||||
addr =
|
||||
get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
|
||||
@@ -562,7 +562,8 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
|
||||
static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v),
|
||||
LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->args.tes_u),
|
||||
ac_get_arg(&ctx->ac, ctx->args.tes_v),
|
||||
ctx->ac.f32_0, ctx->ac.f32_0};
|
||||
|
||||
/* For triangles, the vector should be (u, v, 1-u-v). */
|
||||
@@ -579,7 +580,7 @@ static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned sema
|
||||
|
||||
int param = si_shader_io_get_unique_index_patch(semantic);
|
||||
|
||||
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
||||
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
|
||||
LLVMConstInt(ctx->ac.i32, param, 0));
|
||||
|
||||
@@ -658,7 +659,7 @@ static void si_copy_tcs_inputs(struct si_shader_context *ctx)
|
||||
|
||||
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
|
||||
buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
|
||||
buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
||||
buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
|
||||
lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
|
||||
lds_base = get_tcs_in_current_patch_offset(ctx);
|
||||
@@ -779,7 +780,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
||||
buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
|
||||
|
||||
/* Get the offset. */
|
||||
tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset);
|
||||
tf_base = ac_get_arg(&ctx->ac, ctx->args.tcs_factor_offset);
|
||||
byteoffset =
|
||||
LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
|
||||
offset = 0;
|
||||
@@ -809,7 +810,7 @@ static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef re
|
||||
unsigned param_outer, param_inner;
|
||||
|
||||
buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
|
||||
base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
|
||||
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
|
||||
|
||||
param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
|
||||
tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
|
||||
@@ -879,15 +880,15 @@ static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_ou
|
||||
si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
|
||||
/* Tess offchip and tess factor offsets are at the beginning. */
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
|
||||
vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
|
||||
} else {
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
|
||||
/* Tess offchip and tess factor offsets are after user SGPRs. */
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
|
||||
vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
|
||||
}
|
||||
|
||||
@@ -925,10 +926,10 @@ static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
|
||||
ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
|
||||
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS);
|
||||
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
|
||||
@@ -956,7 +957,7 @@ void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, L
|
||||
struct si_shader *shader = ctx->shader;
|
||||
struct si_shader_info *info = &shader->selector->info;
|
||||
unsigned i, chan;
|
||||
LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id);
|
||||
LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
|
||||
LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
|
||||
LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
|
||||
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
|
||||
@@ -1020,9 +1021,9 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
@@ -1046,8 +1047,8 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
|
||||
}
|
||||
|
||||
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
|
||||
|
@@ -105,7 +105,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
||||
vb_desc = ac_get_arg(&ctx->ac, ctx->vb_descriptors[input_index]);
|
||||
} else {
|
||||
unsigned index = input_index - num_vbos_in_user_sgprs;
|
||||
vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->vertex_buffers),
|
||||
vb_desc = ac_build_load_to_sgpr(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.vertex_buffers),
|
||||
LLVMConstInt(ctx->ac.i32, index, 0));
|
||||
}
|
||||
|
||||
@@ -301,7 +301,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
|
||||
int i;
|
||||
|
||||
/* Get bits [22:16], i.e. (so_param >> 16) & 127; */
|
||||
LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->streamout_config, 16, 7);
|
||||
LLVMValueRef so_vtx_count = si_unpack_param(ctx, ctx->args.streamout_config, 16, 7);
|
||||
|
||||
LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
|
||||
|
||||
@@ -319,7 +319,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
|
||||
* attrib_offset
|
||||
*/
|
||||
|
||||
LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->streamout_write_index);
|
||||
LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args.streamout_write_index);
|
||||
|
||||
/* Compute (streamout_write_index + thread_id). */
|
||||
so_write_index = LLVMBuildAdd(builder, so_write_index, tid, "");
|
||||
@@ -338,7 +338,7 @@ void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_outp
|
||||
|
||||
so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
|
||||
|
||||
LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->streamout_offset[i]);
|
||||
LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args.streamout_offset[i]);
|
||||
so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");
|
||||
|
||||
so_write_offset[i] = ac_build_imad(
|
||||
|
Reference in New Issue
Block a user