ac/nir: lower more loads in ac_nir_lower_intrinsics_to_args instead of drivers

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32782>
This commit is contained in:
Marek Olšák
2024-12-25 19:05:38 -05:00
committed by Marge Bot
parent dc8a40ff3e
commit ae22da2ff8
3 changed files with 56 additions and 111 deletions

View File

@@ -255,6 +255,52 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, s->args, s->args->local_invocation_ids);
}
break;
case nir_intrinsic_load_merged_wave_info_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->merged_wave_info);
break;
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
replacement = ac_nir_unpack_arg(b, s->args, s->args->gs_tg_info, 12, 9);
break;
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
replacement = ac_nir_unpack_arg(b, s->args, s->args->gs_tg_info, 22, 9);
break;
case nir_intrinsic_load_packed_passthrough_primitive_amd:
/* NGG passthrough mode: the HW already packs the primitive export value to a single register.
*/
replacement = ac_nir_load_arg(b, s->args, s->args->gs_vtx_offset[0]);
break;
case nir_intrinsic_load_ordered_id_amd:
replacement = ac_nir_unpack_arg(b, s->args, s->args->gs_tg_info, 0, 12);
break;
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->tess_offchip_offset);
break;
case nir_intrinsic_load_ring_tess_factors_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->tcs_factor_offset);
break;
case nir_intrinsic_load_ring_es2gs_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->es2gs_offset);
break;
case nir_intrinsic_load_ring_gs2vs_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->gs2vs_offset);
break;
case nir_intrinsic_load_gs_vertex_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->gs_vtx_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_streamout_config_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->streamout_config);
break;
case nir_intrinsic_load_streamout_write_index_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->streamout_write_index);
break;
case nir_intrinsic_load_streamout_offset_amd:
replacement = ac_nir_load_arg(b, s->args, s->args->streamout_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_ring_attr_offset_amd: {
nir_def *ring_attr_offset = ac_nir_load_arg(b, s->args, s->args->gs_attr_offset);
replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
break;
}
case nir_intrinsic_load_first_vertex:
replacement = ac_nir_load_arg(b, s->args, s->args->base_vertex);
break;
@@ -310,6 +356,16 @@ lower_intrinsic_to_arg(nir_builder *b, nir_instr *instr, void *state)
case nir_intrinsic_load_front_face_fsign:
replacement = ac_nir_load_arg(b, s->args, s->args->front_face);
break;
case nir_intrinsic_load_layer_id:
replacement = ac_nir_unpack_arg(b, s->args, s->args->ancillary,
16, s->gfx_level >= GFX12 ? 14 : 13);
break;
case nir_intrinsic_load_barycentric_optimize_amd: {
nir_def *prim_mask = ac_nir_load_arg(b, s->args, s->args->prim_mask);
/* enabled when bit 31 is set */
replacement = nir_ilt_imm(b, prim_mask, 0);
break;
}
case nir_intrinsic_load_barycentric_pixel:
if (nir_intrinsic_interp_mode(intrin) == INTERP_MODE_NOPERSPECTIVE)
replacement = ac_nir_load_arg(b, s->args, s->args->linear_center);

View File

@@ -73,15 +73,9 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
case nir_intrinsic_load_ring_tess_factors_amd:
replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
break;
case nir_intrinsic_load_ring_tess_factors_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
break;
case nir_intrinsic_load_ring_tess_offchip_amd:
replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
break;
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
break;
case nir_intrinsic_load_tcs_num_patches_amd:
if (s->info->num_tess_patches) {
replacement = nir_imm_int(b, s->info->num_tess_patches);
@@ -114,13 +108,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
else
replacement = s->gsvs_ring[nir_intrinsic_stream_id(intrin)];
break;
case nir_intrinsic_load_ring_gs2vs_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs2vs_offset);
break;
case nir_intrinsic_load_ring_es2gs_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
break;
case nir_intrinsic_load_ring_attr_amd:
replacement = load_ring(b, RING_PS_ATTR, s);
@@ -132,12 +119,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
break;
case nir_intrinsic_load_ring_attr_offset_amd: {
nir_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
break;
}
case nir_intrinsic_load_patch_vertices_in:
if (stage == MESA_SHADER_TESS_CTRL) {
if (s->gfx_state->ts.patch_control_points) {
@@ -156,20 +137,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
} else
unreachable("invalid tessellation shader stage");
break;
case nir_intrinsic_load_gs_vertex_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 12, 9);
break;
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 22, 9);
break;
case nir_intrinsic_load_packed_passthrough_primitive_amd:
/* NGG passthrough mode: the HW already packs the primitive export value to a single register.
*/
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
break;
case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
break;
@@ -179,9 +146,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
case nir_intrinsic_load_prim_xfb_query_enabled_amd:
replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
break;
case nir_intrinsic_load_merged_wave_info_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
break;
case nir_intrinsic_load_cull_any_enabled_amd: {
nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);
@@ -388,21 +352,12 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
break;
}
case nir_intrinsic_load_streamout_config_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_config);
break;
case nir_intrinsic_load_streamout_write_index_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
break;
case nir_intrinsic_load_streamout_buffer_amd: {
nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
nir_imm_int(b, s->address32_hi));
replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
break;
}
case nir_intrinsic_load_streamout_offset_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_xfb_state_address_gfx12_amd:
replacement = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_state),
nir_imm_int(b, s->address32_hi));
@@ -460,9 +415,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
}
break;
}
case nir_intrinsic_load_ordered_id_amd:
replacement = ac_nir_unpack_arg(b, &s->args->ac, s->args->ac.gs_tg_info, 0, 12);
break;
case nir_intrinsic_load_force_vrs_rates_amd:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
break;
@@ -471,12 +423,6 @@ lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
replacement = nir_ine_imm(b, sample_coverage, 0);
break;
}
case nir_intrinsic_load_barycentric_optimize_amd: {
nir_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
/* enabled when bit 31 is set */
replacement = nir_ilt_imm(b, prim_mask, 0);
break;
}
case nir_intrinsic_load_poly_line_smooth_enabled: {
nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH);

View File

@@ -386,12 +386,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
replacement = nir_imul(b, per_vtx_out_patch_size, num_patches);
break;
}
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.tess_offchip_offset);
break;
case nir_intrinsic_load_ring_es2gs_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.es2gs_offset);
break;
case nir_intrinsic_load_clip_half_line_width_amd: {
nir_def *addr = ac_nir_load_arg(b, &args->ac, args->small_prim_cull_info);
replacement = nir_load_smem_amd(b, 2, addr, nir_imm_int(b, 32));
@@ -512,24 +506,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
case nir_intrinsic_load_ring_attr_amd:
replacement = build_attr_ring_desc(b, shader, args);
break;
case nir_intrinsic_load_ring_attr_offset_amd: {
nir_def *offset = ac_nir_unpack_arg(b, &args->ac, args->ac.gs_attr_offset, 0, 15);
replacement = nir_ishl_imm(b, offset, 9);
break;
}
case nir_intrinsic_load_ring_gs2vs_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.gs2vs_offset);
break;
case nir_intrinsic_load_streamout_config_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.streamout_config);
break;
case nir_intrinsic_load_streamout_write_index_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.streamout_write_index);
break;
case nir_intrinsic_load_streamout_offset_amd:
replacement =
ac_nir_load_arg(b, &args->ac, args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
break;
case nir_intrinsic_load_force_vrs_rates_amd:
if (sel->screen->info.gfx_level >= GFX11) {
/* Bits [2:5] = VRS rate
@@ -593,9 +569,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
replacement = nir_vector_insert_imm(b, s->tess_offchip_ring, addr, 0);
break;
}
case nir_intrinsic_load_ring_tess_factors_offset_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.tcs_factor_offset);
break;
case nir_intrinsic_load_alpha_reference_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->alpha_reference);
break;
@@ -608,16 +581,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
else
replacement = nir_imm_float(b, key->ps.opt.force_front_face_input == 1 ? 1.0 : -1.0);
break;
case nir_intrinsic_load_barycentric_optimize_amd: {
nir_def *prim_mask = ac_nir_load_arg(b, &args->ac, args->ac.prim_mask);
/* enabled when bit 31 is set */
replacement = nir_ilt_imm(b, prim_mask, 0);
break;
}
case nir_intrinsic_load_layer_id:
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.ancillary,
16, sel->screen->info.gfx_level >= GFX12 ? 14 : 13);
break;
case nir_intrinsic_load_color0:
case nir_intrinsic_load_color1: {
uint32_t colors_read = sel->info.colors_read;
@@ -661,20 +624,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
case nir_intrinsic_load_poly_line_smooth_enabled:
replacement = nir_imm_bool(b, key->ps.mono.poly_line_smoothing);
break;
case nir_intrinsic_load_gs_vertex_offset_amd: {
unsigned base = nir_intrinsic_base(intrin);
replacement = ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[base]);
break;
}
case nir_intrinsic_load_merged_wave_info_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.merged_wave_info);
break;
case nir_intrinsic_load_workgroup_num_input_vertices_amd:
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.gs_tg_info, 12, 9);
break;
case nir_intrinsic_load_workgroup_num_input_primitives_amd:
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.gs_tg_info, 22, 9);
break;
case nir_intrinsic_load_initial_edgeflags_amd: {
unsigned output_prim = si_get_output_prim_simplified(sel, &shader->key);
@@ -705,12 +654,6 @@ static bool lower_intrinsic(nir_builder *b, nir_instr *instr, struct lower_abi_s
}
break;
}
case nir_intrinsic_load_packed_passthrough_primitive_amd:
replacement = ac_nir_load_arg(b, &args->ac, args->ac.gs_vtx_offset[0]);
break;
case nir_intrinsic_load_ordered_id_amd:
replacement = ac_nir_unpack_arg(b, &args->ac, args->ac.gs_tg_info, 0, 12);
break;
case nir_intrinsic_load_ring_esgs_amd:
assert(s->esgs_ring);
replacement = s->esgs_ring;