From 428601095c38bd80f4ed164414f8096edd73832f Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Tue, 28 May 2024 15:56:52 +0200
Subject: [PATCH] ac,radeonsi: import PM4 state from RadeonSI

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/common/ac_pm4.c                       | 371 +++++++++++
 src/amd/common/ac_pm4.h                       |  76 +++
 src/amd/common/meson.build                    |   2 +
 .../drivers/radeonsi/si_cp_reg_shadowing.c    |   6 +-
 src/gallium/drivers/radeonsi/si_gfx_cs.c      |   2 +-
 src/gallium/drivers/radeonsi/si_pm4.c         | 329 +---------
 src/gallium/drivers/radeonsi/si_pm4.h         |  29 +-
 src/gallium/drivers/radeonsi/si_state.c       | 590 +++++++++---------
 .../drivers/radeonsi/si_state_draw.cpp        |   6 +-
 .../drivers/radeonsi/si_state_shaders.cpp     | 128 ++--
 10 files changed, 829 insertions(+), 710 deletions(-)
 create mode 100644 src/amd/common/ac_pm4.c
 create mode 100644 src/amd/common/ac_pm4.h

diff --git a/src/amd/common/ac_pm4.c b/src/amd/common/ac_pm4.c
new file mode 100644
index 00000000000..4049d675ae5
--- /dev/null
+++ b/src/amd/common/ac_pm4.c
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2012 Advanced Micro Devices, Inc.
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "ac_debug.h"
+#include "ac_gpu_info.h"
+#include "ac_pm4.h"
+
+#include "sid.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+static bool
+opcode_is_pairs(unsigned opcode)
+{
+   return opcode == PKT3_SET_CONTEXT_REG_PAIRS ||
+          opcode == PKT3_SET_SH_REG_PAIRS ||
+          opcode == PKT3_SET_UCONFIG_REG_PAIRS;
+}
+
+static bool
+opcode_is_pairs_packed(unsigned opcode)
+{
+   return opcode == PKT3_SET_CONTEXT_REG_PAIRS_PACKED ||
+          opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
+          opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N;
+}
+
+static unsigned
+pairs_packed_opcode_to_regular(unsigned opcode)
+{
+   switch (opcode) {
+   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
+      return PKT3_SET_CONTEXT_REG;
+   case PKT3_SET_SH_REG_PAIRS_PACKED:
+      return PKT3_SET_SH_REG;
+   default:
+      unreachable("invalid packed opcode");
+   }
+}
+
+static unsigned
+regular_opcode_to_pairs(struct ac_pm4_state *state, unsigned opcode)
+{
+   const struct radeon_info *info = state->info;
+
+   switch (opcode) {
+   case PKT3_SET_CONTEXT_REG:
+      return info->has_set_context_pairs_packed ? PKT3_SET_CONTEXT_REG_PAIRS_PACKED :
+             info->has_set_context_pairs ? PKT3_SET_CONTEXT_REG_PAIRS : opcode;
+   case PKT3_SET_SH_REG:
+      return info->has_set_sh_pairs_packed ? PKT3_SET_SH_REG_PAIRS_PACKED :
+             info->has_set_sh_pairs ? PKT3_SET_SH_REG_PAIRS : opcode;
+   case PKT3_SET_UCONFIG_REG:
+      return info->has_set_uconfig_pairs ?
PKT3_SET_UCONFIG_REG_PAIRS : opcode; + } + + return opcode; +} + +static bool +packed_next_is_reg_offset_pair(struct ac_pm4_state *state) +{ + return (state->ndw - state->last_pm4) % 3 == 2; +} + +static bool +packed_next_is_reg_value1(struct ac_pm4_state *state) +{ + return (state->ndw - state->last_pm4) % 3 == 1; +} + +static bool +packed_prev_is_reg_value0(struct ac_pm4_state *state) +{ + return packed_next_is_reg_value1(state); +} + +static unsigned +get_packed_reg_dw_offsetN(struct ac_pm4_state *state, unsigned index) +{ + unsigned i = state->last_pm4 + 2 + (index / 2) * 3; + assert(i < state->ndw); + return (state->pm4[i] >> ((index % 2) * 16)) & 0xffff; +} + +static unsigned +get_packed_reg_valueN_idx(struct ac_pm4_state *state, unsigned index) +{ + unsigned i = state->last_pm4 + 2 + (index / 2) * 3 + 1 + (index % 2); + assert(i < state->ndw); + return i; +} + +static unsigned +get_packed_reg_valueN(struct ac_pm4_state *state, unsigned index) +{ + return state->pm4[get_packed_reg_valueN_idx(state, index)]; +} + +static unsigned +get_packed_reg_count(struct ac_pm4_state *state) +{ + int body_size = state->ndw - state->last_pm4 - 2; + assert(body_size > 0 && body_size % 3 == 0); + return (body_size / 3) * 2; +} + +void +ac_pm4_finalize(struct ac_pm4_state *state) +{ + if (opcode_is_pairs_packed(state->last_opcode)) { + unsigned reg_count = get_packed_reg_count(state); + unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0); + + if (state->packed_is_padded) + reg_count--; + + bool all_consecutive = true; + + /* If the whole packed SET packet only sets consecutive registers, rewrite the packet + * to be unpacked to make it shorter. + * + * This also eliminates the invalid scenario when the packed SET packet sets only + * 2 registers and the register offsets are equal due to padding. + */ + for (unsigned i = 1; i < reg_count; i++) { + if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) { + all_consecutive = false; + break; + } + } + + if (all_consecutive) { + assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2); + state->pm4[state->last_pm4] = PKT3(pairs_packed_opcode_to_regular(state->last_opcode), + reg_count, 0); + state->pm4[state->last_pm4 + 1] = reg_dw_offset0; + for (unsigned i = 0; i < reg_count; i++) + state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i); + state->ndw = state->last_pm4 + 2 + reg_count; + state->last_opcode = PKT3_SET_SH_REG; + } else { + /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ + if (state->debug_sqtt && + (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED || + state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) { + if (state->packed_is_padded) + reg_count++; /* Add this back because we only need to record the last write. */ + + for (int i = reg_count - 1; i >= 0; i--) { + unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4; + + if (strstr(ac_get_register_name(state->info->gfx_level, + state->info->family, reg_offset), + "SPI_SHADER_PGM_LO_")) { + state->spi_shader_pgm_lo_reg = reg_offset; + break; + } + } + } + + /* If it's a packed SET_SH packet, use the *_N variant when possible. 
*/ + if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) { + state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C; + state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N); + } + } + } + + if (state->debug_sqtt && state->last_opcode == PKT3_SET_SH_REG) { + /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ + unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]); + unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4; + + for (unsigned i = 0; i < reg_count; i++) { + if (strstr(ac_get_register_name(state->info->gfx_level, + state->info->family, reg_base_offset + i * 4), + "SPI_SHADER_PGM_LO_")) { + state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4; + + break; + } + } + } +} + +void +ac_pm4_cmd_begin(struct ac_pm4_state *state, unsigned opcode) +{ + ac_pm4_finalize(state); + + assert(state->max_dw); + assert(state->ndw < state->max_dw); + assert(opcode <= 254); + state->last_opcode = opcode; + state->last_pm4 = state->ndw++; + state->packed_is_padded = false; +} + +void +ac_pm4_cmd_add(struct ac_pm4_state *state, uint32_t dw) +{ + assert(state->max_dw); + assert(state->ndw < state->max_dw); + state->pm4[state->ndw++] = dw; + state->last_opcode = 255; /* invalid opcode */ +} + +void +ac_pm4_cmd_end(struct ac_pm4_state *state, bool predicate) +{ + unsigned count; + count = state->ndw - state->last_pm4 - 2; + /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */ + bool reset_filter_cam = !state->is_compute_queue && + (opcode_is_pairs(state->last_opcode) || + opcode_is_pairs_packed(state->last_opcode)); + + state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate) | + PKT3_RESET_FILTER_CAM_S(reset_filter_cam); + + if (opcode_is_pairs_packed(state->last_opcode)) { + if (packed_prev_is_reg_value0(state)) { + /* Duplicate the first register at the end to make the number of registers aligned to 2. */ + ac_pm4_set_reg_custom(state, get_packed_reg_dw_offsetN(state, 0) * 4, + get_packed_reg_valueN(state, 0), + state->last_opcode, 0); + state->packed_is_padded = true; + } + + state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state); + } +} + +void +ac_pm4_set_reg_custom(struct ac_pm4_state *state, unsigned reg, uint32_t val, + unsigned opcode, unsigned idx) +{ + bool is_packed = opcode_is_pairs_packed(opcode); + reg >>= 2; + + assert(state->max_dw); + assert(state->ndw + 2 <= state->max_dw); + + if (is_packed) { + assert(idx == 0); + + if (opcode != state->last_opcode) { + ac_pm4_cmd_begin(state, opcode); /* reserve space for the header */ + state->ndw++; /* reserve space for the register count, it will be set at the end */ + } + } else if (opcode_is_pairs(opcode)) { + assert(idx == 0); + + if (opcode != state->last_opcode) + ac_pm4_cmd_begin(state, opcode); + + state->pm4[state->ndw++] = reg; + } else if (opcode != state->last_opcode || reg != (state->last_reg + 1) || + idx != state->last_idx) { + ac_pm4_cmd_begin(state, opcode); + state->pm4[state->ndw++] = reg | (idx << 28); + } + + assert(reg <= UINT16_MAX); + state->last_reg = reg; + state->last_idx = idx; + + if (is_packed) { + if (state->packed_is_padded) { + /* The packet is padded, which means the first register is written redundantly again + * at the end. Remove it, so that we can replace it with this register. 
+ */ + state->packed_is_padded = false; + state->ndw--; + } + + if (packed_next_is_reg_offset_pair(state)) { + state->pm4[state->ndw++] = reg; + } else if (packed_next_is_reg_value1(state)) { + /* Set the second register offset in the high 16 bits. */ + state->pm4[state->ndw - 2] &= 0x0000ffff; + state->pm4[state->ndw - 2] |= reg << 16; + } + } + + state->pm4[state->ndw++] = val; + ac_pm4_cmd_end(state, false); +} + +void ac_pm4_set_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val) +{ + unsigned opcode; + + if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { + opcode = PKT3_SET_CONFIG_REG; + reg -= SI_CONFIG_REG_OFFSET; + + } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { + opcode = PKT3_SET_SH_REG; + reg -= SI_SH_REG_OFFSET; + + } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { + opcode = PKT3_SET_CONTEXT_REG; + reg -= SI_CONTEXT_REG_OFFSET; + + } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { + opcode = PKT3_SET_UCONFIG_REG; + reg -= CIK_UCONFIG_REG_OFFSET; + + } else { + fprintf(stderr, "mesa: Invalid register offset %08x!\n", reg); + return; + } + + opcode = regular_opcode_to_pairs(state, opcode); + + ac_pm4_set_reg_custom(state, reg, val, opcode, 0); +} + +void +ac_pm4_set_reg_idx3(struct ac_pm4_state *state, unsigned reg, uint32_t val) +{ + if (state->info->uses_kernel_cu_mask) { + assert(state->info->gfx_level >= GFX10); + ac_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3); + } else { + ac_pm4_set_reg(state, reg, val); + } +} + +void +ac_pm4_clear_state(struct ac_pm4_state *state, const struct radeon_info *info, + bool debug_sqtt, bool is_compute_queue) +{ + state->info = info; + state->debug_sqtt = debug_sqtt; + state->ndw = 0; + state->is_compute_queue = is_compute_queue; + + if (!state->max_dw) + state->max_dw = ARRAY_SIZE(state->pm4); +} + +struct ac_pm4_state * +ac_pm4_create_sized(const struct radeon_info *info, bool debug_sqtt, + unsigned max_dw, bool is_compute_queue) +{ + struct ac_pm4_state *pm4; + unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4)); + + pm4 = (struct ac_pm4_state *)calloc(1, size); + if (pm4) { + pm4->max_dw = max_dw; + ac_pm4_clear_state(pm4, info, debug_sqtt, is_compute_queue); + } + return pm4; +} + +void +ac_pm4_free_state(struct ac_pm4_state *state) +{ + if (!state) + return; + + free(state); +} diff --git a/src/amd/common/ac_pm4.h b/src/amd/common/ac_pm4.h new file mode 100644 index 00000000000..61c068fce91 --- /dev/null +++ b/src/amd/common/ac_pm4.h @@ -0,0 +1,76 @@ +/* + * Copyright 2012 Advanced Micro Devices, Inc. + * + * SPDX-License-Identifier: MIT + */ + +#ifndef AC_PM4_H +#define AC_PM4_H + +#include "ac_gpu_info.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct ac_pm4_state { + const struct radeon_info *info; + + /* PKT3_SET_*_REG handling */ + uint16_t last_reg; /* register offset in dwords */ + uint16_t last_pm4; + uint16_t ndw; /* number of dwords in pm4 */ + uint8_t last_opcode; + uint8_t last_idx; + bool is_compute_queue; + bool packed_is_padded; /* whether SET_*_REG_PAIRS_PACKED is padded to an even number of regs */ + + /* commands for the DE */ + uint16_t max_dw; + + /* Used by SQTT to override the shader address */ + bool debug_sqtt; + uint32_t spi_shader_pgm_lo_reg; + + /* This must be the last field because the array can continue after the structure. 
*/ + uint32_t pm4[64]; +}; + +void +ac_pm4_set_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val); + +void +ac_pm4_set_reg_custom(struct ac_pm4_state *state, unsigned reg, uint32_t val, + unsigned opcode, unsigned idx); + +void +ac_pm4_set_reg_idx3(struct ac_pm4_state *state, unsigned reg, uint32_t val); + +void +ac_pm4_clear_state(struct ac_pm4_state *state, const struct radeon_info *info, + bool debug_sqtt, bool is_compute_queue); + +void +ac_pm4_cmd_begin(struct ac_pm4_state *state, unsigned opcode); + +void +ac_pm4_cmd_add(struct ac_pm4_state *state, uint32_t dw); + +void +ac_pm4_cmd_end(struct ac_pm4_state *state, bool predicate); + +void +ac_pm4_finalize(struct ac_pm4_state *state); + +struct ac_pm4_state * +ac_pm4_create_sized(const struct radeon_info *info, bool debug_sqtt, + unsigned max_dw, bool is_compute_queue); + +void +ac_pm4_free_state(struct ac_pm4_state *state); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/amd/common/meson.build b/src/amd/common/meson.build index 156499ca97e..cebeae803d4 100644 --- a/src/amd/common/meson.build +++ b/src/amd/common/meson.build @@ -115,6 +115,8 @@ amd_common_files = files( 'ac_parse_ib.c', 'ac_perfcounter.c', 'ac_perfcounter.h', + 'ac_pm4.c', + 'ac_pm4.h', 'ac_vcn_av1_default.h', ) diff --git a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c index 23e3d0274bd..751ce9a5bb9 100644 --- a/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c +++ b/src/gallium/drivers/radeonsi/si_cp_reg_shadowing.c @@ -65,7 +65,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) struct si_pm4_state *shadowing_preamble = si_pm4_create_sized(sctx->screen, 256, false); ac_create_shadowing_ib_preamble(&sctx->screen->info, - (pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble, + (pm4_cmd_add_fn)ac_pm4_cmd_add, shadowing_preamble, sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed); /* Initialize shadowed registers as follows. */ @@ -95,8 +95,8 @@ void si_init_cp_reg_shadowing(struct si_context *sctx) /* Setup preemption. The shadowing preamble will be executed as a preamble IB, * which will load register values from memory on a context switch. */ - sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4, - shadowing_preamble->ndw); + sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->base.pm4, + shadowing_preamble->base.ndw); si_pm4_free_state(sctx, shadowing_preamble, ~0); } } diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c index efbd7f13c0c..78dbabe52f5 100644 --- a/src/gallium/drivers/radeonsi/si_gfx_cs.c +++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c @@ -511,7 +511,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs) struct si_pm4_state *preamble = is_secure ? 
ctx->cs_preamble_state_tmz : ctx->cs_preamble_state; radeon_begin(&ctx->gfx_cs); - radeon_emit_array(preamble->pm4, preamble->ndw); + radeon_emit_array(preamble->base.pm4, preamble->base.ndw); radeon_end(); } diff --git a/src/gallium/drivers/radeonsi/si_pm4.c b/src/gallium/drivers/radeonsi/si_pm4.c index 60ccb55bbfb..858f44c4057 100644 --- a/src/gallium/drivers/radeonsi/si_pm4.c +++ b/src/gallium/drivers/radeonsi/si_pm4.c @@ -11,321 +11,12 @@ #include "util/u_memory.h" #include "ac_debug.h" -static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val, - unsigned opcode, unsigned idx); - -static bool opcode_is_pairs(unsigned opcode) -{ - return opcode == PKT3_SET_CONTEXT_REG_PAIRS || - opcode == PKT3_SET_SH_REG_PAIRS || - opcode == PKT3_SET_UCONFIG_REG_PAIRS; -} - -static bool opcode_is_pairs_packed(unsigned opcode) -{ - return opcode == PKT3_SET_CONTEXT_REG_PAIRS_PACKED || - opcode == PKT3_SET_SH_REG_PAIRS_PACKED || - opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N; -} - -static unsigned pairs_packed_opcode_to_regular(unsigned opcode) -{ - switch (opcode) { - case PKT3_SET_CONTEXT_REG_PAIRS_PACKED: - return PKT3_SET_CONTEXT_REG; - case PKT3_SET_SH_REG_PAIRS_PACKED: - return PKT3_SET_SH_REG; - default: - unreachable("invalid packed opcode"); - } -} - -static unsigned regular_opcode_to_pairs(struct si_pm4_state *state, unsigned opcode) -{ - const struct radeon_info *info = &state->screen->info; - - switch (opcode) { - case PKT3_SET_CONTEXT_REG: - return info->has_set_context_pairs_packed ? PKT3_SET_CONTEXT_REG_PAIRS_PACKED : - info->has_set_context_pairs ? PKT3_SET_CONTEXT_REG_PAIRS : opcode; - case PKT3_SET_SH_REG: - return info->has_set_sh_pairs_packed ? PKT3_SET_SH_REG_PAIRS_PACKED : - info->has_set_sh_pairs ? PKT3_SET_SH_REG_PAIRS : opcode; - case PKT3_SET_UCONFIG_REG: - return info->has_set_uconfig_pairs ? PKT3_SET_UCONFIG_REG_PAIRS : opcode; - } - - return opcode; -} - -static bool packed_next_is_reg_offset_pair(struct si_pm4_state *state) -{ - return (state->ndw - state->last_pm4) % 3 == 2; -} - -static bool packed_next_is_reg_value1(struct si_pm4_state *state) -{ - return (state->ndw - state->last_pm4) % 3 == 1; -} - -static bool packed_prev_is_reg_value0(struct si_pm4_state *state) -{ - return packed_next_is_reg_value1(state); -} - -static unsigned get_packed_reg_dw_offsetN(struct si_pm4_state *state, unsigned index) -{ - unsigned i = state->last_pm4 + 2 + (index / 2) * 3; - assert(i < state->ndw); - return (state->pm4[i] >> ((index % 2) * 16)) & 0xffff; -} - -static unsigned get_packed_reg_valueN_idx(struct si_pm4_state *state, unsigned index) -{ - unsigned i = state->last_pm4 + 2 + (index / 2) * 3 + 1 + (index % 2); - assert(i < state->ndw); - return i; -} - -static unsigned get_packed_reg_valueN(struct si_pm4_state *state, unsigned index) -{ - return state->pm4[get_packed_reg_valueN_idx(state, index)]; -} - -static unsigned get_packed_reg_count(struct si_pm4_state *state) -{ - int body_size = state->ndw - state->last_pm4 - 2; - assert(body_size > 0 && body_size % 3 == 0); - return (body_size / 3) * 2; -} - -void si_pm4_finalize(struct si_pm4_state *state) -{ - if (opcode_is_pairs_packed(state->last_opcode)) { - unsigned reg_count = get_packed_reg_count(state); - unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0); - - if (state->packed_is_padded) - reg_count--; - - bool all_consecutive = true; - - /* If the whole packed SET packet only sets consecutive registers, rewrite the packet - * to be unpacked to make it shorter. 
- * - * This also eliminates the invalid scenario when the packed SET packet sets only - * 2 registers and the register offsets are equal due to padding. - */ - for (unsigned i = 1; i < reg_count; i++) { - if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) { - all_consecutive = false; - break; - } - } - - if (all_consecutive) { - assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2); - state->pm4[state->last_pm4] = PKT3(pairs_packed_opcode_to_regular(state->last_opcode), - reg_count, 0); - state->pm4[state->last_pm4 + 1] = reg_dw_offset0; - for (unsigned i = 0; i < reg_count; i++) - state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i); - state->ndw = state->last_pm4 + 2 + reg_count; - state->last_opcode = PKT3_SET_SH_REG; - } else { - /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ - if (state->screen->debug_flags & DBG(SQTT) && - (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED || - state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) { - if (state->packed_is_padded) - reg_count++; /* Add this back because we only need to record the last write. */ - - for (int i = reg_count - 1; i >= 0; i--) { - unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4; - - if (strstr(ac_get_register_name(state->screen->info.gfx_level, - state->screen->info.family, reg_offset), - "SPI_SHADER_PGM_LO_")) { - state->spi_shader_pgm_lo_reg = reg_offset; - break; - } - } - } - - /* If it's a packed SET_SH packet, use the *_N variant when possible. */ - if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) { - state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C; - state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N); - } - } - } - - if (state->screen->debug_flags & DBG(SQTT) && state->last_opcode == PKT3_SET_SH_REG) { - /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */ - unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]); - unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4; - - for (unsigned i = 0; i < reg_count; i++) { - if (strstr(ac_get_register_name(state->screen->info.gfx_level, - state->screen->info.family, reg_base_offset + i * 4), - "SPI_SHADER_PGM_LO_")) { - state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4; - - break; - } - } - } -} - -static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode) -{ - si_pm4_finalize(state); - - assert(state->max_dw); - assert(state->ndw < state->max_dw); - assert(opcode <= 254); - state->last_opcode = opcode; - state->last_pm4 = state->ndw++; - state->packed_is_padded = false; -} - -void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw) -{ - assert(state->max_dw); - assert(state->ndw < state->max_dw); - state->pm4[state->ndw++] = dw; - state->last_opcode = 255; /* invalid opcode */ -} - -static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate) -{ - unsigned count; - count = state->ndw - state->last_pm4 - 2; - /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. 
*/ - bool reset_filter_cam = !state->is_compute_queue && - (opcode_is_pairs(state->last_opcode) || - opcode_is_pairs_packed(state->last_opcode)); - - state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate) | - PKT3_RESET_FILTER_CAM_S(reset_filter_cam); - - if (opcode_is_pairs_packed(state->last_opcode)) { - if (packed_prev_is_reg_value0(state)) { - /* Duplicate the first register at the end to make the number of registers aligned to 2. */ - si_pm4_set_reg_custom(state, get_packed_reg_dw_offsetN(state, 0) * 4, - get_packed_reg_valueN(state, 0), - state->last_opcode, 0); - state->packed_is_padded = true; - } - - state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state); - } -} - -static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val, - unsigned opcode, unsigned idx) -{ - bool is_packed = opcode_is_pairs_packed(opcode); - reg >>= 2; - - assert(state->max_dw); - assert(state->ndw + 2 <= state->max_dw); - - if (is_packed) { - assert(idx == 0); - - if (opcode != state->last_opcode) { - si_pm4_cmd_begin(state, opcode); /* reserve space for the header */ - state->ndw++; /* reserve space for the register count, it will be set at the end */ - } - } else if (opcode_is_pairs(opcode)) { - assert(idx == 0); - - if (opcode != state->last_opcode) - si_pm4_cmd_begin(state, opcode); - - state->pm4[state->ndw++] = reg; - } else if (opcode != state->last_opcode || reg != (state->last_reg + 1) || - idx != state->last_idx) { - si_pm4_cmd_begin(state, opcode); - state->pm4[state->ndw++] = reg | (idx << 28); - } - - assert(reg <= UINT16_MAX); - state->last_reg = reg; - state->last_idx = idx; - - if (is_packed) { - if (state->packed_is_padded) { - /* The packet is padded, which means the first register is written redundantly again - * at the end. Remove it, so that we can replace it with this register. - */ - state->packed_is_padded = false; - state->ndw--; - } - - if (packed_next_is_reg_offset_pair(state)) { - state->pm4[state->ndw++] = reg; - } else if (packed_next_is_reg_value1(state)) { - /* Set the second register offset in the high 16 bits. 
*/ - state->pm4[state->ndw - 2] &= 0x0000ffff; - state->pm4[state->ndw - 2] |= reg << 16; - } - } - - state->pm4[state->ndw++] = val; - si_pm4_cmd_end(state, false); -} - -void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val) -{ - unsigned opcode; - - if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) { - opcode = PKT3_SET_CONFIG_REG; - reg -= SI_CONFIG_REG_OFFSET; - - } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) { - opcode = PKT3_SET_SH_REG; - reg -= SI_SH_REG_OFFSET; - - } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) { - opcode = PKT3_SET_CONTEXT_REG; - reg -= SI_CONTEXT_REG_OFFSET; - - } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) { - opcode = PKT3_SET_UCONFIG_REG; - reg -= CIK_UCONFIG_REG_OFFSET; - - } else { - PRINT_ERR("Invalid register offset %08x!\n", reg); - return; - } - - opcode = regular_opcode_to_pairs(state, opcode); - - si_pm4_set_reg_custom(state, reg, val, opcode, 0); -} - -void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val) -{ - if (state->screen->info.uses_kernel_cu_mask) { - assert(state->screen->info.gfx_level >= GFX10); - si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3); - } else { - si_pm4_set_reg(state, reg, val); - } -} - void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen, bool is_compute_queue) { - state->screen = sscreen; - state->ndw = 0; - state->is_compute_queue = is_compute_queue; + const bool debug_sqtt = !!(sscreen->debug_flags & DBG(SQTT)); - if (!state->max_dw) - state->max_dw = ARRAY_SIZE(state->pm4); + ac_pm4_clear_state(&state->base, &sscreen->info, debug_sqtt, is_compute_queue); } void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx) @@ -351,7 +42,7 @@ void si_pm4_emit_commands(struct si_context *sctx, struct si_pm4_state *state) struct radeon_cmdbuf *cs = &sctx->gfx_cs; radeon_begin(cs); - radeon_emit_array(state->pm4, state->ndw); + radeon_emit_array(state->base.pm4, state->base.ndw); radeon_end(); } @@ -364,7 +55,7 @@ void si_pm4_emit_state(struct si_context *sctx, unsigned index) assert(state && state != sctx->emitted.array[index]); radeon_begin(cs); - radeon_emit_array(state->pm4, state->ndw); + radeon_emit_array(state->base.pm4, state->base.ndw); radeon_end(); sctx->emitted.array[index] = state; @@ -396,21 +87,21 @@ struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max bool is_compute_queue) { struct si_pm4_state *pm4; - unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4)); + unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->base.pm4)); pm4 = (struct si_pm4_state *)calloc(1, size); if (pm4) { - pm4->max_dw = max_dw; + pm4->base.max_dw = max_dw; si_pm4_clear_state(pm4, sscreen, is_compute_queue); } return pm4; } -struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig) +struct si_pm4_state *si_pm4_clone(struct si_screen *sscreen, struct si_pm4_state *orig) { - struct si_pm4_state *pm4 = si_pm4_create_sized(orig->screen, orig->max_dw, - orig->is_compute_queue); + struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, orig->base.max_dw, + orig->base.is_compute_queue); if (pm4) - memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->max_dw - ARRAY_SIZE(pm4->pm4))); + memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->base.max_dw - ARRAY_SIZE(pm4->base.pm4))); return pm4; } diff --git a/src/gallium/drivers/radeonsi/si_pm4.h b/src/gallium/drivers/radeonsi/si_pm4.h index 56a6b654d3a..710b99c0008 
100644 --- a/src/gallium/drivers/radeonsi/si_pm4.h +++ b/src/gallium/drivers/radeonsi/si_pm4.h @@ -10,6 +10,8 @@ #include #include +#include "ac_pm4.h" + #ifdef __cplusplus extern "C" { #endif @@ -27,35 +29,12 @@ struct si_atom { }; struct si_pm4_state { - struct si_screen *screen; - - /* PKT3_SET_*_REG handling */ - uint16_t last_reg; /* register offset in dwords */ - uint16_t last_pm4; - uint16_t ndw; /* number of dwords in pm4 */ - uint8_t last_opcode; - uint8_t last_idx; - bool is_compute_queue; - bool packed_is_padded; /* whether SET_*_REG_PAIRS_PACKED is padded to an even number of regs */ - /* For shader states only */ struct si_atom atom; - /* commands for the DE */ - uint16_t max_dw; - - /* Used by SQTT to override the shader address */ - uint32_t spi_shader_pgm_lo_reg; - - /* This must be the last field because the array can continue after the structure. */ - uint32_t pm4[64]; + struct ac_pm4_state base; }; -void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw); -void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val); -void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val); -void si_pm4_finalize(struct si_pm4_state *state); - void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen, bool is_compute_queue); void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx); @@ -66,7 +45,7 @@ void si_pm4_emit_shader(struct si_context *sctx, unsigned index); void si_pm4_reset_emitted(struct si_context *sctx); struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw, bool is_compute_queue); -struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig); +struct si_pm4_state *si_pm4_clone(struct si_screen *sscreen, struct si_pm4_state *orig); #ifdef __cplusplus } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 0b08792d6dc..ea165771c24 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -497,9 +497,9 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } if (sctx->gfx_level >= GFX12) - si_pm4_set_reg(pm4, R_02807C_DB_ALPHA_TO_MASK, db_alpha_to_mask); + ac_pm4_set_reg(&pm4->base, R_02807C_DB_ALPHA_TO_MASK, db_alpha_to_mask); else - si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, db_alpha_to_mask); + ac_pm4_set_reg(&pm4->base, R_028B70_DB_ALPHA_TO_MASK, db_alpha_to_mask); blend->cb_target_mask = 0; blend->cb_target_enabled_4bit = 0; @@ -532,7 +532,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend_cntl = S_028780_ENABLE(1); } - si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + ac_pm4_set_reg(&pm4->base, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } @@ -542,7 +542,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { assert(!"Unsupported equation for dual source blending"); - si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + ac_pm4_set_reg(&pm4->base, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } @@ -552,7 +552,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend->cb_target_enabled_4bit |= 0xf << (4 * i); if (!state->rt[j].colormask || !state->rt[j].blend_enable) { - si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + ac_pm4_set_reg(&pm4->base, 
R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); continue; } @@ -618,7 +618,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcA)); blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstA)); } - si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); + ac_pm4_set_reg(&pm4->base, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); last_blend_cntl = blend_cntl; blend->blend_enable_4bit |= 0xfu << (i * 4); @@ -655,7 +655,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } for (int i = 0; i < num_shader_outputs; i++) - si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); + ac_pm4_set_reg(&pm4->base, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE) @@ -663,11 +663,11 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, } if (sctx->gfx_level >= GFX12) - si_pm4_set_reg(pm4, R_028858_CB_COLOR_CONTROL, color_control); + ac_pm4_set_reg(&pm4->base, R_028858_CB_COLOR_CONTROL, color_control); else - si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); + ac_pm4_set_reg(&pm4->base, R_028808_CB_COLOR_CONTROL, color_control); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); return blend; } @@ -5072,7 +5072,7 @@ void si_init_screen_state_functions(struct si_screen *sscreen) static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) { unsigned reg = sctx->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; - si_pm4_set_reg(pm4, reg, value); + ac_pm4_set_reg(&pm4->base, reg, value); } static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se) @@ -5095,12 +5095,12 @@ static void si_write_harvested_raster_configs(struct si_context *sctx, struct si for (se = 0; se < num_se; se++) { si_set_grbm_gfx_index_se(sctx, pm4, se); - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); + ac_pm4_set_reg(&pm4->base, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); } si_set_grbm_gfx_index(sctx, pm4, ~0); if (sctx->gfx_level >= GFX7) { - si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); + ac_pm4_set_reg(&pm4->base, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } } @@ -5116,9 +5116,9 @@ static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *p /* Always use the default config when all backends are enabled * (or when we failed to determine the enabled backends). 
*/ - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); + ac_pm4_set_reg(&pm4->base, R_028350_PA_SC_RASTER_CONFIG, raster_config); if (sctx->gfx_level >= GFX7) - si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); + ac_pm4_set_reg(&pm4->base, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); } else { si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); } @@ -5151,41 +5151,41 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) return; if (sctx->has_graphics && !sctx->shadowing.registers) { - si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); - si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); if (sscreen->dpbb_allowed) { - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } if (has_clear_state) { - si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); - si_pm4_cmd_add(pm4, 0); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CLEAR_STATE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, 0); } } /* Compute registers. */ - si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); - si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); + ac_pm4_set_reg(&pm4->base, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en); if (sctx->gfx_level >= GFX7) { - si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en); - si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en); } if (sctx->gfx_level >= GFX9) - si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0); + ac_pm4_set_reg(&pm4->base, R_0301EC_CP_COHER_START_DELAY, 0); /* Set the pointer to border colors. MI200 doesn't support border colors. */ if (sctx->gfx_level >= GFX7 && sctx->border_color_buffer) { - si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, + ac_pm4_set_reg(&pm4->base, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); } else if (sctx->gfx_level == GFX6) { - si_pm4_set_reg(pm4, R_00950C_TA_CS_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_00950C_TA_CS_BC_BASE_ADDR, border_color_va >> 8); } if (!sctx->has_graphics) @@ -5193,46 +5193,46 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) /* Graphics registers. */ /* CLEAR_STATE doesn't restore these correctly. 
*/ - si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, + ac_pm4_set_reg(&pm4->base, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); + ac_pm4_set_reg(&pm4->base, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(16384) | S_028244_BR_Y(16384)); - si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); + ac_pm4_set_reg(&pm4->base, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); if (!has_clear_state) - si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); + ac_pm4_set_reg(&pm4->base, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); if (!has_clear_state) { - si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); - si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); - si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); - si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); - si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); - si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); + ac_pm4_set_reg(&pm4->base, R_028820_PA_CL_NANINF_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); + ac_pm4_set_reg(&pm4->base, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); + ac_pm4_set_reg(&pm4->base, R_028AC8_DB_PRELOAD_CONTROL, 0x0); + ac_pm4_set_reg(&pm4->base, R_02800C_DB_RENDER_OVERRIDE, 0); + ac_pm4_set_reg(&pm4->base, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); - si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); - si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); - si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); + ac_pm4_set_reg(&pm4->base, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); + ac_pm4_set_reg(&pm4->base, R_028A5C_VGT_GS_PER_VS, 0x2); + ac_pm4_set_reg(&pm4->base, R_028AB8_VGT_VTX_CNT_EN, 0x0); } - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); if (sctx->gfx_level >= GFX7) - si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); + ac_pm4_set_reg(&pm4->base, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); if (sctx->gfx_level == GFX6) { - si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, + ac_pm4_set_reg(&pm4->base, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); } if (sctx->gfx_level >= GFX7) { - si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); - si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); + ac_pm4_set_reg(&pm4->base, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); + ac_pm4_set_reg(&pm4->base, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); } else { - si_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0); - si_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0); + ac_pm4_set_reg(&pm4->base, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0); + ac_pm4_set_reg(&pm4->base, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0); } /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */ - si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(sctx->gfx_level >= GFX7) | S_02882C_YMAX_BOTTOM_EXCLUSION(sctx->gfx_level >= GFX7)); @@ -5241,26 +5241,26 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) * so they never enter this branch. 
*/ assert(sctx->family > CHIP_POLARIS12); - si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, S_028830_SMALL_PRIM_FILTER_ENABLE(1)); } if (sctx->gfx_level <= GFX7 || !has_clear_state) { - si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); - si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); + ac_pm4_set_reg(&pm4->base, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); + ac_pm4_set_reg(&pm4->base, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ - si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); - si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); - si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, + ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); + ac_pm4_set_reg(&pm4->base, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); + ac_pm4_set_reg(&pm4->base, R_028034_PA_SC_SCREEN_SCISSOR_BR, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); } if (sctx->gfx_level >= GFX7) { - si_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, ac_apply_cu_en(S_00B01C_CU_EN(0xffffffff) | S_00B01C_WAVE_LIMIT_GFX7(0x3F), C_00B01C_CU_EN, 0, &sscreen->info)); @@ -5270,34 +5270,34 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) si_set_raster_config(sctx, pm4); /* FIXME calculate these values somehow ??? */ - si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); - si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); + ac_pm4_set_reg(&pm4->base, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); + ac_pm4_set_reg(&pm4->base, R_028A58_VGT_ES_PER_GS, 0x40); /* These registers, when written, also overwrite the CLEAR_STATE * context, so we can't rely on CLEAR_STATE setting them. * It would be an issue if there was another UMD changing them. 
*/ - si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); - si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); - si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_028400_VGT_MAX_VTX_INDX, ~0); + ac_pm4_set_reg(&pm4->base, R_028404_VGT_MIN_VTX_INDX, 0); + ac_pm4_set_reg(&pm4->base, R_028408_VGT_INDX_OFFSET, 0); } if (sctx->gfx_level == GFX9) { - si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, + ac_pm4_set_reg(&pm4->base, R_00B414_SPI_SHADER_PGM_HI_LS, S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, + ac_pm4_set_reg(&pm4->base, R_00B214_SPI_SHADER_PGM_HI_ES, S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); } else { - si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, + ac_pm4_set_reg(&pm4->base, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); } if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) { - si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, + ac_pm4_set_reg(&pm4->base, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), C_00B51C_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); - si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, + ac_pm4_set_reg(&pm4->base, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); + ac_pm4_set_reg(&pm4->base, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), C_00B31C_CU_EN, 0, &sscreen->info)); @@ -5305,7 +5305,7 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) * Other chips are unaffected. These are suboptimal values, * but we don't use on-chip GS. */ - si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, + ac_pm4_set_reg(&pm4->base, R_028A44_VGT_GS_ONCHIP_CNTL, S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); } @@ -5331,36 +5331,36 @@ static void gfx6_init_gfx_preamble_state(struct si_context *sctx) vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); } - si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); + ac_pm4_set_reg(&pm4->base, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); } - si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); + ac_pm4_set_reg(&pm4->base, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (sctx->gfx_level == GFX9) { - si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); - si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); - si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_030920_VGT_MAX_VTX_INDX, ~0); + ac_pm4_set_reg(&pm4->base, R_030924_VGT_MIN_VTX_INDX, 0); + ac_pm4_set_reg(&pm4->base, R_030928_VGT_INDX_OFFSET, 0); - si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF)); + ac_pm4_set_reg(&pm4->base, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF)); - si_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, + ac_pm4_set_reg(&pm4->base, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | S_028C48_MAX_PRIM_PER_BATCH(1023)); - si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + ac_pm4_set_reg(&pm4->base, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 
- si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1); - si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); + ac_pm4_set_reg(&pm4->base, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1); + ac_pm4_set_reg(&pm4->base, R_030968_VGT_INSTANCE_BASE_ID, 0); } done: - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); sctx->cs_preamble_state = pm4; - sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */ + sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */ } static void cdna_init_compute_preamble_state(struct si_context *sctx) @@ -5377,36 +5377,36 @@ static void cdna_init_compute_preamble_state(struct si_context *sctx) /* Compute registers. */ /* Disable profiling on compute chips. */ - si_pm4_set_reg(pm4, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); - si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); - si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en); - si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en); - si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en); - si_pm4_set_reg(pm4, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 0); + ac_pm4_set_reg(&pm4->base, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); + ac_pm4_set_reg(&pm4->base, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); + ac_pm4_set_reg(&pm4->base, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, 0); if (sscreen->info.family >= CHIP_GFX940) { - si_pm4_set_reg(pm4, R_00B89C_COMPUTE_TG_CHUNK_SIZE, 0); - si_pm4_set_reg(pm4, R_00B8B4_COMPUTE_PGM_RSRC3, 0); + ac_pm4_set_reg(&pm4->base, R_00B89C_COMPUTE_TG_CHUNK_SIZE, 0); + ac_pm4_set_reg(&pm4->base, R_00B8B4_COMPUTE_PGM_RSRC3, 0); } else { - si_pm4_set_reg(pm4, R_00B894_COMPUTE_STATIC_THREAD_MGMT_SE4, compute_cu_en); - si_pm4_set_reg(pm4, R_00B898_COMPUTE_STATIC_THREAD_MGMT_SE5, compute_cu_en); - si_pm4_set_reg(pm4, R_00B89C_COMPUTE_STATIC_THREAD_MGMT_SE6, compute_cu_en); - si_pm4_set_reg(pm4, R_00B8A0_COMPUTE_STATIC_THREAD_MGMT_SE7, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B894_COMPUTE_STATIC_THREAD_MGMT_SE4, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B898_COMPUTE_STATIC_THREAD_MGMT_SE5, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B89C_COMPUTE_STATIC_THREAD_MGMT_SE6, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B8A0_COMPUTE_STATIC_THREAD_MGMT_SE7, compute_cu_en); } - si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0); + ac_pm4_set_reg(&pm4->base, R_0301EC_CP_COHER_START_DELAY, 0); /* Set the pointer to border colors. Only MI100 supports border colors. 
*/ if (sscreen->info.family == CHIP_MI100) { - si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, + ac_pm4_set_reg(&pm4->base, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); } - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); sctx->cs_preamble_state = pm4; - sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */ + sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */ } static void gfx10_init_gfx_preamble_state(struct si_context *sctx) @@ -5450,52 +5450,52 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) return; if (sctx->has_graphics && !sctx->shadowing.registers) { - si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); - si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); if (sscreen->dpbb_allowed) { - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } - si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); - si_pm4_cmd_add(pm4, 0); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CLEAR_STATE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, 0); } /* Non-graphics uconfig registers. */ if (sctx->gfx_level < GFX11) - si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0x20); - si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); + ac_pm4_set_reg(&pm4->base, R_0301EC_CP_COHER_START_DELAY, 0x20); + ac_pm4_set_reg(&pm4->base, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); /* Compute registers. */ - si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sscreen->info.address32_hi >> 8)); + ac_pm4_set_reg(&pm4->base, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sscreen->info.address32_hi >> 8)); for (unsigned i = 0; i < 4; ++i) - si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4, + ac_pm4_set_reg(&pm4->base, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4, i < sscreen->info.max_se ? compute_cu_en : 0x0); - si_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0); - si_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0); - si_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0); - si_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B890_COMPUTE_USER_ACCUM_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B894_COMPUTE_USER_ACCUM_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B898_COMPUTE_USER_ACCUM_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B89C_COMPUTE_USER_ACCUM_3, 0); if (sctx->gfx_level >= GFX11) { for (unsigned i = 4; i < 8; ++i) - si_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4 + (i - 4) * 4, + ac_pm4_set_reg(&pm4->base, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4 + (i - 4) * 4, i < sscreen->info.max_se ? compute_cu_en : 0x0); /* How many threads should go to 1 SE before moving onto the next. Think of GL1 cache hits. 
* Only these values are valid: 0 (disabled), 64, 128, 256, 512 * Recommendation: 64 = RT, 256 = non-RT (run benchmarks to be sure) */ - si_pm4_set_reg(pm4, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, S_00B8BC_INTERLEAVE(256)); + ac_pm4_set_reg(&pm4->base, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE, S_00B8BC_INTERLEAVE(256)); } else { - si_pm4_set_reg(pm4, R_00B8A0_COMPUTE_PGM_RSRC3, 0); + ac_pm4_set_reg(&pm4->base, R_00B8A0_COMPUTE_PGM_RSRC3, 0); } - si_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); + ac_pm4_set_reg(&pm4->base, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); if (!sctx->has_graphics) goto done; @@ -5503,64 +5503,64 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) /* Shader registers - PS. */ unsigned cu_mask_ps = sctx->gfx_level >= GFX10_3 ? gfx103_get_cu_mask_ps(sscreen) : ~0u; if (sctx->gfx_level < GFX11) { - si_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B004_SPI_SHADER_PGM_RSRC4_PS, ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), /* CUs 16-31 */ C_00B004_CU_EN, 16, &sscreen->info)); } - si_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT_GFX7(0x3F) | S_00B01C_LDS_GROUP_SIZE_GFX11(sctx->gfx_level >= GFX11), C_00B01C_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, + ac_pm4_set_reg(&pm4->base, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); - si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); - si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); - si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); - si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); /* Shader registers - VS. */ if (sctx->gfx_level < GFX11) { - si_pm4_set_reg_idx3(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B104_SPI_SHADER_PGM_RSRC4_VS, ac_apply_cu_en(S_00B104_CU_EN(0xffff), /* CUs 16-31 */ C_00B104_CU_EN, 16, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); - si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); - si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); - si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); - si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); + ac_pm4_set_reg(&pm4->base, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); } /* Shader registers - GS. 
*/ - si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); - si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); - si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); - si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); - si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, + ac_pm4_set_reg(&pm4->base, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); /* Shader registers - HS. */ if (sctx->gfx_level < GFX11) { - si_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B404_SPI_SHADER_PGM_RSRC4_HS, ac_apply_cu_en(S_00B404_CU_EN(0xffff), /* CUs 16-31 */ C_00B404_CU_EN, 16, &sscreen->info)); } - si_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); - si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); - si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); - si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); - si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, + ac_pm4_set_reg(&pm4->base, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B524_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); /* Context registers. */ if (sctx->gfx_level < GFX11) { - si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF)); + ac_pm4_set_reg(&pm4->base, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF)); } - si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL, + ac_pm4_set_reg(&pm4->base, R_02807C_DB_RMI_L2_CACHE_CONTROL, S_02807C_Z_WR_POLICY(zs_write_policy) | S_02807C_S_WR_POLICY(zs_write_policy) | S_02807C_HTILE_WR_POLICY(meta_write_policy) | @@ -5568,10 +5568,10 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) S_02807C_Z_RD_POLICY(zs_read_policy) | S_02807C_S_RD_POLICY(zs_read_policy) | S_02807C_HTILE_RD_POLICY(meta_read_policy)); - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); + ac_pm4_set_reg(&pm4->base, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); - si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, + ac_pm4_set_reg(&pm4->base, R_028410_CB_RMI_GL2_CACHE_CONTROL, (sctx->gfx_level >= GFX11 ? 
S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) | S_028410_COLOR_RD_POLICY(color_read_policy) | @@ -5586,17 +5586,17 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) S_028410_CMASK_RD_POLICY(meta_read_policy) | S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) | S_028410_DCC_RD_POLICY(meta_read_policy)); - si_pm4_set_reg(pm4, R_028708_SPI_SHADER_IDX_FORMAT, + ac_pm4_set_reg(&pm4->base, R_028708_SPI_SHADER_IDX_FORMAT, S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP)); if (sctx->gfx_level >= GFX10_3) - si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); + ac_pm4_set_reg(&pm4->base, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */ - si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(1) | S_02882C_YMAX_BOTTOM_EXCLUSION(1)); - si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, S_028830_SMALL_PRIM_FILTER_ENABLE(1)); if (sctx->gfx_level >= GFX10_3) { /* The rate combiners have no effect if they are disabled like this: @@ -5608,14 +5608,14 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) * Use OVERRIDE, which will ignore results from previous combiners. * (e.g. enabled sample shading overrides the vertex rate) */ - si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, + ac_pm4_set_reg(&pm4->base, R_028848_PA_CL_VRS_CNTL, S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) | S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); } - si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); - si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1); - si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, + ac_pm4_set_reg(&pm4->base, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); + ac_pm4_set_reg(&pm4->base, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1); + ac_pm4_set_reg(&pm4->base, R_028B50_VGT_TESS_DISTRIBUTION, sctx->gfx_level >= GFX11 ? S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | @@ -5631,12 +5631,12 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) /* GFX11+ shouldn't subtract 1 from pbb_max_alloc_count. */ unsigned gfx10_one = sctx->gfx_level < GFX11; - si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, + ac_pm4_set_reg(&pm4->base, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - gfx10_one) | S_028C48_MAX_PRIM_PER_BATCH(1023)); if (sctx->gfx_level >= GFX11_5) - si_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2, + ac_pm4_set_reg(&pm4->base, R_028C54_PA_SC_BINNER_CNTL_2, S_028C54_ENABLE_PING_PONG_BIN_ORDER(1)); /* Break up a pixel wave if it contains deallocs for more than @@ -5648,44 +5648,44 @@ static void gfx10_init_gfx_preamble_state(struct si_context *sctx) * the size of the PC minus the largest possible allocation for * a single primitive shader subgroup. */ - si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, + ac_pm4_set_reg(&pm4->base, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(sctx->gfx_level >= GFX11 ? 16 : 512)); if (sctx->gfx_level < GFX11) - si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Reuse for legacy (non-NGG) only. */ + ac_pm4_set_reg(&pm4->base, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Reuse for legacy (non-NGG) only. */ /* Uconfig registers. 
*/ - si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); - si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_030924_GE_MIN_VTX_INDX, 0); + ac_pm4_set_reg(&pm4->base, R_030928_GE_INDX_OFFSET, 0); if (sctx->gfx_level >= GFX11) { /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX * here, which disables primitive restart for all non-indexed draws, so that those draws * won't have to set this state. */ - si_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1)); + ac_pm4_set_reg(&pm4->base, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1)); } - si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); - si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); - si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); - si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0); + ac_pm4_set_reg(&pm4->base, R_030964_GE_MAX_VTX_INDX, ~0); + ac_pm4_set_reg(&pm4->base, R_030968_VGT_INSTANCE_BASE_ID, 0); + ac_pm4_set_reg(&pm4->base, R_03097C_GE_STEREO_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_030988_GE_USER_VGPR_EN, 0); - si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); - si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); + ac_pm4_set_reg(&pm4->base, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); + ac_pm4_set_reg(&pm4->base, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); if (sctx->gfx_level >= GFX11) { uint64_t rb_mask = BITFIELD64_MASK(sscreen->info.max_render_backends); - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1)); - si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 2, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1)); + ac_pm4_cmd_add(&pm4->base, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | PIXEL_PIPE_STATE_CNTL_STRIDE(2) | PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask)); - si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); + ac_pm4_cmd_add(&pm4->base, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); } done: - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); sctx->cs_preamble_state = pm4; - sctx->cs_preamble_state_tmz = si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */ + sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */ } static void gfx12_init_gfx_preamble_state(struct si_context *sctx) @@ -5720,99 +5720,99 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) return; if (sctx->has_graphics && !sctx->shadowing.registers) { - si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); - si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); - si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); + ac_pm4_cmd_add(&pm4->base, CC0_UPDATE_LOAD_ENABLES(1)); + ac_pm4_cmd_add(&pm4->base, CC1_UPDATE_SHADOW_ENABLES(1)); } if (sctx->has_graphics && sscreen->dpbb_allowed) { - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } /* Non-graphics uconfig registers. 
*/ - si_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); + ac_pm4_set_reg(&pm4->base, R_030E00_TA_CS_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(border_color_va >> 40)); /* Compute registers. */ - si_pm4_set_reg(pm4, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); - si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO, 0); - si_pm4_set_reg(pm4, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 0); - si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); - si_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, num_se > 1 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, num_se > 2 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, num_se > 3 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B88C_COMPUTE_STATIC_THREAD_MGMT_SE8, num_se > 8 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0); - si_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0); - si_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0); - si_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0); - si_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, num_se > 4 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B8B0_COMPUTE_STATIC_THREAD_MGMT_SE5, num_se > 5 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B8B4_COMPUTE_STATIC_THREAD_MGMT_SE6, num_se > 6 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B8B8_COMPUTE_STATIC_THREAD_MGMT_SE7, num_se > 7 ? compute_cu_en : 0); - si_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); + ac_pm4_set_reg(&pm4->base, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0); + ac_pm4_set_reg(&pm4->base, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(sctx->screen->info.address32_hi >> 8)); + ac_pm4_set_reg(&pm4->base, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO, 0); + ac_pm4_set_reg(&pm4->base, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 0); + ac_pm4_set_reg(&pm4->base, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en); + ac_pm4_set_reg(&pm4->base, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, num_se > 1 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, num_se > 2 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, num_se > 3 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B88C_COMPUTE_STATIC_THREAD_MGMT_SE8, num_se > 8 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B890_COMPUTE_USER_ACCUM_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B894_COMPUTE_USER_ACCUM_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B898_COMPUTE_USER_ACCUM_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B89C_COMPUTE_USER_ACCUM_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, num_se > 4 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B8B0_COMPUTE_STATIC_THREAD_MGMT_SE5, num_se > 5 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B8B4_COMPUTE_STATIC_THREAD_MGMT_SE6, num_se > 6 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B8B8_COMPUTE_STATIC_THREAD_MGMT_SE7, num_se > 7 ? compute_cu_en : 0); + ac_pm4_set_reg(&pm4->base, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0); if (!sctx->has_graphics) goto done; /* Graphics registers. 
*/ /* Shader registers - PS */ - si_pm4_set_reg_idx3(pm4, R_00B018_SPI_SHADER_PGM_RSRC3_PS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B018_SPI_SHADER_PGM_RSRC3_PS, ac_apply_cu_en(S_00B018_CU_EN(0xffff), C_00B018_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, + ac_pm4_set_reg(&pm4->base, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); - si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); - si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); - si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); - si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); /* Shader registers - GS */ - si_pm4_set_reg(pm4, R_00B218_SPI_SHADER_PGM_HI_ES, + ac_pm4_set_reg(&pm4->base, R_00B218_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg_idx3(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, ac_apply_cu_en(0xfffffdfd, 0, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); - si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); - si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); - si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); /* Shader registers - HS */ - si_pm4_set_reg(pm4, R_00B418_SPI_SHADER_PGM_HI_LS, + ac_pm4_set_reg(&pm4->base, R_00B418_SPI_SHADER_PGM_HI_LS, S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, ac_apply_cu_en(0xffffffff, 0, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); - si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); - si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); - si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); + ac_pm4_set_reg(&pm4->base, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); + ac_pm4_set_reg(&pm4->base, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); + ac_pm4_set_reg(&pm4->base, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); + ac_pm4_set_reg(&pm4->base, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); /* Context registers */ - si_pm4_set_reg(pm4, R_028000_DB_RENDER_CONTROL, 0); - si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); - si_pm4_set_reg(pm4, R_028040_DB_GL1_INTERFACE_CONTROL, 0); - si_pm4_set_reg(pm4, R_028048_DB_MEM_TEMPORAL, + ac_pm4_set_reg(&pm4->base, R_028000_DB_RENDER_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_02800C_DB_RENDER_OVERRIDE, 0); + ac_pm4_set_reg(&pm4->base, R_028040_DB_GL1_INTERFACE_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_028048_DB_MEM_TEMPORAL, S_028048_Z_TEMPORAL_READ(zs_read_temporal_hint) | S_028048_Z_TEMPORAL_WRITE(zs_write_temporal_hint) | S_028048_STENCIL_TEMPORAL_READ(zs_read_temporal_hint) | S_028048_STENCIL_TEMPORAL_WRITE(zs_write_temporal_hint) | 
S_028048_OCCLUSION_TEMPORAL_WRITE(gfx12_store_regular_temporal)); - si_pm4_set_reg(pm4, R_028064_DB_VIEWPORT_CONTROL, 0); - si_pm4_set_reg(pm4, R_028068_DB_SPI_VRS_CENTER_LOCATION, 0); - si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); - si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); - si_pm4_set_reg(pm4, R_02808C_DB_STENCIL_OPVAL, S_02808C_OPVAL(1) | S_02808C_OPVAL_BF(1)); - si_pm4_set_reg(pm4, R_0280F8_SC_MEM_TEMPORAL, + ac_pm4_set_reg(&pm4->base, R_028064_DB_VIEWPORT_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_028068_DB_SPI_VRS_CENTER_LOCATION, 0); + ac_pm4_set_reg(&pm4->base, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); + ac_pm4_set_reg(&pm4->base, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); + ac_pm4_set_reg(&pm4->base, R_02808C_DB_STENCIL_OPVAL, S_02808C_OPVAL(1) | S_02808C_OPVAL_BF(1)); + ac_pm4_set_reg(&pm4->base, R_0280F8_SC_MEM_TEMPORAL, S_0280F8_VRS_TEMPORAL_READ(gfx12_load_regular_temporal) | S_0280F8_VRS_TEMPORAL_WRITE(gfx12_store_regular_temporal) | S_0280F8_HIZ_TEMPORAL_READ(gfx12_load_regular_temporal) | S_0280F8_HIZ_TEMPORAL_WRITE(gfx12_store_regular_temporal) | S_0280F8_HIS_TEMPORAL_READ(gfx12_load_regular_temporal) | S_0280F8_HIS_TEMPORAL_WRITE(gfx12_store_regular_temporal)); - si_pm4_set_reg(pm4, R_0280FC_SC_MEM_SPEC_READ, + ac_pm4_set_reg(&pm4->base, R_0280FC_SC_MEM_SPEC_READ, S_0280FC_VRS_SPECULATIVE_READ(gfx12_spec_read_force_on) | S_0280FC_HIZ_SPECULATIVE_READ(gfx12_spec_read_force_on) | S_0280FC_HIS_SPECULATIVE_READ(gfx12_spec_read_force_on)); @@ -5826,41 +5826,41 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) * both enable bits, the hw will use the intersection of both. It allows separating implicit * viewport scissors from user scissors. 
*/ - si_pm4_set_reg(pm4, R_028180_PA_SC_SCREEN_SCISSOR_TL, 0); - si_pm4_set_reg(pm4, R_028184_PA_SC_SCREEN_SCISSOR_BR, + ac_pm4_set_reg(&pm4->base, R_028180_PA_SC_SCREEN_SCISSOR_TL, 0); + ac_pm4_set_reg(&pm4->base, R_028184_PA_SC_SCREEN_SCISSOR_BR, S_028184_BR_X(65535) | S_028184_BR_Y(65535)); /* inclusive bounds */ - si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0); - si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0); - si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, + ac_pm4_set_reg(&pm4->base, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0); + ac_pm4_set_reg(&pm4->base, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0); + ac_pm4_set_reg(&pm4->base, R_028244_PA_SC_GENERIC_SCISSOR_BR, S_028244_BR_X(65535) | S_028244_BR_Y(65535)); /* inclusive bounds */ - si_pm4_set_reg(pm4, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 0); - si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, + ac_pm4_set_reg(&pm4->base, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, sscreen->info.pa_sc_tile_steering_override); - si_pm4_set_reg(pm4, R_0283E0_PA_SC_VRS_INFO, 0); + ac_pm4_set_reg(&pm4->base, R_0283E0_PA_SC_VRS_INFO, 0); - si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, + ac_pm4_set_reg(&pm4->base, R_028410_CB_RMI_GL2_CACHE_CONTROL, S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) | S_028410_COLOR_RD_POLICY(color_read_policy)); - si_pm4_set_reg(pm4, R_028648_SPI_SHADER_IDX_FORMAT, + ac_pm4_set_reg(&pm4->base, R_028648_SPI_SHADER_IDX_FORMAT, S_028648_IDX0_EXPORT_FORMAT(V_028648_SPI_SHADER_1COMP)); - si_pm4_set_reg(pm4, R_0286E4_SPI_BARYC_SSAA_CNTL, S_0286E4_COVERED_CENTROID_IS_CENTER(1)); - si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); - si_pm4_set_reg(pm4, R_0287D4_PA_CL_POINT_X_RAD, 0); - si_pm4_set_reg(pm4, R_0287D8_PA_CL_POINT_Y_RAD, 0); - si_pm4_set_reg(pm4, R_0287DC_PA_CL_POINT_SIZE, 0); - si_pm4_set_reg(pm4, R_0287E0_PA_CL_POINT_CULL_RAD, 0); - si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); - si_pm4_set_reg(pm4, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0); - si_pm4_set_reg(pm4, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0); + ac_pm4_set_reg(&pm4->base, R_0286E4_SPI_BARYC_SSAA_CNTL, S_0286E4_COVERED_CENTROID_IS_CENTER(1)); + ac_pm4_set_reg(&pm4->base, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); + ac_pm4_set_reg(&pm4->base, R_0287D4_PA_CL_POINT_X_RAD, 0); + ac_pm4_set_reg(&pm4->base, R_0287D8_PA_CL_POINT_Y_RAD, 0); + ac_pm4_set_reg(&pm4->base, R_0287DC_PA_CL_POINT_SIZE, 0); + ac_pm4_set_reg(&pm4->base, R_0287E0_PA_CL_POINT_CULL_RAD, 0); + ac_pm4_set_reg(&pm4->base, R_028820_PA_CL_NANINF_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0); /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. 
*/ - si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_02882C_PA_SU_PRIM_FILTER_CNTL, S_02882C_XMAX_RIGHT_EXCLUSION(1) | S_02882C_YMAX_BOTTOM_EXCLUSION(1)); - si_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, + ac_pm4_set_reg(&pm4->base, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, S_028830_SMALL_PRIM_FILTER_ENABLE(1) | S_028830_SC_1XMSAA_COMPATIBLE_DISABLE(1) /* use sample locations even for MSAA 1x */); - si_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0); - si_pm4_set_reg(pm4, R_028840_PA_STEREO_CNTL, S_028840_STEREO_MODE(1)); + ac_pm4_set_reg(&pm4->base, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028840_PA_STEREO_CNTL, S_028840_STEREO_MODE(1)); /* The rate combiners have no effect if they are disabled like this: * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 @@ -5871,53 +5871,53 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) * Use OVERRIDE, which will ignore results from previous combiners. * (e.g. enabled sample shading overrides the vertex rate) */ - si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, + ac_pm4_set_reg(&pm4->base, R_028848_PA_CL_VRS_CNTL, S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE) | S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_SC_VRS_COMB_MODE_OVERRIDE)); - si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); - si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); - si_pm4_set_reg(pm4, R_028A50_GE_SE_ENHANCE, 0); - si_pm4_set_reg(pm4, R_028A70_GE_IA_ENHANCE, 0); - si_pm4_set_reg(pm4, R_028A80_GE_WD_ENHANCE, 0); - si_pm4_set_reg(pm4, R_028A9C_VGT_REUSE_OFF, 0); - si_pm4_set_reg(pm4, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, 0); - si_pm4_set_reg(pm4, R_028ABC_DB_HTILE_SURFACE, 0); + ac_pm4_set_reg(&pm4->base, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); + ac_pm4_set_reg(&pm4->base, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); + ac_pm4_set_reg(&pm4->base, R_028A50_GE_SE_ENHANCE, 0); + ac_pm4_set_reg(&pm4->base, R_028A70_GE_IA_ENHANCE, 0); + ac_pm4_set_reg(&pm4->base, R_028A80_GE_WD_ENHANCE, 0); + ac_pm4_set_reg(&pm4->base, R_028A9C_VGT_REUSE_OFF, 0); + ac_pm4_set_reg(&pm4->base, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_028ABC_DB_HTILE_SURFACE, 0); - si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); - si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, + ac_pm4_set_reg(&pm4->base, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_028B50_VGT_TESS_DISTRIBUTION, S_028B50_ACCUM_ISOLINE(128) | S_028B50_ACCUM_TRI(128) | S_028B50_ACCUM_QUAD(128) | S_028B50_DONUT_SPLIT_GFX9(24) | S_028B50_TRAP_SPLIT(6)); - si_pm4_set_reg(pm4, R_028BC0_PA_SC_HISZ_RENDER_OVERRIDE, 0); + ac_pm4_set_reg(&pm4->base, R_028BC0_PA_SC_HISZ_RENDER_OVERRIDE, 0); - si_pm4_set_reg(pm4, R_028C40_PA_SC_BINNER_OUTPUT_TIMEOUT_COUNTER, 0x800); - si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, + ac_pm4_set_reg(&pm4->base, R_028C40_PA_SC_BINNER_OUTPUT_TIMEOUT_COUNTER, 0x800); + ac_pm4_set_reg(&pm4->base, R_028C48_PA_SC_BINNER_CNTL_1, S_028C48_MAX_ALLOC_COUNT(254) | S_028C48_MAX_PRIM_PER_BATCH(511)); - si_pm4_set_reg(pm4, R_028C4C_PA_SC_BINNER_CNTL_2, S_028C4C_ENABLE_PING_PONG_BIN_ORDER(1)); - si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(64)); - si_pm4_set_reg(pm4, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, + ac_pm4_set_reg(&pm4->base, R_028C4C_PA_SC_BINNER_CNTL_2, S_028C4C_ENABLE_PING_PONG_BIN_ORDER(1)); + ac_pm4_set_reg(&pm4->base, R_028C50_PA_SC_NGG_MODE_CNTL, 
S_028C50_MAX_DEALLOCS_IN_WAVE(64)); + ac_pm4_set_reg(&pm4->base, R_028C54_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C54_NULL_SQUAD_AA_MASK_ENABLE(1)); - si_pm4_set_reg(pm4, R_028C58_PA_SC_SHADER_CONTROL, 0); + ac_pm4_set_reg(&pm4->base, R_028C58_PA_SC_SHADER_CONTROL, 0); for (unsigned i = 0; i < 8; i++) { - si_pm4_set_reg(pm4, R_028F00_CB_MEM0_INFO + i * 4, + ac_pm4_set_reg(&pm4->base, R_028F00_CB_MEM0_INFO + i * 4, S_028F00_TEMPORAL_READ(color_read_temporal_hint) | S_028F00_TEMPORAL_WRITE(color_write_temporal_hint)); } /* Uconfig registers. */ - si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); - si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); + ac_pm4_set_reg(&pm4->base, R_030924_GE_MIN_VTX_INDX, 0); + ac_pm4_set_reg(&pm4->base, R_030928_GE_INDX_OFFSET, 0); /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX * here, which disables primitive restart for all non-indexed draws, so that those draws * won't have to set this state. */ - si_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1)); - si_pm4_set_reg(pm4, R_030950_GE_GS_THROTTLE, + ac_pm4_set_reg(&pm4->base, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1)); + ac_pm4_set_reg(&pm4->base, R_030950_GE_GS_THROTTLE, S_030950_T0(0x1) | S_030950_T1(0x4) | S_030950_T2(0x3) | @@ -5926,30 +5926,30 @@ static void gfx12_init_gfx_preamble_state(struct si_context *sctx) S_030950_FACTOR2(0x3) | S_030950_ENABLE_THROTTLE(0) | S_030950_NUM_INIT_GRPS(0xff)); - si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); - si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); - si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); - si_pm4_set_reg(pm4, R_030980_GE_USER_VGPR_EN, 0); - si_pm4_set_reg(pm4, R_0309B4_VGT_PRIMITIVEID_RESET, 0); - si_pm4_set_reg(pm4, R_03098C_GE_VRS_RATE, 0); - si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); - si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); + ac_pm4_set_reg(&pm4->base, R_030964_GE_MAX_VTX_INDX, ~0); + ac_pm4_set_reg(&pm4->base, R_030968_VGT_INSTANCE_BASE_ID, 0); + ac_pm4_set_reg(&pm4->base, R_03097C_GE_STEREO_CNTL, 0); + ac_pm4_set_reg(&pm4->base, R_030980_GE_USER_VGPR_EN, 0); + ac_pm4_set_reg(&pm4->base, R_0309B4_VGT_PRIMITIVEID_RESET, 0); + ac_pm4_set_reg(&pm4->base, R_03098C_GE_VRS_RATE, 0); + ac_pm4_set_reg(&pm4->base, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); + ac_pm4_set_reg(&pm4->base, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); - si_pm4_set_reg(pm4, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE, 0x8A4D); - si_pm4_set_reg(pm4, R_03112C_SPI_GRP_LAUNCH_GUARANTEE_CTRL, 0x1123); + ac_pm4_set_reg(&pm4->base, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE, 0x8A4D); + ac_pm4_set_reg(&pm4->base, R_03112C_SPI_GRP_LAUNCH_GUARANTEE_CTRL, 0x1123); uint64_t rb_mask = BITFIELD64_MASK(sctx->screen->info.max_render_backends); - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1)); - si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 2, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1)); + ac_pm4_cmd_add(&pm4->base, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) | PIXEL_PIPE_STATE_CNTL_STRIDE(2) | PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask)); - si_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); + ac_pm4_cmd_add(&pm4->base, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask)); done: sctx->cs_preamble_state = pm4; - sctx->cs_preamble_state_tmz = 
si_pm4_clone(pm4); /* Make a copy of the preamble for TMZ. */ + sctx->cs_preamble_state_tmz = si_pm4_clone(sscreen, pm4); /* Make a copy of the preamble for TMZ. */ } void si_init_gfx_preamble_state(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp index 4c8580f9152..702a6b6cd19 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.cpp +++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp @@ -376,11 +376,11 @@ static bool si_update_shaders(struct si_context *sctx) struct si_pm4_state *pm4 = &shader->pm4; uint64_t va_low = shader->gpu_address >> 8; - uint32_t reg = pm4->spi_shader_pgm_lo_reg; - si_pm4_set_reg(&pipeline->pm4, reg, va_low); + uint32_t reg = pm4->base.spi_shader_pgm_lo_reg; + ac_pm4_set_reg(&pipeline->pm4.base, reg, va_low); } } - si_pm4_finalize(&pipeline->pm4); + ac_pm4_finalize(&pipeline->pm4.base); sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf); _mesa_hash_table_u64_insert(sctx->sqtt->pipeline_bos, diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp index 0d36c93ce39..f74c29bcb72 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp +++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp @@ -685,7 +685,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) return; va = shader->bo->gpu_address; - si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); shader->config.rsrc1 = S_00B528_VGPRS(si_shader_encode_vgprs(shader)) | S_00B528_SGPRS(si_shader_encode_sgprs(shader)) | @@ -694,7 +694,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader) S_00B528_FLOAT_MODE(shader->config.float_mode); shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR)) | S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) @@ -709,30 +709,30 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) GFX6_TCS_NUM_USER_SGPR; if (sscreen->info.gfx_level >= GFX12) { - si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_RSRC4_HS, + ac_pm4_set_reg(&pm4->base, R_00B420_SPI_SHADER_PGM_RSRC4_HS, S_00B420_WAVE_LIMIT(0x3ff) | S_00B420_GLG_FORCE_DISABLE(1) | S_00B420_INST_PREF_SIZE(si_get_shader_prefetch_size(shader))); - si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8); } else if (sscreen->info.gfx_level >= GFX11) { - si_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B404_SPI_SHADER_PGM_RSRC4_HS, ac_apply_cu_en(S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) | S_00B404_CU_EN(0xffff), C_00B404_CU_EN, 16, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); } else if (sscreen->info.gfx_level >= GFX10) { - si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); } else if (sscreen->info.gfx_level >= GFX9) { - si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); } else { - si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); - 
si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS, + ac_pm4_set_reg(&pm4->base, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B424_SPI_SHADER_PGM_HI_HS, S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8)); } - si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS, + ac_pm4_set_reg(&pm4->base, R_00B428_SPI_SHADER_PGM_RSRC1_HS, S_00B428_VGPRS(si_shader_encode_vgprs(shader)) | S_00B428_SGPRS(si_shader_encode_sgprs(shader)) | S_00B428_DX10_CLAMP(sscreen->info.gfx_level < GFX12) | @@ -752,9 +752,9 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader) shader->config.rsrc2 |= S_00B42C_OC_LDS_EN(1); if (sscreen->info.gfx_level <= GFX8) - si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); + ac_pm4_set_reg(&pm4->base, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } static void si_emit_shader_es(struct si_context *sctx, unsigned index) @@ -804,16 +804,16 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0; - si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); - si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, + ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B324_SPI_SHADER_PGM_HI_ES, S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES, + ac_pm4_set_reg(&pm4->base, R_00B328_SPI_SHADER_PGM_RSRC1_ES, S_00B328_VGPRS(si_shader_encode_vgprs(shader)) | S_00B328_SGPRS(si_shader_encode_sgprs(shader)) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | S_00B328_DX10_CLAMP(1) | S_00B328_FLOAT_MODE(shader->config.float_mode)); - si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, + ac_pm4_set_reg(&pm4->base, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, S_00B32C_USER_SGPR(num_user_sgprs) | S_00B32C_OC_LDS_EN(oc_lds_en) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); @@ -821,7 +821,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader) si_set_tesseval_regs(sscreen, shader->selector, shader); polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs, @@ -1094,9 +1094,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) num_user_sgprs = GFX9_GS_NUM_USER_SGPR; if (sscreen->info.gfx_level >= GFX10) { - si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); } else { - si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); } uint32_t rsrc1 = S_00B228_VGPRS(si_shader_encode_vgprs(shader)) | @@ -1117,8 +1117,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5); } - si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1); - si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2); + ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1); + ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2); shader->gs.spi_shader_pgm_rsrc3_gs = ac_apply_cu_en(S_00B21C_CU_EN(0xffff) | @@ -1147,20 +1147,20 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader) 
S_00B21C_WAVE_LIMIT(0x3F), C_00B21C_CU_EN, 0, &sscreen->info); - si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); - si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, + ac_pm4_set_reg(&pm4->base, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B224_SPI_SHADER_PGM_HI_GS, S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, + ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS(si_shader_encode_vgprs(shader)) | S_00B228_SGPRS(si_shader_encode_sgprs(shader)) | S_00B228_DX10_CLAMP(1) | S_00B228_FLOAT_MODE(shader->config.float_mode)); - si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, + ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, S_00B22C_USER_SGPR(GFX6_GS_NUM_USER_SGPR) | S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); } - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } bool gfx10_is_ngg_passthrough(struct si_shader *shader) @@ -1488,18 +1488,18 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader } if (sscreen->info.gfx_level >= GFX12) { - si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8); } else { - si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); } - si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, + ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS(si_shader_encode_vgprs(shader)) | S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(sscreen->info.gfx_level < GFX12) | S_00B228_MEM_ORDERED(si_shader_mem_ordered(shader)) | S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt)); - si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, + ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) | S_00B22C_USER_SGPR(num_user_sgprs) | S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | @@ -1672,7 +1672,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028B54_MAX_PRIMGRP_IN_WAVE(2); } - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } static void si_emit_shader_vs(struct si_context *sctx, unsigned index) @@ -1829,15 +1829,15 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 
1 : 0; if (sscreen->info.gfx_level >= GFX7) { - si_pm4_set_reg_idx3(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B118_SPI_SHADER_PGM_RSRC3_VS, ac_apply_cu_en(S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), C_00B118_CU_EN, 0, &sscreen->info)); - si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); + ac_pm4_set_reg(&pm4->base, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); } - si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); - si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS, + ac_pm4_set_reg(&pm4->base, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B124_SPI_SHADER_PGM_HI_VS, S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8)); uint32_t rsrc1 = @@ -1863,8 +1863,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, S_00B12C_SO_EN(1); } - si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1); - si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2); + ac_pm4_set_reg(&pm4->base, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1); + ac_pm4_set_reg(&pm4->base, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2); if (window_space) shader->vs.pa_cl_vte_cntl = S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1); @@ -1878,7 +1878,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader, si_set_tesseval_regs(sscreen, shader->selector, shader); polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } static unsigned si_get_spi_shader_col_format(struct si_shader *shader) @@ -2173,40 +2173,40 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) if (sscreen->dpbb_allowed && (sscreen->pbb_context_states_per_bin > 1 || sscreen->pbb_persistent_states_per_bin > 1)) { - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); } if (sscreen->info.gfx_level >= GFX12) { - si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC4_PS, + ac_pm4_set_reg(&pm4->base, R_00B01C_SPI_SHADER_PGM_RSRC4_PS, S_00B01C_WAVE_LIMIT_GFX12(0x3FF) | S_00B01C_LDS_GROUP_SIZE_GFX12(1) | S_00B01C_INST_PREF_SIZE(si_get_shader_prefetch_size(shader))); } else if (sscreen->info.gfx_level >= GFX11) { unsigned cu_mask_ps = gfx103_get_cu_mask_ps(sscreen); - si_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, + ac_pm4_set_reg_idx3(&pm4->base, R_00B004_SPI_SHADER_PGM_RSRC4_PS, ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16) | S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)), C_00B004_CU_EN, 16, &sscreen->info)); } uint64_t va = shader->bo->gpu_address; - si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); - si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, + ac_pm4_set_reg(&pm4->base, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); + ac_pm4_set_reg(&pm4->base, R_00B024_SPI_SHADER_PGM_HI_PS, S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8)); - si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, + ac_pm4_set_reg(&pm4->base, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS(si_shader_encode_vgprs(shader)) | S_00B028_SGPRS(si_shader_encode_sgprs(shader)) | S_00B028_DX10_CLAMP(sscreen->info.gfx_level < GFX12) | S_00B028_MEM_ORDERED(si_shader_mem_ordered(shader)) | S_00B028_FLOAT_MODE(shader->config.float_mode)); - si_pm4_set_reg(pm4, 
R_00B02C_SPI_SHADER_PGM_RSRC2_PS, + ac_pm4_set_reg(&pm4->base, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) | S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) | S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0)); - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); } static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader) @@ -2251,7 +2251,7 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader assert(0); } - assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.spi_shader_pgm_lo_reg != 0); + assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.base.spi_shader_pgm_lo_reg != 0); } static void si_clear_vs_key_inputs(union si_shader_key *key) @@ -4052,13 +4052,13 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz) return; /* Done by Vulkan before VGT_FLUSH. */ - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */ - si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); - si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); - si_pm4_finalize(pm4); + ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0)); + ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); + ac_pm4_finalize(&pm4->base); *has_vgt_flush = true; } @@ -4199,32 +4199,32 @@ bool si_update_gs_ring_buffers(struct si_context *sctx) if (!*gs_ring_state_dw_offset) { /* We are here for the first time. The packets will be added. */ - *gs_ring_state_dw_offset = pm4->ndw; + *gs_ring_state_dw_offset = pm4->base.ndw; } else { /* We have been here before. Overwrite the previous packets. */ - old_ndw = pm4->ndw; - pm4->ndw = *gs_ring_state_dw_offset; + old_ndw = pm4->base.ndw; + pm4->base.ndw = *gs_ring_state_dw_offset; } /* Unallocated rings are written to reserve the space in the pm4 * (to be able to overwrite them later). */ if (sctx->gfx_level >= GFX7) { if (sctx->gfx_level <= GFX8) - si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, + ac_pm4_set_reg(&pm4->base, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring ? sctx->esgs_ring->width0 / 256 : 0); - si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE, + ac_pm4_set_reg(&pm4->base, R_030904_VGT_GSVS_RING_SIZE, sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0); } else { - si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE, + ac_pm4_set_reg(&pm4->base, R_0088C8_VGT_ESGS_RING_SIZE, sctx->esgs_ring ? sctx->esgs_ring->width0 / 256 : 0); - si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE, + ac_pm4_set_reg(&pm4->base, R_0088CC_VGT_GSVS_RING_SIZE, sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0); } - si_pm4_finalize(pm4); + ac_pm4_finalize(&pm4->base); if (old_ndw) { - pm4->ndw = old_ndw; - pm4->last_opcode = 255; /* invalid opcode (we don't save the last opcode) */ + pm4->base.ndw = old_ndw; + pm4->base.last_opcode = 255; /* invalid opcode (we don't save the last opcode) */ } }