ac,radeonsi: import PM4 state from RadeonSI

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29452>
committed by Marge Bot
parent 62c52fb59d
commit 428601095c

src/amd/common/ac_pm4.c (new file, 371 lines)
@@ -0,0 +1,371 @@
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_debug.h"
#include "ac_gpu_info.h"
#include "ac_pm4.h"

#include "sid.h"

#include <string.h>
#include <stdlib.h>

static bool
opcode_is_pairs(unsigned opcode)
{
   return opcode == PKT3_SET_CONTEXT_REG_PAIRS ||
          opcode == PKT3_SET_SH_REG_PAIRS ||
          opcode == PKT3_SET_UCONFIG_REG_PAIRS;
}

static bool
opcode_is_pairs_packed(unsigned opcode)
{
   return opcode == PKT3_SET_CONTEXT_REG_PAIRS_PACKED ||
          opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
          opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N;
}

static unsigned
pairs_packed_opcode_to_regular(unsigned opcode)
{
   switch (opcode) {
   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
      return PKT3_SET_CONTEXT_REG;
   case PKT3_SET_SH_REG_PAIRS_PACKED:
      return PKT3_SET_SH_REG;
   default:
      unreachable("invalid packed opcode");
   }
}

static unsigned
regular_opcode_to_pairs(struct ac_pm4_state *state, unsigned opcode)
{
   const struct radeon_info *info = state->info;

   switch (opcode) {
   case PKT3_SET_CONTEXT_REG:
      return info->has_set_context_pairs_packed ? PKT3_SET_CONTEXT_REG_PAIRS_PACKED :
             info->has_set_context_pairs ? PKT3_SET_CONTEXT_REG_PAIRS : opcode;
   case PKT3_SET_SH_REG:
      return info->has_set_sh_pairs_packed ? PKT3_SET_SH_REG_PAIRS_PACKED :
             info->has_set_sh_pairs ? PKT3_SET_SH_REG_PAIRS : opcode;
   case PKT3_SET_UCONFIG_REG:
      return info->has_set_uconfig_pairs ? PKT3_SET_UCONFIG_REG_PAIRS : opcode;
   }

   return opcode;
}

static bool
packed_next_is_reg_offset_pair(struct ac_pm4_state *state)
{
   return (state->ndw - state->last_pm4) % 3 == 2;
}

static bool
packed_next_is_reg_value1(struct ac_pm4_state *state)
{
   return (state->ndw - state->last_pm4) % 3 == 1;
}

static bool
packed_prev_is_reg_value0(struct ac_pm4_state *state)
{
   return packed_next_is_reg_value1(state);
}

static unsigned
get_packed_reg_dw_offsetN(struct ac_pm4_state *state, unsigned index)
{
   unsigned i = state->last_pm4 + 2 + (index / 2) * 3;
   assert(i < state->ndw);
   return (state->pm4[i] >> ((index % 2) * 16)) & 0xffff;
}

static unsigned
get_packed_reg_valueN_idx(struct ac_pm4_state *state, unsigned index)
{
   unsigned i = state->last_pm4 + 2 + (index / 2) * 3 + 1 + (index % 2);
   assert(i < state->ndw);
   return i;
}

static unsigned
get_packed_reg_valueN(struct ac_pm4_state *state, unsigned index)
{
   return state->pm4[get_packed_reg_valueN_idx(state, index)];
}

static unsigned
get_packed_reg_count(struct ac_pm4_state *state)
{
   int body_size = state->ndw - state->last_pm4 - 2;
   assert(body_size > 0 && body_size % 3 == 0);
   return (body_size / 3) * 2;
}

void
ac_pm4_finalize(struct ac_pm4_state *state)
{
   if (opcode_is_pairs_packed(state->last_opcode)) {
      unsigned reg_count = get_packed_reg_count(state);
      unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0);

      if (state->packed_is_padded)
         reg_count--;

      bool all_consecutive = true;

      /* If the whole packed SET packet only sets consecutive registers, rewrite the packet
       * to be unpacked to make it shorter.
       *
       * This also eliminates the invalid scenario when the packed SET packet sets only
       * 2 registers and the register offsets are equal due to padding.
       */
      for (unsigned i = 1; i < reg_count; i++) {
         if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) {
            all_consecutive = false;
            break;
         }
      }

      if (all_consecutive) {
         assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2);
         state->pm4[state->last_pm4] = PKT3(pairs_packed_opcode_to_regular(state->last_opcode),
                                            reg_count, 0);
         state->pm4[state->last_pm4 + 1] = reg_dw_offset0;
         for (unsigned i = 0; i < reg_count; i++)
            state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i);
         state->ndw = state->last_pm4 + 2 + reg_count;
         state->last_opcode = PKT3_SET_SH_REG;
      } else {
         /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
         if (state->debug_sqtt &&
             (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
              state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) {
            if (state->packed_is_padded)
               reg_count++; /* Add this back because we only need to record the last write. */

            for (int i = reg_count - 1; i >= 0; i--) {
               unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4;

               if (strstr(ac_get_register_name(state->info->gfx_level,
                                               state->info->family, reg_offset),
                          "SPI_SHADER_PGM_LO_")) {
                  state->spi_shader_pgm_lo_reg = reg_offset;
                  break;
               }
            }
         }

         /* If it's a packed SET_SH packet, use the *_N variant when possible. */
         if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) {
            state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
            state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
         }
      }
   }

   if (state->debug_sqtt && state->last_opcode == PKT3_SET_SH_REG) {
      /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
      unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]);
      unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4;

      for (unsigned i = 0; i < reg_count; i++) {
         if (strstr(ac_get_register_name(state->info->gfx_level,
                                         state->info->family, reg_base_offset + i * 4),
                    "SPI_SHADER_PGM_LO_")) {
            state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4;

            break;
         }
      }
   }
}

void
ac_pm4_cmd_begin(struct ac_pm4_state *state, unsigned opcode)
{
   ac_pm4_finalize(state);

   assert(state->max_dw);
   assert(state->ndw < state->max_dw);
   assert(opcode <= 254);
   state->last_opcode = opcode;
   state->last_pm4 = state->ndw++;
   state->packed_is_padded = false;
}

void
ac_pm4_cmd_add(struct ac_pm4_state *state, uint32_t dw)
{
   assert(state->max_dw);
   assert(state->ndw < state->max_dw);
   state->pm4[state->ndw++] = dw;
   state->last_opcode = 255; /* invalid opcode */
}

void
ac_pm4_cmd_end(struct ac_pm4_state *state, bool predicate)
{
   unsigned count;
   count = state->ndw - state->last_pm4 - 2;
   /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
   bool reset_filter_cam = !state->is_compute_queue &&
                           (opcode_is_pairs(state->last_opcode) ||
                            opcode_is_pairs_packed(state->last_opcode));

   state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate) |
                                 PKT3_RESET_FILTER_CAM_S(reset_filter_cam);

   if (opcode_is_pairs_packed(state->last_opcode)) {
      if (packed_prev_is_reg_value0(state)) {
         /* Duplicate the first register at the end to make the number of registers aligned to 2. */
         ac_pm4_set_reg_custom(state, get_packed_reg_dw_offsetN(state, 0) * 4,
                               get_packed_reg_valueN(state, 0),
                               state->last_opcode, 0);
         state->packed_is_padded = true;
      }

      state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state);
   }
}

void
ac_pm4_set_reg_custom(struct ac_pm4_state *state, unsigned reg, uint32_t val,
                      unsigned opcode, unsigned idx)
{
   bool is_packed = opcode_is_pairs_packed(opcode);
   reg >>= 2;

   assert(state->max_dw);
   assert(state->ndw + 2 <= state->max_dw);

   if (is_packed) {
      assert(idx == 0);

      if (opcode != state->last_opcode) {
         ac_pm4_cmd_begin(state, opcode); /* reserve space for the header */
         state->ndw++; /* reserve space for the register count, it will be set at the end */
      }
   } else if (opcode_is_pairs(opcode)) {
      assert(idx == 0);

      if (opcode != state->last_opcode)
         ac_pm4_cmd_begin(state, opcode);

      state->pm4[state->ndw++] = reg;
   } else if (opcode != state->last_opcode || reg != (state->last_reg + 1) ||
              idx != state->last_idx) {
      ac_pm4_cmd_begin(state, opcode);
      state->pm4[state->ndw++] = reg | (idx << 28);
   }

   assert(reg <= UINT16_MAX);
   state->last_reg = reg;
   state->last_idx = idx;

   if (is_packed) {
      if (state->packed_is_padded) {
         /* The packet is padded, which means the first register is written redundantly again
          * at the end. Remove it, so that we can replace it with this register.
          */
         state->packed_is_padded = false;
         state->ndw--;
      }

      if (packed_next_is_reg_offset_pair(state)) {
         state->pm4[state->ndw++] = reg;
      } else if (packed_next_is_reg_value1(state)) {
         /* Set the second register offset in the high 16 bits. */
         state->pm4[state->ndw - 2] &= 0x0000ffff;
         state->pm4[state->ndw - 2] |= reg << 16;
      }
   }

   state->pm4[state->ndw++] = val;
   ac_pm4_cmd_end(state, false);
}

void ac_pm4_set_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val)
{
   unsigned opcode;

   if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
      opcode = PKT3_SET_CONFIG_REG;
      reg -= SI_CONFIG_REG_OFFSET;

   } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
      opcode = PKT3_SET_SH_REG;
      reg -= SI_SH_REG_OFFSET;

   } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
      opcode = PKT3_SET_CONTEXT_REG;
      reg -= SI_CONTEXT_REG_OFFSET;

   } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
      opcode = PKT3_SET_UCONFIG_REG;
      reg -= CIK_UCONFIG_REG_OFFSET;

   } else {
      fprintf(stderr, "mesa: Invalid register offset %08x!\n", reg);
      return;
   }

   opcode = regular_opcode_to_pairs(state, opcode);

   ac_pm4_set_reg_custom(state, reg, val, opcode, 0);
}

void
ac_pm4_set_reg_idx3(struct ac_pm4_state *state, unsigned reg, uint32_t val)
{
   if (state->info->uses_kernel_cu_mask) {
      assert(state->info->gfx_level >= GFX10);
      ac_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
   } else {
      ac_pm4_set_reg(state, reg, val);
   }
}

void
ac_pm4_clear_state(struct ac_pm4_state *state, const struct radeon_info *info,
                   bool debug_sqtt, bool is_compute_queue)
{
   state->info = info;
   state->debug_sqtt = debug_sqtt;
   state->ndw = 0;
   state->is_compute_queue = is_compute_queue;

   if (!state->max_dw)
      state->max_dw = ARRAY_SIZE(state->pm4);
}

struct ac_pm4_state *
ac_pm4_create_sized(const struct radeon_info *info, bool debug_sqtt,
                    unsigned max_dw, bool is_compute_queue)
{
   struct ac_pm4_state *pm4;
   unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4));

   pm4 = (struct ac_pm4_state *)calloc(1, size);
   if (pm4) {
      pm4->max_dw = max_dw;
      ac_pm4_clear_state(pm4, info, debug_sqtt, is_compute_queue);
   }
   return pm4;
}

void
ac_pm4_free_state(struct ac_pm4_state *state)
{
   if (!state)
      return;

   free(state);
}
src/amd/common/ac_pm4.h (new file, 76 lines)
@@ -0,0 +1,76 @@
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef AC_PM4_H
#define AC_PM4_H

#include "ac_gpu_info.h"

#ifdef __cplusplus
extern "C" {
#endif

struct ac_pm4_state {
   const struct radeon_info *info;

   /* PKT3_SET_*_REG handling */
   uint16_t last_reg;   /* register offset in dwords */
   uint16_t last_pm4;
   uint16_t ndw;        /* number of dwords in pm4 */
   uint8_t last_opcode;
   uint8_t last_idx;
   bool is_compute_queue;
   bool packed_is_padded; /* whether SET_*_REG_PAIRS_PACKED is padded to an even number of regs */

   /* commands for the DE */
   uint16_t max_dw;

   /* Used by SQTT to override the shader address */
   bool debug_sqtt;
   uint32_t spi_shader_pgm_lo_reg;

   /* This must be the last field because the array can continue after the structure. */
   uint32_t pm4[64];
};

void
ac_pm4_set_reg(struct ac_pm4_state *state, unsigned reg, uint32_t val);

void
ac_pm4_set_reg_custom(struct ac_pm4_state *state, unsigned reg, uint32_t val,
                      unsigned opcode, unsigned idx);

void
ac_pm4_set_reg_idx3(struct ac_pm4_state *state, unsigned reg, uint32_t val);

void
ac_pm4_clear_state(struct ac_pm4_state *state, const struct radeon_info *info,
                   bool debug_sqtt, bool is_compute_queue);

void
ac_pm4_cmd_begin(struct ac_pm4_state *state, unsigned opcode);

void
ac_pm4_cmd_add(struct ac_pm4_state *state, uint32_t dw);

void
ac_pm4_cmd_end(struct ac_pm4_state *state, bool predicate);

void
ac_pm4_finalize(struct ac_pm4_state *state);

struct ac_pm4_state *
ac_pm4_create_sized(const struct radeon_info *info, bool debug_sqtt,
                    unsigned max_dw, bool is_compute_queue);

void
ac_pm4_free_state(struct ac_pm4_state *state);

#ifdef __cplusplus
}
#endif

#endif
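For orientation, a minimal usage sketch of the ac_pm4 helpers declared above. It is not part of the commit: the radeon_info is assumed to come from the caller's GPU-info query, the register macro is assumed to come from the AMD register headers (the same one the radeonsi hunks below use), and the value written is just a placeholder.

   /* Hypothetical sketch: build a tiny PM4 state with the common helpers. */
   static void emit_example_state(const struct radeon_info *info)
   {
      /* max_dw = 64, SQTT debugging off, graphics queue. */
      struct ac_pm4_state *pm4 = ac_pm4_create_sized(info, false, 64, false);
      if (!pm4)
         return;

      /* ac_pm4_set_reg() picks SET_SH_REG or one of the *_PAIRS* variants
       * depending on the register range and the chip's packet support. */
      ac_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, 0x12345678 /* example value */);
      ac_pm4_finalize(pm4);

      /* pm4->pm4[0..pm4->ndw) now holds the packet dwords; a driver would
       * copy them into its command stream before freeing the state. */
      ac_pm4_free_state(pm4);
   }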
@@ -115,6 +115,8 @@ amd_common_files = files(
  'ac_parse_ib.c',
  'ac_perfcounter.c',
  'ac_perfcounter.h',
  'ac_pm4.c',
  'ac_pm4.h',
  'ac_vcn_av1_default.h',
)
@@ -65,7 +65,7 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
      struct si_pm4_state *shadowing_preamble = si_pm4_create_sized(sctx->screen, 256, false);

      ac_create_shadowing_ib_preamble(&sctx->screen->info,
                                      (pm4_cmd_add_fn)si_pm4_cmd_add, shadowing_preamble,
                                      (pm4_cmd_add_fn)ac_pm4_cmd_add, shadowing_preamble,
                                      sctx->shadowing.registers->gpu_address, sctx->screen->dpbb_allowed);

      /* Initialize shadowed registers as follows. */
@@ -95,8 +95,8 @@ void si_init_cp_reg_shadowing(struct si_context *sctx)
      /* Setup preemption. The shadowing preamble will be executed as a preamble IB,
       * which will load register values from memory on a context switch.
       */
      sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->pm4,
                                    shadowing_preamble->ndw);
      sctx->ws->cs_setup_preemption(&sctx->gfx_cs, shadowing_preamble->base.pm4,
                                    shadowing_preamble->base.ndw);
      si_pm4_free_state(sctx, shadowing_preamble, ~0);
   }
}
@@ -511,7 +511,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
      struct si_pm4_state *preamble = is_secure ? ctx->cs_preamble_state_tmz :
                                                  ctx->cs_preamble_state;
      radeon_begin(&ctx->gfx_cs);
      radeon_emit_array(preamble->pm4, preamble->ndw);
      radeon_emit_array(preamble->base.pm4, preamble->base.ndw);
      radeon_end();
   }
@@ -11,321 +11,12 @@
#include "util/u_memory.h"
#include "ac_debug.h"

static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
                                  unsigned opcode, unsigned idx);

static bool opcode_is_pairs(unsigned opcode)
{
   return opcode == PKT3_SET_CONTEXT_REG_PAIRS ||
          opcode == PKT3_SET_SH_REG_PAIRS ||
          opcode == PKT3_SET_UCONFIG_REG_PAIRS;
}

static bool opcode_is_pairs_packed(unsigned opcode)
{
   return opcode == PKT3_SET_CONTEXT_REG_PAIRS_PACKED ||
          opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
          opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N;
}

static unsigned pairs_packed_opcode_to_regular(unsigned opcode)
{
   switch (opcode) {
   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
      return PKT3_SET_CONTEXT_REG;
   case PKT3_SET_SH_REG_PAIRS_PACKED:
      return PKT3_SET_SH_REG;
   default:
      unreachable("invalid packed opcode");
   }
}

static unsigned regular_opcode_to_pairs(struct si_pm4_state *state, unsigned opcode)
{
   const struct radeon_info *info = &state->screen->info;

   switch (opcode) {
   case PKT3_SET_CONTEXT_REG:
      return info->has_set_context_pairs_packed ? PKT3_SET_CONTEXT_REG_PAIRS_PACKED :
             info->has_set_context_pairs ? PKT3_SET_CONTEXT_REG_PAIRS : opcode;
   case PKT3_SET_SH_REG:
      return info->has_set_sh_pairs_packed ? PKT3_SET_SH_REG_PAIRS_PACKED :
             info->has_set_sh_pairs ? PKT3_SET_SH_REG_PAIRS : opcode;
   case PKT3_SET_UCONFIG_REG:
      return info->has_set_uconfig_pairs ? PKT3_SET_UCONFIG_REG_PAIRS : opcode;
   }

   return opcode;
}

static bool packed_next_is_reg_offset_pair(struct si_pm4_state *state)
{
   return (state->ndw - state->last_pm4) % 3 == 2;
}

static bool packed_next_is_reg_value1(struct si_pm4_state *state)
{
   return (state->ndw - state->last_pm4) % 3 == 1;
}

static bool packed_prev_is_reg_value0(struct si_pm4_state *state)
{
   return packed_next_is_reg_value1(state);
}

static unsigned get_packed_reg_dw_offsetN(struct si_pm4_state *state, unsigned index)
{
   unsigned i = state->last_pm4 + 2 + (index / 2) * 3;
   assert(i < state->ndw);
   return (state->pm4[i] >> ((index % 2) * 16)) & 0xffff;
}

static unsigned get_packed_reg_valueN_idx(struct si_pm4_state *state, unsigned index)
{
   unsigned i = state->last_pm4 + 2 + (index / 2) * 3 + 1 + (index % 2);
   assert(i < state->ndw);
   return i;
}

static unsigned get_packed_reg_valueN(struct si_pm4_state *state, unsigned index)
{
   return state->pm4[get_packed_reg_valueN_idx(state, index)];
}

static unsigned get_packed_reg_count(struct si_pm4_state *state)
{
   int body_size = state->ndw - state->last_pm4 - 2;
   assert(body_size > 0 && body_size % 3 == 0);
   return (body_size / 3) * 2;
}

void si_pm4_finalize(struct si_pm4_state *state)
{
   if (opcode_is_pairs_packed(state->last_opcode)) {
      unsigned reg_count = get_packed_reg_count(state);
      unsigned reg_dw_offset0 = get_packed_reg_dw_offsetN(state, 0);

      if (state->packed_is_padded)
         reg_count--;

      bool all_consecutive = true;

      /* If the whole packed SET packet only sets consecutive registers, rewrite the packet
       * to be unpacked to make it shorter.
       *
       * This also eliminates the invalid scenario when the packed SET packet sets only
       * 2 registers and the register offsets are equal due to padding.
       */
      for (unsigned i = 1; i < reg_count; i++) {
         if (reg_dw_offset0 != get_packed_reg_dw_offsetN(state, i) - i) {
            all_consecutive = false;
            break;
         }
      }

      if (all_consecutive) {
         assert(state->ndw - state->last_pm4 == 2 + 3 * (reg_count + state->packed_is_padded) / 2);
         state->pm4[state->last_pm4] = PKT3(pairs_packed_opcode_to_regular(state->last_opcode),
                                            reg_count, 0);
         state->pm4[state->last_pm4 + 1] = reg_dw_offset0;
         for (unsigned i = 0; i < reg_count; i++)
            state->pm4[state->last_pm4 + 2 + i] = get_packed_reg_valueN(state, i);
         state->ndw = state->last_pm4 + 2 + reg_count;
         state->last_opcode = PKT3_SET_SH_REG;
      } else {
         /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
         if (state->screen->debug_flags & DBG(SQTT) &&
             (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED ||
              state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED_N)) {
            if (state->packed_is_padded)
               reg_count++; /* Add this back because we only need to record the last write. */

            for (int i = reg_count - 1; i >= 0; i--) {
               unsigned reg_offset = SI_SH_REG_OFFSET + get_packed_reg_dw_offsetN(state, i) * 4;

               if (strstr(ac_get_register_name(state->screen->info.gfx_level,
                                               state->screen->info.family, reg_offset),
                          "SPI_SHADER_PGM_LO_")) {
                  state->spi_shader_pgm_lo_reg = reg_offset;
                  break;
               }
            }
         }

         /* If it's a packed SET_SH packet, use the *_N variant when possible. */
         if (state->last_opcode == PKT3_SET_SH_REG_PAIRS_PACKED && reg_count <= 14) {
            state->pm4[state->last_pm4] &= PKT3_IT_OPCODE_C;
            state->pm4[state->last_pm4] |= PKT3_IT_OPCODE_S(PKT3_SET_SH_REG_PAIRS_PACKED_N);
         }
      }
   }

   if (state->screen->debug_flags & DBG(SQTT) && state->last_opcode == PKT3_SET_SH_REG) {
      /* Set reg_va_low_idx to where the shader address is stored in the pm4 state. */
      unsigned reg_count = PKT_COUNT_G(state->pm4[state->last_pm4]);
      unsigned reg_base_offset = SI_SH_REG_OFFSET + state->pm4[state->last_pm4 + 1] * 4;

      for (unsigned i = 0; i < reg_count; i++) {
         if (strstr(ac_get_register_name(state->screen->info.gfx_level,
                                         state->screen->info.family, reg_base_offset + i * 4),
                    "SPI_SHADER_PGM_LO_")) {
            state->spi_shader_pgm_lo_reg = reg_base_offset + i * 4;

            break;
         }
      }
   }
}

static void si_pm4_cmd_begin(struct si_pm4_state *state, unsigned opcode)
{
   si_pm4_finalize(state);

   assert(state->max_dw);
   assert(state->ndw < state->max_dw);
   assert(opcode <= 254);
   state->last_opcode = opcode;
   state->last_pm4 = state->ndw++;
   state->packed_is_padded = false;
}

void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
{
   assert(state->max_dw);
   assert(state->ndw < state->max_dw);
   state->pm4[state->ndw++] = dw;
   state->last_opcode = 255; /* invalid opcode */
}

static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
{
   unsigned count;
   count = state->ndw - state->last_pm4 - 2;
   /* All SET_*_PAIRS* packets on the gfx queue must set RESET_FILTER_CAM. */
   bool reset_filter_cam = !state->is_compute_queue &&
                           (opcode_is_pairs(state->last_opcode) ||
                            opcode_is_pairs_packed(state->last_opcode));

   state->pm4[state->last_pm4] = PKT3(state->last_opcode, count, predicate) |
                                 PKT3_RESET_FILTER_CAM_S(reset_filter_cam);

   if (opcode_is_pairs_packed(state->last_opcode)) {
      if (packed_prev_is_reg_value0(state)) {
         /* Duplicate the first register at the end to make the number of registers aligned to 2. */
         si_pm4_set_reg_custom(state, get_packed_reg_dw_offsetN(state, 0) * 4,
                               get_packed_reg_valueN(state, 0),
                               state->last_opcode, 0);
         state->packed_is_padded = true;
      }

      state->pm4[state->last_pm4 + 1] = get_packed_reg_count(state);
   }
}

static void si_pm4_set_reg_custom(struct si_pm4_state *state, unsigned reg, uint32_t val,
                                  unsigned opcode, unsigned idx)
{
   bool is_packed = opcode_is_pairs_packed(opcode);
   reg >>= 2;

   assert(state->max_dw);
   assert(state->ndw + 2 <= state->max_dw);

   if (is_packed) {
      assert(idx == 0);

      if (opcode != state->last_opcode) {
         si_pm4_cmd_begin(state, opcode); /* reserve space for the header */
         state->ndw++; /* reserve space for the register count, it will be set at the end */
      }
   } else if (opcode_is_pairs(opcode)) {
      assert(idx == 0);

      if (opcode != state->last_opcode)
         si_pm4_cmd_begin(state, opcode);

      state->pm4[state->ndw++] = reg;
   } else if (opcode != state->last_opcode || reg != (state->last_reg + 1) ||
              idx != state->last_idx) {
      si_pm4_cmd_begin(state, opcode);
      state->pm4[state->ndw++] = reg | (idx << 28);
   }

   assert(reg <= UINT16_MAX);
   state->last_reg = reg;
   state->last_idx = idx;

   if (is_packed) {
      if (state->packed_is_padded) {
         /* The packet is padded, which means the first register is written redundantly again
          * at the end. Remove it, so that we can replace it with this register.
          */
         state->packed_is_padded = false;
         state->ndw--;
      }

      if (packed_next_is_reg_offset_pair(state)) {
         state->pm4[state->ndw++] = reg;
      } else if (packed_next_is_reg_value1(state)) {
         /* Set the second register offset in the high 16 bits. */
         state->pm4[state->ndw - 2] &= 0x0000ffff;
         state->pm4[state->ndw - 2] |= reg << 16;
      }
   }

   state->pm4[state->ndw++] = val;
   si_pm4_cmd_end(state, false);
}

void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
{
   unsigned opcode;

   if (reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END) {
      opcode = PKT3_SET_CONFIG_REG;
      reg -= SI_CONFIG_REG_OFFSET;

   } else if (reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END) {
      opcode = PKT3_SET_SH_REG;
      reg -= SI_SH_REG_OFFSET;

   } else if (reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END) {
      opcode = PKT3_SET_CONTEXT_REG;
      reg -= SI_CONTEXT_REG_OFFSET;

   } else if (reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END) {
      opcode = PKT3_SET_UCONFIG_REG;
      reg -= CIK_UCONFIG_REG_OFFSET;

   } else {
      PRINT_ERR("Invalid register offset %08x!\n", reg);
      return;
   }

   opcode = regular_opcode_to_pairs(state, opcode);

   si_pm4_set_reg_custom(state, reg, val, opcode, 0);
}

void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val)
{
   if (state->screen->info.uses_kernel_cu_mask) {
      assert(state->screen->info.gfx_level >= GFX10);
      si_pm4_set_reg_custom(state, reg - SI_SH_REG_OFFSET, val, PKT3_SET_SH_REG_INDEX, 3);
   } else {
      si_pm4_set_reg(state, reg, val);
   }
}

void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
                        bool is_compute_queue)
{
   state->screen = sscreen;
   state->ndw = 0;
   state->is_compute_queue = is_compute_queue;
   const bool debug_sqtt = !!(sscreen->debug_flags & DBG(SQTT));

   if (!state->max_dw)
      state->max_dw = ARRAY_SIZE(state->pm4);
   ac_pm4_clear_state(&state->base, &sscreen->info, debug_sqtt, is_compute_queue);
}

void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx)
@@ -351,7 +42,7 @@ void si_pm4_emit_commands(struct si_context *sctx, struct si_pm4_state *state)
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;

   radeon_begin(cs);
   radeon_emit_array(state->pm4, state->ndw);
   radeon_emit_array(state->base.pm4, state->base.ndw);
   radeon_end();
}

@@ -364,7 +55,7 @@ void si_pm4_emit_state(struct si_context *sctx, unsigned index)
   assert(state && state != sctx->emitted.array[index]);

   radeon_begin(cs);
   radeon_emit_array(state->pm4, state->ndw);
   radeon_emit_array(state->base.pm4, state->base.ndw);
   radeon_end();

   sctx->emitted.array[index] = state;

@@ -396,21 +87,21 @@ struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max
                                         bool is_compute_queue)
{
   struct si_pm4_state *pm4;
   unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->pm4));
   unsigned size = sizeof(*pm4) + 4 * (max_dw - ARRAY_SIZE(pm4->base.pm4));

   pm4 = (struct si_pm4_state *)calloc(1, size);
   if (pm4) {
      pm4->max_dw = max_dw;
      pm4->base.max_dw = max_dw;
      si_pm4_clear_state(pm4, sscreen, is_compute_queue);
   }
   return pm4;
}

struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig)
struct si_pm4_state *si_pm4_clone(struct si_screen *sscreen, struct si_pm4_state *orig)
{
   struct si_pm4_state *pm4 = si_pm4_create_sized(orig->screen, orig->max_dw,
                                                  orig->is_compute_queue);
   struct si_pm4_state *pm4 = si_pm4_create_sized(sscreen, orig->base.max_dw,
                                                  orig->base.is_compute_queue);
   if (pm4)
      memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->max_dw - ARRAY_SIZE(pm4->pm4)));
      memcpy(pm4, orig, sizeof(*pm4) + 4 * (pm4->base.max_dw - ARRAY_SIZE(pm4->base.pm4)));
   return pm4;
}
@@ -10,6 +10,8 @@
#include <stdint.h>
#include <stdbool.h>

#include "ac_pm4.h"

#ifdef __cplusplus
extern "C" {
#endif

@@ -27,35 +29,12 @@ struct si_atom {
};

struct si_pm4_state {
   struct si_screen *screen;

   /* PKT3_SET_*_REG handling */
   uint16_t last_reg;   /* register offset in dwords */
   uint16_t last_pm4;
   uint16_t ndw;        /* number of dwords in pm4 */
   uint8_t last_opcode;
   uint8_t last_idx;
   bool is_compute_queue;
   bool packed_is_padded; /* whether SET_*_REG_PAIRS_PACKED is padded to an even number of regs */

   /* For shader states only */
   struct si_atom atom;

   /* commands for the DE */
   uint16_t max_dw;

   /* Used by SQTT to override the shader address */
   uint32_t spi_shader_pgm_lo_reg;

   /* This must be the last field because the array can continue after the structure. */
   uint32_t pm4[64];
   struct ac_pm4_state base;
};

void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw);
void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_set_reg_idx3(struct si_pm4_state *state, unsigned reg, uint32_t val);
void si_pm4_finalize(struct si_pm4_state *state);

void si_pm4_clear_state(struct si_pm4_state *state, struct si_screen *sscreen,
                        bool is_compute_queue);
void si_pm4_free_state(struct si_context *sctx, struct si_pm4_state *state, unsigned idx);
@@ -66,7 +45,7 @@ void si_pm4_emit_shader(struct si_context *sctx, unsigned index);
void si_pm4_reset_emitted(struct si_context *sctx);
struct si_pm4_state *si_pm4_create_sized(struct si_screen *sscreen, unsigned max_dw,
                                         bool is_compute_queue);
struct si_pm4_state *si_pm4_clone(struct si_pm4_state *orig);
struct si_pm4_state *si_pm4_clone(struct si_screen *sscreen, struct si_pm4_state *orig);

#ifdef __cplusplus
}
File diff suppressed because it is too large
@@ -376,11 +376,11 @@ static bool si_update_shaders(struct si_context *sctx)
         struct si_pm4_state *pm4 = &shader->pm4;

         uint64_t va_low = shader->gpu_address >> 8;
         uint32_t reg = pm4->spi_shader_pgm_lo_reg;
         si_pm4_set_reg(&pipeline->pm4, reg, va_low);
         uint32_t reg = pm4->base.spi_shader_pgm_lo_reg;
         ac_pm4_set_reg(&pipeline->pm4.base, reg, va_low);
      }
   }
   si_pm4_finalize(&pipeline->pm4);
   ac_pm4_finalize(&pipeline->pm4.base);
   sctx->screen->ws->buffer_unmap(sctx->screen->ws, bo->buf);

   _mesa_hash_table_u64_insert(sctx->sqtt->pipeline_bos,

@@ -685,7 +685,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
      return;

   va = shader->bo->gpu_address;
   si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
   ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);

   shader->config.rsrc1 = S_00B528_VGPRS(si_shader_encode_vgprs(shader)) |
                          S_00B528_SGPRS(si_shader_encode_sgprs(shader)) |

@@ -694,7 +694,7 @@ static void si_shader_ls(struct si_screen *sscreen, struct si_shader *shader)
                          S_00B528_FLOAT_MODE(shader->config.float_mode);
   shader->config.rsrc2 = S_00B52C_USER_SGPR(si_get_num_vs_user_sgprs(shader, SI_VS_NUM_USER_SGPR)) |
                          S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
@@ -709,30 +709,30 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
                             GFX6_TCS_NUM_USER_SGPR;

   if (sscreen->info.gfx_level >= GFX12) {
      si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_RSRC4_HS,
      ac_pm4_set_reg(&pm4->base, R_00B420_SPI_SHADER_PGM_RSRC4_HS,
                     S_00B420_WAVE_LIMIT(0x3ff) |
                     S_00B420_GLG_FORCE_DISABLE(1) |
                     S_00B420_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)));

      si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B424_SPI_SHADER_PGM_LO_LS, va >> 8);
   } else if (sscreen->info.gfx_level >= GFX11) {
      si_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
      ac_pm4_set_reg_idx3(&pm4->base, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
                          ac_apply_cu_en(S_00B404_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)) |
                                         S_00B404_CU_EN(0xffff),
                                         C_00B404_CU_EN, 16, &sscreen->info));

      si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
   } else if (sscreen->info.gfx_level >= GFX10) {
      si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
   } else if (sscreen->info.gfx_level >= GFX9) {
      si_pm4_set_reg(pm4, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8);
   } else {
      si_pm4_set_reg(pm4, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
      si_pm4_set_reg(pm4, R_00B424_SPI_SHADER_PGM_HI_HS,
      ac_pm4_set_reg(&pm4->base, R_00B420_SPI_SHADER_PGM_LO_HS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B424_SPI_SHADER_PGM_HI_HS,
                     S_00B424_MEM_BASE(sscreen->info.address32_hi >> 8));
   }

   si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
   ac_pm4_set_reg(&pm4->base, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
                  S_00B428_VGPRS(si_shader_encode_vgprs(shader)) |
                  S_00B428_SGPRS(si_shader_encode_sgprs(shader)) |
                  S_00B428_DX10_CLAMP(sscreen->info.gfx_level < GFX12) |

@@ -752,9 +752,9 @@ static void si_shader_hs(struct si_screen *sscreen, struct si_shader *shader)
      shader->config.rsrc2 |= S_00B42C_OC_LDS_EN(1);

   if (sscreen->info.gfx_level <= GFX8)
      si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2);
      ac_pm4_set_reg(&pm4->base, R_00B42C_SPI_SHADER_PGM_RSRC2_HS, shader->config.rsrc2);

   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

static void si_emit_shader_es(struct si_context *sctx, unsigned index)
@@ -804,16 +804,16 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)

   oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;

   si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
   si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
   ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
   ac_pm4_set_reg(&pm4->base, R_00B324_SPI_SHADER_PGM_HI_ES,
                  S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8));
   si_pm4_set_reg(pm4, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
   ac_pm4_set_reg(&pm4->base, R_00B328_SPI_SHADER_PGM_RSRC1_ES,
                  S_00B328_VGPRS(si_shader_encode_vgprs(shader)) |
                  S_00B328_SGPRS(si_shader_encode_sgprs(shader)) |
                  S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
                  S_00B328_DX10_CLAMP(1) |
                  S_00B328_FLOAT_MODE(shader->config.float_mode));
   si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
   ac_pm4_set_reg(&pm4->base, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
                  S_00B32C_USER_SGPR(num_user_sgprs) | S_00B32C_OC_LDS_EN(oc_lds_en) |
                  S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));

@@ -821,7 +821,7 @@ static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
      si_set_tesseval_regs(sscreen, shader->selector, shader);

   polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *gs,
@@ -1094,9 +1094,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
      num_user_sgprs = GFX9_GS_NUM_USER_SGPR;

      if (sscreen->info.gfx_level >= GFX10) {
         si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
         ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      } else {
         si_pm4_set_reg(pm4, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
         ac_pm4_set_reg(&pm4->base, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8);
      }

      uint32_t rsrc1 = S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |

@@ -1117,8 +1117,8 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
         rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(num_user_sgprs >> 5);
      }

      si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
      si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
      ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS, rsrc1);
      ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);

      shader->gs.spi_shader_pgm_rsrc3_gs =
         ac_apply_cu_en(S_00B21C_CU_EN(0xffff) |

@@ -1147,20 +1147,20 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
                        S_00B21C_WAVE_LIMIT(0x3F),
                        C_00B21C_CU_EN, 0, &sscreen->info);

      si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
      si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS,
      ac_pm4_set_reg(&pm4->base, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B224_SPI_SHADER_PGM_HI_GS,
                     S_00B224_MEM_BASE(sscreen->info.address32_hi >> 8));

      si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
      ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
                     S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
                     S_00B228_SGPRS(si_shader_encode_sgprs(shader)) |
                     S_00B228_DX10_CLAMP(1) |
                     S_00B228_FLOAT_MODE(shader->config.float_mode));
      si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
      ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
                     S_00B22C_USER_SGPR(GFX6_GS_NUM_USER_SGPR) |
                     S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
   }
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

bool gfx10_is_ngg_passthrough(struct si_shader *shader)
@@ -1488,18 +1488,18 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
   }

   if (sscreen->info.gfx_level >= GFX12) {
      si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B224_SPI_SHADER_PGM_LO_ES, va >> 8);
   } else {
      si_pm4_set_reg(pm4, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
      ac_pm4_set_reg(&pm4->base, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8);
   }

   si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
   ac_pm4_set_reg(&pm4->base, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
                  S_00B228_VGPRS(si_shader_encode_vgprs(shader)) |
                  S_00B228_FLOAT_MODE(shader->config.float_mode) |
                  S_00B228_DX10_CLAMP(sscreen->info.gfx_level < GFX12) |
                  S_00B228_MEM_ORDERED(si_shader_mem_ordered(shader)) |
                  S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
   si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
   ac_pm4_set_reg(&pm4->base, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
                  S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
                  S_00B22C_USER_SGPR(num_user_sgprs) |
                  S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |

@@ -1672,7 +1672,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
                                     S_028B54_MAX_PRIMGRP_IN_WAVE(2);
   }

   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

static void si_emit_shader_vs(struct si_context *sctx, unsigned index)
@@ -1829,15 +1829,15 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
      oc_lds_en = shader->selector->stage == MESA_SHADER_TESS_EVAL ? 1 : 0;

   if (sscreen->info.gfx_level >= GFX7) {
      si_pm4_set_reg_idx3(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
      ac_pm4_set_reg_idx3(&pm4->base, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
                          ac_apply_cu_en(S_00B118_CU_EN(cu_mask) |
                                         S_00B118_WAVE_LIMIT(0x3F),
                                         C_00B118_CU_EN, 0, &sscreen->info));
      si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
      ac_pm4_set_reg(&pm4->base, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
   }

   si_pm4_set_reg(pm4, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
   si_pm4_set_reg(pm4, R_00B124_SPI_SHADER_PGM_HI_VS,
   ac_pm4_set_reg(&pm4->base, R_00B120_SPI_SHADER_PGM_LO_VS, va >> 8);
   ac_pm4_set_reg(&pm4->base, R_00B124_SPI_SHADER_PGM_HI_VS,
                  S_00B124_MEM_BASE(sscreen->info.address32_hi >> 8));

   uint32_t rsrc1 =

@@ -1863,8 +1863,8 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
                 S_00B12C_SO_EN(1);
   }

   si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
   si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2);
   ac_pm4_set_reg(&pm4->base, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
   ac_pm4_set_reg(&pm4->base, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, rsrc2);

   if (window_space)
      shader->vs.pa_cl_vte_cntl = S_028818_VTX_XY_FMT(1) | S_028818_VTX_Z_FMT(1);

@@ -1878,7 +1878,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
      si_set_tesseval_regs(sscreen, shader->selector, shader);

   polaris_set_vgt_vertex_reuse(sscreen, shader->selector, shader);
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

static unsigned si_get_spi_shader_col_format(struct si_shader *shader)
@@ -2173,40 +2173,40 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader)
   if (sscreen->dpbb_allowed &&
       (sscreen->pbb_context_states_per_bin > 1 ||
        sscreen->pbb_persistent_states_per_bin > 1)) {
      si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
      si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
      ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0));
      ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
   }

   if (sscreen->info.gfx_level >= GFX12) {
      si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC4_PS,
      ac_pm4_set_reg(&pm4->base, R_00B01C_SPI_SHADER_PGM_RSRC4_PS,
                     S_00B01C_WAVE_LIMIT_GFX12(0x3FF) |
                     S_00B01C_LDS_GROUP_SIZE_GFX12(1) |
                     S_00B01C_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)));
   } else if (sscreen->info.gfx_level >= GFX11) {
      unsigned cu_mask_ps = gfx103_get_cu_mask_ps(sscreen);

      si_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
      ac_pm4_set_reg_idx3(&pm4->base, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
                          ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16) |
                                         S_00B004_INST_PREF_SIZE(si_get_shader_prefetch_size(shader)),
                                         C_00B004_CU_EN, 16, &sscreen->info));
   }

   uint64_t va = shader->bo->gpu_address;
   si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
   si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS,
   ac_pm4_set_reg(&pm4->base, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8);
   ac_pm4_set_reg(&pm4->base, R_00B024_SPI_SHADER_PGM_HI_PS,
                  S_00B024_MEM_BASE(sscreen->info.address32_hi >> 8));

   si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
   ac_pm4_set_reg(&pm4->base, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
                  S_00B028_VGPRS(si_shader_encode_vgprs(shader)) |
                  S_00B028_SGPRS(si_shader_encode_sgprs(shader)) |
                  S_00B028_DX10_CLAMP(sscreen->info.gfx_level < GFX12) |
                  S_00B028_MEM_ORDERED(si_shader_mem_ordered(shader)) |
                  S_00B028_FLOAT_MODE(shader->config.float_mode));
   si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
   ac_pm4_set_reg(&pm4->base, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
                  S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
                  S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
                  S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);
}

static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader *shader)

@@ -2251,7 +2251,7 @@ static void si_shader_init_pm4_state(struct si_screen *sscreen, struct si_shader
      assert(0);
   }

   assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.spi_shader_pgm_lo_reg != 0);
   assert(!(sscreen->debug_flags & DBG(SQTT)) || shader->pm4.base.spi_shader_pgm_lo_reg != 0);
}

static void si_clear_vs_key_inputs(union si_shader_key *key)
@@ -4052,13 +4052,13 @@ static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz)
      return;

   /* Done by Vulkan before VGT_FLUSH. */
   si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
   si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
   ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0));
   ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));

   /* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
   si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
   si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
   si_pm4_finalize(pm4);
   ac_pm4_cmd_add(&pm4->base, PKT3(PKT3_EVENT_WRITE, 0, 0));
   ac_pm4_cmd_add(&pm4->base, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
   ac_pm4_finalize(&pm4->base);

   *has_vgt_flush = true;
}
@@ -4199,32 +4199,32 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)

   if (!*gs_ring_state_dw_offset) {
      /* We are here for the first time. The packets will be added. */
      *gs_ring_state_dw_offset = pm4->ndw;
      *gs_ring_state_dw_offset = pm4->base.ndw;
   } else {
      /* We have been here before. Overwrite the previous packets. */
      old_ndw = pm4->ndw;
      pm4->ndw = *gs_ring_state_dw_offset;
      old_ndw = pm4->base.ndw;
      pm4->base.ndw = *gs_ring_state_dw_offset;
   }

   /* Unallocated rings are written to reserve the space in the pm4
    * (to be able to overwrite them later). */
   if (sctx->gfx_level >= GFX7) {
      if (sctx->gfx_level <= GFX8)
         si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE,
         ac_pm4_set_reg(&pm4->base, R_030900_VGT_ESGS_RING_SIZE,
                        sctx->esgs_ring ? sctx->esgs_ring->width0 / 256 : 0);
      si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE,
      ac_pm4_set_reg(&pm4->base, R_030904_VGT_GSVS_RING_SIZE,
                     sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0);
   } else {
      si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE,
      ac_pm4_set_reg(&pm4->base, R_0088C8_VGT_ESGS_RING_SIZE,
                     sctx->esgs_ring ? sctx->esgs_ring->width0 / 256 : 0);
      si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE,
      ac_pm4_set_reg(&pm4->base, R_0088CC_VGT_GSVS_RING_SIZE,
                     sctx->gsvs_ring ? sctx->gsvs_ring->width0 / 256 : 0);
   }
   si_pm4_finalize(pm4);
   ac_pm4_finalize(&pm4->base);

   if (old_ndw) {
      pm4->ndw = old_ndw;
      pm4->last_opcode = 255; /* invalid opcode (we don't save the last opcode) */
      pm4->base.ndw = old_ndw;
      pm4->base.last_opcode = 255; /* invalid opcode (we don't save the last opcode) */
   }
}