amd: change chip_class naming to "enum amd_gfx_level gfx_level"

This aligns the naming with PAL.

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Acked-by: Pierre-Eric Pellou-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16469>
This commit is contained in:
Marek Olšák
2022-05-12 02:50:17 -04:00
parent 6dcf7f651f
commit 39800f0fa3
198 changed files with 2775 additions and 2776 deletions

View File

@@ -66,15 +66,15 @@ init()
void
init_program(Program* program, Stage stage, const struct aco_shader_info* info,
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
ac_shader_config* config)
{
program->stage = stage;
program->config = config;
program->info = *info;
program->chip_class = chip_class;
program->gfx_level = gfx_level;
if (family == CHIP_UNKNOWN) {
switch (chip_class) {
switch (gfx_level) {
case GFX6: program->family = CHIP_TAHITI; break;
case GFX7: program->family = CHIP_BONAIRE; break;
case GFX8: program->family = CHIP_POLARIS10; break;
@@ -88,12 +88,10 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->wave_size = info->wave_size;
program->lane_mask = program->wave_size == 32 ? s1 : s2;
program->dev.lds_encoding_granule = chip_class >= GFX11 && stage == fragment_fs ? 1024
: chip_class >= GFX7 ? 512
: 256;
program->dev.lds_alloc_granule =
chip_class >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
program->dev.lds_limit = chip_class >= GFX7 ? 65536 : 32768;
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
gfx_level >= GFX7 ? 512 : 256;
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768;
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
@@ -101,17 +99,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->dev.physical_vgprs = 256;
program->dev.vgpr_alloc_granule = 4;
if (chip_class >= GFX10) {
if (gfx_level >= GFX10) {
program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */
program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
program->dev.sgpr_alloc_granule = 128;
program->dev.sgpr_limit =
108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
if (chip_class == GFX10_3)
if (gfx_level == GFX10_3)
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
else
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
} else if (program->chip_class >= GFX8) {
} else if (program->gfx_level >= GFX8) {
program->dev.physical_sgprs = 800;
program->dev.sgpr_alloc_granule = 16;
program->dev.sgpr_limit = 102;
@@ -124,14 +122,14 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
}
program->dev.max_wave64_per_simd = 10;
if (program->chip_class >= GFX10_3)
if (program->gfx_level >= GFX10_3)
program->dev.max_wave64_per_simd = 16;
else if (program->chip_class == GFX10)
else if (program->gfx_level == GFX10)
program->dev.max_wave64_per_simd = 20;
else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM)
program->dev.max_wave64_per_simd = 8;
program->dev.simd_per_cu = program->chip_class >= GFX10 ? 2 : 4;
program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4;
switch (program->family) {
/* GFX8 APUs */
@@ -146,13 +144,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program->dev.sram_ecc_enabled = program->family == CHIP_ARCTURUS;
/* apparently gfx702 also has fast v_fma_f32 but I can't find a family for that */
program->dev.has_fast_fma32 = program->chip_class >= GFX9;
program->dev.has_fast_fma32 = program->gfx_level >= GFX9;
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
program->family == CHIP_HAWAII)
program->dev.has_fast_fma32 = true;
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10;
program->dev.fused_mad_mix = program->chip_class >= GFX10;
program->dev.fused_mad_mix = program->gfx_level >= GFX10;
if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN)
program->dev.fused_mad_mix = true;
@@ -190,12 +188,12 @@ get_sync_info(const Instruction* instr)
}
bool
can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra)
{
if (!instr->isVALU())
return false;
if (chip < GFX8 || instr->isDPP() || instr->isVOP3P())
if (gfx_level < GFX8 || instr->isDPP() || instr->isVOP3P())
return false;
if (instr->isSDWA())
@@ -205,9 +203,9 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
VOP3_instruction& vop3 = instr->vop3();
if (instr->format == Format::VOP3)
return false;
if (vop3.clamp && instr->isVOPC() && chip != GFX8)
if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
return false;
if (vop3.omod && chip < GFX9)
if (vop3.omod && gfx_level < GFX9)
return false;
// TODO: return true if we know we will use vcc
@@ -217,7 +215,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
for (unsigned i = 1; i < instr->operands.size(); i++) {
if (instr->operands[i].isLiteral())
return false;
if (chip < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
if (gfx_level < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
return false;
}
}
@@ -228,7 +226,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
if (!instr->operands.empty()) {
if (instr->operands[0].isLiteral())
return false;
if (chip < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
if (gfx_level < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
return false;
if (instr->operands[0].bytes() > 4)
return false;
@@ -239,11 +237,11 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
bool is_mac = instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_fmac_f16;
if (chip != GFX8 && is_mac)
if (gfx_level != GFX8 && is_mac)
return false;
// TODO: return true if we know we will use vcc
if (!pre_ra && instr->isVOPC() && chip == GFX8)
if (!pre_ra && instr->isVOPC() && gfx_level == GFX8)
return false;
if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
return false;
@@ -256,7 +254,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
aco_ptr<Instruction>
convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
{
if (instr->isSDWA())
return NULL;
@@ -289,7 +287,7 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
if (instr->definitions[0].getTemp().type() == RegType::sgpr && gfx_level == GFX8)
instr->definitions[0].setFixed(vcc);
if (instr->definitions.size() >= 2)
instr->definitions[1].setFixed(vcc);
@@ -390,10 +388,10 @@ convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8)
}
bool
can_use_opsel(chip_class chip, aco_opcode op, int idx)
can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx)
{
/* opsel is only GFX9+ */
if (chip < GFX9)
if (gfx_level < GFX9)
return false;
switch (op) {
@@ -433,10 +431,10 @@ can_use_opsel(chip_class chip, aco_opcode op, int idx)
}
bool
instr_is_16bit(chip_class chip, aco_opcode op)
instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
{
/* partial register writes are GFX9+, only */
if (chip < GFX9)
if (gfx_level < GFX9)
return false;
switch (op) {
@@ -451,7 +449,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
/* VOP2 */
case aco_opcode::v_mac_f16:
case aco_opcode::v_madak_f16:
case aco_opcode::v_madmk_f16: return chip >= GFX9;
case aco_opcode::v_madmk_f16: return gfx_level >= GFX9;
case aco_opcode::v_add_f16:
case aco_opcode::v_sub_f16:
case aco_opcode::v_subrev_f16:
@@ -479,7 +477,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
case aco_opcode::v_rndne_f16:
case aco_opcode::v_fract_f16:
case aco_opcode::v_sin_f16:
case aco_opcode::v_cos_f16: return chip >= GFX10;
case aco_opcode::v_cos_f16: return gfx_level >= GFX10;
// TODO: confirm whether these write 16 or 32 bit on GFX10+
// case aco_opcode::v_cvt_u16_f16:
// case aco_opcode::v_cvt_i16_f16:
@@ -487,7 +485,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
// case aco_opcode::v_cvt_norm_i16_f16:
// case aco_opcode::v_cvt_norm_u16_f16:
/* on GFX10, all opsel instructions preserve the high bits */
default: return chip >= GFX10 && can_use_opsel(chip, op, -1);
default: return gfx_level >= GFX10 && can_use_opsel(gfx_level, op, -1);
}
}
@@ -760,25 +758,25 @@ wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
: vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_)
{}
wait_imm::wait_imm(enum chip_class chip, uint16_t packed) : vs(unset_counter)
wait_imm::wait_imm(enum amd_gfx_level gfx_level, uint16_t packed) : vs(unset_counter)
{
vm = packed & 0xf;
if (chip >= GFX9)
if (gfx_level >= GFX9)
vm |= (packed >> 10) & 0x30;
exp = (packed >> 4) & 0x7;
lgkm = (packed >> 8) & 0xf;
if (chip >= GFX10)
if (gfx_level >= GFX10)
lgkm |= (packed >> 8) & 0x30;
}
uint16_t
wait_imm::pack(enum chip_class chip) const
wait_imm::pack(enum amd_gfx_level gfx_level) const
{
uint16_t imm = 0;
assert(exp == unset_counter || exp <= 0x7);
switch (chip) {
switch (gfx_level) {
case GFX11:
assert(lgkm == unset_counter || lgkm <= 0x3f);
assert(vm == unset_counter || vm <= 0x3f);
@@ -801,10 +799,10 @@ wait_imm::pack(enum chip_class chip) const
imm = ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
break;
}
if (chip < GFX9 && vm == wait_imm::unset_counter)
if (gfx_level < GFX9 && vm == wait_imm::unset_counter)
imm |= 0xc000; /* should have no effect on pre-GFX9 and now we won't have to worry about the
architecture when interpreting the immediate */
if (chip < GFX10 && lgkm == wait_imm::unset_counter)
if (gfx_level < GFX10 && lgkm == wait_imm::unset_counter)
imm |= 0x3000; /* should have no effect on pre-GFX10 and now we won't have to worry about the
architecture when interpreting the immediate */
return imm;