amd: change chip_class naming to "enum amd_gfx_level gfx_level"
This aligns the naming with PAL. Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Acked-by: Pierre-Eric Pellou-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16469>
This commit is contained in:
@@ -66,15 +66,15 @@ init()
|
||||
|
||||
void
|
||||
init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
|
||||
enum amd_gfx_level gfx_level, enum radeon_family family, bool wgp_mode,
|
||||
ac_shader_config* config)
|
||||
{
|
||||
program->stage = stage;
|
||||
program->config = config;
|
||||
program->info = *info;
|
||||
program->chip_class = chip_class;
|
||||
program->gfx_level = gfx_level;
|
||||
if (family == CHIP_UNKNOWN) {
|
||||
switch (chip_class) {
|
||||
switch (gfx_level) {
|
||||
case GFX6: program->family = CHIP_TAHITI; break;
|
||||
case GFX7: program->family = CHIP_BONAIRE; break;
|
||||
case GFX8: program->family = CHIP_POLARIS10; break;
|
||||
@@ -88,12 +88,10 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
program->wave_size = info->wave_size;
|
||||
program->lane_mask = program->wave_size == 32 ? s1 : s2;
|
||||
|
||||
program->dev.lds_encoding_granule = chip_class >= GFX11 && stage == fragment_fs ? 1024
|
||||
: chip_class >= GFX7 ? 512
|
||||
: 256;
|
||||
program->dev.lds_alloc_granule =
|
||||
chip_class >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
program->dev.lds_limit = chip_class >= GFX7 ? 65536 : 32768;
|
||||
program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
|
||||
gfx_level >= GFX7 ? 512 : 256;
|
||||
program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
|
||||
program->dev.lds_limit = gfx_level >= GFX7 ? 65536 : 32768;
|
||||
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
|
||||
program->dev.has_16bank_lds = family == CHIP_KABINI || family == CHIP_STONEY;
|
||||
|
||||
@@ -101,17 +99,17 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
program->dev.physical_vgprs = 256;
|
||||
program->dev.vgpr_alloc_granule = 4;
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
if (gfx_level >= GFX10) {
|
||||
program->dev.physical_sgprs = 5120; /* doesn't matter as long as it's at least 128 * 40 */
|
||||
program->dev.physical_vgprs = program->wave_size == 32 ? 1024 : 512;
|
||||
program->dev.sgpr_alloc_granule = 128;
|
||||
program->dev.sgpr_limit =
|
||||
108; /* includes VCC, which can be treated as s[106-107] on GFX10+ */
|
||||
if (chip_class == GFX10_3)
|
||||
if (gfx_level == GFX10_3)
|
||||
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 16 : 8;
|
||||
else
|
||||
program->dev.vgpr_alloc_granule = program->wave_size == 32 ? 8 : 4;
|
||||
} else if (program->chip_class >= GFX8) {
|
||||
} else if (program->gfx_level >= GFX8) {
|
||||
program->dev.physical_sgprs = 800;
|
||||
program->dev.sgpr_alloc_granule = 16;
|
||||
program->dev.sgpr_limit = 102;
|
||||
@@ -124,14 +122,14 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
}
|
||||
|
||||
program->dev.max_wave64_per_simd = 10;
|
||||
if (program->chip_class >= GFX10_3)
|
||||
if (program->gfx_level >= GFX10_3)
|
||||
program->dev.max_wave64_per_simd = 16;
|
||||
else if (program->chip_class == GFX10)
|
||||
else if (program->gfx_level == GFX10)
|
||||
program->dev.max_wave64_per_simd = 20;
|
||||
else if (program->family >= CHIP_POLARIS10 && program->family <= CHIP_VEGAM)
|
||||
program->dev.max_wave64_per_simd = 8;
|
||||
|
||||
program->dev.simd_per_cu = program->chip_class >= GFX10 ? 2 : 4;
|
||||
program->dev.simd_per_cu = program->gfx_level >= GFX10 ? 2 : 4;
|
||||
|
||||
switch (program->family) {
|
||||
/* GFX8 APUs */
|
||||
@@ -146,13 +144,13 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
||||
|
||||
program->dev.sram_ecc_enabled = program->family == CHIP_ARCTURUS;
|
||||
/* apparently gfx702 also has fast v_fma_f32 but I can't find a family for that */
|
||||
program->dev.has_fast_fma32 = program->chip_class >= GFX9;
|
||||
program->dev.has_fast_fma32 = program->gfx_level >= GFX9;
|
||||
if (program->family == CHIP_TAHITI || program->family == CHIP_CARRIZO ||
|
||||
program->family == CHIP_HAWAII)
|
||||
program->dev.has_fast_fma32 = true;
|
||||
program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
|
||||
program->dev.has_mac_legacy32 = program->gfx_level <= GFX7 || program->gfx_level >= GFX10;
|
||||
|
||||
program->dev.fused_mad_mix = program->chip_class >= GFX10;
|
||||
program->dev.fused_mad_mix = program->gfx_level >= GFX10;
|
||||
if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
|
||||
program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN)
|
||||
program->dev.fused_mad_mix = true;
|
||||
@@ -190,12 +188,12 @@ get_sync_info(const Instruction* instr)
|
||||
}
|
||||
|
||||
bool
|
||||
can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
{
|
||||
if (!instr->isVALU())
|
||||
return false;
|
||||
|
||||
if (chip < GFX8 || instr->isDPP() || instr->isVOP3P())
|
||||
if (gfx_level < GFX8 || instr->isDPP() || instr->isVOP3P())
|
||||
return false;
|
||||
|
||||
if (instr->isSDWA())
|
||||
@@ -205,9 +203,9 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
VOP3_instruction& vop3 = instr->vop3();
|
||||
if (instr->format == Format::VOP3)
|
||||
return false;
|
||||
if (vop3.clamp && instr->isVOPC() && chip != GFX8)
|
||||
if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
|
||||
return false;
|
||||
if (vop3.omod && chip < GFX9)
|
||||
if (vop3.omod && gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
// TODO: return true if we know we will use vcc
|
||||
@@ -217,7 +215,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
for (unsigned i = 1; i < instr->operands.size(); i++) {
|
||||
if (instr->operands[i].isLiteral())
|
||||
return false;
|
||||
if (chip < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
|
||||
if (gfx_level < GFX9 && !instr->operands[i].isOfType(RegType::vgpr))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -228,7 +226,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
if (!instr->operands.empty()) {
|
||||
if (instr->operands[0].isLiteral())
|
||||
return false;
|
||||
if (chip < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
|
||||
if (gfx_level < GFX9 && !instr->operands[0].isOfType(RegType::vgpr))
|
||||
return false;
|
||||
if (instr->operands[0].bytes() > 4)
|
||||
return false;
|
||||
@@ -239,11 +237,11 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
bool is_mac = instr->opcode == aco_opcode::v_mac_f32 || instr->opcode == aco_opcode::v_mac_f16 ||
|
||||
instr->opcode == aco_opcode::v_fmac_f32 || instr->opcode == aco_opcode::v_fmac_f16;
|
||||
|
||||
if (chip != GFX8 && is_mac)
|
||||
if (gfx_level != GFX8 && is_mac)
|
||||
return false;
|
||||
|
||||
// TODO: return true if we know we will use vcc
|
||||
if (!pre_ra && instr->isVOPC() && chip == GFX8)
|
||||
if (!pre_ra && instr->isVOPC() && gfx_level == GFX8)
|
||||
return false;
|
||||
if (!pre_ra && instr->operands.size() >= 3 && !is_mac)
|
||||
return false;
|
||||
@@ -256,7 +254,7 @@ can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra)
|
||||
|
||||
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
|
||||
aco_ptr<Instruction>
|
||||
convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
||||
convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
|
||||
{
|
||||
if (instr->isSDWA())
|
||||
return NULL;
|
||||
@@ -289,7 +287,7 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
||||
|
||||
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
|
||||
|
||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
|
||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && gfx_level == GFX8)
|
||||
instr->definitions[0].setFixed(vcc);
|
||||
if (instr->definitions.size() >= 2)
|
||||
instr->definitions[1].setFixed(vcc);
|
||||
@@ -390,10 +388,10 @@ convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8)
|
||||
}
|
||||
|
||||
bool
|
||||
can_use_opsel(chip_class chip, aco_opcode op, int idx)
|
||||
can_use_opsel(amd_gfx_level gfx_level, aco_opcode op, int idx)
|
||||
{
|
||||
/* opsel is only GFX9+ */
|
||||
if (chip < GFX9)
|
||||
if (gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
switch (op) {
|
||||
@@ -433,10 +431,10 @@ can_use_opsel(chip_class chip, aco_opcode op, int idx)
|
||||
}
|
||||
|
||||
bool
|
||||
instr_is_16bit(chip_class chip, aco_opcode op)
|
||||
instr_is_16bit(amd_gfx_level gfx_level, aco_opcode op)
|
||||
{
|
||||
/* partial register writes are GFX9+, only */
|
||||
if (chip < GFX9)
|
||||
if (gfx_level < GFX9)
|
||||
return false;
|
||||
|
||||
switch (op) {
|
||||
@@ -451,7 +449,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
||||
/* VOP2 */
|
||||
case aco_opcode::v_mac_f16:
|
||||
case aco_opcode::v_madak_f16:
|
||||
case aco_opcode::v_madmk_f16: return chip >= GFX9;
|
||||
case aco_opcode::v_madmk_f16: return gfx_level >= GFX9;
|
||||
case aco_opcode::v_add_f16:
|
||||
case aco_opcode::v_sub_f16:
|
||||
case aco_opcode::v_subrev_f16:
|
||||
@@ -479,7 +477,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
||||
case aco_opcode::v_rndne_f16:
|
||||
case aco_opcode::v_fract_f16:
|
||||
case aco_opcode::v_sin_f16:
|
||||
case aco_opcode::v_cos_f16: return chip >= GFX10;
|
||||
case aco_opcode::v_cos_f16: return gfx_level >= GFX10;
|
||||
// TODO: confirm whether these write 16 or 32 bit on GFX10+
|
||||
// case aco_opcode::v_cvt_u16_f16:
|
||||
// case aco_opcode::v_cvt_i16_f16:
|
||||
@@ -487,7 +485,7 @@ instr_is_16bit(chip_class chip, aco_opcode op)
|
||||
// case aco_opcode::v_cvt_norm_i16_f16:
|
||||
// case aco_opcode::v_cvt_norm_u16_f16:
|
||||
/* on GFX10, all opsel instructions preserve the high bits */
|
||||
default: return chip >= GFX10 && can_use_opsel(chip, op, -1);
|
||||
default: return gfx_level >= GFX10 && can_use_opsel(gfx_level, op, -1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -760,25 +758,25 @@ wait_imm::wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_)
|
||||
: vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_)
|
||||
{}
|
||||
|
||||
wait_imm::wait_imm(enum chip_class chip, uint16_t packed) : vs(unset_counter)
|
||||
wait_imm::wait_imm(enum amd_gfx_level gfx_level, uint16_t packed) : vs(unset_counter)
|
||||
{
|
||||
vm = packed & 0xf;
|
||||
if (chip >= GFX9)
|
||||
if (gfx_level >= GFX9)
|
||||
vm |= (packed >> 10) & 0x30;
|
||||
|
||||
exp = (packed >> 4) & 0x7;
|
||||
|
||||
lgkm = (packed >> 8) & 0xf;
|
||||
if (chip >= GFX10)
|
||||
if (gfx_level >= GFX10)
|
||||
lgkm |= (packed >> 8) & 0x30;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
wait_imm::pack(enum chip_class chip) const
|
||||
wait_imm::pack(enum amd_gfx_level gfx_level) const
|
||||
{
|
||||
uint16_t imm = 0;
|
||||
assert(exp == unset_counter || exp <= 0x7);
|
||||
switch (chip) {
|
||||
switch (gfx_level) {
|
||||
case GFX11:
|
||||
assert(lgkm == unset_counter || lgkm <= 0x3f);
|
||||
assert(vm == unset_counter || vm <= 0x3f);
|
||||
@@ -801,10 +799,10 @@ wait_imm::pack(enum chip_class chip) const
|
||||
imm = ((lgkm & 0xf) << 8) | ((exp & 0x7) << 4) | (vm & 0xf);
|
||||
break;
|
||||
}
|
||||
if (chip < GFX9 && vm == wait_imm::unset_counter)
|
||||
if (gfx_level < GFX9 && vm == wait_imm::unset_counter)
|
||||
imm |= 0xc000; /* should have no effect on pre-GFX9 and now we won't have to worry about the
|
||||
architecture when interpreting the immediate */
|
||||
if (chip < GFX10 && lgkm == wait_imm::unset_counter)
|
||||
if (gfx_level < GFX10 && lgkm == wait_imm::unset_counter)
|
||||
imm |= 0x3000; /* should have no effect on pre-GFX10 and now we won't have to worry about the
|
||||
architecture when interpreting the immediate */
|
||||
return imm;
|
||||
|
Reference in New Issue
Block a user