aco,ac/llvm,ac/nir,vtn: unify cube opcodes
fossil-db (navi21):
Totals from 17068 (12.79% of 133461) affected shaders:
Instrs: 24743703 -> 24743572 (-0.00%); split: -0.00%, +0.00%
CodeSize: 132579952 -> 132580620 (+0.00%); split: -0.00%, +0.00%
VGPRs: 1227840 -> 1227984 (+0.01%)
Latency: 403180114 -> 403251188 (+0.02%); split: -0.00%, +0.02%
InvThroughput: 75311302 -> 75320892 (+0.01%); split: -0.00%, +0.01%
VClause: 415400 -> 415402 (+0.00%); split: -0.00%, +0.00%
Copies: 1715404 -> 1715258 (-0.01%); split: -0.01%, +0.01%

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com> (r600)
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23930>
This commit is contained in:
@@ -219,8 +219,7 @@ lower_alu_instr_width(nir_builder *b, nir_instr *instr, void *_data)
|
||||
case nir_op_vec4:
|
||||
case nir_op_vec3:
|
||||
case nir_op_vec2:
|
||||
case nir_op_cube_face_coord_amd:
|
||||
case nir_op_cube_face_index_amd:
|
||||
case nir_op_cube_amd:
|
||||
/* We don't need to scalarize these ops, they're the ones generated to
|
||||
* group up outputs into a value that can be SSAed.
|
||||
*/
|
||||
|
@@ -536,38 +536,6 @@ for (unsigned bit = 0; bit < bit_size; bit++) {
|
||||
}
|
||||
""")
|
||||
|
||||
# AMD_gcn_shader extended instructions
|
||||
unop_horiz("cube_face_coord_amd", 3, tfloat32, 3, tfloat32, """
|
||||
dst.x = dst.y = dst.z = 0.0;
|
||||
float absX = fabsf(src0.x);
|
||||
float absY = fabsf(src0.y);
|
||||
float absZ = fabsf(src0.z);
|
||||
|
||||
if (absX >= absY && absX >= absZ) { dst.z = 2 * src0.x; }
|
||||
if (absY >= absX && absY >= absZ) { dst.z = 2 * src0.y; }
|
||||
if (absZ >= absX && absZ >= absY) { dst.z = 2 * src0.z; }
|
||||
|
||||
if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
|
||||
if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
|
||||
if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; }
|
||||
if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
|
||||
if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
|
||||
if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
|
||||
""")
|
||||
|
||||
unop_horiz("cube_face_index_amd", 1, tfloat32, 3, tfloat32, """
|
||||
dst.x = 0.0;
|
||||
float absX = fabsf(src0.x);
|
||||
float absY = fabsf(src0.y);
|
||||
float absZ = fabsf(src0.z);
|
||||
if (src0.x >= 0 && absX >= absY && absX >= absZ) dst.x = 0;
|
||||
if (src0.x < 0 && absX >= absY && absX >= absZ) dst.x = 1;
|
||||
if (src0.y >= 0 && absY >= absX && absY >= absZ) dst.x = 2;
|
||||
if (src0.y < 0 && absY >= absX && absY >= absZ) dst.x = 3;
|
||||
if (src0.z >= 0 && absZ >= absX && absZ >= absY) dst.x = 4;
|
||||
if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
|
||||
""")
|
||||
|
||||
unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}",
|
||||
description = "Sum of vector components")
|
||||
|
||||
@@ -1267,11 +1235,11 @@ dst = ((((src0 & 0xffff0000) >> 16) * (src1 & 0x0000ffff)) << 16) + src2;
|
||||
triop("imad24_ir3", tint32, _2src_commutative,
|
||||
"(((int32_t)src0 << 8) >> 8) * (((int32_t)src1 << 8) >> 8) + src2")
|
||||
|
||||
# r600-specific instruction that evaluates unnormalized cube texture coordinates
|
||||
# r600/gcn specific instruction that evaluates unnormalized cube texture coordinates
|
||||
# and face index
|
||||
# The actual texture coordinates are evaluated from this according to
|
||||
# dst.yx / abs(dst.z) + 1.5
|
||||
unop_horiz("cube_r600", 4, tfloat32, 3, tfloat32, """
|
||||
unop_horiz("cube_amd", 4, tfloat32, 3, tfloat32, """
|
||||
dst.x = dst.y = dst.z = 0.0;
|
||||
float absX = fabsf(src0.x);
|
||||
float absY = fabsf(src0.y);
|
||||
|
@@ -33,11 +33,11 @@ vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
|
||||
nir_ssa_def *def;
|
||||
switch ((enum GcnShaderAMD)ext_opcode) {
|
||||
case CubeFaceIndexAMD:
|
||||
def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
|
||||
def = nir_channel(&b->nb, nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5])), 3);
|
||||
break;
|
||||
case CubeFaceCoordAMD: {
|
||||
def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
|
||||
nir_ssa_def *st = nir_trim_vector(&b->nb, def, 2);
|
||||
def = nir_cube_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
|
||||
nir_ssa_def *st = nir_swizzle(&b->nb, def, (unsigned[]){1, 0}, 2);
|
||||
nir_ssa_def *invma = nir_frcp(&b->nb, nir_channel(&b->nb, def, 2));
|
||||
def = nir_ffma_imm2(&b->nb, st, invma, 0.5);
|
||||
break;
|
||||
|
Reference in New Issue
Block a user