radv,radeonsi: use ac_nir_lower_tex
fossil-db (navi21): Totals from 17279 (12.74% of 135636) affected shaders: MaxWaves: 270015 -> 269991 (-0.01%) Instrs: 24847385 -> 24843807 (-0.01%); split: -0.02%, +0.00% CodeSize: 133215364 -> 133198744 (-0.01%); split: -0.02%, +0.01% VGPRs: 1217632 -> 1217872 (+0.02%); split: -0.00%, +0.02% Latency: 405347021 -> 404971784 (-0.09%); split: -0.09%, +0.00% InvThroughput: 75386590 -> 75350344 (-0.05%); split: -0.07%, +0.03% VClause: 426986 -> 426821 (-0.04%); split: -0.04%, +0.01% SClause: 966751 -> 966971 (+0.02%); split: -0.01%, +0.03% Copies: 1738510 -> 1737970 (-0.03%); split: -0.08%, +0.05% PreSGPRs: 1169070 -> 1169120 (+0.00%); split: -0.00%, +0.00% PreVGPRs: 1136102 -> 1136183 (+0.01%); split: -0.00%, +0.01% Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Georg Lehmann <dadschoorse@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22636>
This commit is contained in:
@@ -9059,119 +9059,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp* out_sc,
|
||||
Temp* out_tc)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
Temp deriv_x = emit_extract_vector(ctx, deriv, 0, v1);
|
||||
Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1);
|
||||
Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1);
|
||||
|
||||
Operand neg_one = Operand::c32(0xbf800000u);
|
||||
Operand one = Operand::c32(0x3f800000u);
|
||||
Operand two = Operand::c32(0x40000000u);
|
||||
Operand four = Operand::c32(0x40800000u);
|
||||
|
||||
Temp is_ma_positive = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), Operand::zero(), ma);
|
||||
Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive);
|
||||
Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma);
|
||||
|
||||
Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), four, id);
|
||||
Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id);
|
||||
is_ma_y = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc), is_ma_y, is_ma_z);
|
||||
Temp is_not_ma_x = bld.sop2(Builder::s_or, bld.def(bld.lm), bld.def(s1, scc), is_ma_z, is_ma_y);
|
||||
|
||||
/* select sc */
|
||||
Temp tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_z, deriv_x, is_not_ma_x);
|
||||
Temp sgn = bld.vop2_e64(
|
||||
aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_sgn_ma, sgn_ma, is_ma_z), one, is_ma_y);
|
||||
*out_sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
|
||||
|
||||
/* select tc */
|
||||
tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_y, deriv_z, is_ma_y);
|
||||
sgn = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, sgn_ma, is_ma_y);
|
||||
*out_tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tmp, sgn);
|
||||
|
||||
/* select ma */
|
||||
tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1),
|
||||
bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y),
|
||||
deriv_z, is_ma_z);
|
||||
tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp);
|
||||
*out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp);
|
||||
}
|
||||
|
||||
void
|
||||
prepare_cube_coords(isel_context* ctx, std::vector<Temp>& coords, Temp* ddx, Temp* ddy,
|
||||
bool is_deriv, bool is_array)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp ma, tc, sc, id;
|
||||
aco_opcode madak =
|
||||
ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmaak_f32 : aco_opcode::v_madak_f32;
|
||||
aco_opcode madmk =
|
||||
ctx->program->gfx_level >= GFX10_3 ? aco_opcode::v_fmamk_f32 : aco_opcode::v_madmk_f32;
|
||||
|
||||
/* see comment in ac_prepare_cube_coords() */
|
||||
if (is_array && ctx->options->gfx_level <= GFX8)
|
||||
coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]);
|
||||
|
||||
ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
|
||||
|
||||
aco_ptr<VALU_instruction> vop3a{
|
||||
create_instruction<VALU_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
|
||||
vop3a->operands[0] = Operand(ma);
|
||||
vop3a->abs[0] = true;
|
||||
Temp invma = bld.tmp(v1);
|
||||
vop3a->definitions[0] = Definition(invma);
|
||||
ctx->block->instructions.emplace_back(std::move(vop3a));
|
||||
|
||||
sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
|
||||
if (!is_deriv)
|
||||
sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/));
|
||||
|
||||
tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]);
|
||||
if (!is_deriv)
|
||||
tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/));
|
||||
|
||||
id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]);
|
||||
|
||||
if (is_deriv) {
|
||||
sc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, invma);
|
||||
tc = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, invma);
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
/* see comment in ac_prepare_cube_coords() */
|
||||
Temp deriv_ma;
|
||||
Temp deriv_sc, deriv_tc;
|
||||
build_cube_select(ctx, ma, id, i ? *ddy : *ddx, &deriv_ma, &deriv_sc, &deriv_tc);
|
||||
|
||||
deriv_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, invma);
|
||||
|
||||
Temp x = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
|
||||
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_sc, invma),
|
||||
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, sc));
|
||||
Temp y = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1),
|
||||
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_tc, invma),
|
||||
bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), deriv_ma, tc));
|
||||
*(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y);
|
||||
}
|
||||
|
||||
sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc);
|
||||
tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc);
|
||||
}
|
||||
|
||||
if (is_array) {
|
||||
id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/));
|
||||
coords.erase(coords.begin() + 3);
|
||||
}
|
||||
coords[0] = sc;
|
||||
coords[1] = tc;
|
||||
coords[2] = id;
|
||||
}
|
||||
|
||||
void
|
||||
get_const_vec(nir_ssa_def* vec, nir_const_value* cv[4])
|
||||
{
|
||||
@@ -9363,25 +9250,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
}
|
||||
|
||||
std::vector<Temp> unpacked_coord;
|
||||
if (ctx->options->gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
instr->coord_components) {
|
||||
RegClass rc = a16 ? v2b : v1;
|
||||
for (unsigned i = 0; i < coord.bytes() / rc.bytes(); i++)
|
||||
unpacked_coord.emplace_back(emit_extract_vector(ctx, coord, i, rc));
|
||||
|
||||
assert(unpacked_coord.size() > 0 && unpacked_coord.size() < 3);
|
||||
|
||||
Operand coord2d;
|
||||
/* 0.5 for floating point coords, 0 for integer. */
|
||||
if (a16)
|
||||
coord2d = instr->op == nir_texop_txf ? Operand::c16(0) : Operand::c16(0x3800);
|
||||
else
|
||||
coord2d = instr->op == nir_texop_txf ? Operand::c32(0) : Operand::c32(0x3f000000);
|
||||
unpacked_coord.insert(std::next(unpacked_coord.begin()), bld.copy(bld.def(rc), coord2d));
|
||||
} else if (coord != Temp()) {
|
||||
if (coord != Temp())
|
||||
unpacked_coord.push_back(coord);
|
||||
}
|
||||
|
||||
if (has_sample_index)
|
||||
unpacked_coord.push_back(sample_index);
|
||||
if (has_lod)
|
||||
@@ -9391,25 +9261,14 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
|
||||
|
||||
coords = emit_pack_v1(ctx, unpacked_coord);
|
||||
|
||||
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE || !a16);
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->coord_components)
|
||||
prepare_cube_coords(ctx, coords, &ddx, &ddy, instr->op == nir_texop_txd,
|
||||
instr->is_array && instr->op != nir_texop_lod);
|
||||
|
||||
/* pack derivatives */
|
||||
if (has_ddx || has_ddy) {
|
||||
RegClass rc = g16 ? v2b : v1;
|
||||
assert(a16 == g16 || ctx->options->gfx_level >= GFX10);
|
||||
std::array<Temp, 2> ddxddy = {ddx, ddy};
|
||||
for (Temp tmp : ddxddy) {
|
||||
if (tmp == Temp())
|
||||
continue;
|
||||
std::vector<Temp> unpacked = {tmp};
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->gfx_level == GFX9) {
|
||||
assert(has_ddx && has_ddy);
|
||||
Temp zero = bld.copy(bld.def(rc), Operand::zero(rc.bytes()));
|
||||
unpacked.push_back(zero);
|
||||
}
|
||||
for (Temp derv : emit_pack_v1(ctx, unpacked))
|
||||
derivs.push_back(derv);
|
||||
}
|
||||
|
@@ -754,190 +754,6 @@ LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMV
|
||||
return LLVMBuildLShr(builder, num, post_shift, "");
|
||||
}
|
||||
|
||||
/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
|
||||
* of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
|
||||
* already multiplied by two. id is the cube face number.
|
||||
*/
|
||||
struct cube_selection_coords {
|
||||
LLVMValueRef stc[2];
|
||||
LLVMValueRef ma;
|
||||
LLVMValueRef id;
|
||||
};
|
||||
|
||||
/* Emits the four llvm.amdgcn.cube* intrinsics on the three values in `in`
 * and stores the results in `out` (tc -> stc[1], sc -> stc[0], ma, id).
 */
static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3],
                                 struct cube_selection_coords *out)
{
   /* Listed in the order in which the calls are emitted. */
   const struct {
      const char *name;
      LLVMValueRef *dst;
   } calls[] = {
      {"llvm.amdgcn.cubetc", &out->stc[1]},
      {"llvm.amdgcn.cubesc", &out->stc[0]},
      {"llvm.amdgcn.cubema", &out->ma},
      {"llvm.amdgcn.cubeid", &out->id},
   };

   for (unsigned i = 0; i < sizeof(calls) / sizeof(calls[0]); i++)
      *calls[i].dst = ac_build_intrinsic(ctx, calls[i].name, ctx->f32, in, 3, 0);
}
|
||||
|
||||
/**
|
||||
* Build a manual selection sequence for cube face sc/tc coordinates and
|
||||
* major axis vector (multiplied by 2 for consistency) for the given
|
||||
* vec3 \p coords, for the face implied by \p selcoords.
|
||||
*
|
||||
* For the major axis, we always adjust the sign to be in the direction of
|
||||
* selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
|
||||
* the selcoords major axis.
|
||||
*/
|
||||
static void build_cube_select(struct ac_llvm_context *ctx,
|
||||
const struct cube_selection_coords *selcoords,
|
||||
const LLVMValueRef *coords, LLVMValueRef *out_st,
|
||||
LLVMValueRef *out_ma)
|
||||
{
|
||||
LLVMBuilderRef builder = ctx->builder;
|
||||
LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
|
||||
LLVMValueRef is_ma_positive;
|
||||
LLVMValueRef sgn_ma;
|
||||
LLVMValueRef is_ma_z, is_not_ma_z;
|
||||
LLVMValueRef is_ma_y;
|
||||
LLVMValueRef is_ma_x;
|
||||
LLVMValueRef sgn;
|
||||
LLVMValueRef tmp;
|
||||
|
||||
is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->ma, LLVMConstReal(f32, 0.0), "");
|
||||
sgn_ma = LLVMBuildSelect(builder, is_ma_positive, LLVMConstReal(f32, 1.0),
|
||||
LLVMConstReal(f32, -1.0), "");
|
||||
|
||||
is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
|
||||
is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
|
||||
is_ma_y = LLVMBuildAnd(
|
||||
builder, is_not_ma_z,
|
||||
LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
|
||||
is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
|
||||
|
||||
/* Select sc */
|
||||
tmp = LLVMBuildSelect(builder, is_ma_x, coords[2], coords[0], "");
|
||||
sgn = LLVMBuildSelect(
|
||||
builder, is_ma_y, LLVMConstReal(f32, 1.0),
|
||||
LLVMBuildSelect(builder, is_ma_z, sgn_ma, LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
|
||||
out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
|
||||
|
||||
/* Select tc */
|
||||
tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
|
||||
sgn = LLVMBuildSelect(builder, is_ma_y, sgn_ma, LLVMConstReal(f32, -1.0), "");
|
||||
out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
|
||||
|
||||
/* Select ma */
|
||||
tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
|
||||
LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
|
||||
tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, 0);
|
||||
*out_ma = LLVMBuildFMul(builder, tmp, LLVMConstReal(f32, 2.0), "");
|
||||
}
|
||||
|
||||
/* Converts cube map coordinates in place to the (sc, tc, face) form.
 *
 * On return coords_arg[0..2] hold the two face coordinates and the face id;
 * for arrays (is_array) the third value is layer * 8 + face. When is_deriv is
 * set and derivs_arg is non-NULL, derivs_arg is read as two vec3 cube
 * derivatives (ddx at [0..2], ddy at [3..5]) and its first four entries are
 * overwritten with the two resulting 2D derivatives.
 */
void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
                            LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg)
{
   LLVMBuilderRef b = ctx->builder;
   LLVMValueRef c_one = LLVMConstReal(ctx->f32, 1.0);
   LLVMValueRef out_coords[3];

   if (is_array && !is_lod) {
      LLVMValueRef layer = ac_build_round(ctx, coords_arg[3]);

      /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
       *
       *    "For Array forms, the array layer used will be
       *
       *       max(0, min(d-1, floor(layer+0.5)))
       *
       *     where d is the depth of the texture array and layer
       *     comes from the component indicated in the tables below."
       *
       * This is also a workaround for an issue where the layer is taken
       * from a helper invocation which happens to fall on a different
       * layer due to extrapolation.
       *
       * GFX8 and earlier attempt to implement this in hardware by clamping
       * the value of coords[2] = (8 * layer) + face. Unfortunately, this
       * means that we end up with the wrong face when clamping occurs.
       *
       * Clamp the layer earlier to work around the issue.
       */
      if (ctx->gfx_level <= GFX8) {
         LLVMValueRef layer_ge0 = LLVMBuildFCmp(b, LLVMRealOGE, layer, ctx->f32_0, "");
         layer = LLVMBuildSelect(b, layer_ge0, layer, ctx->f32_0, "");
      }

      coords_arg[3] = layer;
   }

   struct cube_selection_coords sel;
   build_cube_intrinsic(ctx, coords_arg, &sel);

   /* inv_ma = 1 / |ma| */
   LLVMValueRef abs_ma = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &sel.ma, 1, 0);
   LLVMValueRef inv_ma = ac_build_fdiv(ctx, c_one, abs_ma);

   out_coords[0] = LLVMBuildFMul(b, sel.stc[0], inv_ma, "");
   out_coords[1] = LLVMBuildFMul(b, sel.stc[1], inv_ma, "");
   out_coords[2] = sel.id;

   if (is_deriv && derivs_arg) {
      LLVMValueRef out_derivs[4];

      /* Convert cube derivatives to 2D derivatives. */
      for (int axis = 0; axis < 2; axis++) {
         LLVMValueRef d_st[2];
         LLVMValueRef d_ma;

         /* Transform the derivative alongside the texture coordinate.
          * Mathematically, the correct formula is as follows. Assume we're
          * projecting onto the +Z face and denote by dx/dh the derivative
          * of the (original) X texture coordinate with respect to
          * horizontal window coordinates. The projection onto the +Z face
          * plane is:
          *
          *   f(x,z) = x/z
          *
          * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
          *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
          *
          * This motivates the implementation below.
          *
          * Whether this actually gives the expected results for apps that
          * might feed in derivatives obtained via finite differences is
          * anyone's guess. The OpenGL spec seems awfully quiet about how
          * textureGrad for cube maps should be handled.
          */
         build_cube_select(ctx, &sel, &derivs_arg[axis * 3], d_st, &d_ma);

         d_ma = LLVMBuildFMul(b, d_ma, inv_ma, "");

         for (int i = 0; i < 2; ++i) {
            /* d_st[i] * inv_ma - d_ma * coord[i], operands built in source order. */
            LLVMValueRef scaled = LLVMBuildFMul(b, d_st[i], inv_ma, "");
            LLVMValueRef correction = LLVMBuildFMul(b, d_ma, out_coords[i], "");
            out_derivs[axis * 2 + i] = LLVMBuildFSub(b, scaled, correction, "");
         }
      }

      for (int i = 0; i < 4; ++i)
         derivs_arg[i] = out_derivs[i];
   }

   /* Shift the texture coordinate. This must be applied after the
    * derivative calculation.
    */
   LLVMValueRef c_shift = LLVMConstReal(ctx->f32, 1.5);
   for (int i = 0; i < 2; ++i)
      out_coords[i] = LLVMBuildFAdd(b, out_coords[i], c_shift, "");

   if (is_array) {
      /* For cube arrays coord.z = coord.w (array index) * 8 + face;
       * coords_arg[3] is the array index.
       */
      out_coords[2] =
         ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), out_coords[2]);
   }

   for (int i = 0; i < 3; ++i)
      coords_arg[i] = out_coords[i];
}
|
||||
|
||||
LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
|
||||
LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
|
||||
LLVMValueRef j)
|
||||
|
@@ -224,9 +224,6 @@ LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef nu
|
||||
LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
|
||||
LLVMValueRef multiplier, LLVMValueRef post_shift);
|
||||
|
||||
void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
|
||||
LLVMValueRef *coords_arg, LLVMValueRef *derivs_arg);
|
||||
|
||||
LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
|
||||
LLVMValueRef attr_number, LLVMValueRef params, LLVMValueRef i,
|
||||
LLVMValueRef j);
|
||||
|
@@ -1540,13 +1540,6 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_te
|
||||
return lower_gather4_integer(&ctx->ac, args, instr);
|
||||
}
|
||||
|
||||
/* Fixup for GFX9 which allocates 1D textures as 2D. */
|
||||
if (instr->op == nir_texop_lod && ctx->ac.gfx_level == GFX9) {
|
||||
if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
|
||||
args->coords[1] = ctx->ac.i32_0;
|
||||
}
|
||||
}
|
||||
|
||||
args->attributes = AC_ATTR_INVARIANT_LOAD;
|
||||
bool cs_derivs =
|
||||
ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
|
||||
@@ -4242,61 +4235,26 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
||||
|
||||
/* pack derivatives */
|
||||
if (ddx || ddy) {
|
||||
int num_src_deriv_channels, num_dest_deriv_channels;
|
||||
int num_deriv_channels;
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
num_src_deriv_channels = 3;
|
||||
num_dest_deriv_channels = 3;
|
||||
num_deriv_channels = 3;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
default:
|
||||
num_src_deriv_channels = 2;
|
||||
num_dest_deriv_channels = 2;
|
||||
num_deriv_channels = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
num_src_deriv_channels = 1;
|
||||
if (ctx->ac.gfx_level == GFX9) {
|
||||
num_dest_deriv_channels = 2;
|
||||
} else {
|
||||
num_dest_deriv_channels = 1;
|
||||
}
|
||||
num_deriv_channels = ctx->ac.gfx_level == GFX9 ? 2 : 1;
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_src_deriv_channels; i++) {
|
||||
for (unsigned i = 0; i < num_deriv_channels; i++) {
|
||||
args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
|
||||
args.derivs[num_dest_deriv_channels + i] =
|
||||
args.derivs[num_deriv_channels + i] =
|
||||
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
|
||||
}
|
||||
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
|
||||
LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
|
||||
args.derivs[i] = zero;
|
||||
args.derivs[num_dest_deriv_channels + i] = zero;
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
|
||||
for (unsigned chan = 0; chan < instr->coord_components; chan++)
|
||||
args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
|
||||
if (instr->coord_components == 3)
|
||||
args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
|
||||
ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
|
||||
instr->op == nir_texop_lod, args.coords, args.derivs);
|
||||
}
|
||||
|
||||
/* Texture coordinates fixups */
|
||||
if (ctx->ac.gfx_level == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
instr->op != nir_texop_lod) {
|
||||
LLVMValueRef filler;
|
||||
if (instr->op == nir_texop_txf)
|
||||
filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
|
||||
else
|
||||
filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
|
||||
|
||||
if (instr->is_array)
|
||||
args.coords[2] = args.coords[1];
|
||||
args.coords[1] = filler;
|
||||
}
|
||||
|
||||
/* Pack sample index */
|
||||
|
@@ -530,6 +530,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_pipeline_layo
|
||||
if (progress)
|
||||
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
|
||||
|
||||
NIR_PASS(
|
||||
_, stage->nir, ac_nir_lower_tex,
|
||||
&(ac_nir_lower_tex_options){
|
||||
.gfx_level = gfx_level,
|
||||
.lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord,
|
||||
});
|
||||
|
||||
if (stage->nir->info.uses_resource_info_query)
|
||||
NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);
|
||||
|
||||
|
@@ -613,7 +613,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
|
||||
.lower_to_fragment_fetch_amd = device->physical_device->use_fmask,
|
||||
.lower_lod_zero_width = true,
|
||||
.lower_invalid_implicit_lod = true,
|
||||
.lower_array_layer_round_even = !device->physical_device->rad_info.conformant_trunc_coord,
|
||||
};
|
||||
|
||||
NIR_PASS(_, nir, nir_lower_tex, &tex_options);
|
||||
|
@@ -2063,6 +2063,13 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader,
|
||||
if (sel->stage <= MESA_SHADER_GEOMETRY)
|
||||
NIR_PASS(progress, nir, si_nir_kill_outputs, key);
|
||||
|
||||
NIR_PASS(
|
||||
_, nir, ac_nir_lower_tex,
|
||||
&(ac_nir_lower_tex_options){
|
||||
.gfx_level = sel->screen->info.gfx_level,
|
||||
.lower_array_layer_round_even = !sel->screen->info.conformant_trunc_coord,
|
||||
});
|
||||
|
||||
if (nir->info.uses_resource_info_query)
|
||||
NIR_PASS(progress, nir, ac_nir_lower_resinfo, sel->screen->info.gfx_level);
|
||||
|
||||
|
@@ -277,7 +277,6 @@ static void si_lower_nir(struct si_screen *sscreen, struct nir_shader *nir)
|
||||
.lower_invalid_implicit_lod = true,
|
||||
.lower_tg4_offsets = true,
|
||||
.lower_to_fragment_fetch_amd = sscreen->info.gfx_level < GFX11,
|
||||
.lower_array_layer_round_even = !sscreen->info.conformant_trunc_coord,
|
||||
};
|
||||
NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
|
||||
|
||||
|
Reference in New Issue
Block a user