v3d: Limit shader threading according to our maximum TMU fifo usage.
Fixes simulator assertion failures in dEQP-GLES3.functional.shaders.texture_functions.texture.samplercubeshadow_bias_fragment and similar complicated cases.
This commit is contained in:
@@ -31,9 +31,12 @@
|
||||
#include "cle/v3d_packet_v41_pack.h"
|
||||
|
||||
static void
|
||||
vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val)
|
||||
vir_TMU_WRITE(struct v3d_compile *c, enum v3d_qpu_waddr waddr, struct qreg val,
|
||||
int *tmu_writes)
|
||||
{
|
||||
vir_MOV_dest(c, vir_reg(QFILE_MAGIC, waddr), val);
|
||||
|
||||
(*tmu_writes)++;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -49,6 +52,7 @@ void
|
||||
v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
{
|
||||
unsigned unit = instr->texture_index;
|
||||
int tmu_writes = 0;
|
||||
|
||||
struct V3D41_TMU_CONFIG_PARAMETER_0 p0_unpacked = {
|
||||
};
|
||||
@@ -82,29 +86,32 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
if (non_array_components > 1) {
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUT,
|
||||
ntq_get_src(c, instr->src[i].src,
|
||||
1));
|
||||
1), &tmu_writes);
|
||||
}
|
||||
if (non_array_components > 2) {
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUR,
|
||||
ntq_get_src(c, instr->src[i].src,
|
||||
2));
|
||||
2), &tmu_writes);
|
||||
}
|
||||
|
||||
if (instr->is_array) {
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUI,
|
||||
ntq_get_src(c, instr->src[i].src,
|
||||
instr->coord_components - 1));
|
||||
instr->coord_components - 1),
|
||||
&tmu_writes);
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_tex_src_bias:
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
|
||||
ntq_get_src(c, instr->src[i].src, 0));
|
||||
ntq_get_src(c, instr->src[i].src, 0),
|
||||
&tmu_writes);
|
||||
break;
|
||||
|
||||
case nir_tex_src_lod:
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUB,
|
||||
ntq_get_src(c, instr->src[i].src, 0));
|
||||
ntq_get_src(c, instr->src[i].src, 0),
|
||||
&tmu_writes);
|
||||
|
||||
if (instr->op != nir_texop_txf &&
|
||||
instr->op != nir_texop_tg4) {
|
||||
@@ -114,7 +121,8 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
|
||||
case nir_tex_src_comparator:
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUDREF,
|
||||
ntq_get_src(c, instr->src[i].src, 0));
|
||||
ntq_get_src(c, instr->src[i].src, 0),
|
||||
&tmu_writes);
|
||||
break;
|
||||
|
||||
case nir_tex_src_offset: {
|
||||
@@ -173,15 +181,21 @@ v3d40_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
|
||||
|
||||
if (instr->op == nir_texop_txf) {
|
||||
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s);
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSF, s, &tmu_writes);
|
||||
} else if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s);
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUSCM, s, &tmu_writes);
|
||||
} else {
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s);
|
||||
vir_TMU_WRITE(c, V3D_QPU_WADDR_TMUS, s, &tmu_writes);
|
||||
}
|
||||
|
||||
vir_emit_thrsw(c);
|
||||
|
||||
/* The input FIFO has 16 slots across all threads, so make sure we
|
||||
* don't overfill our allocation.
|
||||
*/
|
||||
while (tmu_writes > 16 / c->threads)
|
||||
c->threads /= 2;
|
||||
|
||||
struct qreg return_values[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
/* Swizzling .zw of an RG texture should give undefined
|
||||
|
Reference in New Issue
Block a user