v3d: Use combined input/output segments.
The HW apparently has some issues (or at least a much more complicated VCM calculation) with non-combined segments, and the closed-source driver also uses combined I/O. Until I get the last CTS failure resolved (which does look plausibly like some VPM stomping), let's use combined I/O too.
This commit is contained in:
@@ -280,6 +280,11 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
|||||||
const struct v3d_device_info *devinfo = state->devinfo;
|
const struct v3d_device_info *devinfo = state->devinfo;
|
||||||
struct qinst *qinst = n->inst;
|
struct qinst *qinst = n->inst;
|
||||||
struct v3d_qpu_instr *inst = &qinst->qpu;
|
struct v3d_qpu_instr *inst = &qinst->qpu;
|
||||||
|
/* If the input and output segments are shared, then all VPM reads to
|
||||||
|
* a location need to happen before all writes. We handle this by
|
||||||
|
* serializing all VPM operations for now.
|
||||||
|
*/
|
||||||
|
bool separate_vpm_segment = false;
|
||||||
|
|
||||||
if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
|
if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
|
||||||
if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
|
if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
|
||||||
@@ -321,6 +326,14 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
|||||||
add_write_dep(state, &state->last_vpm, n);
|
add_write_dep(state, &state->last_vpm, n);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case V3D_QPU_A_LDVPMV_IN:
|
||||||
|
case V3D_QPU_A_LDVPMD_IN:
|
||||||
|
case V3D_QPU_A_LDVPMG_IN:
|
||||||
|
case V3D_QPU_A_LDVPMP:
|
||||||
|
if (!separate_vpm_segment)
|
||||||
|
add_write_dep(state, &state->last_vpm, n);
|
||||||
|
break;
|
||||||
|
|
||||||
case V3D_QPU_A_VPMWT:
|
case V3D_QPU_A_VPMWT:
|
||||||
add_read_dep(state, state->last_vpm, n);
|
add_read_dep(state, state->last_vpm, n);
|
||||||
break;
|
break;
|
||||||
@@ -414,9 +427,16 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n)
|
|||||||
if (inst->sig.ldtlb | inst->sig.ldtlbu)
|
if (inst->sig.ldtlb | inst->sig.ldtlbu)
|
||||||
add_read_dep(state, state->last_tlb, n);
|
add_read_dep(state, state->last_tlb, n);
|
||||||
|
|
||||||
if (inst->sig.ldvpm)
|
if (inst->sig.ldvpm) {
|
||||||
add_write_dep(state, &state->last_vpm_read, n);
|
add_write_dep(state, &state->last_vpm_read, n);
|
||||||
|
|
||||||
|
/* At least for now, we're doing shared I/O segments, so queue
|
||||||
|
* all writes after all reads.
|
||||||
|
*/
|
||||||
|
if (!separate_vpm_segment)
|
||||||
|
add_write_dep(state, &state->last_vpm, n);
|
||||||
|
}
|
||||||
|
|
||||||
/* inst->sig.ldunif or sideband uniform read */
|
/* inst->sig.ldunif or sideband uniform read */
|
||||||
if (qinst->uniform != ~0)
|
if (qinst->uniform != ~0)
|
||||||
add_write_dep(state, &state->last_unif, n);
|
add_write_dep(state, &state->last_unif, n);
|
||||||
|
@@ -649,6 +649,11 @@ struct v3d_vs_prog_data {
|
|||||||
/* Total number of components written, for the shader state record. */
|
/* Total number of components written, for the shader state record. */
|
||||||
uint32_t vpm_output_size;
|
uint32_t vpm_output_size;
|
||||||
|
|
||||||
|
/* Set if there should be separate VPM segments for input and output.
|
||||||
|
* If unset, vpm_input_size will be 0.
|
||||||
|
*/
|
||||||
|
bool separate_segments;
|
||||||
|
|
||||||
/* Value to be programmed in VCM_CACHE_SIZE. */
|
/* Value to be programmed in VCM_CACHE_SIZE. */
|
||||||
uint8_t vcm_cache_size;
|
uint8_t vcm_cache_size;
|
||||||
};
|
};
|
||||||
|
@@ -789,6 +789,14 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
|
|||||||
prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
|
prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
|
||||||
prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
|
prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
|
||||||
|
|
||||||
|
/* Set us up for shared input/output segments. This is apparently
|
||||||
|
* necessary for our VCM setup to avoid varying corruption.
|
||||||
|
*/
|
||||||
|
prog_data->separate_segments = false;
|
||||||
|
prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
|
||||||
|
prog_data->vpm_input_size);
|
||||||
|
prog_data->vpm_input_size = 0;
|
||||||
|
|
||||||
/* Compute VCM cache size. We set up our program to take up less than
|
/* Compute VCM cache size. We set up our program to take up less than
|
||||||
* half of the VPM, so that any set of bin and render programs won't
|
* half of the VPM, so that any set of bin and render programs won't
|
||||||
* run out of space. We need space for at least one input segment,
|
* run out of space. We need space for at least one input segment,
|
||||||
|
@@ -201,12 +201,15 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
|||||||
/* XXX: Use combined input/output size flag in the common
|
/* XXX: Use combined input/output size flag in the common
|
||||||
* case.
|
* case.
|
||||||
*/
|
*/
|
||||||
shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
|
shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
|
||||||
shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
|
v3d->prog.cs->prog_data.vs->separate_segments;
|
||||||
|
shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
|
||||||
|
v3d->prog.vs->prog_data.vs->separate_segments;
|
||||||
|
|
||||||
shader.coordinate_shader_input_vpm_segment_size =
|
shader.coordinate_shader_input_vpm_segment_size =
|
||||||
MAX2(v3d->prog.cs->prog_data.vs->vpm_input_size, 1);
|
v3d->prog.cs->prog_data.vs->vpm_input_size;
|
||||||
shader.vertex_shader_input_vpm_segment_size =
|
shader.vertex_shader_input_vpm_segment_size =
|
||||||
MAX2(v3d->prog.vs->prog_data.vs->vpm_input_size, 1);
|
v3d->prog.vs->prog_data.vs->vpm_input_size;
|
||||||
|
|
||||||
shader.coordinate_shader_output_vpm_segment_size =
|
shader.coordinate_shader_output_vpm_segment_size =
|
||||||
v3d->prog.cs->prog_data.vs->vpm_output_size;
|
v3d->prog.cs->prog_data.vs->vpm_output_size;
|
||||||
|
Reference in New Issue
Block a user