intel/fs: Rework zero-length URB write handling
Originally we tried to handle this case based on slots_valid. However, there are a number of ways that this can go wrong. For one, we throw away any trailing slots which either aren't written or are set to VARYING_SLOT_PAD. Second, even if PSIZ is a valid slot, we may not actually write anything there. Between the lot of these, it was possible to end up in a case where we tried to do a regular URB write but ended up with a length of 1 which is invalid. This commit moves it to the end and makes it based on a new boolean flag urb_written. Reviewed-by: Iago Toral Quiroga <itoral@igalia.com> Cc: mesa-stable@lists.freedesktop.org
This commit is contained in:
@@ -566,34 +566,6 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
else
|
||||
urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
|
||||
|
||||
/* If we don't have any valid slots to write, just do a minimal urb write
|
||||
* send to terminate the shader. This includes 1 slot of undefined data,
|
||||
* because it's invalid to write 0 data:
|
||||
*
|
||||
* From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
|
||||
* Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
|
||||
* Write Data Payload:
|
||||
*
|
||||
* "The write data payload can be between 1 and 8 message phases long."
|
||||
*/
|
||||
if (vue_map->slots_valid == 0) {
|
||||
/* For GS, just turn EmitVertex() into a no-op. We don't want it to
|
||||
* end the thread, and emit_gs_thread_end() already emits a SEND with
|
||||
* EOT at the end of the program for us.
|
||||
*/
|
||||
if (stage == MESA_SHADER_GEOMETRY)
|
||||
return;
|
||||
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
|
||||
bld.exec_all().MOV(payload, urb_handle);
|
||||
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
|
||||
inst->eot = true;
|
||||
inst->mlen = 2;
|
||||
inst->offset = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
opcode opcode = SHADER_OPCODE_URB_WRITE_SIMD8;
|
||||
int header_size = 1;
|
||||
fs_reg per_slot_offsets;
|
||||
@@ -645,6 +617,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
last_slot--;
|
||||
}
|
||||
|
||||
bool urb_written = false;
|
||||
for (slot = 0; slot < vue_map->num_slots; slot++) {
|
||||
int varying = vue_map->slot_to_varying[slot];
|
||||
switch (varying) {
|
||||
@@ -730,7 +703,7 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
* the last slot or if we need to flush (see BAD_FILE varying case
|
||||
* above), emit a URB write send now to flush out the data.
|
||||
*/
|
||||
if (length == 8 || slot == last_slot)
|
||||
if (length == 8 || (length > 0 && slot == last_slot))
|
||||
flush = true;
|
||||
if (flush) {
|
||||
fs_reg *payload_sources =
|
||||
@@ -755,8 +728,37 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
|
||||
urb_offset = starting_urb_offset + slot + 1;
|
||||
length = 0;
|
||||
flush = false;
|
||||
urb_written = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* If we don't have any valid slots to write, just do a minimal urb write
|
||||
* send to terminate the shader. This includes 1 slot of undefined data,
|
||||
* because it's invalid to write 0 data:
|
||||
*
|
||||
* From the Broadwell PRM, Volume 7: 3D Media GPGPU, Shared Functions -
|
||||
* Unified Return Buffer (URB) > URB_SIMD8_Write and URB_SIMD8_Read >
|
||||
* Write Data Payload:
|
||||
*
|
||||
* "The write data payload can be between 1 and 8 message phases long."
|
||||
*/
|
||||
if (!urb_written) {
|
||||
/* For GS, just turn EmitVertex() into a no-op. We don't want it to
|
||||
* end the thread, and emit_gs_thread_end() already emits a SEND with
|
||||
* EOT at the end of the program for us.
|
||||
*/
|
||||
if (stage == MESA_SHADER_GEOMETRY)
|
||||
return;
|
||||
|
||||
fs_reg payload = fs_reg(VGRF, alloc.allocate(2), BRW_REGISTER_TYPE_UD);
|
||||
bld.exec_all().MOV(payload, urb_handle);
|
||||
|
||||
fs_inst *inst = bld.emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
|
||||
inst->eot = true;
|
||||
inst->mlen = 2;
|
||||
inst->offset = 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
Reference in New Issue
Block a user