i965/cnl: Make URB {VS, GS, HS, DS} sizes not a multiple of 3

v1: By Ben Widawsky <benjamin.widawsky@intel.com>
v2: v1 had an assert only for VS. Add the restriction for GS, HS and
    DS as well, and make sure the allocated sizes are not a multiple of 3.
v3: Move the entry_size checks into the compiler code (Ken)

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Anuj Phogat
2016-01-05 08:41:39 -08:00
parent b76659997e
commit f9e31a26d4
5 changed files with 34 additions and 4 deletions

View File

@@ -1197,6 +1197,14 @@ brw_compile_tes(const struct brw_compiler *compiler,
/* URB entry sizes are stored as a multiple of 64 bytes. */ /* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
/* On Cannonlake software shall not program an allocation size that
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
*/
if (devinfo->gen == 10 &&
prog_data->base.urb_entry_size % 3 == 0)
prog_data->base.urb_entry_size++;
prog_data->base.urb_read_length = 0; prog_data->base.urb_read_length = 0;
STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);

View File

@@ -2839,10 +2839,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
const unsigned vue_entries = const unsigned vue_entries =
MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots); MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots);
if (compiler->devinfo->gen == 6) if (compiler->devinfo->gen == 6) {
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8); prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
else } else {
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4); prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
/* On Cannonlake software shall not program an allocation size that
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
*/
if (compiler->devinfo->gen == 10 &&
prog_data->base.urb_entry_size % 3 == 0)
prog_data->base.urb_entry_size++;
}
if (INTEL_DEBUG & DEBUG_VS) { if (INTEL_DEBUG & DEBUG_VS) {
fprintf(stderr, "VS Output "); fprintf(stderr, "VS Output ");

View File

@@ -817,10 +817,17 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and /* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
* a multiple of 128 bytes in gen6. * a multiple of 128 bytes in gen6.
*/ */
if (compiler->devinfo->gen >= 7) if (compiler->devinfo->gen >= 7) {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
else /* On Cannonlake software shall not program an allocation size that
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
*/
if (compiler->devinfo->gen == 10 &&
prog_data->base.urb_entry_size % 3 == 0)
prog_data->base.urb_entry_size++;
} else {
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128; prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
}
assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim)); assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
prog_data->output_topology = prog_data->output_topology =

View File

@@ -441,6 +441,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
/* URB entry sizes are stored as a multiple of 64 bytes. */ /* URB entry sizes are stored as a multiple of 64 bytes. */
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64; vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
/* On Cannonlake software shall not program an allocation size that
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
*/
if (devinfo->gen == 10 &&
vue_prog_data->urb_entry_size % 3 == 0)
vue_prog_data->urb_entry_size++;
/* HS does not use the usual payload pushing from URB to GRFs, /* HS does not use the usual payload pushing from URB to GRFs,
* because we don't have enough registers for a full-size payload, and * because we don't have enough registers for a full-size payload, and
* the hardware is broken on Haswell anyway. * the hardware is broken on Haswell anyway.

View File

@@ -224,6 +224,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
BEGIN_BATCH(8); BEGIN_BATCH(8);
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
assert(brw->gen != 10 || entry_size[i] % 3);
OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2)); OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
OUT_BATCH(entries[i] | OUT_BATCH(entries[i] |
((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) | ((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |