i965/cnl: Make URB {VS, GS, HS, DS} sizes non-multiples of 3
v1: By Ben Widawsky <benjamin.widawsky@intel.com>
v2: v1 had an assert only for VS. Add the restriction for GS, HS and DS as well, and make sure the allocated sizes are not multiples of 3.
v3: Move the entry_size checks into the compiler code (Ken)

Signed-off-by: Anuj Phogat <anuj.phogat@gmail.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
@@ -1197,6 +1197,14 @@ brw_compile_tes(const struct brw_compiler *compiler,
|
|||||||
|
|
||||||
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||||
|
|
||||||
|
/* On Cannonlake software shall not program an allocation size that
|
||||||
|
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
|
||||||
|
*/
|
||||||
|
if (devinfo->gen == 10 &&
|
||||||
|
prog_data->base.urb_entry_size % 3 == 0)
|
||||||
|
prog_data->base.urb_entry_size++;
|
||||||
|
|
||||||
prog_data->base.urb_read_length = 0;
|
prog_data->base.urb_read_length = 0;
|
||||||
|
|
||||||
STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
|
STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
|
||||||
|
@@ -2839,10 +2839,17 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
const unsigned vue_entries =
|
const unsigned vue_entries =
|
||||||
MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots);
|
MAX2(nr_attribute_slots, (unsigned)prog_data->base.vue_map.num_slots);
|
||||||
|
|
||||||
if (compiler->devinfo->gen == 6)
|
if (compiler->devinfo->gen == 6) {
|
||||||
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
|
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
|
||||||
else
|
} else {
|
||||||
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
|
prog_data->base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
|
||||||
|
/* On Cannonlake software shall not program an allocation size that
|
||||||
|
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
|
||||||
|
*/
|
||||||
|
if (compiler->devinfo->gen == 10 &&
|
||||||
|
prog_data->base.urb_entry_size % 3 == 0)
|
||||||
|
prog_data->base.urb_entry_size++;
|
||||||
|
}
|
||||||
|
|
||||||
if (INTEL_DEBUG & DEBUG_VS) {
|
if (INTEL_DEBUG & DEBUG_VS) {
|
||||||
fprintf(stderr, "VS Output ");
|
fprintf(stderr, "VS Output ");
|
||||||
|
@@ -817,10 +817,17 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
|||||||
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
/* URB entry sizes are stored as a multiple of 64 bytes in gen7+ and
|
||||||
* a multiple of 128 bytes in gen6.
|
* a multiple of 128 bytes in gen6.
|
||||||
*/
|
*/
|
||||||
if (compiler->devinfo->gen >= 7)
|
if (compiler->devinfo->gen >= 7) {
|
||||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||||
else
|
/* On Cannonlake software shall not program an allocation size that
|
||||||
|
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
|
||||||
|
*/
|
||||||
|
if (compiler->devinfo->gen == 10 &&
|
||||||
|
prog_data->base.urb_entry_size % 3 == 0)
|
||||||
|
prog_data->base.urb_entry_size++;
|
||||||
|
} else {
|
||||||
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
|
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 128) / 128;
|
||||||
|
}
|
||||||
|
|
||||||
assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
|
assert(shader->info.gs.output_primitive < ARRAY_SIZE(gl_prim_to_hw_prim));
|
||||||
prog_data->output_topology =
|
prog_data->output_topology =
|
||||||
|
@@ -441,6 +441,13 @@ brw_compile_tcs(const struct brw_compiler *compiler,
|
|||||||
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
/* URB entry sizes are stored as a multiple of 64 bytes. */
|
||||||
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
vue_prog_data->urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
|
||||||
|
|
||||||
|
/* On Cannonlake software shall not program an allocation size that
|
||||||
|
* specifies a size that is a multiple of 3 64B (512-bit) cachelines.
|
||||||
|
*/
|
||||||
|
if (devinfo->gen == 10 &&
|
||||||
|
vue_prog_data->urb_entry_size % 3 == 0)
|
||||||
|
vue_prog_data->urb_entry_size++;
|
||||||
|
|
||||||
/* HS does not use the usual payload pushing from URB to GRFs,
|
/* HS does not use the usual payload pushing from URB to GRFs,
|
||||||
* because we don't have enough registers for a full-size payload, and
|
* because we don't have enough registers for a full-size payload, and
|
||||||
* the hardware is broken on Haswell anyway.
|
* the hardware is broken on Haswell anyway.
|
||||||
|
@@ -224,6 +224,7 @@ gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
|
|||||||
|
|
||||||
BEGIN_BATCH(8);
|
BEGIN_BATCH(8);
|
||||||
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
|
for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
|
||||||
|
assert(brw->gen != 10 || entry_size[i] % 3);
|
||||||
OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
|
OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
|
||||||
OUT_BATCH(entries[i] |
|
OUT_BATCH(entries[i] |
|
||||||
((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
|
((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
|
||||||
|
Reference in New Issue
Block a user