diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c index a6e444c976a..0a012018dbc 100644 --- a/src/gallium/drivers/iris/iris_binder.c +++ b/src/gallium/drivers/iris/iris_binder.c @@ -53,15 +53,10 @@ #include "iris_bufmgr.h" #include "iris_context.h" -#define BTP_ALIGNMENT 32 - -/* Avoid using offset 0, tools consider it NULL */ -#define INIT_INSERT_POINT BTP_ALIGNMENT - static bool binder_has_space(struct iris_binder *binder, unsigned size) { - return binder->insert_point + size <= IRIS_BINDER_SIZE; + return binder->insert_point + size <= binder->size; } static void @@ -74,10 +69,12 @@ binder_realloc(struct iris_context *ice) if (binder->bo) iris_bo_unreference(binder->bo); - binder->bo = iris_bo_alloc(bufmgr, "binder", IRIS_BINDER_SIZE, 1, - IRIS_MEMZONE_BINDER, 0); + binder->bo = iris_bo_alloc(bufmgr, "binder", binder->size, 1, + IRIS_MEMZONE_BINDER, 4096); binder->map = iris_bo_map(NULL, binder->bo, MAP_WRITE); - binder->insert_point = INIT_INSERT_POINT; + + /* Avoid using offset 0 - tools consider it NULL. */ + binder->insert_point = binder->alignment; /* Allocating a new binder requires changing Surface State Base Address, * which also invalidates all our previous binding tables - each entry @@ -95,7 +92,8 @@ binder_insert(struct iris_binder *binder, unsigned size) { uint32_t offset = binder->insert_point; - binder->insert_point = align(binder->insert_point + size, BTP_ALIGNMENT); + binder->insert_point = + align(binder->insert_point + size, binder->alignment); return offset; } @@ -141,7 +139,7 @@ iris_binder_reserve_3d(struct iris_context *ice) continue; /* Round up the size so our next table has an aligned starting offset */ - sizes[stage] = align(shaders[stage]->bt.size_bytes, BTP_ALIGNMENT); + sizes[stage] = align(shaders[stage]->bt.size_bytes, binder->alignment); } /* Make space for the new binding tables...this may take two tries. */ @@ -152,7 +150,7 @@ iris_binder_reserve_3d(struct iris_context *ice) total_size += sizes[stage]; } - assert(total_size < IRIS_BINDER_SIZE); + assert(total_size < binder->size); if (total_size == 0) return; @@ -201,7 +199,31 @@ iris_binder_reserve_compute(struct iris_context *ice) void iris_init_binder(struct iris_context *ice) { + struct iris_screen *screen = (void *) ice->ctx.screen; + const struct intel_device_info *devinfo = &screen->devinfo; + memset(&ice->state.binder, 0, sizeof(struct iris_binder)); + + /* We use different binding table pointer formats on various generations. + * + * - The 20:5 format gives us an alignment of 32B and max size of 1024kB. + * - The 18:8 format gives us an alignment of 256B and max size of 512kB. + * - The 15:5 format gives us an alignment of 32B and max size of 64kB. + * + * XeHP and later use the 20:5 format. Icelake and Tigerlake use 18:8 + * in iris, but can use 15:5 if desired, Older platforms require 15:5. + */ + if (devinfo->verx10 >= 125) { + ice->state.binder.alignment = 32; + ice->state.binder.size = 1024 * 1024; + } else if (devinfo->ver >= 11) { + ice->state.binder.alignment = 256; + ice->state.binder.size = 512 * 1024; + } else { + ice->state.binder.alignment = 32; + ice->state.binder.size = 64 * 1024; + } + binder_realloc(ice); } diff --git a/src/gallium/drivers/iris/iris_binder.h b/src/gallium/drivers/iris/iris_binder.h index 78449286c6d..78d38d162f3 100644 --- a/src/gallium/drivers/iris/iris_binder.h +++ b/src/gallium/drivers/iris/iris_binder.h @@ -39,6 +39,12 @@ struct iris_binder struct iris_bo *bo; void *map; + /** Required alignment for each binding table in bytes */ + uint32_t alignment; + + /** Binding table size in bytes */ + uint32_t size; + /** Insert new entries at this offset (in bytes) */ uint32_t insert_point; diff --git a/src/gallium/drivers/iris/iris_blorp.c b/src/gallium/drivers/iris/iris_blorp.c index b3198d8ecf8..123e47c6993 100644 --- a/src/gallium/drivers/iris/iris_blorp.c +++ b/src/gallium/drivers/iris/iris_blorp.c @@ -154,7 +154,7 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch, unsigned num_entries, unsigned state_size, unsigned state_alignment, - uint32_t *bt_offset, + uint32_t *out_bt_offset, uint32_t *surface_offsets, void **surface_maps) { @@ -162,8 +162,11 @@ blorp_alloc_binding_table(struct blorp_batch *blorp_batch, struct iris_binder *binder = &ice->state.binder; struct iris_batch *batch = blorp_batch->driver_batch; - *bt_offset = iris_binder_reserve(ice, num_entries * sizeof(uint32_t)); - uint32_t *bt_map = binder->map + *bt_offset; + unsigned bt_offset = + iris_binder_reserve(ice, num_entries * sizeof(uint32_t)); + uint32_t *bt_map = binder->map + bt_offset; + + *out_bt_offset = bt_offset; for (unsigned i = 0; i < num_entries; i++) { surface_maps[i] = stream_state(batch, ice->state.surface_uploader, @@ -181,7 +184,8 @@ static uint32_t blorp_binding_table_offset_to_pointer(struct blorp_batch *batch, uint32_t offset) { - return offset; + /* See IRIS_BT_OFFSET_SHIFT in iris_state.c */ + return offset >> ((GFX_VER >= 11 && GFX_VERx10 < 125) ? 3 : 0); } static void * diff --git a/src/gallium/drivers/iris/iris_bufmgr.c b/src/gallium/drivers/iris/iris_bufmgr.c index 3f3d6b61d9a..6779c40f03e 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.c +++ b/src/gallium/drivers/iris/iris_bufmgr.c @@ -2422,7 +2422,6 @@ iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse) STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull); const uint64_t _4GB = 1ull << 32; const uint64_t _2GB = 1ul << 31; - const uint64_t _1GB = 1ul << 30; /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; @@ -2430,11 +2429,12 @@ iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse) util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER], PAGE_SIZE, _4GB_minus_1 - PAGE_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDER], - IRIS_MEMZONE_BINDER_START, _1GB - IRIS_BINDLESS_SIZE); + IRIS_MEMZONE_BINDER_START, IRIS_BINDER_ZONE_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDLESS], IRIS_MEMZONE_BINDLESS_START, IRIS_BINDLESS_SIZE); util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE], - IRIS_MEMZONE_SURFACE_START, _4GB_minus_1 - _1GB); + IRIS_MEMZONE_SURFACE_START, _4GB_minus_1 - + IRIS_BINDER_ZONE_SIZE - IRIS_BINDLESS_SIZE); /* TODO: Why does limiting to 2GB help some state items on gfx12? * - CC Viewport Pointer * - Blend State Pointer diff --git a/src/gallium/drivers/iris/iris_bufmgr.h b/src/gallium/drivers/iris/iris_bufmgr.h index cda89b01337..ba11027a8da 100644 --- a/src/gallium/drivers/iris/iris_bufmgr.h +++ b/src/gallium/drivers/iris/iris_bufmgr.h @@ -83,13 +83,13 @@ enum iris_memory_zone { /* Intentionally exclude single buffer "zones" */ #define IRIS_MEMZONE_COUNT (IRIS_MEMZONE_OTHER + 1) -#define IRIS_BINDER_SIZE (64 * 1024) #define IRIS_BINDLESS_SIZE (8 * 1024 * 1024) +#define IRIS_BINDER_ZONE_SIZE ((1ull << 30) - IRIS_BINDLESS_SIZE) #define IRIS_MEMZONE_SHADER_START (0ull * (1ull << 32)) #define IRIS_MEMZONE_BINDER_START (1ull * (1ull << 32)) -#define IRIS_MEMZONE_BINDLESS_START (IRIS_MEMZONE_BINDER_START + (1ull << 30) - IRIS_BINDLESS_SIZE) -#define IRIS_MEMZONE_SURFACE_START (IRIS_MEMZONE_BINDER_START + (1ull << 30)) +#define IRIS_MEMZONE_BINDLESS_START (IRIS_MEMZONE_BINDER_START + IRIS_BINDER_ZONE_SIZE) +#define IRIS_MEMZONE_SURFACE_START (IRIS_MEMZONE_BINDLESS_START + IRIS_BINDLESS_SIZE) #define IRIS_MEMZONE_DYNAMIC_START (2ull * (1ull << 32)) #define IRIS_MEMZONE_OTHER_START (3ull * (1ull << 32)) diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index e09f542c090..d7edb94a49e 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1015,6 +1015,22 @@ iris_init_common_context(struct iris_batch *batch) reg.EnabledTexelOffsetPrecisionFix = 1; reg.EnabledTexelOffsetPrecisionFixMask = 1; } +#endif + + /* Select 256B-aligned binding table mode on Icelake through Tigerlake, + * which gives us larger binding table pointers, at the cost of higher + * alignment requirements (bits 18:8 are valid instead of 15:5). When + * using this mode, we have to shift binding table pointers by 3 bits, + * as they're still stored in the same bit-location in the field. + */ +#if GFX_VER >= 11 && GFX_VERx10 < 125 + iris_emit_reg(batch, GENX(GT_MODE), reg) { + reg.BindingTableAlignment = BTP_18_8; + reg.BindingTableAlignmentMask = true; + } +#define IRIS_BT_OFFSET_SHIFT 3 +#else +#define IRIS_BT_OFFSET_SHIFT 0 #endif } @@ -6022,7 +6038,8 @@ iris_upload_dirty_render_state(struct iris_context *ice, << stage)) { iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { ptr._3DCommandSubOpcode = 38 + stage; - ptr.PointertoVSBindingTable = binder->bt_offset[stage]; + ptr.PointertoVSBindingTable = + binder->bt_offset[stage] >> IRIS_BT_OFFSET_SHIFT; } } } @@ -7236,7 +7253,8 @@ iris_upload_gpgpu_walker(struct iris_context *ice, KSP(shader) + brw_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size); idd.SamplerStatePointer = shs->sampler_table.offset; - idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE]; + idd.BindingTablePointer = + binder->bt_offset[MESA_SHADER_COMPUTE] >> IRIS_BT_OFFSET_SHIFT; idd.NumberofThreadsinGPGPUThreadGroup = dispatch.threads; }