anv: device: calculate compute thread numbers using subslices numbers

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
This commit is contained in:
Lionel Landwerlin
2016-09-07 17:19:35 +01:00
parent 1f291369e4
commit 09394ee6cf
6 changed files with 74 additions and 18 deletions

View File

@@ -136,6 +136,41 @@ anv_physical_device_init(struct anv_physical_device *device,
bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
device->max_vs_threads = device->info->max_vs_threads;
device->max_hs_threads = device->info->max_hs_threads;
device->max_ds_threads = device->info->max_ds_threads;
device->max_gs_threads = device->info->max_gs_threads;
device->max_wm_threads = device->info->max_wm_threads;
/* GENs prior to 8 do not support EU/Subslice info */
if (device->info->gen >= 8) {
device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
/* Without this information, we cannot get the right Braswell
* brandstrings, and we have to use conservative numbers for GPGPU on
* many platforms, but otherwise, things will just work.
*/
if (device->subslice_total < 1 || device->eu_total < 1) {
fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
" query GPU properties.\n");
}
} else if (device->info->gen == 7) {
device->subslice_total = 1 << (device->info->gt - 1);
}
if (device->info->is_cherryview &&
device->subslice_total > 0 && device->eu_total > 0) {
/* Logical CS threads = EUs per subslice * 7 threads per EU */
device->max_cs_threads = device->eu_total / device->subslice_total * 7;
/* Fuse configurations may give more threads than expected, never less. */
if (device->max_cs_threads < device->info->max_cs_threads)
device->max_cs_threads = device->info->max_cs_threads;
} else {
device->max_cs_threads = device->info->max_cs_threads;
}
close(fd);
brw_process_intel_debug_variable();
@@ -503,11 +538,11 @@ void anv_GetPhysicalDeviceProperties(
.maxFragmentCombinedOutputResources = 8,
.maxComputeSharedMemorySize = 32768,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
.maxComputeWorkGroupInvocations = 16 * pdevice->max_cs_threads,
.maxComputeWorkGroupSize = {
16 * devinfo->max_cs_threads,
16 * devinfo->max_cs_threads,
16 * devinfo->max_cs_threads,
16 * pdevice->max_cs_threads,
16 * pdevice->max_cs_threads,
16 * pdevice->max_cs_threads,
},
.subPixelPrecisionBits = 4 /* FIXME */,
.subTexelPrecisionBits = 4 /* FIXME */,