intel/dev: generate declarations for struct intel_device_info

Serialization of intel_device_info requires the specification of all
aspects of the type declaration in Python.

To avoid duplication, use the Python type information to generate the
struct as well as the serialization implementation.

This step is implemented first because it provides explicit types for
some anonymous structures within intel_device_info.  For example, the
'urb' member struct within intel_device_info cannot be serialized in a
C function unless we give it a type (e.g., intel_device_info_urb_desc).

Reviewed-by: Jordan Justen <jordan.l.justen@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27540>
Author: Mark Janes
Date: 2024-01-19 00:47:39 -08:00
Committed by: Marge Bot
Parent: 13c7194525
Commit: a52c1994aa
3 changed files with 178 additions and 473 deletions
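
The Python type description that drives this generation (intel_device_info.py) is not included in the hunks below; only the generator that consumes it is. As a rough illustration of the commit message's point, the following is a minimal, hypothetical sketch of such a description: the dataclass constructors are invented, but the attribute names match what the new generator script reads (member_type, name, array, comment, members). It shows the formerly anonymous 'urb' struct receiving an explicit type name:

# Illustrative sketch only: intel_device_info.py itself is not part of this
# excerpt, so these dataclasses are assumptions modeled on the attributes
# intel_device_info_gen_h.py reads.  The key idea is that every member,
# including the formerly anonymous 'urb' struct, refers to a named type that
# both the header generator and a C serializer can use.
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Member:
    member_type: str             # e.g. "unsigned", "bool", or a struct/enum name
    name: str
    array: Optional[str] = None  # array length expression, if any
    comment: Optional[str] = None

@dataclass
class Struct:
    name: str
    members: List[Member] = field(default_factory=list)

# The anonymous 'urb' struct becomes intel_device_info_urb_desc, so a C
# serializer can take a 'struct intel_device_info_urb_desc *' argument.
URB_DESC = Struct("intel_device_info_urb_desc", [
    Member("unsigned", "size", comment="Fixed size of the URB, in KB"),
    Member("unsigned", "min_entries", array="4"),
    Member("unsigned", "max_entries", array="4"),
])

DEVICE_INFO = Struct("intel_device_info", [
    Member("int", "ver"),
    Member("intel_device_info_urb_desc", "urb"),
    # ... remaining members elided ...
])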

File: src/intel/dev/intel_device_info.h

@@ -36,59 +36,12 @@
#include "intel/common/intel_engine.h"
#include "intel/dev/intel_wa.h"
#include "intel/dev/intel_device_info_gen.h"
#ifdef __cplusplus
extern "C" {
#endif
#define INTEL_DEVICE_MAX_NAME_SIZE 64
#define INTEL_DEVICE_MAX_SLICES 8
#define INTEL_DEVICE_MAX_SUBSLICES (8) /* Maximum on gfx11 */
#define INTEL_DEVICE_MAX_EUS_PER_SUBSLICE (16) /* Maximum on gfx12 */
#define INTEL_DEVICE_MAX_PIXEL_PIPES (16) /* Maximum on DG2 */
#define INTEL_PLATFORM_GROUP_START(group, new_enum) \
new_enum, INTEL_PLATFORM_ ## group ## _START = new_enum
#define INTEL_PLATFORM_GROUP_END(group, new_enum) \
new_enum, INTEL_PLATFORM_ ## group ## _END = new_enum
enum intel_platform {
INTEL_PLATFORM_GFX3 = 1,
INTEL_PLATFORM_I965,
INTEL_PLATFORM_ILK,
INTEL_PLATFORM_G4X,
INTEL_PLATFORM_SNB,
INTEL_PLATFORM_IVB,
INTEL_PLATFORM_BYT,
INTEL_PLATFORM_HSW,
INTEL_PLATFORM_BDW,
INTEL_PLATFORM_CHV,
INTEL_PLATFORM_SKL,
INTEL_PLATFORM_BXT,
INTEL_PLATFORM_KBL,
INTEL_PLATFORM_GLK,
INTEL_PLATFORM_CFL,
INTEL_PLATFORM_ICL,
INTEL_PLATFORM_EHL,
INTEL_PLATFORM_TGL,
INTEL_PLATFORM_RKL,
INTEL_PLATFORM_DG1,
INTEL_PLATFORM_ADL,
INTEL_PLATFORM_RPL,
INTEL_PLATFORM_GROUP_START(DG2, INTEL_PLATFORM_DG2_G10),
INTEL_PLATFORM_DG2_G11,
INTEL_PLATFORM_GROUP_END(DG2, INTEL_PLATFORM_DG2_G12),
INTEL_PLATFORM_GROUP_START(ATSM, INTEL_PLATFORM_ATSM_G10),
INTEL_PLATFORM_GROUP_END(ATSM, INTEL_PLATFORM_ATSM_G11),
INTEL_PLATFORM_GROUP_START(MTL, INTEL_PLATFORM_MTL_U),
INTEL_PLATFORM_GROUP_END(MTL, INTEL_PLATFORM_MTL_H),
INTEL_PLATFORM_GROUP_START(ARL, INTEL_PLATFORM_ARL_U),
INTEL_PLATFORM_GROUP_END(ARL, INTEL_PLATFORM_ARL_H),
INTEL_PLATFORM_LNL,
};
#undef INTEL_PLATFORM_GROUP_START
#undef INTEL_PLATFORM_GROUP_END
#define intel_platform_in_range(platform, platform_range) \
(((platform) >= INTEL_PLATFORM_ ## platform_range ## _START) && \
((platform) <= INTEL_PLATFORM_ ## platform_range ## _END))
@@ -112,30 +65,6 @@ enum intel_platform {
#define intel_device_info_is_mtl_or_arl(devinfo) \
(intel_device_info_is_mtl(devinfo) || intel_device_info_is_arl(devinfo))
struct intel_memory_class_instance {
/* Kernel backend specific class value, no translation needed yet */
uint16_t klass;
uint16_t instance;
};
enum intel_device_info_mmap_mode {
INTEL_DEVICE_INFO_MMAP_MODE_UC = 0,
INTEL_DEVICE_INFO_MMAP_MODE_WC,
INTEL_DEVICE_INFO_MMAP_MODE_WB,
};
enum intel_device_info_coherency_mode {
INTEL_DEVICE_INFO_COHERENCY_MODE_NONE = 0,
INTEL_DEVICE_INFO_COHERENCY_MODE_1WAY, /* CPU caches are snooped by GPU */
INTEL_DEVICE_INFO_COHERENCY_MODE_2WAY /* Fully coherent between GPU and CPU */
};
struct intel_device_info_pat_entry {
uint8_t index;
enum intel_device_info_mmap_mode mmap;
enum intel_device_info_coherency_mode coherency;
};
#define PAT_ENTRY(index_, mmap_, coh_) \
{ \
.index = index_, \
@@ -143,404 +72,6 @@ struct intel_device_info_pat_entry {
.coherency = INTEL_DEVICE_INFO_COHERENCY_MODE_##coh_ \
}
enum intel_cmat_scope
{
INTEL_CMAT_SCOPE_NONE = 0,
INTEL_CMAT_SCOPE_SUBGROUP,
};
enum intel_cooperative_matrix_component_type
{
INTEL_CMAT_FLOAT16,
INTEL_CMAT_FLOAT32,
INTEL_CMAT_SINT32,
INTEL_CMAT_SINT8,
INTEL_CMAT_UINT32,
INTEL_CMAT_UINT8,
};
struct intel_cooperative_matrix_configuration
{
enum intel_cmat_scope scope;
/* Matrix A is MxK.
* Matrix B is KxN.
* Matrix C and Matrix Result are MxN.
*
* Result = A * B + C;
*/
uint8_t m, n, k;
enum intel_cooperative_matrix_component_type a, b, c, result;
};
/**
* Intel hardware information and quirks
*/
struct intel_device_info
{
enum intel_kmd_type kmd_type;
/* Driver internal numbers used to differentiate platforms. */
int ver;
int verx10;
int display_ver;
/**
* This revision is from ioctl (I915_PARAM_REVISION) unlike
pci_revision_id from the drm device. Its value is not always the
same as the pci_revision_id.
*/
int revision;
int gt;
/* PCI info */
uint16_t pci_domain;
uint8_t pci_bus;
uint8_t pci_dev;
uint8_t pci_func;
uint16_t pci_device_id;
uint8_t pci_revision_id;
enum intel_platform platform;
bool has_hiz_and_separate_stencil;
bool must_use_separate_stencil;
bool has_sample_with_hiz;
bool has_bit6_swizzle;
bool has_llc;
bool has_pln;
bool has_64bit_float;
bool has_64bit_float_via_math_pipe;
bool has_64bit_int;
bool has_integer_dword_mul;
bool has_compr4;
bool has_surface_tile_offset;
bool supports_simd16_3src;
bool disable_ccs_repack;
/**
* True if CCS needs to be initialized before use.
*/
bool has_illegal_ccs_values;
/**
* True if CCS uses a flat virtual address translation to a memory
* carve-out, rather than aux map translations, or additional surfaces.
*/
bool has_flat_ccs;
bool has_aux_map;
bool has_caching_uapi;
bool has_tiling_uapi;
bool has_ray_tracing;
bool has_ray_query;
bool has_local_mem;
bool has_lsc;
bool has_mesh_shading;
bool has_mmap_offset;
bool has_userptr_probe;
bool has_context_isolation;
bool has_set_pat_uapi;
bool has_indirect_unroll;
/**
* \name Intel hardware quirks
* @{
*/
bool has_negative_rhw_bug;
/**
* Whether this platform supports fragment shading rate controlled by a
* primitive in geometry shaders and by a control buffer.
*/
bool has_coarse_pixel_primitive_and_cb;
/**
* Whether this platform has compute engine
*/
bool has_compute_engine;
/**
* Some versions of Gen hardware don't do centroid interpolation correctly
* on unlit pixels, causing incorrect values for derivatives near triangle
* edges. Enabling this flag causes the fragment shader to use
* non-centroid interpolation for unlit pixels, at the expense of two extra
* fragment shader instructions.
*/
bool needs_unlit_centroid_workaround;
/**
* We need this for ADL-N specific Wa_14014966230.
*/
bool is_adl_n;
/** @} */
/**
* \name GPU hardware limits
*
* In general, you can find shader thread maximums by looking at the "Maximum
* Number of Threads" field in the Intel PRM description of the 3DSTATE_VS,
* 3DSTATE_GS, 3DSTATE_HS, 3DSTATE_DS, and 3DSTATE_PS commands. URB entry
* limits come from the "Number of URB Entries" field in the
* 3DSTATE_URB_VS command and friends.
*
* These fields are used to calculate the scratch space to allocate. The
* amount of scratch space can be larger without being harmful on modern
GPUs; however, prior to Haswell, programming the maximum number of threads
* to greater than the hardware maximum would cause GPU performance to tank.
*
* @{
*/
/**
* Total number of slices present on the device whether or not they've been
* fused off.
*
* XXX: CS thread counts are limited by the inability to do cross subslice
communication. It is effectively the number of logical threads which
* can be executed in a subslice. Fuse configurations may cause this number
* to change, so we program @max_cs_threads as the lower maximum.
*/
unsigned num_slices;
/**
* Maximum number of slices present on this device (can be more than
* num_slices if some slices are fused).
*/
unsigned max_slices;
/**
* Number of subslices for each slice (used to be uniform until CNL).
*/
unsigned num_subslices[INTEL_DEVICE_MAX_SLICES];
/**
* Maximum number of subslices per slice present on this device (can be
* more than the maximum value in the num_subslices[] array if some
* subslices are fused).
*/
unsigned max_subslices_per_slice;
/**
* Number of subslices on each pixel pipe (ICL).
*/
unsigned ppipe_subslices[INTEL_DEVICE_MAX_PIXEL_PIPES];
/**
* Maximum number of EUs per subslice (some EUs can be fused off).
*/
unsigned max_eus_per_subslice;
/**
* Number of threads per eu, varies between 4 and 8 between generations.
*/
unsigned num_thread_per_eu;
/**
* A bit mask of the slices available.
*/
uint8_t slice_masks;
/**
An array of bit masks of the subslices available; use subslice_slice_stride
* to access this array.
*/
uint8_t subslice_masks[INTEL_DEVICE_MAX_SLICES *
DIV_ROUND_UP(INTEL_DEVICE_MAX_SUBSLICES, 8)];
/**
* The number of enabled subslices (considering fusing). For exactly which
* subslices are enabled, see subslice_masks[].
*/
unsigned subslice_total;
/**
An array of bit masks of EUs available; use eu_slice_stride &
* eu_subslice_stride to access this array.
*/
uint8_t eu_masks[INTEL_DEVICE_MAX_SLICES *
INTEL_DEVICE_MAX_SUBSLICES *
DIV_ROUND_UP(INTEL_DEVICE_MAX_EUS_PER_SUBSLICE, 8)];
/**
* Stride to access subslice_masks[].
*/
uint16_t subslice_slice_stride;
/**
* Strides to access eu_masks[].
*/
uint16_t eu_slice_stride;
uint16_t eu_subslice_stride;
unsigned l3_banks;
unsigned max_vs_threads; /**< Maximum Vertex Shader threads */
unsigned max_tcs_threads; /**< Maximum Hull Shader threads */
unsigned max_tes_threads; /**< Maximum Domain Shader threads */
unsigned max_gs_threads; /**< Maximum Geometry Shader threads. */
/**
* Theoretical maximum number of Pixel Shader threads.
*
* PSD means Pixel Shader Dispatcher. On modern Intel GPUs, hardware will
* automatically scale pixel shader thread count, based on a single value
* programmed into 3DSTATE_PS.
*
To calculate the maximum number of threads for Gfx8 and beyond (which have
* multiple Pixel Shader Dispatchers):
*
* - Look up 3DSTATE_PS and find "Maximum Number of Threads Per PSD"
* - Usually there's only one PSD per subslice, so use the number of
* subslices for number of PSDs.
* - For max_wm_threads, the total should be PSD threads * #PSDs.
*/
unsigned max_wm_threads;
unsigned max_threads_per_psd;
/**
* Maximum Compute Shader threads.
*
* Thread count * number of EUs per subslice
*/
unsigned max_cs_threads;
/**
* Maximum number of threads per workgroup supported by the GPGPU_WALKER or
* COMPUTE_WALKER command.
*
* This may be smaller than max_cs_threads as it takes into account added
* restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
* expresses the total parallelism of the GPU, this expresses the maximum
* number of threads we can dispatch in a single workgroup.
*/
unsigned max_cs_workgroup_threads;
/**
* The maximum number of potential scratch ids. Due to hardware
* implementation details, the range of scratch ids may be larger than the
* number of subslices.
*/
unsigned max_scratch_ids[MESA_SHADER_STAGES];
struct {
/**
* Fixed size of the URB.
*
* On Gfx6 and DG1, this is measured in KB. Gfx4-5 instead measure
* this in 512b blocks, as that's more convenient there.
*
* On most Gfx7+ platforms, the URB is a section of the L3 cache,
* and can be resized based on the L3 programming. For those platforms,
* simply leave this field blank (zero) - it isn't used.
*/
unsigned size;
/**
* The minimum number of URB entries. See the 3DSTATE_URB_<XS> docs.
*/
unsigned min_entries[4];
/**
* The maximum number of URB entries. See the 3DSTATE_URB_<XS> docs.
*/
unsigned max_entries[4];
} urb;
/* Maximum size in Kb that can be allocated to constants in the URB, this
* is usually divided among the stages for implementing push constants.
* See 3DSTATE_PUSH_CONSTANT_ALLOC_*.
*/
unsigned max_constant_urb_size_kb;
/* Maximum size that can be allocated to constants in mesh pipeline.
* This essentially applies to fragment shaders only, since mesh stages
* don't need to allocate space for push constants.
*/
unsigned mesh_max_constant_urb_size_kb;
/**
* Size of the command streamer prefetch. This is important to know for
* self modifying batches.
*/
unsigned engine_class_prefetch[INTEL_ENGINE_CLASS_COMPUTE + 1];
/**
* Memory alignment requirement for this device.
*/
unsigned mem_alignment;
/**
* For the longest time the timestamp frequency for Gen's timestamp counter
* could be assumed to be 12.5MHz, where the least significant bit neatly
* corresponded to 80 nanoseconds.
*
Since Gfx9 the numbers aren't so round, with a frequency of 12MHz for
* SKL (or scale factor of 83.33333333) and a frequency of 19200000Hz for
* BXT.
*
* For simplicity to fit with the current code scaling by a single constant
* to map from raw timestamps to nanoseconds we now do the conversion in
* floating point instead of integer arithmetic.
*
* In general it's probably worth noting that the documented constants we
* have for the per-platform timestamp frequencies aren't perfect and
* shouldn't be trusted for scaling and comparing timestamps with a large
* delta.
*
* E.g. with crude testing on my system using the 'correct' scale factor I'm
* seeing a drift of ~2 milliseconds per second.
*/
uint64_t timestamp_frequency;
uint64_t aperture_bytes;
uint64_t gtt_size;
/**
* ID to put into the .aub files.
*/
int simulator_id;
/**
* holds the name of the device
*/
char name[INTEL_DEVICE_MAX_NAME_SIZE];
/**
* no_hw is true when the pci_device_id has been overridden
*/
bool no_hw;
/**
* apply_hwconfig is true when the platform should apply hwconfig values
*/
bool apply_hwconfig;
struct {
bool use_class_instance;
struct {
struct intel_memory_class_instance mem;
struct {
uint64_t size;
uint64_t free;
} mappable, unmappable;
} sram, vram;
} mem;
struct {
/* To be used when CPU access is frequent, WB + 1 or 2 way coherent */
struct intel_device_info_pat_entry cached_coherent;
/* scanout and external BOs */
struct intel_device_info_pat_entry scanout;
/* BOs without special needs; can be WB non-coherent or WC, depending on the platform and KMD */
struct intel_device_info_pat_entry writeback_incoherent;
struct intel_device_info_pat_entry writecombining;
} pat;
BITSET_DECLARE(workarounds, INTEL_WA_NUM);
struct intel_cooperative_matrix_configuration cooperative_matrix_configurations[4];
/** @} */
};
#ifdef GFX_VER
#define intel_device_info_is_9lp(devinfo) \

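The subslice_masks[] and eu_masks[] comments in the struct above describe a packed per-slice bitmask layout addressed through the stride fields. The short Python sketch below, which is not part of this commit, decodes such arrays assuming subslice_slice_stride counts bytes per slice in subslice_masks[] and eu_slice_stride / eu_subslice_stride count bytes per slice and per subslice in eu_masks[]:

# Hypothetical decoding helpers matching the layout the comments describe:
# one bit per subslice (or EU), packed into bytes, with explicit strides.
def subslice_available(subslice_masks, subslice_slice_stride, s, ss):
    byte = subslice_masks[s * subslice_slice_stride + ss // 8]
    return bool(byte & (1 << (ss % 8)))

def eu_available(eu_masks, eu_slice_stride, eu_subslice_stride, s, ss, eu):
    byte = eu_masks[s * eu_slice_stride + ss * eu_subslice_stride + eu // 8]
    return bool(byte & (1 << (eu % 8)))

# Example: one slice with two subslices of 8 EUs each, everything enabled.
subslice_masks = bytes([0b00000011])   # subslice_slice_stride = 1
eu_masks = bytes([0xff, 0xff])         # eu_slice_stride = 2, eu_subslice_stride = 1
assert subslice_available(subslice_masks, 1, 0, 1)
assert eu_available(eu_masks, 2, 1, 0, 1, 7)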
File: src/intel/dev/intel_device_info_gen_h.py

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
COPYRIGHT = """\
/*
* Copyright 2024 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
"""
import argparse
import os
from textwrap import indent
from mako.template import Template
from mako import exceptions
from intel_device_info import TYPES_BY_NAME, Enum
template = COPYRIGHT + """
/* DO NOT EDIT - This file generated automatically by intel_device_info_gen_h.py script */
#ifndef INTEL_DEVICE_INFO_GEN_H
#define INTEL_DEVICE_INFO_GEN_H
#include <stdbool.h>
#include <stdint.h>
#include "util/macros.h"
#include "compiler/shader_enums.h"
#include "intel/common/intel_engine.h"
#include "intel/dev/intel_wa.h"
#ifdef __cplusplus
extern "C" {
#endif
<%! import intel_device_info %>
% for decl in intel_device_info.TYPES:
% if isinstance(decl, intel_device_info.Define):
${format_define(decl)}
% elif isinstance(decl, intel_device_info.Enum) and not decl.external:
enum ${decl.name} {
% for value in decl.values:
${format_enum_value(value)}
% endfor
};
% elif isinstance(decl, intel_device_info.Struct):
struct ${decl.name}
{
% for member in decl.members:
${format_struct_member(member)}
% endfor
% if decl.name == "intel_device_info":
BITSET_DECLARE(workarounds, INTEL_WA_NUM);
% endif
};
% endif
% endfor
#ifdef __cplusplus
}
#endif
#endif /* INTEL_DEVICE_INFO_GEN_H */
"""
def format_enum_value(v):
"""
Routine to format the individual lines within an enum declaration.
This is inconvenient to implement with mako. Templates are an
inconvenient tool for conditionally formatting:
- inline comments
- "grouped" values as required by intel_platform
- specific values
"""
comment = ""
if v.comment:
comment = f" /* {v.comment} */"
value = ""
if v.value is not None:
value = f" = {v.value}"
decl = f"{v.name}{value},{comment}"
if v.group_begin:
decl = f"{decl}\nINTEL_PLATFORM_{v.group_begin}_START = {v.name},"
if v.group_end:
decl = f"{decl}\nINTEL_PLATFORM_{v.group_end}_END = {v.name},"
return indent(decl, " ")
def format_define(v):
"""
Routine to format the printing of a macro declaration. Conditional
inline comments are difficult to format in mako.
"""
comment = ""
if v.comment:
comment = f" /* {v.comment} */"
return f"#define {v.name} ({v.value}){comment}"
def format_struct_member(m):
"""
Routine to format the printing of a struct member. Mako templates are not
helpful in formatting the following aspects of intel_device_info structs:
- multiline vs single line comments
- optional array lengths
- enum / struct member type declarations
"""
comment = ""
if m.comment:
if "\n" in m.comment:
comment_lines = [ f" * {line}".rstrip() for line in m.comment.split('\n')]
comment_lines.insert(0, "\n/**")
comment_lines.append(" */\n")
comment = '\n'.join(comment_lines)
else:
comment = f"\n/* {m.comment} */\n"
array = ""
if m.array:
array = f"[{m.array}]"
member_type = m.member_type
if member_type in TYPES_BY_NAME:
if isinstance(TYPES_BY_NAME[member_type], Enum):
member_type = f"enum {member_type}"
else:
member_type = f"struct {member_type}"
return indent(f"{comment}{member_type} {m.name}{array};", " ")
def main():
"""print intel_device_info_gen.h at the specified path"""
parser = argparse.ArgumentParser()
parser.add_argument('--outdir', required=True,
help='Directory to put the generated files in')
args = parser.parse_args()
path = os.path.join(args.outdir, 'intel_device_info_gen.h')
with open(path, 'w', encoding='utf-8') as f:
try:
f.write(Template(template).render(format_enum_value=format_enum_value,
format_struct_member=format_struct_member,
format_define=format_define))
except:
# provide some debug information to the user
print(exceptions.text_error_template().render(format_enum_value=format_enum_value,
format_struct_member=format_struct_member,
format_define=format_define))
if __name__ == "__main__":
main()
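
To see what the formatting helpers above produce, here is a hypothetical interactive example. It assumes the script's directory is on PYTHONPATH, that mako and the intel_device_info module are importable, and that plain C type names such as "unsigned" are not registered in TYPES_BY_NAME; the member and enum-value objects are SimpleNamespace stand-ins rather than the real classes from intel_device_info.py:

from types import SimpleNamespace
from intel_device_info_gen_h import format_struct_member, format_enum_value

# A struct member with a single-line comment and an array length.
member = SimpleNamespace(comment="The minimum number of URB entries.",
                         member_type="unsigned",
                         name="min_entries",
                         array="4")
print(format_struct_member(member))
# Prints something like:
#    /* The minimum number of URB entries. */
#    unsigned min_entries[4];

# An enum value that opens a platform group, reproducing what the removed
# INTEL_PLATFORM_GROUP_START macro used to expand to.
value = SimpleNamespace(name="INTEL_PLATFORM_DG2_G10", value=None,
                        comment=None, group_begin="DG2", group_end=None)
print(format_enum_value(value))
# Prints something like:
#    INTEL_PLATFORM_DG2_G10,
#    INTEL_PLATFORM_DG2_START = INTEL_PLATFORM_DG2_G10,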

File: src/intel/dev/meson.build

@@ -40,21 +40,29 @@ intel_dev_wa_src = custom_target('intel_wa.[ch]',
output : ['intel_wa.h', 'intel_wa.c'],
command : [prog_python, '@INPUT@', '@OUTPUT@'])
intel_dev_info_gen_src = custom_target('intel_device_info_gen.h',
input : ['intel_device_info_gen_h.py', 'intel_device_info.py'],
output : ['intel_device_info_gen.h'],
command : [prog_python, '@INPUT0@', '--outdir', meson.current_build_dir()])
# ensures intel_wa.h exists before implementation files are compiled
idep_intel_dev_wa = declare_dependency(sources : [intel_dev_wa_src[0]])
idep_intel_dev_info_gen = declare_dependency(sources : [intel_dev_info_gen_src[0]])
libintel_dev = static_library(
'intel_dev',
[files_libintel_dev, sha1_h, [intel_dev_wa_src]],
include_directories : [inc_include, inc_src, inc_intel],
dependencies : [dep_libdrm, idep_mesautil, idep_intel_dev_wa],
dependencies : [dep_libdrm, idep_mesautil, idep_intel_dev_wa,
idep_intel_dev_info_gen],
c_args : [no_override_init_args],
gnu_symbol_visibility : 'hidden',
)
idep_intel_dev = declare_dependency(
link_with : libintel_dev,
dependencies : idep_intel_dev_wa,
dependencies : [idep_intel_dev_wa, idep_intel_dev_info_gen]
)
if with_tests