Files
third_party_mesa3d/src/intel/compiler/intel_clc.c
Kenneth Graunke 72e9843991 intel/compiler: Introduce a new brw_isa_info structure
This structure will contain the opcode mapping tables in the next
commit.  For now, this is the mechanical change to plumb it into all
the necessary places, and it continues simply holding devinfo.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17309>
2022-06-30 23:46:35 +00:00

575 lines
19 KiB
C

/*
* Copyright © 2021 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "brw_compiler.h"
#include "brw_kernel.h"
#include "common/intel_disasm.h"
#include "compiler/clc/clc.h"
#include "compiler/glsl_types.h"
#include "dev/intel_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/u_dynarray.h"
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
/* Shader functions */
/* Value the SPIR-V version helpers in this file expect in word 0 of a module;
 * they assert on it before reading or patching word 1.
 */
#define SPIR_V_MAGIC_NUMBER 0x07230203
/*
 * Create the on-disk shader cache used by the kernel compiler.
 *
 * The cache is named after the target PCI device id and keyed on a SHA-1 of
 * this binary's build-id.  The process aborts when no build-id note can be
 * found or when it is shorter than 20 bytes.
 *
 * Returns NULL when the build has no ENABLE_SHADER_CACHE.
 */
static struct disk_cache *
get_disk_cache(struct brw_compiler *compiler)
{
#ifdef ENABLE_SHADER_CACHE
   /* "brw_clc_" + 4 hex digits + NUL fits with one byte to spare. */
   char cache_name[14];
   ASSERTED int written = snprintf(cache_name, sizeof(cache_name),
                                   "brw_clc_%04x",
                                   compiler->devinfo->pci_device_id);
   assert(written == sizeof(cache_name) - 2);

   const struct build_id_note *build_note =
      build_id_find_nhdr_for_addr(get_disk_cache);
   if (build_note == NULL) {
      fprintf(stderr, "Failed to find build-id\n");
      abort();
   }

   unsigned note_len = build_id_length(build_note);
   if (note_len < 20) {
      fprintf(stderr, "build-id too short. It needs to be a SHA\n");
      abort();
   }

   /* Hash the raw build-id and use its hex form as the cache timestamp. */
   uint8_t digest[20];
   struct mesa_sha1 hasher;
   _mesa_sha1_init(&hasher);
   _mesa_sha1_update(&hasher, build_id_data(build_note), note_len);
   _mesa_sha1_final(&hasher, digest);

   char digest_hex[41];
   _mesa_sha1_format(digest_hex, digest);

   const uint64_t config_flags = brw_get_compiler_config_value(compiler);

   return disk_cache_create(cache_name, digest_hex, config_flags);
#else
   return NULL;
#endif
}
/*
 * printf-style logging callback installed on the compiler for both its debug
 * and perf logs.  The message is forwarded to stderr; `data` and `id` are
 * not used.
 */
static void
compiler_log(void *data, unsigned *id, const char *fmt, ...)
{
   va_list ap;

   va_start(ap, fmt);
   vfprintf(stderr, fmt, ap);
   va_end(ap);
}
/*
 * Logger callback used for both clc errors and warnings: writes the message
 * verbatim to stderr.  `priv` is ignored.
 */
static void
msg_callback(void *priv, const char *msg)
{
   fprintf(stderr, "%s", msg);
   (void)priv;
}
/*
 * Emit `data` as a C array definition named "<prefix>_<arr_name>".
 *
 * fp       - stream the generated C code is written to.
 * prefix   - first half of the array name.
 * arr_name - second half of the array name.
 * data     - 32-bit words to dump.
 * len      - size of `data` in BYTES; must be a multiple of 4.
 *
 * Words are printed as 0x%08x literals, four per output line.
 */
static void
print_u32_data(FILE *fp, const char *prefix, const char *arr_name,
               const uint32_t *data, size_t len)
{
   assert(len % 4 == 0);

   /* Index with size_t so the counter cannot wrap before reaching a large
    * word count.
    */
   const size_t num_words = len / 4;

   fprintf(fp, "static const uint32_t %s_%s[] = {", prefix, arr_name);
   for (size_t i = 0; i < num_words; i++) {
      /* Start a new output line every four words. */
      if (i % 4 == 0)
         fprintf(fp,"\n ");

      fprintf(fp, " 0x%08" PRIx32 ",", data[i]);
   }
   fprintf(fp, "\n};\n");
}
/*
 * Map a relocation type to the spelling of its enumerator, so that the
 * generated C source can reference the enum by name.
 */
static const char *
reloc_type_str(enum brw_shader_reloc_type type)
{
   switch (type) {
   case BRW_SHADER_RELOC_TYPE_U32:
      return "BRW_SHADER_RELOC_TYPE_U32";
   case BRW_SHADER_RELOC_TYPE_MOV_IMM:
      return "BRW_SHADER_RELOC_TYPE_MOV_IMM";
   default:
      unreachable("Unknown relocation type");
   }
}
/*
 * Write the designated-initializer body for a struct brw_cs_prog_data.
 *
 * fp           - stream receiving the generated C code.
 * prefix       - name prefix; the relocs pointer is emitted as
 *                "<prefix>_relocs".
 * pad          - string printed at the start of every emitted line.
 * cs_prog_data - program data to serialize.
 *
 * Fields that are not emitted are asserted to be zero/false instead.
 */
static void
print_cs_prog_data_fields(FILE *fp, const char *prefix, const char *pad,
                          const struct brw_cs_prog_data *cs_prog_data)
{
/* Emit ".<member> = <value>," using the given printf conversion. */
#define EMIT_FIELD(fmt, member) \
   fprintf(fp, "%s." #member " = " fmt ",\n", pad, cs_prog_data->member)
/* Emit ".<member> = true|false,". */
#define EMIT_BOOL(member) \
   fprintf(fp, "%s." #member " = %s,\n", pad, \
           cs_prog_data->member ? "true" : "false")

   /* Base program data. */
   EMIT_FIELD("%u", base.nr_params);

   assert(cs_prog_data->base.stage == MESA_SHADER_COMPUTE);
   fprintf(fp, "%s.base.stage = MESA_SHADER_COMPUTE,\n", pad);

   assert(cs_prog_data->base.zero_push_reg == 0);
   assert(cs_prog_data->base.push_reg_mask_param == 0);

   EMIT_FIELD("%u", base.curb_read_length);
   EMIT_FIELD("%u", base.total_scratch);
   EMIT_FIELD("%u", base.total_shared);
   EMIT_FIELD("%u", base.program_size);
   EMIT_FIELD("%u", base.const_data_size);
   EMIT_FIELD("%u", base.const_data_offset);
   EMIT_FIELD("%u", base.num_relocs);

   /* The relocation table is a separate array in the generated file. */
   fprintf(fp, "%s.base.relocs = %s_relocs,\n", pad, prefix);

   assert(!cs_prog_data->base.has_ubo_pull);
   assert(cs_prog_data->base.dispatch_grf_start_reg == 0);
   assert(!cs_prog_data->base.use_alt_mode);
   assert(cs_prog_data->base.param == 0);

   EMIT_BOOL(base.uses_atomic_load_store);

   /* Compute-specific data. */
   fprintf(fp, "%s.local_size = { %u, %u, %u },\n", pad,
           cs_prog_data->local_size[0],
           cs_prog_data->local_size[1],
           cs_prog_data->local_size[2]);
   fprintf(fp, "%s.prog_offset = { %u, %u, %u },\n", pad,
           cs_prog_data->prog_offset[0],
           cs_prog_data->prog_offset[1],
           cs_prog_data->prog_offset[2]);

   EMIT_FIELD("%u", prog_mask);
   EMIT_FIELD("%u", prog_spilled);
   EMIT_BOOL(uses_barrier);
   EMIT_BOOL(uses_num_work_groups);

   assert(!cs_prog_data->uses_inline_data);
   assert(!cs_prog_data->uses_btd_stack_ids);

   /* Push constant layout. */
   EMIT_FIELD("%u", push.per_thread.dwords);
   EMIT_FIELD("%u", push.per_thread.regs);
   EMIT_FIELD("%u", push.per_thread.size);
   EMIT_FIELD("%u", push.cross_thread.dwords);
   EMIT_FIELD("%u", push.cross_thread.regs);
   EMIT_FIELD("%u", push.cross_thread.size);

#undef EMIT_FIELD
#undef EMIT_BOOL
}
/*
 * Write a compiled kernel to `fp` as a self-contained C source fragment.
 *
 * fp     - stream receiving the generated C code.
 * prefix - name prefix for every emitted symbol: <prefix>_relocs,
 *          <prefix>_args, <prefix>_code, <prefix> (the brw_kernel itself)
 *          and <prefix>_sha1.
 * kernel - compiled kernel to serialize.
 * isa    - ISA description handed to the disassembler.
 *
 * While printing, a SHA-1 is accumulated over the relocations, the program
 * data (with its pointers cleared), the argument descriptors and the code.
 * It is emitted last as the string <prefix>_sha1.  The order of the hash
 * updates determines the resulting digest, so it must not be changed.
 */
static void
print_kernel(FILE *fp, const char *prefix,
             const struct brw_kernel *kernel,
             const struct brw_isa_info *isa)
{
   struct mesa_sha1 sha1_ctx;
   _mesa_sha1_init(&sha1_ctx);

#define SHA1_UPDATE_VALUE(val) \
   _mesa_sha1_update(&sha1_ctx, &val, sizeof(val))

   fprintf(fp, "#include \"intel/compiler/brw_kernel.h\"\n");
   fprintf(fp, "\n");

   /* Relocation table. */
   fprintf(fp, "static const struct brw_shader_reloc %s_relocs[] = {\n",
           prefix);
   for (unsigned i = 0; i < kernel->prog_data.base.num_relocs; i++) {
      const struct brw_shader_reloc *reloc = &kernel->prog_data.base.relocs[i];
      fprintf(fp, " { %"PRIu32", %s, %"PRIu32", %"PRIu32" },\n",
              reloc->id, reloc_type_str(reloc->type),
              reloc->offset, reloc->delta);
   }
   fprintf(fp, "};\n");
   _mesa_sha1_update(&sha1_ctx, kernel->prog_data.base.relocs,
                     kernel->prog_data.base.num_relocs *
                     sizeof(kernel->prog_data.base.relocs[0]));

   /* Get rid of the pointers before we hash */
   /* NOTE(review): the whole struct is hashed, so any padding bytes take
    * part in the digest as well - confirm they are reliably zeroed.
    */
   struct brw_cs_prog_data cs_prog_data = kernel->prog_data;
   cs_prog_data.base.relocs = NULL;
   assert(cs_prog_data.base.param == NULL);
   _mesa_sha1_update(&sha1_ctx, &cs_prog_data, sizeof(cs_prog_data));

   SHA1_UPDATE_VALUE(kernel->args_size);
   SHA1_UPDATE_VALUE(kernel->arg_count);
   _mesa_sha1_update(&sha1_ctx, kernel->args,
                     kernel->arg_count * sizeof(kernel->args[0]));

   /* Argument descriptors. */
   fprintf(fp, "static const struct brw_kernel_arg_desc %s_args[] = {\n",
           prefix);
   for (unsigned i = 0; i < kernel->arg_count; i++) {
      fprintf(fp, " { %d, %d },\n",
              kernel->args[i].offset, kernel->args[i].size);
   }
   fprintf(fp, "};\n\n");

   _mesa_sha1_update(&sha1_ctx, kernel->code,
                     kernel->prog_data.base.program_size);

   /* Human-readable disassembly, compiled out of the generated file. */
   fprintf(fp, "#if 0 /* BEGIN KERNEL ASSEMBLY */\n");
   fprintf(fp, "\n");
   intel_disassemble(isa, kernel->code, 0, fp);
   fprintf(fp, "\n");
   fprintf(fp, "#endif /* END KERNEL ASSEMBLY */\n");

   /* Machine code followed by the brw_kernel that ties it all together. */
   print_u32_data(fp, prefix, "code", kernel->code,
                  kernel->prog_data.base.program_size);

   fprintf(fp, "static const struct brw_kernel %s = {\n", prefix);
   fprintf(fp, " .prog_data = {\n");
   print_cs_prog_data_fields(fp, prefix, " ", &kernel->prog_data);
   fprintf(fp, " },\n");
   fprintf(fp, " .args_size = %d,\n", (int)kernel->args_size);
   fprintf(fp, " .arg_count = %d,\n", (int)kernel->arg_count);
   fprintf(fp, " .args = %s_args,\n", prefix);
   fprintf(fp, " .code = %s_code,\n", prefix);
   fprintf(fp, "};\n");

   unsigned char sha1[20];
   _mesa_sha1_final(&sha1_ctx, sha1);
   char sha1_str[41];
   _mesa_sha1_format(sha1_str, sha1);
   fprintf(fp, "const char *%s_sha1 = \"%s\";\n", prefix, sha1_str);

   /* Keep the helper macro local, like the other macros in this file. */
#undef SHA1_UPDATE_VALUE
}
/*
 * Print the command-line help text.
 *
 * exec_name - program name shown on the "Usage:" line.
 * f         - stream to print to (stdout for --help, stderr on errors).
 *
 * The short option letters listed here must match the getopt_long() option
 * string used by main().
 */
static void
print_usage(char *exec_name, FILE *f)
{
   fprintf(f,
"Usage: %s [options] [clang args | input file]\n"
"Options:\n"
" -h, --help Print this help.\n"
" -e, --entrypoint <name> Specify the entry-point name.\n"
" -p, --platform <name> Specify the target platform name.\n"
" --prefix <prefix> Prefix for variable names in generated C code.\n"
" -o, --out <filename> Specify the output filename.\n"
" -s, --spv <filename> Specify the output filename for spirv.\n"
" -i, --info Print information about the compiled kernel.\n"
   , exec_name);
}
/* getopt_long() return code for the long-only --prefix option; the value is
 * larger than any character, so it cannot match a short option letter.
 */
#define OPT_PREFIX 1000
/*
 * Return the version word (word 1) of a SPIR-V module.
 *
 * spirv - module words; word 0 must be the SPIR-V magic number.
 * size  - module size in bytes, at least 8.
 */
static uint32_t
get_module_spirv_version(const uint32_t *spirv, size_t size)
{
   assert(size >= 8);
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);

   const uint32_t version_word = spirv[1];
   return version_word;
}
/*
 * Overwrite the version word (word 1) of a SPIR-V module in place.
 *
 * spirv   - module words; word 0 must be the SPIR-V magic number.
 * size    - module size in bytes, at least 8.
 * version - new value for the version word.
 */
static void
set_module_spirv_version(uint32_t *spirv, size_t size, uint32_t version)
{
   assert(size >= 8);
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);

   uint32_t *version_word = &spirv[1];
   *version_word = version;
}
/*
 * intel_clc entry point.
 *
 * Pipeline:
 *   1. Parse options; remaining arguments starting with '-' are forwarded to
 *      clang, all others are OpenCL C input files (as are --in arguments).
 *   2. Compile every input file to SPIR-V and link the modules together.
 *   3. Optionally dump the linked SPIR-V (--spv).
 *   4. Compile the requested entry point to a brw_kernel for the target
 *      platform.
 *   5. Print the kernel as C source to --out (or stdout).
 *
 * Returns 0 on success, -1 on invalid/missing arguments or unsupported
 * platform, and 1 on any other failure.
 */
int main(int argc, char **argv)
{
   /* getopt_long() code for the long-only --in option.  It used to share 'i'
    * with --info, which made "--in FILE" enable info printing and silently
    * drop FILE.
    */
   enum { OPT_IN = OPT_PREFIX + 1 };

   brw_process_intel_debug_variable();

   static struct option long_options[] ={
      {"help", no_argument, 0, 'h'},
      {"entrypoint", required_argument, 0, 'e'},
      {"platform", required_argument, 0, 'p'},
      {"prefix", required_argument, 0, OPT_PREFIX},
      {"in", required_argument, 0, OPT_IN},
      {"out", required_argument, 0, 'o'},
      {"spv", required_argument, 0, 's'},
      {"info", no_argument, 0, 'i'},
      {0, 0, 0, 0}
   };

   char *entry_point = NULL, *platform = NULL, *outfile = NULL, *spv_outfile = NULL, *prefix = NULL;
   struct util_dynarray clang_args;
   struct util_dynarray input_files;
   struct util_dynarray spirv_objs;
   struct util_dynarray spirv_ptr_objs;
   bool print_info = false;

   /* Everything allocated below hangs off this context. */
   void *mem_ctx = ralloc_context(NULL);

   util_dynarray_init(&clang_args, mem_ctx);
   util_dynarray_init(&input_files, mem_ctx);
   util_dynarray_init(&spirv_objs, mem_ctx);
   util_dynarray_init(&spirv_ptr_objs, mem_ctx);

   int ch;
   while ((ch = getopt_long(argc, argv, "he:p:s:o:i", long_options, NULL)) != -1)
   {
      switch (ch)
      {
      case 'h':
         print_usage(argv[0], stdout);
         ralloc_free(mem_ctx);
         return 0;
      case 'e':
         entry_point = optarg;
         break;
      case 'p':
         platform = optarg;
         break;
      case 'o':
         outfile = optarg;
         break;
      case 's':
         spv_outfile = optarg;
         break;
      case 'i':
         print_info = true;
         break;
      case OPT_PREFIX:
         prefix = optarg;
         break;
      case OPT_IN:
         util_dynarray_append(&input_files, char *, optarg);
         break;
      default:
         /* optarg is NULL for an unrecognized option, so it must not be
          * printed.  getopt_long() has already reported the offending
          * option on stderr.
          */
         print_usage(argv[0], stderr);
         ralloc_free(mem_ctx);
         return 1;
      }
   }

   /* Sort the non-option arguments into clang flags and input files. */
   for (int i = optind; i < argc; i++) {
      if (argv[i][0] == '-')
         util_dynarray_append(&clang_args, char *, argv[i]);
      else
         util_dynarray_append(&input_files, char *, argv[i]);
   }

   if (util_dynarray_num_elements(&input_files, char *) == 0) {
      fprintf(stderr, "No input file(s).\n");
      print_usage(argv[0], stderr);
      ralloc_free(mem_ctx);
      return -1;
   }

   if (platform == NULL) {
      fprintf(stderr, "No target platform name specified.\n");
      print_usage(argv[0], stderr);
      ralloc_free(mem_ctx);
      return -1;
   }

   int pci_id = intel_device_name_to_pci_device_id(platform);
   if (pci_id < 0) {
      fprintf(stderr, "Invalid target platform name: %s\n", platform);
      ralloc_free(mem_ctx);
      return -1;
   }

   struct intel_device_info _devinfo, *devinfo = &_devinfo;
   if (!intel_get_device_info_from_pci_id(pci_id, devinfo)) {
      fprintf(stderr, "Failed to get device information.\n");
      ralloc_free(mem_ctx);
      return -1;
   }

   if (devinfo->verx10 < 125) {
      fprintf(stderr, "Platform currently not supported.\n");
      ralloc_free(mem_ctx);
      return -1;
   }

   struct brw_isa_info _isa, *isa = &_isa;
   brw_init_isa_info(isa, devinfo);

   if (entry_point == NULL) {
      fprintf(stderr, "No entry-point name specified.\n");
      print_usage(argv[0], stderr);
      ralloc_free(mem_ctx);
      return -1;
   }

   struct clc_logger logger = {
      .error = msg_callback,
      .warning = msg_callback,
   };

   /* Compile every input file to its own SPIR-V module. */
   util_dynarray_foreach(&input_files, char *, infile) {
      int fd = open(*infile, O_RDONLY);
      if (fd < 0) {
         fprintf(stderr, "Failed to open %s\n", *infile);
         ralloc_free(mem_ctx);
         return 1;
      }

      off_t len = lseek(fd, 0, SEEK_END);
      if (len < 0) {
         fprintf(stderr, "Failed to get the size of %s: errno=%d, %s\n",
                 *infile, errno, strerror(errno));
         close(fd);
         ralloc_free(mem_ctx);
         return 1;
      }

      /* NOTE(review): the mapping is handed over as the source text with no
       * length next to it.  If the consumer treats it as a NUL-terminated
       * string, a file whose size is an exact multiple of the page size has
       * no terminator inside the mapping - confirm.
       */
      const void *map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
      /* Capture errno before close() gets a chance to overwrite it. */
      const int map_errno = errno;
      close(fd);
      if (map == MAP_FAILED) {
         fprintf(stderr, "Failed to mmap the file: errno=%d, %s\n",
                 map_errno, strerror(map_errno));
         ralloc_free(mem_ctx);
         return 1;
      }

      const char *allowed_spirv_extensions[] = {
         "SPV_EXT_shader_atomic_float_add",
         "SPV_EXT_shader_atomic_float_min_max",
         "SPV_KHR_float_controls",
         "SPV_INTEL_subgroups",
         NULL,
      };

      struct clc_compile_args clc_args = {
         .source = {
            .name = *infile,
            .value = map,
         },
         .features = {
            .fp16 = true,
            .intel_subgroups = true,
            .subgroups = true,
         },
         .args = util_dynarray_begin(&clang_args),
         .num_args = util_dynarray_num_elements(&clang_args, char *),
         .allowed_spirv_extensions = allowed_spirv_extensions,
      };

      struct clc_binary *spirv_out =
         util_dynarray_grow(&spirv_objs, struct clc_binary, 1);

      if (!clc_compile_c_to_spirv(&clc_args, &logger, spirv_out)) {
         ralloc_free(mem_ctx);
         return 1;
      }
   }

   /* Collect the module pointers only once spirv_objs has stopped growing:
    * growing the array may move its storage, which would leave pointers
    * taken during the loop above dangling.
    */
   util_dynarray_foreach(&spirv_objs, struct clc_binary, obj) {
      util_dynarray_append(&spirv_ptr_objs, struct clc_binary *, obj);
   }

   /* The SPIRV-Tools linker started checking that all modules have the same
    * version. But SPIRV-LLVM-Translator picks the lowest required version
    * for each module it compiles. So we have to iterate over all of them and
    * set the max found to make SPIRV-Tools link our modules.
    *
    * TODO: This is not the correct thing to do. We need SPIRV-LLVM-Translator
    *       to pick a given SPIRV version given to it and have all the modules
    *       at that version. We should remove this hack when this issue is
    *       fixed:
    *       https://github.com/KhronosGroup/SPIRV-LLVM-Translator/issues/1445
    */
   uint32_t max_spirv_version = 0;
   util_dynarray_foreach(&spirv_ptr_objs, struct clc_binary *, module) {
      max_spirv_version = MAX2(max_spirv_version,
                               get_module_spirv_version((*module)->data,
                                                        (*module)->size));
   }

   assert(max_spirv_version > 0);
   util_dynarray_foreach(&spirv_ptr_objs, struct clc_binary *, module) {
      set_module_spirv_version((*module)->data, (*module)->size,
                               max_spirv_version);
   }

   struct clc_linker_args link_args = {
      .in_objs = util_dynarray_begin(&spirv_ptr_objs),
      .num_in_objs = util_dynarray_num_elements(&spirv_ptr_objs,
                                                struct clc_binary *),
      .create_library = true,
   };
   struct clc_binary final_spirv;
   if (!clc_link_spirv(&link_args, &logger, &final_spirv)) {
      ralloc_free(mem_ctx);
      return 1;
   }

   /* Optional dump of the linked SPIR-V binary. */
   if (spv_outfile) {
      FILE *fp = fopen(spv_outfile, "wb");
      if (fp == NULL) {
         fprintf(stderr, "Failed to open %s for writing: %s\n",
                 spv_outfile, strerror(errno));
         ralloc_free(mem_ctx);
         return 1;
      }

      bool ok = final_spirv.size == 0 ||
                fwrite(final_spirv.data, final_spirv.size, 1, fp) == 1;
      /* fclose() flushes, so its result is part of the write status. */
      ok = (fclose(fp) == 0) && ok;
      if (!ok) {
         fprintf(stderr, "Failed to write %s\n", spv_outfile);
         ralloc_free(mem_ctx);
         return 1;
      }
   }

   struct clc_parsed_spirv parsed_spirv_data;
   if (!clc_parse_spirv(&final_spirv, &logger, &parsed_spirv_data)) {
      ralloc_free(mem_ctx);
      return 1;
   }

   /* Make sure the requested entry point exists in the linked module. */
   const struct clc_kernel_info *kernel_info = NULL;
   for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) {
      if (strcmp(parsed_spirv_data.kernels[i].name, entry_point) == 0) {
         kernel_info = &parsed_spirv_data.kernels[i];
         break;
      }
   }
   if (kernel_info == NULL) {
      fprintf(stderr, "Kernel entrypoint %s not found\n", entry_point);
      ralloc_free(mem_ctx);
      return 1;
   }

   struct brw_kernel kernel = {};
   char *error_str;

   struct brw_compiler *compiler = brw_compiler_create(mem_ctx, devinfo);
   compiler->shader_debug_log = compiler_log;
   compiler->shader_perf_log = compiler_log;
   struct disk_cache *disk_cache = get_disk_cache(compiler);

   glsl_type_singleton_init_or_ref();

   if (!brw_kernel_from_spirv(compiler, disk_cache, &kernel, NULL, mem_ctx,
                              final_spirv.data, final_spirv.size,
                              entry_point, &error_str)) {
      fprintf(stderr, "Compile failed: %s\n", error_str);
      ralloc_free(mem_ctx);
      return 1;
   }

   if (print_info) {
      fprintf(stdout, "kernel info:\n");
      fprintf(stdout, " uses_barrier : %u\n", kernel.prog_data.uses_barrier);
      fprintf(stdout, " uses_num_work_groups : %u\n", kernel.prog_data.uses_num_work_groups);
      fprintf(stdout, " uses_inline_data : %u\n", kernel.prog_data.uses_inline_data);
      fprintf(stdout, " local_size : %ux%ux%u\n",
              kernel.prog_data.local_size[0],
              kernel.prog_data.local_size[1],
              kernel.prog_data.local_size[2]);
      fprintf(stdout, " curb_read_length : %u\n", kernel.prog_data.base.curb_read_length);
      fprintf(stdout, " total_scratch : %u\n", kernel.prog_data.base.total_scratch);
      fprintf(stdout, " total_shared : %u\n", kernel.prog_data.base.total_shared);
      fprintf(stdout, " program_size : %u\n", kernel.prog_data.base.program_size);
      fprintf(stdout, " const_data_size : %u\n", kernel.prog_data.base.const_data_size);
      fprintf(stdout, " uses_atomic_load_store : %u\n", kernel.prog_data.base.uses_atomic_load_store);
      fprintf(stdout, " dispatch_grf_start_reg : %u\n", kernel.prog_data.base.dispatch_grf_start_reg);
   }

   glsl_type_singleton_decref();

   /* Default symbol prefix: gfx<ver>[5]_clc_<entrypoint>. */
   char prefix_tmp[256];
   if (prefix == NULL) {
      bool is_pt_5 = (devinfo->verx10 % 10) == 5;
      snprintf(prefix_tmp, sizeof(prefix_tmp), "gfx%d%s_clc_%s",
               devinfo->ver, is_pt_5 ? "5" : "", entry_point);
      prefix = prefix_tmp;
   }

   if (outfile != NULL) {
      FILE *fp = fopen(outfile, "w");
      if (fp == NULL) {
         fprintf(stderr, "Failed to open %s for writing: %s\n",
                 outfile, strerror(errno));
         ralloc_free(mem_ctx);
         return 1;
      }

      print_kernel(fp, prefix, &kernel, isa);

      bool ok = !ferror(fp);
      ok = (fclose(fp) == 0) && ok;
      if (!ok) {
         fprintf(stderr, "Failed to write %s\n", outfile);
         ralloc_free(mem_ctx);
         return 1;
      }
   } else {
      print_kernel(stdout, prefix, &kernel, isa);
   }

   ralloc_free(mem_ctx);

   return 0;
}