diff --git a/src/intel/compiler/brw_kernel.c b/src/intel/compiler/brw_kernel.c index bb16dfea518..3ae93c316bb 100644 --- a/src/intel/compiler/brw_kernel.c +++ b/src/intel/compiler/brw_kernel.c @@ -452,3 +452,200 @@ brw_kernel_from_spirv(struct brw_compiler *compiler, return kernel->code != NULL; } + + +nir_shader * +brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size) +{ + struct spirv_to_nir_options spirv_options = { + .environment = NIR_SPIRV_OPENCL, + .caps = { + .address = true, + .groups = true, + .image_write_without_format = true, + .int8 = true, + .int16 = true, + .int64 = true, + .int64_atomics = true, + .kernel = true, + .linkage = true, /* We receive linked kernel from clc */ + .float_controls = true, + .generic_pointers = true, + .storage_8bit = true, + .storage_16bit = true, + .subgroup_arithmetic = true, + .subgroup_basic = true, + .subgroup_ballot = true, + .subgroup_dispatch = true, + .subgroup_quad = true, + .subgroup_shuffle = true, + .subgroup_vote = true, + + .intel_subgroup_shuffle = true, + .intel_subgroup_buffer_block_io = true, + }, + .shared_addr_format = nir_address_format_62bit_generic, + .global_addr_format = nir_address_format_62bit_generic, + .temp_addr_format = nir_address_format_62bit_generic, + .constant_addr_format = nir_address_format_64bit_global, + .create_library = true, + }; + + assert(spirv_size % 4 == 0); + nir_shader *nir = + spirv_to_nir(spirv, spirv_size / 4, NULL, 0, MESA_SHADER_KERNEL, + "library", &spirv_options, &brw_scalar_nir_options); + nir_validate_shader(nir, "after spirv_to_nir"); + nir_validate_ssa_dominance(nir, "after spirv_to_nir"); + ralloc_steal(mem_ctx, nir); + nir->info.name = ralloc_strdup(nir, "library"); + + if (INTEL_DEBUG(DEBUG_CS)) { + /* Re-index SSA defs so we print more sensible numbers. */ + nir_foreach_function_impl(impl, nir) { + nir_index_ssa_defs(impl); + } + + fprintf(stderr, "NIR (from SPIR-V) for kernel\n"); + nir_print_shader(nir, stderr); + } + + NIR_PASS_V(nir, implement_intel_builtins); + NIR_PASS_V(nir, nir_link_shader_functions, spirv_options.clc_shader); + + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. + */ + NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_shader_temp | + nir_var_function_temp)); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | + nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL); + { + bool progress; + do + { + progress = false; + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_algebraic); + } while (progress); + } + + NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); + + assert(nir->scratch_size == 0); + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align); + + { + bool progress; + do + { + progress = false; + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_opt_undef); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_cse); + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_algebraic); + NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_optimize_phi_true_false); + NIR_PASS(progress, nir, nir_opt_dead_cf); + NIR_PASS(progress, nir, nir_opt_remove_phis); + NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true); + NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform); + NIR_PASS(progress, nir, nir_opt_memcpy); + } while (progress); + } + + NIR_PASS_V(nir, nir_scale_fdiv); + + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | + nir_var_mem_constant | nir_var_function_temp | nir_var_image, NULL); + + + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL); + + nir->scratch_size = 0; + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, + nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant, + glsl_get_cl_type_size_align); + + // Lower memcpy - needs to wait until types are sized + { + bool progress; + do { + progress = false; + NIR_PASS(progress, nir, nir_opt_memcpy); + NIR_PASS(progress, nir, nir_copy_prop); + NIR_PASS(progress, nir, nir_opt_copy_prop_vars); + NIR_PASS(progress, nir, nir_opt_deref); + NIR_PASS(progress, nir, nir_opt_dce); + NIR_PASS(progress, nir, nir_split_var_copies); + NIR_PASS(progress, nir, nir_lower_var_copies); + NIR_PASS(progress, nir, nir_lower_vars_to_ssa); + NIR_PASS(progress, nir, nir_opt_constant_folding); + NIR_PASS(progress, nir, nir_opt_cse); + } while (progress); + } + NIR_PASS_V(nir, nir_lower_memcpy); + + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_mem_shared | nir_var_function_temp | nir_var_uniform, + nir_address_format_32bit_offset_as_64bit); + + NIR_PASS_V(nir, nir_lower_system_values); + + /* Lower again, this time after dead-variables to get more compact variable + * layouts. + */ + nir->global_mem_size = 0; + nir->scratch_size = 0; + nir->info.shared_size = 0; + NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, + nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant, + glsl_get_cl_type_size_align); + if (nir->constant_data_size > 0) { + assert(nir->constant_data == NULL); + nir->constant_data = rzalloc_size(nir, nir->constant_data_size); + nir_gather_explicit_io_initializers(nir, nir->constant_data, + nir->constant_data_size, + nir_var_mem_constant); + } + + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_constant, + nir_address_format_64bit_global); + + NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_uniform, + nir_address_format_32bit_offset_as_64bit); + + NIR_PASS_V(nir, nir_lower_explicit_io, + nir_var_shader_temp | nir_var_function_temp | + nir_var_mem_shared | nir_var_mem_global, + nir_address_format_62bit_generic); + + if (INTEL_DEBUG(DEBUG_CS)) { + /* Re-index SSA defs so we print more sensible numbers. */ + nir_foreach_function_impl(impl, nir) { + nir_index_ssa_defs(impl); + } + + fprintf(stderr, "NIR (before I/O lowering) for kernel\n"); + nir_print_shader(nir, stderr); + } + + return nir; +} diff --git a/src/intel/compiler/brw_kernel.h b/src/intel/compiler/brw_kernel.h index b71b54b8666..8b2f5915f60 100644 --- a/src/intel/compiler/brw_kernel.h +++ b/src/intel/compiler/brw_kernel.h @@ -67,6 +67,9 @@ brw_kernel_from_spirv(struct brw_compiler *compiler, const char *entrypoint_name, char **error_str); +nir_shader * +brw_nir_from_spirv(void *mem_ctx, const uint32_t *spirv, size_t spirv_size); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/intel/compiler/intel_clc.c b/src/intel/compiler/intel_clc.c index 2806be95b5d..427d5d1e237 100644 --- a/src/intel/compiler/intel_clc.c +++ b/src/intel/compiler/intel_clc.c @@ -26,6 +26,7 @@ #include "common/intel_disasm.h" #include "compiler/clc/clc.h" #include "compiler/glsl_types.h" +#include "compiler/nir/nir_serialize.h" #include "dev/intel_debug.h" #include "util/build_id.h" #include "util/disk_cache.h" @@ -114,6 +115,20 @@ print_u32_data(FILE *fp, const char *prefix, const char *arr_name, fprintf(fp, "\n};\n"); } +static void +print_u8_data(FILE *fp, const char *prefix, const char *arr_name, + const uint8_t *data, size_t len) +{ + fprintf(fp, "static const uint8_t %s_%s[] = {", prefix, arr_name); + for (unsigned i = 0; i < len; i++) { + if (i % 16 == 0) + fprintf(fp,"\n "); + + fprintf(fp, " 0x%02" PRIx8 ",", data[i]); + } + fprintf(fp, "\n};\n"); +} + static const char * reloc_type_str(enum brw_shader_reloc_type type) { @@ -268,6 +283,7 @@ print_usage(char *exec_name, FILE *f) " -o, --out Specify the output filename.\n" " -i, --in Specify one input filename. Accepted multiple times.\n" " -s, --spv Specify the output filename for spirv.\n" +" -n, --nir Specify whether to output serialized NIR instead of ISA.\n" " -v, --verbose Print more information during compilation.\n" , exec_name); } @@ -281,6 +297,7 @@ struct intel_clc_params { char *spv_outfile; char *prefix; + bool output_nir; bool print_info; void *mem_ctx; @@ -288,6 +305,74 @@ struct intel_clc_params { struct intel_device_info devinfo; }; +#include "compiler/spirv/nir_spirv.h" + +static int +output_nir(const struct intel_clc_params *params, struct clc_binary *binary) +{ + struct spirv_to_nir_options spirv_options = { + .environment = NIR_SPIRV_OPENCL, + .caps = { + .address = true, + .groups = true, + .image_write_without_format = true, + .int8 = true, + .int16 = true, + .int64 = true, + .int64_atomics = true, + .kernel = true, + .linkage = true, /* We receive linked kernel from clc */ + .float_controls = true, + .generic_pointers = true, + .storage_8bit = true, + .storage_16bit = true, + .subgroup_arithmetic = true, + .subgroup_basic = true, + .subgroup_ballot = true, + .subgroup_dispatch = true, + .subgroup_quad = true, + .subgroup_shuffle = true, + .subgroup_vote = true, + + .intel_subgroup_shuffle = true, + .intel_subgroup_buffer_block_io = true, + }, + .shared_addr_format = nir_address_format_62bit_generic, + .global_addr_format = nir_address_format_62bit_generic, + .temp_addr_format = nir_address_format_62bit_generic, + .constant_addr_format = nir_address_format_64bit_global, + .create_library = true, + }; + + FILE *fp = params->outfile != NULL ? + fopen(params->outfile, "w") : stdout; + if (!fp) { + fprintf(stderr, "Failed to open %s\n", params->outfile); + return -1; + } + + spirv_library_to_nir_builder(fp, binary->data, binary->size / 4, + &spirv_options); + + nir_shader *nir = brw_nir_from_spirv(params->mem_ctx, + binary->data, binary->size); + if (!nir) { + fprintf(stderr, "Failed to generate NIR out of SPIRV\n"); + return -1; + } + + struct blob blob; + blob_init(&blob); + nir_serialize(&blob, nir, false /* strip */); + print_u8_data(fp, params->prefix, "nir", blob.data, blob.size); + blob_finish(&blob); + + if (params->outfile) + fclose(fp); + + return 0; +} + static int output_isa(const struct intel_clc_params *params, struct clc_binary *binary) { @@ -362,6 +447,7 @@ int main(int argc, char **argv) {"in", required_argument, 0, 'i'}, {"out", required_argument, 0, 'o'}, {"spv", required_argument, 0, 's'}, + {"nir", no_argument, 0, 'n'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -381,7 +467,7 @@ int main(int argc, char **argv) util_dynarray_init(&input_files, params.mem_ctx); int ch; - while ((ch = getopt_long(argc, argv, "he:p:s:i:o:v", long_options, NULL)) != -1) + while ((ch = getopt_long(argc, argv, "he:p:s:i:no:v", long_options, NULL)) != -1) { switch (ch) { @@ -399,6 +485,9 @@ int main(int argc, char **argv) break; case 'i': util_dynarray_append(&input_files, char *, optarg); + break; + case 'n': + params.output_nir = true; break; case 's': params.spv_outfile = optarg; @@ -426,34 +515,6 @@ int main(int argc, char **argv) goto fail; } - if (params.platform == NULL) { - fprintf(stderr, "No target platform name specified.\n"); - print_usage(argv[0], stderr); - goto fail; - } - - int pci_id = intel_device_name_to_pci_device_id(params.platform); - if (pci_id < 0) { - fprintf(stderr, "Invalid target platform name: %s\n", params.platform); - goto fail; - } - - if (!intel_get_device_info_from_pci_id(pci_id, ¶ms.devinfo)) { - fprintf(stderr, "Failed to get device information.\n"); - goto fail; - } - - if (params.devinfo.verx10 < 125) { - fprintf(stderr, "Platform currently not supported.\n"); - goto fail; - } - - if (params.entry_point == NULL) { - fprintf(stderr, "No entry-point name specified.\n"); - print_usage(argv[0], stderr); - goto fail; - } - struct clc_logger logger = { .error = msg_callback, .warning = msg_callback, @@ -516,25 +577,57 @@ int main(int argc, char **argv) fclose(fp); } - if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data)) { - goto fail; - } - - const struct clc_kernel_info *kernel_info = NULL; - for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) { - if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) { - kernel_info = &parsed_spirv_data.kernels[i]; - break; - } - } - if (kernel_info == NULL) { - fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point); - goto fail; - } - glsl_type_singleton_init_or_ref(); - exit_code = output_isa(¶ms, &spirv_obj); + if (params.output_nir) { + exit_code = output_nir(¶ms, &spirv_obj); + } else { + if (params.platform == NULL) { + fprintf(stderr, "No target platform name specified.\n"); + print_usage(argv[0], stderr); + goto fail; + } + + int pci_id = intel_device_name_to_pci_device_id(params.platform); + if (pci_id < 0) { + fprintf(stderr, "Invalid target platform name: %s\n", params.platform); + goto fail; + } + + if (!intel_get_device_info_from_pci_id(pci_id, ¶ms.devinfo)) { + fprintf(stderr, "Failed to get device information.\n"); + goto fail; + } + + if (params.devinfo.verx10 < 125) { + fprintf(stderr, "Platform currently not supported.\n"); + goto fail; + } + + if (params.entry_point == NULL) { + fprintf(stderr, "No entry-point name specified.\n"); + print_usage(argv[0], stderr); + goto fail; + } + + struct clc_parsed_spirv parsed_spirv_data; + if (!clc_parse_spirv(&spirv_obj, &logger, &parsed_spirv_data)) + goto fail; + + const struct clc_kernel_info *kernel_info = NULL; + for (unsigned i = 0; i < parsed_spirv_data.num_kernels; i++) { + if (strcmp(parsed_spirv_data.kernels[i].name, params.entry_point) == 0) { + kernel_info = &parsed_spirv_data.kernels[i]; + break; + } + } + if (kernel_info == NULL) { + fprintf(stderr, "Kernel entrypoint %s not found\n", params.entry_point); + goto fail; + } + + exit_code = output_isa(¶ms, &spirv_obj); + } glsl_type_singleton_decref();