radv: Add the concept of radv shader binaries.

This simplifies a bunch of stuff by:
(1) keeping everything for a shader in a single allocation, which makes
 things easier for the cache;
(2) creating a shader_variant creation helper.

This is immediately put to use by creating rtld shader binaries. This
is the main reason for the binaries, as we need to do the linking at
upload time, i.e. post caching. We do not enable rtld yet.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
This commit is contained in:
Bas Nieuwenhuizen
2019-07-01 01:29:24 +02:00
parent 43f2f01cc8
commit 726a31df70
6 changed files with 344 additions and 231 deletions

View File

@@ -3863,32 +3863,33 @@ static unsigned ac_llvm_compile(LLVMModuleRef M,
static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
LLVMModuleRef llvm_module, LLVMModuleRef llvm_module,
struct ac_shader_binary *binary, struct radv_shader_binary **rbinary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info, struct radv_shader_variant_info *shader_info,
gl_shader_stage stage, gl_shader_stage stage,
const struct radv_nir_compiler_options *options) const struct radv_nir_compiler_options *options)
{ {
struct ac_shader_binary binary;
struct ac_shader_config config;
if (options->dump_shader) if (options->dump_shader)
ac_dump_module(llvm_module); ac_dump_module(llvm_module);
memset(binary, 0, sizeof(*binary)); memset(&binary, 0, sizeof(binary));
if (options->record_llvm_ir) { if (options->record_llvm_ir) {
char *llvm_ir = LLVMPrintModuleToString(llvm_module); char *llvm_ir = LLVMPrintModuleToString(llvm_module);
binary->llvm_ir_string = strdup(llvm_ir); binary.llvm_ir_string = strdup(llvm_ir);
LLVMDisposeMessage(llvm_ir); LLVMDisposeMessage(llvm_ir);
} }
int v = ac_llvm_compile(llvm_module, binary, ac_llvm); int v = ac_llvm_compile(llvm_module, &binary, ac_llvm);
if (v) { if (v) {
fprintf(stderr, "compile failed\n"); fprintf(stderr, "compile failed\n");
} }
if (options->dump_shader) if (options->dump_shader)
fprintf(stderr, "disasm:\n%s\n", binary->disasm_string); fprintf(stderr, "disasm:\n%s\n", binary.disasm_string);
ac_shader_binary_read_config(binary, config, 0, options->supports_spill); ac_shader_binary_read_config(&binary, &config, 0, options->supports_spill);
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module); LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
LLVMDisposeModule(llvm_module); LLVMDisposeModule(llvm_module);
@@ -3896,43 +3897,43 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
if (stage == MESA_SHADER_FRAGMENT) { if (stage == MESA_SHADER_FRAGMENT) {
shader_info->num_input_vgprs = 0; shader_info->num_input_vgprs = 0;
if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr)) if (G_0286CC_PERSP_SAMPLE_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr)) if (G_0286CC_PERSP_CENTER_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr)) if (G_0286CC_PERSP_CENTROID_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr)) if (G_0286CC_PERSP_PULL_MODEL_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 3; shader_info->num_input_vgprs += 3;
if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr)) if (G_0286CC_LINEAR_SAMPLE_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr)) if (G_0286CC_LINEAR_CENTER_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr)) if (G_0286CC_LINEAR_CENTROID_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 2; shader_info->num_input_vgprs += 2;
if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr)) if (G_0286CC_LINE_STIPPLE_TEX_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr)) if (G_0286CC_POS_X_FLOAT_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr)) if (G_0286CC_POS_Y_FLOAT_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr)) if (G_0286CC_POS_Z_FLOAT_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr)) if (G_0286CC_POS_W_FLOAT_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr)) if (G_0286CC_FRONT_FACE_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr)) if (G_0286CC_ANCILLARY_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr)) if (G_0286CC_SAMPLE_COVERAGE_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr)) if (G_0286CC_POS_FIXED_PT_ENA(config.spi_ps_input_addr))
shader_info->num_input_vgprs += 1; shader_info->num_input_vgprs += 1;
} }
config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs); config.num_vgprs = MAX2(config.num_vgprs, shader_info->num_input_vgprs);
/* +3 for scratch wave offset and VCC */ /* +3 for scratch wave offset and VCC */
config->num_sgprs = MAX2(config->num_sgprs, config.num_sgprs = MAX2(config.num_sgprs,
shader_info->num_input_sgprs + 3); shader_info->num_input_sgprs + 3);
/* Enable 64-bit and 16-bit denormals, because there is no performance /* Enable 64-bit and 16-bit denormals, because there is no performance
@@ -3947,7 +3948,35 @@ static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
* have to stop using those. * have to stop using those.
* - GFX6 & GFX7 would be very slow. * - GFX6 & GFX7 would be very slow.
*/ */
config->float_mode |= V_00B028_FP_64_DENORMS; config.float_mode |= V_00B028_FP_64_DENORMS;
size_t disasm_size = binary.disasm_string ? strlen(binary.disasm_string) : 0;
size_t llvm_ir_size = binary.llvm_ir_string ? strlen(binary.llvm_ir_string) : 0;
size_t alloc_size = sizeof(struct radv_shader_binary_legacy) + binary.code_size +
disasm_size + llvm_ir_size + 2;
struct radv_shader_binary_legacy *lbin = calloc(1, alloc_size);
memcpy(lbin->data, binary.code, binary.code_size);
if (binary.llvm_ir_string)
memcpy(lbin->data + binary.code_size, binary.llvm_ir_string, llvm_ir_size + 1);
if (binary.disasm_string)
memcpy(lbin->data + binary.code_size + llvm_ir_size + 1, binary.disasm_string, disasm_size + 1);
lbin->base.type = RADV_BINARY_TYPE_LEGACY;
lbin->base.stage = stage;
lbin->base.total_size = alloc_size;
lbin->config = config;
lbin->code_size = binary.code_size;
lbin->llvm_ir_size = llvm_ir_size;
lbin->disasm_size = disasm_size;
*rbinary = &lbin->base;
free(binary.code);
free(binary.config);
free(binary.rodata);
free(binary.global_symbol_offsets);
free(binary.relocs);
free(binary.disasm_string);
free(binary.llvm_ir_string);
} }
static void static void
@@ -3990,8 +4019,7 @@ ac_fill_shader_info(struct radv_shader_variant_info *shader_info, struct nir_sha
void void
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct ac_shader_binary *binary, struct radv_shader_binary **rbinary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info, struct radv_shader_variant_info *shader_info,
struct nir_shader *const *nir, struct nir_shader *const *nir,
int nir_count, int nir_count,
@@ -4003,7 +4031,7 @@ radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info, llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, shader_info,
options); options);
ac_compile_llvm_module(ac_llvm, llvm_module, binary, config, shader_info, ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, shader_info,
nir[nir_count - 1]->info.stage, options); nir[nir_count - 1]->info.stage, options);
for (int i = 0; i < nir_count; ++i) for (int i = 0; i < nir_count; ++i)
@@ -4115,8 +4143,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
void void
radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader, struct nir_shader *geom_shader,
struct ac_shader_binary *binary, struct radv_shader_binary **rbinary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info, struct radv_shader_variant_info *shader_info,
const struct radv_nir_compiler_options *options) const struct radv_nir_compiler_options *options)
{ {
@@ -4156,6 +4183,8 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options); ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, options);
ac_compile_llvm_module(ac_llvm, ctx.ac.module, binary, config, shader_info, ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, shader_info,
MESA_SHADER_VERTEX, options); MESA_SHADER_VERTEX, options);
(*rbinary)->is_gs_copy_shader = true;
} }

View File

@@ -2131,8 +2131,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_shader_module fs_m = {0}; struct radv_shader_module fs_m = {0};
struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; struct radv_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
nir_shader *nir[MESA_SHADER_STAGES] = {0}; nir_shader *nir[MESA_SHADER_STAGES] = {0};
void *codes[MESA_SHADER_STAGES] = {0}; struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
unsigned code_sizes[MESA_SHADER_STAGES] = {0};
struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}}; struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{0}}}};
unsigned char hash[20], gs_copy_hash[20]; unsigned char hash[20], gs_copy_hash[20];
@@ -2229,9 +2228,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]); radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
pipeline->shaders[MESA_SHADER_FRAGMENT] = pipeline->shaders[MESA_SHADER_FRAGMENT] =
radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
pipeline->layout, keys + MESA_SHADER_FRAGMENT, pipeline->layout, keys + MESA_SHADER_FRAGMENT,
&codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]); &binaries[MESA_SHADER_FRAGMENT]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false); radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
} }
@@ -2259,10 +2258,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]); radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
pipeline->layout, pipeline->layout,
&key, &codes[MESA_SHADER_TESS_CTRL], &key, &binaries[MESA_SHADER_TESS_CTRL]);
&code_sizes[MESA_SHADER_TESS_CTRL]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false); radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
} }
@@ -2278,10 +2276,9 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]); radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
pipeline->layout, pipeline->layout,
&keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY], &keys[pre_stage] , &binaries[MESA_SHADER_GEOMETRY]);
&code_sizes[MESA_SHADER_GEOMETRY]);
radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false); radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
} }
@@ -2300,48 +2297,42 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
radv_start_feedback(stage_feedbacks[i]); radv_start_feedback(stage_feedbacks[i]);
pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1, pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
pipeline->layout, pipeline->layout,
keys + i, &codes[i], keys + i, &binaries[i]);
&code_sizes[i]);
radv_stop_feedback(stage_feedbacks[i], false); radv_stop_feedback(stage_feedbacks[i], false);
} }
} }
if(modules[MESA_SHADER_GEOMETRY]) { if(modules[MESA_SHADER_GEOMETRY]) {
void *gs_copy_code = NULL; struct radv_shader_binary *gs_copy_binary = NULL;
unsigned gs_copy_code_size = 0;
if (!pipeline->gs_copy_shader) { if (!pipeline->gs_copy_shader) {
pipeline->gs_copy_shader = radv_create_gs_copy_shader( pipeline->gs_copy_shader = radv_create_gs_copy_shader(
device, nir[MESA_SHADER_GEOMETRY], &gs_copy_code, device, nir[MESA_SHADER_GEOMETRY], &gs_copy_binary,
&gs_copy_code_size,
keys[MESA_SHADER_GEOMETRY].has_multiview_view_index); keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
} }
if (pipeline->gs_copy_shader) { if (pipeline->gs_copy_shader) {
void *code[MESA_SHADER_STAGES] = {0}; struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
unsigned code_size[MESA_SHADER_STAGES] = {0};
struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0}; struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
code[MESA_SHADER_GEOMETRY] = gs_copy_code; binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
code_size[MESA_SHADER_GEOMETRY] = gs_copy_code_size;
variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader; variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
radv_pipeline_cache_insert_shaders(device, cache, radv_pipeline_cache_insert_shaders(device, cache,
gs_copy_hash, gs_copy_hash,
variants, variants,
(const void**)code, binaries);
code_size);
} }
free(gs_copy_code); free(gs_copy_binary);
} }
radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
(const void**)codes, code_sizes); binaries);
for (int i = 0; i < MESA_SHADER_STAGES; ++i) { for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
free(codes[i]); free(binaries[i]);
if (nir[i]) { if (nir[i]) {
if (!pipeline->device->keep_shader_info) if (!pipeline->device->keep_shader_info)
ralloc_free(nir[i]); ralloc_free(nir[i]);

View File

@@ -31,17 +31,12 @@
#include "ac_nir_to_llvm.h" #include "ac_nir_to_llvm.h"
struct cache_entry_variant_info {
struct radv_shader_variant_info variant_info;
struct ac_shader_config config;
};
struct cache_entry { struct cache_entry {
union { union {
unsigned char sha1[20]; unsigned char sha1[20];
uint32_t sha1_dw[5]; uint32_t sha1_dw[5];
}; };
uint32_t code_sizes[MESA_SHADER_STAGES]; uint32_t binary_sizes[MESA_SHADER_STAGES];
struct radv_shader_variant *variants[MESA_SHADER_STAGES]; struct radv_shader_variant *variants[MESA_SHADER_STAGES];
char code[0]; char code[0];
}; };
@@ -92,8 +87,8 @@ entry_size(struct cache_entry *entry)
{ {
size_t ret = sizeof(*entry); size_t ret = sizeof(*entry);
for (int i = 0; i < MESA_SHADER_STAGES; ++i) for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (entry->code_sizes[i]) if (entry->binary_sizes[i])
ret += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i]; ret += entry->binary_sizes[i];
return ret; return ret;
} }
@@ -308,31 +303,14 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
char *p = entry->code; char *p = entry->code;
for(int i = 0; i < MESA_SHADER_STAGES; ++i) { for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (!entry->variants[i] && entry->code_sizes[i]) { if (!entry->variants[i] && entry->binary_sizes[i]) {
struct radv_shader_variant *variant; struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
struct cache_entry_variant_info info; memcpy(binary, p, entry->binary_sizes[i]);
p += entry->binary_sizes[i];
variant = calloc(1, sizeof(struct radv_shader_variant)); entry->variants[i] = radv_shader_variant_create(device, binary);
if (!variant) { } else if (entry->binary_sizes[i]) {
pthread_mutex_unlock(&cache->mutex); p += entry->binary_sizes[i];
return false;
}
memcpy(&info, p, sizeof(struct cache_entry_variant_info));
p += sizeof(struct cache_entry_variant_info);
variant->config = info.config;
variant->info = info.variant_info;
variant->code_size = entry->code_sizes[i];
variant->ref_count = 1;
void *ptr = radv_alloc_shader_memory(device, variant);
memcpy(ptr, p, entry->code_sizes[i]);
p += entry->code_sizes[i];
entry->variants[i] = variant;
} else if (entry->code_sizes[i]) {
p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
} }
} }
@@ -351,8 +329,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,
struct radv_pipeline_cache *cache, struct radv_pipeline_cache *cache,
const unsigned char *sha1, const unsigned char *sha1,
struct radv_shader_variant **variants, struct radv_shader_variant **variants,
const void *const *codes, struct radv_shader_binary *const *binaries)
const unsigned *code_sizes)
{ {
if (!cache) if (!cache)
cache = device->mem_cache; cache = device->mem_cache;
@@ -385,7 +362,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,
size_t size = sizeof(*entry); size_t size = sizeof(*entry);
for (int i = 0; i < MESA_SHADER_STAGES; ++i) for (int i = 0; i < MESA_SHADER_STAGES; ++i)
if (variants[i]) if (variants[i])
size += sizeof(struct cache_entry_variant_info) + code_sizes[i]; size += binaries[i]->total_size;
entry = vk_alloc(&cache->alloc, size, 8, entry = vk_alloc(&cache->alloc, size, 8,
@@ -399,22 +376,15 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,
memcpy(entry->sha1, sha1, 20); memcpy(entry->sha1, sha1, 20);
char* p = entry->code; char* p = entry->code;
struct cache_entry_variant_info info;
memset(&info, 0, sizeof(info));
for (int i = 0; i < MESA_SHADER_STAGES; ++i) { for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
if (!variants[i]) if (!variants[i])
continue; continue;
entry->code_sizes[i] = code_sizes[i]; entry->binary_sizes[i] = binaries[i]->total_size;
info.config = variants[i]->config; memcpy(p, binaries[i], binaries[i]->total_size);
info.variant_info = variants[i]->info; p += binaries[i]->total_size;
memcpy(p, &info, sizeof(struct cache_entry_variant_info));
p += sizeof(struct cache_entry_variant_info);
memcpy(p, codes[i], code_sizes[i]);
p += code_sizes[i];
} }
/* Always add cache items to disk. This will allow collection of /* Always add cache items to disk. This will allow collection of

View File

@@ -397,6 +397,9 @@ struct radv_pipeline_key {
uint32_t optimisations_disabled : 1; uint32_t optimisations_disabled : 1;
}; };
struct radv_shader_binary;
struct radv_shader_variant;
void void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache, radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
struct radv_device *device); struct radv_device *device);
@@ -406,8 +409,6 @@ bool
radv_pipeline_cache_load(struct radv_pipeline_cache *cache, radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
const void *data, size_t size); const void *data, size_t size);
struct radv_shader_variant;
bool bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device, radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
struct radv_pipeline_cache *cache, struct radv_pipeline_cache *cache,
@@ -420,8 +421,7 @@ radv_pipeline_cache_insert_shaders(struct radv_device *device,
struct radv_pipeline_cache *cache, struct radv_pipeline_cache *cache,
const unsigned char *sha1, const unsigned char *sha1,
struct radv_shader_variant **variants, struct radv_shader_variant **variants,
const void *const *codes, struct radv_shader_binary *const *binaries);
const unsigned *code_sizes);
enum radv_blit_ds_layout { enum radv_blit_ds_layout {
RADV_BLIT_DS_LAYOUT_TILE_ENABLE, RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
@@ -2107,14 +2107,12 @@ struct radv_nir_compiler_options;
void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, void radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
struct nir_shader *geom_shader, struct nir_shader *geom_shader,
struct ac_shader_binary *binary, struct radv_shader_binary **rbinary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info, struct radv_shader_variant_info *shader_info,
const struct radv_nir_compiler_options *option); const struct radv_nir_compiler_options *option);
void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
struct ac_shader_binary *binary, struct radv_shader_binary **rbinary,
struct ac_shader_config *config,
struct radv_shader_variant_info *shader_info, struct radv_shader_variant_info *shader_info,
struct nir_shader *const *nir, struct nir_shader *const *nir,
int nir_count, int nir_count,

View File

@@ -43,6 +43,7 @@
#include "ac_binary.h" #include "ac_binary.h"
#include "ac_llvm_util.h" #include "ac_llvm_util.h"
#include "ac_nir_to_llvm.h" #include "ac_nir_to_llvm.h"
#include "ac_rtld.h"
#include "vk_format.h" #include "vk_format.h"
#include "util/debug.h" #include "util/debug.h"
#include "ac_exp_param.h" #include "ac_exp_param.h"
@@ -464,79 +465,78 @@ radv_destroy_shader_slabs(struct radv_device *device)
#define DEBUGGER_NUM_MARKERS 5 #define DEBUGGER_NUM_MARKERS 5
static unsigned static unsigned
radv_get_shader_binary_size(struct ac_shader_binary *binary) radv_get_shader_binary_size(size_t code_size)
{ {
return binary->code_size + DEBUGGER_NUM_MARKERS * 4; return code_size + DEBUGGER_NUM_MARKERS * 4;
} }
static void static void radv_postprocess_config(const struct radv_physical_device *pdevice,
radv_fill_shader_variant(struct radv_device *device, const struct ac_shader_config *config_in,
struct radv_shader_variant *variant, const struct radv_shader_variant_info *info,
struct radv_nir_compiler_options *options, gl_shader_stage stage,
struct ac_shader_binary *binary, struct ac_shader_config *config_out)
gl_shader_stage stage)
{ {
bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0; bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
struct radv_shader_info *info = &variant->info.info;
unsigned vgpr_comp_cnt = 0; unsigned vgpr_comp_cnt = 0;
variant->code_size = radv_get_shader_binary_size(binary); *config_out = *config_in;
variant->config.rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
S_00B12C_USER_SGPR_MSB_GFX9(variant->info.num_user_sgprs >> 5) |
S_00B12C_SCRATCH_EN(scratch_enabled) |
S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
S_00B12C_SO_EN(!!info->so.num_outputs);
variant->config.rsrc1 = S_00B848_VGPRS((variant->config.num_vgprs - 1) / 4) | config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
S_00B848_SGPRS((variant->config.num_sgprs - 1) / 8) | S_00B12C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5) |
S_00B848_DX10_CLAMP(1) | S_00B12C_SCRATCH_EN(scratch_enabled) |
S_00B848_FLOAT_MODE(variant->config.float_mode); S_00B12C_SO_BASE0_EN(!!info->info.so.strides[0]) |
S_00B12C_SO_BASE1_EN(!!info->info.so.strides[1]) |
S_00B12C_SO_BASE2_EN(!!info->info.so.strides[2]) |
S_00B12C_SO_BASE3_EN(!!info->info.so.strides[3]) |
S_00B12C_SO_EN(!!info->info.so.num_outputs);
config_out->rsrc1 = S_00B848_VGPRS((config_in->num_vgprs - 1) / 4) |
S_00B848_SGPRS((config_in->num_sgprs - 1) / 8) |
S_00B848_DX10_CLAMP(1) |
S_00B848_FLOAT_MODE(config_in->float_mode);
switch (stage) { switch (stage) {
case MESA_SHADER_TESS_EVAL: case MESA_SHADER_TESS_EVAL:
if (options->key.tes.as_es) { if (info->tes.as_es) {
assert(device->physical_device->rad_info.chip_class <= GFX8); assert(pdevice->rad_info.chip_class <= GFX8);
vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; vgpr_comp_cnt = info->info.uses_prim_id ? 3 : 2;
} else { } else {
bool enable_prim_id = variant->info.tes.export_prim_id || info->uses_prim_id; bool enable_prim_id = info->tes.export_prim_id || info->info.uses_prim_id;
vgpr_comp_cnt = enable_prim_id ? 3 : 2; vgpr_comp_cnt = enable_prim_id ? 3 : 2;
} }
variant->config.rsrc2 |= S_00B12C_OC_LDS_EN(1); config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
break; break;
case MESA_SHADER_TESS_CTRL: case MESA_SHADER_TESS_CTRL:
if (device->physical_device->rad_info.chip_class >= GFX9) { if (pdevice->rad_info.chip_class >= GFX9) {
/* We need at least 2 components for LS. /* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
*/ */
vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1;
} else { } else {
variant->config.rsrc2 |= S_00B12C_OC_LDS_EN(1); config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1);
} }
break; break;
case MESA_SHADER_VERTEX: case MESA_SHADER_VERTEX:
if (variant->info.vs.as_ls) { if (info->vs.as_ls) {
assert(device->physical_device->rad_info.chip_class <= GFX8); assert(pdevice->rad_info.chip_class <= GFX8);
/* We need at least 2 components for LS. /* We need at least 2 components for LS.
* VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
*/ */
vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; vgpr_comp_cnt = info->info.vs.needs_instance_id ? 2 : 1;
} else if (variant->info.vs.as_es) { } else if (info->vs.as_es) {
assert(device->physical_device->rad_info.chip_class <= GFX8); assert(pdevice->rad_info.chip_class <= GFX8);
/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; vgpr_comp_cnt = info->info.vs.needs_instance_id ? 1 : 0;
} else { } else {
/* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID) /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
* If PrimID is disabled. InstanceID / StepRate1 is loaded instead. * If PrimID is disabled. InstanceID / StepRate1 is loaded instead.
* StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded.
*/ */
if (variant->info.vs.export_prim_id) { if (info->vs.export_prim_id) {
vgpr_comp_cnt = 2; vgpr_comp_cnt = 2;
} else if (info->vs.needs_instance_id) { } else if (info->info.vs.needs_instance_id) {
vgpr_comp_cnt = 1; vgpr_comp_cnt = 1;
} else { } else {
vgpr_comp_cnt = 0; vgpr_comp_cnt = 0;
@@ -547,30 +547,30 @@ radv_fill_shader_variant(struct radv_device *device,
case MESA_SHADER_GEOMETRY: case MESA_SHADER_GEOMETRY:
break; break;
case MESA_SHADER_COMPUTE: case MESA_SHADER_COMPUTE:
variant->config.rsrc2 |= config_out->rsrc2 |=
S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_X_EN(info->info.cs.uses_block_id[0]) |
S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | S_00B84C_TGID_Y_EN(info->info.cs.uses_block_id[1]) |
S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | S_00B84C_TGID_Z_EN(info->info.cs.uses_block_id[2]) |
S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 : S_00B84C_TIDIG_COMP_CNT(info->info.cs.uses_thread_id[2] ? 2 :
info->cs.uses_thread_id[1] ? 1 : 0) | info->info.cs.uses_thread_id[1] ? 1 : 0) |
S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) | S_00B84C_TG_SIZE_EN(info->info.cs.uses_local_invocation_idx) |
S_00B84C_LDS_SIZE(variant->config.lds_size); S_00B84C_LDS_SIZE(config_in->lds_size);
break; break;
default: default:
unreachable("unsupported shader type"); unreachable("unsupported shader type");
break; break;
} }
if (device->physical_device->rad_info.chip_class >= GFX9 && if (pdevice->rad_info.chip_class >= GFX9 &&
stage == MESA_SHADER_GEOMETRY) { stage == MESA_SHADER_GEOMETRY) {
unsigned es_type = variant->info.gs.es_type; unsigned es_type = info->gs.es_type;
unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
if (es_type == MESA_SHADER_VERTEX) { if (es_type == MESA_SHADER_VERTEX) {
/* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
es_vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; es_vgpr_comp_cnt = info->info.vs.needs_instance_id ? 1 : 0;
} else if (es_type == MESA_SHADER_TESS_EVAL) { } else if (es_type == MESA_SHADER_TESS_EVAL) {
es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; es_vgpr_comp_cnt = info->info.uses_prim_id ? 3 : 2;
} else { } else {
unreachable("invalid shader ES type"); unreachable("invalid shader ES type");
} }
@@ -578,34 +578,25 @@ radv_fill_shader_variant(struct radv_device *device,
/* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
* VGPR[0:4] are always loaded. * VGPR[0:4] are always loaded.
*/ */
if (info->uses_invocation_id) { if (info->info.uses_invocation_id) {
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
} else if (info->uses_prim_id) { } else if (info->info.uses_prim_id) {
gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
} else if (variant->info.gs.vertices_in >= 3) { } else if (info->gs.vertices_in >= 3) {
gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
} else { } else {
gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
} }
variant->config.rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt); config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt);
variant->config.rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL); S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
} else if (device->physical_device->rad_info.chip_class >= GFX9 && } else if (pdevice->rad_info.chip_class >= GFX9 &&
stage == MESA_SHADER_TESS_CTRL) { stage == MESA_SHADER_TESS_CTRL) {
variant->config.rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
} else { } else {
variant->config.rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
} }
void *ptr = radv_alloc_shader_memory(device, variant);
memcpy(ptr, binary->code, binary->code_size);
/* Add end-of-code markers for the UMR disassembler. */
uint32_t *ptr32 = (uint32_t *)ptr + binary->code_size / 4;
for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
} }
static void radv_init_llvm_target() static void radv_init_llvm_target()
@@ -647,26 +638,132 @@ static void radv_init_llvm_once(void)
call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target); call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
} }
struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device,
const struct radv_shader_binary *binary)
{
struct ac_shader_config config = {0};
struct ac_rtld_binary rtld_binary = {0};
struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
if (!variant)
return NULL;
variant->ref_count = 1;
if (binary->type == RADV_BINARY_TYPE_RTLD) {
struct ac_rtld_symbol lds_symbols[1];
unsigned num_lds_symbols = 0;
const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
if (device->physical_device->rad_info.chip_class >= GFX9 &&
binary->stage == MESA_SHADER_GEOMETRY && !binary->is_gs_copy_shader) {
/* We add this symbol even on LLVM <= 8 to ensure that
* shader->config.lds_size is set correctly below.
*/
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
sym->name = "esgs_ring";
sym->size = 32 * 1024;
sym->align = 64 * 1024;
}
struct ac_rtld_open_info open_info = {
.info = &device->physical_device->rad_info,
.shader_type = binary->stage,
.num_parts = 1,
.elf_ptrs = &elf_data,
.elf_sizes = &elf_size,
.num_shared_lds_symbols = num_lds_symbols,
.shared_lds_symbols = lds_symbols,
};
if (!ac_rtld_open(&rtld_binary, open_info)) {
free(variant);
return NULL;
}
if (!ac_rtld_read_config(&rtld_binary, &config)) {
ac_rtld_close(&rtld_binary);
free(variant);
return NULL;
}
if (rtld_binary.lds_size > 0) {
unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
}
variant->code_size = rtld_binary.rx_size;
} else {
assert(binary->type == RADV_BINARY_TYPE_LEGACY);
config = ((struct radv_shader_binary_legacy *)binary)->config;
variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
}
variant->info = binary->variant_info;
radv_postprocess_config(device->physical_device, &config, &binary->variant_info,
binary->stage, &variant->config);
void *dest_ptr = radv_alloc_shader_memory(device, variant);
if (binary->type == RADV_BINARY_TYPE_RTLD) {
struct radv_shader_binary_rtld* bin = (struct radv_shader_binary_rtld *)binary;
struct ac_rtld_upload_info info = {
.binary = &rtld_binary,
.rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset,
.rx_ptr = dest_ptr,
};
if (!ac_rtld_upload(&info)) {
radv_shader_variant_destroy(device, variant);
ac_rtld_close(&rtld_binary);
return NULL;
}
const char *disasm_data;
size_t disasm_size;
if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
radv_shader_variant_destroy(device, variant);
ac_rtld_close(&rtld_binary);
return NULL;
}
variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
variant->disasm_string = malloc(disasm_size + 1);
memcpy(variant->disasm_string, disasm_data, disasm_size);
variant->disasm_string[disasm_size] = 0;
ac_rtld_close(&rtld_binary);
} else {
struct radv_shader_binary_legacy* bin = (struct radv_shader_binary_legacy *)binary;
memcpy(dest_ptr, bin->data, bin->code_size);
/* Add end-of-code markers for the UMR disassembler. */
uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
variant->llvm_ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->code_size)) : NULL;
variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->code_size + bin->llvm_ir_size)) : NULL;
}
return variant;
}
static struct radv_shader_variant * static struct radv_shader_variant *
shader_variant_create(struct radv_device *device, shader_variant_compile(struct radv_device *device,
struct radv_shader_module *module, struct radv_shader_module *module,
struct nir_shader * const *shaders, struct nir_shader * const *shaders,
int shader_count, int shader_count,
gl_shader_stage stage, gl_shader_stage stage,
struct radv_nir_compiler_options *options, struct radv_nir_compiler_options *options,
bool gs_copy_shader, bool gs_copy_shader,
void **code_out, struct radv_shader_binary **binary_out)
unsigned *code_size_out)
{ {
enum radeon_family chip_family = device->physical_device->rad_info.family; enum radeon_family chip_family = device->physical_device->rad_info.family;
enum ac_target_machine_options tm_options = 0; enum ac_target_machine_options tm_options = 0;
struct radv_shader_variant *variant;
struct ac_shader_binary binary;
struct ac_llvm_compiler ac_llvm; struct ac_llvm_compiler ac_llvm;
struct radv_shader_binary *binary = NULL;
struct radv_shader_variant_info variant_info = {0};
bool thread_compiler; bool thread_compiler;
variant = calloc(1, sizeof(struct radv_shader_variant));
if (!variant)
return NULL;
options->family = chip_family; options->family = chip_family;
options->chip_class = device->physical_device->rad_info.chip_class; options->chip_class = device->physical_device->rad_info.chip_class;
@@ -695,53 +792,45 @@ shader_variant_create(struct radv_device *device,
if (gs_copy_shader) { if (gs_copy_shader) {
assert(shader_count == 1); assert(shader_count == 1);
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary, radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
&variant->config, &variant->info, &variant_info, options);
options);
} else { } else {
radv_compile_nir_shader(&ac_llvm, &binary, &variant->config, radv_compile_nir_shader(&ac_llvm, &binary, &variant_info,
&variant->info, shaders, shader_count, shaders, shader_count, options);
options);
} }
binary->variant_info = variant_info;
radv_destroy_llvm_compiler(&ac_llvm, thread_compiler); radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
radv_fill_shader_variant(device, variant, options, &binary, stage); struct radv_shader_variant *variant = radv_shader_variant_create(device, binary);
if (!variant) {
if (code_out) { free(binary);
*code_out = binary.code; return NULL;
*code_size_out = binary.code_size; }
} else
free(binary.code);
free(binary.config);
free(binary.rodata);
free(binary.global_symbol_offsets);
free(binary.relocs);
variant->ref_count = 1;
if (device->keep_shader_info) { if (device->keep_shader_info) {
variant->disasm_string = binary.disasm_string;
variant->llvm_ir_string = binary.llvm_ir_string;
if (!gs_copy_shader && !module->nir) { if (!gs_copy_shader && !module->nir) {
variant->nir = *shaders; variant->nir = *shaders;
variant->spirv = (uint32_t *)module->data; variant->spirv = (uint32_t *)module->data;
variant->spirv_size = module->size; variant->spirv_size = module->size;
} }
} else {
free(binary.disasm_string);
} }
if (binary_out)
*binary_out = binary;
else
free(binary);
return variant; return variant;
} }
struct radv_shader_variant * struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device, radv_shader_variant_compile(struct radv_device *device,
struct radv_shader_module *module, struct radv_shader_module *module,
struct nir_shader *const *shaders, struct nir_shader *const *shaders,
int shader_count, int shader_count,
struct radv_pipeline_layout *layout, struct radv_pipeline_layout *layout,
const struct radv_shader_variant_key *key, const struct radv_shader_variant_key *key,
void **code_out, struct radv_shader_binary **binary_out)
unsigned *code_size_out)
{ {
struct radv_nir_compiler_options options = {0}; struct radv_nir_compiler_options options = {0};
@@ -752,23 +841,22 @@ radv_shader_variant_create(struct radv_device *device,
options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH); options.unsafe_math = !!(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH);
options.supports_spill = true; options.supports_spill = true;
return shader_variant_create(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage,
&options, false, code_out, code_size_out); &options, false, binary_out);
} }
struct radv_shader_variant * struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, radv_create_gs_copy_shader(struct radv_device *device,
struct nir_shader *shader, struct nir_shader *shader,
void **code_out, struct radv_shader_binary **binary_out,
unsigned *code_size_out,
bool multiview) bool multiview)
{ {
struct radv_nir_compiler_options options = {0}; struct radv_nir_compiler_options options = {0};
options.key.has_multiview_view_index = multiview; options.key.has_multiview_view_index = multiview;
return shader_variant_create(device, NULL, &shader, 1, MESA_SHADER_VERTEX, return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
&options, true, code_out, code_size_out); &options, true, binary_out);
} }
void void

View File

@@ -308,6 +308,41 @@ struct radv_shader_variant_info {
}; };
}; };
/* Discriminator stored in struct radv_shader_binary::type; it tells which
 * derived binary struct the common header belongs to.
 */
enum radv_shader_binary_type {
	/* Payload is described by struct radv_shader_binary_legacy. */
	RADV_BINARY_TYPE_LEGACY = 0,
	/* Payload is described by struct radv_shader_binary_rtld. */
	RADV_BINARY_TYPE_RTLD = 1,
};
/* Common header of a compiled shader binary. It is embedded as the first
 * member ("base") of struct radv_shader_binary_legacy and
 * struct radv_shader_binary_rtld, and `type` selects which of the two a
 * given pointer may be cast to.
 */
struct radv_shader_binary {
	/* Which derived struct this header is part of. */
	enum radv_shader_binary_type type;
	/* Shader stage the binary was compiled for. */
	gl_shader_stage stage;
	/* True when this binary is a GS copy shader. */
	bool is_gs_copy_shader;
	/* Shader info gathered at compile time; copied into the variant. */
	struct radv_shader_variant_info variant_info;
	/* Self-referential size so we avoid consistency issues.
	 * NOTE(review): presumably the byte size of the whole allocation
	 * (derived struct plus trailing data) - confirm against the code
	 * that fills it in.
	 */
	uint32_t total_size;
};
/* Shader binary of type RADV_BINARY_TYPE_LEGACY: raw machine code plus the
 * optional LLVM IR and disassembly strings, all stored in `data`.
 */
struct radv_shader_binary_legacy {
	/* Common header; must stay first so the struct can be cast from it. */
	struct radv_shader_binary base;
	/* Shader config as produced by the compiler, before postprocessing. */
	struct ac_shader_config config;
	/* Size in bytes of the machine code at the start of data. */
	unsigned code_size;
	/* Size of the LLVM IR string region; 0 when no IR was recorded. */
	unsigned llvm_ir_size;
	/* Size of the disassembly string region; 0 when there is none. */
	unsigned disasm_size;
	/* data has size of code_size + llvm_ir_size + disasm_size + 2, where
	 * the +2 is for the terminating 0 of each of the two strings.
	 * Layout as read by radv_shader_variant_create(): the code starts at
	 * data, the LLVM IR string at data + code_size and the disassembly
	 * string at data + code_size + llvm_ir_size.
	 * NOTE(review): given those offsets, verify whether llvm_ir_size and
	 * disasm_size already include their terminators. */
	uint8_t data[0];
};
/* Shader binary of type RADV_BINARY_TYPE_RTLD: an ELF object that is linked
 * at upload time, optionally followed by the LLVM IR string.
 */
struct radv_shader_binary_rtld {
	/* Common header; must stay first so the struct can be cast from it. */
	struct radv_shader_binary base;
	/* Size in bytes of the ELF image at the start of data. */
	unsigned elf_size;
	/* Size of the LLVM IR string region; 0 when no IR was recorded. */
	unsigned llvm_ir_size;
	/* The ELF image starts at data; when llvm_ir_size is non-zero the
	 * LLVM IR string starts at data + elf_size. */
	uint8_t data[0];
};
struct radv_shader_variant { struct radv_shader_variant {
uint32_t ref_count; uint32_t ref_count;
@@ -360,17 +395,19 @@ radv_destroy_shader_slabs(struct radv_device *device);
struct radv_shader_variant * struct radv_shader_variant *
radv_shader_variant_create(struct radv_device *device, radv_shader_variant_create(struct radv_device *device,
struct radv_shader_module *module, const struct radv_shader_binary *binary);
struct nir_shader *const *shaders, struct radv_shader_variant *
int shader_count, radv_shader_variant_compile(struct radv_device *device,
struct radv_pipeline_layout *layout, struct radv_shader_module *module,
const struct radv_shader_variant_key *key, struct nir_shader *const *shaders,
void **code_out, int shader_count,
unsigned *code_size_out); struct radv_pipeline_layout *layout,
const struct radv_shader_variant_key *key,
struct radv_shader_binary **binary_out);
struct radv_shader_variant * struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir, radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
void **code_out, unsigned *code_size_out, struct radv_shader_binary **binary_out,
bool multiview); bool multiview);
void void