radv/gfx10: use the correct target machine for Wave32
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
This commit is contained in:
@@ -28,8 +28,10 @@
|
||||
class radv_llvm_per_thread_info {
|
||||
public:
|
||||
radv_llvm_per_thread_info(enum radeon_family arg_family,
|
||||
enum ac_target_machine_options arg_tm_options)
|
||||
: family(arg_family), tm_options(arg_tm_options), passes(NULL) {}
|
||||
enum ac_target_machine_options arg_tm_options,
|
||||
unsigned arg_wave_size)
|
||||
: family(arg_family), tm_options(arg_tm_options),
|
||||
wave_size(arg_wave_size), passes(NULL), passes_wave32(NULL) {}
|
||||
|
||||
~radv_llvm_per_thread_info()
|
||||
{
|
||||
@@ -47,19 +49,28 @@ public:
|
||||
if (!passes)
|
||||
return false;
|
||||
|
||||
if (llvm_info.tm_wave32) {
|
||||
passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
|
||||
if (!passes_wave32)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool compile_to_memory_buffer(LLVMModuleRef module,
|
||||
char **pelf_buffer, size_t *pelf_size)
|
||||
{
|
||||
return ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
|
||||
struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
|
||||
return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
|
||||
}
|
||||
|
||||
bool is_same(enum radeon_family arg_family,
|
||||
enum ac_target_machine_options arg_tm_options) {
|
||||
enum ac_target_machine_options arg_tm_options,
|
||||
unsigned arg_wave_size) {
|
||||
if (arg_family == family &&
|
||||
arg_tm_options == tm_options)
|
||||
arg_tm_options == tm_options &&
|
||||
arg_wave_size == wave_size)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@@ -67,7 +78,9 @@ public:
|
||||
private:
|
||||
enum radeon_family family;
|
||||
enum ac_target_machine_options tm_options;
|
||||
unsigned wave_size;
|
||||
struct ac_compiler_passes *passes;
|
||||
struct ac_compiler_passes *passes_wave32;
|
||||
};
|
||||
|
||||
/* we have to store a linked list per thread due to the possibility of multiple gpus being required */
|
||||
@@ -99,17 +112,18 @@ bool radv_compile_to_elf(struct ac_llvm_compiler *info,
|
||||
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
||||
bool thread_compiler,
|
||||
enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options)
|
||||
enum ac_target_machine_options tm_options,
|
||||
unsigned wave_size)
|
||||
{
|
||||
if (thread_compiler) {
|
||||
for (auto &I : radv_llvm_per_thread_list) {
|
||||
if (I.is_same(family, tm_options)) {
|
||||
if (I.is_same(family, tm_options, wave_size)) {
|
||||
*info = I.llvm_info;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
radv_llvm_per_thread_list.emplace_back(family, tm_options);
|
||||
radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
|
||||
radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
|
||||
|
||||
if (!tinfo.init()) {
|
||||
|
@@ -1159,7 +1159,8 @@ shader_variant_compile(struct radv_device *device,
|
||||
radv_init_llvm_once();
|
||||
radv_init_llvm_compiler(&ac_llvm,
|
||||
thread_compiler,
|
||||
chip_family, tm_options);
|
||||
chip_family, tm_options,
|
||||
radv_get_shader_wave_size(device->physical_device, stage));
|
||||
if (gs_copy_shader) {
|
||||
assert(shader_count == 1);
|
||||
radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
|
||||
|
@@ -29,7 +29,8 @@ extern "C" {
|
||||
bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
|
||||
bool thread_compiler,
|
||||
enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options);
|
||||
enum ac_target_machine_options tm_options,
|
||||
unsigned wave_size);
|
||||
void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
|
||||
bool thread_compiler);
|
||||
|
||||
|
Reference in New Issue
Block a user