From ecdcf22d5d848edb582ac1c49c6c6de74309a476 Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 15 Oct 2020 16:38:13 +0100 Subject: [PATCH] aco: switch aco_print_asm to a FILE * MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Streams are really stateful and (IMO) difficult to read for non-trivial usage. This is also more consistent with NIR and the rest of ACO. Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_interface.cpp | 28 ++++++++++------ src/amd/compiler/aco_ir.h | 2 +- src/amd/compiler/aco_print_asm.cpp | 54 ++++++++++++------------------ src/amd/compiler/tests/helpers.cpp | 5 +-- 4 files changed, 42 insertions(+), 47 deletions(-) diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index 55ddd19b638..7a62fdb0220 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -28,7 +28,6 @@ #include "vulkan/radv_shader_args.h" #include -#include static aco_compiler_statistic_info statistic_infos[] = { [aco::statistic_hash] = {"Hash", "CRC32 hash of code and constant data"}, @@ -173,16 +172,25 @@ void aco_compile_shader(unsigned shader_count, std::string disasm; if (get_disasm) { - std::ostringstream stream; - if (aco::print_asm(program.get(), code, exec_size / 4u, stream)) { - std::cerr << "Failed to disassemble program:\n"; - aco_print_program(program.get(), stderr); - std::cerr << stream.str() << std::endl; - abort(); + char *data = NULL; + size_t disasm_size = 0; + FILE *f = open_memstream(&data, &disasm_size); + if (f) { + bool fail = aco::print_asm(program.get(), code, exec_size / 4u, f); + fputc(0, f); + fclose(f); + + if (fail) { + fprintf(stderr, "Failed to disassemble program:\n"); + aco_print_program(program.get(), stderr); + fputs(data, stderr); + abort(); + } } - stream << '\0'; - disasm = stream.str(); - size += disasm.size(); + + disasm = std::string(data, data + disasm_size); + size += disasm_size; + free(data); } size_t stats_size = 0; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 4535d26e5e9..10c78b32af0 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1748,7 +1748,7 @@ void insert_wait_states(Program* program); void insert_NOPs(Program* program); unsigned emit_program(Program* program, std::vector& code); bool print_asm(Program *program, std::vector& binary, - unsigned exec_size, std::ostream& out); + unsigned exec_size, FILE *output); bool validate_ir(Program* program); bool validate_ra(Program* program); #ifndef NDEBUG diff --git a/src/amd/compiler/aco_print_asm.cpp b/src/amd/compiler/aco_print_asm.cpp index 663fa868724..af12314bf3f 100644 --- a/src/amd/compiler/aco_print_asm.cpp +++ b/src/amd/compiler/aco_print_asm.cpp @@ -16,7 +16,7 @@ namespace { * for GFX6-GFX7 if found on the system, this is better than nothing. */ bool print_asm_gfx6_gfx7(Program *program, std::vector& binary, - std::ostream& out) + FILE *output) { char path[] = "/tmp/fileXXXXXX"; char line[2048], command[128]; @@ -72,13 +72,13 @@ bool print_asm_gfx6_gfx7(Program *program, std::vector& binary, p = popen(command, "r"); if (p) { if (!fgets(line, sizeof(line), p)) { - out << "clrxdisasm not found\n"; + fprintf(output, "clrxdisasm not found\n"); pclose(p); goto fail; } do { - out << line; + fputs(line, output); } while (fgets(line, sizeof(line), p)); pclose(p); @@ -94,7 +94,7 @@ fail: std::pair disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t *binary, unsigned exec_size, size_t pos, - std::ostream& out) + char *outline, unsigned outline_size) { /* mask out src2 on v_writelane_b32 */ if (((chip == GFX8 || chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) || @@ -102,10 +102,9 @@ std::pair disasm_instr(chip_class chip, LLVMDisasmContextRef disas binary[pos+1] = binary[pos+1] & 0xF803FFFF; } - char outline[1024]; size_t l = LLVMDisasmInstruction(disasm, (uint8_t *) &binary[pos], (exec_size - pos) * sizeof(uint32_t), pos * 4, - outline, sizeof(outline)); + outline, outline_size); if (chip >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) && @@ -122,34 +121,32 @@ std::pair disasm_instr(chip_class chip, LLVMDisasmContextRef disas (chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */ (chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */ (chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) { - out << "\tinteger addition + clamp"; + strcpy(outline, "\tinteger addition + clamp"); bool has_literal = chip >= GFX10 && (((binary[pos+1] & 0x1ff) == 0xff) || (((binary[pos+1] >> 9) & 0x1ff) == 0xff)); size = 2 + has_literal; } else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) { - out << "\tv_cndmask_b32 + sdwa"; + strcpy(outline, "\tv_cndmask_b32 + sdwa"); size = 2; } else if (!l) { - out << "(invalid instruction)"; + strcpy(outline, "(invalid instruction)"); size = 1; invalid = true; } else { - out << outline; assert(l % 4 == 0); size = l / 4; } - out << std::right; return std::make_pair(invalid, size); } } /* end namespace */ bool print_asm(Program *program, std::vector& binary, - unsigned exec_size, std::ostream& out) + unsigned exec_size, FILE *output) { if (program->chip_class <= GFX7) { /* Do not abort if clrxdisasm isn't found. */ - print_asm_gfx6_gfx7(program, binary, out); + print_asm_gfx6_gfx7(program, binary, output); return false; } @@ -201,58 +198,51 @@ bool print_asm(Program *program, std::vector& binary, pos += prev_size; continue; } else { - if (repeat_count) { - out << std::left << std::setw(0) << std::dec << std::setfill(' ') << "\t(then repeated " << repeat_count << " times)" << std::endl; - } + if (repeat_count) + fprintf(output, "\t(then repeated %u times)\n", repeat_count); repeat_count = 0; } while (next_block < program->blocks.size() && pos == program->blocks[next_block].offset) { if (referenced_blocks[next_block]) - out << "BB" << std::dec << next_block << ":" << std::endl; + fprintf(output, "BB%u:\n", next_block); next_block++; } - const int align_width = 60; - out << std::left << std::setw(align_width) << std::setfill(' '); - + char outline[1024]; std::pair res = disasm_instr( - program->chip_class, disasm, binary.data(), exec_size, pos, out); + program->chip_class, disasm, binary.data(), exec_size, pos, outline, sizeof(outline)); invalid |= res.first; - out << std::right; + fprintf(output, "%-60s ;", outline); - out << " ;"; for (unsigned i = 0; i < res.second; i++) - out << " " << std::setfill('0') << std::setw(8) << std::hex << binary[pos + i]; - out << std::endl; + fprintf(output, " %.8x", binary[pos + i]); + fputc('\n', output); prev_size = res.second; prev_pos = pos; pos += res.second; } - out << std::setfill(' ') << std::setw(0) << std::dec; assert(next_block == program->blocks.size()); LLVMDisasmDispose(disasm); if (program->constant_data.size()) { - out << std::endl << "/* constant data */" << std::endl; + fputs("\n/* constant data */\n", output); for (unsigned i = 0; i < program->constant_data.size(); i += 32) { - out << '[' << std::setw(6) << std::setfill('0') << std::dec << i << ']'; + fprintf(output, "[%.6u]", i); unsigned line_size = std::min(program->constant_data.size() - i, 32); for (unsigned j = 0; j < line_size; j += 4) { unsigned size = std::min(program->constant_data.size() - (i + j), 4); uint32_t v = 0; memcpy(&v, &program->constant_data[i + j], size); - out << " " << std::setw(8) << std::setfill('0') << std::hex << v; + fprintf(output, " %.8x", v); } - out << std::endl; + fputc('\n', output); } } - out << std::setfill(' ') << std::setw(0) << std::dec; - return invalid; } diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp index 295108e9879..05395cdfa89 100644 --- a/src/amd/compiler/tests/helpers.cpp +++ b/src/amd/compiler/tests/helpers.cpp @@ -187,10 +187,7 @@ void finish_assembler_test() } else if (program->chip_class == GFX10 && LLVM_VERSION_MAJOR < 9) { skip_test("LLVM 9 needed for GFX10 disassembly"); } else if (program->chip_class >= GFX8) { - std::ostringstream ss; - print_asm(program.get(), binary, exec_size / 4u, ss); - - fputs(ss.str().c_str(), output); + print_asm(program.get(), binary, exec_size / 4u, output); } else { //TODO: maybe we should use CLRX and skip this test if it's not available? for (uint32_t dword : binary)