diff --git a/src/nouveau/mme/meson.build b/src/nouveau/mme/meson.build index 205d7b3ca8e..1210d363f83 100644 --- a/src/nouveau/mme/meson.build +++ b/src/nouveau/mme/meson.build @@ -1,87 +1,26 @@ # Copyright © 2022 Collabora, Ltd. # SPDX-License-Identifier: MIT -mme_tu104_isa_depend_files = [ - 'mme_tu104.xml', - isaspec_py_deps -] - -mme_tu104_isa = custom_target( - 'mme_isa', - input: ['mme_tu104.xml'], - output: ['mme_tu104_isa.c', 'mme_tu104_isa.h'], - command: [ - prog_isaspec_decode, '--xml', '@INPUT@', - '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', - ], - depend_files: mme_tu104_isa_depend_files, -) - -mme_tu104_encode_h = custom_target( - 'mme-tu104-encode.h', - input: ['mme_tu104.xml'], - output: 'mme_tu104_encode.h', - command: [ - prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' - ], - depend_files: mme_tu104_isa_depend_files, -) - -mme_fermi_isa_depend_files = [ - 'mme_fermi.xml', - isaspec_py_deps -] - -mme_fermi_isa = custom_target( - 'mme_fermi_isa', - input: ['mme_fermi.xml'], - output: ['mme_fermi_isa.c', 'mme_fermi_isa.h'], - command: [ - prog_isaspec_decode, '--xml', '@INPUT@', - '--out-c', '@OUTPUT0@', '--out-h', '@OUTPUT1@', - ], - depend_files: mme_fermi_isa_depend_files, -) - -mme_fermi_encode_h = custom_target( - 'mme-fermi-encode.h', - input: ['mme_fermi.xml'], - output: 'mme_fermi_encode.h', - command: [ - prog_isaspec_encode, '--xml', '@INPUT@', '--out-h', '@OUTPUT@' - ], - depend_files: mme_fermi_isa_depend_files, -) - libnouveau_mme_files = files( 'mme_builder.h', 'mme_fermi.c', 'mme_fermi.h', 'mme_fermi_builder.c', - 'mme_fermi_dump.c', 'mme_fermi_sim.c', 'mme_tu104.c', 'mme_tu104.h', 'mme_tu104_builder.c', - 'mme_tu104_dump.c', 'mme_tu104_sim.c', 'mme_tu104_sim.h', ) _libnouveau_mme = static_library( 'nouveau_mme', - [ - libnouveau_mme_files, - mme_fermi_isa, - mme_fermi_encode_h, - mme_tu104_isa, - mme_tu104_encode_h, - ], + libnouveau_mme_files, include_directories : [inc_include, inc_src], gnu_symbol_visibility : 'hidden', dependencies : [ idep_mesautil, - idep_isaspec_decode, idep_nvidia_headers, ], ) diff --git a/src/nouveau/mme/mme_bitpack_helpers.h b/src/nouveau/mme/mme_bitpack_helpers.h new file mode 100644 index 00000000000..93c23f85813 --- /dev/null +++ b/src/nouveau/mme/mme_bitpack_helpers.h @@ -0,0 +1,71 @@ +/* + * Copyright © 2024 Collabora, Ltd. + * SPDX-License-Identifier: MIT + */ +#ifndef MME_BITPACK_HELPERS_H +#define MME_BITPACK_HELPERS_H + +#include "util/bitpack_helpers.h" +#include "util/u_math.h" + +static inline void +pack_uint(uint32_t *b, unsigned start, unsigned end, uint64_t data) +{ + assert(end >= start); + + uint32_t dw = start / 32; + start -= dw * 32; + end -= dw * 32; + assert(end < 64); + + uint64_t packed = util_bitpack_uint(data, start, end); + + b[dw] |= packed; + if (end >= 32) + b[dw + 1] |= packed >> 32; +} + +static inline void +pack_sint(uint32_t *b, unsigned start, unsigned end, int64_t data) +{ + assert(end >= start); + + uint32_t dw = start / 32; + start -= dw * 32; + end -= dw * 32; + assert(end < 64); + + uint64_t packed = util_bitpack_sint(data, start, end); + + b[dw] |= packed; + if (end >= 32) + b[dw + 1] |= packed >> 32; +} + +static inline uint64_t +unpack_uint(const uint32_t *b, unsigned start, unsigned end) +{ + assert(end >= start); + + uint32_t dw = start / 32; + start -= dw * 32; + end -= dw * 32; + assert(end < 64); + + uint64_t packed = b[dw]; + if (end >= 32) + packed |= (uint64_t)b[dw + 1] << 32; + + packed &= util_bitpack_ones(start, end); + + return packed >> start; +} + +static inline uint64_t +unpack_sint(const uint32_t *b, unsigned start, unsigned end) +{ + unsigned bits = end - start + 1; + return util_sign_extend(unpack_uint(b, start, end), bits); +} + +#endif /* MME_BITPACK_HELPERS_H */ diff --git a/src/nouveau/mme/mme_fermi.c b/src/nouveau/mme/mme_fermi.c index 94c080d0ce7..fd2fdc81634 100644 --- a/src/nouveau/mme/mme_fermi.c +++ b/src/nouveau/mme/mme_fermi.c @@ -3,9 +3,8 @@ * SPDX-License-Identifier: MIT */ #include "mme_fermi.h" -#include "mme_fermi_encode.h" -#include "util/u_math.h" +#include "mme_bitpack_helpers.h" #define OP_TO_STR(OP) [MME_FERMI_OP_##OP] = #OP static const char *op_to_str[] = { @@ -95,67 +94,77 @@ void mme_fermi_encode(uint32_t *out, uint32_t inst_count, const struct mme_fermi_inst *insts) { for (uint32_t i = 0; i < inst_count; i++) { - bitmask_t enc = encode__instruction(NULL, NULL, insts[i]); - out[i] = enc.bitset[0]; + uint32_t *b = &out[i]; + *b = 0; + + pack_uint(b, 0, 3, insts[i].op); + pack_uint(b, 7, 7, insts[i].end_next); + pack_uint(b, 8, 10, insts[i].dst); + + if (insts[i].op != MME_FERMI_OP_BRANCH) { + pack_uint(b, 4, 6, insts[i].assign_op); + } + + if (insts[i].op == MME_FERMI_OP_ALU_REG) { + pack_uint(b, 11, 13, insts[i].src[0]); + pack_uint(b, 14, 16, insts[i].src[1]); + pack_uint(b, 17, 21, insts[i].alu_op); + } else if (insts[i].op == MME_FERMI_OP_ADD_IMM || + insts[i].op == MME_FERMI_OP_STATE) { + pack_uint(b, 11, 13, insts[i].src[0]); + pack_sint(b, 14, 31, insts[i].imm); + } else if (insts[i].op == MME_FERMI_OP_MERGE || + insts[i].op == MME_FERMI_OP_BFE_LSL_IMM || + insts[i].op == MME_FERMI_OP_BFE_LSL_REG) { + pack_uint(b, 11, 13, insts[i].src[0]); + pack_uint(b, 14, 16, insts[i].src[1]); + pack_uint(b, 17, 21, insts[i].bitfield.src_bit); + pack_uint(b, 22, 26, insts[i].bitfield.size); + pack_uint(b, 27, 31, insts[i].bitfield.dst_bit); + } else if (insts[i].op == MME_FERMI_OP_BRANCH) { + pack_uint(b, 4, 4, insts[i].branch.not_zero); + pack_uint(b, 5, 5, insts[i].branch.no_delay); + pack_uint(b, 11, 13, insts[i].src[0]); + pack_sint(b, 14, 31, insts[i].imm); + } } } -static uint64_t -unpack_field(bitmask_t bitmask, unsigned low, unsigned high, bool is_signed) -{ - bitmask_t field, mask; - - assert(high >= low); - - BITSET_ZERO(mask.bitset); - BITSET_SET_RANGE(mask.bitset, 0, high - low); - - BITSET_COPY(field.bitset, bitmask.bitset); - BITSET_SHR(field.bitset, low); - BITSET_AND(field.bitset, field.bitset, mask.bitset); - - uint64_t data = bitmask_to_uint64_t(field); - if (is_signed) - data = util_sign_extend(data, high - low + 1); - - return data; -} - void mme_fermi_decode(struct mme_fermi_inst *insts, const uint32_t *in, uint32_t inst_count) { for (uint32_t i = 0; i < inst_count; i++) { - bitmask_t enc = { .bitset = { in[i] }}; + const uint32_t *b = &in[i]; - insts[i].op = unpack_field(enc, 0, 3, false); - insts[i].end_next = unpack_field(enc, 7, 7, false); - insts[i].dst = unpack_field(enc, 8, 10, false); + insts[i].op = unpack_uint(b, 0, 3); + insts[i].end_next = unpack_uint(b, 7, 7); + insts[i].dst = unpack_uint(b, 8, 10); if (insts[i].op != MME_FERMI_OP_BRANCH) { - insts[i].assign_op = unpack_field(enc, 4, 6, false); + insts[i].assign_op = unpack_uint(b, 4, 6); } if (insts[i].op == MME_FERMI_OP_ALU_REG) { - insts[i].src[0] = unpack_field(enc, 11, 13, false); - insts[i].src[1] = unpack_field(enc, 14, 16, false); - insts[i].alu_op = unpack_field(enc, 17, 21, false); + insts[i].src[0] = unpack_uint(b, 11, 13); + insts[i].src[1] = unpack_uint(b, 14, 16); + insts[i].alu_op = unpack_uint(b, 17, 21); } else if (insts[i].op == MME_FERMI_OP_ADD_IMM || insts[i].op == MME_FERMI_OP_STATE) { - insts[i].src[0] = unpack_field(enc, 11, 13, false); - insts[i].imm = unpack_field(enc, 14, 31, false); + insts[i].src[0] = unpack_uint(b, 11, 13); + insts[i].imm = unpack_sint(b, 14, 31); } else if (insts[i].op == MME_FERMI_OP_MERGE || insts[i].op == MME_FERMI_OP_BFE_LSL_IMM || insts[i].op == MME_FERMI_OP_BFE_LSL_REG) { - insts[i].src[0] = unpack_field(enc, 11, 13, false); - insts[i].src[1] = unpack_field(enc, 14, 16, false); - insts[i].bitfield.src_bit = unpack_field(enc, 17, 21, false); - insts[i].bitfield.size = unpack_field(enc, 22, 26, false); - insts[i].bitfield.dst_bit = unpack_field(enc, 27, 31, false); + insts[i].src[0] = unpack_uint(b, 11, 13); + insts[i].src[1] = unpack_uint(b, 14, 16); + insts[i].bitfield.src_bit = unpack_uint(b, 17, 21); + insts[i].bitfield.size = unpack_uint(b, 22, 26); + insts[i].bitfield.dst_bit = unpack_uint(b, 27, 31); } else if (insts[i].op == MME_FERMI_OP_BRANCH) { - insts[i].branch.not_zero = unpack_field(enc, 4, 4, false); - insts[i].branch.no_delay = unpack_field(enc, 5, 5, false); - insts[i].src[0] = unpack_field(enc, 11, 13, false); - insts[i].imm = unpack_field(enc, 14, 31, false); + insts[i].branch.not_zero = unpack_uint(b, 4, 4); + insts[i].branch.no_delay = unpack_uint(b, 5, 5); + insts[i].src[0] = unpack_uint(b, 11, 13); + insts[i].imm = unpack_sint(b, 14, 31); } } } @@ -281,3 +290,14 @@ mme_fermi_print(FILE *fp, const struct mme_fermi_inst *insts, mme_fermi_print_inst(fp, 1, &insts[i]); } } + +void +mme_fermi_dump(FILE *fp, uint32_t *encoded, size_t encoded_size) +{ + uint32_t inst_count = encoded_size / 4; + for (uint32_t i = 0; i < inst_count; i++) { + struct mme_fermi_inst inst; + mme_fermi_decode(&inst, &encoded[i], 1); + mme_fermi_print_inst(fp, 1, &inst); + } +} diff --git a/src/nouveau/mme/mme_fermi.xml b/src/nouveau/mme/mme_fermi.xml deleted file mode 100644 index 13f2ef763fc..00000000000 --- a/src/nouveau/mme/mme_fermi.xml +++ /dev/null @@ -1,201 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {NAME} {ASSIGN_OP} {ALU_OP} {DST} {SRC0} {SRC1} - - - - x - - - - - - - src.assign_op - src.alu_op - src.dst - src.src[0] - src.src[1] - - - - - - {NAME} {ASSIGN_OP} {DST} {SRC0} {IMM} - - - - x - - - - - - src.assign_op - src.dst - src.src[0] - src.imm - - - - - - {NAME} {ASSIGN_OP} {DST} {SRC0} {SRC1} {BF_SRC_BIT} {BF_SIZE} {BF_DST_BIT} - - - - x - - - - - - - - - src.assign_op - src.dst - src.src[0] - src.src[1] - src.bitfield.src_bit - src.bitfield.size - src.bitfield.dst_bit - - - - - - {NO_DELAY} B{NOT_ZERO} {SRC0} {IMM} - - - - - xxxxx - - - - - src.branch.not_zero - src.branch.no_delay - src.src[0] - src.imm - - - - - - Encoding of a NVIDIA Fermi Macro Method instruction. All instructions are 32b. - - - {END_NEXT} {OP} {ALU_OP_ENCODING} {SRC0_IMM_ENCODING} {BF_ENCODING} {BRANCH_ENCODING} - - - xxx - - xxxxxxxxxxxxxxxxxxxxxxxx - - - {OP} == 0 - - - - - {OP} == 1 || {OP} == 5 - - - - - {OP} == 2 || {OP} == 3 || {OP} == 4 - - - - - {OP} == 7 - - - - - src.end_next - src.op - src - src - src - src - - - diff --git a/src/nouveau/mme/mme_fermi_dump.c b/src/nouveau/mme/mme_fermi_dump.c deleted file mode 100644 index fa8bd093f74..00000000000 --- a/src/nouveau/mme/mme_fermi_dump.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright © 2022 Mary Guillemard - * SPDX-License-Identifier: MIT - */ -#include "mme_fermi.h" - -#include "mme_fermi_isa.h" -#include "isa.h" - -#include - -static void -disasm_instr_cb(void *d, unsigned n, void *instr) -{ - fprintf(d, "%3d[%08x]", n, *(uint32_t *)instr); -} - -void -mme_fermi_dump(FILE *fp, uint32_t *encoded, size_t encoded_size) -{ - const struct isa_decode_options opts = { - .show_errors = true, - .branch_labels = true, - .cbdata = fp, - .pre_instr_cb = disasm_instr_cb, - }; - isa_disasm(encoded, encoded_size, fp, &opts); -} diff --git a/src/nouveau/mme/mme_tu104.c b/src/nouveau/mme/mme_tu104.c index 5b13e2c6023..28a1575b56f 100644 --- a/src/nouveau/mme/mme_tu104.c +++ b/src/nouveau/mme/mme_tu104.c @@ -3,9 +3,8 @@ * SPDX-License-Identifier: MIT */ #include "mme_tu104.h" -#include "mme_tu104_encode.h" -#include "util/u_math.h" +#include "mme_bitpack_helpers.h" #include @@ -88,68 +87,70 @@ mme_tu104_encode(uint32_t *out, uint32_t inst_count, const struct mme_tu104_inst *insts) { for (uint32_t i = 0; i < inst_count; i++) { - bitmask_t enc = encode__instruction(NULL, NULL, insts[i]); + uint32_t b[3] = { 0, 0, 0}; + + pack_uint(b, 0, 0, insts[i].end_next); + pack_uint(b, 1, 4, insts[i].pred_mode); + pack_uint(b, 5, 9, insts[i].pred); + + pack_uint(b, 10, 14, insts[i].alu[0].op); + pack_uint(b, 15, 19, insts[i].alu[0].dst); + pack_uint(b, 20, 24, insts[i].alu[0].src[0]); + pack_uint(b, 25, 29, insts[i].alu[0].src[1]); + pack_uint(b, 30, 45, insts[i].imm[0]); + + pack_uint(b, 46, 50, insts[i].alu[1].op); + pack_uint(b, 51, 55, insts[i].alu[1].dst); + pack_uint(b, 56, 60, insts[i].alu[1].src[0]); + pack_uint(b, 61, 65, insts[i].alu[1].src[1]); + pack_uint(b, 66, 81, insts[i].imm[1]); + + pack_uint(b, 82, 84, insts[i].out[0].mthd); + pack_uint(b, 85, 88, insts[i].out[0].emit); + + pack_uint(b, 89, 91, insts[i].out[1].mthd); + pack_uint(b, 92, 95, insts[i].out[1].emit); /* Annoyingly, the words are reversed in the actual encoding */ - out[i * 3 + 0] = enc.bitset[2]; - out[i * 3 + 1] = enc.bitset[1]; - out[i * 3 + 2] = enc.bitset[0]; + out[i * 3 + 2] = b[0]; + out[i * 3 + 1] = b[1]; + out[i * 3 + 0] = b[2]; } } -static uint64_t -unpack_field(bitmask_t bitmask, unsigned low, unsigned high, bool is_signed) -{ - bitmask_t field, mask; - - assert(high >= low); - - BITSET_ZERO(mask.bitset); - BITSET_SET_RANGE(mask.bitset, 0, high - low); - - BITSET_COPY(field.bitset, bitmask.bitset); - BITSET_SHR(field.bitset, low); - BITSET_AND(field.bitset, field.bitset, mask.bitset); - - uint64_t data = bitmask_to_uint64_t(field); - if (is_signed) - data = util_sign_extend(data, high - low + 1); - - return data; -} - void mme_tu104_decode(struct mme_tu104_inst *insts, const uint32_t *in, uint32_t inst_count) { for (uint32_t i = 0; i < inst_count; i++) { /* Annoyingly, the words are reversed in the actual encoding */ - bitmask_t enc; - enc.bitset[0] = in[i * 3 + 2]; - enc.bitset[1] = in[i * 3 + 1]; - enc.bitset[2] = in[i * 3 + 0]; + const uint32_t b[3] = { + in[i * 3 + 2], + in[i * 3 + 1], + in[i * 3 + 0], + }; - insts[i].end_next = unpack_field(enc, 0, 0, false); - insts[i].pred_mode = unpack_field(enc, 1, 4, false); - insts[i].pred = unpack_field(enc, 5, 9, false); + insts[i].end_next = unpack_uint(b, 0, 0); + insts[i].pred_mode = unpack_uint(b, 1, 4); + insts[i].pred = unpack_uint(b, 5, 9); - insts[i].alu[0].op = unpack_field(enc, 10, 14, false); - insts[i].alu[0].dst = unpack_field(enc, 15, 19, false); - insts[i].alu[0].src[0] = unpack_field(enc, 20, 24, false); - insts[i].alu[0].src[1] = unpack_field(enc, 25, 29, false); - insts[i].imm[0] = unpack_field(enc, 30, 45, false); + insts[i].alu[0].op = unpack_uint(b, 10, 14); + insts[i].alu[0].dst = unpack_uint(b, 15, 19); + insts[i].alu[0].src[0] = unpack_uint(b, 20, 24); + insts[i].alu[0].src[1] = unpack_uint(b, 25, 29); + insts[i].imm[0] = unpack_uint(b, 30, 45); - insts[i].alu[1].op = unpack_field(enc, 46, 50, false); - insts[i].alu[1].dst = unpack_field(enc, 51, 55, false); - insts[i].alu[1].src[0] = unpack_field(enc, 56, 60, false); - insts[i].alu[1].src[1] = unpack_field(enc, 61, 65, false); - insts[i].imm[1] = unpack_field(enc, 66, 81, false); + insts[i].alu[1].op = unpack_uint(b, 46, 50); + insts[i].alu[1].dst = unpack_uint(b, 51, 55); + insts[i].alu[1].src[0] = unpack_uint(b, 56, 60); + insts[i].alu[1].src[1] = unpack_uint(b, 61, 65); + insts[i].imm[1] = unpack_uint(b, 66, 81); - insts[i].out[0].mthd = unpack_field(enc, 82, 84, false); - insts[i].out[0].emit = unpack_field(enc, 85, 88, false); + insts[i].out[0].mthd = unpack_uint(b, 82, 84); + insts[i].out[0].emit = unpack_uint(b, 85, 88); - insts[i].out[1].mthd = unpack_field(enc, 89, 91, false); - insts[i].out[1].emit = unpack_field(enc, 92, 95, false); + insts[i].out[1].mthd = unpack_uint(b, 89, 91); + insts[i].out[1].emit = unpack_uint(b, 92, 95); } } @@ -552,3 +553,14 @@ mme_tu104_print(FILE *fp, const struct mme_tu104_inst *insts, mme_tu104_print_inst(fp, 1, &insts[i]); } } + +void +mme_tu104_dump(FILE *fp, uint32_t *encoded, size_t encoded_size) +{ + uint32_t inst_count = encoded_size / 12; + for (uint32_t i = 0; i < inst_count; i++) { + struct mme_tu104_inst inst; + mme_tu104_decode(&inst, &encoded[i * 3], 1); + mme_tu104_print_inst(fp, 1, &inst); + } +} diff --git a/src/nouveau/mme/mme_tu104.xml b/src/nouveau/mme/mme_tu104.xml deleted file mode 100644 index 78c7ef9ac24..00000000000 --- a/src/nouveau/mme/mme_tu104.xml +++ /dev/null @@ -1,161 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - {DST} = {OP} {SRC0} {SRC1} - - - - - - - src.dst - src.op - src.src[0] - src.src[1] - - - - - - {EMIT} -> {MTHD} - - - - - src.mthd - src.emit - - - - - - {END_NEXT}({PRED} {PRED_MODE}) imm=[{IMM0}, {IMM1}], {ALU0}, {ALU1}, {OUT0}, {OUT1} - - - - - - - - - - - - src.end_next - src.pred_mode - src.pred - src.alu[0] - src.imm[0] - src.alu[1] - src.imm[1] - src.out[0] - src.out[1] - - - diff --git a/src/nouveau/mme/mme_tu104_dump.c b/src/nouveau/mme/mme_tu104_dump.c deleted file mode 100644 index 639122fba6f..00000000000 --- a/src/nouveau/mme/mme_tu104_dump.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright © 2022 Collabora Ltd. - * SPDX-License-Identifier: MIT - */ -#include "mme_tu104.h" - -#include "mme_tu104_isa.h" -#include "isa.h" - -#include - -static void -disasm_instr_cb(void *d, unsigned n, void *instr) -{ - uint32_t *dwords = (uint32_t *)instr; - fprintf(d, "%3d[%08x_%08x_%08x] ", n, dwords[2], dwords[1], dwords[0]); -} - -void -mme_tu104_dump(FILE *fp, uint32_t *encoded, size_t encoded_size) -{ - assert(encoded_size % 12 == 0); - - uint32_t *swapped = malloc(encoded_size); - for (uint32_t i = 0; i < (encoded_size / 12); i++) { - swapped[i * 3 + 0] = encoded[i * 3 + 2]; - swapped[i * 3 + 1] = encoded[i * 3 + 1]; - swapped[i * 3 + 2] = encoded[i * 3 + 0]; - } - - const struct isa_decode_options opts = { - .show_errors = true, - .branch_labels = true, - .cbdata = fp, - .pre_instr_cb = disasm_instr_cb, - }; - isa_disasm(swapped, encoded_size, fp, &opts); - - free(swapped); -}