freedreno/crashdec: Split out mempool decoding
Before we start adding GMU HFI decoding, let's split the other big section-specific decoding (mempool) out into its own file. Signed-off-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13937>
This commit is contained in:
313
src/freedreno/decode/crashdec-mempool.c
Normal file
313
src/freedreno/decode/crashdec-mempool.c
Normal file
@@ -0,0 +1,313 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2020 Valve Corporation
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "crashdec.h"
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
|
||||||
|
bool pipe)
|
||||||
|
{
|
||||||
|
if (pipe) {
|
||||||
|
struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
|
||||||
|
printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
|
||||||
|
|
||||||
|
if (!strcmp(info->typeinfo->name, "void")) {
|
||||||
|
/* registers that ignore their payload */
|
||||||
|
} else {
|
||||||
|
printf("\t\t\t");
|
||||||
|
dump_register(rnn_pipe, reg, data);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
|
||||||
|
dump_register_val(reg, data, 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Decode one 128-bit mem pool chunk and print the register writes it holds.
 *
 * A chunk carries up to two register writes.  Each one has its own enable
 * bit, payload, register offset, pipe flag and context bit; only the enabled
 * slots are printed, slot 0 first.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Bit layout of a chunk, overlaid on its four raw dwords. */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } raw;

   memcpy(&raw, chunk, sizeof(uint32_t) * 4);

   /* Gather both slots into a small table so they can be walked in order. */
   const struct {
      bool enabled;
      unsigned reg;
      uint32_t data;
      unsigned context;
      bool pipe;
   } slots[2] = {
      {raw.reg0_enabled, raw.reg0, raw.data0, raw.reg0_context, raw.reg0_pipe},
      {raw.reg1_enabled, raw.reg1, raw.data1, raw.reg1_context, raw.reg1_pipe},
   };

   for (unsigned i = 0; i < 2; i++) {
      if (!slots[i].enabled)
         continue;

      dump_mem_pool_reg_write(slots[i].reg, slots[i].data, slots[i].context,
                              slots[i].pipe);
   }
}
|
||||||
|
|
||||||
|
void
|
||||||
|
dump_cp_mem_pool(uint32_t *mempool)
|
||||||
|
{
|
||||||
|
/* The mem pool is a shared pool of memory used for storing in-flight
|
||||||
|
* register writes. There are 6 different queues, one for each
|
||||||
|
* cluster. Writing to $data (or for some special registers, $addr)
|
||||||
|
* pushes data onto the appropriate queue, and each queue is pulled
|
||||||
|
* from by the appropriate cluster. The queues are thus written to
|
||||||
|
* in-order, but may be read out-of-order.
|
||||||
|
*
|
||||||
|
* The queues are conceptually divided into 128-bit "chunks", and the
|
||||||
|
* read and write pointers are in units of chunks. These chunks are
|
||||||
|
* organized internally into 8-chunk "blocks", and memory is allocated
|
||||||
|
* dynamically in terms of blocks. Each queue is represented as a
|
||||||
|
* singly-linked list of blocks, as well as 3-bit start/end chunk
|
||||||
|
* pointers that point within the first/last block. The next pointers
|
||||||
|
* are located in a separate array, rather than inline.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* TODO: The firmware CP_MEM_POOL save/restore routines do something
|
||||||
|
* like:
|
||||||
|
*
|
||||||
|
* cread $02, [ $00 + 0 ]
|
||||||
|
* and $02, $02, 0x118
|
||||||
|
* ...
|
||||||
|
* brne $02, 0, #label
|
||||||
|
* mov $03, 0x2000
|
||||||
|
* mov $03, 0x1000
|
||||||
|
* label:
|
||||||
|
* ...
|
||||||
|
*
|
||||||
|
* I think that control register 0 is the GPU version, and some
|
||||||
|
* versions have a smaller mem pool. It seems some models have a mem
|
||||||
|
* pool that's half the size, and a bunch of offsets are shifted
|
||||||
|
* accordingly. Unfortunately the kernel driver's dumping code doesn't
|
||||||
|
* seem to take this into account, even the downstream android driver,
|
||||||
|
* and we don't know which versions 0x8, 0x10, or 0x100 correspond
|
||||||
|
* to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
|
||||||
|
*/
|
||||||
|
bool small_mem_pool = false;
|
||||||
|
|
||||||
|
/* The array of next pointers for each block. */
|
||||||
|
const uint32_t *next_pointers =
|
||||||
|
small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
|
||||||
|
|
||||||
|
/* Maximum number of blocks in the pool, also the size of the pointers
|
||||||
|
* array.
|
||||||
|
*/
|
||||||
|
const int num_blocks = small_mem_pool ? 0x30 : 0x80;
|
||||||
|
|
||||||
|
/* Number of queues */
|
||||||
|
const unsigned num_queues = 6;
|
||||||
|
|
||||||
|
/* Unfortunately the per-queue state is a little more complicated than
|
||||||
|
* a simple pair of begin/end pointers. Instead of a single beginning
|
||||||
|
* block, there are *two*, with the property that either the two are
|
||||||
|
* equal or the second is the "next" of the first. Similarly there are
|
||||||
|
* two end blocks. Thus the queue either looks like this:
|
||||||
|
*
|
||||||
|
* A -> B -> ... -> C -> D
|
||||||
|
*
|
||||||
|
* Or like this, or some combination:
|
||||||
|
*
|
||||||
|
* A/B -> ... -> C/D
|
||||||
|
*
|
||||||
|
* However, there's only one beginning/end chunk offset. Now the
|
||||||
|
* question is, which of A or B is the actual start? I.e. is the chunk
|
||||||
|
* offset an offset inside A or B? It depends. I'll show a typical read
|
||||||
|
* cycle, starting here (read pointer marked with a *) with a chunk
|
||||||
|
* offset of 0:
|
||||||
|
*
|
||||||
|
* A B
|
||||||
|
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||||||
|
* |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
|
||||||
|
*
|
||||||
|
* Once the pointer advances far enough, the hardware decides to free
|
||||||
|
* A, after which the read-side state looks like:
|
||||||
|
*
|
||||||
|
* (free) A/B
|
||||||
|
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||||||
|
* |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
|
||||||
|
*
|
||||||
|
* Then after advancing the pointer a bit more, the hardware fetches
|
||||||
|
* the "next" pointer for A and stores it in B:
|
||||||
|
*
|
||||||
|
* (free) A B
|
||||||
|
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||||||
|
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
|
||||||
|
*
|
||||||
|
* Then the read pointer advances into B, at which point we've come
|
||||||
|
* back to the first state having advanced a whole block:
|
||||||
|
*
|
||||||
|
* (free) A B
|
||||||
|
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||||||
|
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* There is a similar cycle for the write pointer. Now, the question
|
||||||
|
* is, how do we know which state we're in? We need to know this to
|
||||||
|
* know whether the pointer (*) is in A or B if they're different. It
|
||||||
|
* seems like there should be some bit somewhere describing this, but
|
||||||
|
* after lots of experimentation I've come up empty-handed. For now we
|
||||||
|
* assume that if the pointer is in the first half, then we're in
|
||||||
|
* either the first or second state and use B, and otherwise we're in
|
||||||
|
* the second or third state and use A. So far I haven't seen anything
|
||||||
|
* that violates this assumption.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t unk0;
|
||||||
|
uint32_t padding0[7]; /* Mirrors of unk0 */
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t chunk : 3;
|
||||||
|
uint32_t first_block : 32 - 3;
|
||||||
|
} writer[6];
|
||||||
|
uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
|
||||||
|
|
||||||
|
uint32_t unk1;
|
||||||
|
uint32_t padding2[7]; /* Mirrors of unk1 */
|
||||||
|
|
||||||
|
uint32_t writer_second_block[6];
|
||||||
|
uint32_t padding3[2];
|
||||||
|
|
||||||
|
uint32_t unk2[6];
|
||||||
|
uint32_t padding4[2];
|
||||||
|
|
||||||
|
struct {
|
||||||
|
uint32_t chunk : 3;
|
||||||
|
uint32_t first_block : 32 - 3;
|
||||||
|
} reader[6];
|
||||||
|
uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
|
||||||
|
|
||||||
|
uint32_t unk3;
|
||||||
|
uint32_t padding6[7]; /* Mirrors of unk3 */
|
||||||
|
|
||||||
|
uint32_t reader_second_block[6];
|
||||||
|
uint32_t padding7[2];
|
||||||
|
|
||||||
|
uint32_t block_count[6];
|
||||||
|
uint32_t padding[2];
|
||||||
|
|
||||||
|
uint32_t unk4;
|
||||||
|
uint32_t padding9[7]; /* Mirrors of unk4 */
|
||||||
|
} data1;
|
||||||
|
|
||||||
|
const uint32_t *data1_ptr =
|
||||||
|
small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
|
||||||
|
memcpy(&data1, data1_ptr, sizeof(data1));
|
||||||
|
|
||||||
|
/* Based on the kernel, the first dword is the mem pool size (in
|
||||||
|
* blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
|
||||||
|
*/
|
||||||
|
const uint32_t *data2_ptr =
|
||||||
|
small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
|
||||||
|
const int data2_size = 0x60;
|
||||||
|
|
||||||
|
/* This seems to be the size of each queue in chunks. */
|
||||||
|
const uint32_t *queue_sizes = &data2_ptr[0x18];
|
||||||
|
|
||||||
|
printf("\tdata2:\n");
|
||||||
|
dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
|
||||||
|
|
||||||
|
/* These seem to be some kind of counter of allocated/deallocated blocks */
|
||||||
|
if (verbose) {
|
||||||
|
printf("\tunk0: %x\n", data1.unk0);
|
||||||
|
printf("\tunk1: %x\n", data1.unk1);
|
||||||
|
printf("\tunk3: %x\n", data1.unk3);
|
||||||
|
printf("\tunk4: %x\n\n", data1.unk4);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int queue = 0; queue < num_queues; queue++) {
|
||||||
|
const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS",
|
||||||
|
"GRAS", "SP_PS", "PS"};
|
||||||
|
printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
printf("\t\twriter_first_block: 0x%x\n",
|
||||||
|
data1.writer[queue].first_block);
|
||||||
|
printf("\t\twriter_second_block: 0x%x\n",
|
||||||
|
data1.writer_second_block[queue]);
|
||||||
|
printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
|
||||||
|
printf("\t\treader_first_block: 0x%x\n",
|
||||||
|
data1.reader[queue].first_block);
|
||||||
|
printf("\t\treader_second_block: 0x%x\n",
|
||||||
|
data1.reader_second_block[queue]);
|
||||||
|
printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
|
||||||
|
printf("\t\tblock_count: %d\n", data1.block_count[queue]);
|
||||||
|
printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
|
||||||
|
printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t cur_chunk = data1.reader[queue].chunk;
|
||||||
|
uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
|
||||||
|
: data1.reader_second_block[queue];
|
||||||
|
uint32_t last_chunk = data1.writer[queue].chunk;
|
||||||
|
uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
|
||||||
|
: data1.writer_second_block[queue];
|
||||||
|
|
||||||
|
if (verbose)
|
||||||
|
printf("\tblock %x\n", cur_block);
|
||||||
|
if (cur_block >= num_blocks) {
|
||||||
|
fprintf(stderr, "block %x too large\n", cur_block);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
unsigned calculated_queue_size = 0;
|
||||||
|
while (cur_block != last_block || cur_chunk != last_chunk) {
|
||||||
|
calculated_queue_size++;
|
||||||
|
uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
|
||||||
|
|
||||||
|
dump_mem_pool_chunk(chunk_ptr);
|
||||||
|
|
||||||
|
printf("\t%05x: %08x %08x %08x %08x\n",
|
||||||
|
4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
|
||||||
|
chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
|
||||||
|
|
||||||
|
cur_chunk++;
|
||||||
|
if (cur_chunk == 8) {
|
||||||
|
cur_block = next_pointers[cur_block];
|
||||||
|
if (verbose)
|
||||||
|
printf("\tblock %x\n", cur_block);
|
||||||
|
if (cur_block >= num_blocks) {
|
||||||
|
fprintf(stderr, "block %x too large\n", cur_block);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
cur_chunk = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (calculated_queue_size != queue_sizes[queue]) {
|
||||||
|
printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
|
||||||
|
calculated_queue_size);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@@ -36,54 +36,20 @@
|
|||||||
* or times out after 5min)
|
* or times out after 5min)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <assert.h>
|
|
||||||
#include <getopt.h>
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdarg.h>
|
|
||||||
#include <stdbool.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
|
|
||||||
#include "freedreno_pm4.h"
|
#include "crashdec.h"
|
||||||
|
|
||||||
#include "ir3/instr-a3xx.h"
|
|
||||||
#include "buffers.h"
|
|
||||||
#include "cffdec.h"
|
|
||||||
#include "disasm.h"
|
|
||||||
#include "pager.h"
|
|
||||||
#include "rnnutil.h"
|
|
||||||
#include "util.h"
|
|
||||||
|
|
||||||
static FILE *in;
|
static FILE *in;
|
||||||
static bool verbose;
|
bool verbose;
|
||||||
|
|
||||||
static struct rnn *rnn_gmu;
|
struct rnn *rnn_gmu;
|
||||||
static struct rnn *rnn_control;
|
struct rnn *rnn_control;
|
||||||
static struct rnn *rnn_pipe;
|
struct rnn *rnn_pipe;
|
||||||
|
|
||||||
static struct cffdec_options options = {
|
struct cffdec_options options = {
|
||||||
.draw_filter = -1,
|
.draw_filter = -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_a6xx(void)
|
|
||||||
{
|
|
||||||
return (600 <= options.gpu_id) && (options.gpu_id < 700);
|
|
||||||
}
|
|
||||||
static inline bool
|
|
||||||
is_a5xx(void)
|
|
||||||
{
|
|
||||||
return (500 <= options.gpu_id) && (options.gpu_id < 600);
|
|
||||||
}
|
|
||||||
static inline bool
|
|
||||||
is_64b(void)
|
|
||||||
{
|
|
||||||
return options.gpu_id >= 500;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Helpers to read register values:
|
* Helpers to read register values:
|
||||||
*/
|
*/
|
||||||
@@ -417,7 +383,7 @@ decode_bos(void)
|
|||||||
* Decode registers section:
|
* Decode registers section:
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
void
|
||||||
dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
|
dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
|
||||||
{
|
{
|
||||||
struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
|
struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
|
||||||
@@ -563,292 +529,6 @@ dump_cp_ucode_dbg(uint32_t *dbg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
|
|
||||||
bool pipe)
|
|
||||||
{
|
|
||||||
if (pipe) {
|
|
||||||
struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
|
|
||||||
printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
|
|
||||||
|
|
||||||
if (!strcmp(info->typeinfo->name, "void")) {
|
|
||||||
/* registers that ignore their payload */
|
|
||||||
} else {
|
|
||||||
printf("\t\t\t");
|
|
||||||
dump_register(rnn_pipe, reg, data);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
|
|
||||||
dump_register_val(reg, data, 2);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Decode one 128-bit mem pool chunk and print the register writes it holds.
 *
 * A chunk carries up to two register writes.  Each one has its own enable
 * bit, payload, register offset, pipe flag and context bit; only the enabled
 * slots are printed, slot 0 first.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Bit layout of a chunk, overlaid on its four raw dwords. */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } raw;

   memcpy(&raw, chunk, sizeof(uint32_t) * 4);

   /* Gather both slots into a small table so they can be walked in order. */
   const struct {
      bool enabled;
      unsigned reg;
      uint32_t data;
      unsigned context;
      bool pipe;
   } slots[2] = {
      {raw.reg0_enabled, raw.reg0, raw.data0, raw.reg0_context, raw.reg0_pipe},
      {raw.reg1_enabled, raw.reg1, raw.data1, raw.reg1_context, raw.reg1_pipe},
   };

   for (unsigned i = 0; i < 2; i++) {
      if (!slots[i].enabled)
         continue;

      dump_mem_pool_reg_write(slots[i].reg, slots[i].data, slots[i].context,
                              slots[i].pipe);
   }
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
dump_cp_mem_pool(uint32_t *mempool)
|
|
||||||
{
|
|
||||||
/* The mem pool is a shared pool of memory used for storing in-flight
|
|
||||||
* register writes. There are 6 different queues, one for each
|
|
||||||
* cluster. Writing to $data (or for some special registers, $addr)
|
|
||||||
* pushes data onto the appropriate queue, and each queue is pulled
|
|
||||||
* from by the appropriate cluster. The queues are thus written to
|
|
||||||
* in-order, but may be read out-of-order.
|
|
||||||
*
|
|
||||||
* The queues are conceptually divided into 128-bit "chunks", and the
|
|
||||||
* read and write pointers are in units of chunks. These chunks are
|
|
||||||
* organized internally into 8-chunk "blocks", and memory is allocated
|
|
||||||
* dynamically in terms of blocks. Each queue is represented as a
|
|
||||||
* singly-linked list of blocks, as well as 3-bit start/end chunk
|
|
||||||
* pointers that point within the first/last block. The next pointers
|
|
||||||
* are located in a separate array, rather than inline.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* TODO: The firmware CP_MEM_POOL save/restore routines do something
|
|
||||||
* like:
|
|
||||||
*
|
|
||||||
* cread $02, [ $00 + 0 ]
|
|
||||||
* and $02, $02, 0x118
|
|
||||||
* ...
|
|
||||||
* brne $02, 0, #label
|
|
||||||
* mov $03, 0x2000
|
|
||||||
* mov $03, 0x1000
|
|
||||||
* label:
|
|
||||||
* ...
|
|
||||||
*
|
|
||||||
* I think that control register 0 is the GPU version, and some
|
|
||||||
* versions have a smaller mem pool. It seems some models have a mem
|
|
||||||
* pool that's half the size, and a bunch of offsets are shifted
|
|
||||||
* accordingly. Unfortunately the kernel driver's dumping code doesn't
|
|
||||||
* seem to take this into account, even the downstream android driver,
|
|
||||||
* and we don't know which versions 0x8, 0x10, or 0x100 correspond
|
|
||||||
* to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
|
|
||||||
*/
|
|
||||||
bool small_mem_pool = false;
|
|
||||||
|
|
||||||
/* The array of next pointers for each block. */
|
|
||||||
const uint32_t *next_pointers =
|
|
||||||
small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
|
|
||||||
|
|
||||||
/* Maximum number of blocks in the pool, also the size of the pointers
|
|
||||||
* array.
|
|
||||||
*/
|
|
||||||
const int num_blocks = small_mem_pool ? 0x30 : 0x80;
|
|
||||||
|
|
||||||
/* Number of queues */
|
|
||||||
const unsigned num_queues = 6;
|
|
||||||
|
|
||||||
/* Unfortunately the per-queue state is a little more complicated than
|
|
||||||
* a simple pair of begin/end pointers. Instead of a single beginning
|
|
||||||
* block, there are *two*, with the property that either the two are
|
|
||||||
* equal or the second is the "next" of the first. Similarly there are
|
|
||||||
* two end blocks. Thus the queue either looks like this:
|
|
||||||
*
|
|
||||||
* A -> B -> ... -> C -> D
|
|
||||||
*
|
|
||||||
* Or like this, or some combination:
|
|
||||||
*
|
|
||||||
* A/B -> ... -> C/D
|
|
||||||
*
|
|
||||||
* However, there's only one beginning/end chunk offset. Now the
|
|
||||||
* question is, which of A or B is the actual start? I.e. is the chunk
|
|
||||||
* offset an offset inside A or B? It depends. I'll show a typical read
|
|
||||||
* cycle, starting here (read pointer marked with a *) with a chunk
|
|
||||||
* offset of 0:
|
|
||||||
*
|
|
||||||
* A B
|
|
||||||
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
||||||
* |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
|
|
||||||
*
|
|
||||||
* Once the pointer advances far enough, the hardware decides to free
|
|
||||||
* A, after which the read-side state looks like:
|
|
||||||
*
|
|
||||||
* (free) A/B
|
|
||||||
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
||||||
* |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
|
|
||||||
*
|
|
||||||
* Then after advancing the pointer a bit more, the hardware fetches
|
|
||||||
* the "next" pointer for A and stores it in B:
|
|
||||||
*
|
|
||||||
* (free) A B
|
|
||||||
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
||||||
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
|
|
||||||
*
|
|
||||||
* Then the read pointer advances into B, at which point we've come
|
|
||||||
* back to the first state having advanced a whole block:
|
|
||||||
*
|
|
||||||
* (free) A B
|
|
||||||
* _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
|
||||||
* |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* There is a similar cycle for the write pointer. Now, the question
|
|
||||||
* is, how do we know which state we're in? We need to know this to
|
|
||||||
* know whether the pointer (*) is in A or B if they're different. It
|
|
||||||
* seems like there should be some bit somewhere describing this, but
|
|
||||||
* after lots of experimentation I've come up empty-handed. For now we
|
|
||||||
* assume that if the pointer is in the first half, then we're in
|
|
||||||
* either the first or second state and use B, and otherwise we're in
|
|
||||||
* the second or third state and use A. So far I haven't seen anything
|
|
||||||
* that violates this assumption.
|
|
||||||
*/
|
|
||||||
|
|
||||||
struct {
|
|
||||||
uint32_t unk0;
|
|
||||||
uint32_t padding0[7]; /* Mirrors of unk0 */
|
|
||||||
|
|
||||||
struct {
|
|
||||||
uint32_t chunk : 3;
|
|
||||||
uint32_t first_block : 32 - 3;
|
|
||||||
} writer[6];
|
|
||||||
uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
|
|
||||||
|
|
||||||
uint32_t unk1;
|
|
||||||
uint32_t padding2[7]; /* Mirrors of unk1 */
|
|
||||||
|
|
||||||
uint32_t writer_second_block[6];
|
|
||||||
uint32_t padding3[2];
|
|
||||||
|
|
||||||
uint32_t unk2[6];
|
|
||||||
uint32_t padding4[2];
|
|
||||||
|
|
||||||
struct {
|
|
||||||
uint32_t chunk : 3;
|
|
||||||
uint32_t first_block : 32 - 3;
|
|
||||||
} reader[6];
|
|
||||||
uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
|
|
||||||
|
|
||||||
uint32_t unk3;
|
|
||||||
uint32_t padding6[7]; /* Mirrors of unk3 */
|
|
||||||
|
|
||||||
uint32_t reader_second_block[6];
|
|
||||||
uint32_t padding7[2];
|
|
||||||
|
|
||||||
uint32_t block_count[6];
|
|
||||||
uint32_t padding[2];
|
|
||||||
|
|
||||||
uint32_t unk4;
|
|
||||||
uint32_t padding9[7]; /* Mirrors of unk4 */
|
|
||||||
} data1;
|
|
||||||
|
|
||||||
const uint32_t *data1_ptr =
|
|
||||||
small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
|
|
||||||
memcpy(&data1, data1_ptr, sizeof(data1));
|
|
||||||
|
|
||||||
/* Based on the kernel, the first dword is the mem pool size (in
|
|
||||||
* blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
|
|
||||||
*/
|
|
||||||
const uint32_t *data2_ptr =
|
|
||||||
small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
|
|
||||||
const int data2_size = 0x60;
|
|
||||||
|
|
||||||
/* This seems to be the size of each queue in chunks. */
|
|
||||||
const uint32_t *queue_sizes = &data2_ptr[0x18];
|
|
||||||
|
|
||||||
printf("\tdata2:\n");
|
|
||||||
dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
|
|
||||||
|
|
||||||
/* These seem to be some kind of counter of allocated/deallocated blocks */
|
|
||||||
if (verbose) {
|
|
||||||
printf("\tunk0: %x\n", data1.unk0);
|
|
||||||
printf("\tunk1: %x\n", data1.unk1);
|
|
||||||
printf("\tunk3: %x\n", data1.unk3);
|
|
||||||
printf("\tunk4: %x\n\n", data1.unk4);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int queue = 0; queue < num_queues; queue++) {
|
|
||||||
const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS",
|
|
||||||
"GRAS", "SP_PS", "PS"};
|
|
||||||
printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
|
|
||||||
|
|
||||||
if (verbose) {
|
|
||||||
printf("\t\twriter_first_block: 0x%x\n",
|
|
||||||
data1.writer[queue].first_block);
|
|
||||||
printf("\t\twriter_second_block: 0x%x\n",
|
|
||||||
data1.writer_second_block[queue]);
|
|
||||||
printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
|
|
||||||
printf("\t\treader_first_block: 0x%x\n",
|
|
||||||
data1.reader[queue].first_block);
|
|
||||||
printf("\t\treader_second_block: 0x%x\n",
|
|
||||||
data1.reader_second_block[queue]);
|
|
||||||
printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
|
|
||||||
printf("\t\tblock_count: %d\n", data1.block_count[queue]);
|
|
||||||
printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
|
|
||||||
printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t cur_chunk = data1.reader[queue].chunk;
|
|
||||||
uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
|
|
||||||
: data1.reader_second_block[queue];
|
|
||||||
uint32_t last_chunk = data1.writer[queue].chunk;
|
|
||||||
uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
|
|
||||||
: data1.writer_second_block[queue];
|
|
||||||
|
|
||||||
if (verbose)
|
|
||||||
printf("\tblock %x\n", cur_block);
|
|
||||||
if (cur_block >= num_blocks) {
|
|
||||||
fprintf(stderr, "block %x too large\n", cur_block);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
unsigned calculated_queue_size = 0;
|
|
||||||
while (cur_block != last_block || cur_chunk != last_chunk) {
|
|
||||||
calculated_queue_size++;
|
|
||||||
uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
|
|
||||||
|
|
||||||
dump_mem_pool_chunk(chunk_ptr);
|
|
||||||
|
|
||||||
printf("\t%05x: %08x %08x %08x %08x\n",
|
|
||||||
4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
|
|
||||||
chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
|
|
||||||
|
|
||||||
cur_chunk++;
|
|
||||||
if (cur_chunk == 8) {
|
|
||||||
cur_block = next_pointers[cur_block];
|
|
||||||
if (verbose)
|
|
||||||
printf("\tblock %x\n", cur_block);
|
|
||||||
if (cur_block >= num_blocks) {
|
|
||||||
fprintf(stderr, "block %x too large\n", cur_block);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
cur_chunk = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (calculated_queue_size != queue_sizes[queue]) {
|
|
||||||
printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
|
|
||||||
calculated_queue_size);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
decode_indexed_registers(void)
|
decode_indexed_registers(void)
|
||||||
{
|
{
|
||||||
|
77
src/freedreno/decode/crashdec.h
Normal file
77
src/freedreno/decode/crashdec.h
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
/*
|
||||||
|
* Copyright © 2021 Google, Inc.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice (including the next
|
||||||
|
* paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
* Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __CRASHDEC_H__
|
||||||
|
#define __CRASHDEC_H__
|
||||||
|
|
||||||
|
#include <assert.h>
|
||||||
|
#include <getopt.h>
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "freedreno_pm4.h"
|
||||||
|
|
||||||
|
#include "ir3/instr-a3xx.h"
|
||||||
|
#include "buffers.h"
|
||||||
|
#include "cffdec.h"
|
||||||
|
#include "disasm.h"
|
||||||
|
#include "pager.h"
|
||||||
|
#include "rnnutil.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
|
extern struct rnn *rnn_gmu;
|
||||||
|
extern struct rnn *rnn_control;
|
||||||
|
extern struct rnn *rnn_pipe;
|
||||||
|
|
||||||
|
extern bool verbose;
|
||||||
|
|
||||||
|
extern struct cffdec_options options;
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_a6xx(void)
|
||||||
|
{
|
||||||
|
return (600 <= options.gpu_id) && (options.gpu_id < 700);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_a5xx(void)
|
||||||
|
{
|
||||||
|
return (500 <= options.gpu_id) && (options.gpu_id < 600);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
is_64b(void)
|
||||||
|
{
|
||||||
|
return options.gpu_id >= 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
void dump_register(struct rnn *rnn, uint32_t offset, uint32_t value);
|
||||||
|
void dump_cp_mem_pool(uint32_t *mempool);
|
||||||
|
|
||||||
|
#endif /* __CRASHDEC_H__ */
|
@@ -132,7 +132,11 @@ endif
|
|||||||
|
|
||||||
crashdec = executable(
|
crashdec = executable(
|
||||||
'crashdec',
|
'crashdec',
|
||||||
'crashdec.c',
|
[
|
||||||
|
'crashdec.c',
|
||||||
|
'crashdec.h',
|
||||||
|
'crashdec-mempool.c',
|
||||||
|
],
|
||||||
include_directories: [
|
include_directories: [
|
||||||
inc_freedreno,
|
inc_freedreno,
|
||||||
inc_freedreno_rnn,
|
inc_freedreno_rnn,
|
||||||
|
Reference in New Issue
Block a user