freedreno: Deduplicate ringbuffer macros with computerator/fdperf

They're sugar around freedreno_ringbuffer.h, so put them there and reuse them.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4957>
This commit is contained in:
Eric Anholt
2020-05-07 15:40:52 -07:00
parent 094c7646a3
commit 6c688ae81f
6 changed files with 147 additions and 301 deletions

View File

@@ -80,102 +80,6 @@ to_ ## _to(struct _from *f) \
struct backend *a6xx_init(struct fd_device *dev, uint32_t gpu_id);
/*
* cmdstream helpers:
*/
/*
 * Make sure 'ring' can take 'ndwords' more entries: when ring->cur advanced
 * by ndwords would pass ring->end, fd_ringbuffer_grow() is asked for ndwords.
 */
static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
	/* Fast path: the request still fits before ring->end. */
	if (!(ring->cur + ndwords > ring->end))
		return;

	fd_ringbuffer_grow(ring, ndwords);
}
/*
 * Emit one 32-bit value into 'ring'; thin wrapper around
 * fd_ringbuffer_emit().
 */
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
fd_ringbuffer_emit(ring, data);
}
/*
 * Odd-parity bit for the low 32 bits of 'val': returns 1 when 'val' has an
 * even number of set bits and 0 otherwise, so value plus parity bit together
 * always contain an odd number of ones.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	unsigned folded = val;

	/* XOR-fold 32 -> 16 -> 8 -> 4 bits; the low nibble then has the same
	 * parity as the input.
	 * See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 */
	for (unsigned shift = 16; shift >= 4; shift /= 2)
		folded ^= folded >> shift;

	/* 0x6996 is the per-nibble parity table; it is inverted because we
	 * want odd parity.
	 */
	return (~0x6996 >> (folded & 0xf)) & 1;
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE4_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 7, 'regindx' from bit 8
 * and the parity of 'regindx' at bit 27.
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE4_PKT;

	/* cnt is OR-ed in unmasked, exactly as callers pass it. */
	hdr |= cnt;
	hdr |= _odd_parity_bit(cnt) << 7;
	/* The mask is wider than uint16_t, so it never drops bits here. */
	hdr |= (regindx & 0x3ffff) << 8;
	hdr |= _odd_parity_bit(regindx) << 27;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE7_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 15, the low 7 bits of
 * 'opcode' from bit 16 and the parity of 'opcode' at bit 23.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE7_PKT;

	hdr |= cnt;
	hdr |= _odd_parity_bit(cnt) << 15;
	hdr |= (opcode & 0x7f) << 16;
	/* Parity is taken over the full 8-bit opcode argument. */
	hdr |= _odd_parity_bit(opcode) << 23;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
* NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
*/
static inline void
__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift, uint32_t flags)
{
debug_assert(offset < fd_bo_size(bo));
fd_ringbuffer_reloc(ring, &(struct fd_reloc){
.bo = bo,
.flags = flags,
.offset = offset,
.or = or,
.shift = shift,
.orhi = or >> 32,
});
}
/*
 * Emit a relocation for 'bo' flagged FD_RELOC_READ; 'offset', 'or' and
 * 'shift' are forwarded unchanged to __out_reloc().
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_WRITE.
 */
static inline void
OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_DUMP.
 */
static inline void
OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
}
/*
 * Emit a reference to the ringbuffer 'target' into 'ring' by calling
 * fd_ringbuffer_emit_reloc_ring_full() with 0 as the last argument.
 * NOTE(review): the meaning of that 0 is not visible here - confirm
 * against fd_ringbuffer_emit_reloc_ring_full().
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
/* Evaluates to 'val' when 'cond' is true and to 0 otherwise, for
 * conditionally setting boolean flag(s):
 */
#define COND(cond, val) ((cond) ? (val) : 0)

View File

@@ -27,9 +27,13 @@
#ifndef FREEDRENO_RINGBUFFER_H_
#define FREEDRENO_RINGBUFFER_H_
#include <stdio.h>
#include "util/u_debug.h"
#include "util/u_dynarray.h"
#include "freedreno_drmif.h"
#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
struct fd_submit;
struct fd_ringbuffer;
@@ -209,5 +213,143 @@ fd_ringbuffer_size(struct fd_ringbuffer *ring)
return offset_bytes(ring->cur, ring->start);
}
/* Compile-time debug switch: when non-zero, OUT_RING() and __out_reloc()
 * print every value they emit to stderr.
 */
#define LOG_DWORDS 0
/*
 * Emit one 32-bit value into 'ring' via fd_ringbuffer_emit().
 *
 * When LOG_DWORDS is non-zero the value is first logged to stderr together
 * with the ring pointer and the write position (ring->cur relative to
 * ring->start).
 */
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
	if (LOG_DWORDS) {
		/* One entry per line; "%p" requires a void pointer argument. */
		fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x\n", (void *)ring,
				(uint32_t)(ring->cur - ring->start), data);
	}
	fd_ringbuffer_emit(ring, data);
}
/*
* NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
*/
/*
 * Common backend for the OUT_RELOC*() helpers: builds a struct fd_reloc from
 * the arguments and passes it to fd_ringbuffer_reloc().
 *
 * In debug builds 'offset' is asserted to be smaller than fd_bo_size(bo).
 * The 64-bit 'or' value is assigned to .or and its upper 32 bits to .orhi.
 * When LOG_DWORDS is non-zero the relocation is logged to stderr first.
 */
static inline void
__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo,
		uint32_t offset, uint64_t or, int32_t shift, uint32_t flags)
{
	if (LOG_DWORDS) {
		/* One entry per line; "%p" requires void pointer arguments. */
		fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d\n",
				(void *)ring, (uint32_t)(ring->cur - ring->start),
				(void *)bo, offset, shift);
	}
	debug_assert(offset < fd_bo_size(bo));
	fd_ringbuffer_reloc(ring, &(struct fd_reloc){
		.bo = bo,
		.flags = flags,
		.offset = offset,
		.or = or,
		.shift = shift,
		.orhi = or >> 32,
	});
}
/*
 * Emit a relocation for 'bo' flagged FD_RELOC_READ; 'offset', 'or' and
 * 'shift' are forwarded unchanged to __out_reloc().
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_WRITE.
 */
static inline void
OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_DUMP.
 */
static inline void
OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
}
/*
 * Emit a reference to the ringbuffer 'target' into 'ring' by calling
 * fd_ringbuffer_emit_reloc_ring_full() with 0 as the last argument.
 * NOTE(review): the meaning of that 0 is not visible here - confirm
 * against fd_ringbuffer_emit_reloc_ring_full().
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
/*
 * Make sure 'ring' can take 'ndwords' more entries. If ring->cur advanced by
 * ndwords would pass ring->end, fd_ringbuffer_grow() is called with ndwords;
 * that case is hinted to the compiler as unlikely.
 */
static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
	const int needs_grow = ring->cur + ndwords > ring->end;

	if (unlikely(needs_grow)) {
		fd_ringbuffer_grow(ring, ndwords);
	}
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE0_PKT:
 * cnt-1 shifted to bit 16 and the low 15 bits of 'regindx' at the bottom.
 */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE0_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= regindx & 0x7FFF;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
 * Reserve one dword in 'ring' and emit the constant CP_TYPE2_PKT by itself;
 * this packet takes no arguments.
 */
static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
BEGIN_RING(ring, 1);
OUT_RING(ring, CP_TYPE2_PKT);
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE3_PKT:
 * cnt-1 shifted to bit 16 and 'opcode' shifted to bit 8.
 */
static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE3_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= (opcode & 0xFF) << 8;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
* Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
*/
/*
 * Odd-parity bit for the low 32 bits of 'val': returns 1 when 'val' has an
 * even number of set bits and 0 otherwise, so value plus parity bit together
 * always contain an odd number of ones.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 * note that we want odd parity so 0x6996 is inverted.
	 */
	val ^= val >> 16;
	val ^= val >> 8;
	val ^= val >> 4;
	val &= 0xf;
	/* The table constant is unsigned so the right shift never operates on
	 * a negative signed value (which would be implementation-defined).
	 */
	return (~0x6996u >> val) & 1u;
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE4_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 7, 'regindx' from bit 8
 * and the parity of 'regindx' at bit 27.
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	const uint32_t count_bits = cnt | (_odd_parity_bit(cnt) << 7);
	const uint32_t reg_bits = ((regindx & 0x3ffff) << 8) |
			(_odd_parity_bit(regindx) << 27);

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, CP_TYPE4_PKT | count_bits | reg_bits);
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE7_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 15, the low 7 bits of
 * 'opcode' from bit 16 and the parity of 'opcode' at bit 23.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	const uint32_t count_bits = cnt | (_odd_parity_bit(cnt) << 15);
	const uint32_t opcode_bits = ((opcode & 0x7f) << 16) |
			(_odd_parity_bit(opcode) << 23);

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, CP_TYPE7_PKT | count_bits | opcode_bits);
}
/*
 * Emit a CP_WAIT_FOR_IDLE packet using the pkt3 header format, followed by
 * its single payload dword, which is zero.
 */
static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
OUT_RING(ring, 0x00000000);
}
/*
 * Emit a CP_WAIT_FOR_IDLE packet using the pkt7 header format with a count
 * of zero, i.e. the header only.
 * NOTE(review): the "5" presumably refers to a5xx+, where pkt7 replaces
 * pkt3 - confirm.
 */
static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}
#endif /* FREEDRENO_RINGBUFFER_H_ */

View File

@@ -37,7 +37,10 @@ libfreedreno_drm_files = files(
libfreedreno_drm = static_library(
'freedreno_drm',
libfreedreno_drm_files,
[
libfreedreno_drm_files,
freedreno_xml_header_files,
],
include_directories : [
inc_freedreno,
inc_include,

View File

@@ -20,9 +20,9 @@
inc_freedreno = include_directories(['.', './registers'])
subdir('registers')
subdir('drm')
subdir('ir3')
subdir('registers')
subdir('fdl')
subdir('perfcntrs')
subdir('computerator')

View File

@@ -155,70 +155,6 @@ delta(uint32_t a, uint32_t b)
return b - a;
}
/*
* TODO de-duplicate OUT_RING() and friends
*/
/* Packet opcode value. NOTE(review): presumably a local copy of the value
 * in the generated pm4 header - confirm.
 */
#define CP_WAIT_FOR_IDLE 38
/* Header-type constants OR-ed into the first dword by OUT_PKT0/3/4/7(). */
#define CP_TYPE0_PKT 0x00000000
#define CP_TYPE3_PKT 0xc0000000
#define CP_TYPE4_PKT 0x40000000
#define CP_TYPE7_PKT 0x70000000
/*
 * Store 'data' at the ring's current write pointer and advance the pointer
 * by one entry. No bounds check is performed here.
 */
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
	*ring->cur = data;
	ring->cur++;
}
/*
 * Emit a header dword built from CP_TYPE0_PKT: cnt-1 shifted to bit 16 and
 * the low 15 bits of 'regindx' at the bottom. No space is reserved here.
 */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE0_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= regindx & 0x7FFF;

	OUT_RING(ring, hdr);
}
/*
 * Emit a header dword built from CP_TYPE3_PKT: cnt-1 shifted to bit 16 and
 * 'opcode' shifted to bit 8. No space is reserved here.
 */
static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE3_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= (opcode & 0xFF) << 8;

	OUT_RING(ring, hdr);
}
/*
* Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
*/
/*
 * Odd-parity bit for the low 32 bits of 'val': returns 1 when 'val' has an
 * even number of set bits and 0 otherwise.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	/* Parallel XOR fold, see:
	 * http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 */
	const unsigned half = val ^ (val >> 16);
	const unsigned byte = half ^ (half >> 8);
	const unsigned nibble = (byte ^ (byte >> 4)) & 0xf;

	/* We want odd parity, so the 0x6996 lookup table is inverted. */
	return (~0x6996 >> nibble) & 1;
}
/*
 * Emit a header dword built from CP_TYPE4_PKT: 'cnt' in the low bits, its
 * odd-parity bit at bit 7, 'regindx' from bit 8 and the parity of 'regindx'
 * at bit 27. No space is reserved here.
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE4_PKT;

	hdr |= cnt;
	hdr |= _odd_parity_bit(cnt) << 7;
	hdr |= (regindx & 0x3ffff) << 8;
	hdr |= _odd_parity_bit(regindx) << 27;

	OUT_RING(ring, hdr);
}
/*
 * Emit a header dword built from CP_TYPE7_PKT: 'cnt' in the low bits, its
 * odd-parity bit at bit 15, the low 7 bits of 'opcode' from bit 16 and the
 * parity of 'opcode' at bit 23. No space is reserved here.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE7_PKT;

	hdr |= cnt;
	hdr |= _odd_parity_bit(cnt) << 15;
	hdr |= (opcode & 0x7f) << 16;
	hdr |= _odd_parity_bit(opcode) << 23;

	OUT_RING(ring, hdr);
}
/*
* code to find stuff in /proc/device-tree:
*

View File

@@ -216,20 +216,8 @@ fd_calc_guardband(unsigned x)
return 511 - ((l - 8) * 65);
}
/* Compile-time debug switch: when non-zero, OUT_RING() and __out_reloc()
 * report every value they emit through DBG().
 */
#define LOG_DWORDS 0
static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
/*
 * Emit one 32-bit value into 'ring' via fd_ringbuffer_emit(). When
 * LOG_DWORDS is non-zero the value is first reported through DBG() together
 * with the ring pointer and the write position relative to ring->start.
 */
static inline void
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
{
	if (LOG_DWORDS) {
		uint32_t pos = (uint32_t)(ring->cur - ring->start);

		DBG("ring[%p]: OUT_RING %04x: %08x", ring, pos, data);
	}

	fd_ringbuffer_emit(ring, data);
}
/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
static inline void
OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
@@ -245,133 +233,6 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data,
}));
}
/*
* NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
*/
static inline void
__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift, uint32_t flags)
{
if (LOG_DWORDS) {
DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
(uint32_t)(ring->cur - ring->start), bo, offset, shift);
}
debug_assert(offset < fd_bo_size(bo));
fd_ringbuffer_reloc(ring, &(struct fd_reloc){
.bo = bo,
.flags = flags,
.offset = offset,
.or = or,
.shift = shift,
.orhi = or >> 32,
});
}
/*
 * Emit a relocation for 'bo' flagged FD_RELOC_READ; 'offset', 'or' and
 * 'shift' are forwarded unchanged to __out_reloc().
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_WRITE.
 */
static inline void
OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
}
/*
 * Like OUT_RELOC(), but the relocation is flagged
 * FD_RELOC_READ | FD_RELOC_DUMP.
 */
static inline void
OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
uint32_t offset, uint64_t or, int32_t shift)
{
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
}
/*
 * Emit a reference to the ringbuffer 'target' into 'ring' by calling
 * fd_ringbuffer_emit_reloc_ring_full() with 0 as the last argument.
 * NOTE(review): the meaning of that 0 is not visible here - confirm
 * against fd_ringbuffer_emit_reloc_ring_full().
 */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
/*
 * Make sure 'ring' can take 'ndwords' more entries. If ring->cur advanced by
 * ndwords would pass ring->end, fd_ringbuffer_grow() is called with ndwords;
 * that case is hinted to the compiler as unlikely.
 */
static inline void
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
{
	const int out_of_room = ring->cur + ndwords > ring->end;

	if (unlikely(out_of_room)) {
		fd_ringbuffer_grow(ring, ndwords);
	}
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE0_PKT:
 * cnt-1 shifted to bit 16 and the low 15 bits of 'regindx' at the bottom.
 */
static inline void
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE0_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= regindx & 0x7FFF;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
 * Reserve one dword in 'ring' and emit the constant CP_TYPE2_PKT by itself;
 * this packet takes no arguments.
 */
static inline void
OUT_PKT2(struct fd_ringbuffer *ring)
{
BEGIN_RING(ring, 1);
OUT_RING(ring, CP_TYPE2_PKT);
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE3_PKT:
 * cnt-1 shifted to bit 16 and 'opcode' shifted to bit 8.
 */
static inline void
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	uint32_t hdr = CP_TYPE3_PKT;

	/* The count field holds cnt-1, not cnt. */
	hdr |= (cnt-1) << 16;
	hdr |= (opcode & 0xFF) << 8;

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, hdr);
}
/*
* Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
*/
/*
 * Odd-parity bit for the low 32 bits of 'val': returns 1 when 'val' has an
 * even number of set bits and 0 otherwise.
 */
static inline unsigned
_odd_parity_bit(unsigned val)
{
	unsigned idx = val;

	/* Fold the word onto its low nibble, see:
	 * http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
	 */
	idx ^= idx >> 16;
	idx ^= idx >> 8;
	idx = (idx ^ (idx >> 4)) & 0xf;

	/* note that we want odd parity so 0x6996 is inverted. */
	return (~0x6996 >> idx) & 1;
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE4_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 7, 'regindx' from bit 8
 * and the parity of 'regindx' at bit 27.
 */
static inline void
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
{
	const uint32_t count_bits = cnt | (_odd_parity_bit(cnt) << 7);
	const uint32_t reg_bits = ((regindx & 0x3ffff) << 8) |
			(_odd_parity_bit(regindx) << 27);

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, CP_TYPE4_PKT | count_bits | reg_bits);
}
/*
 * Reserve cnt+1 dwords and emit a header dword built from CP_TYPE7_PKT:
 * 'cnt' in the low bits, its odd-parity bit at bit 15, the low 7 bits of
 * 'opcode' from bit 16 and the parity of 'opcode' at bit 23.
 */
static inline void
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
{
	const uint32_t count_bits = cnt | (_odd_parity_bit(cnt) << 15);
	const uint32_t opcode_bits = ((opcode & 0x7f) << 16) |
			(_odd_parity_bit(opcode) << 23);

	BEGIN_RING(ring, cnt+1);
	OUT_RING(ring, CP_TYPE7_PKT | count_bits | opcode_bits);
}
/*
 * Emit a CP_WAIT_FOR_IDLE packet using the pkt3 header format, followed by
 * its single payload dword, which is zero.
 */
static inline void
OUT_WFI(struct fd_ringbuffer *ring)
{
OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
OUT_RING(ring, 0x00000000);
}
/*
 * Emit a CP_WAIT_FOR_IDLE packet using the pkt7 header format with a count
 * of zero, i.e. the header only.
 * NOTE(review): the "5" presumably refers to a5xx+, where pkt7 replaces
 * pkt3 - confirm.
 */
static inline void
OUT_WFI5(struct fd_ringbuffer *ring)
{
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
}
static inline void
__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target)
{