r600: use GET_BUFFER_RESINFO vtx fetch on eg instead of setting up consts

Contrary to what the comment said, this appears to work just fine on my rv770
(tested with piglit textureSize 140 fs/vs samplerBuffer).
Dave Airlie confirmed it working on cayman too.
I have no clue though if it's actually preferable to use it (unfortunately
we cannot get rid of the tex constants completely, as we still require them
for cube map txq).
Note, though, that filling in the format (1 channel or 4?) and the fields
related to mega- or mini-fetch (what the hell is this...) is just a guess based
on other usage of vtx fetch instructions...

v2: it really needs to be done through the texture cache (I botched the
testing because sb optimizations automatically turned it into tc, but we
can't rely on that, and it isn't happening on tes).

Tested-by: Konstantin Kharlamov <hi-angel@yandex.ru>
Reviewed-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Roland Scheidegger
2018-01-02 23:39:34 +01:00
parent 0be1dc25cf
commit c5162fd3c4
4 changed files with 50 additions and 58 deletions

View File

@@ -653,11 +653,12 @@ static void evergreen_fill_buffer_resource_words(struct r600_context *rctx,
S_030008_ENDIAN_SWAP(endian); S_030008_ENDIAN_SWAP(endian);
tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached); tex_resource_words[3] = swizzle_res | S_03000C_UNCACHED(params->uncached);
/* /*
* in theory dword 4 is for number of elements, for use with resinfo, * dword 4 is for number of elements, for use with resinfo,
* but it seems to utterly fail to work, the amd gpu shader analyser * albeit the amd gpu shader analyser
* uses a const buffer to store the element sizes for buffer txq * uses a const buffer to store the element sizes for buffer txq
*/ */
tex_resource_words[4] = 0; tex_resource_words[4] = params->size / stride;
tex_resource_words[5] = tex_resource_words[6] = 0; tex_resource_words[5] = tex_resource_words[6] = 0;
tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER); tex_resource_words[7] = S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER);
} }

View File

@@ -1510,7 +1510,8 @@ int cm_bytecode_add_cf_end(struct r600_bytecode *bc)
/* common to all 3 families */ /* common to all 3 families */
static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id) static int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
{ {
bc->bytecode[id] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) | bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(vtx->op) |
S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) | S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) | S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x); S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);

View File

@@ -6949,22 +6949,18 @@ static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_l
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset) static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offset)
{ {
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r; int r;
int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset; int id = tgsi_tex_get_src_gpr(ctx, reg_idx) + offset;
int sampler_index_mode = inst->Src[reg_idx].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
if (ctx->bc->chip_class < EVERGREEN) {
struct r600_bytecode_alu alu;
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV; alu.op = ALU_OP1_MOV;
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) {
/* with eg each dword is either buf size or number of cubes */
alu.src[0].sel += id / 4;
alu.src[0].chan = id % 4;
} else {
/* r600 we have them at channel 2 of the second dword */ /* r600 we have them at channel 2 of the second dword */
alu.src[0].sel += (id * 2) + 1; alu.src[0].sel += (id * 2) + 1;
alu.src[0].chan = 1; alu.src[0].chan = 1;
}
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.last = 1; alu.last = 1;
@@ -6972,8 +6968,29 @@ static int r600_do_buffer_txq(struct r600_shader_ctx *ctx, int reg_idx, int offs
if (r) if (r)
return r; return r;
return 0; return 0;
} else {
struct r600_bytecode_vtx vtx;
memset(&vtx, 0, sizeof(vtx));
vtx.op = FETCH_OP_GDS_MIN_UINT; /* aka GET_BUFFER_RESINFO */
vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
vtx.src_gpr = 0;
vtx.mega_fetch_count = 16; /* no idea here really... */
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */
vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 4 : 7; /* SEL_Y */
vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 4 : 7; /* SEL_Z */
vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 4 : 7; /* SEL_W */
vtx.data_format = FMT_32_32_32_32;
vtx.buffer_index_mode = sampler_index_mode;
if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
return r;
return 0;
}
} }
static int tgsi_tex(struct r600_shader_ctx *ctx) static int tgsi_tex(struct r600_shader_ctx *ctx)
{ {
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
@@ -7027,6 +7044,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) {
if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) {
if (ctx->bc->chip_class < EVERGREEN)
ctx->shader->uses_tex_buffers = true; ctx->shader->uses_tex_buffers = true;
return r600_do_buffer_txq(ctx, 1, 0); return r600_do_buffer_txq(ctx, 1, 0);
} }
@@ -7617,7 +7635,7 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
if (ctx->bc->chip_class >= EVERGREEN) { if (ctx->bc->chip_class >= EVERGREEN) {
/* with eg each dword is either buf size or number of cubes */ /* with eg each dword is number of cubes */
alu.src[0].sel += id / 4; alu.src[0].sel += id / 4;
alu.src[0].chan = id % 4; alu.src[0].chan = id % 4;
} else { } else {
@@ -8760,6 +8778,7 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
(inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { (inst->Src[0].Register.File == TGSI_FILE_IMAGE && inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) {
if (ctx->bc->chip_class < EVERGREEN)
ctx->shader->uses_tex_buffers = true; ctx->shader->uses_tex_buffers = true;
return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset); return r600_do_buffer_txq(ctx, 0, ctx->shader->image_size_const_offset);
} }
@@ -8784,7 +8803,7 @@ static int tgsi_resq(struct r600_shader_ctx *ctx)
alu.op = ALU_OP1_MOV; alu.op = ALU_OP1_MOV;
alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
/* with eg each dword is either buf size or number of cubes */ /* with eg each dword is either number of cubes */
alu.src[0].sel += id / 4; alu.src[0].sel += id / 4;
alu.src[0].chan = id % 4; alu.src[0].chan = id % 4;
alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

View File

@@ -1357,14 +1357,12 @@ static void r600_setup_buffer_constants(struct r600_context *rctx, int shader_ty
} }
/* On evergreen we store one value /* On evergreen we store one value
* 1. buffer size for TXQ or * 1. number of cube layers in a cube map array.
* 2. number of cube layers in a cube map array.
*/ */
void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type) void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
{ {
struct r600_textures_info *samplers = &rctx->samplers[shader_type]; struct r600_textures_info *samplers = &rctx->samplers[shader_type];
struct r600_image_state *images = NULL; struct r600_image_state *images = NULL;
struct r600_image_state *buffers = NULL;
int bits, sview_bits, img_bits; int bits, sview_bits, img_bits;
uint32_t array_size; uint32_t array_size;
int i; int i;
@@ -1373,29 +1371,23 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
if (shader_type == PIPE_SHADER_FRAGMENT) { if (shader_type == PIPE_SHADER_FRAGMENT) {
images = &rctx->fragment_images; images = &rctx->fragment_images;
buffers = &rctx->fragment_buffers;
} else if (shader_type == PIPE_SHADER_COMPUTE) { } else if (shader_type == PIPE_SHADER_COMPUTE) {
images = &rctx->compute_images; images = &rctx->compute_images;
buffers = &rctx->compute_buffers;
} }
if (!samplers->views.dirty_buffer_constants && if (!samplers->views.dirty_buffer_constants &&
!(images && images->dirty_buffer_constants) && !(images && images->dirty_buffer_constants))
!(buffers && buffers->dirty_buffer_constants))
return; return;
if (images) if (images)
images->dirty_buffer_constants = FALSE; images->dirty_buffer_constants = FALSE;
if (buffers)
buffers->dirty_buffer_constants = FALSE;
samplers->views.dirty_buffer_constants = FALSE; samplers->views.dirty_buffer_constants = FALSE;
bits = sview_bits = util_last_bit(samplers->views.enabled_mask); bits = sview_bits = util_last_bit(samplers->views.enabled_mask);
if (images) if (images)
bits += util_last_bit(images->enabled_mask); bits += util_last_bit(images->enabled_mask);
img_bits = bits; img_bits = bits;
if (buffers)
bits += util_last_bit(buffers->enabled_mask);
array_size = bits * sizeof(uint32_t); array_size = bits * sizeof(uint32_t);
constants = r600_alloc_buf_consts(rctx, shader_type, array_size, constants = r600_alloc_buf_consts(rctx, shader_type, array_size,
@@ -1404,39 +1396,18 @@ void eg_setup_buffer_constants(struct r600_context *rctx, int shader_type)
for (i = 0; i < sview_bits; i++) { for (i = 0; i < sview_bits; i++) {
if (samplers->views.enabled_mask & (1 << i)) { if (samplers->views.enabled_mask & (1 << i)) {
uint32_t offset = (base_offset / 4) + i; uint32_t offset = (base_offset / 4) + i;
if (samplers->views.views[i]->base.target == PIPE_BUFFER) {
constants[offset] = samplers->views.views[i]->base.u.buf.size /
util_format_get_blocksize(samplers->views.views[i]->base.format);
} else {
constants[offset] = samplers->views.views[i]->base.texture->array_size / 6; constants[offset] = samplers->views.views[i]->base.texture->array_size / 6;
} }
} }
}
if (images) { if (images) {
for (i = sview_bits; i < img_bits; i++) { for (i = sview_bits; i < img_bits; i++) {
int idx = i - sview_bits; int idx = i - sview_bits;
if (images->enabled_mask & (1 << idx)) { if (images->enabled_mask & (1 << idx)) {
uint32_t offset = (base_offset / 4) + i; uint32_t offset = (base_offset / 4) + i;
if (images->views[i].base.resource->target == PIPE_BUFFER) {
constants[offset] = images->views[i].base.u.buf.size /
util_format_get_blocksize(images->views[i].base.format);
} else {
constants[offset] = images->views[i].base.resource->array_size / 6; constants[offset] = images->views[i].base.resource->array_size / 6;
} }
} }
} }
}
if (buffers) {
for (i = img_bits; i < bits; i++) {
int idx = i - img_bits;
if (buffers->enabled_mask & (1 << idx)) {
uint32_t offset = (base_offset / 4) + i;
assert(buffers->views[i].base.resource->target == PIPE_BUFFER);
constants[offset] = buffers->views[i].base.u.buf.size /
util_format_get_blocksize(buffers->views[i].base.format);
}
}
}
} }
/* set sample xy locations as array of fragment shader constants */ /* set sample xy locations as array of fragment shader constants */