r600: add ARB_query_buffer_object support
This uses a different shader than radeonsi, as we can't address non-256-byte-aligned SSBOs, which the radeonsi code does; this version passes some extra offsets into the shader instead. It also contains a set of u64 instruction implementations that may or may not be complete (at least the u64div is definitely not something that works outside this use-case). If r600 grows 64-bit integers, it will use the GLSL lowering for divmod.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
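The offset trick, in brief: r600 can only bind an SSBO at a 256-byte-aligned offset, so the driver binds an aligned base and hands the leftover bytes to the shader through constant buffer 0 (the new CONST[0][2].x / buffer0_offset values in the diff below). A minimal C sketch of the split (illustrative only; the variable names here are made up, the real code is in r600_query_hw_get_result_resource below):

    /* Split an arbitrary byte offset into an aligned SSBO binding
     * plus a small remainder the shader adds back itself. */
    uint32_t aligned_base = offset & ~0xffu; /* -> pipe_shader_buffer.buffer_offset */
    uint32_t remainder    = offset & 0xffu;  /* -> consts.buffer0_offset / buffer_offset */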
@@ -22,6 +22,7 @@
*/
#include "r600_formats.h"
#include "r600_shader.h"
#include "r600_query.h"
#include "evergreend.h"

#include "pipe/p_shader_tokens.h"
@@ -4235,6 +4236,64 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
r600_mark_atom_dirty(rctx, &istate->atom);
}

static void evergreen_get_pipe_constant_buffer(struct r600_context *rctx,
enum pipe_shader_type shader, uint slot,
struct pipe_constant_buffer *cbuf)
{
struct r600_constbuf_state *state = &rctx->constbuf_state[shader];
struct pipe_constant_buffer *cb;
cbuf->user_buffer = NULL;

cb = &state->cb[slot];

cbuf->buffer_size = cb->buffer_size;
pipe_resource_reference(&cbuf->buffer, cb->buffer);
}

static void evergreen_get_shader_buffers(struct r600_context *rctx,
enum pipe_shader_type shader,
uint start_slot, uint count,
struct pipe_shader_buffer *sbuf)
{
assert(shader == PIPE_SHADER_COMPUTE);
int idx, i;
struct r600_image_state *istate = &rctx->compute_buffers;
struct r600_image_view *rview;

for (i = start_slot, idx = 0; i < start_slot + count; i++, idx++) {

rview = &istate->views[i];

pipe_resource_reference(&sbuf[idx].buffer, rview->base.resource);
if (rview->base.resource) {
uint64_t rview_va = ((struct r600_resource *)rview->base.resource)->gpu_address;

uint64_t prog_va = rview->resource_words[0];

prog_va += ((uint64_t)G_030008_BASE_ADDRESS_HI(rview->resource_words[2])) << 32;
prog_va -= rview_va;

sbuf[idx].buffer_offset = prog_va & 0xffffffff;
sbuf[idx].buffer_size = rview->resource_words[1] + 1;
} else {
sbuf[idx].buffer_offset = 0;
sbuf[idx].buffer_size = 0;
}
}
}

static void evergreen_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
struct r600_context *rctx = (struct r600_context *)ctx;
st->saved_compute = rctx->cs_shader_state.shader;

/* save constant buffer 0 */
evergreen_get_pipe_constant_buffer(rctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
/* save ssbos 0-2 */
evergreen_get_shader_buffers(rctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
}


void evergreen_init_state_functions(struct r600_context *rctx)
{
unsigned id = 1;
@@ -4332,6 +4391,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
else
rctx->b.b.get_sample_position = cayman_get_sample_position;
rctx->b.dma_copy = evergreen_dma_copy;
rctx->b.save_qbo_state = evergreen_save_qbo_state;

evergreen_init_compute_state_functions(rctx);
}

@@ -123,6 +123,11 @@ void r600_flush_emit(struct r600_context *rctx)
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}

if (rctx->b.flags & R600_CONTEXT_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}

if (wait_until) {
/* Use of WAIT_UNTIL is deprecated on Cayman+ */
if (rctx->b.family < CHIP_CAYMAN) {

@@ -352,6 +352,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_PACK_HALF_FLOAT:
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
return family >= CHIP_CEDAR ? 1 : 0;
case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
return family >= CHIP_CEDAR ? 4 : 0;
@@ -384,7 +385,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:
case PIPE_CAP_GENERATE_MIPMAP:
case PIPE_CAP_STRING_MARKER:
case PIPE_CAP_QUERY_BUFFER_OBJECT:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_TGSI_VOTE:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
@@ -759,7 +759,7 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws,
R600_CONTEXT_INV_VERTEX_CACHE |
R600_CONTEXT_INV_TEX_CACHE |
R600_CONTEXT_INV_CONST_CACHE;
rscreen->b.barrier_flags.compute_to_L2 = R600_CONTEXT_PS_PARTIAL_FLUSH;
rscreen->b.barrier_flags.compute_to_L2 = R600_CONTEXT_CS_PARTIAL_FLUSH | R600_CONTEXT_FLUSH_AND_INV;

rscreen->global_pool = compute_memory_pool_new(rscreen);

@@ -63,6 +63,7 @@
#define R600_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8)
#define R600_CONTEXT_WAIT_3D_IDLE (R600_CONTEXT_PRIVATE_FLAG << 9)
#define R600_CONTEXT_WAIT_CP_DMA_IDLE (R600_CONTEXT_PRIVATE_FLAG << 10)
#define R600_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11)

/* the number of CS dwords for flushing and drawing */
#define R600_MAX_FLUSH_CS_DWORDS 18

@@ -134,6 +134,7 @@ unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen)
}

void r600_gfx_wait_fence(struct r600_common_context *ctx,
struct r600_resource *buf,
uint64_t va, uint32_t ref, uint32_t mask)
{
struct radeon_winsys_cs *cs = ctx->gfx.cs;
@@ -145,6 +146,10 @@ void r600_gfx_wait_fence(struct r600_common_context *ctx,
radeon_emit(cs, ref); /* reference value */
radeon_emit(cs, mask); /* mask */
radeon_emit(cs, 4); /* poll interval */

if (buf)
r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
RADEON_PRIO_QUERY);
}

void r600_draw_rectangle(struct blitter_context *blitter,

@@ -679,6 +679,7 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx,
uint32_t new_fence, unsigned query_type);
unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
void r600_gfx_wait_fence(struct r600_common_context *ctx,
struct r600_resource *buf,
uint64_t va, uint32_t ref, uint32_t mask);
void r600_draw_rectangle(struct blitter_context *blitter,
void *vertex_elements_cso,

@@ -535,7 +535,8 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_screen *rscreen,
memset(results, 0, buffer->b.b.width0);

if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
unsigned max_rbs = rscreen->info.num_render_backends;
unsigned enabled_rb_mask = rscreen->info.enabled_rb_mask;
unsigned num_results;
@@ -620,6 +621,7 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
switch (query_type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
query->result_size = 16 * rscreen->info.num_render_backends;
query->result_size += 16; /* for the fence + alignment */
query->num_cs_dw_begin = 6;
@@ -676,7 +678,8 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
unsigned type, int diff)
{
if (type == PIPE_QUERY_OCCLUSION_COUNTER ||
type == PIPE_QUERY_OCCLUSION_PREDICATE) {
type == PIPE_QUERY_OCCLUSION_PREDICATE ||
type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
bool old_enable = rctx->num_occlusion_queries != 0;
bool old_perfect_enable =
rctx->num_perfect_occlusion_queries != 0;
@@ -685,7 +688,7 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
rctx->num_occlusion_queries += diff;
assert(rctx->num_occlusion_queries >= 0);

if (type == PIPE_QUERY_OCCLUSION_COUNTER) {
if (type != PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE) {
rctx->num_perfect_occlusion_queries += diff;
assert(rctx->num_perfect_occlusion_queries >= 0);
}
@@ -730,6 +733,7 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, va);
@@ -810,6 +814,7 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
va += 8;
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
@@ -922,6 +927,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
switch (query->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
op = PRED_OP(PREDICATION_OP_ZPASS);
break;
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@@ -1084,6 +1090,7 @@ static void r600_get_hw_query_params(struct r600_common_context *rctx,
switch (rquery->b.type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE:
params->start_offset = 0;
params->end_offset = 8;
params->fence_offset = max_rbs * 16;
@@ -1176,7 +1183,8 @@ static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
}
break;
}
case PIPE_QUERY_OCCLUSION_PREDICATE: {
case PIPE_QUERY_OCCLUSION_PREDICATE:
case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
for (unsigned i = 0; i < max_rbs; ++i) {
unsigned results_base = i * 16;
result->b = result->b ||
@@ -1383,6 +1391,8 @@ bool r600_query_hw_get_result(struct r600_common_context *rctx,
* 1.x = fence_offset
* 1.y = pair_stride
* 1.z = pair_count
* 1.w = result_offset
* 2.x = buffer0 offset
*
* BUFFER[0] = query result buffer
* BUFFER[1] = previous summary buffer
@@ -1404,7 +1414,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
"DCL BUFFER[0]\n"
"DCL BUFFER[1]\n"
"DCL BUFFER[2]\n"
"DCL CONST[0][0..1]\n"
"DCL CONST[0][0..2]\n"
"DCL TEMP[0..5]\n"
"IMM[0] UINT32 {0, 31, 2147483647, 4294967295}\n"
"IMM[1] UINT32 {1, 2, 4, 8}\n"
@@ -1415,14 +1425,16 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
"AND TEMP[5], CONST[0][0].wwww, IMM[2].xxxx\n"
"UIF TEMP[5]\n"
/* Check result availability. */
"LOAD TEMP[1].x, BUFFER[0], CONST[0][1].xxxx\n"
"UADD TEMP[1].x, CONST[0][1].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[1].x, BUFFER[0], TEMP[1].xxxx\n"
"ISHR TEMP[0].z, TEMP[1].xxxx, IMM[0].yyyy\n"
"MOV TEMP[1], TEMP[0].zzzz\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"

/* Load result if available. */
"UIF TEMP[1]\n"
"LOAD TEMP[0].xy, BUFFER[0], IMM[0].xxxx\n"
"UADD TEMP[0].x, IMM[0].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[0].xy, BUFFER[0], TEMP[0].xxxx\n"
"ENDIF\n"
"ELSE\n"
/* Load previously accumulated result if requested. */
@@ -1447,6 +1459,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)

/* Load fence and check result availability */
"UMAD TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy, CONST[0][1].xxxx\n"
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[5].x, BUFFER[0], TEMP[5].xxxx\n"
"ISHR TEMP[0].z, TEMP[5].xxxx, IMM[0].yyyy\n"
"NOT TEMP[0].z, TEMP[0].zzzz\n"
@@ -1459,6 +1472,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
/* Load start and end. */
"UMUL TEMP[5].x, TEMP[1].xxxx, CONST[0][0].yyyy\n"
"UMAD TEMP[5].x, TEMP[1].yyyy, CONST[0][1].yyyy, TEMP[5].xxxx\n"
"UADD TEMP[5].x, TEMP[5].xxxx, CONST[0][2].xxxx\n"
"LOAD TEMP[2].xy, BUFFER[0], TEMP[5].xxxx\n"

"UADD TEMP[5].y, TEMP[5].xxxx, CONST[0][0].xxxx\n"
@@ -1497,18 +1511,18 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
"AND TEMP[4], CONST[0][0].wwww, IMM[1].yyyy\n"
"UIF TEMP[4]\n"
/* Store accumulated data for chaining. */
"STORE BUFFER[2].xyz, IMM[0].xxxx, TEMP[0]\n"
"STORE BUFFER[2].xyz, CONST[0][1].wwww, TEMP[0]\n"
"ELSE\n"
"AND TEMP[4], CONST[0][0].wwww, IMM[1].zzzz\n"
"UIF TEMP[4]\n"
/* Store result availability. */
"NOT TEMP[0].z, TEMP[0]\n"
"AND TEMP[0].z, TEMP[0].zzzz, IMM[1].xxxx\n"
"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].zzzz\n"
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].zzzz\n"

"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].y, IMM[0].xxxx, IMM[0].xxxx\n"
"STORE BUFFER[2].y, CONST[0][1].wwww, IMM[0].xxxx\n"
"ENDIF\n"
"ELSE\n"
/* Store result if it is available. */
@@ -1531,7 +1545,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)

"AND TEMP[4], CONST[0][0].wwww, IMM[2].zzzz\n"
"UIF TEMP[4]\n"
"STORE BUFFER[2].xy, IMM[0].xxxx, TEMP[0].xyxy\n"
"STORE BUFFER[2].xy, CONST[0][1].wwww, TEMP[0].xyxy\n"
"ELSE\n"
/* Clamping */
"UIF TEMP[0].yyyy\n"
@@ -1543,7 +1557,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
"UMIN TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz\n"
"ENDIF\n"

"STORE BUFFER[2].x, IMM[0].xxxx, TEMP[0].xxxx\n"
"STORE BUFFER[2].x, CONST[0][1].wwww, TEMP[0].xxxx\n"
"ENDIF\n"
"ENDIF\n"
"ENDIF\n"
@@ -1611,6 +1625,8 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
uint32_t fence_offset;
uint32_t pair_stride;
uint32_t pair_count;
uint32_t buffer_offset;
uint32_t buffer0_offset;
} consts;

if (!rctx->query_result_shader) {
@@ -1656,7 +1672,8 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
consts.config = 0;
if (index < 0)
consts.config |= 4;
if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE)
if (query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE ||
query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE)
consts.config |= 8;
else if (query->b.type == PIPE_QUERY_SO_OVERFLOW_PREDICATE ||
query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
@@ -1696,18 +1713,20 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
params.start_offset += qbuf->results_end - query->result_size;
}

rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);

ssbo[0].buffer = &qbuf->buf->b.b;
ssbo[0].buffer_offset = params.start_offset;
ssbo[0].buffer_size = qbuf->results_end - params.start_offset;

ssbo[0].buffer_offset = params.start_offset & ~0xff;
ssbo[0].buffer_size = qbuf->results_end - ssbo[0].buffer_offset;
consts.buffer0_offset = (params.start_offset & 0xff);
if (!qbuf->previous) {
ssbo[2].buffer = resource;
ssbo[2].buffer_offset = offset;
ssbo[2].buffer_size = 8;

}
ssbo[2].buffer = resource;
ssbo[2].buffer_offset = offset & ~0xff;
ssbo[2].buffer_size = offset + 8;
consts.buffer_offset = (offset & 0xff);
} else
consts.buffer_offset = 0;

rctx->b.set_constant_buffer(&rctx->b, PIPE_SHADER_COMPUTE, 0, &constant_buffer);

rctx->b.set_shader_buffers(&rctx->b, PIPE_SHADER_COMPUTE, 0, 3, ssbo);

@@ -1721,7 +1740,7 @@ static void r600_query_hw_get_result_resource(struct r600_common_context *rctx,
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
va += params.fence_offset;

r600_gfx_wait_fence(rctx, va, 0x80000000, 0x80000000);
r600_gfx_wait_fence(rctx, qbuf->buf, va, 0x80000000, 0x80000000);
}

rctx->b.launch_grid(&rctx->b, &grid);

@@ -770,7 +770,7 @@ static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
int r;

/* validate this for other ops */
assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT);
assert(op == ALU_OP3_MULADD_UINT24 || op == ALU_OP3_CNDE_INT || op == ALU_OP3_BFE_UINT);
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.src[0].sel = src0_sel;
@@ -9457,7 +9457,8 @@ static int tgsi_opdst(struct r600_shader_ctx *ctx)
return 0;
}

static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type,
struct r600_bytecode_alu_src *src)
{
struct r600_bytecode_alu alu;
int r;
@@ -9471,7 +9472,7 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type
alu.dst.write = 1;
alu.dst.chan = 0;

r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
alu.src[0] = *src;
alu.src[1].sel = V_SQ_ALU_SRC_0;
alu.src[1].chan = 0;

@@ -9697,7 +9698,8 @@ static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
}
#endif

static int emit_if(struct r600_shader_ctx *ctx, int opcode)
static int emit_if(struct r600_shader_ctx *ctx, int opcode,
struct r600_bytecode_alu_src *src)
{
int alu_type = CF_OP_ALU_PUSH_BEFORE;

@@ -9711,7 +9713,7 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode)
alu_type = CF_OP_ALU;
}

emit_logic_pred(ctx, opcode, alu_type);
emit_logic_pred(ctx, opcode, alu_type, src);

r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

@@ -9723,12 +9725,17 @@ static int emit_if(struct r600_shader_ctx *ctx, int opcode)

static int tgsi_if(struct r600_shader_ctx *ctx)
{
return emit_if(ctx, ALU_OP2_PRED_SETNE);
struct r600_bytecode_alu_src alu_src;
r600_bytecode_src(&alu_src, &ctx->src[0], 0);

return emit_if(ctx, ALU_OP2_PRED_SETNE, &alu_src);
}

static int tgsi_uif(struct r600_shader_ctx *ctx)
{
return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
struct r600_bytecode_alu_src alu_src;
r600_bytecode_src(&alu_src, &ctx->src[0], 0);
return emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
}

static int tgsi_else(struct r600_shader_ctx *ctx)
@@ -10077,6 +10084,684 @@ static int tgsi_clock(struct r600_shader_ctx *ctx)
return 0;
}

static int emit_u64add(struct r600_shader_ctx *ctx, int op,
int treg,
int src0_sel, int src0_chan,
int src1_sel, int src1_chan)
{
struct r600_bytecode_alu alu;
int r;
int opc;

if (op == ALU_OP2_ADD_INT)
opc = ALU_OP2_ADDC_UINT;
else
opc = ALU_OP2_SUBB_UINT;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.dst.sel = treg;
alu.dst.chan = 0;
alu.dst.write = 1;
alu.src[0].sel = src0_sel;
alu.src[0].chan = src0_chan + 0;
alu.src[1].sel = src1_sel;
alu.src[1].chan = src1_chan + 0;
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
alu.src[0].sel = src0_sel;
alu.src[0].chan = src0_chan + 1;
alu.src[1].sel = src1_sel;
alu.src[1].chan = src1_chan + 1;
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = opc;
alu.dst.sel = treg;
alu.dst.chan = 2;
alu.dst.write = 1;
alu.last = 1;
alu.src[0].sel = src0_sel;
alu.src[0].chan = src0_chan + 0;
alu.src[1].sel = src1_sel;
alu.src[1].chan = src1_chan + 0;
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
alu.src[0].sel = treg;
alu.src[0].chan = 1;
alu.src[1].sel = treg;
alu.src[1].chan = 2;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
}
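emit_u64add builds the 64-bit add/sub from two 32-bit halves plus an explicit carry channel. A C model of the emitted sequence (a sketch only; the u64_t type and function name are made up for illustration, assuming unsigned 32-bit halves in the x/y channels):

    #include <stdint.h>

    typedef struct { uint32_t lo, hi; } u64_t; /* hypothetical two-channel value */

    static u64_t u64_add(u64_t a, u64_t b)
    {
        u64_t r;
        r.lo = a.lo + b.lo;           /* ADD_INT on channel 0 */
        uint32_t carry = r.lo < a.lo; /* ADDC_UINT: carry out of the low add */
        r.hi = a.hi + b.hi + carry;   /* ADD_INT on channel 1, then fold carry in */
        return r;
    }

The subtract path is the same shape with SUB_INT/SUBB_UINT, i.e. a borrow instead of a carry.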

static int egcm_u64add(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
int treg = ctx->temp_reg;
int op = ALU_OP2_ADD_INT, opc = ALU_OP2_ADDC_UINT;

if (ctx->src[1].neg) {
op = ALU_OP2_SUB_INT;
opc = ALU_OP2_SUBB_UINT;
}
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.dst.sel = treg;
alu.dst.chan = 0;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = opc;
alu.dst.sel = treg;
alu.dst.chan = 2;
alu.dst.write = 1;
alu.last = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
alu.src[1].neg = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;
tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = treg;
alu.src[0].chan = 1;
alu.src[1].sel = treg;
alu.src[1].chan = 2;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = treg;
alu.src[0].chan = 0;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
}

/* result.y = mul_high a, b
result.x = mul a,b
result.y += a.x * b.y + a.y * b.x;
*/
static int egcm_u64mul(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
int treg = ctx->temp_reg;

/* temp.x = mul_lo a.x, b.x */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_MULLO_UINT;
alu.dst.sel = treg;
alu.dst.chan = 0;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
r = emit_mul_int_op(ctx->bc, &alu);
if (r)
return r;

/* temp.y = mul_hi a.x, b.x */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_MULHI_UINT;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
r = emit_mul_int_op(ctx->bc, &alu);
if (r)
return r;

/* temp.z = mul a.x, b.y */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_MULLO_UINT;
alu.dst.sel = treg;
alu.dst.chan = 2;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
r = emit_mul_int_op(ctx->bc, &alu);
if (r)
return r;

/* temp.w = mul a.y, b.x */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_MULLO_UINT;
alu.dst.sel = treg;
alu.dst.chan = 3;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
r = emit_mul_int_op(ctx->bc, &alu);
if (r)
return r;

/* temp.z = temp.z + temp.w */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_ADD_INT;
alu.dst.sel = treg;
alu.dst.chan = 2;
alu.dst.write = 1;
alu.src[0].sel = treg;
alu.src[0].chan = 2;
alu.src[1].sel = treg;
alu.src[1].chan = 3;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

/* temp.y = temp.y + temp.z */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_ADD_INT;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
alu.src[0].sel = treg;
alu.src[0].chan = 1;
alu.src[1].sel = treg;
alu.src[1].chan = 2;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

/* dst.x = temp.x */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = treg;
alu.src[0].chan = 0;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

/* dst.y = temp.y */
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = treg;
alu.src[0].chan = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

return 0;
}
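The comment above egcm_u64mul is the standard 32x32 decomposition of a 64-bit multiply. As a C model (a sketch, reusing the hypothetical u64_t from the add sketch above, and assuming only the low 64 bits of the product are kept):

    static u64_t u64_mul(u64_t a, u64_t b)
    {
        u64_t r;
        uint64_t lo_prod = (uint64_t)a.lo * b.lo;
        r.lo = (uint32_t)lo_prod;          /* MULLO_UINT a.x, b.x */
        r.hi = (uint32_t)(lo_prod >> 32);  /* MULHI_UINT a.x, b.x */
        r.hi += a.lo * b.hi + a.hi * b.lo; /* cross terms; their high bits fall off */
        return r;
    }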

static int emit_u64sge(struct r600_shader_ctx *ctx,
int treg,
int src0_sel, int src0_base_chan,
int src1_sel, int src1_base_chan)
{
int r;
/* for 64-bit sge */
/* result = (src0.y > src1.y) || ((src0.y == src1.y) && (src0.x >= src1.x)) */
r = single_alu_op2(ctx, ALU_OP2_SETGT_UINT,
treg, 1,
src0_sel, src0_base_chan + 1,
src1_sel, src1_base_chan + 1);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
treg, 0,
src0_sel, src0_base_chan,
src1_sel, src1_base_chan);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_SETE_INT,
treg, 2,
src0_sel, src0_base_chan + 1,
src1_sel, src1_base_chan + 1);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_AND_INT,
treg, 0,
treg, 0,
treg, 2);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_OR_INT,
treg, 0,
treg, 0,
treg, 1);
if (r)
return r;
return 0;
}
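emit_u64sge is the usual lexicographic compare on (hi, lo) halves. In C (a sketch, same hypothetical u64_t as above):

    static uint32_t u64_sge(u64_t a, u64_t b)
    {
        /* SETGT_UINT on hi, SETE_INT on hi, SETGE_UINT on lo, AND, OR */
        return (a.hi > b.hi) || ((a.hi == b.hi) && (a.lo >= b.lo));
    }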

/* this isn't a complete div, it's just enough for the qbo shader to work */
static int egcm_u64div(struct r600_shader_ctx *ctx)
{
struct r600_bytecode_alu alu;
struct r600_bytecode_alu_src alu_num_hi, alu_num_lo, alu_denom_hi, alu_denom_lo, alu_src;
int r, i;
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

/* make sure we are dividing by a constant with 0 in the high bits */
if (ctx->src[1].sel != V_SQ_ALU_SRC_LITERAL)
return -1;
if (ctx->src[1].value[ctx->src[1].swizzle[1]] != 0)
return -1;
/* make sure we are doing one division */
if (inst->Dst[0].Register.WriteMask != 0x3)
return -1;

/* emit_if uses ctx->temp_reg, so we can't use it here */
int treg = r600_get_temp(ctx);
int tmp_num = r600_get_temp(ctx);
int sub_tmp = r600_get_temp(ctx);

/* tmp quot are tmp_num.zw */
r600_bytecode_src(&alu_num_lo, &ctx->src[0], 0);
r600_bytecode_src(&alu_num_hi, &ctx->src[0], 1);
r600_bytecode_src(&alu_denom_lo, &ctx->src[1], 0);
r600_bytecode_src(&alu_denom_hi, &ctx->src[1], 1);

/* MOV tmp_num.xy, numerator */
r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 0,
alu_num_lo.sel, alu_num_lo.chan,
0, 0);
if (r)
return r;
r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 1,
alu_num_hi.sel, alu_num_hi.chan,
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 2,
V_SQ_ALU_SRC_LITERAL, 0,
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 3,
V_SQ_ALU_SRC_LITERAL, 0,
0, 0);
if (r)
return r;

/* treg 0 is log2_denom */
/* normally this gets the MSB for the denom high value
- however we know this will always be 0 here. */
r = single_alu_op2(ctx,
ALU_OP1_MOV,
treg, 0,
V_SQ_ALU_SRC_LITERAL, 32,
0, 0);
if (r)
return r;

/* normally we'd check denom hi for 0, but we know it is already */
/* t0.z = num_hi >= denom_lo */
r = single_alu_op2(ctx,
ALU_OP2_SETGE_UINT,
treg, 1,
alu_num_hi.sel, alu_num_hi.chan,
V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
if (r)
return r;

memset(&alu_src, 0, sizeof(alu_src));
alu_src.sel = treg;
alu_src.chan = 1;
r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
if (r)
return r;

/* for loops in here */
/* get msb t0.x = msb(src[1].x) first */
int msb_lo = util_last_bit(alu_denom_lo.value);
r = single_alu_op2(ctx, ALU_OP1_MOV,
treg, 0,
V_SQ_ALU_SRC_LITERAL, msb_lo,
0, 0);
if (r)
return r;

/* unroll the asm here */
for (i = 0; i < 31; i++) {
r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
treg, 2,
V_SQ_ALU_SRC_LITERAL, i,
treg, 0);
if (r)
return r;

/* we can do this on the CPU */
uint32_t denom_lo_shl = alu_denom_lo.value << (31 - i);
/* t0.z = tmp_num.y >= t0.z */
r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
treg, 1,
tmp_num, 1,
V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_AND_INT,
treg, 1,
treg, 1,
treg, 2);
if (r)
return r;

memset(&alu_src, 0, sizeof(alu_src));
alu_src.sel = treg;
alu_src.chan = 1;
r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
tmp_num, 1,
tmp_num, 1,
V_SQ_ALU_SRC_LITERAL, denom_lo_shl);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_OR_INT,
tmp_num, 3,
tmp_num, 3,
V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
if (r)
return r;

r = tgsi_endif(ctx);
if (r)
return r;
}

/* log2_denom is always <= 31, so manually peel the last loop
* iteration.
*/
r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
treg, 1,
tmp_num, 1,
V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
if (r)
return r;

memset(&alu_src, 0, sizeof(alu_src));
alu_src.sel = treg;
alu_src.chan = 1;
r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_SUB_INT,
tmp_num, 1,
tmp_num, 1,
V_SQ_ALU_SRC_LITERAL, alu_denom_lo.value);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_OR_INT,
tmp_num, 3,
tmp_num, 3,
V_SQ_ALU_SRC_LITERAL, 1U);
if (r)
return r;
r = tgsi_endif(ctx);
if (r)
return r;

r = tgsi_endif(ctx);
if (r)
return r;

/* onto the second loop to unroll */
for (i = 0; i < 31; i++) {
r = single_alu_op2(ctx, ALU_OP2_SETGE_UINT,
treg, 1,
V_SQ_ALU_SRC_LITERAL, (63 - (31 - i)),
treg, 0);
if (r)
return r;

uint64_t denom_shl = (uint64_t)alu_denom_lo.value << (31 - i);
r = single_alu_op2(ctx, ALU_OP1_MOV,
treg, 2,
V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
treg, 3,
V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
0, 0);
if (r)
return r;

r = emit_u64sge(ctx, sub_tmp,
tmp_num, 0,
treg, 2);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_AND_INT,
treg, 1,
treg, 1,
sub_tmp, 0);
if (r)
return r;

memset(&alu_src, 0, sizeof(alu_src));
alu_src.sel = treg;
alu_src.chan = 1;
r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
if (r)
return r;

r = emit_u64add(ctx, ALU_OP2_SUB_INT,
sub_tmp,
tmp_num, 0,
treg, 2);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 0,
sub_tmp, 0,
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
tmp_num, 1,
sub_tmp, 1,
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_OR_INT,
tmp_num, 2,
tmp_num, 2,
V_SQ_ALU_SRC_LITERAL, 1U << (31 - i));
if (r)
return r;

r = tgsi_endif(ctx);
if (r)
return r;
}

/* log2_denom is always <= 63, so manually peel the last loop
* iteration.
*/
uint64_t denom_shl = (uint64_t)alu_denom_lo.value;
r = single_alu_op2(ctx, ALU_OP1_MOV,
treg, 2,
V_SQ_ALU_SRC_LITERAL, (denom_shl & 0xffffffff),
0, 0);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP1_MOV,
treg, 3,
V_SQ_ALU_SRC_LITERAL, (denom_shl >> 32),
0, 0);
if (r)
return r;

r = emit_u64sge(ctx, sub_tmp,
tmp_num, 0,
treg, 2);
if (r)
return r;

memset(&alu_src, 0, sizeof(alu_src));
alu_src.sel = sub_tmp;
alu_src.chan = 0;
r = emit_if(ctx, ALU_OP2_PRED_SETNE_INT, &alu_src);
if (r)
return r;

r = emit_u64add(ctx, ALU_OP2_SUB_INT,
sub_tmp,
tmp_num, 0,
treg, 2);
if (r)
return r;

r = single_alu_op2(ctx, ALU_OP2_OR_INT,
tmp_num, 2,
tmp_num, 2,
V_SQ_ALU_SRC_LITERAL, 1U);
if (r)
return r;
r = tgsi_endif(ctx);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = tmp_num;
alu.src[0].chan = 2;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst);
alu.src[0].sel = tmp_num;
alu.src[0].chan = 3;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
}
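The division above is restoring shift-subtract long division, fully unrolled: the first loop produces quotient bits 63..32 against the high word only, the second produces bits 31..0 against the full 64-bit remainder, and each loop's last iteration is peeled. A C model of what the emitted code computes (a sketch; it relies on the same precondition the function checks, a non-zero literal divisor whose high 32 bits are zero):

    static uint64_t u64_div_by_const(uint64_t n, uint32_t denom)
    {
        uint64_t quot = 0;
        for (int shift = 63; shift >= 0; shift--) {
            /* (n >> shift) >= denom  <=>  n >= denom << shift,
             * without overflowing the shifted divisor */
            if ((n >> shift) >= denom) {
                n -= (uint64_t)denom << shift;
                quot |= 1ull << shift;
            }
        }
        return quot; /* the remainder is left in n */
    }

The shader keeps the running remainder in tmp_num.xy and accumulates the quotient into tmp_num.zw, which is why the final MOVs copy channels 2 and 3 into the destination.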

static int egcm_u64sne(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
struct r600_bytecode_alu alu;
int r;
int treg = ctx->temp_reg;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_SETNE_INT;
alu.dst.sel = treg;
alu.dst.chan = 0;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_SETNE_INT;
alu.dst.sel = treg;
alu.dst.chan = 1;
alu.dst.write = 1;
r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;

memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_OR_INT;
tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
alu.src[0].sel = treg;
alu.src[0].chan = 0;
alu.src[1].sel = treg;
alu.src[1].chan = 1;
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
return 0;
}
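egcm_u64sne is just a per-half SETNE_INT folded with OR_INT; as a one-line C model (a sketch, same hypothetical u64_t as above):

    static uint32_t u64_sne(u64_t a, u64_t b)
    {
        return (a.lo != b.lo) | (a.hi != b.hi); /* SETNE_INT per half, OR_INT */
    }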

static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
[TGSI_OPCODE_ARL] = { ALU_OP0_NOP, tgsi_r600_arl},
[TGSI_OPCODE_MOV] = { ALU_OP1_MOV, tgsi_op2},
@@ -10497,6 +11182,10 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
[TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
[TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne },
[TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add },
[TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul },
[TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div },
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};

@@ -10719,5 +11408,9 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
[TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
[TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
[TGSI_OPCODE_U64SNE] = { ALU_OP0_NOP, egcm_u64sne },
[TGSI_OPCODE_U64ADD] = { ALU_OP0_NOP, egcm_u64add },
[TGSI_OPCODE_U64MUL] = { ALU_OP0_NOP, egcm_u64mul },
[TGSI_OPCODE_U64DIV] = { ALU_OP0_NOP, egcm_u64div },
[TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported},
};

@@ -3219,6 +3219,7 @@ void r600_init_common_state_functions(struct r600_context *rctx)
rctx->b.b.texture_barrier = r600_texture_barrier;
rctx->b.b.set_stream_output_targets = r600_set_streamout_targets;
rctx->b.b.set_active_query_state = r600_set_active_query_state;

rctx->b.b.draw_vbo = r600_draw_vbo;
rctx->b.invalidate_buffer = r600_invalidate_buffer;
rctx->b.need_gfx_cs_space = r600_need_gfx_cs_space;

@@ -124,6 +124,7 @@
#define SURFACE_BASE_UPDATE_COLOR_NUM(x) (((1 << x) - 1) << 1)
#define SURFACE_BASE_UPDATE_STRMOUT(x) (0x200 << (x))

#define EVENT_TYPE_CS_PARTIAL_FLUSH 0x07 /* eg+ */
#define EVENT_TYPE_PS_PARTIAL_FLUSH 0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE 0x15