intel/mi_builder: Drop the gen_ prefix

mi_ is already a unique prefix in Mesa, so the gen_ prefix isn't really
gaining us anything except extra characters.  It's possible that MI_ may
conflict a tiny bit with GenXML, but that doesn't seem to be a problem
today, and we can deal with it in the future if it's ever an issue.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Anuj Phogat <anuj.phogat@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9393>
Author: Jason Ekstrand
Authored: 2021-03-03 12:29:39 -06:00
Committed by: Marge Bot
Parent: 6d522538b6
Commit: 1e53e0d2c7
8 changed files with 757 additions and 783 deletions
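To illustrate how mechanical the rename is, here is a minimal before/after sketch of a typical call site (a hypothetical snippet assembled from the hunks below, not a line taken verbatim from the patch):

    /* before: every type and entry point carried the gen_ prefix */
    struct gen_mi_builder b;
    gen_mi_builder_init(&b, batch);
    gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
                 gen_mi_iand(&b, gen_mi_nz(&b, result), gen_mi_imm(1)));

    /* after: the same code with the prefix dropped */
    struct mi_builder b;
    mi_builder_init(&b, batch);
    mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
             mi_iand(&b, mi_nz(&b, result), mi_imm(1)));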


@@ -76,7 +76,7 @@ __gen_combine_address(struct iris_batch *batch, void *location,
/* CS_GPR(15) is reserved for combining conditional rendering predicates
* with GL_ARB_indirect_parameters draw number predicates.
*/
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 15
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#include "common/mi_builder.h"
#define _iris_pack_command(batch, cmd, dst, name) \


@@ -95,7 +95,7 @@ struct iris_query_so_overflow {
} stream[4];
};
static struct gen_mi_value
static struct mi_value
query_mem64(struct iris_query *q, uint32_t offset)
{
struct iris_address addr = {
@@ -103,7 +103,7 @@ query_mem64(struct iris_query *q, uint32_t offset)
.offset = q->query_state_ref.offset + offset,
.access = IRIS_DOMAIN_OTHER_WRITE
};
return gen_mi_mem64(addr);
return mi_mem64(addr);
}
/**
@@ -334,33 +334,33 @@ calculate_result_on_cpu(const struct gen_device_info *devinfo,
*
* (num_prims[1] - num_prims[0]) - (storage_needed[1] - storage_needed[0])
*/
static struct gen_mi_value
calc_overflow_for_stream(struct gen_mi_builder *b,
static struct mi_value
calc_overflow_for_stream(struct mi_builder *b,
struct iris_query *q,
int idx)
{
#define C(counter, i) query_mem64(q, \
offsetof(struct iris_query_so_overflow, stream[idx].counter[i]))
return gen_mi_isub(b, gen_mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
gen_mi_isub(b, C(prim_storage_needed, 1),
C(prim_storage_needed, 0)));
return mi_isub(b, mi_isub(b, C(num_prims, 1), C(num_prims, 0)),
mi_isub(b, C(prim_storage_needed, 1),
C(prim_storage_needed, 0)));
#undef C
}
/**
* Calculate whether any stream has overflowed.
*/
static struct gen_mi_value
calc_overflow_any_stream(struct gen_mi_builder *b, struct iris_query *q)
static struct mi_value
calc_overflow_any_stream(struct mi_builder *b, struct iris_query *q)
{
struct gen_mi_value stream_result[MAX_VERTEX_STREAMS];
struct mi_value stream_result[MAX_VERTEX_STREAMS];
for (int i = 0; i < MAX_VERTEX_STREAMS; i++)
stream_result[i] = calc_overflow_for_stream(b, q, i);
struct gen_mi_value result = stream_result[0];
struct mi_value result = stream_result[0];
for (int i = 1; i < MAX_VERTEX_STREAMS; i++)
result = gen_mi_ior(b, result, stream_result[i]);
result = mi_ior(b, result, stream_result[i]);
return result;
}
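As a worked instance of the formula above (illustrative numbers, not from the patch): if num_prims advanced by 100 between the two snapshots while prim_storage_needed advanced by 90, the expression evaluates to 100 - 90 = 10. Any nonzero result marks that stream as overflowed, and mi_ior then folds the per-stream results into a single "any stream overflowed" value.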
@@ -382,15 +382,15 @@ query_is_boolean(enum pipe_query_type type)
/**
* Calculate the result using MI_MATH.
*/
static struct gen_mi_value
static struct mi_value
calculate_result_on_gpu(const struct gen_device_info *devinfo,
struct gen_mi_builder *b,
struct mi_builder *b,
struct iris_query *q)
{
struct gen_mi_value result;
struct gen_mi_value start_val =
struct mi_value result;
struct mi_value start_val =
query_mem64(q, offsetof(struct iris_query_snapshots, start));
struct gen_mi_value end_val =
struct mi_value end_val =
query_mem64(q, offsetof(struct iris_query_snapshots, end));
switch (q->type) {
@@ -406,18 +406,18 @@ calculate_result_on_gpu(const struct gen_device_info *devinfo,
* launch an actual shader to calculate this with full precision.
*/
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
result = gen_mi_iand(b, gen_mi_imm((1ull << 36) - 1),
gen_mi_imul_imm(b, start_val, scale));
result = mi_iand(b, mi_imm((1ull << 36) - 1),
mi_imul_imm(b, start_val, scale));
break;
}
case PIPE_QUERY_TIME_ELAPSED: {
/* TODO: This discards fractional bits (see above). */
uint32_t scale = 1000000000ull / devinfo->timestamp_frequency;
result = gen_mi_imul_imm(b, gen_mi_isub(b, end_val, start_val), scale);
result = mi_imul_imm(b, mi_isub(b, end_val, start_val), scale);
break;
}
default:
result = gen_mi_isub(b, end_val, start_val);
result = mi_isub(b, end_val, start_val);
break;
}
@@ -425,10 +425,10 @@ calculate_result_on_gpu(const struct gen_device_info *devinfo,
if (GEN_GEN == 8 &&
q->type == PIPE_QUERY_PIPELINE_STATISTICS_SINGLE &&
q->index == PIPE_STAT_QUERY_PS_INVOCATIONS)
result = gen_mi_ushr32_imm(b, result, 2);
result = mi_ushr32_imm(b, result, 2);
if (query_is_boolean(q->type))
result = gen_mi_iand(b, gen_mi_nz(b, result), gen_mi_imm(1));
result = mi_iand(b, mi_nz(b, result), mi_imm(1));
return result;
}
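A worked instance of the timestamp scaling (the frequency here is purely illustrative): with a timestamp_frequency of 12,500,000 Hz, scale = 1000000000 / 12500000 = 80, so each raw tick is multiplied by 80 to convert to nanoseconds, and the mi_iand with (1ull << 36) - 1 masks the product back down to 36 bits. As the TODO above notes, any fractional nanoseconds per tick are discarded by the integer division that computes scale.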
@@ -709,23 +709,23 @@ iris_get_query_result_resource(struct pipe_context *ctx,
bool predicated = !wait && !q->stalled;
struct gen_mi_builder b;
gen_mi_builder_init(&b, batch);
struct mi_builder b;
mi_builder_init(&b, batch);
iris_batch_sync_region_start(batch);
struct gen_mi_value result = calculate_result_on_gpu(devinfo, &b, q);
struct gen_mi_value dst =
struct mi_value result = calculate_result_on_gpu(devinfo, &b, q);
struct mi_value dst =
result_type <= PIPE_QUERY_TYPE_U32 ?
gen_mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
gen_mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));
mi_mem32(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE)) :
mi_mem64(rw_bo(dst_bo, offset, IRIS_DOMAIN_OTHER_WRITE));
if (predicated) {
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
gen_mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
gen_mi_store_if(&b, dst, result);
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
mi_mem64(ro_bo(query_bo, snapshots_landed_offset)));
mi_store_if(&b, dst, result);
} else {
gen_mi_store(&b, dst, result);
mi_store(&b, dst, result);
}
iris_batch_sync_region_end(batch);
@@ -780,10 +780,10 @@ set_predicate_for_result(struct iris_context *ice,
PIPE_CONTROL_FLUSH_ENABLE);
q->stalled = true;
struct gen_mi_builder b;
gen_mi_builder_init(&b, batch);
struct mi_builder b;
mi_builder_init(&b, batch);
struct gen_mi_value result;
struct mi_value result;
switch (q->type) {
case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
@@ -794,17 +794,17 @@ set_predicate_for_result(struct iris_context *ice,
break;
default: {
/* PIPE_QUERY_OCCLUSION_* */
struct gen_mi_value start =
struct mi_value start =
query_mem64(q, offsetof(struct iris_query_snapshots, start));
struct gen_mi_value end =
struct mi_value end =
query_mem64(q, offsetof(struct iris_query_snapshots, end));
result = gen_mi_isub(&b, end, start);
result = mi_isub(&b, end, start);
break;
}
}
result = inverted ? gen_mi_z(&b, result) : gen_mi_nz(&b, result);
result = gen_mi_iand(&b, result, gen_mi_imm(1));
result = inverted ? mi_z(&b, result) : mi_nz(&b, result);
result = mi_iand(&b, result, mi_imm(1));
/* We immediately set the predicate on the render batch, as all the
* counters come from 3D operations. However, we may need to predicate
@@ -812,10 +812,10 @@ set_predicate_for_result(struct iris_context *ice,
* a different MI_PREDICATE_RESULT register. So, we save the result to
* memory and reload it in iris_launch_grid.
*/
gen_mi_value_ref(&b, result);
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT), result);
gen_mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
predicate_result)), result);
mi_value_ref(&b, result);
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT), result);
mi_store(&b, query_mem64(q, offsetof(struct iris_query_snapshots,
predicate_result)), result);
ice->state.compute_predicate = bo;
iris_batch_sync_region_end(batch);


@@ -6643,19 +6643,17 @@ iris_upload_render_state(struct iris_context *ice,
PIPE_CONTROL_FLUSH_ENABLE);
if (ice->state.predicate == IRIS_PREDICATE_STATE_USE_BIT) {
struct gen_mi_builder b;
gen_mi_builder_init(&b, batch);
struct mi_builder b;
mi_builder_init(&b, batch);
/* comparison = draw id < draw count */
struct gen_mi_value comparison =
gen_mi_ult(&b, gen_mi_imm(draw->drawid),
gen_mi_mem32(ro_bo(draw_count_bo,
draw_count_offset)));
struct mi_value comparison =
mi_ult(&b, mi_imm(draw->drawid),
mi_mem32(ro_bo(draw_count_bo, draw_count_offset)));
/* predicate = comparison & conditional rendering predicate */
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_RESULT),
gen_mi_iand(&b, comparison,
gen_mi_reg32(CS_GPR(15))));
mi_store(&b, mi_reg32(MI_PREDICATE_RESULT),
mi_iand(&b, comparison, mi_reg32(CS_GPR(15))));
} else {
uint32_t mi_predicate;
@@ -6731,16 +6729,16 @@ iris_upload_render_state(struct iris_context *ice,
"draw count from stream output stall",
PIPE_CONTROL_CS_STALL);
struct gen_mi_builder b;
gen_mi_builder_init(&b, batch);
struct mi_builder b;
mi_builder_init(&b, batch);
struct iris_address addr =
ro_bo(iris_resource_bo(so->offset.res), so->offset.offset);
struct gen_mi_value offset =
gen_mi_iadd_imm(&b, gen_mi_mem32(addr), -so->base.buffer_offset);
struct mi_value offset =
mi_iadd_imm(&b, mi_mem32(addr), -so->base.buffer_offset);
gen_mi_store(&b, gen_mi_reg32(_3DPRIM_VERTEX_COUNT),
gen_mi_udiv32_imm(&b, offset, so->stride));
mi_store(&b, mi_reg32(_3DPRIM_VERTEX_COUNT),
mi_udiv32_imm(&b, offset, so->stride));
_iris_emit_lri(batch, _3DPRIM_START_VERTEX, 0);
_iris_emit_lri(batch, _3DPRIM_BASE_VERTEX, 0);
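For intuition on the computation above (numbers chosen for illustration only): if the stream-output offset read from memory is 192, so->base.buffer_offset is 64, and so->stride is 16, the emitted math computes (192 - 64) / 16 = 8, and that value is stored to _3DPRIM_VERTEX_COUNT so the draw consumes 8 vertices.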

File diff suppressed because it is too large.


@@ -32,7 +32,7 @@
#include "genxml/gen_macros.h"
#include "util/macros.h"
class gen_mi_builder_test;
class mi_builder_test;
struct address {
uint32_t gem_handle;
@@ -40,11 +40,11 @@ struct address {
};
#define __gen_address_type struct address
#define __gen_user_data ::gen_mi_builder_test
#define __gen_user_data ::mi_builder_test
uint64_t __gen_combine_address(gen_mi_builder_test *test, void *location,
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address
__gen_address_offset(address addr, uint64_t offset)
@@ -58,7 +58,7 @@ __gen_address_offset(address addr, uint64_t offset)
#else
#define RSVD_TEMP_REG 0x2430 /* GEN7_3DPRIM_START_VERTEX */
#endif
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 15
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048
@@ -78,10 +78,10 @@ __gen_address_offset(address addr, uint64_t offset)
#include <vector>
class gen_mi_builder_test : public ::testing::Test {
class mi_builder_test : public ::testing::Test {
public:
gen_mi_builder_test();
~gen_mi_builder_test();
mi_builder_test();
~mi_builder_test();
void SetUp();
@@ -104,24 +104,24 @@ public:
return addr;
}
inline gen_mi_value in_mem64(uint32_t offset)
inline mi_value in_mem64(uint32_t offset)
{
return gen_mi_mem64(in_addr(offset));
return mi_mem64(in_addr(offset));
}
inline gen_mi_value in_mem32(uint32_t offset)
inline mi_value in_mem32(uint32_t offset)
{
return gen_mi_mem32(in_addr(offset));
return mi_mem32(in_addr(offset));
}
inline gen_mi_value out_mem64(uint32_t offset)
inline mi_value out_mem64(uint32_t offset)
{
return gen_mi_mem64(out_addr(offset));
return mi_mem64(out_addr(offset));
}
inline gen_mi_value out_mem32(uint32_t offset)
inline mi_value out_mem32(uint32_t offset)
{
return gen_mi_mem32(out_addr(offset));
return mi_mem32(out_addr(offset));
}
int fd;
@@ -140,14 +140,14 @@ public:
char *output;
uint64_t canary;
gen_mi_builder b;
mi_builder b;
};
gen_mi_builder_test::gen_mi_builder_test() :
mi_builder_test::mi_builder_test() :
fd(-1)
{ }
gen_mi_builder_test::~gen_mi_builder_test()
mi_builder_test::~mi_builder_test()
{
close(fd);
}
@@ -157,7 +157,7 @@ gen_mi_builder_test::~gen_mi_builder_test()
#define DATA_BO_SIZE 4096
void
gen_mi_builder_test::SetUp()
mi_builder_test::SetUp()
{
drmDevicePtr devices[8];
int max_devices = drmGetDevices2(0, devices, 8);
@@ -255,11 +255,11 @@ gen_mi_builder_test::SetUp()
memset(data_map, 139, DATA_BO_SIZE);
memset(&canary, 139, sizeof(canary));
gen_mi_builder_init(&b, this);
mi_builder_init(&b, this);
}
void *
gen_mi_builder_test::emit_dwords(int num_dwords)
mi_builder_test::emit_dwords(int num_dwords)
{
void *ptr = (void *)((char *)batch_map + batch_offset);
batch_offset += num_dwords * 4;
@@ -268,13 +268,13 @@ gen_mi_builder_test::emit_dwords(int num_dwords)
}
void
gen_mi_builder_test::submit_batch()
mi_builder_test::submit_batch()
{
gen_mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);
// Round batch up to an even number of dwords.
if (batch_offset & 4)
gen_mi_builder_emit(&b, GENX(MI_NOOP), noop);
mi_builder_emit(&b, GENX(MI_NOOP), noop);
drm_i915_gem_exec_object2 objects[2];
memset(objects, 0, sizeof(objects));
@@ -314,7 +314,7 @@ gen_mi_builder_test::submit_batch()
}
uint64_t
__gen_combine_address(gen_mi_builder_test *test, void *location,
__gen_combine_address(mi_builder_test *test, void *location,
address addr, uint32_t delta)
{
drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
@@ -328,7 +328,7 @@ __gen_combine_address(gen_mi_builder_test *test, void *location,
}
void *
__gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords)
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
return test->emit_dwords(num_dwords);
}
@@ -336,12 +336,12 @@ __gen_get_batch_dwords(gen_mi_builder_test *test, unsigned num_dwords)
#include "genxml/genX_pack.h"
#include "mi_builder.h"
TEST_F(gen_mi_builder_test, imm_mem)
TEST_F(mi_builder_test, imm_mem)
{
const uint64_t value = 0x0123456789abcdef;
gen_mi_store(&b, out_mem64(0), gen_mi_imm(value));
gen_mi_store(&b, out_mem32(8), gen_mi_imm(value));
mi_store(&b, out_mem64(0), mi_imm(value));
mi_store(&b, out_mem32(8), mi_imm(value));
submit_batch();
@@ -355,15 +355,15 @@ TEST_F(gen_mi_builder_test, imm_mem)
/* mem -> mem copies are only supported on HSW+ */
#if GEN_GEN >= 8 || GEN_IS_HASWELL
TEST_F(gen_mi_builder_test, mem_mem)
TEST_F(mi_builder_test, mem_mem)
{
const uint64_t value = 0x0123456789abcdef;
*(uint64_t *)input = value;
gen_mi_store(&b, out_mem64(0), in_mem64(0));
gen_mi_store(&b, out_mem32(8), in_mem64(0));
gen_mi_store(&b, out_mem32(16), in_mem32(0));
gen_mi_store(&b, out_mem64(24), in_mem32(0));
mi_store(&b, out_mem64(0), in_mem64(0));
mi_store(&b, out_mem32(8), in_mem64(0));
mi_store(&b, out_mem32(16), in_mem32(0));
mi_store(&b, out_mem64(24), in_mem32(0));
submit_batch();
@@ -383,17 +383,17 @@ TEST_F(gen_mi_builder_test, mem_mem)
}
#endif
TEST_F(gen_mi_builder_test, imm_reg)
TEST_F(mi_builder_test, imm_reg)
{
const uint64_t value = 0x0123456789abcdef;
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(value));
gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), gen_mi_imm(value));
gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
submit_batch();
@@ -405,26 +405,26 @@ TEST_F(gen_mi_builder_test, imm_reg)
EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}
TEST_F(gen_mi_builder_test, mem_reg)
TEST_F(mi_builder_test, mem_reg)
{
const uint64_t value = 0x0123456789abcdef;
*(uint64_t *)input = value;
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem64(0));
gen_mi_store(&b, out_mem64(0), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem64(0));
gen_mi_store(&b, out_mem64(8), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg32(RSVD_TEMP_REG), in_mem32(0));
gen_mi_store(&b, out_mem64(16), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), gen_mi_imm(canary));
gen_mi_store(&b, gen_mi_reg64(RSVD_TEMP_REG), in_mem32(0));
gen_mi_store(&b, out_mem64(24), gen_mi_reg64(RSVD_TEMP_REG));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));
submit_batch();
@@ -443,11 +443,11 @@ TEST_F(gen_mi_builder_test, mem_reg)
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
TEST_F(gen_mi_builder_test, memset)
TEST_F(mi_builder_test, memset)
{
const unsigned memset_size = 256;
gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
submit_batch();
@@ -456,7 +456,7 @@ TEST_F(gen_mi_builder_test, memset)
EXPECT_EQ(out_u32[i], 0xdeadbeef);
}
TEST_F(gen_mi_builder_test, memcpy)
TEST_F(mi_builder_test, memcpy)
{
const unsigned memcpy_size = 256;
@@ -464,7 +464,7 @@ TEST_F(gen_mi_builder_test, memcpy)
for (unsigned i = 0; i < memcpy_size; i++)
in_u8[i] = i;
gen_mi_memcpy(&b, out_addr(0), in_addr(0), 256);
mi_memcpy(&b, out_addr(0), in_addr(0), 256);
submit_batch();
@@ -476,7 +476,7 @@ TEST_F(gen_mi_builder_test, memcpy)
/* Start of MI_MATH section */
#if GEN_GEN >= 8 || GEN_IS_HASWELL
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, gen_mi_value_to_u64(imm))
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))
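This macro works because the builder constant-folds operations whose operands are all immediates: each test stores a GPU-computed value to memory, then checks it against the same expression evaluated at build time, with mi_value_to_u64 reading back the folded constant on the CPU. (That is the pattern the tests below rely on, not something this patch changes.)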
/* Test adding of immediates of all kinds including
*
@@ -484,40 +484,40 @@ TEST_F(gen_mi_builder_test, memcpy)
* - All ones
* - inverted constants
*/
TEST_F(gen_mi_builder_test, add_imm)
TEST_F(mi_builder_test, add_imm)
{
const uint64_t value = 0x0123456789abcdef;
const uint64_t add = 0xdeadbeefac0ffee2;
memcpy(input, &value, sizeof(value));
gen_mi_store(&b, out_mem64(0),
gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(0)));
gen_mi_store(&b, out_mem64(8),
gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(-1)));
gen_mi_store(&b, out_mem64(16),
gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(0))));
gen_mi_store(&b, out_mem64(24),
gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(-1))));
gen_mi_store(&b, out_mem64(32),
gen_mi_iadd(&b, in_mem64(0), gen_mi_imm(add)));
gen_mi_store(&b, out_mem64(40),
gen_mi_iadd(&b, in_mem64(0), gen_mi_inot(&b, gen_mi_imm(add))));
gen_mi_store(&b, out_mem64(48),
gen_mi_iadd(&b, gen_mi_imm(0), in_mem64(0)));
gen_mi_store(&b, out_mem64(56),
gen_mi_iadd(&b, gen_mi_imm(-1), in_mem64(0)));
gen_mi_store(&b, out_mem64(64),
gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(0)), in_mem64(0)));
gen_mi_store(&b, out_mem64(72),
gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(-1)), in_mem64(0)));
gen_mi_store(&b, out_mem64(80),
gen_mi_iadd(&b, gen_mi_imm(add), in_mem64(0)));
gen_mi_store(&b, out_mem64(88),
gen_mi_iadd(&b, gen_mi_inot(&b, gen_mi_imm(add)), in_mem64(0)));
mi_store(&b, out_mem64(0),
mi_iadd(&b, in_mem64(0), mi_imm(0)));
mi_store(&b, out_mem64(8),
mi_iadd(&b, in_mem64(0), mi_imm(-1)));
mi_store(&b, out_mem64(16),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
mi_store(&b, out_mem64(24),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
mi_store(&b, out_mem64(32),
mi_iadd(&b, in_mem64(0), mi_imm(add)));
mi_store(&b, out_mem64(40),
mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
mi_store(&b, out_mem64(48),
mi_iadd(&b, mi_imm(0), in_mem64(0)));
mi_store(&b, out_mem64(56),
mi_iadd(&b, mi_imm(-1), in_mem64(0)));
mi_store(&b, out_mem64(64),
mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
mi_store(&b, out_mem64(72),
mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
mi_store(&b, out_mem64(80),
mi_iadd(&b, mi_imm(add), in_mem64(0)));
mi_store(&b, out_mem64(88),
mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));
// And some add_imm just for good measure
gen_mi_store(&b, out_mem64(96), gen_mi_iadd_imm(&b, in_mem64(0), 0));
gen_mi_store(&b, out_mem64(104), gen_mi_iadd_imm(&b, in_mem64(0), add));
mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));
submit_batch();
@@ -537,7 +537,7 @@ TEST_F(gen_mi_builder_test, add_imm)
EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}
TEST_F(gen_mi_builder_test, ilt_uge)
TEST_F(mi_builder_test, ilt_uge)
{
uint64_t values[8] = {
0x0123456789abcdef,
@@ -553,10 +553,10 @@ TEST_F(gen_mi_builder_test, ilt_uge)
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
gen_mi_store(&b, out_mem64(i * 128 + j * 16 + 0),
gen_mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
gen_mi_store(&b, out_mem64(i * 128 + j * 16 + 8),
gen_mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
mi_store(&b, out_mem64(i * 128 + j * 16 + 0),
mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
mi_store(&b, out_mem64(i * 128 + j * 16 + 8),
mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
}
}
@@ -565,15 +565,15 @@ TEST_F(gen_mi_builder_test, ilt_uge)
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
uint64_t *out_u64 = (uint64_t *)(output + i * 128 + j * 16);
EXPECT_EQ_IMM(out_u64[0], gen_mi_ult(&b, gen_mi_imm(values[i]),
gen_mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[1], gen_mi_uge(&b, gen_mi_imm(values[i]),
gen_mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
mi_imm(values[j])));
EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
mi_imm(values[j])));
}
}
}
TEST_F(gen_mi_builder_test, z_nz)
TEST_F(mi_builder_test, z_nz)
{
uint64_t values[8] = {
0,
@@ -585,20 +585,20 @@ TEST_F(gen_mi_builder_test, z_nz)
memcpy(input, values, sizeof(values));
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
gen_mi_store(&b, out_mem64(i * 16 + 0), gen_mi_nz(&b, in_mem64(i * 8)));
gen_mi_store(&b, out_mem64(i * 16 + 8), gen_mi_z(&b, in_mem64(i * 8)));
mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
}
submit_batch();
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
uint64_t *out_u64 = (uint64_t *)(output + i * 16);
EXPECT_EQ_IMM(out_u64[0], gen_mi_nz(&b, gen_mi_imm(values[i])));
EXPECT_EQ_IMM(out_u64[1], gen_mi_z(&b, gen_mi_imm(values[i])));
EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
}
}
TEST_F(gen_mi_builder_test, iand)
TEST_F(mi_builder_test, iand)
{
const uint64_t values[2] = {
0x0123456789abcdef,
@@ -606,15 +606,15 @@ TEST_F(gen_mi_builder_test, iand)
};
memcpy(input, values, sizeof(values));
gen_mi_store(&b, out_mem64(0), gen_mi_iand(&b, in_mem64(0), in_mem64(8)));
mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));
submit_batch();
EXPECT_EQ_IMM(*(uint64_t *)output, gen_mi_iand(&b, gen_mi_imm(values[0]),
gen_mi_imm(values[1])));
EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
mi_imm(values[1])));
}
TEST_F(gen_mi_builder_test, imul_imm)
TEST_F(mi_builder_test, imul_imm)
{
uint64_t lhs[2] = {
0x0123456789abcdef,
@@ -636,8 +636,8 @@ TEST_F(gen_mi_builder_test, imul_imm)
for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
gen_mi_store(&b, out_mem64(i * 160 + j * 8),
gen_mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
mi_store(&b, out_mem64(i * 160 + j * 8),
mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
}
}
@@ -646,12 +646,12 @@ TEST_F(gen_mi_builder_test, imul_imm)
for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
gen_mi_imul_imm(&b, gen_mi_imm(lhs[i]), rhs[j]));
mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
}
}
}
TEST_F(gen_mi_builder_test, ishl_imm)
TEST_F(mi_builder_test, ishl_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
@@ -659,17 +659,17 @@ TEST_F(gen_mi_builder_test, ishl_imm)
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
gen_mi_store(&b, out_mem64(i * 8), gen_mi_ishl_imm(&b, in_mem64(0), i));
mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
gen_mi_ishl_imm(&b, gen_mi_imm(value), i));
mi_ishl_imm(&b, mi_imm(value), i));
}
}
TEST_F(gen_mi_builder_test, ushr32_imm)
TEST_F(mi_builder_test, ushr32_imm)
{
const uint64_t value = 0x0123456789abcdef;
memcpy(input, &value, sizeof(value));
@@ -677,17 +677,17 @@ TEST_F(gen_mi_builder_test, ushr32_imm)
const unsigned max_shift = 64;
for (unsigned i = 0; i <= max_shift; i++)
gen_mi_store(&b, out_mem64(i * 8), gen_mi_ushr32_imm(&b, in_mem64(0), i));
mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));
submit_batch();
for (unsigned i = 0; i <= max_shift; i++) {
EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
gen_mi_ushr32_imm(&b, gen_mi_imm(value), i));
mi_ushr32_imm(&b, mi_imm(value), i));
}
}
TEST_F(gen_mi_builder_test, udiv32_imm)
TEST_F(mi_builder_test, udiv32_imm)
{
/* Some random 32-bit unsigned integers. The first four have been
* hand-chosen just to ensure some good low integers; the rest were
@@ -704,8 +704,8 @@ TEST_F(gen_mi_builder_test, udiv32_imm)
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
gen_mi_store(&b, out_mem32(i * 80 + j * 4),
gen_mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
mi_store(&b, out_mem32(i * 80 + j * 4),
mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
}
}
@@ -714,12 +714,12 @@ TEST_F(gen_mi_builder_test, udiv32_imm)
for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
gen_mi_udiv32_imm(&b, gen_mi_imm(values[i]), values[j]));
mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
}
}
}
TEST_F(gen_mi_builder_test, store_if)
TEST_F(mi_builder_test, store_if)
{
uint64_t u64 = 0xb453b411deadc0deull;
uint32_t u32 = 0x1337d00d;
@@ -731,8 +731,8 @@ TEST_F(gen_mi_builder_test, store_if)
mip.CompareOperation = COMPARE_TRUE;
}
gen_mi_store_if(&b, out_mem64(0), gen_mi_imm(u64));
gen_mi_store_if(&b, out_mem32(8), gen_mi_imm(u32));
mi_store_if(&b, out_mem64(0), mi_imm(u64));
mi_store_if(&b, out_mem32(8), mi_imm(u32));
/* Set predicate to false, write garbage that shouldn't land */
emit_cmd(GENX(MI_PREDICATE), mip) {
@@ -741,8 +741,8 @@ TEST_F(gen_mi_builder_test, store_if)
mip.CompareOperation = COMPARE_FALSE;
}
gen_mi_store_if(&b, out_mem64(0), gen_mi_imm(0xd0d0d0d0d0d0d0d0ull));
gen_mi_store_if(&b, out_mem32(8), gen_mi_imm(0xc000c000));
mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));
submit_batch();


@@ -3021,7 +3021,7 @@ struct anv_cmd_buffer {
* instructions storing performance counters. The array length is
* anv_physical_device::n_perf_query_commands.
*/
struct gen_mi_address_token *self_mod_locations;
struct mi_address_token *self_mod_locations;
/**
* Index tracking which of the self_mod_locations items have already been


@@ -39,7 +39,7 @@
* - GPR 14 for secondary command buffer returns
* - GPR 15 for conditional rendering
*/
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
#define MI_BUILDER_NUM_ALLOC_GPRS 14
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#include "common/mi_builder.h"
@@ -479,8 +479,8 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_END_OF_PIPE_SYNC_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
for (uint32_t a = 0; a < layer_count; a++) {
const uint32_t layer = base_layer + a;
@@ -543,8 +543,7 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
if (isl_aux_usage_has_ccs(image->planes[plane].aux_usage))
new_aux_entry |= GEN_AUX_MAP_ENTRY_VALID_BIT;
gen_mi_store(&b, gen_mi_mem64(aux_entry_address),
gen_mi_imm(new_aux_entry));
mi_store(&b, mi_mem64(aux_entry_address), mi_imm(new_aux_entry));
}
}
@@ -778,12 +777,12 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
enum isl_aux_op resolve_op,
enum anv_fast_clear_type fast_clear_supported)
{
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
const struct gen_mi_value fast_clear_type =
gen_mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
image, aspect));
const struct mi_value fast_clear_type =
mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
image, aspect));
if (resolve_op == ISL_AUX_OP_FULL_RESOLVE) {
/* In this case, we're doing a full resolve which means we want the
@@ -794,13 +793,12 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
* if the first slice has been fast-cleared, it is also marked as
* compressed. See also set_image_fast_clear_state.
*/
const struct gen_mi_value compression_state =
gen_mi_mem32(anv_image_get_compression_state_addr(cmd_buffer->device,
image, aspect,
level, array_layer));
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0),
compression_state);
gen_mi_store(&b, compression_state, gen_mi_imm(0));
const struct mi_value compression_state =
mi_mem32(anv_image_get_compression_state_addr(cmd_buffer->device,
image, aspect,
level, array_layer));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), compression_state);
mi_store(&b, compression_state, mi_imm(0));
if (level == 0 && array_layer == 0) {
/* If the predicate is true, we want to write 0 to the fast clear type
@@ -808,10 +806,10 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
*
* clear_type = clear_type & ~predicate;
*/
struct gen_mi_value new_fast_clear_type =
gen_mi_iand(&b, fast_clear_type,
gen_mi_inot(&b, gen_mi_reg64(MI_PREDICATE_SRC0)));
gen_mi_store(&b, fast_clear_type, new_fast_clear_type);
struct mi_value new_fast_clear_type =
mi_iand(&b, fast_clear_type,
mi_inot(&b, mi_reg64(MI_PREDICATE_SRC0)));
mi_store(&b, fast_clear_type, new_fast_clear_type);
}
} else if (level == 0 && array_layer == 0) {
/* In this case, we are doing a partial resolve to get rid of fast-clear
@@ -822,19 +820,18 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
assert(fast_clear_supported < ANV_FAST_CLEAR_ANY);
/* We need to compute (fast_clear_supported < image->fast_clear) */
struct gen_mi_value pred =
gen_mi_ult(&b, gen_mi_imm(fast_clear_supported), fast_clear_type);
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0),
gen_mi_value_ref(&b, pred));
struct mi_value pred =
mi_ult(&b, mi_imm(fast_clear_supported), fast_clear_type);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), mi_value_ref(&b, pred));
/* If the predicate is true, we want to write 0 to the fast clear type
* and, if it's false, leave it alone. We can do this by writing
*
* clear_type = clear_type & ~predicate;
*/
struct gen_mi_value new_fast_clear_type =
gen_mi_iand(&b, fast_clear_type, gen_mi_inot(&b, pred));
gen_mi_store(&b, fast_clear_type, new_fast_clear_type);
struct mi_value new_fast_clear_type =
mi_iand(&b, fast_clear_type, mi_inot(&b, pred));
mi_store(&b, fast_clear_type, new_fast_clear_type);
} else {
/* In this case, we're trying to do a partial resolve on a slice that
* doesn't have clear color. There's nothing to do.
@@ -844,7 +841,7 @@ anv_cmd_compute_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
}
/* Set src1 to 0 and use a != condition */
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
@@ -863,11 +860,11 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
enum isl_aux_op resolve_op,
enum anv_fast_clear_type fast_clear_supported)
{
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct gen_mi_value fast_clear_type_mem =
gen_mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
struct mi_value fast_clear_type_mem =
mi_mem32(anv_image_get_fast_clear_type_addr(cmd_buffer->device,
image, aspect));
/* This only works for partial resolves and only when the clear color is
@@ -885,9 +882,9 @@ anv_cmd_simple_resolve_predicate(struct anv_cmd_buffer *cmd_buffer,
* can't sample from CCS surfaces. It's enough to just load the fast clear
* state into the predicate register.
*/
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0), fast_clear_type_mem);
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
gen_mi_store(&b, fast_clear_type_mem, gen_mi_imm(0));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), fast_clear_type_mem);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
mi_store(&b, fast_clear_type_mem, mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
@@ -1075,13 +1072,13 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
if (copy_from_surface_state) {
gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
} else {
gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
/* Updating a surface state object may require that the state cache be
* invalidated. From the SKL PRM, Shared Functions -> State -> State
@@ -1820,10 +1817,10 @@ genX(CmdExecuteCommands)(
* with conditional rendering, we should satisfy this dependency
* regardless of conditional rendering being enabled in primary.
*/
struct gen_mi_builder b;
gen_mi_builder_init(&b, &primary->batch);
gen_mi_store(&b, gen_mi_reg64(ANV_PREDICATE_RESULT_REG),
gen_mi_imm(UINT64_MAX));
struct mi_builder b;
mi_builder_init(&b, &primary->batch);
mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG),
mi_imm(UINT64_MAX));
}
}
#endif
@@ -3936,23 +3933,20 @@ void genX(CmdDrawIndirectByteCountEXT)(
if (!pipeline->use_primitive_replication)
instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct gen_mi_value count =
gen_mi_mem32(anv_address_add(counter_buffer->address,
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct mi_value count =
mi_mem32(anv_address_add(counter_buffer->address,
counterBufferOffset));
if (counterOffset)
count = gen_mi_isub(&b, count, gen_mi_imm(counterOffset));
count = gen_mi_udiv32_imm(&b, count, vertexStride);
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_VERTEX_COUNT), count);
count = mi_isub(&b, count, mi_imm(counterOffset));
count = mi_udiv32_imm(&b, count, vertexStride);
mi_store(&b, mi_reg32(GEN7_3DPRIM_VERTEX_COUNT), count);
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_START_VERTEX),
gen_mi_imm(firstVertex));
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_INSTANCE_COUNT),
gen_mi_imm(instanceCount));
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_START_INSTANCE),
gen_mi_imm(firstInstance));
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_BASE_VERTEX), gen_mi_imm(0));
mi_store(&b, mi_reg32(GEN7_3DPRIM_START_VERTEX), mi_imm(firstVertex));
mi_store(&b, mi_reg32(GEN7_3DPRIM_INSTANCE_COUNT), mi_imm(instanceCount));
mi_store(&b, mi_reg32(GEN7_3DPRIM_START_INSTANCE), mi_imm(firstInstance));
mi_store(&b, mi_reg32(GEN7_3DPRIM_BASE_VERTEX), mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
prim.IndirectParameterEnable = true;
@@ -3969,36 +3963,36 @@ load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
struct anv_address addr,
bool indexed)
{
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_VERTEX_COUNT),
gen_mi_mem32(anv_address_add(addr, 0)));
mi_store(&b, mi_reg32(GEN7_3DPRIM_VERTEX_COUNT),
mi_mem32(anv_address_add(addr, 0)));
struct gen_mi_value instance_count = gen_mi_mem32(anv_address_add(addr, 4));
struct mi_value instance_count = mi_mem32(anv_address_add(addr, 4));
unsigned view_count = anv_subpass_view_count(cmd_buffer->state.subpass);
if (view_count > 1) {
#if GEN_IS_HASWELL || GEN_GEN >= 8
instance_count = gen_mi_imul_imm(&b, instance_count, view_count);
instance_count = mi_imul_imm(&b, instance_count, view_count);
#else
anv_finishme("Multiview + indirect draw requires MI_MATH; "
"MI_MATH is not supported on Ivy Bridge");
#endif
}
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_INSTANCE_COUNT), instance_count);
mi_store(&b, mi_reg32(GEN7_3DPRIM_INSTANCE_COUNT), instance_count);
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_START_VERTEX),
gen_mi_mem32(anv_address_add(addr, 8)));
mi_store(&b, mi_reg32(GEN7_3DPRIM_START_VERTEX),
mi_mem32(anv_address_add(addr, 8)));
if (indexed) {
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_BASE_VERTEX),
gen_mi_mem32(anv_address_add(addr, 12)));
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_START_INSTANCE),
gen_mi_mem32(anv_address_add(addr, 16)));
mi_store(&b, mi_reg32(GEN7_3DPRIM_BASE_VERTEX),
mi_mem32(anv_address_add(addr, 12)));
mi_store(&b, mi_reg32(GEN7_3DPRIM_START_INSTANCE),
mi_mem32(anv_address_add(addr, 16)));
} else {
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_START_INSTANCE),
gen_mi_mem32(anv_address_add(addr, 12)));
gen_mi_store(&b, gen_mi_reg32(GEN7_3DPRIM_BASE_VERTEX), gen_mi_imm(0));
mi_store(&b, mi_reg32(GEN7_3DPRIM_START_INSTANCE),
mi_mem32(anv_address_add(addr, 12)));
mi_store(&b, mi_reg32(GEN7_3DPRIM_BASE_VERTEX), mi_imm(0));
}
}
@@ -4101,27 +4095,25 @@ void genX(CmdDrawIndexedIndirect)(
}
}
static struct gen_mi_value
static struct mi_value
prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b,
struct mi_builder *b,
struct anv_address count_address,
const bool conditional_render_enabled)
{
struct gen_mi_value ret = gen_mi_imm(0);
struct mi_value ret = mi_imm(0);
if (conditional_render_enabled) {
#if GEN_GEN >= 8 || GEN_IS_HASWELL
ret = gen_mi_new_gpr(b);
gen_mi_store(b, gen_mi_value_ref(b, ret), gen_mi_mem32(count_address));
ret = mi_new_gpr(b);
mi_store(b, mi_value_ref(b, ret), mi_mem32(count_address));
#endif
} else {
/* Upload the current draw count from the draw parameters buffer to
* MI_PREDICATE_SRC0.
*/
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0),
gen_mi_mem32(count_address));
gen_mi_store(b, gen_mi_reg32(MI_PREDICATE_SRC1 + 4), gen_mi_imm(0));
mi_store(b, mi_reg64(MI_PREDICATE_SRC0), mi_mem32(count_address));
mi_store(b, mi_reg32(MI_PREDICATE_SRC1 + 4), mi_imm(0));
}
return ret;
@@ -4129,11 +4121,11 @@ prepare_for_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
static void
emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b,
struct mi_builder *b,
uint32_t draw_index)
{
/* Upload the index of the current primitive to MI_PREDICATE_SRC1. */
gen_mi_store(b, gen_mi_reg32(MI_PREDICATE_SRC1), gen_mi_imm(draw_index));
mi_store(b, mi_reg32(MI_PREDICATE_SRC1), mi_imm(draw_index));
if (draw_index == 0) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
@@ -4161,22 +4153,22 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
static void
emit_draw_count_predicate_with_conditional_render(
struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b,
struct mi_builder *b,
uint32_t draw_index,
struct gen_mi_value max)
struct mi_value max)
{
struct gen_mi_value pred = gen_mi_ult(b, gen_mi_imm(draw_index), max);
pred = gen_mi_iand(b, pred, gen_mi_reg64(ANV_PREDICATE_RESULT_REG));
struct mi_value pred = mi_ult(b, mi_imm(draw_index), max);
pred = mi_iand(b, pred, mi_reg64(ANV_PREDICATE_RESULT_REG));
#if GEN_GEN >= 8
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_RESULT), pred);
mi_store(b, mi_reg64(MI_PREDICATE_RESULT), pred);
#else
/* MI_PREDICATE_RESULT is not whitelisted in i915 command parser
* so we emit MI_PREDICATE to set it.
*/
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0), pred);
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
mi_store(b, mi_reg64(MI_PREDICATE_SRC0), pred);
mi_store(b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
@@ -4208,11 +4200,11 @@ void genX(CmdDrawIndirectCount)(
genX(cmd_buffer_flush_state)(cmd_buffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
struct gen_mi_value max =
struct mi_value max =
prepare_for_draw_count_predicate(cmd_buffer, &b, count_address,
cmd_state->conditional_render_enabled);
@@ -4222,7 +4214,7 @@ void genX(CmdDrawIndirectCount)(
#if GEN_GEN >= 8 || GEN_IS_HASWELL
if (cmd_state->conditional_render_enabled) {
emit_draw_count_predicate_with_conditional_render(
cmd_buffer, &b, i, gen_mi_value_ref(&b, max));
cmd_buffer, &b, i, mi_value_ref(&b, max));
} else {
emit_draw_count_predicate(cmd_buffer, &b, i);
}
@@ -4255,7 +4247,7 @@ void genX(CmdDrawIndirectCount)(
offset += stride;
}
gen_mi_value_unref(&b, max);
mi_value_unref(&b, max);
}
void genX(CmdDrawIndexedIndirectCount)(
@@ -4279,11 +4271,11 @@ void genX(CmdDrawIndexedIndirectCount)(
genX(cmd_buffer_flush_state)(cmd_buffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct anv_address count_address =
anv_address_add(count_buffer->address, countBufferOffset);
struct gen_mi_value max =
struct mi_value max =
prepare_for_draw_count_predicate(cmd_buffer, &b, count_address,
cmd_state->conditional_render_enabled);
@@ -4293,7 +4285,7 @@ void genX(CmdDrawIndexedIndirectCount)(
#if GEN_GEN >= 8 || GEN_IS_HASWELL
if (cmd_state->conditional_render_enabled) {
emit_draw_count_predicate_with_conditional_render(
cmd_buffer, &b, i, gen_mi_value_ref(&b, max));
cmd_buffer, &b, i, mi_value_ref(&b, max));
} else {
emit_draw_count_predicate(cmd_buffer, &b, i);
}
@@ -4327,7 +4319,7 @@ void genX(CmdDrawIndexedIndirectCount)(
offset += stride;
}
gen_mi_value_unref(&b, max);
mi_value_unref(&b, max);
}
void genX(CmdBeginTransformFeedbackEXT)(
@@ -4755,21 +4747,21 @@ void genX(CmdDispatchIndirect)(
genX(cmd_buffer_flush_compute_state)(cmd_buffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct gen_mi_value size_x = gen_mi_mem32(anv_address_add(addr, 0));
struct gen_mi_value size_y = gen_mi_mem32(anv_address_add(addr, 4));
struct gen_mi_value size_z = gen_mi_mem32(anv_address_add(addr, 8));
struct mi_value size_x = mi_mem32(anv_address_add(addr, 0));
struct mi_value size_y = mi_mem32(anv_address_add(addr, 4));
struct mi_value size_z = mi_mem32(anv_address_add(addr, 8));
gen_mi_store(&b, gen_mi_reg32(GPGPU_DISPATCHDIMX), size_x);
gen_mi_store(&b, gen_mi_reg32(GPGPU_DISPATCHDIMY), size_y);
gen_mi_store(&b, gen_mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMX), size_x);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMY), size_y);
mi_store(&b, mi_reg32(GPGPU_DISPATCHDIMZ), size_z);
#if GEN_GEN <= 7
/* predicate = (compute_dispatch_indirect_x_size == 0); */
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0), size_x);
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0), size_x);
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
@@ -4777,7 +4769,7 @@ void genX(CmdDispatchIndirect)(
}
/* predicate |= (compute_dispatch_indirect_y_size == 0); */
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_SRC0), size_y);
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_y);
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_OR;
@@ -4785,7 +4777,7 @@ void genX(CmdDispatchIndirect)(
}
/* predicate |= (compute_dispatch_indirect_z_size == 0); */
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_SRC0), size_z);
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0), size_z);
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_OR;
@@ -4802,8 +4794,8 @@ void genX(CmdDispatchIndirect)(
#if GEN_IS_HASWELL
if (cmd_buffer->state.conditional_render_enabled) {
/* predicate &= !(conditional_rendering_predicate == 0); */
gen_mi_store(&b, gen_mi_reg32(MI_PREDICATE_SRC0),
gen_mi_reg32(ANV_PREDICATE_RESULT_REG));
mi_store(&b, mi_reg32(MI_PREDICATE_SRC0),
mi_reg32(ANV_PREDICATE_RESULT_REG));
anv_batch_emit(batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
mip.CombineOperation = COMBINE_AND;
@@ -6198,12 +6190,12 @@ void
genX(cmd_emit_conditional_render_predicate)(struct anv_cmd_buffer *cmd_buffer)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC0),
gen_mi_reg32(ANV_PREDICATE_RESULT_REG));
gen_mi_store(&b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(0));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC0),
mi_reg32(ANV_PREDICATE_RESULT_REG));
mi_store(&b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(0));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOADINV;
@@ -6231,8 +6223,8 @@ void genX(CmdBeginConditionalRenderingEXT)(
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
/* Section 19.4 of the Vulkan 1.1.85 spec says:
*
@@ -6245,15 +6237,15 @@ void genX(CmdBeginConditionalRenderingEXT)(
*
* So it's perfectly fine to read a value from the buffer once.
*/
struct gen_mi_value value = gen_mi_mem32(value_address);
struct mi_value value = mi_mem32(value_address);
/* Precompute predicate result, it is necessary to support secondary
* command buffers since it is unknown if conditional rendering is
* inverted when populating them.
*/
gen_mi_store(&b, gen_mi_reg64(ANV_PREDICATE_RESULT_REG),
isInverted ? gen_mi_uge(&b, gen_mi_imm(0), value) :
gen_mi_ult(&b, gen_mi_imm(0), value));
mi_store(&b, mi_reg64(ANV_PREDICATE_RESULT_REG),
isInverted ? mi_uge(&b, mi_imm(0), value) :
mi_ult(&b, mi_imm(0), value));
}
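A brief note on the predicate math above (reasoning, not part of the patch): the comparisons are unsigned, so mi_ult(&b, mi_imm(0), value) is true exactly when value != 0, and mi_uge(&b, mi_imm(0), value) is true exactly when value == 0. Selecting between them on isInverted implements both the normal and inverted conditional-rendering tests with a single read of the value.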
void genX(CmdEndConditionalRenderingEXT)(


@@ -36,8 +36,8 @@
* - GPR 14 for perf queries
* - GPR 15 for conditional rendering
*/
#define GEN_MI_BUILDER_NUM_ALLOC_GPRS 14
#define GEN_MI_BUILDER_CAN_WRITE_BATCH GEN_GEN >= 8
#define MI_BUILDER_NUM_ALLOC_GPRS 14
#define MI_BUILDER_CAN_WRITE_BATCH GEN_GEN >= 8
#define __gen_get_batch_dwords anv_batch_emit_dwords
#define __gen_address_offset anv_address_add
#define __gen_get_batch_address(b, a) anv_address_physical(anv_batch_address(b, a))
@@ -217,16 +217,16 @@ VkResult genX(CreateQueryPool)(
#if GEN_GEN >= 8
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
struct gen_mi_builder b;
struct mi_builder b;
struct anv_batch batch = {
.start = pool->bo->map + khr_perf_query_preamble_offset(pool, p),
.end = pool->bo->map + khr_perf_query_preamble_offset(pool, p) + pool->data_offset,
};
batch.next = batch.start;
gen_mi_builder_init(&b, &batch);
gen_mi_store(&b, gen_mi_reg64(ANV_PERF_QUERY_OFFSET_REG),
gen_mi_imm(p * pool->pass_size));
mi_builder_init(&b, &batch);
mi_store(&b, mi_reg64(ANV_PERF_QUERY_OFFSET_REG),
mi_imm(p * pool->pass_size));
anv_batch_emit(&batch, GENX(MI_BATCH_BUFFER_END), bbe);
}
}
@@ -632,11 +632,11 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
}
static void
emit_query_mi_availability(struct gen_mi_builder *b,
emit_query_mi_availability(struct mi_builder *b,
struct anv_address addr,
bool available)
{
gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available));
mi_store(b, mi_mem64(addr), mi_imm(available));
}
static void
@@ -661,7 +661,7 @@ emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
*/
static void
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b, struct anv_query_pool *pool,
struct mi_builder *b, struct anv_query_pool *pool,
uint32_t first_index, uint32_t num_queries)
{
switch (pool->type) {
@@ -690,7 +690,7 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
emit_query_mi_availability(b, slot_addr, true);
}
break;
@@ -699,9 +699,8 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
for (uint32_t i = 0; i < num_queries; i++) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
gen_mi_memset(b,
khr_perf_query_data_address(pool, first_index + i, p, false),
0, 2 * pool->snapshot_size);
mi_memset(b, khr_perf_query_data_address(pool, first_index + i, p, false),
0, 2 * pool->snapshot_size);
emit_query_mi_availability(b,
khr_perf_query_availability_address(pool, first_index + i, p),
true);
@@ -715,7 +714,7 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
emit_query_mi_availability(b, slot_addr, true);
}
break;
@@ -746,8 +745,8 @@ void genX(CmdResetQueryPool)(
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
for (uint32_t i = 0; i < queryCount; i++)
emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
@@ -756,8 +755,8 @@ void genX(CmdResetQueryPool)(
#if GEN_GEN >= 8
case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: {
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
for (uint32_t i = 0; i < queryCount; i++) {
for (uint32_t p = 0; p < pool->n_passes; p++) {
@@ -772,8 +771,8 @@ void genX(CmdResetQueryPool)(
#endif
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
for (uint32_t i = 0; i < queryCount; i++)
emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
@@ -824,33 +823,32 @@ static const uint32_t vk_pipeline_stat_to_reg[] = {
};
static void
emit_pipeline_stat(struct gen_mi_builder *b, uint32_t stat,
emit_pipeline_stat(struct mi_builder *b, uint32_t stat,
struct anv_address addr)
{
STATIC_ASSERT(ANV_PIPELINE_STATISTICS_MASK ==
(1 << ARRAY_SIZE(vk_pipeline_stat_to_reg)) - 1);
assert(stat < ARRAY_SIZE(vk_pipeline_stat_to_reg));
gen_mi_store(b, gen_mi_mem64(addr),
gen_mi_reg64(vk_pipeline_stat_to_reg[stat]));
mi_store(b, mi_mem64(addr), mi_reg64(vk_pipeline_stat_to_reg[stat]));
}
static void
emit_xfb_query(struct gen_mi_builder *b, uint32_t stream,
emit_xfb_query(struct mi_builder *b, uint32_t stream,
struct anv_address addr)
{
assert(stream < MAX_XFB_STREAMS);
gen_mi_store(b, gen_mi_mem64(anv_address_add(addr, 0)),
gen_mi_reg64(GENX(SO_NUM_PRIMS_WRITTEN0_num) + stream * 8));
gen_mi_store(b, gen_mi_mem64(anv_address_add(addr, 16)),
gen_mi_reg64(GENX(SO_PRIM_STORAGE_NEEDED0_num) + stream * 8));
mi_store(b, mi_mem64(anv_address_add(addr, 0)),
mi_reg64(GENX(SO_NUM_PRIMS_WRITTEN0_num) + stream * 8));
mi_store(b, mi_mem64(anv_address_add(addr, 16)),
mi_reg64(GENX(SO_PRIM_STORAGE_NEEDED0_num) + stream * 8));
}
static void
emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
struct anv_query_pool *pool,
struct gen_mi_builder *b,
struct mi_builder *b,
struct anv_address query_addr,
bool end)
{
@@ -875,12 +873,12 @@ emit_perf_intel_query(struct anv_cmd_buffer *cmd_buffer,
case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_B:
case GEN_PERF_QUERY_FIELD_TYPE_SRM_OA_C: {
struct anv_address addr = anv_address_add(data_addr, field->location);
struct gen_mi_value src = field->size == 8 ?
gen_mi_reg64(field->mmio_offset) :
gen_mi_reg32(field->mmio_offset);
struct gen_mi_value dst = field->size == 8 ?
gen_mi_mem64(addr) : gen_mi_mem32(addr);
gen_mi_store(b, dst, src);
struct mi_value src = field->size == 8 ?
mi_reg64(field->mmio_offset) :
mi_reg32(field->mmio_offset);
struct mi_value dst = field->size == 8 ?
mi_mem64(addr) : mi_mem32(addr);
mi_store(b, dst, src);
break;
}
@@ -911,8 +909,8 @@ void genX(CmdBeginQueryIndexedEXT)(
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
struct anv_address query_addr = anv_query_address(pool, query);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
@@ -957,43 +955,43 @@ void genX(CmdBeginQueryIndexedEXT)(
for (uint32_t r = 0; r < layout->n_fields; r++) {
const struct gen_perf_query_field *field =
&layout->fields[end ? r : (layout->n_fields - 1 - r)];
struct gen_mi_value reg_addr =
gen_mi_iadd(
struct mi_value reg_addr =
mi_iadd(
&b,
gen_mi_imm(gen_canonical_address(pool->bo->offset +
khr_perf_query_data_offset(pool, query, 0, end) +
field->location)),
gen_mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] = gen_mi_store_address(&b, reg_addr);
mi_imm(gen_canonical_address(pool->bo->offset +
khr_perf_query_data_offset(pool, query, 0, end) +
field->location)),
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] = mi_store_address(&b, reg_addr);
if (field->type != GEN_PERF_QUERY_FIELD_TYPE_MI_RPC &&
field->size == 8) {
reg_addr =
gen_mi_iadd(
mi_iadd(
&b,
gen_mi_imm(gen_canonical_address(pool->bo->offset +
khr_perf_query_data_offset(pool, query, 0, end) +
field->location + 4)),
gen_mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] = gen_mi_store_address(&b, reg_addr);
mi_imm(gen_canonical_address(pool->bo->offset +
khr_perf_query_data_offset(pool, query, 0, end) +
field->location + 4)),
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] = mi_store_address(&b, reg_addr);
}
}
}
struct gen_mi_value availability_write_offset =
gen_mi_iadd(
struct mi_value availability_write_offset =
mi_iadd(
&b,
gen_mi_imm(
mi_imm(
gen_canonical_address(
pool->bo->offset +
khr_perf_query_availability_offset(pool, query, 0 /* pass */))),
gen_mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
mi_reg64(ANV_PERF_QUERY_OFFSET_REG));
cmd_buffer->self_mod_locations[reloc_idx++] =
gen_mi_store_address(&b, availability_write_offset);
mi_store_address(&b, availability_write_offset);
assert(reloc_idx == pdevice->n_perf_query_commands);
gen_mi_self_mod_barrier(&b);
mi_self_mod_barrier(&b);
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.CommandStreamerStallEnable = true;
@@ -1013,10 +1011,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_REPORT_PERF_COUNT_length),
GENX(MI_REPORT_PERF_COUNT),
.MemoryAddress = query_addr /* Will be overwritten */);
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
break;
case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
@@ -1029,10 +1027,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset,
.MemoryAddress = query_addr /* Will be overwritten */ );
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
if (field->size == 8) {
dws =
anv_batch_emitn(&cmd_buffer->batch,
@@ -1040,10 +1038,10 @@ void genX(CmdBeginQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset + 4,
.MemoryAddress = query_addr /* Will be overwritten */ );
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
}
break;
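/* MI_STORE_REGISTER_MEM captures a single 32-bit register, so an 8-byte
 * counter takes two SRMs: the low dword from field->mmio_offset and the
 * high dword from field->mmio_offset + 4, each with its own address token
 * to resolve.
 */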
@@ -1088,8 +1086,8 @@ void genX(CmdEndQueryIndexedEXT)(
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
struct anv_address query_addr = anv_query_address(pool, query);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
switch (pool->type) {
case VK_QUERY_TYPE_OCCLUSION:
@@ -1150,10 +1148,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_REPORT_PERF_COUNT_length),
GENX(MI_REPORT_PERF_COUNT),
.MemoryAddress = query_addr /* Will be overwritten */);
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_REPORT_PERF_COUNT_MemoryAddress_start) / 8);
break;
case GEN_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:
@@ -1166,10 +1164,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset,
.MemoryAddress = query_addr /* Will be overwritten */ );
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
if (field->size == 8) {
dws =
anv_batch_emitn(&cmd_buffer->batch,
@@ -1177,10 +1175,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_REGISTER_MEM),
.RegisterAddress = field->mmio_offset + 4,
.MemoryAddress = query_addr /* Will be overwritten */ );
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8);
}
break;
@@ -1195,10 +1193,10 @@ void genX(CmdEndQueryIndexedEXT)(
GENX(MI_STORE_DATA_IMM_length),
GENX(MI_STORE_DATA_IMM),
.ImmediateData = true);
_gen_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_DATA_IMM_Address_start) / 8);
_mi_resolve_address_token(&b,
cmd_buffer->self_mod_locations[cmd_buffer->perf_reloc_idx++],
dws +
GENX(MI_STORE_DATA_IMM_Address_start) / 8);
assert(cmd_buffer->perf_reloc_idx == pdevice->n_perf_query_commands);
break;
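/* The availability flag is emitted after all of the counter snapshots, as
 * an MI_STORE_DATA_IMM whose destination goes through the same token
 * mechanism; the assert checks that exactly the expected number of
 * patchable commands was emitted.
 */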
@@ -1211,8 +1209,8 @@ void genX(CmdEndQueryIndexedEXT)(
pc.StallAtPixelScoreboard = true;
}
uint32_t marker_offset = intel_perf_marker_offset();
gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, marker_offset)),
gen_mi_imm(cmd_buffer->intel_perf_marker));
mi_store(&b, mi_mem64(anv_address_add(query_addr, marker_offset)),
mi_imm(cmd_buffer->intel_perf_marker));
emit_perf_intel_query(cmd_buffer, pool, &b, query_addr, true);
emit_query_mi_availability(&b, query_addr, true);
break;
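/* The INTEL performance query path additionally tags the end snapshot
 * with a caller-supplied marker: a plain mi_store of the immediate
 * cmd_buffer->intel_perf_marker into the query slot, followed by the
 * usual availability write.
 */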
@@ -1252,13 +1250,13 @@ void genX(CmdWriteTimestamp)(
assert(pool->type == VK_QUERY_TYPE_TIMESTAMP);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
switch (pipelineStage) {
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
gen_mi_store(&b, gen_mi_mem64(anv_address_add(query_addr, 8)),
gen_mi_reg64(TIMESTAMP));
mi_store(&b, mi_mem64(anv_address_add(query_addr, 8)),
mi_reg64(TIMESTAMP));
break;
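/* For TOP_OF_PIPE the timestamp is captured by storing the TIMESTAMP
 * register directly, with no pipeline synchronization.  The default case
 * below (not shown in this hunk) presumably goes through a post-sync
 * PIPE_CONTROL write instead, so the value reflects bottom-of-pipe
 * completion.
 */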
default:
@@ -1309,16 +1307,16 @@ void genX(CmdWriteTimestamp)(
*/
static void
gpu_write_query_result_cond(struct anv_cmd_buffer *cmd_buffer,
struct gen_mi_builder *b,
struct mi_builder *b,
struct anv_address poll_addr,
struct anv_address dst_addr,
uint64_t ref_value,
VkQueryResultFlags flags,
uint32_t value_index,
struct gen_mi_value query_result)
struct mi_value query_result)
{
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC0), gen_mi_mem64(poll_addr));
gen_mi_store(b, gen_mi_reg64(MI_PREDICATE_SRC1), gen_mi_imm(ref_value));
mi_store(b, mi_reg64(MI_PREDICATE_SRC0), mi_mem64(poll_addr));
mi_store(b, mi_reg64(MI_PREDICATE_SRC1), mi_imm(ref_value));
anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
mip.LoadOperation = LOAD_LOAD;
mip.CombineOperation = COMBINE_SET;
@@ -1327,36 +1325,36 @@ gpu_write_query_result_cond(struct anv_cmd_buffer *cmd_buffer,
if (flags & VK_QUERY_RESULT_64_BIT) {
struct anv_address res_addr = anv_address_add(dst_addr, value_index * 8);
gen_mi_store_if(b, gen_mi_mem64(res_addr), query_result);
mi_store_if(b, mi_mem64(res_addr), query_result);
} else {
struct anv_address res_addr = anv_address_add(dst_addr, value_index * 4);
gen_mi_store_if(b, gen_mi_mem32(res_addr), query_result);
mi_store_if(b, mi_mem32(res_addr), query_result);
}
}
#endif /* GEN_GEN >= 8 || GEN_IS_HASWELL */
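/* How the conditional write above works: the availability qword at
 * poll_addr is loaded into MI_PREDICATE_SRC0 and the reference value into
 * MI_PREDICATE_SRC1, then MI_PREDICATE (LOAD_LOAD + COMBINE_SET, with the
 * two sources compared for equality) sets the predicate bit exactly when
 * poll_addr holds ref_value.  mi_store_if() emits a predicated store, so
 * the result only lands when that comparison passed.
 */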
static void
gpu_write_query_result(struct gen_mi_builder *b,
gpu_write_query_result(struct mi_builder *b,
struct anv_address dst_addr,
VkQueryResultFlags flags,
uint32_t value_index,
struct gen_mi_value query_result)
struct mi_value query_result)
{
if (flags & VK_QUERY_RESULT_64_BIT) {
struct anv_address res_addr = anv_address_add(dst_addr, value_index * 8);
gen_mi_store(b, gen_mi_mem64(res_addr), query_result);
mi_store(b, mi_mem64(res_addr), query_result);
} else {
struct anv_address res_addr = anv_address_add(dst_addr, value_index * 4);
gen_mi_store(b, gen_mi_mem32(res_addr), query_result);
mi_store(b, mi_mem32(res_addr), query_result);
}
}
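/* The unconditional variant of the same helper: value_index is scaled by
 * 8 or 4 depending on VK_QUERY_RESULT_64_BIT, so consecutive results are
 * packed at whichever width the application requested.
 */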
static struct gen_mi_value
compute_query_result(struct gen_mi_builder *b, struct anv_address addr)
static struct mi_value
compute_query_result(struct mi_builder *b, struct anv_address addr)
{
return gen_mi_isub(b, gen_mi_mem64(anv_address_add(addr, 8)),
gen_mi_mem64(anv_address_add(addr, 0)));
return mi_isub(b, mi_mem64(anv_address_add(addr, 8)),
mi_mem64(anv_address_add(addr, 0)));
}
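/* Query slots store a pair of 64-bit snapshots, the second 8 bytes past
 * the first; the reported value is simply their difference, computed on
 * the GPU as an MI_MATH subtraction.  In CPU terms, with made-up snapshot
 * values:
 *
 *    uint64_t begin = 100, end = 142;
 *    uint64_t result = end - begin;   // 42
 */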
void genX(CmdCopyQueryPoolResults)(
@@ -1373,9 +1371,9 @@ void genX(CmdCopyQueryPoolResults)(
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
struct gen_mi_value result;
struct mi_builder b;
mi_builder_init(&b, &cmd_buffer->batch);
struct mi_value result;
/* If render target writes are ongoing, request a render target cache flush
* to ensure proper ordering of the commands from the 3d pipe and the
@@ -1422,7 +1420,7 @@ void genX(CmdCopyQueryPoolResults)(
1 /* available */, flags, idx, result);
if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
gpu_write_query_result_cond(cmd_buffer, &b, query_addr, dest_addr,
0 /* unavailable */, flags, idx, gen_mi_imm(0));
0 /* unavailable */, flags, idx, mi_imm(0));
}
idx++;
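/* VK_QUERY_RESULT_PARTIAL_BIT permits an intermediate value for queries
 * that are not yet available, so this path writes an explicit 0 through
 * the same predicated-store helper, keeping the destination defined in
 * both the available and unavailable cases.
 */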
#else /* GEN_GEN < 8 && !GEN_IS_HASWELL */
@@ -1442,7 +1440,7 @@ void genX(CmdCopyQueryPoolResults)(
if ((cmd_buffer->device->info.gen == 8 ||
cmd_buffer->device->info.is_haswell) &&
(1 << stat) == VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT) {
result = gen_mi_ushr32_imm(&b, result, 2);
result = mi_ushr32_imm(&b, result, 2);
}
gpu_write_query_result(&b, dest_addr, flags, idx++, result);
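/* mi_ushr32_imm(&b, result, 2) shifts the 32-bit result right by two,
 * i.e. divides it by 4.  On Haswell and Gen8 the raw PS_INVOCATION_COUNT
 * value reads four times too high, so the fragment shader invocation
 * statistic is scaled down here (commonly referred to as the
 * WaDividePSInvocationCountBy4 workaround).
 */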
@@ -1459,7 +1457,7 @@ void genX(CmdCopyQueryPoolResults)(
break;
case VK_QUERY_TYPE_TIMESTAMP:
result = gen_mi_mem64(anv_address_add(query_addr, 8));
result = mi_mem64(anv_address_add(query_addr, 8));
gpu_write_query_result(&b, dest_addr, flags, 0, result);
break;
@@ -1475,7 +1473,7 @@ void genX(CmdCopyQueryPoolResults)(
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
gpu_write_query_result(&b, dest_addr, flags, idx,
gen_mi_mem64(query_addr));
mi_mem64(query_addr));
}
dest_addr = anv_address_add(dest_addr, destStride);