anv: Move mi_memcpy and mi_memset to gen_mi_builder
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
@@ -450,6 +450,51 @@ gen_mi_store(struct gen_mi_builder *b,
|
||||
gen_mi_value_unref(b, dst);
|
||||
}
|
||||
|
||||
static inline void
|
||||
gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
|
||||
uint32_t value, uint32_t size)
|
||||
{
|
||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||
assert(b->num_math_dwords == 0);
|
||||
#endif
|
||||
|
||||
/* This memset operates in units of dwords. */
|
||||
assert(size % 4 == 0);
|
||||
|
||||
for (uint32_t i = 0; i < size; i += 4) {
|
||||
gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
|
||||
gen_mi_imm(value));
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
|
||||
static inline void
|
||||
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
|
||||
__gen_address_type src, uint32_t size)
|
||||
{
|
||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||
assert(b->num_math_dwords == 0);
|
||||
#endif
|
||||
|
||||
/* This memcpy operates in units of dwords. */
|
||||
assert(size % 4 == 0);
|
||||
|
||||
for (uint32_t i = 0; i < size; i += 4) {
|
||||
struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
|
||||
struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
|
||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||
gen_mi_store(b, dst_val, src_val);
|
||||
#else
|
||||
/* IVB does not have a general purpose register for command streamer
|
||||
* commands. Therefore, we use an alternate temporary register.
|
||||
*/
|
||||
struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
|
||||
gen_mi_store(b, tmp_reg, src_val);
|
||||
gen_mi_store(b, dst_val, tmp_reg);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* MI_MATH Section. Only available on Haswell+
|
||||
*/
|
||||
|
@@ -422,6 +422,36 @@ TEST_F(gen_mi_builder_test, mem_reg)
|
||||
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
|
||||
}
|
||||
|
||||
/* Fills the first 256 bytes at out_addr(0) with 0xdeadbeef through
 * gen_mi_memset(), submits the batch, and then checks that every dword of
 * that range in `output` holds the fill value.
 */
TEST_F(gen_mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
   submit_batch();

   /* The fill is dword-granular, so verify it dword by dword. */
   const uint32_t *dwords = (uint32_t *)output;
   const unsigned num_dwords = memset_size / sizeof(uint32_t);

   for (unsigned dw = 0; dw < num_dwords; dw++)
      EXPECT_EQ(dwords[dw], 0xdeadbeef);
}
|
||||
|
||||
/* Writes a byte ramp into `input`, copies it to out_addr(0) with
 * gen_mi_memcpy(), submits the batch, and then checks every byte of the
 * copied range in `output`.
 */
TEST_F(gen_mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Byte i holds the value i.  With memcpy_size at 256 no value wraps in a
    * uint8_t, which the comparison against `i` below relies on.
    */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   /* Copy exactly the range that was filled above and is verified below, so
    * the three stay in sync if memcpy_size is ever changed.
    */
   gen_mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
|
||||
|
||||
/* Start of MI_MATH section */
|
||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||
|
||||
|
@@ -78,13 +78,5 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size);
|
||||
|
||||
void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size);
|
||||
|
||||
void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, uint32_t value,
|
||||
uint32_t size);
|
||||
|
||||
void genX(blorp_exec)(struct blorp_batch *batch,
|
||||
const struct blorp_params *params);
|
||||
|
@@ -822,12 +822,35 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
|
||||
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
|
||||
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
||||
|
||||
#if GEN_GEN == 7
|
||||
/* On gen7, the combination of commands used here (MI_LOAD_REGISTER_MEM
|
||||
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
|
||||
* in-flight when they are issued even if the memory touched is not
|
||||
* currently active for rendering. The weird bit is that it is not the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
|
||||
* rendering hangs such that the next stalling command after the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
|
||||
*
|
||||
* It is unclear exactly why this hang occurs. Both MI commands come with
|
||||
* warnings about the 3D pipeline but that doesn't seem to fully explain
|
||||
* it. My (Jason's) best theory is that it has something to do with the
|
||||
* fact that we're using a GPU state register as our temporary and that
|
||||
* something with reading/writing it is causing problems.
|
||||
*
|
||||
* In order to work around this issue, we emit a PIPE_CONTROL with the
|
||||
* command streamer stall bit set.
|
||||
*/
|
||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
struct gen_mi_builder b;
|
||||
gen_mi_builder_init(&b, &cmd_buffer->batch);
|
||||
|
||||
if (copy_from_surface_state) {
|
||||
genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr,
|
||||
ss_clear_addr, copy_size);
|
||||
gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
|
||||
} else {
|
||||
genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr,
|
||||
entry_addr, copy_size);
|
||||
gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
|
||||
|
||||
/* Updating a surface state object may require that the state cache be
|
||||
* invalidated. From the SKL PRM, Shared Functions -> State -> State
|
||||
|
@@ -51,80 +51,6 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
|
||||
return 1 << MIN2(a_log2, b_log2);
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
uint32_t size)
|
||||
{
|
||||
/* This memcpy operates in units of dwords. */
|
||||
assert(size % 4 == 0);
|
||||
assert(dst.offset % 4 == 0);
|
||||
assert(src.offset % 4 == 0);
|
||||
|
||||
#if GEN_GEN == 7
|
||||
/* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
|
||||
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
|
||||
* in-flight when they are issued even if the memory touched is not
|
||||
* currently active for rendering. The weird bit is that it is not the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
|
||||
* rendering hangs such that the next stalling command after the
|
||||
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
|
||||
*
|
||||
* It is unclear exactly why this hang occurs. Both MI commands come with
|
||||
* warnings about the 3D pipeline but that doesn't seem to fully explain
|
||||
* it. My (Jason's) best theory is that it has something to do with the
|
||||
* fact that we're using a GPU state register as our temporary and that
|
||||
* something with reading/writing it is causing problems.
|
||||
*
|
||||
* In order to work around this issue, we emit a PIPE_CONTROL with the
|
||||
* command streamer stall bit set.
|
||||
*/
|
||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
#endif
|
||||
|
||||
for (uint32_t i = 0; i < size; i += 4) {
|
||||
#if GEN_GEN >= 8
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
|
||||
cp.DestinationMemoryAddress = anv_address_add(dst, i);
|
||||
cp.SourceMemoryAddress = anv_address_add(src, i);
|
||||
}
|
||||
#else
|
||||
/* IVB does not have a general purpose register for command streamer
|
||||
* commands. Therefore, we use an alternate temporary register.
|
||||
*/
|
||||
#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
|
||||
load.RegisterAddress = TEMP_REG;
|
||||
load.MemoryAddress = anv_address_add(src, i);
|
||||
}
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
|
||||
store.RegisterAddress = TEMP_REG;
|
||||
store.MemoryAddress = anv_address_add(dst, i);
|
||||
}
|
||||
#undef TEMP_REG
|
||||
#endif
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, uint32_t value,
|
||||
uint32_t size)
|
||||
{
|
||||
/* This memset operates in units of dwords. */
|
||||
assert(size % 4 == 0);
|
||||
assert(dst.offset % 4 == 0);
|
||||
|
||||
for (uint32_t i = 0; i < size; i += 4) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
||||
sdi.Address = anv_address_add(dst, i);
|
||||
sdi.ImmediateData = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address dst, struct anv_address src,
|
||||
|
@@ -363,14 +363,13 @@ emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
|
||||
*/
|
||||
/* For each of `num_queries` query slots starting at `first_index`, zero the
 * slot's bytes from offset 8 up to pool->stride and then call
 * emit_query_availability() on the slot address.
 *
 * NOTE(review): this span is a rendered diff with no +/- markers, so it
 * shows both the removed and the added version of the parameter list and of
 * the memset call (genX(cmd_buffer_mi_memset) vs. gen_mi_memset).  Only one
 * of each pair is expected to exist in the real file -- confirm against the
 * actual source before relying on the text below.
 */
static void
|
||||
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_query_pool *pool,
|
||||
struct gen_mi_builder *b, struct anv_query_pool *pool,
|
||||
uint32_t first_index, uint32_t num_queries)
|
||||
{
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
struct anv_address slot_addr =
|
||||
anv_query_address(pool, first_index + i);
|
||||
/* The first 8 bytes of the slot are skipped; only the rest is zeroed. */
genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
|
||||
0, pool->stride - 8);
|
||||
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
|
||||
emit_query_availability(cmd_buffer, slot_addr);
|
||||
}
|
||||
}
|
||||
@@ -574,7 +573,7 @@ void genX(CmdEndQueryIndexedEXT)(
|
||||
const uint32_t num_queries =
|
||||
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
||||
if (num_queries > 1)
|
||||
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
|
||||
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -628,7 +627,7 @@ void genX(CmdWriteTimestamp)(
|
||||
const uint32_t num_queries =
|
||||
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
||||
if (num_queries > 1)
|
||||
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
|
||||
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user