anv: Move mi_memcpy and mi_memset to gen_mi_builder

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
Jason Ekstrand
2019-03-30 21:00:26 -05:00
parent bacb21fc6b
commit d17dd46b09
6 changed files with 106 additions and 91 deletions

View File

@@ -450,6 +450,51 @@ gen_mi_store(struct gen_mi_builder *b,
gen_mi_value_unref(b, dst);
}
/**
 * Fill `size` bytes starting at `dst` with the 32-bit pattern `value`.
 *
 * One gen_mi_store() of an immediate is emitted per dword, so `size` is a
 * byte count that must be a multiple of 4.
 */
static inline void
gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
              uint32_t value, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   /* The builder must not be holding any math dwords at this point. */
   assert(b->num_math_dwords == 0);
#endif

   /* Only whole dwords can be written. */
   assert(size % 4 == 0);

   uint32_t offset = 0;
   while (offset < size) {
      struct gen_mi_value target =
         gen_mi_mem32(__gen_address_offset(dst, offset));

      gen_mi_store(b, target, gen_mi_imm(value));
      offset += 4;
   }
}
/**
 * Copy `size` bytes from `src` to `dst`, one dword at a time.
 *
 * `size` is a byte count that must be a multiple of 4.
 *
 * NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX
 */
static inline void
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
              __gen_address_type src, uint32_t size)
{
#if GEN_GEN >= 8 || GEN_IS_HASWELL
   /* The builder must not be holding any math dwords at this point. */
   assert(b->num_math_dwords == 0);
#endif

   /* Only whole dwords can be copied. */
   assert(size % 4 == 0);

   uint32_t offset = 0;
   while (offset < size) {
      struct gen_mi_value to = gen_mi_mem32(__gen_address_offset(dst, offset));
      struct gen_mi_value from = gen_mi_mem32(__gen_address_offset(src, offset));

#if GEN_GEN >= 8 || GEN_IS_HASWELL
      gen_mi_store(b, to, from);
#else
      /* There is no general purpose command streamer register on IVB, so
       * the dword is bounced through a different register that serves as
       * the temporary.
       */
      struct gen_mi_value scratch = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */

      gen_mi_store(b, scratch, from);
      gen_mi_store(b, to, scratch);
#endif

      offset += 4;
   }
}
/*
* MI_MATH Section. Only available on Haswell+
*/

View File

@@ -422,6 +422,36 @@ TEST_F(gen_mi_builder_test, mem_reg)
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
/* Fills 256 bytes at the start of the output through gen_mi_memset() and
 * checks that every dword reads back as the fill pattern.
 */
TEST_F(gen_mi_builder_test, memset)
{
   const unsigned memset_size = 256;
   const unsigned num_dwords = memset_size / sizeof(uint32_t);

   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   unsigned i = 0;
   while (i < num_dwords) {
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
      i++;
   }
}
/* Copies a 256-byte ramp from the input to the output through
 * gen_mi_memcpy() and checks that every byte arrived unchanged.
 */
TEST_F(gen_mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Seed the input with a byte ramp so that each position holds a distinct
    * value (0..255).
    */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   /* Use memcpy_size rather than a repeated literal so the amount copied
    * always matches the amount seeded above and verified below.
    */
   gen_mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);
   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
/* Start of MI_MATH section */
#if GEN_GEN >= 8 || GEN_IS_HASWELL

View File

@@ -78,13 +78,5 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);
/* Copies size bytes from src to dst one dword at a time using MI commands
 * emitted into cmd_buffer's batch.  size, dst.offset and src.offset must all
 * be multiples of 4.
 */
void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size);
/* Writes the 32-bit value to every dword of the size bytes starting at dst,
 * using MI_STORE_DATA_IMM commands emitted into cmd_buffer's batch.  size and
 * dst.offset must be multiples of 4.
 */
void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, uint32_t value,
uint32_t size);
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);

View File

@@ -822,12 +822,35 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
#if GEN_GEN == 7
/* On gen7, the combination of commands used here (MI_LOAD_REGISTER_MEM
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
* in-flight when they are issued even if the memory touched is not
* currently active for rendering. The weird bit is that it is not the
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
* rendering hangs such that the next stalling command after the
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
*
* It is unclear exactly why this hang occurs. Both MI commands come with
* warnings about the 3D pipeline but that doesn't seem to fully explain
* it. My (Jason's) best theory is that it has something to do with the
* fact that we're using a GPU state register as our temporary and that
* something with reading/writing it is causing problems.
*
* In order to work around this issue, we emit a PIPE_CONTROL with the
* command streamer stall bit set.
*/
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
struct gen_mi_builder b;
gen_mi_builder_init(&b, &cmd_buffer->batch);
if (copy_from_surface_state) {
genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr,
ss_clear_addr, copy_size);
gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
} else {
genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr,
entry_addr, copy_size);
gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
/* Updating a surface state object may require that the state cache be
* invalidated. From the SKL PRM, Shared Functions -> State -> State

View File

@@ -51,80 +51,6 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
return 1 << MIN2(a_log2, b_log2);
}
void
genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
uint32_t size)
{
/* This memcpy operates in units of dwords. */
assert(size % 4 == 0);
assert(dst.offset % 4 == 0);
assert(src.offset % 4 == 0);
#if GEN_GEN == 7
/* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
* in-flight when they are issued even if the memory touched is not
* currently active for rendering. The weird bit is that it is not the
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
* rendering hangs such that the next stalling command after the
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
*
* It is unclear exactly why this hang occurs. Both MI commands come with
* warnings about the 3D pipeline but that doesn't seem to fully explain
* it. My (Jason's) best theory is that it has something to do with the
* fact that we're using a GPU state register as our temporary and that
* something with reading/writing it is causing problems.
*
* In order to work around this issue, we emit a PIPE_CONTROL with the
* command streamer stall bit set.
*/
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#endif
for (uint32_t i = 0; i < size; i += 4) {
#if GEN_GEN >= 8
anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
cp.DestinationMemoryAddress = anv_address_add(dst, i);
cp.SourceMemoryAddress = anv_address_add(src, i);
}
#else
/* IVB does not have a general purpose register for command streamer
* commands. Therefore, we use an alternate temporary register.
*/
#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
load.RegisterAddress = TEMP_REG;
load.MemoryAddress = anv_address_add(src, i);
}
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
store.RegisterAddress = TEMP_REG;
store.MemoryAddress = anv_address_add(dst, i);
}
#undef TEMP_REG
#endif
}
return;
}
void
genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, uint32_t value,
uint32_t size)
{
/* This memset operates in units of dwords. */
assert(size % 4 == 0);
assert(dst.offset % 4 == 0);
for (uint32_t i = 0; i < size; i += 4) {
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
sdi.Address = anv_address_add(dst, i);
sdi.ImmediateData = value;
}
}
}
void
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,

View File

@@ -363,14 +363,13 @@ emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
*/
static void
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
struct anv_query_pool *pool,
struct gen_mi_builder *b, struct anv_query_pool *pool,
uint32_t first_index, uint32_t num_queries)
{
/* Visit num_queries consecutive slots of the pool, starting at first_index. */
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
/* Zero the slot from byte 8 up to pool->stride.  Presumably the first 8
 * bytes hold the availability value written by emit_query_availability()
 * below -- TODO confirm against the pool layout.
 */
genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
0, pool->stride - 8);
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
/* Emit availability for the slot after its data has been zeroed. */
emit_query_availability(cmd_buffer, slot_addr);
}
}
@@ -574,7 +573,7 @@ void genX(CmdEndQueryIndexedEXT)(
const uint32_t num_queries =
util_bitcount(cmd_buffer->state.subpass->view_mask);
if (num_queries > 1)
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
}
}
@@ -628,7 +627,7 @@ void genX(CmdWriteTimestamp)(
const uint32_t num_queries =
util_bitcount(cmd_buffer->state.subpass->view_mask);
if (num_queries > 1)
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
}
}