anv: Move mi_memcpy and mi_memset to gen_mi_builder
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
This commit is contained in:
@@ -450,6 +450,51 @@ gen_mi_store(struct gen_mi_builder *b,
|
|||||||
gen_mi_value_unref(b, dst);
|
gen_mi_value_unref(b, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
|
||||||
|
uint32_t value, uint32_t size)
|
||||||
|
{
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
assert(b->num_math_dwords == 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This memset operates in units of dwords. */
|
||||||
|
assert(size % 4 == 0);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < size; i += 4) {
|
||||||
|
gen_mi_store(b, gen_mi_mem32(__gen_address_offset(dst, i)),
|
||||||
|
gen_mi_imm(value));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX */
|
||||||
|
static inline void
|
||||||
|
gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
|
||||||
|
__gen_address_type src, uint32_t size)
|
||||||
|
{
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
assert(b->num_math_dwords == 0);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This memcpy operates in units of dwords. */
|
||||||
|
assert(size % 4 == 0);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < size; i += 4) {
|
||||||
|
struct gen_mi_value dst_val = gen_mi_mem32(__gen_address_offset(dst, i));
|
||||||
|
struct gen_mi_value src_val = gen_mi_mem32(__gen_address_offset(src, i));
|
||||||
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
gen_mi_store(b, dst_val, src_val);
|
||||||
|
#else
|
||||||
|
/* IVB does not have a general purpose register for command streamer
|
||||||
|
* commands. Therefore, we use an alternate temporary register.
|
||||||
|
*/
|
||||||
|
struct gen_mi_value tmp_reg = gen_mi_reg32(0x2440); /* GEN7_3DPRIM_BASE_VERTEX */
|
||||||
|
gen_mi_store(b, tmp_reg, src_val);
|
||||||
|
gen_mi_store(b, dst_val, tmp_reg);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* MI_MATH Section. Only available on Haswell+
|
* MI_MATH Section. Only available on Haswell+
|
||||||
*/
|
*/
|
||||||
|
@@ -422,6 +422,36 @@ TEST_F(gen_mi_builder_test, mem_reg)
|
|||||||
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
|
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fills the first 256 bytes of the output buffer with 0xdeadbeef through
 * gen_mi_memset() and, after submit_batch(), checks every dword of that range.
 */
TEST_F(gen_mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   /* The fill is dword-granular, so verify it dword by dword. */
   const unsigned num_dwords = memset_size / sizeof(uint32_t);
   uint32_t *dwords = (uint32_t *)output;
   for (unsigned dw = 0; dw < num_dwords; ++dw)
      EXPECT_EQ(dwords[dw], 0xdeadbeef);
}
|
||||||
|
|
||||||
|
/* Writes a known byte pattern into the input buffer, copies it to the output
 * buffer with gen_mi_memcpy() and, after submit_batch(), checks every byte.
 */
TEST_F(gen_mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   /* Byte i holds the value i; with memcpy_size == 256 each uint8_t value
    * appears exactly once.
    */
   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   /* Pass memcpy_size rather than repeating the literal so the copy length
    * cannot drift from the fill and verification loops.
    */
   gen_mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}
|
||||||
|
|
||||||
/* Start of MI_MATH section */
|
/* Start of MI_MATH section */
|
||||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||||
|
|
||||||
|
@@ -78,13 +78,5 @@ void genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
struct anv_address dst, struct anv_address src,
|
struct anv_address dst, struct anv_address src,
|
||||||
uint32_t size);
|
uint32_t size);
|
||||||
|
|
||||||
void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|
||||||
struct anv_address dst, struct anv_address src,
|
|
||||||
uint32_t size);
|
|
||||||
|
|
||||||
void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
|
|
||||||
struct anv_address dst, uint32_t value,
|
|
||||||
uint32_t size);
|
|
||||||
|
|
||||||
void genX(blorp_exec)(struct blorp_batch *batch,
|
void genX(blorp_exec)(struct blorp_batch *batch,
|
||||||
const struct blorp_params *params);
|
const struct blorp_params *params);
|
||||||
|
@@ -822,12 +822,35 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
|
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
|
||||||
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
|
||||||
|
|
||||||
|
#if GEN_GEN == 7
|
||||||
|
/* On gen7, the combination of commands used here (MI_LOAD_REGISTER_MEM
|
||||||
|
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
|
||||||
|
* in-flight when they are issued even if the memory touched is not
|
||||||
|
* currently active for rendering. The weird bit is that it is not the
|
||||||
|
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
|
||||||
|
* rendering hangs such that the next stalling command after the
|
||||||
|
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
|
||||||
|
*
|
||||||
|
* It is unclear exactly why this hang occurs. Both MI commands come with
|
||||||
|
* warnings about the 3D pipeline but that doesn't seem to fully explain
|
||||||
|
* it. My (Jason's) best theory is that it has something to do with the
|
||||||
|
* fact that we're using a GPU state register as our temporary and that
|
||||||
|
* something with reading/writing it is causing problems.
|
||||||
|
*
|
||||||
|
* In order to work around this issue, we emit a PIPE_CONTROL with the
|
||||||
|
* command streamer stall bit set.
|
||||||
|
*/
|
||||||
|
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
||||||
|
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct gen_mi_builder b;
|
||||||
|
gen_mi_builder_init(&b, &cmd_buffer->batch);
|
||||||
|
|
||||||
if (copy_from_surface_state) {
|
if (copy_from_surface_state) {
|
||||||
genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr,
|
gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
|
||||||
ss_clear_addr, copy_size);
|
|
||||||
} else {
|
} else {
|
||||||
genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr,
|
gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
|
||||||
entry_addr, copy_size);
|
|
||||||
|
|
||||||
/* Updating a surface state object may require that the state cache be
|
/* Updating a surface state object may require that the state cache be
|
||||||
* invalidated. From the SKL PRM, Shared Functions -> State -> State
|
* invalidated. From the SKL PRM, Shared Functions -> State -> State
|
||||||
|
@@ -51,80 +51,6 @@ gcd_pow2_u64(uint64_t a, uint64_t b)
|
|||||||
return 1 << MIN2(a_log2, b_log2);
|
return 1 << MIN2(a_log2, b_log2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
|
||||||
struct anv_address dst, struct anv_address src,
|
|
||||||
uint32_t size)
|
|
||||||
{
|
|
||||||
/* This memcpy operates in units of dwords. */
|
|
||||||
assert(size % 4 == 0);
|
|
||||||
assert(dst.offset % 4 == 0);
|
|
||||||
assert(src.offset % 4 == 0);
|
|
||||||
|
|
||||||
#if GEN_GEN == 7
|
|
||||||
/* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
|
|
||||||
* and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
|
|
||||||
* in-flight when they are issued even if the memory touched is not
|
|
||||||
* currently active for rendering. The weird bit is that it is not the
|
|
||||||
* MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
|
|
||||||
* rendering hangs such that the next stalling command after the
|
|
||||||
* MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
|
|
||||||
*
|
|
||||||
* It is unclear exactly why this hang occurs. Both MI commands come with
|
|
||||||
* warnings about the 3D pipeline but that doesn't seem to fully explain
|
|
||||||
* it. My (Jason's) best theory is that it has something to do with the
|
|
||||||
* fact that we're using a GPU state register as our temporary and that
|
|
||||||
* something with reading/writing it is causing problems.
|
|
||||||
*
|
|
||||||
* In order to work around this issue, we emit a PIPE_CONTROL with the
|
|
||||||
* command streamer stall bit set.
|
|
||||||
*/
|
|
||||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
|
||||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < size; i += 4) {
|
|
||||||
#if GEN_GEN >= 8
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
|
|
||||||
cp.DestinationMemoryAddress = anv_address_add(dst, i);
|
|
||||||
cp.SourceMemoryAddress = anv_address_add(src, i);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/* IVB does not have a general purpose register for command streamer
|
|
||||||
* commands. Therefore, we use an alternate temporary register.
|
|
||||||
*/
|
|
||||||
#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
|
|
||||||
load.RegisterAddress = TEMP_REG;
|
|
||||||
load.MemoryAddress = anv_address_add(src, i);
|
|
||||||
}
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
|
|
||||||
store.RegisterAddress = TEMP_REG;
|
|
||||||
store.MemoryAddress = anv_address_add(dst, i);
|
|
||||||
}
|
|
||||||
#undef TEMP_REG
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
|
||||||
genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
|
|
||||||
struct anv_address dst, uint32_t value,
|
|
||||||
uint32_t size)
|
|
||||||
{
|
|
||||||
/* This memset operates in units of dwords. */
|
|
||||||
assert(size % 4 == 0);
|
|
||||||
assert(dst.offset % 4 == 0);
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < size; i += 4) {
|
|
||||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
|
|
||||||
sdi.Address = anv_address_add(dst, i);
|
|
||||||
sdi.ImmediateData = value;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
|
||||||
struct anv_address dst, struct anv_address src,
|
struct anv_address dst, struct anv_address src,
|
||||||
|
@@ -363,14 +363,13 @@ emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
|
|||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
||||||
struct anv_query_pool *pool,
|
struct gen_mi_builder *b, struct anv_query_pool *pool,
|
||||||
uint32_t first_index, uint32_t num_queries)
|
uint32_t first_index, uint32_t num_queries)
|
||||||
{
|
{
|
||||||
for (uint32_t i = 0; i < num_queries; i++) {
|
for (uint32_t i = 0; i < num_queries; i++) {
|
||||||
struct anv_address slot_addr =
|
struct anv_address slot_addr =
|
||||||
anv_query_address(pool, first_index + i);
|
anv_query_address(pool, first_index + i);
|
||||||
genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
|
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
|
||||||
0, pool->stride - 8);
|
|
||||||
emit_query_availability(cmd_buffer, slot_addr);
|
emit_query_availability(cmd_buffer, slot_addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -574,7 +573,7 @@ void genX(CmdEndQueryIndexedEXT)(
|
|||||||
const uint32_t num_queries =
|
const uint32_t num_queries =
|
||||||
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
||||||
if (num_queries > 1)
|
if (num_queries > 1)
|
||||||
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
|
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -628,7 +627,7 @@ void genX(CmdWriteTimestamp)(
|
|||||||
const uint32_t num_queries =
|
const uint32_t num_queries =
|
||||||
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
util_bitcount(cmd_buffer->state.subpass->view_mask);
|
||||||
if (num_queries > 1)
|
if (num_queries > 1)
|
||||||
emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
|
emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user