intel/fs: Implement nir_intrinsic_{load,store}_global_block_intel

Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7448>
This commit is contained in:
Caio Marcelo de Oliveira Filho
2020-10-05 14:48:44 -07:00
committed by Marge Bot
parent d3d2b73fa3
commit 296137df53

View File

@@ -4072,6 +4072,36 @@ fs_visitor::swizzle_nir_scratch_addr(const brw::fs_builder &bld,
return addr;
}
/**
 * Select how many dwords the next OWord block message should transfer.
 *
 * Returns 32 when at least 32 dwords remain, 16 when at least 16 remain,
 * and 8 otherwise.  The chosen size must not exceed \p dwords, which is
 * checked with an assert, so callers are expected to pass at least 8.
 */
static unsigned
choose_oword_block_size_dwords(unsigned dwords)
{
   const unsigned chosen = dwords >= 32 ? 32 :
                           dwords >= 16 ? 16 :
                                          8;

   /* Never pick a block larger than what is left to transfer. */
   assert(chosen <= dwords);
   return chosen;
}
static void
increment_a64_address(const fs_builder &bld, fs_reg address, uint32_t v)
{
if (bld.shader->devinfo->has_64bit_int) {
bld.ADD(address, address, brw_imm_ud(v));
} else {
fs_reg low = retype(address, BRW_REGISTER_TYPE_UD);
fs_reg high = offset(low, bld, 1);
/* Add low and if that overflows, add carry to high. */
bld.ADD(low, low, brw_imm_ud(v))->conditional_mod = BRW_CONDITIONAL_O;
bld.ADD(high, high, brw_imm_ud(0x1))->predicate = BRW_PREDICATE_NORMAL;
}
}
void
fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr)
{
@@ -5326,6 +5356,71 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
break;
}
case nir_intrinsic_load_global_block_intel: {
/* Block load: reads num_components * dispatch_width dwords starting at the
 * address in src[0] and places them contiguously in dest, using a sequence
 * of A64 unaligned OWord block read messages.
 */
/* Only 32-bit components are handled; the dword/byte math below relies on it. */
assert(nir_dest_bit_size(instr->dest) == 32);
/* NOTE(review): emit_uniformize() presumably yields a single address shared
 * by all channels -- confirm against the builder implementation.
 */
fs_reg address = bld.emit_uniformize(get_nir_src(instr->src[0]));
/* exec_all() builders of width 1, 8 and 16: width 1 is used to advance the
 * address, widths 8 and 16 to emit the block reads.
 */
const fs_builder ubld1 = bld.exec_all().group(1, 0);
const fs_builder ubld8 = bld.exec_all().group(8, 0);
const fs_builder ubld16 = bld.exec_all().group(16, 0);
/* Total number of dwords to read, and how many have been read so far. */
const unsigned total = instr->num_components * dispatch_width;
unsigned loaded = 0;
while (loaded < total) {
/* Size of this message in dwords (8, 16 or 32) and in bytes. */
const unsigned block =
choose_oword_block_size_dwords(total - loaded);
const unsigned block_bytes = block * 4;
/* 8-dword blocks use the width-8 builder; 16- and 32-dword blocks both use
 * the width-16 builder.
 */
const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
/* Destination is dest advanced by the bytes already loaded; the block size
 * in dwords is passed as an immediate, and size_written is set to the
 * number of bytes this message produces.
 */
ubld.emit(SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
retype(byte_offset(dest, loaded * 4), BRW_REGISTER_TYPE_UD),
address,
fs_reg(), /* No source data */
brw_imm_ud(block))->size_written = block_bytes;
/* Advance the address in place so the next message continues right after
 * this block.
 */
increment_a64_address(ubld1, address, block_bytes);
loaded += block;
}
/* The chosen block sizes must add up to exactly the requested amount. */
assert(loaded == total);
break;
}
case nir_intrinsic_store_global_block_intel: {
/* Block store: writes num_components * dispatch_width dwords taken
 * contiguously from src[0] to memory starting at the address in src[1],
 * using a sequence of A64 OWord block write messages.
 */
/* Only 32-bit components are handled; the dword/byte math below relies on it. */
assert(nir_src_bit_size(instr->src[0]) == 32);
/* NOTE(review): emit_uniformize() presumably yields a single address shared
 * by all channels -- confirm against the builder implementation.
 */
fs_reg address = bld.emit_uniformize(get_nir_src(instr->src[1]));
/* Data to be written. */
fs_reg src = get_nir_src(instr->src[0]);
/* exec_all() builders of width 1, 8 and 16: width 1 is used to advance the
 * address, widths 8 and 16 to emit the block writes.
 */
const fs_builder ubld1 = bld.exec_all().group(1, 0);
const fs_builder ubld8 = bld.exec_all().group(8, 0);
const fs_builder ubld16 = bld.exec_all().group(16, 0);
/* Total number of dwords to write, and how many have been written so far. */
const unsigned total = instr->num_components * dispatch_width;
unsigned written = 0;
while (written < total) {
/* Size of this message in dwords (8, 16 or 32). */
const unsigned block =
choose_oword_block_size_dwords(total - written);
/* 8-dword blocks use the width-8 builder; 16- and 32-dword blocks both use
 * the width-16 builder.
 */
const fs_builder &ubld = block == 8 ? ubld8 : ubld16;
/* No destination register; the payload is src advanced by the bytes already
 * written, and the block size in dwords is passed as an immediate.
 */
ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
fs_reg(),
address,
retype(byte_offset(src, written * 4), BRW_REGISTER_TYPE_UD),
brw_imm_ud(block));
/* Advance the address in place so the next message continues right after
 * this block.
 */
const unsigned block_bytes = block * 4;
increment_a64_address(ubld1, address, block_bytes);
written += block;
}
/* The chosen block sizes must add up to exactly the requested amount. */
assert(written == total);
break;
}
default:
unreachable("unknown intrinsic");
}