From 75209d5bd1f6e93cd52568d87d3ee84f516eec56 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 6 Aug 2020 15:45:45 -0500 Subject: [PATCH] intel/fs: Add and implement intel-specific ray-tracing intrinsics Reviewed-by: Caio Marcelo de Oliveira Filho Part-of: --- src/compiler/nir/nir_intrinsics.py | 5 ++ src/intel/compiler/brw_disasm.c | 9 ++++ src/intel/compiler/brw_eu.h | 18 +++++++ src/intel/compiler/brw_eu_defines.h | 13 +++++ src/intel/compiler/brw_fs.cpp | 52 +++++++++++++++++++ src/intel/compiler/brw_fs_nir.cpp | 22 ++++++++ src/intel/compiler/brw_ir_performance.cpp | 1 + .../compiler/brw_schedule_instructions.cpp | 1 + src/intel/compiler/brw_shader.cpp | 5 ++ 9 files changed, 126 insertions(+) diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index 64824fa9c4c..63618d3d3ab 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -1103,6 +1103,11 @@ intrinsic("btd_resume_intel", indices=[BASE, RANGE]) # src[] = { } intrinsic("btd_retire_intel") +# Intel-specific ray-tracing intrinsics +intrinsic("trace_ray_initial_intel") +intrinsic("trace_ray_commit_intel") +intrinsic("trace_ray_continue_intel") + # System values used for ray-tracing on Intel system_value("ray_base_mem_addr_intel", 1, bit_sizes=[64]) system_value("ray_hw_stack_size_intel", 1) diff --git a/src/intel/compiler/brw_disasm.c b/src/intel/compiler/brw_disasm.c index 375c51abf3c..45024231d14 100644 --- a/src/intel/compiler/brw_disasm.c +++ b/src/intel/compiler/brw_disasm.c @@ -315,6 +315,7 @@ static const char *const gen6_sfid[16] = { [GEN7_SFID_PIXEL_INTERPOLATOR] = "pixel interp", [HSW_SFID_DATAPORT_DATA_CACHE_1] = "dp data 1", [HSW_SFID_CRE] = "cre", + [GEN_RT_SFID_RAY_TRACE_ACCELERATOR] = "rt accel", }; static const char *const gen7_gateway_subfuncid[8] = { @@ -2102,6 +2103,14 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, } /* FALLTHROUGH */ + case GEN_RT_SFID_RAY_TRACE_ACCELERATOR: + if 
(devinfo->has_ray_tracing) { + format(file, " SIMD%d,", + brw_rt_trace_ray_desc_exec_size(devinfo, imm_desc)); + break; + } + /* FALLTHROUGH */ + default: format(file, "unsupported shared function ID %d", sfid); break; diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index a70e35fc1ea..fbcb9a03060 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -1050,6 +1050,24 @@ brw_btd_spawn_exec_size(const struct gen_device_info *devinfo, return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8)); } +static inline uint32_t +brw_rt_trace_ray_desc(const struct gen_device_info *devinfo, + unsigned exec_size) +{ /* Build the send message descriptor for a trace-ray message: the header and message-type fields are left zero and bit 8 holds brw_mdc_sm2(exec_size). Asserts that the device reports has_ray_tracing. */ + assert(devinfo->has_ray_tracing); + + return SET_BITS(0, 19, 19) | /* No header */ + SET_BITS(0, 17, 14) | /* Message type */ + SET_BITS(brw_mdc_sm2(exec_size), 8, 8); +} + +static inline uint32_t +brw_rt_trace_ray_desc_exec_size(const struct gen_device_info *devinfo, + uint32_t desc) +{ /* Inverse of brw_rt_trace_ray_desc() for the SIMD field: decodes bit 8 of desc with brw_mdc_sm2_exec_size(). devinfo is not read here. */ + return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8)); +} + /** * Construct a message descriptor immediate with the specified pixel * interpolator function controls. 
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 8bba0dd5380..e518c8b0439 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -797,6 +797,8 @@ enum opcode { SHADER_OPCODE_GET_DSS_ID, SHADER_OPCODE_BTD_SPAWN_LOGICAL, SHADER_OPCODE_BTD_RETIRE_LOGICAL, + + RT_OPCODE_TRACE_RAY_LOGICAL, }; enum brw_urb_write_flags { @@ -1236,6 +1238,7 @@ enum brw_message_target { HSW_SFID_CRE = 13, GEN_RT_SFID_BINDLESS_THREAD_DISPATCH = 7, + GEN_RT_SFID_RAY_TRACE_ACCELERATOR = 8, }; #define GEN7_MESSAGE_TARGET_DP_DATA_CACHE 10 @@ -1630,4 +1633,14 @@ enum PACKED brw_rnd_mode { #define GEN_RT_BTD_MESSAGE_SPAWN 1 +#define GEN_RT_TRACE_RAY_INITAL 0 /* Trace-ray control values; lower_trace_ray_logical_send() packs them into payload bits 9:8. Note the spelling "INITAL": users of this macro must match it. */ +#define GEN_RT_TRACE_RAY_INSTANCE 1 +#define GEN_RT_TRACE_RAY_COMMIT 2 +#define GEN_RT_TRACE_RAY_CONTINUE 3 + /* BTD shader type codes; not referenced by any other hunk visible in this patch. */ +#define GEN_RT_BTD_SHADER_TYPE_ANY_HIT 0 +#define GEN_RT_BTD_SHADER_TYPE_CLOSEST_HIT 1 +#define GEN_RT_BTD_SHADER_TYPE_MISS 2 +#define GEN_RT_BTD_SHADER_TYPE_INTERSECTION 3 + #endif /* BRW_EU_DEFINES_H */ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 34d767d5c90..ea76a21fa17 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6103,6 +6103,54 @@ lower_btd_logical_send(const fs_builder &bld, fs_inst *inst) inst->src[3] = payload; } +static void +lower_trace_ray_logical_send(const fs_builder &bld, fs_inst *inst) +{ /* Lower RT_OPCODE_TRACE_RAY_LOGICAL in place into a SHADER_OPCODE_SEND targeting the ray-trace accelerator SFID. On entry src[0] is the BVH level (immediate or register) and src[1] is the trace-ray control, which must be an immediate; both are read here before the sources are overwritten at the end. */ + const gen_device_info *devinfo = bld.shader->devinfo; + const fs_reg &bvh_level = inst->src[0]; + assert(inst->src[1].file == BRW_IMMEDIATE_VALUE); + const uint32_t trace_ray_control = inst->src[1].ud; + /* Header: one 8-wide exec_all UD register, zeroed, whose first two dwords are then overwritten from g2.0. */ + const unsigned mlen = 1; + const fs_builder ubld = bld.exec_all().group(8, 0); + fs_reg header = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.MOV(header, brw_imm_ud(0)); + ubld.group(2, 0).MOV(header, + retype(brw_vec2_grf(2, 0), BRW_REGISTER_TYPE_UD)); + /* TODO: Bit 128 is ray_query */ + /* Payload: per channel, bits 2:0 hold the BVH level and bits 9:8 the trace-ray control; ex_mlen is exec_size / 8. */ + const unsigned ex_mlen = inst->exec_size / 8; + fs_reg payload = 
bld.vgrf(BRW_REGISTER_TYPE_UD); + const uint32_t trc_bits = SET_BITS(trace_ray_control, 9, 8); + if (bvh_level.file == BRW_IMMEDIATE_VALUE) { + bld.MOV(payload, brw_imm_ud(trc_bits | (bvh_level.ud & 0x7))); + } else { + bld.AND(payload, bvh_level, brw_imm_ud(0x7)); + if (trc_bits != 0) + bld.OR(payload, payload, brw_imm_ud(trc_bits)); + } + bld.AND(subscript(payload, BRW_REGISTER_TYPE_UW, 1), + retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UW), + brw_imm_uw(0x7ff)); /* Second UW of each payload dword = g1 word & 0x7ff. NOTE(review): presumably the per-channel stack ID from the thread payload; confirm against the bindless shader payload layout. */ + + /* Update the original instruction. */ + inst->opcode = SHADER_OPCODE_SEND; + inst->mlen = mlen; + inst->ex_mlen = ex_mlen; + inst->header_size = 0; /* HW docs require has_header = false */ + inst->send_has_side_effects = true; + inst->send_is_volatile = false; + + /* Set up SFID and descriptors */ + inst->sfid = GEN_RT_SFID_RAY_TRACE_ACCELERATOR; + inst->desc = brw_rt_trace_ray_desc(devinfo, inst->exec_size); + inst->resize_sources(4); + inst->src[0] = brw_imm_ud(0); /* desc */ + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = header; + inst->src[3] = payload; +} + bool fs_visitor::lower_logical_sends() { @@ -6252,6 +6300,10 @@ fs_visitor::lower_logical_sends() lower_btd_logical_send(ibld, inst); break; + case RT_OPCODE_TRACE_RAY_LOGICAL: + lower_trace_ray_logical_send(ibld, inst); + break; + default: continue; } diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 243e4793673..bb9c6f2cb0d 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -24,6 +24,7 @@ #include "compiler/glsl/ir.h" #include "brw_fs.h" #include "brw_nir.h" +#include "brw_rt.h" #include "brw_eu.h" #include "nir_search_helpers.h" #include "util/u_math.h" @@ -3898,6 +3899,27 @@ fs_visitor::nir_emit_bs_intrinsic(const fs_builder &bld, bld.MOV(dest, retype(brw_vec1_grf(2, 2), dest.type)); break; + case nir_intrinsic_trace_ray_initial_intel: + bld.emit(RT_OPCODE_TRACE_RAY_LOGICAL, + bld.null_reg_ud(), + brw_imm_ud(BRW_RT_BVH_LEVEL_WORLD), + 
brw_imm_ud(GEN_RT_TRACE_RAY_INITAL)); + break; + + case nir_intrinsic_trace_ray_commit_intel: + bld.emit(RT_OPCODE_TRACE_RAY_LOGICAL, + bld.null_reg_ud(), + brw_imm_ud(BRW_RT_BVH_LEVEL_OBJECT), + brw_imm_ud(GEN_RT_TRACE_RAY_COMMIT)); + break; + + case nir_intrinsic_trace_ray_continue_intel: + bld.emit(RT_OPCODE_TRACE_RAY_LOGICAL, + bld.null_reg_ud(), + brw_imm_ud(BRW_RT_BVH_LEVEL_OBJECT), + brw_imm_ud(GEN_RT_TRACE_RAY_CONTINUE)); + break; + default: nir_emit_intrinsic(bld, instr); break; diff --git a/src/intel/compiler/brw_ir_performance.cpp b/src/intel/compiler/brw_ir_performance.cpp index 98049449c12..6129dd4da61 100644 --- a/src/intel/compiler/brw_ir_performance.cpp +++ b/src/intel/compiler/brw_ir_performance.cpp @@ -1089,6 +1089,7 @@ namespace { } case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: + case GEN_RT_SFID_RAY_TRACE_ACCELERATOR: return calculate_desc(info, unit_spawner, 2, 0, 0, 0 /* XXX */, 0, 10 /* XXX */, 0, 0, 0, 0, 0); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 38f74a7988f..db6f6ac36b1 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -525,6 +525,7 @@ schedule_node::set_latency_gen7(bool is_haswell) break; case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: + case GEN_RT_SFID_RAY_TRACE_ACCELERATOR: /* TODO. 
* * We'll assume for the moment that this is pretty quick as it diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 11da2825500..f9ebf123fd1 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -543,6 +543,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case TES_OPCODE_GET_PRIMITIVE_ID: return "tes_get_primitive_id"; + case RT_OPCODE_TRACE_RAY_LOGICAL: + return "rt_trace_ray_logical"; + case SHADER_OPCODE_RND_MODE: return "rnd_mode"; case SHADER_OPCODE_FLOAT_CONTROL_MODE: @@ -1112,6 +1115,8 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL: case SHADER_OPCODE_BTD_SPAWN_LOGICAL: case SHADER_OPCODE_BTD_RETIRE_LOGICAL: + case RT_OPCODE_TRACE_RAY_LOGICAL: + case FS_OPCODE_DISCARD_JUMP: return true; default: return eot;