diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index df9f22f0607..d21c13ed978 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -945,6 +945,10 @@ load("global_ir3", [2, 1], indices=[ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN # Note that this doesn't actually turn into a HW instruction. intrinsic("bindless_resource_ir3", [1], dest_comp=1, indices=[DESC_SET], flags=[CAN_ELIMINATE, CAN_REORDER]) +# DXIL specific intrinsics +# src[] = { index, 16-byte-based-offset } +load("ubo_dxil", [1, 1], [], [CAN_ELIMINATE]) + # Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined # within a blend shader to read/write the raw value from the tile buffer, # without applying any format conversion in the process. If the shader needs diff --git a/src/microsoft/compiler/dxil_nir.c b/src/microsoft/compiler/dxil_nir.c new file mode 100644 index 00000000000..a79c8f57ca5 --- /dev/null +++ b/src/microsoft/compiler/dxil_nir.c @@ -0,0 +1,156 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "dxil_nir.h" + +#include "nir_builder.h" +#include "nir_deref.h" +#include "util/u_math.h" + +static void +extract_comps_from_vec32(nir_builder *b, nir_ssa_def *vec32, + unsigned dst_bit_size, + nir_ssa_def **dst_comps, + unsigned num_dst_comps) +{ + unsigned step = DIV_ROUND_UP(dst_bit_size, 32); + unsigned comps_per32b = 32 / dst_bit_size; + nir_ssa_def *tmp; + + for (unsigned i = 0; i < vec32->num_components; i += step) { + switch (dst_bit_size) { + case 64: + tmp = nir_pack_64_2x32_split(b, nir_channel(b, vec32, i), + nir_channel(b, vec32, i + 1)); + dst_comps[i / 2] = tmp; + break; + case 32: + dst_comps[i] = nir_channel(b, vec32, i); + break; + case 16: + case 8: + unsigned dst_offs = i * comps_per32b; + + tmp = nir_unpack_bits(b, nir_channel(b, vec32, i), dst_bit_size); + for (unsigned j = 0; j < comps_per32b && dst_offs + j < num_dst_comps; j++) + dst_comps[dst_offs + j] = nir_channel(b, tmp, j); + + break; + } + } +} + +static nir_ssa_def * +ubo_load_select_32b_comps(nir_builder *b, nir_ssa_def *vec32, + nir_ssa_def *offset, unsigned num_bytes) +{ + assert(num_bytes == 16 || num_bytes == 12 || num_bytes == 8 || + num_bytes == 4 || num_bytes == 3 || num_bytes == 2 || + num_bytes == 1); + assert(vec32->num_components == 4); + + /* 16 and 12 byte types are always aligned on 16 bytes. 
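+    * That means such a load can never straddle two 16-byte cbuffer slots, so
+    * the full vec4 returned by the cbuffer load can be used as-is.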
 */
+   if (num_bytes > 8)
+      return vec32;
+
+   nir_ssa_def *comps[4];
+   nir_ssa_def *cond;
+
+   for (unsigned i = 0; i < 4; i++)
+      comps[i] = nir_channel(b, vec32, i);
+
+   /* If we have 8 bytes or less to load, select which half of the vec4
+    * should be used.
+    */
+   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x8)),
+                  nir_imm_int(b, 0));
+
+   comps[0] = nir_bcsel(b, cond, comps[2], comps[0]);
+   comps[1] = nir_bcsel(b, cond, comps[3], comps[1]);
+
+   /* Thanks to the CL alignment constraints, if we want 8 bytes we're done. */
+   if (num_bytes == 8)
+      return nir_vec(b, comps, 2);
+
+   /* If 4 bytes or less are needed, select which of the 32-bit components
+    * should be used and return it. The sub-32bit split is handled in
+    * extract_comps_from_vec32().
+    */
+   cond = nir_ine(b, nir_iand(b, offset, nir_imm_int(b, 0x4)),
+                  nir_imm_int(b, 0));
+   return nir_bcsel(b, cond, comps[1], comps[0]);
+}
+
+nir_ssa_def *
+build_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer,
+                    nir_ssa_def *offset, unsigned num_components,
+                    unsigned bit_size)
+{
+   nir_ssa_def *idx = nir_ushr(b, offset, nir_imm_int(b, 4));
+   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
+   unsigned num_bits = num_components * bit_size;
+   unsigned comp_idx = 0;
+
+   /* We need to split loads into 16-byte chunks because that's the
+    * granularity of cBufferLoadLegacy().
+    */
+   for (unsigned i = 0; i < num_bits; i += (16 * 8)) {
+      /* For each 16-byte chunk (or smaller) we generate a 32-bit ubo vec
+       * load.
+       */
+      unsigned subload_num_bits = MIN2(num_bits - i, 16 * 8);
+      nir_intrinsic_instr *load =
+         nir_intrinsic_instr_create(b->shader,
+                                    nir_intrinsic_load_ubo_dxil);
+
+      load->num_components = 4;
+      load->src[0] = nir_src_for_ssa(buffer);
+      load->src[1] = nir_src_for_ssa(nir_iadd(b, idx, nir_imm_int(b, i / (16 * 8))));
+      nir_ssa_dest_init(&load->instr, &load->dest, load->num_components,
+                        32, NULL);
+      nir_builder_instr_insert(b, &load->instr);
+
+      nir_ssa_def *vec32 = &load->dest.ssa;
+
+      /* First re-arrange the vec32 to account for the intra 16-byte offset. */
+      vec32 = ubo_load_select_32b_comps(b, vec32, offset, subload_num_bits / 8);
+
+      /* If we have 2 bytes or less to load we need to adjust the u32 value so
+       * we can always extract the LSB.
+       */
+      if (subload_num_bits <= 16) {
+         nir_ssa_def *shift = nir_imul(b, nir_iand(b, offset,
+                                                   nir_imm_int(b, 3)),
+                                       nir_imm_int(b, 8));
+         vec32 = nir_ushr(b, vec32, shift);
+      }
+
+      /* And now comes the pack/unpack step to match the original type.
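+       * For instance, a 4 x 8-bit load is a single 32-bit channel at this
+       * point, which extract_comps_from_vec32() then unpacks into four
+       * 8-bit values.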
*/ + extract_comps_from_vec32(b, vec32, bit_size, &comps[comp_idx], + subload_num_bits / bit_size); + comp_idx += subload_num_bits / bit_size; + } + + assert(comp_idx == num_components); + return nir_vec(b, comps, num_components); +} diff --git a/src/microsoft/compiler/dxil_nir.h b/src/microsoft/compiler/dxil_nir.h new file mode 100644 index 00000000000..49de8eb9934 --- /dev/null +++ b/src/microsoft/compiler/dxil_nir.h @@ -0,0 +1,40 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DXIL_NIR_H +#define DXIL_NIR_H + +#include +#include "nir.h" +#include "nir_builder.h" + +bool dxil_nir_lower_8bit_conv(nir_shader *shader); +bool dxil_nir_lower_x2b(nir_shader *shader); +bool dxil_nir_lower_inot(nir_shader *shader); + +nir_ssa_def * +build_load_ubo_dxil(nir_builder *b, nir_ssa_def *buffer, + nir_ssa_def *offset, unsigned num_components, + unsigned bit_size); + +#endif /* DXIL_NIR_H */ diff --git a/src/microsoft/compiler/dxil_nir_algebraic.py b/src/microsoft/compiler/dxil_nir_algebraic.py new file mode 100644 index 00000000000..cb78d62ea44 --- /dev/null +++ b/src/microsoft/compiler/dxil_nir_algebraic.py @@ -0,0 +1,133 @@ +# +# Copyright (C) 2020 Microsoft Corporation +# +# Copyright (C) 2018 Alyssa Rosenzweig +# +# Copyright (C) 2016 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
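+
+# This script is meant to be run at build time: meson.build invokes it with
+# "-p <path to src/compiler/nir>" so that nir_algebraic can be imported, and
+# captures its stdout into dxil_nir_algebraic.c.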
+ +import argparse +import sys +import math + +a = 'a' + +# The nir_lower_bit_size() pass gets rid of all 8bit ALUs but insert new u2u8 +# and i2i8 operations to convert the result back to the original type after the +# arithmetic operation is done. Those u2u8 and i2i8 operations, as any other +# 8bit operations, are not supported by DXIL and needs to be discarded. The +# dxil_nir_lower_8bit_conv() pass is here for that. +# Similarly, some hardware doesn't support 16bit values + +no_8bit_conv = [] +no_16bit_conv = [] + +def remove_unsupported_casts(arr, bit_size, mask, max_unsigned_float, min_signed_float, max_signed_float): + for outer_op_type in ('u2u', 'i2i', 'u2f', 'i2f'): + for outer_op_sz in (16, 32, 64): + if outer_op_sz == bit_size: + continue + outer_op = outer_op_type + str(int(outer_op_sz)) + for inner_op_type in ('u2u', 'i2i'): + inner_op = inner_op_type + str(int(bit_size)) + for src_sz in (16, 32, 64): + if (src_sz == bit_size): + continue + # Coming from integral, truncate appropriately + orig_seq = (outer_op, (inner_op, 'a@' + str(int(src_sz)))) + if (outer_op[0] == 'u'): + new_seq = ('iand', a, mask) + else: + shift = src_sz - bit_size + new_seq = ('ishr', ('ishl', a, shift), shift) + # Make sure the destination is the right type/size + if outer_op_sz != src_sz or outer_op[2] != inner_op[0]: + new_seq = (outer_op, new_seq) + arr += [(orig_seq, new_seq)] + for inner_op_type in ('f2u', 'f2i'): + inner_op = inner_op_type + str(int(bit_size)) + if (outer_op[2] == 'f'): + # From float and to float, just truncate via min/max, and ensure the right float size + for src_sz in (16, 32, 64): + if (src_sz == bit_size): + continue + orig_seq = (outer_op, (inner_op, 'a@' + str(int(src_sz)))) + if (outer_op[0] == 'u'): + new_seq = ('fmin', ('fmax', a, 0.0), max_unsigned_float) + else: + new_seq = ('fmin', ('fmax', a, min_signed_float), max_signed_float) + if outer_op_sz != src_sz: + new_seq = ('f2f' + str(int(outer_op_sz)), new_seq) + arr += [(orig_seq, new_seq)] + else: + # From float to integral, convert to integral type first, then truncate + orig_seq = (outer_op, (inner_op, a)) + float_conv = ('f2' + inner_op[2] + str(int(outer_op_sz)), a) + if (outer_op[0] == 'u'): + new_seq = ('iand', float_conv, mask) + else: + shift = outer_op_sz - bit_size + new_seq = ('ishr', ('ishl', float_conv, shift), shift) + arr += [(orig_seq, new_seq)] + +remove_unsupported_casts(no_8bit_conv, 8, 0xff, 255.0, -128.0, 127.0) +remove_unsupported_casts(no_16bit_conv, 16, 0xffff, 65535.0, -32768.0, 32767.0) + +lower_x2b = [ + (('b2b32', 'a'), ('b2i32', 'a')), + (('b2b1', 'a'), ('i2b1', 'a')), + (('i2b1', 'a'), ('ine', a, 0)), + (('f2b1', 'a'), ('fneu', a, 0)), +] + +no_16bit_conv += [ + (('f2f32', ('u2u16', 'a@32')), ('unpack_half_2x16_split_x', 'a')), + (('u2u32', ('f2f16_rtz', 'a@32')), ('pack_half_2x16_split', 'a', 0)), +] + +lower_inot = [ + (('inot', a), ('ixor', a, -1)), +] + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('-p', '--import-path', required=True) + args = parser.parse_args() + sys.path.insert(0, args.import_path) + run() + + +def run(): + import nir_algebraic # pylint: disable=import-error + + print('#include "dxil_nir.h"') + + print(nir_algebraic.AlgebraicPass("dxil_nir_lower_8bit_conv", + no_8bit_conv).render()) + print(nir_algebraic.AlgebraicPass("dxil_nir_lower_16bit_conv", + no_16bit_conv).render()) + print(nir_algebraic.AlgebraicPass("dxil_nir_lower_x2b", + lower_x2b).render()) + print(nir_algebraic.AlgebraicPass("dxil_nir_lower_inot", + lower_inot).render()) 
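+
+# As an illustration, one of the pairs remove_unsupported_casts() generates
+# for the 8-bit case is (('u2u32', ('u2u8', 'a@32')), ('iand', a, 0xff)): the
+# 8-bit round-trip that DXIL cannot express is replaced by a 32-bit mask.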
+ +if __name__ == '__main__': + main() diff --git a/src/microsoft/compiler/dxil_nir_lower_int_samplers.c b/src/microsoft/compiler/dxil_nir_lower_int_samplers.c new file mode 100644 index 00000000000..bc420b3fd39 --- /dev/null +++ b/src/microsoft/compiler/dxil_nir_lower_int_samplers.c @@ -0,0 +1,555 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "dxil_nir_lower_int_samplers.h" +#include "nir_builder.h" +#include "nir_builtin_builder.h" + +bool +lower_sample_to_txf_for_integer_tex_filter(const nir_instr *instr, + UNUSED const void *_options) +{ + if (instr->type != nir_instr_type_tex) + return false; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->op != nir_texop_tex && + tex->op != nir_texop_txb && + tex->op != nir_texop_txl && + tex->op != nir_texop_txd) + return false; + + return (tex->dest_type & (nir_type_int | nir_type_uint)); +} + +nir_ssa_def * +dx_get_texture_lod(nir_builder *b, nir_tex_instr *tex) +{ + nir_tex_instr *tql; + + unsigned num_srcs = 0; + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type == nir_tex_src_coord || + tex->src[i].src_type == nir_tex_src_texture_deref || + tex->src[i].src_type == nir_tex_src_sampler_deref || + tex->src[i].src_type == nir_tex_src_texture_offset || + tex->src[i].src_type == nir_tex_src_sampler_offset || + tex->src[i].src_type == nir_tex_src_texture_handle || + tex->src[i].src_type == nir_tex_src_sampler_handle) + num_srcs++; + } + + tql = nir_tex_instr_create(b->shader, num_srcs); + tql->op = nir_texop_lod; + unsigned coord_components = tex->coord_components; + if (tex->is_array) + --coord_components; + + tql->coord_components = coord_components; + tql->sampler_dim = tex->sampler_dim; + tql->is_shadow = tex->is_shadow; + tql->is_new_style_shadow = tex->is_new_style_shadow; + tql->texture_index = tex->texture_index; + tql->sampler_index = tex->sampler_index; + tql->dest_type = nir_type_float; + + /* The coordinate needs special handling because we might have + * to strip the array index. Don't clutter the code with an additional + * check for is_array though, in the worst case we create an additional + * move the the optimization will remove later again. 
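+    * (The nir_channels() call below keeps only the non-array components of
+    * the coordinate.)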
*/ + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *ssa_src = nir_channels(b, tex->src[coord_index].src.ssa, + (1 << coord_components) - 1); + nir_src src = nir_src_for_ssa(ssa_src); + nir_src_copy(&tql->src[0].src, &src, tql); + tql->src[0].src_type = nir_tex_src_coord; + + unsigned idx = 1; + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type == nir_tex_src_texture_deref || + tex->src[i].src_type == nir_tex_src_sampler_deref || + tex->src[i].src_type == nir_tex_src_texture_offset || + tex->src[i].src_type == nir_tex_src_sampler_offset || + tex->src[i].src_type == nir_tex_src_texture_handle || + tex->src[i].src_type == nir_tex_src_sampler_handle) { + nir_src_copy(&tql->src[idx].src, &tex->src[i].src, tql); + tql->src[idx].src_type = tex->src[i].src_type; + idx++; + } + } + + nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL); + nir_builder_instr_insert(b, &tql->instr); + + /* DirectX LOD only has a value in x channel */ + return nir_channel(b, &tql->dest.ssa, 0); +} + +typedef struct { + nir_ssa_def *coords; + nir_ssa_def *use_border_color; +} wrap_result_t; + +typedef struct { + nir_ssa_def *lod; + nir_ssa_def *size; + int ncoord_comp; + wrap_result_t wrap[3]; +} wrap_lower_param_t; + +static void +wrap_clamp_to_edge(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + /* clamp(coord, 0, size - 1) */ + wrap_params->coords = nir_fmin(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)), + nir_fmax(b, wrap_params->coords, nir_imm_float(b, 0.0f))); +} + +static void +wrap_repeat(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + /* mod(coord, size) + * This instruction must be exact, otherwise certain sizes result in + * incorrect sampling */ + wrap_params->coords = nir_fmod(b, wrap_params->coords, size); + nir_instr_as_alu(wrap_params->coords->parent_instr)->exact = true; +} + +static nir_ssa_def * +mirror(nir_builder *b, nir_ssa_def *coord) +{ + /* coord if >= 0, otherwise -(1 + coord) */ + return nir_bcsel(b, nir_fge(b, coord, nir_imm_float(b, 0.0f)), coord, + nir_fneg(b, nir_fadd(b, nir_imm_float(b, 1.0f), coord))); +} + +static void +wrap_mirror_repeat(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + /* (size − 1) − mirror(mod(coord, 2 * size) − size) */ + nir_ssa_def *coord_mod2size = nir_fmod(b, wrap_params->coords, nir_fmul(b, nir_imm_float(b, 2.0f), size)); + nir_instr_as_alu(coord_mod2size->parent_instr)->exact = true; + nir_ssa_def *a = nir_fsub(b, coord_mod2size, size); + wrap_params->coords = nir_fsub(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)), mirror(b, a)); +} + +static void +wrap_mirror_clamp_to_edge(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + /* clamp(mirror(coord), 0, size - 1) */ + wrap_params->coords = nir_fmin(b, nir_fsub(b, size, nir_imm_float(b, 1.0f)), + nir_fmax(b, mirror(b, wrap_params->coords), nir_imm_float(b, 0.0f))); +} + +static void +wrap_clamp(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + nir_ssa_def *is_low = nir_flt(b, wrap_params->coords, nir_imm_float(b, 0.0)); + nir_ssa_def *is_high = nir_fge(b, wrap_params->coords, size); + wrap_params->use_border_color = nir_ior(b, is_low, is_high); +} + +static void +wrap_mirror_clamp(nir_builder *b, wrap_result_t *wrap_params, nir_ssa_def *size) +{ + /* We have to take care of the boundaries */ + nir_ssa_def *is_low = nir_flt(b, wrap_params->coords, nir_fmul(b, size, nir_imm_float(b, -1.0))); + nir_ssa_def *is_high = nir_flt(b, nir_fmul(b, size, nir_imm_float(b, 
2.0)), wrap_params->coords); + wrap_params->use_border_color = nir_ior(b, is_low, is_high); + + /* Within the boundaries this acts like mirror_repeat */ + wrap_mirror_repeat(b, wrap_params, size); + +} + +static wrap_result_t +wrap_coords(nir_builder *b, nir_ssa_def *coords, enum pipe_tex_wrap wrap, + nir_ssa_def *size) +{ + wrap_result_t result = {coords, nir_imm_false(b)}; + + switch (wrap) { + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + wrap_clamp_to_edge(b, &result, size); + break; + case PIPE_TEX_WRAP_REPEAT: + wrap_repeat(b, &result, size); + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + wrap_mirror_repeat(b, &result, size); + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + wrap_mirror_clamp_to_edge(b, &result, size); + break; + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + wrap_clamp(b, &result, size); + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + wrap_mirror_clamp(b, &result, size); + break; + } + return result; +} + +static nir_ssa_def * +load_bordercolor(nir_builder *b, nir_tex_instr *tex, dxil_wrap_sampler_state *active_state, + const dxil_texture_swizzle_state *tex_swizzle) +{ + nir_const_value const_value[4] = {{0}}; + int ndest_comp = nir_dest_num_components(tex->dest); + + unsigned swizzle[4] = { + tex_swizzle->swizzle_r, + tex_swizzle->swizzle_g, + tex_swizzle->swizzle_b, + tex_swizzle->swizzle_a + }; + + for (int i = 0; i < ndest_comp; ++i) { + switch (swizzle[i]) { + case PIPE_SWIZZLE_0: + const_value[i].f32 = 0; + break; + case PIPE_SWIZZLE_1: + const_value[i].i32 = 1; + break; + case PIPE_SWIZZLE_X: + case PIPE_SWIZZLE_Y: + case PIPE_SWIZZLE_Z: + case PIPE_SWIZZLE_W: + const_value[i].f32 = active_state->border_color[swizzle[i]]; + break; + default: + unreachable("Unexpected swizzle value"); + } + } + + return nir_build_imm(b, ndest_comp, 32, const_value); +} + +nir_tex_instr * +create_txf_from_tex(nir_builder *b, nir_tex_instr *tex) +{ + nir_tex_instr *txf; + + unsigned num_srcs = 0; + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type == nir_tex_src_texture_deref || + tex->src[i].src_type == nir_tex_src_texture_offset || + tex->src[i].src_type == nir_tex_src_texture_handle) + num_srcs++; + } + + txf = nir_tex_instr_create(b->shader, num_srcs); + txf->op = nir_texop_txf; + txf->sampler_dim = tex->sampler_dim; + txf->is_array = tex->is_array; + txf->is_shadow = tex->is_shadow; + txf->is_new_style_shadow = tex->is_new_style_shadow; + txf->texture_index = tex->texture_index; + txf->sampler_index = tex->sampler_index; + txf->dest_type = tex->dest_type; + + unsigned idx = 0; + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type == nir_tex_src_texture_deref || + tex->src[i].src_type == nir_tex_src_texture_offset || + tex->src[i].src_type == nir_tex_src_texture_handle) { + nir_src_copy(&txf->src[idx].src, &tex->src[i].src, txf); + txf->src[idx].src_type = tex->src[i].src_type; + idx++; + } + } + + nir_ssa_dest_init(&txf->instr, &txf->dest, + nir_tex_instr_dest_size(txf), 32, NULL); + nir_builder_instr_insert(b, &txf->instr); + + return txf; +} + +static nir_ssa_def * +load_texel(nir_builder *b, nir_tex_instr *tex, wrap_lower_param_t *params) +{ + nir_ssa_def *texcoord = NULL; + + /* Put coordinates back together */ + switch (tex->coord_components) { + case 1: + texcoord = params->wrap[0].coords; + break; + case 2: + texcoord = nir_vec2(b, params->wrap[0].coords, params->wrap[1].coords); + break; + case 3: + texcoord = nir_vec3(b, params->wrap[0].coords, 
params->wrap[1].coords, params->wrap[2].coords); + break; + default: + ; + } + + texcoord = nir_f2i32(b, texcoord); + + nir_tex_instr *load = create_txf_from_tex(b, tex); + nir_tex_instr_add_src(load, nir_tex_src_lod, nir_src_for_ssa(params->lod)); + nir_tex_instr_add_src(load, nir_tex_src_coord, nir_src_for_ssa(texcoord)); + b->cursor = nir_after_instr(&load->instr); + return &load->dest.ssa; +} + +typedef struct { + dxil_wrap_sampler_state *aws; + float max_bias; + nir_ssa_def *size; + int ncoord_comp; +} lod_params; + +static nir_ssa_def * +evalute_active_lod(nir_builder *b, nir_tex_instr *tex, lod_params *params) +{ + static nir_ssa_def *lod = NULL; + + /* Later we use min_lod for clamping the LOD to a legal value */ + float min_lod = MAX2(params->aws->min_lod, 0.0f); + + /* Evaluate the LOD to be used for the texel fetch */ + if (unlikely(tex->op == nir_texop_txl)) { + int lod_index = nir_tex_instr_src_index(tex, nir_tex_src_lod); + /* if we have an explicite LOD, take it */ + lod = tex->src[lod_index].src.ssa; + } else if (unlikely(tex->op == nir_texop_txd)) { + int ddx_index = nir_tex_instr_src_index(tex, nir_tex_src_ddx); + int ddy_index = nir_tex_instr_src_index(tex, nir_tex_src_ddy); + assert(ddx_index >= 0 && ddy_index >= 0); + + nir_ssa_def *grad = nir_fmax(b, + tex->src[ddx_index].src.ssa, + tex->src[ddy_index].src.ssa); + + nir_ssa_def *r = nir_fmul(b, grad, nir_i2f32(b, params->size)); + nir_ssa_def *rho = nir_channel(b, r, 0); + for (int i = 1; i < params->ncoord_comp; ++i) + rho = nir_fmax(b, rho, nir_channel(b, r, i)); + lod = nir_flog2(b, rho); + } else if (b->shader->info.stage == MESA_SHADER_FRAGMENT){ + lod = dx_get_texture_lod(b, tex); + } else { + /* Only fragment shaders provide the gradient information to evaluate a LOD, + * so force 0 otherwise */ + lod = nir_imm_float(b, 0.0); + } + + /* Evaluate bias according to OpenGL (4.6 (Compatibility Profile) October 22, 2019), + * sec. 8.14.1, eq. (8.9) + * + * lod' = lambda + CLAMP(bias_texobj + bias_texunit + bias_shader) + * + * bias_texobj is the value of TEXTURE_LOD_BIAS for the bound texture object. ... + * bias_textunt is the value of TEXTURE_LOD_BIAS for the current texture unit, ... + * bias shader is the value of the optional bias parameter in the texture + * lookup functions available to fragment shaders. ... The sum of these values + * is clamped to the range [−bias_max, bias_max] where bias_max is the value + * of the implementation defined constant MAX_TEXTURE_LOD_BIAS. + * In core contexts the value bias_texunit is dropped from above equation. + * + * Gallium provides the value lod_bias as the sum of bias_texobj and bias_texunit + * in compatibility contexts and as bias_texobj in core contexts, hence the + * implementation here is the same in both cases. + */ + nir_ssa_def *lod_bias = nir_imm_float(b, params->aws->lod_bias); + + if (unlikely(tex->op == nir_texop_txb)) { + int bias_index = nir_tex_instr_src_index(tex, nir_tex_src_bias); + lod_bias = nir_fadd(b, lod_bias, tex->src[bias_index].src.ssa); + } + + lod = nir_fadd(b, lod, nir_fclamp(b, lod_bias, + nir_imm_float(b, -params->max_bias), + nir_imm_float(b, params->max_bias))); + + /* Clamp lod according to ibid. eq. (8.10) */ + lod = nir_fmax(b, lod, nir_imm_float(b, min_lod)); + + /* If the max lod is > max_bias = log2(max_texture_size), the lod will be clamped + * by the number of levels, no need to clamp it againt the max_lod first. 
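+    * The nir_imin() against aws->last_level at the end of this function
+    * already provides that clamp.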
*/ + if (params->aws->max_lod <= params->max_bias) + lod = nir_fmin(b, lod, nir_imm_float(b, params->aws->max_lod)); + + /* Pick nearest LOD */ + lod = nir_f2i32(b, nir_fround_even(b, lod)); + + /* cap actual lod by number of available levels */ + return nir_imin(b, lod, nir_imm_int(b, params->aws->last_level)); +} + +typedef struct { + dxil_wrap_sampler_state *wrap_states; + dxil_texture_swizzle_state *tex_swizzles; + float max_bias; +} sampler_states; + + +static nir_ssa_def * +lower_sample_to_txf_for_integer_tex_impl(nir_builder *b, nir_instr *instr, + void *options) +{ + sampler_states *states = (sampler_states *)options; + wrap_lower_param_t params = {0}; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + dxil_wrap_sampler_state *active_wrap_state = &states->wrap_states[tex->sampler_index]; + + b->cursor = nir_before_instr(instr); + + int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); + nir_ssa_def *old_coord = tex->src[coord_index].src.ssa; + params.ncoord_comp = tex->coord_components; + if (tex->is_array) + params.ncoord_comp -= 1; + + /* This helper to get the texture size always uses LOD 0, and DirectX doesn't support + * giving another LOD when querying the texture size */ + nir_ssa_def *size0 = nir_get_texture_size(b, tex); + + params.lod = nir_imm_int(b, 0); + + if (active_wrap_state->last_level > 0) { + lod_params p = { + .aws = active_wrap_state, + .max_bias = states->max_bias, + .size = size0, + .ncoord_comp = params.ncoord_comp + }; + params.lod = evalute_active_lod(b, tex, &p); + + /* Evaluate actual level size*/ + params.size = nir_i2f32(b, nir_imax(b, nir_ishr(b, size0, params.lod), + nir_imm_int(b, 1))); + } else { + params.size = nir_i2f32(b, size0); + } + + nir_ssa_def *new_coord = old_coord; + if (!active_wrap_state->is_nonnormalized_coords) { + /* Evaluate the integer lookup coordinates for the requested LOD, don't touch the + * array index */ + if (!tex->is_array) { + new_coord = nir_fmul(b, params.size, old_coord); + } else { + nir_ssa_def *array_index = nir_channel(b, old_coord, params.ncoord_comp); + int mask = (1 << params.ncoord_comp) - 1; + nir_ssa_def *coord = nir_fmul(b, nir_channels(b, params.size, mask), + nir_channels(b, old_coord, mask)); + switch (params.ncoord_comp) { + case 1: + new_coord = nir_vec2(b, coord, array_index); + break; + case 2: + new_coord = nir_vec3(b, nir_channel(b, coord, 0), + nir_channel(b, coord, 1), + array_index); + break; + default: + unreachable("unsupported number of non-array coordinates"); + } + } + } + + nir_ssa_def *coord_help[3]; + for (int i = 0; i < params.ncoord_comp; ++i) + coord_help[i] = nir_ffloor(b, nir_channel(b, new_coord, i)); + + // Note: array index needs to be rounded to nearest before clamp rather than floored + if (tex->is_array) + coord_help[params.ncoord_comp] = nir_fround_even(b, nir_channel(b, new_coord, params.ncoord_comp)); + + /* Correct the texture coordinates for the offsets. 
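+    * The offsets are integer texel offsets, hence the nir_i2f32() conversion
+    * before they are added to the already-floored coordinates.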
*/ + int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); + if (offset_index >= 0) { + nir_ssa_def *offset = tex->src[offset_index].src.ssa; + for (int i = 0; i < params.ncoord_comp; ++i) + coord_help[i] = nir_fadd(b, coord_help[i], nir_i2f32(b, nir_channel(b, offset, i))); + } + + nir_ssa_def *use_border_color = nir_imm_false(b); + + if (!active_wrap_state->skip_boundary_conditions) { + + for (int i = 0; i < params.ncoord_comp; ++i) { + params.wrap[i] = wrap_coords(b, coord_help[i], active_wrap_state->wrap[i], nir_channel(b, params.size, i)); + use_border_color = nir_ior(b, use_border_color, params.wrap[i].use_border_color); + } + + if (tex->is_array) + params.wrap[params.ncoord_comp] = + wrap_coords(b, coord_help[params.ncoord_comp], + PIPE_TEX_WRAP_CLAMP_TO_EDGE, + nir_i2f32(b, nir_channel(b, size0, params.ncoord_comp))); + } else { + /* When we emulate a cube map by using a texture array, the coordinates are always + * in range, and we don't have to take care of boundary conditions */ + for (unsigned i = 0; i < 3; ++i) { + params.wrap[i].coords = coord_help[i]; + params.wrap[i].use_border_color = nir_imm_false(b); + } + } + + const dxil_texture_swizzle_state one2one = { + PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W + }; + + nir_if *border_if = nir_push_if(b, use_border_color); + const dxil_texture_swizzle_state *swizzle = states->tex_swizzles ? + &states->tex_swizzles[tex->sampler_index]: + &one2one; + + nir_ssa_def *border_color = load_bordercolor(b, tex, active_wrap_state, swizzle); + nir_if *border_else = nir_push_else(b, border_if); + nir_ssa_def *sampler_color = load_texel(b, tex, ¶ms); + nir_pop_if(b, border_else); + + return nir_if_phi(b, border_color, sampler_color); +} + +/* Sampling from integer textures is not allowed in DirectX, so we have + * to use texel fetches. For this we have to scale the coordiantes + * to be integer based, and evaluate the LOD the texel fetch has to be + * applied on, and take care of the boundary conditions . + */ +bool +dxil_lower_sample_to_txf_for_integer_tex(nir_shader *s, + dxil_wrap_sampler_state *wrap_states, + dxil_texture_swizzle_state *tex_swizzles, + float max_bias) +{ + sampler_states states = {wrap_states, tex_swizzles, max_bias}; + + bool result = + nir_shader_lower_instructions(s, + lower_sample_to_txf_for_integer_tex_filter, + lower_sample_to_txf_for_integer_tex_impl, + &states); + return result; +} diff --git a/src/microsoft/compiler/dxil_nir_lower_int_samplers.h b/src/microsoft/compiler/dxil_nir_lower_int_samplers.h new file mode 100644 index 00000000000..eb979f7419a --- /dev/null +++ b/src/microsoft/compiler/dxil_nir_lower_int_samplers.h @@ -0,0 +1,64 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DXIL_NIR_LOWER_INT_SAMPLERS_H +#define DXIL_NIR_LOWER_INT_SAMPLERS_H + +#include "pipe/p_state.h" +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + unsigned swizzle_r:3; + unsigned swizzle_g:3; + unsigned swizzle_b:3; + unsigned swizzle_a:3; +} dxil_texture_swizzle_state; + +typedef struct { + float border_color[4]; + float lod_bias; + float min_lod, max_lod; + int last_level; + uint8_t wrap[3]; + uint8_t is_int_sampler:1; + uint8_t is_nonnormalized_coords:1; + uint8_t is_linear_filtering:1; + uint8_t skip_boundary_conditions:1; + uint8_t unused:4; +} dxil_wrap_sampler_state; + +bool +dxil_lower_sample_to_txf_for_integer_tex(nir_shader *s, + dxil_wrap_sampler_state *wrap_states, + dxil_texture_swizzle_state *tex_swizzles, + float max_bias); + +#ifdef __cplusplus +} +#endif + +#endif // DXIL_NIR_LOWER_INT_SAMPLERS_H diff --git a/src/microsoft/compiler/meson.build b/src/microsoft/compiler/meson.build index 17caed47361..822694c1616 100644 --- a/src/microsoft/compiler/meson.build +++ b/src/microsoft/compiler/meson.build @@ -26,12 +26,27 @@ files_libdxil_compiler = files( 'dxil_enums.c', 'dxil_function.c', 'dxil_module.c', + 'dxil_nir.c', + 'dxil_nir_lower_int_samplers.c', 'dxil_signature.c', + 'nir_to_dxil.c', +) + +dxil_nir_algebraic_c = custom_target( + 'dxil_nir_algebraic.c', + input : 'dxil_nir_algebraic.py', + output : 'dxil_nir_algebraic.c', + command : [ + prog_python, '@INPUT@', + '-p', join_paths(meson.source_root(), 'src/compiler/nir/'), + ], + capture : true, + depend_files : nir_algebraic_py, ) libdxil_compiler = static_library( 'dxil_compiler', - [files_libdxil_compiler, sha1_h], + [files_libdxil_compiler, dxil_nir_algebraic_c, sha1_h], include_directories : [inc_include, inc_src, inc_mapi, inc_mesa, inc_compiler, inc_gallium], dependencies: [idep_nir_headers], gnu_symbol_visibility : 'hidden', diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c new file mode 100644 index 00000000000..9964a16c106 --- /dev/null +++ b/src/microsoft/compiler/nir_to_dxil.c @@ -0,0 +1,3725 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir_to_dxil.h" + +#include "dxil_module.h" +#include "dxil_container.h" +#include "dxil_function.h" +#include "dxil_signature.h" +#include "dxil_enums.h" +#include "dxil_dump.h" +#include "dxil_nir.h" + +#include "util/u_debug.h" +#include "util/u_math.h" +#include "nir/nir_builder.h" + +#include "git_sha1.h" + +#include + +int debug_dxil = 0; + +static const struct debug_named_value +debug_options[] = { + { "verbose", DXIL_DEBUG_VERBOSE, NULL }, + { "dump_blob", DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" }, + { "trace", DXIL_DEBUG_TRACE , "Trace instruction conversion" }, + { "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"}, + DEBUG_NAMED_VALUE_END +}; + +DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", debug_options, 0) + +#define NIR_INSTR_UNSUPPORTED(instr) \ + if (debug_dxil & DXIL_DEBUG_VERBOSE) \ + do { \ + fprintf(stderr, "Unsupported instruction:"); \ + nir_print_instr(instr, stderr); \ + fprintf(stderr, "\n"); \ + } while (0) + +#define TRACE_CONVERSION(instr) \ + if (debug_dxil & DXIL_DEBUG_TRACE) \ + do { \ + fprintf(stderr, "Convert '"); \ + nir_print_instr(instr, stderr); \ + fprintf(stderr, "'\n"); \ + } while (0) + +static const nir_shader_compiler_options +nir_options = { + .lower_negate = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, + .lower_isign = true, + .lower_fsign = true, + .lower_iabs = true, + .lower_fmod = true, + .lower_fpow = true, + .lower_scmp = true, + .lower_ldexp = true, + .lower_flrp16 = true, + .lower_flrp32 = true, + .lower_flrp64 = true, + .lower_extract_word = true, + .lower_extract_byte = true, + .lower_all_io_to_elements = true, + .lower_hadd = true, + .lower_add_sat = true, + .lower_uadd_carry = true, + .lower_mul_high = true, + .lower_rotate = true, + .lower_pack_64_2x32_split = true, + .lower_pack_32_2x16_split = true, + .lower_unpack_64_2x32_split = true, + .lower_unpack_32_2x16_split = true, + .vertex_id_zero_based = true, + .lower_base_vertex = true, +}; + +const nir_shader_compiler_options* +dxil_get_nir_compiler_options(void) +{ + return &nir_options; +} + +static bool +emit_llvm_ident(struct dxil_module *m) +{ + const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1); + if (!compiler) + return false; + + const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1); + return llvm_ident && + dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1); +} + +static bool +emit_named_version(struct dxil_module *m, const char *name, + int major, int minor) +{ + const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major); + const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor); + const struct dxil_mdnode *version_nodes[] = { major_node, minor_node }; + const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes, + ARRAY_SIZE(version_nodes)); + return dxil_add_metadata_named_node(m, name, &version, 1); +} + +static const char * +get_shader_kind_str(enum dxil_shader_kind kind) +{ + switch (kind) { + case DXIL_PIXEL_SHADER: + return "ps"; + case DXIL_VERTEX_SHADER: + return "vs"; + case DXIL_GEOMETRY_SHADER: + return "gs"; + case DXIL_HULL_SHADER: + return "hs"; + case 
DXIL_DOMAIN_SHADER: + return "ds"; + case DXIL_COMPUTE_SHADER: + return "cs"; + default: + unreachable("invalid shader kind"); + } +} + +static bool +emit_dx_shader_model(struct dxil_module *m) +{ + const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind)); + const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version); + const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version); + const struct dxil_mdnode *shader_model[] = { type_node, major_node, + minor_node }; + const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model)); + + return dxil_add_metadata_named_node(m, "dx.shaderModel", + &dx_shader_model, 1); +} + +enum { + DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0, + DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1 +}; + +enum dxil_intr { + DXIL_INTR_LOAD_INPUT = 4, + DXIL_INTR_STORE_OUTPUT = 5, + DXIL_INTR_FABS = 6, + DXIL_INTR_SATURATE = 7, + + DXIL_INTR_ISFINITE = 10, + DXIL_INTR_ISNORMAL = 11, + + DXIL_INTR_FCOS = 12, + DXIL_INTR_FSIN = 13, + + DXIL_INTR_FEXP2 = 21, + DXIL_INTR_FRC = 22, + DXIL_INTR_FLOG2 = 23, + + DXIL_INTR_SQRT = 24, + DXIL_INTR_RSQRT = 25, + DXIL_INTR_ROUND_NE = 26, + DXIL_INTR_ROUND_NI = 27, + DXIL_INTR_ROUND_PI = 28, + DXIL_INTR_ROUND_Z = 29, + + DXIL_INTR_COUNTBITS = 31, + DXIL_INTR_FIRSTBIT_HI = 33, + + DXIL_INTR_FMAX = 35, + DXIL_INTR_FMIN = 36, + DXIL_INTR_IMAX = 37, + DXIL_INTR_IMIN = 38, + DXIL_INTR_UMAX = 39, + DXIL_INTR_UMIN = 40, + + DXIL_INTR_FFMA = 46, + + DXIL_INTR_CREATE_HANDLE = 57, + DXIL_INTR_CBUFFER_LOAD_LEGACY = 59, + + DXIL_INTR_SAMPLE = 60, + DXIL_INTR_SAMPLE_BIAS = 61, + DXIL_INTR_SAMPLE_LEVEL = 62, + DXIL_INTR_SAMPLE_GRAD = 63, + DXIL_INTR_SAMPLE_CMP = 64, + DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65, + + DXIL_INTR_TEXTURE_LOAD = 66, + DXIL_INTR_TEXTURE_STORE = 67, + + DXIL_INTR_BUFFER_LOAD = 68, + DXIL_INTR_BUFFER_STORE = 69, + + DXIL_INTR_TEXTURE_SIZE = 72, + + DXIL_INTR_ATOMIC_BINOP = 78, + DXIL_INTR_ATOMIC_CMPXCHG = 79, + DXIL_INTR_BARRIER = 80, + DXIL_INTR_TEXTURE_LOD = 81, + + DXIL_INTR_DISCARD = 82, + DXIL_INTR_DDX_COARSE = 83, + DXIL_INTR_DDY_COARSE = 84, + DXIL_INTR_DDX_FINE = 85, + DXIL_INTR_DDY_FINE = 86, + + DXIL_INTR_THREAD_ID = 93, + DXIL_INTR_GROUP_ID = 94, + DXIL_INTR_THREAD_ID_IN_GROUP = 95, + + DXIL_INTR_EMIT_STREAM = 97, + DXIL_INTR_CUT_STREAM = 98, + + DXIL_INTR_PRIMITIVE_ID = 108, + + DXIL_INTR_LEGACY_F32TOF16 = 130, + DXIL_INTR_LEGACY_F16TOF32 = 131, + + DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137, +}; + +enum dxil_atomic_op { + DXIL_ATOMIC_ADD = 0, + DXIL_ATOMIC_AND = 1, + DXIL_ATOMIC_OR = 2, + DXIL_ATOMIC_XOR = 3, + DXIL_ATOMIC_IMIN = 4, + DXIL_ATOMIC_IMAX = 5, + DXIL_ATOMIC_UMIN = 6, + DXIL_ATOMIC_UMAX = 7, + DXIL_ATOMIC_EXCHANGE = 8, +}; + +typedef struct { + unsigned id; + unsigned binding; + unsigned size; +} resource_array_layout; + +static void +fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields, + const struct dxil_type *struct_type, + const char *name, const resource_array_layout *layout) +{ + const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type); + const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type); + + fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID + fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol + fields[2] = dxil_get_metadata_string(m, name ? 
name : ""); // name + fields[3] = dxil_get_metadata_int32(m, 0); // space ID + fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound + fields[5] = dxil_get_metadata_int32(m, layout->size); // range size +} + +static const struct dxil_mdnode * +emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type, + const char *name, const resource_array_layout *layout, + enum dxil_component_type comp_type, + enum dxil_resource_kind res_kind) +{ + const struct dxil_mdnode *fields[9]; + + const struct dxil_mdnode *buffer_element_type_tag = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); + const struct dxil_mdnode *element_type = dxil_get_metadata_int32(m, comp_type); + const struct dxil_mdnode *metadata_tag_nodes[] = { + buffer_element_type_tag, element_type + }; + + fill_resource_metadata(m, fields, elem_type, name, layout); + fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape + fields[7] = dxil_get_metadata_int1(m, 0); // sample count + fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata + + return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); +} + +static const struct dxil_mdnode * +emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type, + const char *name, const resource_array_layout *layout, + enum dxil_component_type comp_type, + enum dxil_resource_kind res_kind) +{ + const struct dxil_mdnode *fields[11]; + + const struct dxil_mdnode *metadata_tag_nodes[2]; + + fill_resource_metadata(m, fields, struct_type, name, layout); + fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape + fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent + fields[8] = dxil_get_metadata_int1(m, false); // has counter + fields[9] = dxil_get_metadata_int1(m, false); // is ROV + if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER && + res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) { + metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG); + metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type); + fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata + } else + fields[10] = NULL; + + return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); +} + +static const struct dxil_mdnode * +emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type, + const char *name, const resource_array_layout *layout, + unsigned size) +{ + const struct dxil_mdnode *fields[8]; + + fill_resource_metadata(m, fields, struct_type, name, layout); + fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size + fields[7] = NULL; // metadata + + return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); +} + +static const struct dxil_mdnode * +emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type, + nir_variable *var, const resource_array_layout *layout) +{ + const struct dxil_mdnode *fields[8]; + const struct glsl_type *type = glsl_without_array(var->type); + + fill_resource_metadata(m, fields, struct_type, var->name, layout); + fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind + enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ? 
+ DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT; + fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind + fields[7] = NULL; // metadata + + return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields)); +} + +static const struct dxil_type * +get_glsl_basetype(struct dxil_module *m, enum glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_BOOL: + return dxil_module_get_int_type(m, 1); + + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + return dxil_module_get_int_type(m, 32); + + default: + debug_printf("type: %s\n", glsl_get_type_name(glsl_scalar_type(type))); + unreachable("unexpected GLSL type"); + } +} + +static const struct dxil_type * +get_glsl_type(struct dxil_module *m, const struct glsl_type *type) +{ + assert(type); + + if (glsl_type_is_scalar(type)) + return get_glsl_basetype(m, glsl_get_base_type(type)); + + if (glsl_type_is_array(type)) + return dxil_module_get_array_type(m, + get_glsl_type(m, glsl_get_array_element(type)), + glsl_get_length(type)); + + unreachable("unexpected glsl type"); +} + + +#define MAX_SRVS 128 +#define MAX_UAVS 64 +#define MAX_CBVS 64 // ?? +#define MAX_SAMPLERS 64 // ?? + +struct dxil_def { + const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS]; +}; + +struct ntd_context { + void *ralloc_ctx; + const struct nir_to_dxil_options *opts; + + struct dxil_module mod; + + const struct dxil_mdnode *srv_metadata_nodes[MAX_SRVS]; + const struct dxil_value *srv_handles[MAX_SRVS]; + uint64_t srvs_used[2]; + unsigned num_srv_arrays; + + const struct dxil_mdnode *uav_metadata_nodes[MAX_UAVS]; + const struct dxil_value *uav_handles[MAX_UAVS]; + unsigned num_uavs; + unsigned num_uav_arrays; + + const struct dxil_mdnode *cbv_metadata_nodes[MAX_CBVS]; + const struct dxil_value *cbv_handles[MAX_CBVS]; + unsigned num_cbvs; + + const struct dxil_mdnode *sampler_metadata_nodes[MAX_SAMPLERS]; + const struct dxil_value *sampler_handles[MAX_SAMPLERS]; + uint64_t samplers_used : MAX_SAMPLERS; + unsigned num_sampler_arrays; + + struct dxil_resource resources[MAX_SRVS + MAX_UAVS + MAX_CBVS]; + unsigned num_resources; + + const struct dxil_mdnode *shader_property_nodes[6]; + size_t num_shader_property_nodes; + + struct dxil_def *defs; + unsigned num_defs; + struct hash_table *phis; + + const struct dxil_value *sharedvars; + const struct dxil_value *scratchvars; + struct hash_table *consts; + + nir_variable *ps_front_face; + nir_variable *system_value[SYSTEM_VALUE_MAX]; +}; + +static const char* +unary_func_name(enum dxil_intr intr) +{ + switch (intr) { + case DXIL_INTR_COUNTBITS: + case DXIL_INTR_FIRSTBIT_HI: + return "dx.op.unaryBits"; + case DXIL_INTR_ISFINITE: + case DXIL_INTR_ISNORMAL: + return "dx.op.isSpecialFloat"; + } + + return "dx.op.unary"; +} + +static const struct dxil_value * +emit_unary_call(struct ntd_context *ctx, enum overload_type overload, + enum dxil_intr intr, + const struct dxil_value *op0) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, + unary_func_name(intr), + overload); + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + op0 + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_binary_call(struct ntd_context *ctx, enum overload_type overload, + enum dxil_intr intr, + const struct dxil_value *op0, const struct dxil_value *op1) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, 
"dx.op.binary", overload); + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + op0, + op1 + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload, + enum dxil_intr intr, + const struct dxil_value *op0, + const struct dxil_value *op1, + const struct dxil_value *op2) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload); + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + op0, + op1, + op2 + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32); + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_THREAD_ID); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + comp + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_threadidingroup_call(struct ntd_context *ctx, + const struct dxil_value *comp) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32); + + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_THREAD_ID_IN_GROUP); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + comp + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32); + + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_GROUP_ID); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, + comp + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_bufferload_call(struct ntd_context *ctx, + const struct dxil_value *handle, + const struct dxil_value *coord[2]) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", DXIL_I32); + if (!func) + return NULL; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_BUFFER_LOAD); + const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_bufferstore_call(struct ntd_context *ctx, + const struct dxil_value *handle, + const struct dxil_value *coord[2], + const struct dxil_value *value[4], + const struct dxil_value *write_mask, + enum overload_type overload) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload); + + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_BUFFER_STORE); + const struct dxil_value *args[] = { + opcode, handle, coord[0], coord[1], + value[0], value[1], value[2], 
value[3], + write_mask + }; + + return dxil_emit_call_void(&ctx->mod, func, + args, ARRAY_SIZE(args)); +} + +static bool +emit_texturestore_call(struct ntd_context *ctx, + const struct dxil_value *handle, + const struct dxil_value *coord[3], + const struct dxil_value *value[4], + const struct dxil_value *write_mask, + enum overload_type overload) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload); + + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_TEXTURE_STORE); + const struct dxil_value *args[] = { + opcode, handle, coord[0], coord[1], coord[2], + value[0], value[1], value[2], value[3], + write_mask + }; + + return dxil_emit_call_void(&ctx->mod, func, + args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_atomic_binop(struct ntd_context *ctx, + const struct dxil_value *handle, + enum dxil_atomic_op atomic_op, + const struct dxil_value *coord[3], + const struct dxil_value *value) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32); + + if (!func) + return false; + + const struct dxil_value *opcode = + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP); + const struct dxil_value *atomic_op_value = + dxil_module_get_int32_const(&ctx->mod, atomic_op); + const struct dxil_value *args[] = { + opcode, handle, atomic_op_value, + coord[0], coord[1], coord[2], value + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_atomic_cmpxchg(struct ntd_context *ctx, + const struct dxil_value *handle, + const struct dxil_value *coord[3], + const struct dxil_value *cmpval, + const struct dxil_value *newval) +{ + const struct dxil_func *func = + dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32); + + if (!func) + return false; + + const struct dxil_value *opcode = + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG); + const struct dxil_value *args[] = { + opcode, handle, coord[0], coord[1], coord[2], cmpval, newval + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_createhandle_call(struct ntd_context *ctx, + enum dxil_resource_class resource_class, + unsigned resource_range_id, + const struct dxil_value *resource_range_index, + bool non_uniform_resource_index) +{ + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE); + const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class); + const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id); + const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index); + if (!opcode || !resource_class_value || !resource_range_id_value || + !non_uniform_resource_index_value) + return NULL; + + const struct dxil_value *args[] = { + opcode, + resource_class_value, + resource_range_id_value, + resource_range_index, + non_uniform_resource_index_value + }; + + const struct dxil_func *func = + dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE); + + if (!func) + return NULL; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_createhandle_call_const_index(struct ntd_context *ctx, + enum dxil_resource_class resource_class, + unsigned resource_range_id, + unsigned 
resource_range_index, + bool non_uniform_resource_index) +{ + + const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index); + if (!resource_range_index_value) + return NULL; + + return emit_createhandle_call(ctx, resource_class, resource_range_id, + resource_range_index_value, + non_uniform_resource_index); +} + +static void +add_resource(struct ntd_context *ctx, enum dxil_resource_type type, + const resource_array_layout *layout) +{ + assert(ctx->num_resources < ARRAY_SIZE(ctx->resources)); + ctx->resources[ctx->num_resources].resource_type = type; + ctx->resources[ctx->num_resources].space = 0; + ctx->resources[ctx->num_resources].lower_bound = layout->binding; + ctx->resources[ctx->num_resources].upper_bound = layout->binding + layout->size - 1; + ctx->num_resources++; +} + +static bool +emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned binding, unsigned count) +{ + assert(ctx->num_srv_arrays < ARRAY_SIZE(ctx->srv_metadata_nodes)); + + unsigned id = ctx->num_srv_arrays; + resource_array_layout layout = {id, binding, count}; + + enum dxil_component_type comp_type = dxil_get_comp_type(var->type); + enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type); + const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */); + const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type, var->name, + &layout, comp_type, res_kind); + + if (!srv_meta) + return false; + + ctx->srv_metadata_nodes[ctx->num_srv_arrays++] = srv_meta; + add_resource(ctx, DXIL_RES_SRV_TYPED, &layout); + + for (unsigned i = 0; i < count; ++i) { + const struct dxil_value *handle = + emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV, + id, binding + i, false); + if (!handle) + return false; + + int idx = var->data.binding + i; + uint64_t bit = 1ull << (idx % 64); + assert(!(ctx->srvs_used[idx / 64] & bit)); + ctx->srv_handles[idx] = handle; + ctx->srvs_used[idx / 64] |= bit; + + } + + return true; +} + +static bool +emit_uav(struct ntd_context *ctx, nir_variable *var, unsigned count) +{ + assert(ctx->num_uav_arrays < ARRAY_SIZE(ctx->uav_metadata_nodes)); + assert(ctx->num_uavs < ARRAY_SIZE(ctx->uav_handles)); + + unsigned id = ctx->num_uav_arrays; + unsigned idx = var->data.binding; + resource_array_layout layout = { id, idx, count }; + + enum dxil_component_type comp_type = dxil_get_comp_type(var->type); + enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type); + const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */); + const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, var->name, + &layout, comp_type, res_kind); + + if (!uav_meta) + return false; + + ctx->uav_metadata_nodes[ctx->num_uav_arrays++] = uav_meta; + if (ctx->num_uav_arrays > 8) + ctx->mod.feats.use_64uavs = 1; + add_resource(ctx, DXIL_RES_UAV_TYPED, &layout); + + for (unsigned i = 0; i < count; ++i) { + const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV, + id, idx + i, false); + if (!handle) + return false; + + ctx->uav_handles[ctx->num_uavs++] = handle; + } + + return true; +} + +static unsigned get_dword_size(const struct glsl_type *type) +{ + unsigned factor = 1; + if (glsl_type_is_array(type)) { + factor = glsl_get_aoa_size(type); + type = glsl_without_array(type); + } + return (factor * glsl_get_components(type)); +} + +static bool 
+var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx, + const struct nir_constant *c, + const struct glsl_type *type, + void *const_vals, + unsigned int offset) +{ + assert(glsl_type_is_vector_or_scalar(type)); + enum glsl_base_type base_type = glsl_get_base_type(type); + unsigned int components = glsl_get_vector_elements(type); + unsigned bit_size = glsl_get_bit_size(type); + unsigned int increment = bit_size / 8; + + for (unsigned int comp = 0; comp < components; comp++) { + uint8_t *dst = (uint8_t *)const_vals + offset; + + switch (bit_size) { + case 64: + memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64)); + break; + case 32: + memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32)); + break; + case 16: + memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16)); + break; + case 8: + assert(glsl_base_type_is_integer(base_type)); + memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8)); + break; + default: + unreachable("unexpeted bit-size"); + } + + offset += increment; + } + + return true; +} + +static bool +var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c, + const struct glsl_type *type, void *const_vals, + unsigned int offset) +{ + assert(!glsl_type_is_interface(type)); + + if (glsl_type_is_vector_or_scalar(type)) { + return var_fill_const_array_with_vector_or_scalar(ctx, c, type, + const_vals, + offset); + } else if (glsl_type_is_array(type)) { + assert(!glsl_type_is_unsized_array(type)); + const struct glsl_type *without = glsl_without_array(type); + unsigned stride = glsl_get_explicit_stride(without); + enum glsl_base_type without_base = glsl_get_base_type(without); + + for (unsigned elt = 0; elt < glsl_get_length(type); elt++) { + if (!var_fill_const_array(ctx, c->elements[elt], without, + const_vals, offset + (elt * stride))) { + return false; + } + offset += glsl_get_cl_size(without); + } + return true; + } else if (glsl_type_is_struct(type)) { + for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) { + const struct glsl_type *elt_type = glsl_get_struct_field(type, elt); + unsigned field_offset = glsl_get_struct_field_offset(type, elt); + + if (!var_fill_const_array(ctx, c->elements[elt], + elt_type, const_vals, + offset + field_offset)) { + return false; + } + } + return true; + } + + unreachable("unknown GLSL type in var_fill_const_array"); +} + +static bool +emit_cbv(struct ntd_context *ctx, unsigned binding, + unsigned size, char *name) +{ + unsigned idx = ctx->num_cbvs; + + assert(idx < ARRAY_SIZE(ctx->cbv_metadata_nodes)); + + const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32); + const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size); + const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name, + &array_type, 1); + resource_array_layout layout = {idx, binding, 1}; + const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, buffer_type, + name, &layout, 4 * size); + + if (!cbv_meta) + return false; + + ctx->cbv_metadata_nodes[ctx->num_cbvs] = cbv_meta; + add_resource(ctx, DXIL_RES_CBV, &layout); + + const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV, + idx, binding, false); + if (!handle) + return false; + + assert(!ctx->cbv_handles[binding]); + ctx->cbv_handles[binding] = handle; + ctx->num_cbvs++; + + return true; +} + +static bool +emit_ubo_var(struct ntd_context *ctx, nir_variable *var) +{ + unsigned size = get_dword_size(var->type); + unsigned binding = 
var->data.binding; + return emit_cbv(ctx, var->data.binding, get_dword_size(var->type), var->name); +} + +static bool +emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned binding, unsigned count) +{ + assert(ctx->num_sampler_arrays < ARRAY_SIZE(ctx->sampler_metadata_nodes)); + + unsigned id = ctx->num_sampler_arrays; + resource_array_layout layout = {id, binding, count}; + const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); + const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1); + const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout); + + if (!sampler_meta) + return false; + + ctx->sampler_metadata_nodes[id] = sampler_meta; + add_resource(ctx, DXIL_RES_SAMPLER, &layout); + + for (unsigned i = 0; i < count; ++i) { + const struct dxil_value *handle = + emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER, + id, binding + i, false); + if (!handle) + return false; + + unsigned idx = var->data.binding + i; + uint64_t bit = 1ull << idx; + assert(!(ctx->samplers_used & bit)); + ctx->sampler_handles[idx] = handle; + ctx->samplers_used |= bit; + } + ctx->num_sampler_arrays++; + + return true; +} + +static const struct dxil_mdnode * +emit_gs_state(struct ntd_context *ctx, nir_shader *s) +{ + const struct dxil_mdnode *gs_state_nodes[5]; + + gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive)); + gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out); + gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask); + gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive)); + gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations); + + for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) { + if (!gs_state_nodes[i]) + return NULL; + } + + return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes)); +} + +static const struct dxil_mdnode * +emit_threads(struct ntd_context *ctx, nir_shader *s) +{ + const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.cs.local_size[0], 1)); + const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.cs.local_size[1], 1)); + const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.cs.local_size[2], 1)); + if (!threads_x || !threads_y || !threads_z) + return false; + + const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z }; + return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes)); +} + +static int64_t +get_module_flags(struct ntd_context *ctx) +{ + /* See the DXIL documentation for the definition of these flags: + * + * https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags + */ + + uint64_t flags = 0; + if (ctx->mod.feats.doubles) + flags |= (1 << 2); + if (ctx->mod.raw_and_structured_buffers) + flags |= (1 << 4); + if (ctx->mod.feats.min_precision) + flags |= (1 << 5); + if (ctx->mod.feats.dx11_1_double_extensions) + flags |= (1 << 6); + if (ctx->mod.feats.inner_coverage) + flags |= (1 << 10); + if (ctx->mod.feats.typed_uav_load_additional_formats) + flags |= (1 << 13); + if (ctx->mod.feats.use_64uavs) + flags |= (1 << 15); + if (ctx->mod.feats.cs_4x_raw_sb) + flags |= (1 << 17); + if (ctx->mod.feats.wave_ops) + flags |= (1 
<< 19); + if (ctx->mod.feats.int64_ops) + flags |= (1 << 20); + if (ctx->mod.feats.stencil_ref) + flags |= (1 << 11); + if (ctx->mod.feats.native_low_precision) + flags |= (1 << 23) | (1 << 5); + + if (ctx->opts->disable_math_refactoring) + flags |= (1 << 1); + + return flags; +} + +static const struct dxil_mdnode * +emit_entrypoint(struct ntd_context *ctx, + const struct dxil_func *func, const char *name, + const struct dxil_mdnode *signatures, + const struct dxil_mdnode *resources, + const struct dxil_mdnode *shader_props) +{ + const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func); + const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name); + const struct dxil_mdnode *nodes[] = { + func_md, + name_md, + signatures, + resources, + shader_props + }; + return dxil_get_metadata_node(&ctx->mod, nodes, + ARRAY_SIZE(nodes)); +} + +static const struct dxil_mdnode * +emit_resources(struct ntd_context *ctx) +{ + bool emit_resources = false; + const struct dxil_mdnode *resources_nodes[] = { + NULL, NULL, NULL, NULL + }; + + if (ctx->srvs_used[0] || ctx->srvs_used[1]) { + resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ctx->srv_metadata_nodes, ctx->num_srv_arrays); + emit_resources = true; + } + + if (ctx->num_uavs) { + resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ctx->uav_metadata_nodes, ctx->num_uav_arrays); + emit_resources = true; + } + + if (ctx->num_cbvs) { + resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ctx->cbv_metadata_nodes, ctx->num_cbvs); + emit_resources = true; + } + + if (ctx->samplers_used) { + resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ctx->sampler_metadata_nodes, ctx->num_sampler_arrays); + emit_resources = true; + } + + return emit_resources ? + dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL; +} + +static boolean +emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag, + const struct dxil_mdnode *value_node) +{ + const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag); + if (!tag_node || !value_node) + return false; + assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2); + ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node; + ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node; + + return true; +} + +static bool +emit_metadata(struct ntd_context *ctx, nir_shader *s) +{ + unsigned dxilMinor = ctx->mod.minor_version; + if (!emit_llvm_ident(&ctx->mod) || + !emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) || + !emit_named_version(&ctx->mod, "dx.valver", 1, 4) || + !emit_dx_shader_model(&ctx->mod)) + return false; + + const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod); + const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0); + const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type); + if (!main_func) + return false; + + const struct dxil_mdnode *resources_node = emit_resources(ctx); + + const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func); + const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0); + + const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0); + const struct dxil_mdnode *nodes_4_27_27[] = { + node4, node27, node27 + }; + const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27, + ARRAY_SIZE(nodes_4_27_27)); + + const struct dxil_mdnode *node29 
= dxil_get_metadata_node(&ctx->mod, &node28, 1); + + const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1); + const struct dxil_mdnode *main_type_annotation_nodes[] = { + node3, main_entrypoint, node29 + }; + const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes, + ARRAY_SIZE(main_type_annotation_nodes)); + + if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { + if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx, s))) + return false; + } else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) { + if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx, s))) + return false; + } + + uint64_t flags = get_module_flags(ctx); + if (flags != 0) { + if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags))) + return false; + } + const struct dxil_mdnode *shader_properties = NULL; + if (ctx->num_shader_property_nodes > 0) { + shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes, + ctx->num_shader_property_nodes); + if (!shader_properties) + return false; + } + + const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, s); + + const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func, + "main", signatures, resources_node, shader_properties); + if (!dx_entry_point) + return false; + + if (resources_node) { + const struct dxil_mdnode *dx_resources = resources_node; + dxil_add_metadata_named_node(&ctx->mod, "dx.resources", + &dx_resources, 1); + } + + const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation }; + return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations", + dx_type_annotations, + ARRAY_SIZE(dx_type_annotations)) && + dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints", + &dx_entry_point, 1); +} + +static const struct dxil_value * +bitcast_to_int(struct ntd_context *ctx, unsigned bit_size, + const struct dxil_value *value) +{ + const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size); + if (!type) + return NULL; + + return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); +} + +static const struct dxil_value * +bitcast_to_float(struct ntd_context *ctx, unsigned bit_size, + const struct dxil_value *value) +{ + const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size); + if (!type) + return NULL; + + return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value); +} + +static void +store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan, + const struct dxil_value *value) +{ + assert(ssa->index < ctx->num_defs); + assert(chan < ssa->num_components); + /* We pre-defined the dest value because of a phi node, so bitcast while storing if the + * base type differs */ + if (ctx->defs[ssa->index].chans[chan]) { + const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]); + const struct dxil_type *value_type = dxil_value_get_type(value); + if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type)) + value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value); + } + ctx->defs[ssa->index].chans[chan] = value; +} + +static void +store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan, + const struct dxil_value *value) +{ + assert(dest->is_ssa); + assert(value); + store_ssa_def(ctx, &dest->ssa, chan, value); +} + +static void +store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan, + const struct dxil_value *value, nir_alu_type type) +{ + switch 
(nir_alu_type_get_base_type(type)) { + case nir_type_float: + if (nir_dest_bit_size(*dest) == 64) + ctx->mod.feats.doubles = true; + /* fallthrough */ + case nir_type_uint: + case nir_type_int: + if (nir_dest_bit_size(*dest) == 16) + ctx->mod.feats.native_low_precision = true; + if (nir_dest_bit_size(*dest) == 64) + ctx->mod.feats.int64_ops = true; + /* fallthrough */ + case nir_type_bool: + store_dest_value(ctx, dest, chan, value); + break; + default: + unreachable("unexpected nir_alu_type"); + } +} + +static void +store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan, + const struct dxil_value *value) +{ + assert(!alu->dest.saturate); + store_dest(ctx, &alu->dest.dest, chan, value, + nir_op_infos[alu->op].output_type); +} + +static const struct dxil_value * +get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan) +{ + assert(ssa->index < ctx->num_defs); + assert(chan < ssa->num_components); + assert(ctx->defs[ssa->index].chans[chan]); + return ctx->defs[ssa->index].chans[chan]; +} + +static const struct dxil_value * +get_src(struct ntd_context *ctx, nir_src *src, unsigned chan, + nir_alu_type type) +{ + assert(src->is_ssa); + const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan); + + const int bit_size = nir_src_bit_size(*src); + + switch (nir_alu_type_get_base_type(type)) { + case nir_type_int: + case nir_type_uint: { + assert(bit_size != 64 || ctx->mod.feats.int64_ops); + const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size); + /* nothing to do */ + if (dxil_value_type_equal_to(value, expect_type)) + return value; + assert(dxil_value_type_bitsize_equal_to(value, bit_size)); + return bitcast_to_int(ctx, bit_size, value); + } + + case nir_type_float: + assert(nir_src_bit_size(*src) >= 16); + assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles && + ctx->mod.feats.int64_ops)); + if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size))) + return value; + assert(dxil_value_type_bitsize_equal_to(value, bit_size)); + return bitcast_to_float(ctx, bit_size, value); + + case nir_type_bool: + if (!dxil_value_type_bitsize_equal_to(value, 1)) { + return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC, + dxil_module_get_int_type(&ctx->mod, 1), value); + } + return value; + + default: + unreachable("unexpected nir_alu_type"); + } +} + +static const struct dxil_value * +get_src_ptr(struct ntd_context *ctx, nir_src *src, unsigned chan, + nir_alu_type type) +{ + /* May implement pointer casting */ + assert(src->is_ssa); + return get_src_ssa(ctx, src->ssa, chan); +} + +static const struct dxil_type * +get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) +{ + assert(!alu->src[src].abs); + assert(!alu->src[src].negate); + nir_ssa_def *ssa_src = alu->src[src].src.ssa; + unsigned chan = alu->src[src].swizzle[0]; + const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan); + return dxil_value_get_type(value); +} + +static const struct dxil_value * +get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src) +{ + assert(!alu->src[src].abs); + assert(!alu->src[src].negate); + + unsigned chan = alu->src[src].swizzle[0]; + return get_src(ctx, &alu->src[src].src, chan, + nir_op_infos[alu->op].input_types[src]); +} + +static bool +emit_binop(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_bin_opcode opcode, + const struct dxil_value *op0, const struct dxil_value *op1) +{ + bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == 
nir_type_float; + + enum dxil_opt_flags flags = 0; + if (is_float_op && !alu->exact) + flags |= DXIL_UNSAFE_ALGEBRA; + + const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_shift(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_bin_opcode opcode, + const struct dxil_value *op0, const struct dxil_value *op1) +{ + unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src); + unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src); + if (op0_bit_size != op1_bit_size) { + const struct dxil_type *type = + dxil_module_get_int_type(&ctx->mod, op0_bit_size); + enum dxil_cast_opcode cast_op = + op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC; + op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1); + } + + const struct dxil_value *v = + dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_cmp_pred pred, + const struct dxil_value *op0, const struct dxil_value *op1) +{ + const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static enum dxil_cast_opcode +get_cast_op(nir_alu_instr *alu) +{ + unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); + unsigned src_bits = nir_src_bit_size(alu->src[0].src); + + switch (alu->op) { + /* bool -> int */ + case nir_op_b2i16: + case nir_op_b2i32: + case nir_op_b2i64: + return DXIL_CAST_ZEXT; + + /* float -> float */ + case nir_op_f2f16_rtz: + case nir_op_f2f32: + case nir_op_f2f64: + assert(dst_bits != src_bits); + if (dst_bits < src_bits) + return DXIL_CAST_FPTRUNC; + else + return DXIL_CAST_FPEXT; + + /* int -> int */ + case nir_op_i2i16: + case nir_op_i2i32: + case nir_op_i2i64: + assert(dst_bits != src_bits); + if (dst_bits < src_bits) + return DXIL_CAST_TRUNC; + else + return DXIL_CAST_SEXT; + + /* uint -> uint */ + case nir_op_u2u16: + case nir_op_u2u32: + case nir_op_u2u64: + assert(dst_bits != src_bits); + if (dst_bits < src_bits) + return DXIL_CAST_TRUNC; + else + return DXIL_CAST_ZEXT; + + /* float -> int */ + case nir_op_f2i16: + case nir_op_f2i32: + case nir_op_f2i64: + return DXIL_CAST_FPTOSI; + + /* float -> uint */ + case nir_op_f2u16: + case nir_op_f2u32: + case nir_op_f2u64: + return DXIL_CAST_FPTOUI; + + /* int -> float */ + case nir_op_i2f32: + case nir_op_i2f64: + return DXIL_CAST_SITOFP; + + /* uint -> float */ + case nir_op_u2f32: + case nir_op_u2f64: + return DXIL_CAST_UITOFP; + + default: + unreachable("unexpected cast op"); + } +} + +static const struct dxil_type * +get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu) +{ + unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); + switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) { + case nir_type_bool: + assert(dst_bits == 1); + case nir_type_int: + case nir_type_uint: + return dxil_module_get_int_type(&ctx->mod, dst_bits); + + case nir_type_float: + return dxil_module_get_float_type(&ctx->mod, dst_bits); + + default: + unreachable("unknown nir_alu_type"); + } +} + +static bool +is_double(nir_alu_type alu_type, unsigned bit_size) +{ + return nir_alu_type_get_base_type(alu_type) == nir_type_float && + bit_size == 64; +} + +static bool +emit_cast(struct ntd_context *ctx, nir_alu_instr *alu, + const struct dxil_value *value) +{ + enum dxil_cast_opcode opcode = 
get_cast_op(alu); + const struct dxil_type *type = get_cast_dest_type(ctx, alu); + if (!type) + return false; + + const nir_op_info *info = &nir_op_infos[alu->op]; + switch (opcode) { + case DXIL_CAST_UITOFP: + case DXIL_CAST_SITOFP: + if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest))) + ctx->mod.feats.dx11_1_double_extensions = true; + break; + case DXIL_CAST_FPTOUI: + case DXIL_CAST_FPTOSI: + if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src))) + ctx->mod.feats.dx11_1_double_extensions = true; + break; + } + + const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type, + value); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static enum overload_type +get_overload(nir_alu_type alu_type, unsigned bit_size) +{ + switch (nir_alu_type_get_base_type(alu_type)) { + case nir_type_int: + case nir_type_uint: + switch (bit_size) { + case 16: return DXIL_I16; + case 32: return DXIL_I32; + case 64: return DXIL_I64; + default: + unreachable("unexpected bit_size"); + } + case nir_type_float: + switch (bit_size) { + case 16: return DXIL_F16; + case 32: return DXIL_F32; + case 64: return DXIL_F64; + default: + unreachable("unexpected bit_size"); + } + default: + unreachable("unexpected output type"); + } +} + +static bool +emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_intr intr, const struct dxil_value *op) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + unsigned src_bits = nir_src_bit_size(alu->src[0].src); + enum overload_type overload = get_overload(info->input_types[0], src_bits); + + const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_intr intr, + const struct dxil_value *op0, const struct dxil_value *op1) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + assert(info->output_type == info->input_types[0]); + assert(info->output_type == info->input_types[1]); + unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); + assert(nir_src_bit_size(alu->src[0].src) == dst_bits); + assert(nir_src_bit_size(alu->src[1].src) == dst_bits); + enum overload_type overload = get_overload(info->output_type, dst_bits); + + const struct dxil_value *v = emit_binary_call(ctx, overload, intr, + op0, op1); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu, + enum dxil_intr intr, + const struct dxil_value *op0, + const struct dxil_value *op1, + const struct dxil_value *op2) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + assert(info->output_type == info->input_types[0]); + assert(info->output_type == info->input_types[1]); + assert(info->output_type == info->input_types[2]); + + unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); + assert(nir_src_bit_size(alu->src[0].src) == dst_bits); + assert(nir_src_bit_size(alu->src[1].src) == dst_bits); + assert(nir_src_bit_size(alu->src[2].src) == dst_bits); + + enum overload_type overload = get_overload(info->output_type, dst_bits); + + const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr, + op0, op1, op2); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu, + const struct dxil_value *sel, + const struct dxil_value *val_true, + const struct 
dxil_value *val_false) +{ + assert(sel); + assert(val_true); + assert(val_false); + + const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false); + if (!v) + return false; + + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) +{ + assert(val); + + struct dxil_module *m = &ctx->mod; + + const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f); + const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f); + + if (!c0 || !c1) + return false; + + return emit_select(ctx, alu, val, c1, c0); +} + +static bool +emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) +{ + assert(val); + + const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f); + return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero); +} + +static bool +emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu, + const struct dxil_value *val) +{ + const nir_op_info *info = &nir_op_infos[alu->op]; + unsigned dst_bits = nir_dest_bit_size(alu->dest.dest); + unsigned src_bits = nir_src_bit_size(alu->src[0].src); + enum overload_type overload = get_overload(info->output_type, src_bits); + + const struct dxil_value *v = emit_unary_call(ctx, overload, + DXIL_INTR_FIRSTBIT_HI, val); + if (!v) + return false; + + const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod, + src_bits - 1); + const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0, + src_bits); + if (!size || !zero) + return false; + + v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0); + const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE, + val, zero); + if (!v || !cnd) + return false; + + const struct dxil_value *minus_one = + dxil_module_get_int_const(&ctx->mod, -1, dst_bits); + if (!minus_one) + return false; + + v = dxil_emit_select(&ctx->mod, cnd, v, minus_one); + if (!v) + return false; + + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, + "dx.op.legacyF16ToF32", + DXIL_NONE); + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32); + if (!opcode) + return false; + + const struct dxil_value *args[] = { + opcode, + val + }; + + const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, + "dx.op.legacyF32ToF16", + DXIL_NONE); + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16); + if (!opcode) + return false; + + const struct dxil_value *args[] = { + opcode, + val + }; + + const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + if (!v) + return false; + store_alu_dest(ctx, alu, 0, v); + return true; +} + +static bool +emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs) +{ + const struct dxil_type *type = get_alu_src_type(ctx, alu, 0); + nir_alu_type t = dxil_type_to_nir_type(type); + + for (unsigned i = 0; i < num_inputs; i++) + store_alu_dest(ctx, alu, i, get_src(ctx, 
&alu->src[i].src, + alu->src[i].swizzle[0], t)); + return true; +} + +static bool +emit_alu(struct ntd_context *ctx, nir_alu_instr *alu) +{ + /* handle vec-instructions first; they are the only ones that produce + * vector results. + */ + switch (alu->op) { + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec8: + case nir_op_vec16: + return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs); + case nir_op_mov: { + const struct dxil_type *type = get_alu_src_type(ctx, alu, 0); + nir_alu_type t = dxil_type_to_nir_type(type); + assert(nir_dest_num_components(alu->dest.dest) == 1); + store_alu_dest(ctx, alu, 0,get_src(ctx, &alu->src[0].src, + alu->src[0].swizzle[0], t)); + return true; + } + default: + /* silence warnings */ + ; + } + + /* other ops should be scalar */ + assert(alu->dest.write_mask == 1); + const struct dxil_value *src[4]; + assert(nir_op_infos[alu->op].num_inputs <= 4); + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) + src[i] = get_alu_src(ctx, alu, i); + + switch (alu->op) { + case nir_op_iadd: + case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]); + + case nir_op_isub: + case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]); + + case nir_op_imul: + case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]); + + case nir_op_idiv: + case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]); + + case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]); + case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]); + case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); + case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]); + case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]); + case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]); + case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]); + case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]); + case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]); + case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]); + case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]); + case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]); + case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]); + case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]); + case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]); + case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]); + case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]); + case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]); + case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]); + case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]); + case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]); + case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]); + case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]); + case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]); + case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]); + case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]); + case nir_op_fexp2: return 
emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]); + case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]); + case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]); + case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]); + + case nir_op_fddx: + case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]); + case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]); + case nir_op_fddy: + case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]); + case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]); + + case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]); + case nir_op_frcp: { + const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f); + return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]); + } + case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]); + case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]); + case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]); + case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]); + case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]); + case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]); + case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]); + case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]); + case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]); + case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]); + case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]); + case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FFMA, src[0], src[1], src[2]); + + case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]); + case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]); + + case nir_op_b2i16: + case nir_op_i2i16: + case nir_op_f2i16: + case nir_op_f2u16: + case nir_op_u2u16: + case nir_op_u2f16: + case nir_op_i2f16: + case nir_op_f2f16_rtz: + case nir_op_b2i32: + case nir_op_f2f32: + case nir_op_f2i32: + case nir_op_f2u32: + case nir_op_i2f32: + case nir_op_i2i32: + case nir_op_u2f32: + case nir_op_u2u32: + case nir_op_b2i64: + case nir_op_f2f64: + case nir_op_f2i64: + case nir_op_f2u64: + case nir_op_i2f64: + case nir_op_i2i64: + case nir_op_u2f64: + case nir_op_u2u64: + return emit_cast(ctx, alu, src[0]); + + case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]); + case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]); + default: + NIR_INSTR_UNSUPPORTED(&alu->instr); + assert("Unimplemented ALU instruction"); + return false; + } +} + +const struct dxil_value * +load_ubo(struct ntd_context *ctx, const struct dxil_value *handle, + const struct dxil_value *offset, enum overload_type overload) +{ + assert(handle && offset); + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY); + if (!opcode) + return NULL; + + const struct dxil_value *args[] = { + opcode, handle, offset + }; + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload); + if (!func) + return NULL; + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool 
+emit_load_primitiveid(struct ntd_context *ctx, + nir_intrinsic_instr *intr) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.primitiveID", DXIL_I32); + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, + DXIL_INTR_PRIMITIVE_ID); + if (!opcode) + return false; + + const struct dxil_value *args[] = { + opcode + }; + + const struct dxil_value *primid = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + store_dest_value(ctx, &intr->dest, 0, primid); + + return true; +} + +static const struct dxil_value * +get_int32_undef(struct dxil_module *m) +{ + const struct dxil_type *int32_type = + dxil_module_get_int_type(m, 32); + if (!int32_type) + return NULL; + + return dxil_module_get_undef(m, int32_type); +} + +static bool +emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + nir_const_value *const_block_index = nir_src_as_const_value(intr->src[0]); + assert(const_block_index); // no dynamic indexing for now + const struct dxil_value *handle = ctx->cbv_handles[const_block_index->u32]; + assert(handle); + const struct dxil_value *offset; + nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]); + if (const_offset) { + offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4); + } else { + const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint); + const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4); + offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0); + } + + const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32); + + if (!agg) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i); + store_dest(ctx, &intr->dest, i, retval, + nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool); + } + return true; +} + +static bool +emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + assert(nir_dest_num_components(intr->dest) <= 4); + assert(nir_dest_bit_size(intr->dest) == 32); + + /* We only support const indexes right now. 
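+    * The offset source (src[1]) is passed to cbufferLoadLegacy unmodified,
+    * so it must already be expressed in 16-byte rows; unlike emit_load_ubo()
+    * above, no >> 4 scaling of a byte offset is applied here.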
*/ + nir_const_value *index = nir_src_as_const_value(intr->src[0]); + assert(index && index->u32 < ARRAY_SIZE(ctx->cbv_handles)); + + const struct dxil_value *offset = + get_src(ctx, &intr->src[1], 0, nir_type_uint); + + if (!index || !offset) + return false; + + const struct dxil_value *handle = ctx->cbv_handles[index->u32]; + if (!handle) + return false; + + const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32); + if (!agg) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++) + store_dest_value(ctx, &intr->dest, i, + dxil_emit_extractval(&ctx->mod, agg, i)); + + return true; +} + +static bool +emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr, + nir_variable *output) +{ + nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type)); + enum overload_type overload = get_overload(out_type, 32); + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload); + + if (!func) + return false; + + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT); + const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location); + const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); + + bool success = true; + uint32_t writemask = nir_intrinsic_write_mask(intr); + for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) { + if (writemask & (1 << i)) { + const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i); + const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type); + const struct dxil_value *args[] = { + opcode, output_id, row, col, value + }; + success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); + } + } + return success; +} + +static bool +emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + + switch (var->data.mode) { + case nir_var_shader_out: + return emit_store_output(ctx, intr, var); + + default: + unreachable("unsupported nir_variable_mode"); + } +} + +static bool +emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index) +{ + assert(var); + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT); + const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location); + const struct dxil_value *vertex_id; + const struct dxil_value *row; + + if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) { + vertex_id = get_src(ctx, index, 0, nir_type_int); + row = dxil_module_get_int32_const(&ctx->mod, 0); + } else { + const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); + vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); + row = get_src(ctx, index, 0, nir_type_int); + } + + nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type))); + enum overload_type overload = get_overload(out_type, 32); + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload); + + if (!func) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); + + const struct dxil_value *args[] = { + opcode, input_id, row, comp, vertex_id + }; + + const 
struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + if (!retval) + return false; + store_dest(ctx, &intr->dest, i, retval, out_type); + } + return true; +} + +static bool +emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var) +{ + assert(var); + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT); + const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location); + const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); + const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32); + const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type); + + nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type)); + enum overload_type overload = get_overload(out_type, 32); + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload); + + if (!func) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); + + const struct dxil_value *args[] = { + opcode, input_id, row, comp, vertex_id + }; + + const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + if (!retval) + return false; + store_dest(ctx, &intr->dest, i, retval, out_type); + } + return true; +} + +static bool +emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable* var) +{ + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX); + const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location); + const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0); + const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex); + + nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type)); + enum overload_type overload = get_overload(out_type, 32); + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload); + if (!func) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i); + const struct dxil_value *args[] = { + opcode, input_id, row, comp, vertex_id + }; + + const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); + if (!retval) + return false; + + store_dest(ctx, &intr->dest, i, retval, out_type); + } + return true; +} + +static bool +emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr, + nir_variable *input) +{ + if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER || + input->data.interpolation != INTERP_MODE_FLAT || + !ctx->opts->interpolate_at_vertex || + ctx->opts->provoking_vertex == 0 || + glsl_type_is_integer(input->type)) + return emit_load_input_interpolated(ctx, intr, input); + else + return emit_load_input_flat(ctx, intr, input); +} + +static bool +emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + assert(intr->src[0].is_ssa); + nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr); + nir_variable *var = nir_deref_instr_get_variable(deref); + + switch (var->data.mode) { + case nir_var_shader_in: + if (glsl_type_is_array(var->type)) + return emit_load_input_array(ctx, intr, 
var, &deref->arr.index); + return emit_load_input(ctx, intr, var); + + default: + unreachable("unsupported nir_variable_mode"); + } +} + +static bool +emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value) +{ + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD); + if (!opcode) + return false; + + const struct dxil_value *args[] = { + opcode, + value + }; + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE); + if (!func) + return false; + + return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool); + return emit_discard_if_with_value(ctx, value); +} + +static bool +emit_discard(struct ntd_context *ctx) +{ + const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true); + return emit_discard_if_with_value(ctx, value); +} + +static bool +emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM); + const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); + if (!opcode || !stream_id) + return false; + + const struct dxil_value *args[] = { + opcode, + stream_id + }; + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE); + if (!func) + return false; + + return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM); + const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr)); + if (!opcode || !stream_id) + return false; + + const struct dxil_value *args[] = { + opcode, + stream_id + }; + + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE); + if (!func) + return false; + + return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + int binding = nir_src_as_int(intr->src[0]); + + const struct dxil_value *handle = ctx->uav_handles[binding]; + if (!handle) + return false; + + const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); + if (!int32_undef) + return false; + + const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef }; + enum glsl_sampler_dim image_dim = nir_intrinsic_image_dim(intr); + unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim); + assert(num_coords <= nir_src_num_components(intr->src[1])); + for (unsigned i = 0; i < num_coords; ++i) { + coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint); + if (!coord[i]) + return false; + } + + nir_alu_type in_type = nir_intrinsic_type(intr); + enum overload_type overload = get_overload(in_type, 32); + + assert(nir_src_bit_size(intr->src[3]) == 32); + unsigned num_components = nir_src_num_components(intr->src[3]); + assert(num_components <= 4); + const struct dxil_value *value[4]; + for (unsigned i = 0; i < num_components; ++i) { + value[i] = get_src(ctx, &intr->src[3], i, in_type); + if (!value[i]) + return false; + } + + for (int i = num_components; i < 4; ++i) + value[i] = int32_undef; + + const struct 
dxil_value *write_mask = + dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1); + if (!write_mask) + return false; + + if (image_dim == GLSL_SAMPLER_DIM_BUF) { + coord[1] = int32_undef; + return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload); + } else + return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload); +} + +struct texop_parameters { + const struct dxil_value *tex; + const struct dxil_value *sampler; + const struct dxil_value *bias, *lod_or_sample, *min_lod; + const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3]; + const struct dxil_value *cmp; + enum overload_type overload; +}; + +static const struct dxil_value * +emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE); + if (!func) + return false; + + const struct dxil_value *args[] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE), + params->tex, + params->lod_or_sample + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + int binding = nir_src_as_int(intr->src[0]); + + const struct dxil_value *handle = ctx->uav_handles[binding]; + if (!handle) + return false; + + const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint); + if (!lod) + return false; + + struct texop_parameters params = { + .tex = handle, + .lod_or_sample = lod + }; + const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms); + if (!dimensions) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) { + const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i); + store_dest(ctx, &intr->dest, i, retval, nir_type_uint); + } + + return true; +} + +static bool +emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, + enum dxil_atomic_op op, nir_alu_type type) +{ + const struct dxil_value *buffer = + get_src(ctx, &intr->src[0], 0, nir_type_uint); + const struct dxil_value *offset = + get_src(ctx, &intr->src[1], 0, nir_type_uint); + const struct dxil_value *value = + get_src(ctx, &intr->src[2], 0, type); + + if (!value || !buffer || !offset) + return false; + + const struct dxil_value *handle = + emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer, + nir_src_is_const(intr->src[0])); + if (!handle) + return false; + + const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); + if (!int32_undef) + return false; + + const struct dxil_value *coord[3] = { + offset, int32_undef, int32_undef + }; + + const struct dxil_value *retval = + emit_atomic_binop(ctx, handle, op, coord, value); + + if (!retval) + return false; + + store_dest(ctx, &intr->dest, 0, retval, type); + return true; +} + +static bool +emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + const struct dxil_value *buffer = + get_src(ctx, &intr->src[0], 0, nir_type_uint); + const struct dxil_value *offset = + get_src(ctx, &intr->src[1], 0, nir_type_uint); + const struct dxil_value *cmpval = + get_src(ctx, &intr->src[2], 0, nir_type_int); + const struct dxil_value *newval = + get_src(ctx, &intr->src[3], 0, nir_type_int); + + if (!cmpval || !newval || !buffer || !offset) + return false; + + const struct dxil_value *handle = + emit_createhandle_call(ctx, DXIL_RESOURCE_CLASS_UAV, 0, buffer, + nir_src_is_const(intr->src[0])); + if (!handle) + return false; + + 
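+   /* Match emit_ssbo_atomic() above: the buffer offset goes in the first
+    * coordinate and the two unused coordinates are padded with i32 undef. */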
const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod); + if (!int32_undef) + return false; + + const struct dxil_value *coord[3] = { + offset, int32_undef, int32_undef + }; + + const struct dxil_value *retval = + emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval); + + if (!retval) + return false; + + store_dest(ctx, &intr->dest, 0, retval, nir_type_int); + return true; +} + +static bool +emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr, + enum dxil_rmw_op op, nir_alu_type type) +{ + const struct dxil_value *zero, *index; + unsigned bit_size = nir_src_bit_size(intr->src[1]); + + assert(bit_size == 32); + + zero = dxil_module_get_int32_const(&ctx->mod, 0); + if (!zero) + return false; + + index = get_src(ctx, &intr->src[0], 0, nir_type_uint); + if (!index) + return false; + + const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; + const struct dxil_value *ptr, *value, *retval; + + ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); + if (!ptr) + return false; + + value = get_src(ctx, &intr->src[1], 0, type); + + retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false, + DXIL_ATOMIC_ORDERING_ACQREL, + DXIL_SYNC_SCOPE_CROSSTHREAD); + if (!retval) + return false; + + store_dest(ctx, &intr->dest, 0, retval, type); + return true; +} + +static bool +emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + const struct dxil_value *zero, *index; + unsigned bit_size = nir_src_bit_size(intr->src[1]); + + assert(bit_size == 32); + + zero = dxil_module_get_int32_const(&ctx->mod, 0); + if (!zero) + return false; + + index = get_src(ctx, &intr->src[0], 0, nir_type_uint); + if (!index) + return false; + + const struct dxil_value *ops[] = { ctx->sharedvars, zero, index }; + const struct dxil_value *ptr, *cmpval, *newval, *retval; + + ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops)); + if (!ptr) + return false; + + cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint); + newval = get_src(ctx, &intr->src[2], 0, nir_type_uint); + + retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false, + DXIL_ATOMIC_ORDERING_ACQREL, + DXIL_SYNC_SCOPE_CROSSTHREAD); + if (!retval) + return false; + + store_dest(ctx, &intr->dest, 0, retval, nir_type_uint); + return true; +} + +static bool +emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr) +{ + switch (intr->intrinsic) { + case nir_intrinsic_store_deref: + return emit_store_deref(ctx, intr); + case nir_intrinsic_load_deref: + return emit_load_deref(ctx, intr); + case nir_intrinsic_load_ubo: + return emit_load_ubo(ctx, intr); + case nir_intrinsic_load_ubo_dxil: + return emit_load_ubo_dxil(ctx, intr); + case nir_intrinsic_load_front_face: + return emit_load_input_interpolated(ctx, intr, + ctx->system_value[SYSTEM_VALUE_FRONT_FACE]); + case nir_intrinsic_load_vertex_id_zero_base: + return emit_load_input_interpolated(ctx, intr, + ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]); + case nir_intrinsic_load_instance_id: + return emit_load_input_interpolated(ctx, intr, + ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]); + case nir_intrinsic_load_primitive_id: + return emit_load_primitiveid(ctx, intr); + case nir_intrinsic_discard_if: + return emit_discard_if(ctx, intr); + case nir_intrinsic_discard: + return emit_discard(ctx); + case nir_intrinsic_emit_vertex: + return emit_emit_vertex(ctx, intr); + case nir_intrinsic_end_primitive: + return emit_end_primitive(ctx, intr); + + default: + NIR_INSTR_UNSUPPORTED(&intr->instr); + assert("Unimplemented 
intrinsic instruction"); + return false; + } +} + +static bool +emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const) +{ + for (int i = 0; i < load_const->def.num_components; ++i) { + const struct dxil_value *value; + switch (load_const->def.bit_size) { + case 1: + value = dxil_module_get_int1_const(&ctx->mod, + load_const->value[i].b); + break; + case 16: + ctx->mod.feats.native_low_precision = true; + value = dxil_module_get_int16_const(&ctx->mod, + load_const->value[i].u16); + break; + case 32: + value = dxil_module_get_int32_const(&ctx->mod, + load_const->value[i].u32); + break; + case 64: + ctx->mod.feats.int64_ops = true; + value = dxil_module_get_int64_const(&ctx->mod, + load_const->value[i].u64); + break; + default: + unreachable("unexpected bit_size"); + } + if (!value) + return false; + + store_ssa_def(ctx, &load_const->def, i, value); + } + return true; +} + +static bool +emit_deref(struct ntd_context* ctx, nir_deref_instr* instr) +{ + assert(instr->deref_type == nir_deref_type_var || + instr->deref_type == nir_deref_type_array); + return true; +} + +static bool +emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond, + int true_block, int false_block) +{ + assert(cond); + assert(true_block >= 0); + assert(false_block >= 0); + return dxil_emit_branch(&ctx->mod, cond, true_block, false_block); +} + +static bool +emit_branch(struct ntd_context *ctx, int block) +{ + assert(block >= 0); + return dxil_emit_branch(&ctx->mod, NULL, block, -1); +} + +static bool +emit_jump(struct ntd_context *ctx, nir_jump_instr *instr) +{ + switch (instr->type) { + case nir_jump_break: + case nir_jump_continue: + assert(instr->instr.block->successors[0]); + assert(!instr->instr.block->successors[1]); + return emit_branch(ctx, instr->instr.block->successors[0]->index); + + default: + unreachable("Unsupported jump type\n"); + } +} + +struct phi_block { + unsigned num_components; + struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS]; +}; + +static bool +emit_phi(struct ntd_context *ctx, nir_phi_instr *instr) +{ + unsigned bit_size = nir_dest_bit_size(instr->dest); + const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, + bit_size); + + struct phi_block *vphi = ralloc(ctx->phis, struct phi_block); + vphi->num_components = nir_dest_num_components(instr->dest); + + for (unsigned i = 0; i < vphi->num_components; ++i) { + struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type); + if (!phi) + return false; + store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi)); + } + _mesa_hash_table_insert(ctx->phis, instr, vphi); + return true; +} + +static void +fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr, + struct phi_block *vphi) +{ + const struct dxil_value *values[128]; + unsigned blocks[128]; + for (unsigned i = 0; i < vphi->num_components; ++i) { + size_t num_incoming = 0; + nir_foreach_phi_src(src, instr) { + assert(src->src.is_ssa); + const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i); + assert(num_incoming < ARRAY_SIZE(values)); + values[num_incoming] = val; + assert(num_incoming < ARRAY_SIZE(blocks)); + blocks[num_incoming] = src->pred->index; + ++num_incoming; + } + dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming); + } +} + +static unsigned +get_n_src(struct ntd_context *ctx, const struct dxil_value **values, + unsigned max_components, nir_tex_src *src, nir_alu_type type) +{ + unsigned num_components = nir_src_num_components(src->src); + unsigned i = 0; + + assert(num_components <= 
max_components); + + for (i = 0; i < num_components; ++i) { + values[i] = get_src(ctx, &src->src, i, type); + assert(values[i] != NULL); + } + + return num_components; +} + +#define PAD_SRC(ctx, array, components, undef) \ + for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \ + array[i] = undef; \ + } + +static const struct dxil_value * +emit_sample(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload); + if (!func) + return NULL; + + const struct dxil_value *args[11] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE), + params->tex, params->sampler, + params->coord[0], params->coord[1], params->coord[2], params->coord[3], + params->offset[0], params->offset[1], params->offset[2], + params->min_lod + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload); + if (!func) + return NULL; + + assert(params->bias != NULL); + + const struct dxil_value *args[12] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS), + params->tex, params->sampler, + params->coord[0], params->coord[1], params->coord[2], params->coord[3], + params->offset[0], params->offset[1], params->offset[2], + params->bias, params->min_lod + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload); + if (!func) + return NULL; + + assert(params->lod_or_sample != NULL); + + const struct dxil_value *args[11] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL), + params->tex, params->sampler, + params->coord[0], params->coord[1], params->coord[2], params->coord[3], + params->offset[0], params->offset[1], params->offset[2], + params->lod_or_sample + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func; + enum dxil_intr opcode; + int numparam; + + if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { + func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32); + opcode = DXIL_INTR_SAMPLE_CMP; + numparam = 12; + } else { + func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32); + opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO; + numparam = 11; + } + + if (!func) + return NULL; + + const struct dxil_value *args[12] = { + dxil_module_get_int32_const(&ctx->mod, opcode), + params->tex, params->sampler, + params->coord[0], params->coord[1], params->coord[2], params->coord[3], + params->offset[0], params->offset[1], params->offset[2], + params->cmp, params->min_lod + }; + + return dxil_emit_call(&ctx->mod, func, args, numparam); +} + +static const struct dxil_value * +emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload); + if (!func) + return false; + + const struct dxil_value *args[17] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD), + params->tex, params->sampler, + params->coord[0], params->coord[1], 
params->coord[2], params->coord[3], + params->offset[0], params->offset[1], params->offset[2], + params->dx[0], params->dx[1], params->dx[2], + params->dy[0], params->dy[1], params->dy[2], + params->min_lod + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload); + if (!func) + return false; + + if (!params->lod_or_sample) + params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32)); + + const struct dxil_value *args[] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD), + params->tex, + params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2], + params->offset[0], params->offset[1], params->offset[2] + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static const struct dxil_value * +emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params) +{ + const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32); + if (!func) + return false; + + const struct dxil_value *args[] = { + dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD), + params->tex, + params->sampler, + params->coord[0], + params->coord[1], + params->coord[2], + dxil_module_get_int1_const(&ctx->mod, 1) + }; + + return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args)); +} + +static bool +emit_tex(struct ntd_context *ctx, nir_tex_instr *instr) +{ + assert(ctx->srvs_used[instr->texture_index / 64] & (1ull << (instr->texture_index % 64))); + assert(instr->op == nir_texop_txf || + instr->op == nir_texop_txf_ms || + nir_tex_instr_is_query(instr) || + ctx->samplers_used & (1ull << instr->sampler_index)); + + struct texop_parameters params; + memset(&params, 0, sizeof(struct texop_parameters)); + params.tex = ctx->srv_handles[instr->texture_index]; + params.sampler = ctx->sampler_handles[instr->sampler_index]; + + const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32); + const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32); + const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type); + const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type); + + unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0; + params.overload = get_overload(instr->dest_type, 32); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + nir_alu_type type = nir_tex_instr_src_type(instr, i); + + switch (instr->src[i].src_type) { + case nir_tex_src_coord: + coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord), + &instr->src[i], type); + break; + + case nir_tex_src_offset: + offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset), + &instr->src[i], nir_type_int); + break; + + case nir_tex_src_bias: + assert(instr->op == nir_texop_txb); + assert(nir_src_num_components(instr->src[i].src) == 1); + params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float); + assert(params.bias != NULL); + break; + + case nir_tex_src_lod: + assert(nir_src_num_components(instr->src[i].src) == 1); + /* Buffers don't have a LOD */ + if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) + params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type); + else + params.lod_or_sample = int_undef; + assert(params.lod_or_sample !=
NULL); + break; + + case nir_tex_src_min_lod: + assert(nir_src_num_components(instr->src[i].src) == 1); + params.min_lod = get_src(ctx, &instr->src[i].src, 0, type); + assert(params.min_lod != NULL); + break; + + case nir_tex_src_comparator: + assert(nir_src_num_components(instr->src[i].src) == 1); + params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float); + assert(params.cmp != NULL); + break; + + case nir_tex_src_ddx: + dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx), + &instr->src[i], nir_type_float); + assert(dx_components != 0); + break; + + case nir_tex_src_ddy: + dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy), + &instr->src[i], nir_type_float); + assert(dy_components != 0); + break; + + case nir_tex_src_ms_index: + params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int); + assert(params.lod_or_sample != NULL); + break; + + case nir_tex_src_projector: + unreachable("Texture projector should have been lowered"); + + default: + fprintf(stderr, "texture source: %d\n", instr->src[i].src_type); + unreachable("unknown texture source"); + } + } + + PAD_SRC(ctx, params.coord, coord_components, float_undef); + PAD_SRC(ctx, params.offset, offset_components, int_undef); + if (!params.min_lod) params.min_lod = float_undef; + + const struct dxil_value *sample = NULL; + switch (instr->op) { + case nir_texop_txb: + sample = emit_sample_bias(ctx, &params); + break; + + case nir_texop_tex: + if (params.cmp != NULL) { + sample = emit_sample_cmp(ctx, &params); + break; + } else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) { + sample = emit_sample(ctx, &params); + break; + } + params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0); + /* fallthrough */ + case nir_texop_txl: + sample = emit_sample_level(ctx, &params); + break; + + case nir_texop_txd: + PAD_SRC(ctx, params.dx, dx_components, float_undef); + PAD_SRC(ctx, params.dy, dy_components, float_undef); + sample = emit_sample_grad(ctx, &params); + break; + + case nir_texop_txf: + case nir_texop_txf_ms: + if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) { + params.coord[1] = int_undef; + sample = emit_bufferload_call(ctx, params.tex, params.coord); + } + else { + PAD_SRC(ctx, params.coord, coord_components, int_undef); + sample = emit_texel_fetch(ctx, &params); + } + break; + + case nir_texop_txs: + sample = emit_texture_size(ctx, &params); + break; + + case nir_texop_lod: + sample = emit_texture_lod(ctx, &params); + store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type)); + return true; + + case nir_texop_query_levels: + params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32); + sample = emit_texture_size(ctx, &params); + const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3); + store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type)); + return true; + } + + if (!sample) + return false; + + for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) { + const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i); + store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type)); + } + + return true; +} + +static bool +emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef) +{ + for (unsigned i = 0; i < undef->def.num_components; ++i) + store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0)); + return true; +} + +static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr) +{ + switch (instr->type) { + case nir_instr_type_alu: +
return emit_alu(ctx, nir_instr_as_alu(instr)); + case nir_instr_type_intrinsic: + return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr)); + case nir_instr_type_load_const: + return emit_load_const(ctx, nir_instr_as_load_const(instr)); + case nir_instr_type_deref: + return emit_deref(ctx, nir_instr_as_deref(instr)); + case nir_instr_type_jump: + return emit_jump(ctx, nir_instr_as_jump(instr)); + case nir_instr_type_phi: + return emit_phi(ctx, nir_instr_as_phi(instr)); + case nir_instr_type_tex: + return emit_tex(ctx, nir_instr_as_tex(instr)); + case nir_instr_type_ssa_undef: + return emit_undefined(ctx, nir_instr_as_ssa_undef(instr)); + default: + NIR_INSTR_UNSUPPORTED(instr); + unreachable("Unimplemented instruction type"); + return false; + } +} + + +static bool +emit_block(struct ntd_context *ctx, struct nir_block *block) +{ + assert(block->index < ctx->mod.num_basic_block_ids); + ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block; + + nir_foreach_instr(instr, block) { + TRACE_CONVERSION(instr); + + if (!emit_instr(ctx, instr)) { + return false; + } + } + return true; +} + +static bool +emit_cf_list(struct ntd_context *ctx, struct exec_list *list); + +static bool +emit_if(struct ntd_context *ctx, struct nir_if *if_stmt) +{ + assert(nir_src_num_components(if_stmt->condition) == 1); + const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0, + nir_type_bool); + + /* prepare blocks */ + nir_block *then_block = nir_if_first_then_block(if_stmt); + assert(nir_if_last_then_block(if_stmt)->successors[0]); + assert(!nir_if_last_then_block(if_stmt)->successors[1]); + int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index; + + nir_block *else_block = NULL; + int else_succ = -1; + if (!exec_list_is_empty(&if_stmt->else_list)) { + else_block = nir_if_first_else_block(if_stmt); + assert(nir_if_last_else_block(if_stmt)->successors[0]); + assert(!nir_if_last_else_block(if_stmt)->successors[1]); + else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index; + } + + if (!emit_cond_branch(ctx, cond, then_block->index, + else_block ? 
else_block->index : then_succ)) + return false; + + /* handle then-block */ + if (!emit_cf_list(ctx, &if_stmt->then_list) || + (!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) && + !emit_branch(ctx, then_succ))) + return false; + + if (else_block) { + /* handle else-block */ + if (!emit_cf_list(ctx, &if_stmt->else_list) || + (!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) && + !emit_branch(ctx, else_succ))) + return false; + } + + return true; +} + +static bool +emit_loop(struct ntd_context *ctx, nir_loop *loop) +{ + nir_block *first_block = nir_loop_first_block(loop); + + assert(nir_loop_last_block(loop)->successors[0]); + assert(!nir_loop_last_block(loop)->successors[1]); + + if (!emit_branch(ctx, first_block->index)) + return false; + + if (!emit_cf_list(ctx, &loop->body)) + return false; + + if (!emit_branch(ctx, first_block->index)) + return false; + + return true; +} + +static bool +emit_cf_list(struct ntd_context *ctx, struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, node, node, list) { + switch (node->type) { + case nir_cf_node_block: + if (!emit_block(ctx, nir_cf_node_as_block(node))) + return false; + break; + + case nir_cf_node_if: + if (!emit_if(ctx, nir_cf_node_as_if(node))) + return false; + break; + + case nir_cf_node_loop: + if (!emit_loop(ctx, nir_cf_node_as_loop(node))) + return false; + break; + + default: + unreachable("unsupported cf-list node"); + break; + } + } + return true; +} + +static void +insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var) +{ + nir_foreach_variable_in_list(var, var_list) { + if (var->data.binding > new_var->data.binding) { + exec_node_insert_node_before(&var->node, &new_var->node); + return; + } + } + exec_list_push_tail(var_list, &new_var->node); +} + + +static void +sort_uniforms_by_binding_and_remove_structs(nir_shader *s) +{ + struct exec_list new_list; + exec_list_make_empty(&new_list); + + nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) { + exec_node_remove(&var->node); + const struct glsl_type *type = glsl_without_array(var->type); + if (!glsl_type_is_struct(type)) + insert_sorted_by_binding(&new_list, var); + } + exec_list_append(&s->variables, &new_list); +} + +static void +prepare_phi_values(struct ntd_context *ctx, nir_shader *shader) +{ + /* PHI nodes are difficult to get right when tracking the types: + * Since the incoming sources are linked to blocks, we can't bitcast + * on the fly while loading. So scan the shader and insert a typed dummy + * value for each phi source, and when storing we convert if the incoming + * value has a different type then the one expected by the phi node. + * We choose int as default, because it supports more bit sizes. 
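+ * The dummies are only placeholders: when the instruction that actually
+ * defines a phi source is emitted, its real value replaces the dummy (with
+ * a conversion if the types differ), and fixup_phi() then wires the
+ * incoming values into the DXIL phi nodes.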
+ */ + nir_foreach_function(function, shader) { + if (function->impl) { + nir_foreach_block(block, function->impl) { + nir_foreach_instr(instr, block) { + if (instr->type == nir_instr_type_phi) { + nir_phi_instr *ir = nir_instr_as_phi(instr); + unsigned bitsize = nir_dest_bit_size(ir->dest); + const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize); + nir_foreach_phi_src(src, ir) { + for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i) + store_ssa_def(ctx, src->src.ssa, i, dummy); + } + } + } + } + } + } +} + +static bool +emit_cbvs(struct ntd_context *ctx, nir_shader *s) +{ + for (int i = ctx->opts->ubo_binding_offset; i < s->info.num_ubos; ++i) { + char name[64]; + snprintf(name, sizeof(name), "__ubo%d", i); + if (!emit_cbv(ctx, i, 16384 /*4096 vec4's*/, name)) + return false; + } + + return true; +} + +static bool +emit_module(struct ntd_context *ctx, nir_shader *s) +{ + unsigned binding; + + /* The validator forces us to emit resources in a specific order: + * CBVs, Samplers, SRVs, UAVs. While we are at it also remove + * stale struct uniforms, they are lowered but might not have been removed */ + sort_uniforms_by_binding_and_remove_structs(s); + + /* CBVs */ + if (!emit_cbvs(ctx, s)) + return false; + + /* Samplers */ + binding = 0; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + unsigned count = glsl_type_get_sampler_count(var->type); + if (var->data.mode == nir_var_uniform && count && + glsl_get_sampler_result_type(glsl_without_array(var->type)) == GLSL_TYPE_VOID) { + if (!emit_sampler(ctx, var, binding, count)) + return false; + binding += count; + } + } + + /* SRVs */ + binding = 0; + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + unsigned count = glsl_type_get_sampler_count(var->type); + if (var->data.mode == nir_var_uniform && count && + glsl_get_sampler_result_type(glsl_without_array(var->type)) != GLSL_TYPE_VOID) { + if (!emit_srv(ctx, var, binding, count)) + return false; + binding += count; + } + } + + nir_foreach_variable_with_modes(var, s, nir_var_uniform) { + unsigned count = glsl_type_get_image_count(var->type); + if (var->data.mode == nir_var_uniform && count) { + if (!emit_uav(ctx, var, count)) + return false; + } + } + + nir_function_impl *entry = nir_shader_get_entrypoint(s); + nir_metadata_require(entry, nir_metadata_block_index); + + assert(entry->num_blocks > 0); + ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int, + entry->num_blocks); + if (!ctx->mod.basic_block_ids) + return false; + + for (int i = 0; i < entry->num_blocks; ++i) + ctx->mod.basic_block_ids[i] = -1; + ctx->mod.num_basic_block_ids = entry->num_blocks; + + ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def, + entry->ssa_alloc); + if (!ctx->defs) + return false; + ctx->num_defs = entry->ssa_alloc; + + ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx); + if (!ctx->phis) + return false; + + prepare_phi_values(ctx, s); + + if (!emit_cf_list(ctx, &entry->body)) + return false; + + hash_table_foreach(ctx->phis, entry) { + fixup_phi(ctx, (nir_phi_instr *)entry->key, + (struct phi_block *)entry->data); + } + + if (!dxil_emit_ret_void(&ctx->mod)) + return false; + + if (s->info.stage == MESA_SHADER_FRAGMENT) { + nir_foreach_variable_with_modes(var, s, nir_var_shader_out) { + if (var->data.location == FRAG_RESULT_STENCIL) { + ctx->mod.feats.stencil_ref = true; + } + } + } + + if (ctx->mod.feats.native_low_precision) + ctx->mod.minor_version = max(ctx->mod.minor_version, 2); + + return emit_metadata(ctx, s) && + 
dxil_emit_module(&ctx->mod); +} + +unsigned int +get_dxil_shader_kind(struct nir_shader *s) +{ + switch (s->info.stage) { + case MESA_SHADER_VERTEX: + return DXIL_VERTEX_SHADER; + case MESA_SHADER_GEOMETRY: + return DXIL_GEOMETRY_SHADER; + case MESA_SHADER_FRAGMENT: + return DXIL_PIXEL_SHADER; + case MESA_SHADER_COMPUTE: + return DXIL_COMPUTE_SHADER; + default: + unreachable("unknown shader stage in nir_to_dxil"); + return DXIL_COMPUTE_SHADER; + } +} + +static unsigned +lower_bit_size_callback(const nir_instr* instr, void *data) +{ + if (instr->type != nir_instr_type_alu) + return 0; + const nir_alu_instr *alu = nir_instr_as_alu(instr); + + if (nir_op_infos[alu->op].is_conversion) + return 0; + + unsigned num_inputs = nir_op_infos[alu->op].num_inputs; + const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data; + unsigned min_bit_size = opts->lower_int16 ? 32 : 16; + + unsigned ret = 0; + for (unsigned i = 0; i < num_inputs; i++) { + unsigned bit_size = nir_src_bit_size(alu->src[i].src); + if (bit_size != 1 && bit_size < min_bit_size) + ret = min_bit_size; + } + + return ret; +} + +static void +optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts) +{ + bool progress; + do { + progress = false; + NIR_PASS_V(s, nir_lower_vars_to_ssa); + NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX); + NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL); + NIR_PASS(progress, s, nir_copy_prop); + NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts); + NIR_PASS(progress, s, dxil_nir_lower_8bit_conv); + if (opts->lower_int16) + NIR_PASS(progress, s, dxil_nir_lower_16bit_conv); + NIR_PASS(progress, s, nir_opt_remove_phis); + NIR_PASS(progress, s, nir_opt_dce); + NIR_PASS(progress, s, nir_opt_if, true); + NIR_PASS(progress, s, nir_opt_dead_cf); + NIR_PASS(progress, s, nir_opt_cse); + NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true); + NIR_PASS(progress, s, nir_opt_algebraic); + NIR_PASS(progress, s, dxil_nir_lower_x2b); + NIR_PASS(progress, s, nir_lower_alu); + NIR_PASS(progress, s, dxil_nir_lower_inot); + NIR_PASS(progress, s, nir_opt_constant_folding); + NIR_PASS(progress, s, nir_opt_undef); + NIR_PASS(progress, s, nir_opt_deref); + NIR_PASS_V(s, nir_lower_system_values); + } while (progress); + + do { + progress = false; + NIR_PASS(progress, s, nir_opt_algebraic_late); + } while (progress); +} + +static +void dxil_fill_validation_state(struct ntd_context *ctx, + nir_shader *s, + struct dxil_validation_state *state) +{ + state->num_resources = ctx->num_resources; + state->resources = ctx->resources; + state->state.psv0.max_expected_wave_lane_count = UINT_MAX; + state->state.shader_stage = (uint8_t)ctx->mod.shader_kind; + state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs; + state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs; + //state->state.sig_patch_const_or_prim_elements = 0; + + switch (ctx->mod.shader_kind) { + case DXIL_VERTEX_SHADER: + state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position; + break; + case DXIL_PIXEL_SHADER: + /* TODO: handle depth outputs */ + state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth; + /* just guessing */ + state->state.psv0.ps.sample_frequency = 0; + break; + case DXIL_COMPUTE_SHADER: + break; + case DXIL_GEOMETRY_SHADER: + state->state.max_vertex_count = s->info.gs.vertices_out; + state->state.psv0.gs.input_primitive = dxil_get_input_primitive(s->info.gs.input_primitive); + 
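+      /* The remaining geometry-shader PSV0 fields come straight from the NIR
+       * shader info: output topology, the active stream mask, and whether a
+       * position output was emitted. */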
state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(s->info.gs.output_primitive); + state->state.psv0.gs.output_stream_mask = s->info.gs.active_stream_mask; + state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position; + break; + default: + assert(0 && "Shader type not (yet) supported"); + } +} + +static nir_variable * +add_sysvalue(struct ntd_context *ctx, nir_shader *s, + uint8_t value, char *name, + int driver_location) +{ + + nir_variable *var = rzalloc(s, nir_variable); + if (!var) + return NULL; + var->data.driver_location = driver_location; + var->data.location = value; + var->type = glsl_uint_type(); + var->name = name; + var->data.mode = nir_var_system_value; + var->data.interpolation = INTERP_MODE_FLAT; + return var; +} + +static bool +append_input_or_sysvalue(struct ntd_context *ctx, nir_shader *s, + int input_loc, int sv_slot, + char *name, int driver_location) +{ + if (input_loc >= 0) { + /* Check inputs whether a variable is available the corresponds + * to the sysvalue */ + nir_foreach_variable_with_modes(var, s, nir_var_shader_in) { + if (var->data.location == input_loc) { + ctx->system_value[sv_slot] = var; + return true; + } + } + } + + ctx->system_value[sv_slot] = add_sysvalue(ctx, s, sv_slot, name, driver_location); + if (!ctx->system_value[sv_slot]) + return false; + + nir_shader_add_variable(s, ctx->system_value[sv_slot]); + return true; +} + +struct sysvalue_name { + gl_system_value value; + int slot; + char *name; +} possible_sysvalues[] = { + {SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"}, + {SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"}, + {SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"}, + {SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"}, +}; + +static bool +allocate_sysvalues(struct ntd_context *ctx, nir_shader *s) +{ + unsigned driver_location = 0; + nir_foreach_variable_with_modes(var, s, nir_var_shader_in) + driver_location++; + nir_foreach_variable_with_modes(var, s, nir_var_system_value) + driver_location++; + + for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) { + struct sysvalue_name *info = &possible_sysvalues[i]; + if ((1 << info->value) & s->info.system_values_read) { + if (!append_input_or_sysvalue(ctx, s, info->slot, + info->value, info->name, + driver_location++)) + return false; + } + } + return true; +} + +bool +nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, + struct blob *blob) +{ + assert(opts); + bool retval = true; + debug_dxil = (int)debug_get_option_debug_dxil(); + + struct ntd_context *ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return false; + + ctx->opts = opts; + + ctx->ralloc_ctx = ralloc_context(NULL); + if (!ctx->ralloc_ctx) { + retval = false; + goto out; + } + + dxil_module_init(&ctx->mod, ctx->ralloc_ctx); + ctx->mod.shader_kind = get_dxil_shader_kind(s); + ctx->mod.major_version = 6; + ctx->mod.minor_version = 1; + + NIR_PASS_V(s, nir_lower_pack); + NIR_PASS_V(s, nir_lower_frexp); + NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true); + + optimize_nir(s, opts); + + NIR_PASS_V(s, nir_remove_dead_variables, + nir_var_function_temp | nir_var_shader_temp, NULL); + + if (!allocate_sysvalues(ctx, s)) + return false; + + if (debug_dxil & DXIL_DEBUG_VERBOSE) + nir_print_shader(s, stderr); + + if (!emit_module(ctx, s)) { + debug_printf("D3D12: dxil_container_add_module failed\n"); + retval = false; + goto out; + } + + if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) { + struct dxil_dumper *dumper = dxil_dump_create(); + 
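+      /* With DXIL_DEBUG_DUMP_MODULE set, pretty-print the module that was
+       * just emitted to stderr before it is serialized into the container. */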
dxil_dump_module(dumper, &ctx->mod); + fprintf(stderr, "\n"); + dxil_dump_buf_to_file(dumper, stderr); + fprintf(stderr, "\n\n"); + dxil_dump_free(dumper); + } + + struct dxil_container container; + dxil_container_init(&container); + if (!dxil_container_add_features(&container, &ctx->mod.feats)) { + debug_printf("D3D12: dxil_container_add_features failed\n"); + retval = false; + goto out; + } + + if (!dxil_container_add_io_signature(&container, + DXIL_ISG1, + ctx->mod.num_sig_inputs, + ctx->mod.inputs)) { + debug_printf("D3D12: failed to write input signature\n"); + retval = false; + goto out; + } + + if (!dxil_container_add_io_signature(&container, + DXIL_OSG1, + ctx->mod.num_sig_outputs, + ctx->mod.outputs)) { + debug_printf("D3D12: failed to write output signature\n"); + retval = false; + goto out; + } + + struct dxil_validation_state validation_state; + memset(&validation_state, 0, sizeof(validation_state)); + dxil_fill_validation_state(ctx, s, &validation_state); + + if (!dxil_container_add_state_validation(&container,&ctx->mod, + &validation_state)) { + debug_printf("D3D12: failed to write state-validation\n"); + retval = false; + goto out; + } + + if (!dxil_container_add_module(&container, &ctx->mod)) { + debug_printf("D3D12: failed to write module\n"); + retval = false; + goto out; + } + + blob_init(blob); + if (!dxil_container_write(&container, blob)) { + debug_printf("D3D12: dxil_container_write failed\n"); + retval = false; + goto out; + } + dxil_container_finish(&container); + + if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) { + static int shader_id = 0; + char buffer[64]; + snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob", + get_shader_kind_str(ctx->mod.shader_kind), shader_id++); + debug_printf("Try to write blob to %s\n", buffer); + FILE *f = fopen(buffer, "wb"); + if (f) { + fwrite(blob->data, 1, blob->size, f); + fclose(f); + } + } + +out: + dxil_module_release(&ctx->mod); + ralloc_free(ctx->ralloc_ctx); + free(ctx); + return retval; +} + +static const char *generics_semantics[] = { + "GENERICAA", "GENERICAB", "GENERICAC", "GENERICAD", + "GENERICAE", "GENERICAF", "GENERICAG", "GENERICAH", + "GENERICBA", "GENERICBB", "GENERICBC", "GENERICBD", + "GENERICBE", "GENERICBF", "GENERICBG", "GENERICBH", + "GENERICCA", "GENERICCB", "GENERICCC", "GENERICCD", + "GENERICCE", "GENERICCF", "GENERICCG", "GENERICCH", + "GENERICDA", "GENERICDB", "GENERICDC", "GENERICDD", + "GENERICDE", "GENERICDF", "GENERICDG", "GENERICDH" +}; + +const char * +dxil_vs_attr_index_to_name(unsigned index) +{ + assert(index < 32); + return generics_semantics[index]; +} + +enum dxil_sysvalue_type +nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask) +{ + switch (var->data.location) { + case VARYING_SLOT_FACE: + return DXIL_GENERATED_SYSVALUE; + case VARYING_SLOT_POS: + case VARYING_SLOT_PRIMITIVE_ID: + case VARYING_SLOT_CLIP_DIST0: + case VARYING_SLOT_CLIP_DIST1: + case VARYING_SLOT_PSIZ: + if (!((1 << var->data.location) & other_stage_mask)) + return DXIL_SYSVALUE; + /* fallthrough */ + default: + return DXIL_NO_SYSVALUE; + } +} diff --git a/src/microsoft/compiler/nir_to_dxil.h b/src/microsoft/compiler/nir_to_dxil.h new file mode 100644 index 00000000000..d0d7d163f9b --- /dev/null +++ b/src/microsoft/compiler/nir_to_dxil.h @@ -0,0 +1,68 @@ +/* + * Copyright © Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, 
including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NIR_TO_DXIL_H +#define NIR_TO_DXIL_H + +#include <stdbool.h> + +#include "nir.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct blob; + +const char * +dxil_vs_attr_index_to_name(unsigned index); + +enum dxil_sysvalue_type { + DXIL_NO_SYSVALUE = 0, + DXIL_SYSVALUE, + DXIL_GENERATED_SYSVALUE +}; + +enum dxil_sysvalue_type +nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask); + +struct nir_to_dxil_options { + bool interpolate_at_vertex; + bool lower_int16; + bool disable_math_refactoring; + unsigned ubo_binding_offset; + unsigned provoking_vertex; +}; + +bool +nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts, + struct blob *blob); + +const nir_shader_compiler_options* +dxil_get_nir_compiler_options(void); + +#ifdef __cplusplus +} +#endif + +#endif