diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 345bb25a1c9..2054fd761d6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1803,7 +1803,8 @@ typedef struct nir_io_semantics { unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */ unsigned medium_precision:1; /* GLSL mediump qualifier */ unsigned per_view:1; - unsigned _pad:7; + unsigned high_16bits:1; /* whether accessing low or high half of the slot */ + unsigned _pad:6; } nir_io_semantics; #define NIR_INTRINSIC_MAX_INPUTS 11 diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 5fbaecbf675..0b014411ce9 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -288,11 +288,13 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, void *dead_ctx) { uint64_t slot_mask = 0; + uint16_t slot_mask_16bit = 0; if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) { nir_io_semantics semantics = nir_intrinsic_io_semantics(instr); - if (semantics.location >= VARYING_SLOT_PATCH0) { + if (semantics.location >= VARYING_SLOT_PATCH0 && + semantics.location <= VARYING_SLOT_PATCH31) { /* Generic per-patch I/O. */ assert((shader->info.stage == MESA_SHADER_TESS_EVAL && instr->intrinsic == nir_intrinsic_load_input) || @@ -303,8 +305,16 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, semantics.location -= VARYING_SLOT_PATCH0; } - slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots); - assert(util_bitcount64(slot_mask) == semantics.num_slots); + if (semantics.location >= VARYING_SLOT_VAR0_16BIT && + semantics.location <= VARYING_SLOT_VAR15_16BIT) { + /* Convert num_slots from the units of half vectors to full vectors. 
*/ + unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2; + slot_mask_16bit = + BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots); + } else { + slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots); + assert(util_bitcount64(slot_mask) == semantics.num_slots); + } } switch (instr->intrinsic) { @@ -373,8 +383,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, shader->info.patch_inputs_read_indirectly |= slot_mask; } else { shader->info.inputs_read |= slot_mask; - if (!nir_src_is_const(*nir_get_io_offset_src(instr))) + shader->info.inputs_read_16bit |= slot_mask_16bit; + if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { shader->info.inputs_read_indirectly |= slot_mask; + shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit; + } } if (shader->info.stage == MESA_SHADER_TESS_CTRL && @@ -392,8 +405,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, shader->info.patch_outputs_accessed_indirectly |= slot_mask; } else { shader->info.outputs_read |= slot_mask; - if (!nir_src_is_const(*nir_get_io_offset_src(instr))) + shader->info.outputs_read_16bit |= slot_mask_16bit; + if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { shader->info.outputs_accessed_indirectly |= slot_mask; + shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit; + } } if (shader->info.stage == MESA_SHADER_TESS_CTRL && @@ -415,8 +431,11 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader, shader->info.patch_outputs_accessed_indirectly |= slot_mask; } else { shader->info.outputs_written |= slot_mask; - if (!nir_src_is_const(*nir_get_io_offset_src(instr))) + shader->info.outputs_written_16bit |= slot_mask_16bit; + if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { shader->info.outputs_accessed_indirectly |= slot_mask; + shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit; + } } if (shader->info.stage == MESA_SHADER_FRAGMENT && @@ 
-839,6 +858,11 @@ nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint) shader->info.inputs_read = 0; shader->info.outputs_written = 0; shader->info.outputs_read = 0; + shader->info.inputs_read_16bit = 0; + shader->info.outputs_written_16bit = 0; + shader->info.outputs_read_16bit = 0; + shader->info.inputs_read_indirectly_16bit = 0; + shader->info.outputs_accessed_indirectly_16bit = 0; shader->info.patch_outputs_read = 0; shader->info.patch_inputs_read = 0; shader->info.patch_outputs_written = 0; diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index af740daae5b..4ceac077da1 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -985,10 +985,12 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) } fprintf(fp, ")"); } - if (state->shader->info.stage == MESA_SHADER_FRAGMENT && - nir_intrinsic_io_semantics(instr).medium_precision) { + if (nir_intrinsic_io_semantics(instr).medium_precision) { fprintf(fp, " mediump"); } + if (nir_intrinsic_io_semantics(instr).high_16bits) { + fprintf(fp, " high_16bits"); + } } break; diff --git a/src/compiler/shader_enums.h b/src/compiler/shader_enums.h index c44f4e32f62..8883e8d2aa7 100644 --- a/src/compiler/shader_enums.h +++ b/src/compiler/shader_enums.h @@ -333,11 +333,74 @@ typedef enum VARYING_SLOT_VAR29, VARYING_SLOT_VAR30, VARYING_SLOT_VAR31, + /* Per-patch varyings for tessellation. 
*/ + VARYING_SLOT_PATCH0, + VARYING_SLOT_PATCH1, + VARYING_SLOT_PATCH2, + VARYING_SLOT_PATCH3, + VARYING_SLOT_PATCH4, + VARYING_SLOT_PATCH5, + VARYING_SLOT_PATCH6, + VARYING_SLOT_PATCH7, + VARYING_SLOT_PATCH8, + VARYING_SLOT_PATCH9, + VARYING_SLOT_PATCH10, + VARYING_SLOT_PATCH11, + VARYING_SLOT_PATCH12, + VARYING_SLOT_PATCH13, + VARYING_SLOT_PATCH14, + VARYING_SLOT_PATCH15, + VARYING_SLOT_PATCH16, + VARYING_SLOT_PATCH17, + VARYING_SLOT_PATCH18, + VARYING_SLOT_PATCH19, + VARYING_SLOT_PATCH20, + VARYING_SLOT_PATCH21, + VARYING_SLOT_PATCH22, + VARYING_SLOT_PATCH23, + VARYING_SLOT_PATCH24, + VARYING_SLOT_PATCH25, + VARYING_SLOT_PATCH26, + VARYING_SLOT_PATCH27, + VARYING_SLOT_PATCH28, + VARYING_SLOT_PATCH29, + VARYING_SLOT_PATCH30, + VARYING_SLOT_PATCH31, + /* 32 16-bit vec4 slots packed in 16 32-bit vec4 slots for GLES/mediump. + * They are really just additional generic slots used for 16-bit data to + * prevent conflicts between neighboring mediump and non-mediump varyings + * that can't be packed without breaking one or the other, which is + * a limitation of separate shaders. This allows linking shaders in 32 bits + * and then getting optimally packed 16-bit varyings by remapping the IO + * locations to these slots. The remapping can also be undone trivially. + * + * nir_io_semantics::high_16bits determines which half of the slot is + * accessed. The low and high halves share the same IO "base" number. + * Drivers can treat these as 32-bit slots everywhere except for FP16 + * interpolation. 
+ */ + VARYING_SLOT_VAR0_16BIT, + VARYING_SLOT_VAR1_16BIT, + VARYING_SLOT_VAR2_16BIT, + VARYING_SLOT_VAR3_16BIT, + VARYING_SLOT_VAR4_16BIT, + VARYING_SLOT_VAR5_16BIT, + VARYING_SLOT_VAR6_16BIT, + VARYING_SLOT_VAR7_16BIT, + VARYING_SLOT_VAR8_16BIT, + VARYING_SLOT_VAR9_16BIT, + VARYING_SLOT_VAR10_16BIT, + VARYING_SLOT_VAR11_16BIT, + VARYING_SLOT_VAR12_16BIT, + VARYING_SLOT_VAR13_16BIT, + VARYING_SLOT_VAR14_16BIT, + VARYING_SLOT_VAR15_16BIT, + + NUM_TOTAL_VARYING_SLOTS, } gl_varying_slot; #define VARYING_SLOT_MAX (VARYING_SLOT_VAR0 + MAX_VARYING) -#define VARYING_SLOT_PATCH0 (VARYING_SLOT_MAX) #define VARYING_SLOT_TESS_MAX (VARYING_SLOT_PATCH0 + MAX_VARYING) #define MAX_VARYINGS_INCL_PATCH (VARYING_SLOT_TESS_MAX - VARYING_SLOT_VAR0) diff --git a/src/compiler/shader_info.h b/src/compiler/shader_info.h index ebc2c43923f..d0c0dd28ddd 100644 --- a/src/compiler/shader_info.h +++ b/src/compiler/shader_info.h @@ -152,6 +152,15 @@ typedef struct shader_info { /* Which system values are actually read */ BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX); + /* Which 16-bit inputs and outputs are used corresponding to + * VARYING_SLOT_VARn_16BIT. + */ + uint16_t inputs_read_16bit; + uint16_t outputs_written_16bit; + uint16_t outputs_read_16bit; + uint16_t inputs_read_indirectly_16bit; + uint16_t outputs_accessed_indirectly_16bit; + /* Which patch inputs are actually read */ uint32_t patch_inputs_read; /* Which patch outputs are actually written */