From b2e1fc8976de00fb0924c08d6556106b44d2268b Mon Sep 17 00:00:00 2001
From: Jason Ekstrand
Date: Fri, 25 Sep 2020 17:43:33 -0500
Subject: [PATCH] nir: Add a pass to lower vec3s to vec4s

LLVM loves to take advantage of the fact that vec3s in OpenCL are 16B
aligned and so it can just read/write them as vec4s.  This results in a
LOT of vec4->vec3 casts on loads and stores.  One solution to this
problem is to get rid of all vec3 variables.

Reviewed-by: Jesse Natalie
Part-of:
---
 src/compiler/Makefile.sources             |   1 +
 src/compiler/glsl_types.cpp               |  68 +++++++++
 src/compiler/glsl_types.h                 |   2 +
 src/compiler/nir/meson.build              |   1 +
 src/compiler/nir/nir.h                    |   2 +
 src/compiler/nir/nir_lower_vec3_to_vec4.c | 166 ++++++++++++++++++++++
 src/compiler/nir_types.cpp                |   6 +
 src/compiler/nir_types.h                  |   2 +
 8 files changed, 248 insertions(+)
 create mode 100644 src/compiler/nir/nir_lower_vec3_to_vec4.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index aaea6a08d6b..cd303e5341c 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -302,6 +302,7 @@ NIR_FILES = \
 	nir/nir_lower_vars_to_ssa.c \
 	nir/nir_lower_var_copies.c \
 	nir/nir_lower_vec_to_movs.c \
+	nir/nir_lower_vec3_to_vec4.c \
 	nir/nir_lower_viewport_transform.c \
 	nir/nir_lower_wpos_center.c \
 	nir/nir_lower_wpos_ytransform.c \
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index e160a479cbd..ff3ed678680 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -2540,6 +2540,74 @@ glsl_type::get_explicit_type_for_size_align(glsl_type_size_align_func type_info,
    }
 }
 
+/* Returns this type with vec3s widened to vec4s, recursing into aggregates. */
+const glsl_type *
+glsl_type::replace_vec3_with_vec4() const
+{
+   if (this->is_scalar() || this->is_vector() || this->is_matrix()) {
+      /* Row-major: widen the column count rather than the vector size. */
+      if (this->interface_row_major) {
+         if (this->matrix_columns == 3) {
+            return glsl_type::get_instance(this->base_type,
+                                           this->vector_elements,
+                                           4, /* matrix columns */
+                                           this->explicit_stride,
+                                           this->interface_row_major,
+                                           this->explicit_alignment);
+         } else {
+            return this;
+         }
+      } else {
+         if (this->vector_elements == 3) {
+            return glsl_type::get_instance(this->base_type,
+                                           4, /* vector elements */
+                                           this->matrix_columns,
+                                           this->explicit_stride,
+                                           this->interface_row_major,
+                                           this->explicit_alignment);
+         } else {
+            return this;
+         }
+      }
+   } else if (this->is_array()) {
+      const glsl_type *vec4_elem_type =
+         this->fields.array->replace_vec3_with_vec4();
+      if (vec4_elem_type == this->fields.array)
+         return this;
+      return glsl_type::get_array_instance(vec4_elem_type, this->length,
+                                           this->explicit_stride);
+   } else if (this->is_struct() || this->is_interface()) {
+      struct glsl_struct_field *fields = (struct glsl_struct_field *)
+         malloc(sizeof(struct glsl_struct_field) * this->length);
+
+      bool needs_new_type = false;
+      for (unsigned i = 0; i < this->length; i++) {
+         fields[i] = this->fields.structure[i];
+         assert(fields[i].matrix_layout != GLSL_MATRIX_LAYOUT_ROW_MAJOR);
+         fields[i].type = fields[i].type->replace_vec3_with_vec4();
+         if (fields[i].type != this->fields.structure[i].type)
+            needs_new_type = true;
+      }
+
+      const glsl_type *type;
+      if (!needs_new_type) {
+         type = this;
+      } else if (this->is_struct()) {
+         type = get_struct_instance(fields, this->length, this->name,
+                                    this->packed, this->explicit_alignment);
+      } else {
+         assert(!this->packed);
+         type = get_interface_instance(fields, this->length,
+                                       (enum glsl_interface_packing)this->interface_packing,
+                                       this->interface_row_major, this->name);
+      }
+      free(fields);
+      return type;
+   } else {
+      unreachable("Unhandled type.");
+   }
+}
+
 unsigned
 glsl_type::count_vec4_slots(bool is_gl_vertex_input, bool is_bindless) const
 {
diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h
index d3ca3c86d5e..744b01dca9a 100644
--- a/src/compiler/glsl_types.h
+++ b/src/compiler/glsl_types.h
@@ -647,6 +647,8 @@ public:
    const glsl_type *get_explicit_type_for_size_align(glsl_type_size_align_func type_info,
                                                      unsigned *size, unsigned *align) const;
 
+   const glsl_type *replace_vec3_with_vec4() const;
+
    /**
     * Alignment in bytes of the start of this type in OpenCL memory.
     */
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index eff000ee066..56db0a1c328 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -180,6 +180,7 @@ files_libnir = files(
   'nir_lower_vars_to_ssa.c',
   'nir_lower_var_copies.c',
   'nir_lower_vec_to_movs.c',
+  'nir_lower_vec3_to_vec4.c',
   'nir_lower_viewport_transform.c',
   'nir_lower_wpos_center.c',
   'nir_lower_wpos_ytransform.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ef4d33ab45f..28b9a6eb8ea 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4234,6 +4234,8 @@ nir_lower_vars_to_explicit_types(nir_shader *shader,
 bool nir_lower_mem_constant_vars(nir_shader *shader,
                                  glsl_type_size_align_func type_info);
 
+bool nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes);
+
 typedef enum {
    /**
     * An address format which is a simple 32-bit global GPU address.
diff --git a/src/compiler/nir/nir_lower_vec3_to_vec4.c b/src/compiler/nir/nir_lower_vec3_to_vec4.c
new file mode 100644
index 00000000000..705140e4bc1
--- /dev/null
+++ b/src/compiler/nir/nir_lower_vec3_to_vec4.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright © 2020 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "nir_builder.h"
+
+/* Widens vec3 variables, derefs, loads and stores of the given modes. */
+static bool
+lower_vec3_to_vec4_impl(nir_function_impl *impl, nir_variable_mode modes)
+{
+   bool progress = false;
+   /* Function temporaries belong to the impl, so retype them here. */
+   if (modes & nir_var_function_temp) {
+      nir_foreach_function_temp_variable(var, impl) {
+         const struct glsl_type *vec4_type =
+            glsl_type_replace_vec3_with_vec4(var->type);
+         if (var->type != vec4_type) {
+            var->type = vec4_type;
+            progress = true;
+         }
+      }
+   }
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_deref: {
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+            if (!(deref->mode & modes))
+               continue;
+            assert(!(deref->mode & ~modes));
+            const struct glsl_type *vec4_type =
+               glsl_type_replace_vec3_with_vec4(deref->type);
+            if (deref->type != vec4_type) {
+               deref->type = vec4_type;
+               progress = true;
+            }
+            break;
+         }
+
+         case nir_instr_type_intrinsic: {
+            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+            switch (intrin->intrinsic) {
+            case nir_intrinsic_load_deref: {
+               if (intrin->num_components != 3)
+                  break;
+
+               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+               if (!(deref->mode & modes))
+                  break;
+
+               assert(intrin->dest.is_ssa);
+               intrin->num_components = 4;
+               intrin->dest.ssa.num_components = 4;
+               /* Load four components, then hand users only xyz. */
+               b.cursor = nir_after_instr(&intrin->instr);
+               nir_ssa_def *vec3 = nir_channels(&b, &intrin->dest.ssa, 0x7);
+               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
+                                              nir_src_for_ssa(vec3),
+                                              vec3->parent_instr);
+               progress = true;
+               break;
+            }
+
+            case nir_intrinsic_store_deref: {
+               if (intrin->num_components != 3)
+                  break;
+
+               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+               if (!(deref->mode & modes))
+                  break;
+
+               assert(intrin->src[1].is_ssa);
+               nir_ssa_def *data = intrin->src[1].ssa;
+               /* Pad to four components by repeating z in w. */
+               b.cursor = nir_before_instr(&intrin->instr);
+               unsigned swiz[] = { 0, 1, 2, 2 };
+               data = nir_swizzle(&b, data, swiz, 4);
+
+               intrin->num_components = 4;
+               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
+                                     nir_src_for_ssa(data));
+               progress = true;
+               break;
+            }
+
+            case nir_intrinsic_copy_deref: {
+               ASSERTED nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
+               ASSERTED nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
+               /* If we convert one side of a copy and not the other, that
+                * would be very bad.
+                */
+               assert(!(src->mode & modes) == !(dst->mode & modes));
+               break;
+            }
+
+            default:
+               break;
+            }
+            break;
+         }
+
+         default:
+            break;
+         }
+      }
+   }
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   } else {
+      nir_metadata_preserve(impl, nir_metadata_all);
+   }
+
+   return progress;
+}
+
+/* Lowers vec3 to vec4 for all variables and derefs of the given modes. */
+bool
+nir_lower_vec3_to_vec4(nir_shader *shader, nir_variable_mode modes)
+{
+   bool progress = false;
+   /* Shader-level variables; function temps are handled per impl. */
+   if (modes & ~nir_var_function_temp) {
+      nir_foreach_variable_in_shader(var, shader) {
+         if (!(var->data.mode & modes))
+            continue;
+         const struct glsl_type *vec4_type =
+            glsl_type_replace_vec3_with_vec4(var->type);
+         if (var->type != vec4_type) {
+            var->type = vec4_type;
+            progress = true;
+         }
+      }
+   }
+
+   nir_foreach_function(function, shader) {
+      if (function->impl && lower_vec3_to_vec4_impl(function->impl, modes))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp
index 698706e883c..e185d9f61bd 100644
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -930,3 +930,9 @@ glsl_get_explicit_type_for_size_align(const struct glsl_type *type,
 {
    return type->get_explicit_type_for_size_align(type_info, size, align);
 }
+
+const struct glsl_type *
+glsl_type_replace_vec3_with_vec4(const struct glsl_type *type)
+{
+   return type->replace_vec3_with_vec4();
+}
diff --git a/src/compiler/nir_types.h b/src/compiler/nir_types.h
index 6e778153b21..2e085efaed9 100644
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -234,6 +234,8 @@ const struct glsl_type *glsl_get_explicit_type_for_size_align(const struct glsl_
                                                               glsl_type_size_align_func type_info,
                                                               unsigned *size, unsigned *align);
 
+const struct glsl_type *glsl_type_replace_vec3_with_vec4(const struct glsl_type *type);
+
 unsigned glsl_type_get_sampler_count(const struct glsl_type *type);
 unsigned glsl_type_get_image_count(const struct glsl_type *type);
 