i965: Compile fp64 software routines and lower double-ops

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
2018-12-10 11:06:16 -08:00
parent 18b4e87370
commit 613ac3aaa2
4 changed files with 133 additions and 23 deletions
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -612,15 +612,6 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
-      OPT(nir_lower_doubles, nir_lower_drcp |
-                             nir_lower_dsqrt |
-                             nir_lower_drsq |
-                             nir_lower_dtrunc |
-                             nir_lower_dfloor |
-                             nir_lower_dceil |
-                             nir_lower_dfract |
-                             nir_lower_dround_even |
-                             nir_lower_dmod);
      OPT(nir_lower_pack);
   } while (progress);

@@ -668,6 +659,76 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)

   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

+   if (is_scalar) {
+      OPT(nir_lower_alu_to_scalar);
+   }
+
+   /* Run opt_algebraic before int64 lowering so we can hopefully get rid
+    * of some int64 instructions.
+    */
+   OPT(nir_opt_algebraic);
+
+   /* Lower 64-bit operations before nir_optimize so that loop unrolling sees
+    * their actual cost.
+    */
+   nir_lower_int64_options int64_options =
+      nir_lower_imul64 |
+      nir_lower_isign64 |
+      nir_lower_divmod64 |
+      nir_lower_imul_high64;
+   nir_lower_doubles_options fp64_options =
+      nir_lower_drcp |
+      nir_lower_dsqrt |
+      nir_lower_drsq |
+      nir_lower_dtrunc |
+      nir_lower_dfloor |
+      nir_lower_dceil |
+      nir_lower_dfract |
+      nir_lower_dround_even |
+      nir_lower_dmod;
+
+   if (!devinfo->has_64bit_types) {
+      int64_options |= nir_lower_mov64 |
+                       nir_lower_icmp64 |
+                       nir_lower_iadd64 |
+                       nir_lower_iabs64 |
+                       nir_lower_ineg64 |
+                       nir_lower_logic64 |
+                       nir_lower_minmax64 |
+                       nir_lower_shift64;
+      fp64_options |= nir_lower_fp64_full_software;
+   }
+
+   bool lowered_64bit_ops = false;
+   do {
+      progress = false;
+
+      OPT(nir_lower_int64, int64_options);
+      OPT(nir_lower_doubles, fp64_options);
+
+      /* Necessary to lower sub -> add/neg and div -> mul/rcp */
+      OPT(nir_opt_algebraic);
+
+      lowered_64bit_ops |= progress;
+   } while (progress);
+
+   if (lowered_64bit_ops) {
+      OPT(nir_lower_constant_initializers, nir_var_function);
+      OPT(nir_lower_returns);
+      OPT(nir_inline_functions);
+      OPT(nir_opt_deref);
+   }
+
+   const nir_function *entry_point = nir_shader_get_entrypoint(nir)->function;
+   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
+      if (func != entry_point) {
+         exec_node_remove(&func->node);
+      }
+   }
+   assert(exec_list_length(&nir->functions) == 1);
+
+   OPT(nir_lower_constant_initializers, ~nir_var_function);
+
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

@@ -694,19 +755,6 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
   OPT(nir_split_var_copies);
   OPT(nir_split_struct_vars, nir_var_function);

-   /* Run opt_algebraic before int64 lowering so we can hopefully get rid
-    * of some int64 instructions.
-    */
-   OPT(nir_opt_algebraic);
-
-   /* Lower int64 instructions before nir_optimize so that loop unrolling
-    * sees their actual cost.
-    */
-   OPT(nir_lower_int64, nir_lower_imul64 |
-                        nir_lower_isign64 |
-                        nir_lower_divmod64 |
-                        nir_lower_imul_high64);
-
   nir = brw_nir_optimize(nir, compiler, is_scalar, true);

   /* This needs to be run after the first optimization pass but before we
--- a/src/mesa/drivers/dri/i965/Makefile.am
+++ b/src/mesa/drivers/dri/i965/Makefile.am
@@ -26,6 +26,7 @@ include Makefile.sources
 AM_CFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src/ \
+	-I$(top_builddir)/src/ \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa/ \
 	-I$(top_srcdir)/src/gallium/include \
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -41,6 +41,7 @@
 #include "util/ralloc.h"
 #include "compiler/glsl/ir.h"
 #include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/glsl/float64_glsl.h"

 #include "brw_program.h"
 #include "brw_context.h"
@@ -53,6 +54,9 @@
 #include "brw_vs.h"
 #include "brw_wm.h"

+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+
 static bool
 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
 {
@@ -67,6 +71,54 @@ brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
   }
 }

+static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+                                        GLuint id, bool is_arb_asm);
+
+/**
+ * Build the fp64 software library (float64_source) for \p stage and return
+ * the nir_shader_clone() of its NIR, created against \p mem_ctx.
+ *
+ * NOTE(review): a failed compile is only logged, and only if an info log
+ * exists; translation to NIR proceeds regardless -- confirm intended.
+ */
+static nir_shader *
+compile_fp64_funcs(struct gl_context *ctx,
+                   const nir_shader_compiler_options *options,
+                   void *mem_ctx,
+                   gl_shader_stage stage)
+{
+   const GLuint name = ~0;
+
+   /* Compile the library source as a stand-alone shader object. */
+   struct gl_shader *shader = _mesa_new_shader(name, stage);
+   shader->Source = float64_source;
+   shader->CompileStatus = COMPILE_FAILURE;
+   _mesa_compile_shader(ctx, shader);
+
+   if (!shader->CompileStatus && shader->InfoLog) {
+      _mesa_problem(ctx,
+                    "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
+                    shader->InfoLog, float64_source);
+   }
+
+   /* glsl_to_nir() is given a program, so wrap the shader in one. */
+   struct gl_shader_program *prog = _mesa_new_shader_program(name);
+   prog->Label = NULL;
+   prog->NumShaders = 1;
+   prog->Shaders = malloc(sizeof(struct gl_shader *));
+   prog->Shaders[0] = shader;
+
+   /* Hand-built linked shader that points at the compiled IR. */
+   struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader);
+   linked->Stage = stage;
+   linked->Program = brwNewProgram(ctx, _mesa_shader_stage_to_program(stage),
+                                   name, false);
+   linked->ir = shader->ir;
+   prog->_LinkedShaders[stage] = linked;
+   nir_shader *fp64_nir = glsl_to_nir(prog, stage, options);
+   return nir_shader_clone(mem_ctx, fp64_nir);
+}
+
 nir_shader *
 brw_create_nir(struct brw_context *brw,
               const struct gl_shader_program *shader_prog,
@@ -101,6 +153,15 @@ brw_create_nir(struct brw_context *brw,
   }
   nir_validate_shader(nir, "before brw_preprocess_nir");

+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   if (nir->info.uses_64bit) {
+      nir_shader *fp64 = compile_fp64_funcs(ctx, options, ralloc_parent(nir), stage);
+
+      nir_validate_shader(fp64, "fp64");
+      exec_list_append(&nir->functions, &fp64->functions);
+   }
+
   nir = brw_preprocess_nir(brw->screen->compiler, nir);

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
--- a/src/mesa/drivers/dri/i965/meson.build
+++ b/src/mesa/drivers/dri/i965/meson.build
@@ -213,7 +213,7 @@ endif
 libi965 = static_library(
  'i965',
  [files_i965, i965_oa_sources, ir_expression_operation_h,
-   xmlpool_options_h],
+   xmlpool_options_h, float64_glsl_h],
  include_directories : [
    inc_common, inc_intel, inc_dri_common, inc_util, inc_drm_uapi,
  ],