v3d: Move the stores for fixed function VS output reads into NIR.

This lets us emit the VPM_WRITEs directly from nir_intrinsic_store_output() (useful once NIR scheduling is in place so that we can reduce register pressure), and lets future NIR scheduling schedule the math to generate them. Even in the meantime, it looks like this lets NIR DCE some more code and make better decisions. total instructions in shared programs: 6429246 -> 6412976 (-0.25%) total threads in shared programs: 153924 -> 153934 (<.01%) total loops in shared programs: 486 -> 483 (-0.62%) total uniforms in shared programs: 2385436 -> 2388195 (0.12%) Acked-by: Ian Romanick <ian.d.romanick@intel.com> (nir)
2019-02-22 14:26:26 -08:00
parent a9dd227a47
commit 2780a99ff8
5 changed files with 343 additions and 195 deletions
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -563,7 +563,7 @@ struct v3d_compile {
        int local_invocation_index_bits;

        uint8_t vattr_sizes[V3D_MAX_VS_INPUTS / 4];
-        uint32_t num_vpm_writes;
+        uint32_t vpm_output_size;

        /* Size in bytes of registers that have been spilled. This is how much
         * space needs to be available in the spill BO per thread per QPU.
@@ -607,10 +607,8 @@ struct v3d_compile {
        enum quniform_contents *uniform_contents;
        uint32_t uniform_array_size;
        uint32_t num_uniforms;
-        uint32_t num_outputs;
        uint32_t output_position_index;
        nir_variable *output_color_var[4];
-        uint32_t output_point_size_index;
        uint32_t output_sample_mask_index;

        struct qreg undef;