From 73188c6954299d57c5b3eb30c514977895283b66 Mon Sep 17 00:00:00 2001
From: Jason Ekstrand <jason@jlekstrand.net>
Date: Fri, 18 Jun 2021 08:16:18 -0500
Subject: [PATCH] nir,docs: Add docs for NIR ALU instructions

About half or more of the text here is actually from Connor Abbott.  I've
edited it a bit to bring it up-to-date and make a few things more clear.

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11438>
---
 docs/doxygen-wrapper.py |  1 +
 docs/index.rst          |  1 +
 docs/nir/alu.rst        | 69 +++++++++++++++++++++++++++++++++++++++++
 docs/nir/index.rst      | 13 ++++++++
 src/compiler/nir/nir.h  | 56 ++++++++++++++++++++++++++-------
 5 files changed, 128 insertions(+), 12 deletions(-)
 create mode 100644 docs/nir/alu.rst
 create mode 100644 docs/nir/index.rst

diff --git a/docs/doxygen-wrapper.py b/docs/doxygen-wrapper.py
index 49735b3504c..090b5bcd757 100755
--- a/docs/doxygen-wrapper.py
+++ b/docs/doxygen-wrapper.py
@@ -29,6 +29,7 @@ import subprocess
 import tempfile
 
 INPUT_PATHS = [
+    'src/compiler/nir/nir.h',
     'src/intel/isl',
 ]
 
diff --git a/docs/index.rst b/docs/index.rst
index cbb5d6f8879..1cad5402dc9 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -111,6 +111,7 @@ Linux, FreeBSD, and other operating systems.
    release-calendar
    dispatch
    gallium/index
+   nir/index
    isl/index
    android
    macos
diff --git a/docs/nir/alu.rst b/docs/nir/alu.rst
new file mode 100644
index 00000000000..315b99cf29e
--- /dev/null
+++ b/docs/nir/alu.rst
@@ -0,0 +1,69 @@
+NIR ALU Instructions
+====================
+
+ALU instructions represent simple operations, such as addition, multiplication,
+comparison, etc., that take a certain number of arguments and return a result
+that only depends on the arguments.  ALU instructions in NIR must be pure in
+the sense that they have no side effect and that identical inputs yield an
+identical output.  A good rule of thumb is that only things which can be
+constant folded should be ALU operations.  If it can't be constant folded, then
+it should probably be an intrinsic instead.
+
+Each ALU instruction has an opcode, which is a member of the :cpp:enum:`nir_op`
+enum, that describes what it does as well as how many arguments it takes.
+Associated with each opcode is a metadata structure,
+:cpp:struct:`nir_op_info`, which shows how many arguments the opcode takes,
+information about data types, and algebraic properties such as associativity
+and commutativity. The info structure for each opcode may be accessed through
+a global :cpp:var:`nir_op_infos` array that’s indexed by the opcode.
+
+ALU operations are typeless, meaning that they're only defined to convert
+a certain bit-pattern input to another bit-pattern output.  The only concrete
+notion of types for a NIR SSA value or register is that each value has a number
+of vector components and a bit-size.  How that data is interpreted is entirely
+controlled by the opcode.  NIR doesn't have opcodes for ``intBitsToFloat()``
+and friends because they are implicit.
+
+Even though ALU operations are typeless, each opcode also has an "ALU type"
+metadata for each of the sources and the destination which can be
+floating-point, boolean, integer, or unsigned integer.  The ALU type mainly
+helps back-ends which want to handle all conversion instructions, for instance,
+in a single switch case.  They're also important when a back-end requests the
+absolute value, negate, and saturate modifiers (not used by core NIR).  In that
+case, modifiers are interpreted with respect to the ALU type on the source or
+destination of the instruction.  In addition, if an operation takes a boolean
+argument, then the argument may be assumed to be either ``0`` for false or
+``~0`` (a.k.a ``-1``) for true even if it is not a 1-bit value.  If an
+operation’s result has a boolean type, then it may only produce ``0`` or ``~0``.
+
+Most of the common ALU ops in NIR operate per-component, meaning that the
+operation is defined by what it does on a single scalar value and, when
+performed on vectors, it performs the same operation on each component.  Things
+like add, multiply, etc. fall into this category.  Per-component operations
+naturally scale to as many components as necessary.  Non-per-component ALU ops
+are things like :nir:alu-op:`vec4` or :nir:alu-op:`pack_64_2x32` where any
+given component in the result value may be a combination of any component in
+any source.  These ops have a number of destination components and a number of
+components required by each source which is fixed by the opcode.
+
+While most instruction types in NIR require vector sizes to perfectly match on
+inputs and outputs, ALU instruction sources have an additional
+:cpp:member:`nir_alu_src::swizzle` field which allows them to act on vectors
+which are not the native vector size of the instruction.  This is ideal for
+hardware with a native data type of :c:expr:`vec4` but also means that ALU
+instructions are often used (and required) for packing/unpacking vectors for
+use in other instruction types like intrinsics or texture ops.
+
+.. doxygenstruct:: nir_op_info
+    :members:
+
+.. doxygenvariable:: nir_op_infos
+
+.. doxygenstruct:: nir_alu_instr
+    :members:
+
+.. doxygenstruct:: nir_alu_src
+    :members:
+
+.. doxygenstruct:: nir_alu_dest
+    :members:
diff --git a/docs/nir/index.rst b/docs/nir/index.rst
new file mode 100644
index 00000000000..630ba011885
--- /dev/null
+++ b/docs/nir/index.rst
@@ -0,0 +1,13 @@
+NIR Intermediate Representation (NIR)
+=====================================
+
+The NIR Intermediate Representation (NIR) is the optimizing compiler stack that
+sits at the core of most Mesa drivers' shader compilers.  It consists of a set
+of enums and data structures that make up the IR as well as a suite of helper
+functions, optimization passes, and lowering passes for building a compiler
+stack.
+
+.. toctree::
+   :maxdepth: 2
+
+   alu
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 66aad033447..ca123a51a61 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1026,6 +1026,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
 void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
 
 typedef struct {
+   /** Base source */
    nir_src src;
 
    /**
@@ -1049,27 +1050,34 @@ typedef struct {
 
    /**
     * For each input component, says which component of the register it is
-    * chosen from. Note that which elements of the swizzle are used and which
-    * are ignored are based on the write mask for most opcodes - for example,
-    * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and
-    * a swizzle of {2, x, 1, 0} where x means "don't care."
+    * chosen from.
+    *
+    * Note that which elements of the swizzle are used and which are ignored
+    * are based on the write mask for most opcodes - for example, a statement
+    * like "foo.xzw = bar.zyx" would have a writemask of 1101b and a swizzle
+    * of {2, x, 1, 0} where x means "don't care."
     */
    uint8_t swizzle[NIR_MAX_VEC_COMPONENTS];
 } nir_alu_src;
 
 typedef struct {
+   /** Base destination */
    nir_dest dest;
 
    /**
-    * \name saturate output modifier
+    * Saturate output modifier
     *
     * Only valid for opcodes that output floating-point numbers. Clamps the
     * output to between 0.0 and 1.0 inclusive.
     */
-
    bool saturate;
 
-   unsigned write_mask : NIR_MAX_VEC_COMPONENTS; /* ignored if dest.is_ssa is true */
+   /**
+    * Write-mask
+    *
+    * Ignored if dest.is_ssa is true
+    */
+   unsigned write_mask : NIR_MAX_VEC_COMPONENTS;
 } nir_alu_dest;
 
 /** NIR sized and unsized types
@@ -1336,6 +1344,10 @@ typedef enum {
     * sources.
     */
    NIR_OP_IS_2SRC_COMMUTATIVE = (1 << 0),
+
+   /**
+    * Operation is associative
+    */
    NIR_OP_IS_ASSOCIATIVE = (1 << 1),
 } nir_op_algebraic_property;
 
@@ -1344,9 +1356,11 @@ typedef enum {
  */
 #define NIR_ALU_MAX_INPUTS NIR_MAX_VEC_COMPONENTS
 
-typedef struct {
+typedef struct nir_op_info {
+   /** Name of the NIR ALU opcode */
    const char *name;
 
+   /** Number of inputs (sources) */
    uint8_t num_inputs;
 
    /**
@@ -1372,11 +1386,13 @@ typedef struct {
     * The type of vector that the instruction outputs. Note that the
     * staurate modifier is only allowed on outputs with the float type.
     */
-
    nir_alu_type output_type;
 
    /**
     * The number of components in each input
+    *
+    * See nir_op_info::output_size for more detail about the relationship
+    * between input and output sizes.
     */
    uint8_t input_sizes[NIR_ALU_MAX_INPUTS];
 
@@ -1387,16 +1403,21 @@ typedef struct {
     */
    nir_alu_type input_types[NIR_ALU_MAX_INPUTS];
 
+   /** Algebraic properties of this opcode */
    nir_op_algebraic_property algebraic_properties;
 
-   /* Whether this represents a numeric conversion opcode */
+   /** Whether this represents a numeric conversion opcode */
    bool is_conversion;
 } nir_op_info;
 
+/** Metadata for each nir_op, indexed by opcode */
 extern const nir_op_info nir_op_infos[nir_num_opcodes];
 
 typedef struct nir_alu_instr {
+   /** Base instruction */
    nir_instr instr;
+
+   /** Opcode */
    nir_op op;
 
    /** Indicates that this ALU instruction generates an exact value
@@ -1410,13 +1431,24 @@ typedef struct nir_alu_instr {
    bool exact:1;
 
    /**
-    * Indicates that this instruction do not cause wrapping to occur, in the
-    * form of overflow or underflow.
+    * Indicates that this instruction does not cause signed integer wrapping
+    * to occur, in the form of overflow or underflow.
     */
    bool no_signed_wrap:1;
+
+   /**
+    * Indicates that this instruction does not cause unsigned integer wrapping
+    * to occur, in the form of overflow or underflow.
+    */
    bool no_unsigned_wrap:1;
 
+   /** Destination */
    nir_alu_dest dest;
+
+   /** Sources
+    *
+    * The size of the array is given by nir_op_info::num_inputs.
+    */
    nir_alu_src src[];
 } nir_alu_instr;