r600/sfn: Add algebraic lowering for fsin and fcos
* fsin and fcos require normalization of the input * bitfield_insert requires an additional shift of the insert value v2: drop bitfield_insert lowering code, it is already available as a compiler option (Rhys Perry) Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Reviewed-by: Kristian H. Kristensen <hoegsberg@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9452>
This commit is contained in:
@@ -49,6 +49,15 @@ $(intermediates)/egd_tables.h: $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.p
|
|||||||
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
|
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
|
||||||
$(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.py $(MESA_TOP)/src/gallium/drivers/r600/evergreend.h > $@
|
$(hide) $(MESA_PYTHON2) $(MESA_TOP)/src/gallium/drivers/r600/egd_tables.py $(MESA_TOP)/src/gallium/drivers/r600/evergreend.h > $@
|
||||||
|
|
||||||
|
# Generator script for the r600 NIR algebraic lowering pass. The script added
# by this change lives at sfn/sfn_nir_algebraic.py.
r600_nir_algebraic_gen := $(LOCAL_PATH)/sfn/sfn_nir_algebraic.py

# Regenerate whenever the pattern script or the shared NIR helper changes.
r600_nir_algebraic_deps := \
	$(r600_nir_algebraic_gen) \
	$(MESA_TOP)/src/compiler/nir/nir_algebraic.py

# The script needs -p (a required option) to locate nir_algebraic.py and
# prints the generated C source to stdout, hence the redirection.
$(intermediates)/sfn_nir_algebraic.c: $(r600_nir_algebraic_deps)
	@mkdir -p $(dir $@)
	$(hide) $(MESA_PYTHON2) $(r600_nir_algebraic_gen) -p $(MESA_TOP)/src/compiler/nir/ > $@
|
||||||
|
|
||||||
ifeq ($(MESA_ENABLE_LLVM),true)
|
ifeq ($(MESA_ENABLE_LLVM),true)
|
||||||
$(call mesa-build-with-llvm)
|
$(call mesa-build-with-llvm)
|
||||||
endif
|
endif
|
||||||
|
@@ -163,4 +163,5 @@ CXX_SOURCES = \
|
|||||||
sfn/sfn_vertexstageexport.h
|
sfn/sfn_vertexstageexport.h
|
||||||
|
|
||||||
R600_GENERATED_FILES = \
|
R600_GENERATED_FILES = \
|
||||||
egd_tables.h
|
egd_tables.h \
|
||||||
|
sfn_nir_algebraic.c
|
||||||
|
@@ -188,6 +188,19 @@ egd_tables_h = custom_target(
|
|||||||
capture : true,
|
capture : true,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Generate sfn_nir_algebraic.c by running the pattern script; its stdout is
# captured as the output file. '-p' tells the script where nir_algebraic.py
# lives, and the target is rebuilt when that helper changes.
sfn_nir_algebraic_c = custom_target(
  'sfn_nir_algebraic.c',
  output : 'sfn_nir_algebraic.c',
  input : 'sfn/sfn_nir_algebraic.py',
  depend_files : nir_algebraic_py,
  capture : true,
  command : [
    prog_python, '@INPUT@',
    '-p', join_paths(meson.source_root(), 'src/compiler/nir/'),
  ],
)
|
||||||
|
|
||||||
|
|
||||||
r600_c_args = []
|
r600_c_args = []
|
||||||
if with_gallium_opencl
|
if with_gallium_opencl
|
||||||
r600_c_args += '-DHAVE_OPENCL'
|
r600_c_args += '-DHAVE_OPENCL'
|
||||||
@@ -195,7 +208,7 @@ endif
|
|||||||
|
|
||||||
libr600 = static_library(
|
libr600 = static_library(
|
||||||
'r600',
|
'r600',
|
||||||
[files_r600, egd_tables_h],
|
[files_r600, egd_tables_h, sfn_nir_algebraic_c],
|
||||||
c_args : [r600_c_args, '-Wstrict-overflow=0'],
|
c_args : [r600_c_args, '-Wstrict-overflow=0'],
|
||||||
gnu_symbol_visibility : 'hidden',
|
gnu_symbol_visibility : 'hidden',
|
||||||
include_directories : [
|
include_directories : [
|
||||||
|
@@ -83,10 +83,9 @@ bool EmitAluInstruction::do_emit(nir_instr* ir)
|
|||||||
case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
|
case nir_op_bany_inequal3: return emit_any_all_icomp(instr, op2_setne_int, 3, false);
|
||||||
case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
|
case nir_op_bany_inequal4: return emit_any_all_icomp(instr, op2_setne_int, 4, false);
|
||||||
case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
|
case nir_op_bcsel: return emit_alu_op3(instr, op3_cnde_int, {0, 2, 1});
|
||||||
case nir_op_bfi: return emit_alu_op3(instr, op3_bfi_int);
|
|
||||||
case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
|
case nir_op_bfm: return emit_alu_op2_int(instr, op2_bfm_int);
|
||||||
case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
|
case nir_op_bit_count: return emit_alu_op1(instr, op1_bcnt_int);
|
||||||
case nir_op_bitfield_insert: return emit_bitfield_insert(instr);
|
|
||||||
case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
|
case nir_op_bitfield_reverse: return emit_alu_op1(instr, op1_bfrev_int);
|
||||||
case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
|
case nir_op_bitfield_select: return emit_alu_op3(instr, op3_bfi_int);
|
||||||
case nir_op_cube_r600: return emit_cube(instr);
|
case nir_op_cube_r600: return emit_cube(instr);
|
||||||
@@ -97,7 +96,9 @@ bool EmitAluInstruction::do_emit(nir_instr* ir)
|
|||||||
case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
|
case nir_op_fabs: return emit_alu_op1(instr, op1_mov, {1 << alu_src0_abs});
|
||||||
case nir_op_fadd: return emit_alu_op2(instr, op2_add);
|
case nir_op_fadd: return emit_alu_op2(instr, op2_add);
|
||||||
case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
|
case nir_op_fceil: return emit_alu_op1(instr, op1_ceil);
|
||||||
case nir_op_fcos: return emit_alu_trig_op1(instr, op1_cos);
|
case nir_op_fcos_r600: return emit_alu_trans_op1(instr, op1_cos);
|
||||||
|
|
||||||
|
/* These are in the ALU instruction list, but they should be texture instructions */
|
||||||
case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
|
case nir_op_fddx: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
|
||||||
case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
|
case nir_op_fddx_coarse: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, false);
|
||||||
case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
|
case nir_op_fddx_fine: return emit_tex_fdd(instr, TexInstruction::get_gradient_h, true);
|
||||||
@@ -130,7 +131,7 @@ bool EmitAluInstruction::do_emit(nir_instr* ir)
|
|||||||
case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
|
case nir_op_fround_even: return emit_alu_op1(instr, op1_rndne);
|
||||||
case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
|
case nir_op_frsq: return emit_alu_trans_op1(instr, op1_recipsqrt_ieee1);
|
||||||
case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
|
case nir_op_fsat: return emit_alu_op1(instr, op1_mov, {1 << alu_dst_clamp});
|
||||||
case nir_op_fsin: return emit_alu_trig_op1(instr, op1_sin);
|
case nir_op_fsin_r600: return emit_alu_trans_op1(instr, op1_sin);
|
||||||
case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
|
case nir_op_fsqrt: return emit_alu_trans_op1(instr, op1_sqrt_ieee);
|
||||||
case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
|
case nir_op_fsub: return emit_alu_op2(instr, op2_add, op2_opt_neg_src1);
|
||||||
case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
|
case nir_op_ftrunc: return emit_alu_op1(instr, op1_trunc);
|
||||||
@@ -385,57 +386,6 @@ bool EmitAluInstruction::emit_mov(const nir_alu_instr& instr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool EmitAluInstruction::emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode)
|
|
||||||
{
|
|
||||||
// normalize by dividing by 2*PI, shift by 0.5, take fraction, and
|
|
||||||
// then shift back
|
|
||||||
|
|
||||||
const float inv_2_pi = 0.15915494f;
|
|
||||||
|
|
||||||
PValue v[4]; // this might need some additional temp register creation
|
|
||||||
for (unsigned i = 0; i < 4 ; ++i)
|
|
||||||
v[i] = from_nir(instr.dest, i);
|
|
||||||
|
|
||||||
PValue inv_pihalf = PValue(new LiteralValue(inv_2_pi, 0));
|
|
||||||
AluInstruction *ir = nullptr;
|
|
||||||
for (unsigned i = 0; i < 4 ; ++i) {
|
|
||||||
if (!(instr.dest.write_mask & (1 << i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op3_muladd_ieee, v[i],
|
|
||||||
{m_src[0][i], inv_pihalf, Value::zero_dot_5},
|
|
||||||
{alu_write});
|
|
||||||
if (instr.src[0].negate) ir->set_flag(alu_src0_neg);
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4 ; ++i) {
|
|
||||||
if (!(instr.dest.write_mask & (1 << i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op1_fract, v[i], v[i], {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4 ; ++i) {
|
|
||||||
if (!(instr.dest.write_mask & (1 << i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op2_add, v[i], v[i], Value::zero_dot_5, write);
|
|
||||||
ir->set_flag(alu_src1_neg);
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 4 ; ++i) {
|
|
||||||
if (!(instr.dest.write_mask & (1 << i)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ir = new AluInstruction(opcode, v[i], v[i], last_write);
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
|
bool EmitAluInstruction::emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode,
|
||||||
bool absolute)
|
bool absolute)
|
||||||
{
|
{
|
||||||
@@ -1032,65 +982,6 @@ bool EmitAluInstruction::emit_tex_fdd(const nir_alu_instr& instr, TexInstruction
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool EmitAluInstruction::emit_bitfield_insert(const nir_alu_instr& instr)
|
|
||||||
{
|
|
||||||
auto t0 = get_temp_vec4();
|
|
||||||
auto t1 = get_temp_vec4();
|
|
||||||
auto t2 = get_temp_vec4();
|
|
||||||
auto t3 = get_temp_vec4();
|
|
||||||
|
|
||||||
PValue l32(new LiteralValue(32));
|
|
||||||
unsigned write_mask = instr.dest.write_mask;
|
|
||||||
if (!write_mask) return true;
|
|
||||||
|
|
||||||
AluInstruction *ir = nullptr;
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if (!(write_mask & (1<<i)))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ir = new AluInstruction(op2_setge_int, t0[i], {m_src[3][i], l32}, {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if (!(write_mask & (1<<i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op2_bfm_int, t1[i], {m_src[3][i], m_src[2][i]}, {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if (!(write_mask & (1<<i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op2_lshl_int, t2[i], {m_src[1][i], m_src[2][i]}, {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if (!(write_mask & (1<<i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op3_bfi_int, t3[i],
|
|
||||||
{t1[i], t2[i], m_src[0][i]}, {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
|
||||||
if (!(write_mask & (1<<i)))
|
|
||||||
continue;
|
|
||||||
ir = new AluInstruction(op3_cnde_int, from_nir(instr.dest, i),
|
|
||||||
{t0[i], t3[i], m_src[1][i]}, {alu_write});
|
|
||||||
emit_instruction(ir);
|
|
||||||
}
|
|
||||||
make_last(ir);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
|
bool EmitAluInstruction::emit_unpack_32_2x16_split_y(const nir_alu_instr& instr)
|
||||||
{
|
{
|
||||||
auto tmp = get_temp_register();
|
auto tmp = get_temp_register();
|
||||||
|
@@ -65,7 +65,6 @@ private:
|
|||||||
|
|
||||||
bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
|
bool emit_alu_op3(const nir_alu_instr& instr, EAluOp opcode, std::array<uint8_t, 3> reorder={0,1,2});
|
||||||
bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
|
bool emit_alu_trans_op1(const nir_alu_instr& instr, EAluOp opcode, bool absolute = false);
|
||||||
bool emit_alu_trig_op1(const nir_alu_instr& instr, EAluOp opcode);
|
|
||||||
|
|
||||||
bool emit_alu_b2f(const nir_alu_instr& instr);
|
bool emit_alu_b2f(const nir_alu_instr& instr);
|
||||||
bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
|
bool emit_alu_i2orf2_b1(const nir_alu_instr& instr, EAluOp op);
|
||||||
@@ -88,7 +87,6 @@ private:
|
|||||||
bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
|
bool emit_unpack_64_2x32_split(const nir_alu_instr& instr, unsigned comp);
|
||||||
|
|
||||||
bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
|
bool emit_tex_fdd(const nir_alu_instr& instr, TexInstruction::Opcode op, bool fine);
|
||||||
bool emit_bitfield_insert(const nir_alu_instr& instr);
|
|
||||||
bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
|
bool emit_unpack_32_2x16_split_y(const nir_alu_instr& instr);
|
||||||
bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
|
bool emit_unpack_32_2x16_split_x(const nir_alu_instr& instr);
|
||||||
bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
|
bool emit_pack_32_2x16_split(const nir_alu_instr& instr);
|
||||||
|
@@ -878,6 +878,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
|||||||
NIR_PASS_V(sel->nir, nir_lower_idiv,
|
NIR_PASS_V(sel->nir, nir_lower_idiv,
|
||||||
sel->nir->info.stage == MESA_SHADER_COMPUTE ?
|
sel->nir->info.stage == MESA_SHADER_COMPUTE ?
|
||||||
nir_lower_idiv_precise : nir_lower_idiv_fast);
|
nir_lower_idiv_precise : nir_lower_idiv_fast);
|
||||||
|
NIR_PASS_V(sel->nir, r600_lower_alu);
|
||||||
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
|
NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
|
||||||
|
|
||||||
if (lower_64bit)
|
if (lower_64bit)
|
||||||
|
@@ -151,6 +151,7 @@ int r600_shader_from_nir(struct r600_context *rctx,
|
|||||||
struct r600_pipe_shader *pipeshader,
|
struct r600_pipe_shader *pipeshader,
|
||||||
union r600_shader_key *key);
|
union r600_shader_key *key);
|
||||||
|
|
||||||
|
bool r600_lower_alu(nir_shader *sh);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
49
src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py
Normal file
49
src/gallium/drivers/r600/sfn/sfn_nir_algebraic.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
#
|
||||||
|
# Copyright (C) 2021 Collabora Ltd.
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
# copy of this software and associated documentation files (the "Software"),
|
||||||
|
# to deal in the Software without restriction, including without limitation
|
||||||
|
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
# and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
# Software is furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice (including the next
|
||||||
|
# paragraph) shall be included in all copies or substantial portions of the
|
||||||
|
# Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||||
|
# IN THE SOFTWARE.
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Search/replace patterns handed to nir_algebraic.AlgebraicPass.
#
# 32-bit fsin/fcos are rewritten to the r600-specific opcodes with a
# range-reduced argument: ffract(a * 0.15915494 + 0.5) - 0.5, where
# 0.15915494 is approximately 1 / (2 * pi).
# NOTE(review): the original note says chip family r600 needs an additional
# fma with 2*pi applied to this value; that is not done here - confirm.
lower_alu = [
    (
        (trig_op, "a@32"),
        (
            trig_op + '_r600',
            ('fadd', ('ffract', ('ffma', 'a', 0.15915494, 0.5)), -0.5),
        ),
    )
    for trig_op in ('fsin', 'fcos')
]
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, extend the import path and generate the pass.

    The required ``-p``/``--import-path`` option names the directory that is
    put at the front of ``sys.path`` before ``run()`` is called, so that the
    import inside ``run()`` can be resolved from it.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('-p', '--import-path', required=True)
    options = cli.parse_args()

    # Front of the path: the given directory takes precedence over any other
    # location when run() performs its import.
    sys.path.insert(0, options.import_path)
    run()
|
||||||
|
|
||||||
|
|
||||||
|
def run():
    """Print the generated C source for the ``r600_lower_alu`` pass to stdout.

    The output is the ``sfn/sfn_nir.h`` include line followed by whatever
    ``AlgebraicPass.render()`` returns for the ``lower_alu`` patterns.
    """
    # Imported here rather than at module level because main() only adds the
    # import path to sys.path at run time.
    import nir_algebraic  # pylint: disable=import-error

    print('#include "sfn/sfn_nir.h"')

    lowering_pass = nir_algebraic.AlgebraicPass("r600_lower_alu", lower_alu)
    print(lowering_pass.render())
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Reference in New Issue
Block a user