radeonsi: implement 32-bit pointers in user data SGPRs (v2)

User SGPRs changes:
    VS:     14 ->  9
    TCS:    14 -> 10
    TES:    10 ->  6
    GS:      8 ->  4
    GSCOPY:  2 ->  1
    PS:      9 ->  5
    Merged VS-TCS: 24 -> 16
    Merged VS-GS:  18 -> 11
    Merged TES-GS: 18 -> 11

SGPRS: 2170102 -> 2158430 (-0.54 %)
VGPRS: 1645656 -> 1641516 (-0.25 %)
Spilled SGPRs: 9078 -> 8810 (-2.95 %)
Spilled VGPRs: 130 -> 114 (-12.31 %)
Scratch size: 1508 -> 1492 (-1.06 %) dwords per thread
Code Size: 52094872 -> 52692540 (1.15 %) bytes
Max Waves: 371848 -> 372723 (0.24 %)

v2: - the shader cache needs to take address32_hi into account
    - set amdgpu-32bit-address-high-bits

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (v1)
This commit is contained in:
Marek Olšák
2018-01-01 21:04:22 +01:00
parent 5722cd4084
commit 931ec80eeb
7 changed files with 141 additions and 59 deletions

View File

@@ -34,10 +34,13 @@
extern "C" {
#endif
#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
enum {
/* CONST is the only address space that selects SMEM loads */
AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
AC_LOCAL_ADDR_SPACE = 3,
AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
};
struct ac_llvm_context {
@@ -51,6 +54,7 @@ struct ac_llvm_context {
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
LLVMTypeRef intptr;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
@@ -355,6 +359,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
LLVMValueRef src0);
LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
#ifdef __cplusplus
}