r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2

v2:
  - Drop dependency on LLVM >= 3.5.1
This commit is contained in:
Tom Stellard
2014-09-25 18:10:44 -07:00
parent e91735a641
commit fa07f4b68a
8 changed files with 177 additions and 94 deletions

View File

@@ -49,6 +49,7 @@
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
#include "radeon_llvm_util.h" #include "radeon_llvm_util.h"
#endif #endif
#include "radeon_elf_util.h"
#include <inttypes.h> #include <inttypes.h>
/** /**
@@ -198,18 +199,42 @@ void *evergreen_create_compute_state(
{ {
struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute); struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
#ifdef HAVE_OPENCL
const struct pipe_llvm_program_header * header; const struct pipe_llvm_program_header * header;
const unsigned char * code; const char *code;
unsigned i; void *p;
boolean use_kill;
shader->llvm_ctx = LLVMContextCreate();
COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n"); COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
header = cso->prog; header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header); code = cso->prog + sizeof(struct pipe_llvm_program_header);
#if HAVE_LLVM < 0x0306
#ifdef HAVE_OPENCL
(void)use_kill;
(void)p;
shader->llvm_ctx = LLVMContextCreate();
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx,
code, header->num_bytes);
shader->kernels = CALLOC(sizeof(struct r600_kernel),
shader->num_kernels);
{
unsigned i;
for (i = 0; i < shader->num_kernels; i++) {
struct r600_kernel *kernel = &shader->kernels[i];
kernel->llvm_module = radeon_llvm_get_kernel_module(
shader->llvm_ctx, i, code, header->num_bytes);
}
}
#endif
#else
memset(&shader->binary, 0, sizeof(shader->binary));
radeon_elf_read(code, header->num_bytes, &shader->binary, true);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
shader->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
#endif #endif
shader->ctx = (struct r600_context*)ctx; shader->ctx = (struct r600_context*)ctx;
@@ -217,17 +242,6 @@ void *evergreen_create_compute_state(
shader->private_size = cso->req_private_mem; shader->private_size = cso->req_private_mem;
shader->input_size = cso->req_input_mem; shader->input_size = cso->req_input_mem;
#ifdef HAVE_OPENCL
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
header->num_bytes);
shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
for (i = 0; i < shader->num_kernels; i++) {
struct r600_kernel *kernel = &shader->kernels[i];
kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
code, header->num_bytes);
}
#endif
return shader; return shader;
} }
@@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
if (!shader) if (!shader)
return; return;
FREE(shader->kernels);
#ifdef HAVE_OPENCL
if (shader->llvm_ctx){
LLVMContextDispose(shader->llvm_ctx);
}
#endif
FREE(shader); FREE(shader);
} }
@@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch(
unsigned wave_divisor = (16 * num_pipes); unsigned wave_divisor = (16 * num_pipes);
int group_size = 1; int group_size = 1;
int grid_size = 1; int grid_size = 1;
unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw; unsigned lds_size = shader->local_size / 4 +
#if HAVE_LLVM < 0x0306
shader->active_kernel->bc.nlds_dw;
#else
shader->bc.nlds_dw;
#endif
/* Calculate group_size/grid_size */ /* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) { for (i = 0; i < 3; i++) {
@@ -520,19 +532,34 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state = struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom; (struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader; struct r600_pipe_compute *shader = state->shader;
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs; struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
#if HAVE_LLVM < 0x0306
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
code_bo = kernel->code_bo;
va = kernel->code_bo->gpu_address;
ngpr = kernel->bc.ngpr;
nstack = kernel->bc.nstack;
#else
code_bo = shader->code_bo;
va = shader->code_bo->gpu_address + state->pc;
ngpr = shader->bc.ngpr;
nstack = shader->bc.nstack;
#endif
r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3); r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */ radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */ radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
S_0288D4_NUM_GPRS(kernel->bc.ngpr) S_0288D4_NUM_GPRS(ngpr)
| S_0288D4_STACK_SIZE(kernel->bc.nstack)); | S_0288D4_STACK_SIZE(nstack));
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */ radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0)); radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx, radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
kernel->code_bo, RADEON_USAGE_READ, code_bo, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA)); RADEON_PRIO_SHADER_DATA));
} }
@@ -542,46 +569,54 @@ static void evergreen_launch_grid(
uint32_t pc, const void *input) uint32_t pc, const void *input)
{ {
struct r600_context *ctx = (struct r600_context *)ctx_; struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader; struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
struct r600_kernel *kernel = &shader->kernels[pc]; boolean use_kill;
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
#if HAVE_LLVM < 0x0306
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
struct r600_kernel *kernel = &shader->kernels[pc];
(void)use_kill;
if (!kernel->code_bo) {
void *p;
struct r600_bytecode *bc = &kernel->bc;
LLVMModuleRef mod = kernel->llvm_module;
boolean use_kill = false;
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
unsigned sb_disasm = use_sb ||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
if (!kernel->code_bo) { r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
void *p; ctx->screen->has_compressed_msaa_texturing);
struct r600_bytecode *bc = &kernel->bc; bc->type = TGSI_PROCESSOR_COMPUTE;
LLVMModuleRef mod = kernel->llvm_module; bc->isa = ctx->isa;
boolean use_kill = false; r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
unsigned sb_disasm = use_sb ||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family, if (dump && !sb_disasm) {
ctx->screen->has_compressed_msaa_texturing); r600_bytecode_disasm(bc);
bc->type = TGSI_PROCESSOR_COMPUTE; } else if ((dump && sb_disasm) || use_sb) {
bc->isa = ctx->isa; if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump); R600_ERR("r600_sb_bytecode_process failed!\n");
}
if (dump && !sb_disasm) { kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
r600_bytecode_disasm(bc); kernel->bc.ndw * 4);
} else if ((dump && sb_disasm) || use_sb) { p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb)) memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
R600_ERR("r600_sb_bytecode_process failed!\n"); ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
} }
kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
kernel->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
}
#endif #endif
shader->active_kernel = kernel; shader->active_kernel = kernel;
ctx->cs_shader_state.kernel_index = pc; ctx->cs_shader_state.kernel_index = pc;
#else
ctx->cs_shader_state.pc = pc;
/* Get the config information for this kernel. */
r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
#endif
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input); evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
compute_emit_cs(ctx, block_layout, grid_layout); compute_emit_cs(ctx, block_layout, grid_layout);
} }

View File

@@ -27,6 +27,8 @@
#include "r600_asm.h" #include "r600_asm.h"
#if HAVE_LLVM < 0x0306
struct r600_kernel { struct r600_kernel {
unsigned count; unsigned count;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
@@ -36,13 +38,21 @@ struct r600_kernel {
struct r600_bytecode bc; struct r600_bytecode bc;
}; };
#endif
struct r600_pipe_compute { struct r600_pipe_compute {
struct r600_context *ctx; struct r600_context *ctx;
#if HAVE_LLVM < 0x0306
unsigned num_kernels; unsigned num_kernels;
struct r600_kernel *kernels; struct r600_kernel *kernels;
struct r600_kernel *active_kernel; struct r600_kernel *active_kernel;
#endif
struct radeon_shader_binary binary;
struct r600_resource *code_bo;
struct r600_bytecode bc;
unsigned local_size; unsigned local_size;
unsigned private_size; unsigned private_size;
unsigned input_size; unsigned input_size;

View File

@@ -13,8 +13,9 @@
#include "r600_opcodes.h" #include "r600_opcodes.h"
#include "r600_shader.h" #include "r600_shader.h"
#include "r600_pipe.h" #include "r600_pipe.h"
#include "radeon/radeon_llvm.h" #include "radeon_llvm.h"
#include "radeon/radeon_llvm_emit.h" #include "radeon_llvm_emit.h"
#include "radeon_elf_util.h"
#include <stdio.h> #include <stdio.h>
@@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm(
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868 #define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850 #define R_028850_SQ_PGM_RESOURCES_PS 0x028850
unsigned r600_llvm_compile( void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
LLVMModuleRef mod, struct r600_bytecode *bc,
enum radeon_family family, uint64_t symbol_offset,
struct r600_bytecode *bc, boolean *use_kill)
boolean *use_kill,
unsigned dump)
{ {
unsigned r;
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
unsigned i; unsigned i;
const unsigned char *config =
radeon_shader_binary_config_start(binary, symbol_offset);
memset(&binary, 0, sizeof(struct radeon_shader_binary)); for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
assert(binary.code_size % 4 == 0);
bc->bytecode = CALLOC(1, binary.code_size);
memcpy(bc->bytecode, binary.code, binary.code_size);
bc->ndw = binary.code_size / 4;
for (i = 0; i < binary.config_size; i+= 8) {
unsigned reg = unsigned reg =
util_le32_to_cpu(*(uint32_t*)(binary.config + i)); util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = unsigned value =
util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4)); util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) { switch (reg) {
/* R600 / R700 */ /* R600 / R700 */
case R_028850_SQ_PGM_RESOURCES_PS: case R_028850_SQ_PGM_RESOURCES_PS:
@@ -851,8 +841,8 @@ unsigned r600_llvm_compile(
case R_028844_SQ_PGM_RESOURCES_PS: case R_028844_SQ_PGM_RESOURCES_PS:
case R_028860_SQ_PGM_RESOURCES_VS: case R_028860_SQ_PGM_RESOURCES_VS:
case R_0288D4_SQ_PGM_RESOURCES_LS: case R_0288D4_SQ_PGM_RESOURCES_LS:
bc->ngpr = G_028844_NUM_GPRS(value); bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
bc->nstack = G_028844_STACK_SIZE(value); bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
break; break;
case R_02880C_DB_SHADER_CONTROL: case R_02880C_DB_SHADER_CONTROL:
*use_kill = G_02880C_KILL_ENABLE(value); *use_kill = G_02880C_KILL_ENABLE(value);
@@ -863,6 +853,39 @@ unsigned r600_llvm_compile(
} }
} }
}
/**
 * Populate \p bc from an already compiled shader binary.
 *
 * The machine code in binary->code is copied into a newly allocated
 * bc->bytecode buffer, bc->ndw is set to the code size in dwords, and the
 * config for symbol offset 0 is parsed into \p bc / \p use_kill via
 * r600_shader_binary_read_config().
 *
 * \param bc        bytecode object to fill in
 * \param binary    source binary; code_size is asserted to be a multiple of 4
 * \param use_kill  forwarded to r600_shader_binary_read_config()
 * \return 0 on success, non-zero if the bytecode buffer could not be
 *         allocated (bc->bytecode is NULL and bc->ndw is 0 in that case).
 */
unsigned r600_create_shader(struct r600_bytecode *bc,
		const struct radeon_shader_binary *binary,
		boolean *use_kill)
{
	assert(binary->code_size % 4 == 0);

	bc->bytecode = CALLOC(1, binary->code_size);
	if (binary->code_size != 0) {
		/* A zero-sized request may legitimately yield NULL, so only a
		 * failed non-empty allocation is treated as an error. */
		if (!bc->bytecode) {
			bc->ndw = 0;
			return 1;
		}
		memcpy(bc->bytecode, binary->code, binary->code_size);
	}
	bc->ndw = binary->code_size / 4;

	r600_shader_binary_read_config(binary, bc, 0, use_kill);
	return 0;
}
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
struct r600_bytecode *bc,
boolean *use_kill,
unsigned dump)
{
unsigned r;
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
r = r600_create_shader(bc, &binary, use_kill);
FREE(binary.code); FREE(binary.code);
FREE(binary.config); FREE(binary.config);

View File

@@ -10,6 +10,7 @@
struct r600_bytecode; struct r600_bytecode;
struct r600_shader_ctx; struct r600_shader_ctx;
struct radeon_llvm_context; struct radeon_llvm_context;
struct radeon_shader_binary;
enum radeon_family; enum radeon_family;
LLVMModuleRef r600_tgsi_llvm( LLVMModuleRef r600_tgsi_llvm(
@@ -23,6 +24,15 @@ unsigned r600_llvm_compile(
boolean *use_kill, boolean *use_kill,
unsigned dump); unsigned dump);
/**
 * Fill \p bc from a compiled shader binary: the code bytes of \p binary are
 * copied into a newly allocated bc->bytecode, bc->ndw is set to the code size
 * in dwords, and the config at symbol offset 0 is read into \p bc and
 * \p use_kill.
 *
 * \return 0 on success.
 */
unsigned r600_create_shader(struct r600_bytecode *bc,
const struct radeon_shader_binary *binary,
boolean *use_kill);
/**
 * Parse the config section of \p binary that belongs to the symbol at
 * \p symbol_offset. The config is walked as pairs of little-endian 32-bit
 * words (register, value), binary->config_size_per_symbol bytes in total.
 *
 * For the SQ_PGM_RESOURCES_* registers, bc->ngpr and bc->nstack are raised to
 * at least the value found (they are never lowered, so repeated calls keep
 * the maximum seen). For DB_SHADER_CONTROL, *use_kill is set from the
 * KILL_ENABLE field.
 *
 * \param binary         binary whose config is read
 * \param bc             bytecode object receiving the resource limits
 * \param symbol_offset  offset of the symbol (kernel) whose config is wanted
 * \param use_kill       written when a DB_SHADER_CONTROL entry is present
 */
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
boolean *use_kill);
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */ #endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */ #endif /* R600_LLVM_H */

View File

@@ -472,7 +472,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return 16; return 16;
case PIPE_SHADER_CAP_PREFERRED_IR: case PIPE_SHADER_CAP_PREFERRED_IR:
if (shader == PIPE_SHADER_COMPUTE) { if (shader == PIPE_SHADER_COMPUTE) {
#if HAVE_LLVM < 0x0306
return PIPE_SHADER_IR_LLVM; return PIPE_SHADER_IR_LLVM;
#else
return PIPE_SHADER_IR_NATIVE;
#endif
} else { } else {
return PIPE_SHADER_IR_TGSI; return PIPE_SHADER_IR_TGSI;
} }

View File

@@ -146,6 +146,7 @@ struct r600_clip_state {
struct r600_cs_shader_state { struct r600_cs_shader_state {
struct r600_atom atom; struct r600_atom atom;
unsigned kernel_index; unsigned kernel_index;
unsigned pc;
struct r600_pipe_compute *shader; struct r600_pipe_compute *shader;
}; };

View File

@@ -34,7 +34,7 @@
#include <llvm-c/Transforms/PassManagerBuilder.h> #include <llvm-c/Transforms/PassManagerBuilder.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const unsigned char * bitcode, unsigned bitcode_len) const char * bitcode, unsigned bitcode_len)
{ {
LLVMMemoryBufferRef buf; LLVMMemoryBufferRef buf;
LLVMModuleRef module; LLVMModuleRef module;
@@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
} }
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const unsigned char *bitcode, unsigned bitcode_len) const char *bitcode, unsigned bitcode_len)
{ {
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len); LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels"); return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
@@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
} }
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const unsigned char *bitcode, unsigned bitcode_len) const char *bitcode, unsigned bitcode_len)
{ {
LLVMModuleRef mod; LLVMModuleRef mod;
unsigned num_kernels; unsigned num_kernels;

View File

@@ -30,10 +30,10 @@
#include <llvm-c/Core.h> #include <llvm-c/Core.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx, LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const unsigned char * bitcode, unsigned bitcode_len); const char * bitcode, unsigned bitcode_len);
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx, unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const unsigned char *bitcode, unsigned bitcode_len); const char *bitcode, unsigned bitcode_len);
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index, LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const unsigned char *bitcode, unsigned bitcode_len); const char *bitcode, unsigned bitcode_len);
#endif #endif