r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2

v2:
  - Drop dependency on LLVM >= 3.5.1
This commit is contained in:
Tom Stellard
2014-09-25 18:10:44 -07:00
parent e91735a641
commit fa07f4b68a
8 changed files with 177 additions and 94 deletions

View File

@@ -49,6 +49,7 @@
#ifdef HAVE_OPENCL
#include "radeon_llvm_util.h"
#endif
#include "radeon_elf_util.h"
#include <inttypes.h>
/**
@@ -198,18 +199,42 @@ void *evergreen_create_compute_state(
{
struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
#ifdef HAVE_OPENCL
const struct pipe_llvm_program_header * header;
const unsigned char * code;
unsigned i;
shader->llvm_ctx = LLVMContextCreate();
const char *code;
void *p;
boolean use_kill;
COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
header = cso->prog;
code = cso->prog + sizeof(struct pipe_llvm_program_header);
#if HAVE_LLVM < 0x0306
#ifdef HAVE_OPENCL
(void)use_kill;
(void)p;
shader->llvm_ctx = LLVMContextCreate();
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx,
code, header->num_bytes);
shader->kernels = CALLOC(sizeof(struct r600_kernel),
shader->num_kernels);
{
unsigned i;
for (i = 0; i < shader->num_kernels; i++) {
struct r600_kernel *kernel = &shader->kernels[i];
kernel->llvm_module = radeon_llvm_get_kernel_module(
shader->llvm_ctx, i, code, header->num_bytes);
}
}
#endif
#else
memset(&shader->binary, 0, sizeof(shader->binary));
radeon_elf_read(code, header->num_bytes, &shader->binary, true);
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
shader->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
#endif
shader->ctx = (struct r600_context*)ctx;
@@ -217,17 +242,6 @@ void *evergreen_create_compute_state(
shader->private_size = cso->req_private_mem;
shader->input_size = cso->req_input_mem;
#ifdef HAVE_OPENCL
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
header->num_bytes);
shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
for (i = 0; i < shader->num_kernels; i++) {
struct r600_kernel *kernel = &shader->kernels[i];
kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
code, header->num_bytes);
}
#endif
return shader;
}
@@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
if (!shader)
return;
FREE(shader->kernels);
#ifdef HAVE_OPENCL
if (shader->llvm_ctx){
LLVMContextDispose(shader->llvm_ctx);
}
#endif
FREE(shader);
}
@@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch(
unsigned wave_divisor = (16 * num_pipes);
int group_size = 1;
int grid_size = 1;
unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw;
unsigned lds_size = shader->local_size / 4 +
#if HAVE_LLVM < 0x0306
shader->active_kernel->bc.nlds_dw;
#else
shader->bc.nlds_dw;
#endif
/* Calculate group_size/grid_size */
for (i = 0; i < 3; i++) {
@@ -520,19 +532,34 @@ void evergreen_emit_cs_shader(
struct r600_cs_shader_state *state =
(struct r600_cs_shader_state*)atom;
struct r600_pipe_compute *shader = state->shader;
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
uint64_t va;
struct r600_resource *code_bo;
unsigned ngpr, nstack;
#if HAVE_LLVM < 0x0306
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
code_bo = kernel->code_bo;
va = kernel->code_bo->gpu_address;
ngpr = kernel->bc.ngpr;
nstack = kernel->bc.nstack;
#else
code_bo = shader->code_bo;
va = shader->code_bo->gpu_address + state->pc;
ngpr = shader->bc.ngpr;
nstack = shader->bc.nstack;
#endif
r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
S_0288D4_NUM_GPRS(kernel->bc.ngpr)
| S_0288D4_STACK_SIZE(kernel->bc.nstack));
S_0288D4_NUM_GPRS(ngpr)
| S_0288D4_STACK_SIZE(nstack));
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
kernel->code_bo, RADEON_USAGE_READ,
code_bo, RADEON_USAGE_READ,
RADEON_PRIO_SHADER_DATA));
}
@@ -542,46 +569,54 @@ static void evergreen_launch_grid(
uint32_t pc, const void *input)
{
struct r600_context *ctx = (struct r600_context *)ctx_;
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
struct r600_kernel *kernel = &shader->kernels[pc];
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
boolean use_kill;
#if HAVE_LLVM < 0x0306
#ifdef HAVE_OPENCL
struct r600_kernel *kernel = &shader->kernels[pc];
(void)use_kill;
if (!kernel->code_bo) {
void *p;
struct r600_bytecode *bc = &kernel->bc;
LLVMModuleRef mod = kernel->llvm_module;
boolean use_kill = false;
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
unsigned sb_disasm = use_sb ||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
if (!kernel->code_bo) {
void *p;
struct r600_bytecode *bc = &kernel->bc;
LLVMModuleRef mod = kernel->llvm_module;
boolean use_kill = false;
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
unsigned sb_disasm = use_sb ||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
ctx->screen->has_compressed_msaa_texturing);
bc->type = TGSI_PROCESSOR_COMPUTE;
bc->isa = ctx->isa;
r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
ctx->screen->has_compressed_msaa_texturing);
bc->type = TGSI_PROCESSOR_COMPUTE;
bc->isa = ctx->isa;
r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
if (dump && !sb_disasm) {
r600_bytecode_disasm(bc);
} else if ((dump && sb_disasm) || use_sb) {
if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
R600_ERR("r600_sb_bytecode_process failed!\n");
}
if (dump && !sb_disasm) {
r600_bytecode_disasm(bc);
} else if ((dump && sb_disasm) || use_sb) {
if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
R600_ERR("r600_sb_bytecode_process failed!\n");
}
kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
kernel->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
}
kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
kernel->bc.ndw * 4);
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
}
#endif
shader->active_kernel = kernel;
ctx->cs_shader_state.kernel_index = pc;
#else
ctx->cs_shader_state.pc = pc;
/* Get the config information for this kernel. */
r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
#endif
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
compute_emit_cs(ctx, block_layout, grid_layout);
}

View File

@@ -27,6 +27,8 @@
#include "r600_asm.h"
#if HAVE_LLVM < 0x0306
struct r600_kernel {
unsigned count;
#ifdef HAVE_OPENCL
@@ -36,13 +38,21 @@ struct r600_kernel {
struct r600_bytecode bc;
};
#endif
struct r600_pipe_compute {
struct r600_context *ctx;
#if HAVE_LLVM < 0x0306
unsigned num_kernels;
struct r600_kernel *kernels;
struct r600_kernel *active_kernel;
#endif
struct radeon_shader_binary binary;
struct r600_resource *code_bo;
struct r600_bytecode bc;
unsigned local_size;
unsigned private_size;
unsigned input_size;

View File

@@ -13,8 +13,9 @@
#include "r600_opcodes.h"
#include "r600_shader.h"
#include "r600_pipe.h"
#include "radeon/radeon_llvm.h"
#include "radeon/radeon_llvm_emit.h"
#include "radeon_llvm.h"
#include "radeon_llvm_emit.h"
#include "radeon_elf_util.h"
#include <stdio.h>
@@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm(
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
struct r600_bytecode *bc,
boolean *use_kill,
unsigned dump)
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
boolean *use_kill)
{
unsigned r;
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
unsigned i;
const unsigned char *config =
radeon_shader_binary_config_start(binary, symbol_offset);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
assert(binary.code_size % 4 == 0);
bc->bytecode = CALLOC(1, binary.code_size);
memcpy(bc->bytecode, binary.code, binary.code_size);
bc->ndw = binary.code_size / 4;
for (i = 0; i < binary.config_size; i+= 8) {
for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
unsigned reg =
util_le32_to_cpu(*(uint32_t*)(binary.config + i));
util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value =
util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
/* R600 / R700 */
case R_028850_SQ_PGM_RESOURCES_PS:
@@ -851,8 +841,8 @@ unsigned r600_llvm_compile(
case R_028844_SQ_PGM_RESOURCES_PS:
case R_028860_SQ_PGM_RESOURCES_VS:
case R_0288D4_SQ_PGM_RESOURCES_LS:
bc->ngpr = G_028844_NUM_GPRS(value);
bc->nstack = G_028844_STACK_SIZE(value);
bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
break;
case R_02880C_DB_SHADER_CONTROL:
*use_kill = G_02880C_KILL_ENABLE(value);
@@ -863,6 +853,39 @@ unsigned r600_llvm_compile(
}
}
}
/**
 * Populate an r600_bytecode object from an already compiled shader binary.
 *
 * The machine code in \p binary is copied into a freshly allocated
 * bc->bytecode buffer, bc->ndw is set to the code size in dwords, and the
 * register configuration for the symbol at offset 0 is then read with
 * r600_shader_binary_read_config().
 *
 * \param bc        bytecode object to fill in
 * \param binary    shader binary supplying the code and config data
 * \param use_kill  forwarded unchanged to r600_shader_binary_read_config()
 * \return 0 on success, non-zero if the bytecode buffer could not be
 *         allocated (in which case bc->bytecode is NULL and bc->ndw is 0)
 */
unsigned r600_create_shader(struct r600_bytecode *bc,
		const struct radeon_shader_binary *binary,
		boolean *use_kill)
{
	assert(binary->code_size % 4 == 0);

	bc->bytecode = CALLOC(1, binary->code_size);
	if (binary->code_size != 0) {
		/* Do not hand a failed allocation to memcpy(); report it to
		 * the caller instead of crashing on a NULL destination. */
		if (!bc->bytecode) {
			bc->ndw = 0;
			return 1;
		}
		memcpy(bc->bytecode, binary->code, binary->code_size);
	}
	bc->ndw = binary->code_size / 4;

	/* Offset 0 selects the config of the first symbol in the binary. */
	r600_shader_binary_read_config(binary, bc, 0, use_kill);
	return 0;
}
unsigned r600_llvm_compile(
LLVMModuleRef mod,
enum radeon_family family,
struct r600_bytecode *bc,
boolean *use_kill,
unsigned dump)
{
unsigned r;
struct radeon_shader_binary binary;
const char * gpu_family = r600_get_llvm_processor_name(family);
memset(&binary, 0, sizeof(struct radeon_shader_binary));
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
r = r600_create_shader(bc, &binary, use_kill);
FREE(binary.code);
FREE(binary.config);

View File

@@ -10,6 +10,7 @@
struct r600_bytecode;
struct r600_shader_ctx;
struct radeon_llvm_context;
struct radeon_shader_binary;
enum radeon_family;
LLVMModuleRef r600_tgsi_llvm(
@@ -23,6 +24,15 @@ unsigned r600_llvm_compile(
boolean *use_kill,
unsigned dump);
/**
 * Fill in \p bc from a compiled shader binary: the code in \p binary is
 * copied into a newly allocated bc->bytecode buffer, bc->ndw is set to the
 * code size in dwords, and the config for symbol offset 0 is read via
 * r600_shader_binary_read_config().
 *
 * \return 0 on success.
 */
unsigned r600_create_shader(struct r600_bytecode *bc,
const struct radeon_shader_binary *binary,
boolean *use_kill);
/**
 * Read the register configuration that belongs to the symbol located at
 * \p symbol_offset in \p binary and apply it to \p bc.
 *
 * The config is walked as pairs of little-endian 32-bit words
 * (register, value). For the SQ_PGM_RESOURCES_* registers, bc->ngpr and
 * bc->nstack are raised to the larger of their current value and the value
 * encoded in the config; for DB_SHADER_CONTROL, *use_kill is set from the
 * KILL_ENABLE field.
 * NOTE(review): further registers may be handled in parts of the
 * implementation not visible here -- confirm against the definition.
 */
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
struct r600_bytecode *bc,
uint64_t symbol_offset,
boolean *use_kill);
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
#endif /* R600_LLVM_H */

View File

@@ -472,7 +472,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
return 16;
case PIPE_SHADER_CAP_PREFERRED_IR:
if (shader == PIPE_SHADER_COMPUTE) {
#if HAVE_LLVM < 0x0306
return PIPE_SHADER_IR_LLVM;
#else
return PIPE_SHADER_IR_NATIVE;
#endif
} else {
return PIPE_SHADER_IR_TGSI;
}

View File

@@ -146,6 +146,7 @@ struct r600_clip_state {
/* State atom describing the compute shader that is currently bound. */
struct r600_cs_shader_state {
struct r600_atom atom;
/* Index into shader->kernels; written by evergreen_launch_grid() and read
 * by evergreen_emit_cs_shader() when building with HAVE_LLVM < 0x0306. */
unsigned kernel_index;
/* Offset of the kernel entry point; written by evergreen_launch_grid() and
 * added to the code buffer's GPU address in evergreen_emit_cs_shader() when
 * building with HAVE_LLVM >= 0x0306. */
unsigned pc;
/* Compute shader whose program registers evergreen_emit_cs_shader() emits. */
struct r600_pipe_compute *shader;
};

View File

@@ -34,7 +34,7 @@
#include <llvm-c/Transforms/PassManagerBuilder.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const unsigned char * bitcode, unsigned bitcode_len)
const char * bitcode, unsigned bitcode_len)
{
LLVMMemoryBufferRef buf;
LLVMModuleRef module;
@@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
}
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const unsigned char *bitcode, unsigned bitcode_len)
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
@@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
}
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const unsigned char *bitcode, unsigned bitcode_len)
const char *bitcode, unsigned bitcode_len)
{
LLVMModuleRef mod;
unsigned num_kernels;

View File

@@ -30,10 +30,10 @@
#include <llvm-c/Core.h>
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
const unsigned char * bitcode, unsigned bitcode_len);
const char * bitcode, unsigned bitcode_len);
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
const unsigned char *bitcode, unsigned bitcode_len);
const char *bitcode, unsigned bitcode_len);
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
const unsigned char *bitcode, unsigned bitcode_len);
const char *bitcode, unsigned bitcode_len);
#endif