r600g/compute: Enable PIPE_SHADER_IR_NATIVE for compute shaders v2
v2: - Drop dependency on LLVM >= 3.5.1
This commit is contained in:
@@ -49,6 +49,7 @@
|
||||
#ifdef HAVE_OPENCL
|
||||
#include "radeon_llvm_util.h"
|
||||
#endif
|
||||
#include "radeon_elf_util.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
/**
|
||||
@@ -198,18 +199,42 @@ void *evergreen_create_compute_state(
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
struct r600_pipe_compute *shader = CALLOC_STRUCT(r600_pipe_compute);
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
const struct pipe_llvm_program_header * header;
|
||||
const unsigned char * code;
|
||||
unsigned i;
|
||||
|
||||
shader->llvm_ctx = LLVMContextCreate();
|
||||
const char *code;
|
||||
void *p;
|
||||
boolean use_kill;
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_create_compute_state\n");
|
||||
|
||||
header = cso->prog;
|
||||
code = cso->prog + sizeof(struct pipe_llvm_program_header);
|
||||
#if HAVE_LLVM < 0x0306
|
||||
#ifdef HAVE_OPENCL
|
||||
(void)use_kill;
|
||||
(void)p;
|
||||
shader->llvm_ctx = LLVMContextCreate();
|
||||
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx,
|
||||
code, header->num_bytes);
|
||||
shader->kernels = CALLOC(sizeof(struct r600_kernel),
|
||||
shader->num_kernels);
|
||||
{
|
||||
unsigned i;
|
||||
for (i = 0; i < shader->num_kernels; i++) {
|
||||
struct r600_kernel *kernel = &shader->kernels[i];
|
||||
kernel->llvm_module = radeon_llvm_get_kernel_module(
|
||||
shader->llvm_ctx, i, code, header->num_bytes);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
memset(&shader->binary, 0, sizeof(shader->binary));
|
||||
radeon_elf_read(code, header->num_bytes, &shader->binary, true);
|
||||
r600_create_shader(&shader->bc, &shader->binary, &use_kill);
|
||||
|
||||
shader->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
|
||||
shader->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, shader->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, shader->bc.bytecode, shader->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(shader->code_bo->cs_buf);
|
||||
#endif
|
||||
|
||||
shader->ctx = (struct r600_context*)ctx;
|
||||
@@ -217,17 +242,6 @@ void *evergreen_create_compute_state(
|
||||
shader->private_size = cso->req_private_mem;
|
||||
shader->input_size = cso->req_input_mem;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
shader->num_kernels = radeon_llvm_get_num_kernels(shader->llvm_ctx, code,
|
||||
header->num_bytes);
|
||||
shader->kernels = CALLOC(sizeof(struct r600_kernel), shader->num_kernels);
|
||||
|
||||
for (i = 0; i < shader->num_kernels; i++) {
|
||||
struct r600_kernel *kernel = &shader->kernels[i];
|
||||
kernel->llvm_module = radeon_llvm_get_kernel_module(shader->llvm_ctx, i,
|
||||
code, header->num_bytes);
|
||||
}
|
||||
#endif
|
||||
return shader;
|
||||
}
|
||||
|
||||
@@ -238,14 +252,6 @@ void evergreen_delete_compute_state(struct pipe_context *ctx, void* state)
|
||||
if (!shader)
|
||||
return;
|
||||
|
||||
FREE(shader->kernels);
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
if (shader->llvm_ctx){
|
||||
LLVMContextDispose(shader->llvm_ctx);
|
||||
}
|
||||
#endif
|
||||
|
||||
FREE(shader);
|
||||
}
|
||||
|
||||
@@ -347,7 +353,13 @@ static void evergreen_emit_direct_dispatch(
|
||||
unsigned wave_divisor = (16 * num_pipes);
|
||||
int group_size = 1;
|
||||
int grid_size = 1;
|
||||
unsigned lds_size = shader->local_size / 4 + shader->active_kernel->bc.nlds_dw;
|
||||
unsigned lds_size = shader->local_size / 4 +
|
||||
#if HAVE_LLVM < 0x0306
|
||||
shader->active_kernel->bc.nlds_dw;
|
||||
#else
|
||||
shader->bc.nlds_dw;
|
||||
#endif
|
||||
|
||||
|
||||
/* Calculate group_size/grid_size */
|
||||
for (i = 0; i < 3; i++) {
|
||||
@@ -520,19 +532,34 @@ void evergreen_emit_cs_shader(
|
||||
struct r600_cs_shader_state *state =
|
||||
(struct r600_cs_shader_state*)atom;
|
||||
struct r600_pipe_compute *shader = state->shader;
|
||||
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
|
||||
struct radeon_winsys_cs *cs = rctx->b.rings.gfx.cs;
|
||||
uint64_t va;
|
||||
struct r600_resource *code_bo;
|
||||
unsigned ngpr, nstack;
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
struct r600_kernel *kernel = &shader->kernels[state->kernel_index];
|
||||
code_bo = kernel->code_bo;
|
||||
va = kernel->code_bo->gpu_address;
|
||||
ngpr = kernel->bc.ngpr;
|
||||
nstack = kernel->bc.nstack;
|
||||
#else
|
||||
code_bo = shader->code_bo;
|
||||
va = shader->code_bo->gpu_address + state->pc;
|
||||
ngpr = shader->bc.ngpr;
|
||||
nstack = shader->bc.nstack;
|
||||
#endif
|
||||
|
||||
r600_write_compute_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
|
||||
radeon_emit(cs, kernel->code_bo->gpu_address >> 8); /* R_0288D0_SQ_PGM_START_LS */
|
||||
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
|
||||
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
|
||||
S_0288D4_NUM_GPRS(kernel->bc.ngpr)
|
||||
| S_0288D4_STACK_SIZE(kernel->bc.nstack));
|
||||
S_0288D4_NUM_GPRS(ngpr)
|
||||
| S_0288D4_STACK_SIZE(nstack));
|
||||
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
|
||||
|
||||
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
|
||||
radeon_emit(cs, r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
|
||||
kernel->code_bo, RADEON_USAGE_READ,
|
||||
code_bo, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA));
|
||||
}
|
||||
|
||||
@@ -542,46 +569,54 @@ static void evergreen_launch_grid(
|
||||
uint32_t pc, const void *input)
|
||||
{
|
||||
struct r600_context *ctx = (struct r600_context *)ctx_;
|
||||
|
||||
struct r600_pipe_compute *shader = ctx->cs_shader_state.shader;
|
||||
struct r600_kernel *kernel = &shader->kernels[pc];
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
|
||||
boolean use_kill;
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
#ifdef HAVE_OPENCL
|
||||
struct r600_kernel *kernel = &shader->kernels[pc];
|
||||
(void)use_kill;
|
||||
if (!kernel->code_bo) {
|
||||
void *p;
|
||||
struct r600_bytecode *bc = &kernel->bc;
|
||||
LLVMModuleRef mod = kernel->llvm_module;
|
||||
boolean use_kill = false;
|
||||
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
|
||||
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
|
||||
unsigned sb_disasm = use_sb ||
|
||||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
|
||||
|
||||
if (!kernel->code_bo) {
|
||||
void *p;
|
||||
struct r600_bytecode *bc = &kernel->bc;
|
||||
LLVMModuleRef mod = kernel->llvm_module;
|
||||
boolean use_kill = false;
|
||||
bool dump = (ctx->screen->b.debug_flags & DBG_CS) != 0;
|
||||
unsigned use_sb = ctx->screen->b.debug_flags & DBG_SB_CS;
|
||||
unsigned sb_disasm = use_sb ||
|
||||
(ctx->screen->b.debug_flags & DBG_SB_DISASM);
|
||||
r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
|
||||
ctx->screen->has_compressed_msaa_texturing);
|
||||
bc->type = TGSI_PROCESSOR_COMPUTE;
|
||||
bc->isa = ctx->isa;
|
||||
r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
|
||||
|
||||
r600_bytecode_init(bc, ctx->b.chip_class, ctx->b.family,
|
||||
ctx->screen->has_compressed_msaa_texturing);
|
||||
bc->type = TGSI_PROCESSOR_COMPUTE;
|
||||
bc->isa = ctx->isa;
|
||||
r600_llvm_compile(mod, ctx->b.family, bc, &use_kill, dump);
|
||||
if (dump && !sb_disasm) {
|
||||
r600_bytecode_disasm(bc);
|
||||
} else if ((dump && sb_disasm) || use_sb) {
|
||||
if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
|
||||
R600_ERR("r600_sb_bytecode_process failed!\n");
|
||||
}
|
||||
|
||||
if (dump && !sb_disasm) {
|
||||
r600_bytecode_disasm(bc);
|
||||
} else if ((dump && sb_disasm) || use_sb) {
|
||||
if (r600_sb_bytecode_process(ctx, bc, NULL, dump, use_sb))
|
||||
R600_ERR("r600_sb_bytecode_process failed!\n");
|
||||
}
|
||||
|
||||
kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
|
||||
kernel->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
|
||||
}
|
||||
kernel->code_bo = r600_compute_buffer_alloc_vram(ctx->screen,
|
||||
kernel->bc.ndw * 4);
|
||||
p = r600_buffer_map_sync_with_rings(&ctx->b, kernel->code_bo, PIPE_TRANSFER_WRITE);
|
||||
memcpy(p, kernel->bc.bytecode, kernel->bc.ndw * 4);
|
||||
ctx->b.ws->buffer_unmap(kernel->code_bo->cs_buf);
|
||||
}
|
||||
#endif
|
||||
shader->active_kernel = kernel;
|
||||
ctx->cs_shader_state.kernel_index = pc;
|
||||
#else
|
||||
ctx->cs_shader_state.pc = pc;
|
||||
/* Get the config information for this kernel. */
|
||||
r600_shader_binary_read_config(&shader->binary, &shader->bc, pc, &use_kill);
|
||||
#endif
|
||||
|
||||
COMPUTE_DBG(ctx->screen, "*** evergreen_launch_grid: pc = %u\n", pc);
|
||||
|
||||
|
||||
evergreen_compute_upload_input(ctx_, block_layout, grid_layout, input);
|
||||
compute_emit_cs(ctx, block_layout, grid_layout);
|
||||
}
|
||||
|
@@ -27,6 +27,8 @@
|
||||
|
||||
#include "r600_asm.h"
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
|
||||
struct r600_kernel {
|
||||
unsigned count;
|
||||
#ifdef HAVE_OPENCL
|
||||
@@ -36,13 +38,21 @@ struct r600_kernel {
|
||||
struct r600_bytecode bc;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
struct r600_pipe_compute {
|
||||
struct r600_context *ctx;
|
||||
|
||||
#if HAVE_LLVM < 0x0306
|
||||
unsigned num_kernels;
|
||||
struct r600_kernel *kernels;
|
||||
|
||||
struct r600_kernel *active_kernel;
|
||||
#endif
|
||||
|
||||
struct radeon_shader_binary binary;
|
||||
struct r600_resource *code_bo;
|
||||
struct r600_bytecode bc;
|
||||
|
||||
unsigned local_size;
|
||||
unsigned private_size;
|
||||
unsigned input_size;
|
||||
|
@@ -13,8 +13,9 @@
|
||||
#include "r600_opcodes.h"
|
||||
#include "r600_shader.h"
|
||||
#include "r600_pipe.h"
|
||||
#include "radeon/radeon_llvm.h"
|
||||
#include "radeon/radeon_llvm_emit.h"
|
||||
#include "radeon_llvm.h"
|
||||
#include "radeon_llvm_emit.h"
|
||||
#include "radeon_elf_util.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
@@ -818,31 +819,20 @@ LLVMModuleRef r600_tgsi_llvm(
|
||||
#define R_028868_SQ_PGM_RESOURCES_VS 0x028868
|
||||
#define R_028850_SQ_PGM_RESOURCES_PS 0x028850
|
||||
|
||||
unsigned r600_llvm_compile(
|
||||
LLVMModuleRef mod,
|
||||
enum radeon_family family,
|
||||
struct r600_bytecode *bc,
|
||||
boolean *use_kill,
|
||||
unsigned dump)
|
||||
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
|
||||
struct r600_bytecode *bc,
|
||||
uint64_t symbol_offset,
|
||||
boolean *use_kill)
|
||||
{
|
||||
unsigned r;
|
||||
struct radeon_shader_binary binary;
|
||||
const char * gpu_family = r600_get_llvm_processor_name(family);
|
||||
unsigned i;
|
||||
const unsigned char *config =
|
||||
radeon_shader_binary_config_start(binary, symbol_offset);
|
||||
|
||||
memset(&binary, 0, sizeof(struct radeon_shader_binary));
|
||||
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
|
||||
|
||||
assert(binary.code_size % 4 == 0);
|
||||
bc->bytecode = CALLOC(1, binary.code_size);
|
||||
memcpy(bc->bytecode, binary.code, binary.code_size);
|
||||
bc->ndw = binary.code_size / 4;
|
||||
|
||||
for (i = 0; i < binary.config_size; i+= 8) {
|
||||
for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
|
||||
unsigned reg =
|
||||
util_le32_to_cpu(*(uint32_t*)(binary.config + i));
|
||||
util_le32_to_cpu(*(uint32_t*)(config + i));
|
||||
unsigned value =
|
||||
util_le32_to_cpu(*(uint32_t*)(binary.config + i + 4));
|
||||
util_le32_to_cpu(*(uint32_t*)(config + i + 4));
|
||||
switch (reg) {
|
||||
/* R600 / R700 */
|
||||
case R_028850_SQ_PGM_RESOURCES_PS:
|
||||
@@ -851,8 +841,8 @@ unsigned r600_llvm_compile(
|
||||
case R_028844_SQ_PGM_RESOURCES_PS:
|
||||
case R_028860_SQ_PGM_RESOURCES_VS:
|
||||
case R_0288D4_SQ_PGM_RESOURCES_LS:
|
||||
bc->ngpr = G_028844_NUM_GPRS(value);
|
||||
bc->nstack = G_028844_STACK_SIZE(value);
|
||||
bc->ngpr = MAX2(bc->ngpr, G_028844_NUM_GPRS(value));
|
||||
bc->nstack = MAX2(bc->nstack, G_028844_STACK_SIZE(value));
|
||||
break;
|
||||
case R_02880C_DB_SHADER_CONTROL:
|
||||
*use_kill = G_02880C_KILL_ENABLE(value);
|
||||
@@ -863,6 +853,39 @@ unsigned r600_llvm_compile(
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
unsigned r600_create_shader(struct r600_bytecode *bc,
|
||||
const struct radeon_shader_binary *binary,
|
||||
boolean *use_kill)
|
||||
|
||||
{
|
||||
assert(binary->code_size % 4 == 0);
|
||||
bc->bytecode = CALLOC(1, binary->code_size);
|
||||
memcpy(bc->bytecode, binary->code, binary->code_size);
|
||||
bc->ndw = binary->code_size / 4;
|
||||
|
||||
r600_shader_binary_read_config(binary, bc, 0, use_kill);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned r600_llvm_compile(
|
||||
LLVMModuleRef mod,
|
||||
enum radeon_family family,
|
||||
struct r600_bytecode *bc,
|
||||
boolean *use_kill,
|
||||
unsigned dump)
|
||||
{
|
||||
unsigned r;
|
||||
struct radeon_shader_binary binary;
|
||||
const char * gpu_family = r600_get_llvm_processor_name(family);
|
||||
|
||||
memset(&binary, 0, sizeof(struct radeon_shader_binary));
|
||||
r = radeon_llvm_compile(mod, &binary, gpu_family, dump);
|
||||
|
||||
r = r600_create_shader(bc, &binary, use_kill);
|
||||
|
||||
FREE(binary.code);
|
||||
FREE(binary.config);
|
||||
|
||||
|
@@ -10,6 +10,7 @@
|
||||
struct r600_bytecode;
|
||||
struct r600_shader_ctx;
|
||||
struct radeon_llvm_context;
|
||||
struct radeon_shader_binary;
|
||||
enum radeon_family;
|
||||
|
||||
LLVMModuleRef r600_tgsi_llvm(
|
||||
@@ -23,6 +24,15 @@ unsigned r600_llvm_compile(
|
||||
boolean *use_kill,
|
||||
unsigned dump);
|
||||
|
||||
/*
 * Fill bc from a compiled shader binary: the binary's code is copied into
 * bc->bytecode, bc->ndw is set to the code size in dwords, and the config
 * for symbol offset 0 is read with r600_shader_binary_read_config().
 * binary->code_size is asserted to be a multiple of 4.
 * Returns 0 on success.
 */
unsigned r600_create_shader(struct r600_bytecode *bc,
|
||||
const struct radeon_shader_binary *binary,
|
||||
boolean *use_kill);
|
||||
|
||||
/*
 * Apply the register configuration stored in a shader binary to bc.
 *
 * The config region is located with
 * radeon_shader_binary_config_start(binary, symbol_offset) and
 * binary->config_size_per_symbol bytes of it are walked as pairs of
 * little-endian 32-bit words (register, value).  For the SQ_PGM_RESOURCES_*
 * registers, bc->ngpr and bc->nstack are raised to the larger of their
 * current value and the value encoded in the entry; for DB_SHADER_CONTROL,
 * *use_kill is set from the KILL_ENABLE field.
 * NOTE(review): further register cases may be handled in parts of the
 * definition not visible here -- confirm against the implementation.
 */
void r600_shader_binary_read_config(const struct radeon_shader_binary *binary,
|
||||
struct r600_bytecode *bc,
|
||||
uint64_t symbol_offset,
|
||||
boolean *use_kill);
|
||||
|
||||
#endif /* defined R600_USE_LLVM || defined HAVE_OPENCL */
|
||||
|
||||
#endif /* R600_LLVM_H */
|
||||
|
@@ -472,7 +472,11 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
if (shader == PIPE_SHADER_COMPUTE) {
|
||||
#if HAVE_LLVM < 0x0306
|
||||
return PIPE_SHADER_IR_LLVM;
|
||||
#else
|
||||
return PIPE_SHADER_IR_NATIVE;
|
||||
#endif
|
||||
} else {
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
|
@@ -146,6 +146,7 @@ struct r600_clip_state {
|
||||
/*
 * State consumed by evergreen_emit_cs_shader() when emitting the compute
 * shader program registers.
 *
 * kernel_index  index into shader->kernels[]; written by
 *               evergreen_launch_grid() and read by the emit code when
 *               HAVE_LLVM < 0x0306.
 * pc            value passed to evergreen_launch_grid(); when
 *               HAVE_LLVM >= 0x0306 it is added to the code buffer's GPU
 *               address to form the program start address.
 * shader        compute state whose code buffer and bytecode are emitted.
 */
struct r600_cs_shader_state {
|
||||
struct r600_atom atom;
|
||||
unsigned kernel_index;
|
||||
unsigned pc;
|
||||
struct r600_pipe_compute *shader;
|
||||
};
|
||||
|
||||
|
@@ -34,7 +34,7 @@
|
||||
#include <llvm-c/Transforms/PassManagerBuilder.h>
|
||||
|
||||
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
|
||||
const unsigned char * bitcode, unsigned bitcode_len)
|
||||
const char * bitcode, unsigned bitcode_len)
|
||||
{
|
||||
LLVMMemoryBufferRef buf;
|
||||
LLVMModuleRef module;
|
||||
@@ -47,7 +47,7 @@ LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
|
||||
}
|
||||
|
||||
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
|
||||
const unsigned char *bitcode, unsigned bitcode_len)
|
||||
const char *bitcode, unsigned bitcode_len)
|
||||
{
|
||||
LLVMModuleRef mod = radeon_llvm_parse_bitcode(ctx, bitcode, bitcode_len);
|
||||
return LLVMGetNamedMetadataNumOperands(mod, "opencl.kernels");
|
||||
@@ -88,7 +88,7 @@ static void radeon_llvm_optimize(LLVMModuleRef mod)
|
||||
}
|
||||
|
||||
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
|
||||
const unsigned char *bitcode, unsigned bitcode_len)
|
||||
const char *bitcode, unsigned bitcode_len)
|
||||
{
|
||||
LLVMModuleRef mod;
|
||||
unsigned num_kernels;
|
||||
|
@@ -30,10 +30,10 @@
|
||||
#include <llvm-c/Core.h>
|
||||
|
||||
LLVMModuleRef radeon_llvm_parse_bitcode(LLVMContextRef ctx,
|
||||
const unsigned char * bitcode, unsigned bitcode_len);
|
||||
const char * bitcode, unsigned bitcode_len);
|
||||
unsigned radeon_llvm_get_num_kernels(LLVMContextRef ctx,
|
||||
const unsigned char *bitcode, unsigned bitcode_len);
|
||||
const char *bitcode, unsigned bitcode_len);
|
||||
LLVMModuleRef radeon_llvm_get_kernel_module(LLVMContextRef ctx, unsigned index,
|
||||
const unsigned char *bitcode, unsigned bitcode_len);
|
||||
const char *bitcode, unsigned bitcode_len);
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user