st/glsl_to_tgsi: reduce stack explosion in recursive expression visitor
In optimized builds, visit(ir_expression *) experiences inlining with gcc that leads the function to have a roughly 32KB stack frame. This is a problem given that the function is called recursively. In non-optimized builds, the stack frame is much smaller, hence one gets crashes that happen only in optimized builds. Arguably there is a compiler bug or at least a severe misfeature here. In any case, the easy thing to do for now seems to be moving the bulk of the non-recursive code into a separate function. This is sufficient to convince my version of gcc not to blow up the stack frame of the recursive part. Just to be sure, add the gcc-specific noinline attribute to prevent this bug from reoccurring if inliner heuristics change. v2: put ATTRIBUTE_NOINLINE into macros.h Cc: "11.1 11.2" <mesa-stable@lists.freedesktop.org> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95133 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=95026 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92850 Reviewed-by: Ilia Mirkin <imirkin@alum.mit.edu> Reviewed-by: Rob Clark <robdclark@gmail.com>
This commit is contained in:
@@ -450,6 +450,8 @@ public:
|
|||||||
virtual void visit(ir_barrier *);
|
virtual void visit(ir_barrier *);
|
||||||
/*@}*/
|
/*@}*/
|
||||||
|
|
||||||
|
void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;
|
||||||
|
|
||||||
void visit_atomic_counter_intrinsic(ir_call *);
|
void visit_atomic_counter_intrinsic(ir_call *);
|
||||||
void visit_ssbo_intrinsic(ir_call *);
|
void visit_ssbo_intrinsic(ir_call *);
|
||||||
void visit_membar_intrinsic(ir_call *);
|
void visit_membar_intrinsic(ir_call *);
|
||||||
@@ -1535,10 +1537,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
|
|||||||
void
|
void
|
||||||
glsl_to_tgsi_visitor::visit(ir_expression *ir)
|
glsl_to_tgsi_visitor::visit(ir_expression *ir)
|
||||||
{
|
{
|
||||||
unsigned int operand;
|
|
||||||
st_src_reg op[ARRAY_SIZE(ir->operands)];
|
st_src_reg op[ARRAY_SIZE(ir->operands)];
|
||||||
st_src_reg result_src;
|
|
||||||
st_dst_reg result_dst;
|
|
||||||
|
|
||||||
/* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
|
/* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
|
||||||
*/
|
*/
|
||||||
@@ -1561,7 +1560,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
|
|||||||
if (ir->operation == ir_quadop_vector)
|
if (ir->operation == ir_quadop_vector)
|
||||||
assert(!"ir_quadop_vector should have been lowered");
|
assert(!"ir_quadop_vector should have been lowered");
|
||||||
|
|
||||||
for (operand = 0; operand < ir->get_num_operands(); operand++) {
|
for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
|
||||||
this->result.file = PROGRAM_UNDEFINED;
|
this->result.file = PROGRAM_UNDEFINED;
|
||||||
ir->operands[operand]->accept(this);
|
ir->operands[operand]->accept(this);
|
||||||
if (this->result.file == PROGRAM_UNDEFINED) {
|
if (this->result.file == PROGRAM_UNDEFINED) {
|
||||||
@@ -1578,6 +1577,19 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
|
|||||||
assert(!ir->operands[operand]->type->is_matrix());
|
assert(!ir->operands[operand]->type->is_matrix());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
visit_expression(ir, op);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The non-recursive part of the expression visitor lives in a separate
|
||||||
|
* function and should be prevented from being inlined, to avoid a stack
|
||||||
|
* explosion when deeply nested expressions are visited.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
|
||||||
|
{
|
||||||
|
st_src_reg result_src;
|
||||||
|
st_dst_reg result_dst;
|
||||||
|
|
||||||
int vector_elements = ir->operands[0]->type->vector_elements;
|
int vector_elements = ir->operands[0]->type->vector_elements;
|
||||||
if (ir->operands[1]) {
|
if (ir->operands[1]) {
|
||||||
vector_elements = MAX2(vector_elements,
|
vector_elements = MAX2(vector_elements,
|
||||||
|
@@ -214,6 +214,12 @@ do { \
|
|||||||
#define MUST_CHECK
|
#define MUST_CHECK
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
|
||||||
|
#define ATTRIBUTE_NOINLINE __attribute__((noinline))
|
||||||
|
#else
|
||||||
|
#define ATTRIBUTE_NOINLINE
|
||||||
|
#endif
|
||||||
|
|
||||||
/** Compute ceiling of integer quotient of A divided by B. */
|
/** Compute ceiling of integer quotient of A divided by B. */
|
||||||
#define DIV_ROUND_UP( A, B ) ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
|
#define DIV_ROUND_UP( A, B ) ( (A) % (B) == 0 ? (A)/(B) : (A)/(B)+1 )
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user