Oh yes and missing ALIGN + MAX2 too. I guess we could easily move these to util code. That plus the things I already mentioned should be all needed I think. But I strongly believe either this needs to be done or we should revert it.
Roland Am 24.09.2014 00:44, schrieb Roland Scheidegger: > Actually I would definitely NAK this in the current state if it weren't > too late. > We cannot let mesa includes creep in there as that defeats the purpose > of the independent util code completely. > > I did a quick look and it seems the only include it really needs from > mesa is bitset, which could be moved too. > bitset itself uses a GLuint type, plus has a dependency on ffs which > needs imports.h. > > So I guess there needs to be some more work. > > Roland > > > Am 24.09.2014 00:40, schrieb Brian Paul: >> I just pushed a quick fix to get things compiling again. I'm wondering >> about that dependency too though. >> >> -Brian >> >> On 09/23/2014 04:26 PM, Roland Scheidegger wrote: >>> This change seems to cause compile failure with scons: >>> >>> Compiling src/util/register_allocate.c ... >>> src/util/register_allocate.c:76:26: fatal error: main/imports.h: No such >>> file or directory >>> #include "main/imports.h" >>> >>> Looks like it could be fixed by patching up the CPPPATH in the >>> SConscript file though I don't think it's actually a good idea to have >>> the util code which was meant to be outside of classic mesa to depend on >>> includes in there? >>> >>> >>> Roland >>> >>> >>> Am 23.09.2014 02:32, schrieb Eric Anholt: >>>> The r300 gallium driver is using it outside of the Mesa tree, and I >>>> wanted >>>> to do so for vc4 as well. Rather than make the multiple-definitions >>>> problem even more complicated, just move it to more-shared code. >>>> --- >>>> src/gallium/drivers/r300/Makefile.am | 14 +- >>>> src/gallium/drivers/r300/Makefile.sources | 3 - >>>> .../drivers/r300/compiler/radeon_pair_regalloc.c | 2 +- >>>> src/mesa/Makefile.sources | 1 - >>>> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- >>>> src/mesa/drivers/dri/i965/brw_fs.h | 2 +- >>>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +- >>>> .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 2 +- >>>> src/mesa/program/register_allocate.c | 654 >>>> --------------------- >>>> src/mesa/program/register_allocate.h | 79 --- >>>> src/util/Makefile.am | 3 + >>>> src/util/Makefile.sources | 2 + >>>> src/util/register_allocate.c | 654 >>>> +++++++++++++++++++++ >>>> src/util/register_allocate.h | 79 +++ >>>> 14 files changed, 745 insertions(+), 754 deletions(-) >>>> delete mode 100644 src/mesa/program/register_allocate.c >>>> delete mode 100644 src/mesa/program/register_allocate.h >>>> create mode 100644 src/util/register_allocate.c >>>> create mode 100644 src/util/register_allocate.h >>>> >>>> diff --git a/src/gallium/drivers/r300/Makefile.am >>>> b/src/gallium/drivers/r300/Makefile.am >>>> index 7692bd8..ead7a87 100644 >>>> --- a/src/gallium/drivers/r300/Makefile.am >>>> +++ b/src/gallium/drivers/r300/Makefile.am >>>> @@ -13,11 +13,11 @@ AM_CFLAGS = \ >>>> $(LLVM_CFLAGS) \ >>>> $(RADEON_CFLAGS) >>>> >>>> -noinst_LTLIBRARIES = libr300.la libr300-helper.la >>>> +noinst_LTLIBRARIES = libr300.la >>>> check_PROGRAMS = r300_compiler_tests >>>> TESTS = r300_compiler_tests >>>> >>>> -r300_compiler_tests_LDADD = libr300.la libr300-helper.la \ >>>> +r300_compiler_tests_LDADD = libr300.la \ >>>> $(top_builddir)/src/gallium/auxiliary/libgallium.la \ >>>> $(top_builddir)/src/util/libmesautil.la \ >>>> $(GALLIUM_COMMON_LIB_DEPS) >>>> @@ -28,16 +28,6 @@ r300_compiler_tests_SOURCES = >>>> $(COMPILER_TESTS_SOURCES) >>>> >>>> libr300_la_SOURCES = $(C_SOURCES) >>>> >>>> -# These two files are included in libmesagallium, which is included >>>> in the dri >>>> -# targets. So, they were added directly to r300g the dri-r300 target >>>> would have >>>> -# duplicated symbols, and if they weren't the other *-r300 targets >>>> would fail >>>> -# with undefined symbols. >>>> -# >>>> -# Solve this by building them into a separate helper library that >>>> can be linked >>>> -# in place of libmesagallium. >>>> -libr300_helper_la_CPPFLAGS = -I$(top_srcdir)/src >>>> -libr300_helper_la_SOURCES = $(HELPER_SOURCES) >>>> - >>>> EXTRA_DIST = Android.mk \ >>>> compiler/tests/omod_two_writers.test \ >>>> compiler/tests/regalloc_tex_1d_swizzle.test >>>> diff --git a/src/gallium/drivers/r300/Makefile.sources >>>> b/src/gallium/drivers/r300/Makefile.sources >>>> index ab1c9de..1ba6db0 100644 >>>> --- a/src/gallium/drivers/r300/Makefile.sources >>>> +++ b/src/gallium/drivers/r300/Makefile.sources >>>> @@ -108,6 +108,3 @@ COMPILER_TESTS_SOURCES := \ >>>> compiler/tests/rc_test_helpers.h \ >>>> compiler/tests/unit_test.c \ >>>> compiler/tests/unit_test.h >>>> - >>>> -HELPER_SOURCES := \ >>>> - register_allocate.c >>>> diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c >>>> b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c >>>> index b854a2f..64b225d 100644 >>>> --- a/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c >>>> +++ b/src/gallium/drivers/r300/compiler/radeon_pair_regalloc.c >>>> @@ -31,7 +31,7 @@ >>>> #include <stdio.h> >>>> >>>> #include "main/glheader.h" >>>> -#include "program/register_allocate.h" >>>> +#include "util/register_allocate.h" >>>> #include "util/u_memory.h" >>>> #include "util/ralloc.h" >>>> >>>> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources >>>> index 12336c0..4755018 100644 >>>> --- a/src/mesa/Makefile.sources >>>> +++ b/src/mesa/Makefile.sources >>>> @@ -280,7 +280,6 @@ PROGRAM_FILES = \ >>>> $(SRCDIR)program/prog_print.c \ >>>> $(SRCDIR)program/prog_statevars.c \ >>>> $(SRCDIR)program/programopt.c \ >>>> - $(SRCDIR)program/register_allocate.c \ >>>> $(SRCDIR)program/sampler.cpp \ >>>> $(SRCDIR)program/string_to_uint_map.cpp \ >>>> $(SRCDIR)program/symbol_table.c \ >>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >>>> b/src/mesa/drivers/dri/i965/brw_fs.cpp >>>> index fa95c81..5b628e0 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >>>> @@ -38,7 +38,7 @@ extern "C" { >>>> #include "main/fbobject.h" >>>> #include "program/prog_parameter.h" >>>> #include "program/prog_print.h" >>>> -#include "program/register_allocate.h" >>>> +#include "util/register_allocate.h" >>>> #include "program/sampler.h" >>>> #include "program/hash_table.h" >>>> #include "brw_context.h" >>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >>>> b/src/mesa/drivers/dri/i965/brw_fs.h >>>> index d40a2e3..5e0a426 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_fs.h >>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >>>> @@ -39,7 +39,7 @@ extern "C" { >>>> #include "program/prog_parameter.h" >>>> #include "program/prog_print.h" >>>> #include "program/prog_optimize.h" >>>> -#include "program/register_allocate.h" >>>> +#include "util/register_allocate.h" >>>> #include "program/sampler.h" >>>> #include "program/hash_table.h" >>>> #include "brw_context.h" >>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>>> index 2d5318a..54643c1 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >>>> @@ -36,7 +36,7 @@ extern "C" { >>>> #include "program/prog_parameter.h" >>>> #include "program/prog_print.h" >>>> #include "program/prog_optimize.h" >>>> -#include "program/register_allocate.h" >>>> +#include "util/register_allocate.h" >>>> #include "program/sampler.h" >>>> #include "program/hash_table.h" >>>> #include "brw_context.h" >>>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp >>>> b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp >>>> index ddab342..29feec0 100644 >>>> --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp >>>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp >>>> @@ -23,7 +23,7 @@ >>>> >>>> extern "C" { >>>> #include "main/macros.h" >>>> -#include "program/register_allocate.h" >>>> +#include "util/register_allocate.h" >>>> } /* extern "C" */ >>>> >>>> #include "brw_vec4.h" >>>> diff --git a/src/mesa/program/register_allocate.c >>>> b/src/mesa/program/register_allocate.c >>>> deleted file mode 100644 >>>> index 7faf672..0000000 >>>> --- a/src/mesa/program/register_allocate.c >>>> +++ /dev/null >>>> @@ -1,654 +0,0 @@ >>>> -/* >>>> - * Copyright © 2010 Intel Corporation >>>> - * >>>> - * Permission is hereby granted, free of charge, to any person >>>> obtaining a >>>> - * copy of this software and associated documentation files (the >>>> "Software"), >>>> - * to deal in the Software without restriction, including without >>>> limitation >>>> - * the rights to use, copy, modify, merge, publish, distribute, >>>> sublicense, >>>> - * and/or sell copies of the Software, and to permit persons to whom >>>> the >>>> - * Software is furnished to do so, subject to the following conditions: >>>> - * >>>> - * The above copyright notice and this permission notice (including >>>> the next >>>> - * paragraph) shall be included in all copies or substantial >>>> portions of the >>>> - * Software. >>>> - * >>>> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>>> EXPRESS OR >>>> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>>> MERCHANTABILITY, >>>> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO >>>> EVENT SHALL >>>> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES >>>> OR OTHER >>>> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>>> ARISING >>>> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>>> OTHER DEALINGS >>>> - * IN THE SOFTWARE. >>>> - * >>>> - * Authors: >>>> - * Eric Anholt <e...@anholt.net> >>>> - * >>>> - */ >>>> - >>>> -/** @file register_allocate.c >>>> - * >>>> - * Graph-coloring register allocator. >>>> - * >>>> - * The basic idea of graph coloring is to make a node in a graph for >>>> - * every thing that needs a register (color) number assigned, and make >>>> - * edges in the graph between nodes that interfere (can't be allocated >>>> - * to the same register at the same time). >>>> - * >>>> - * During the "simplify" process, any any node with fewer edges than >>>> - * there are registers means that that edge can get assigned a >>>> - * register regardless of what its neighbors choose, so that node is >>>> - * pushed on a stack and removed (with its edges) from the graph. >>>> - * That likely causes other nodes to become trivially colorable as >>>> well. >>>> - * >>>> - * Then during the "select" process, nodes are popped off of that >>>> - * stack, their edges restored, and assigned a color different from >>>> - * their neighbors. Because they were pushed on the stack only when >>>> - * they were trivially colorable, any color chosen won't interfere >>>> - * with the registers to be popped later. >>>> - * >>>> - * The downside to most graph coloring is that real hardware often has >>>> - * limitations, like registers that need to be allocated to a node in >>>> - * pairs, or aligned on some boundary. This implementation follows >>>> - * the paper "Retargetable Graph-Coloring Register Allocation for >>>> - * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström. >>>> - * >>>> - * In this system, there are register classes each containing various >>>> - * registers, and registers may interfere with other registers. For >>>> - * example, one might have a class of base registers, and a class of >>>> - * aligned register pairs that would each interfere with their pair of >>>> - * the base registers. Each node has a register class it needs to be >>>> - * assigned to. Define p(B) to be the size of register class B, and >>>> - * q(B,C) to be the number of registers in B that the worst choice >>>> - * register in C could conflict with. Then, this system replaces the >>>> - * basic graph coloring test of "fewer edges from this node than there >>>> - * are registers" with "For this node of class B, the sum of q(B,C) >>>> - * for each neighbor node of class C is less than pB". >>>> - * >>>> - * A nice feature of the pq test is that q(B,C) can be computed once >>>> - * up front and stored in a 2-dimensional array, so that the cost of >>>> - * coloring a node is constant with the number of registers. We do >>>> - * this during ra_set_finalize(). >>>> - */ >>>> - >>>> -#include <stdbool.h> >>>> - >>>> -#include "util/ralloc.h" >>>> -#include "main/imports.h" >>>> -#include "main/macros.h" >>>> -#include "main/mtypes.h" >>>> -#include "main/bitset.h" >>>> -#include "register_allocate.h" >>>> - >>>> -#define NO_REG ~0 >>>> - >>>> -struct ra_reg { >>>> - BITSET_WORD *conflicts; >>>> - unsigned int *conflict_list; >>>> - unsigned int conflict_list_size; >>>> - unsigned int num_conflicts; >>>> -}; >>>> - >>>> -struct ra_regs { >>>> - struct ra_reg *regs; >>>> - unsigned int count; >>>> - >>>> - struct ra_class **classes; >>>> - unsigned int class_count; >>>> - >>>> - bool round_robin; >>>> -}; >>>> - >>>> -struct ra_class { >>>> - /** >>>> - * Bitset indicating which registers belong to this class. >>>> - * >>>> - * (If bit N is set, then register N belongs to this class.) >>>> - */ >>>> - BITSET_WORD *regs; >>>> - >>>> - /** >>>> - * p(B) in Runeson/Nyström paper. >>>> - * >>>> - * This is "how many regs are in the set." >>>> - */ >>>> - unsigned int p; >>>> - >>>> - /** >>>> - * q(B,C) (indexed by C, B is this register class) in >>>> - * Runeson/Nyström paper. This is "how many registers of B could >>>> - * the worst choice register from C conflict with". >>>> - */ >>>> - unsigned int *q; >>>> -}; >>>> - >>>> -struct ra_node { >>>> - /** @{ >>>> - * >>>> - * List of which nodes this node interferes with. This should be >>>> - * symmetric with the other node. >>>> - */ >>>> - BITSET_WORD *adjacency; >>>> - unsigned int *adjacency_list; >>>> - unsigned int adjacency_list_size; >>>> - unsigned int adjacency_count; >>>> - /** @} */ >>>> - >>>> - unsigned int class; >>>> - >>>> - /* Register, if assigned, or NO_REG. */ >>>> - unsigned int reg; >>>> - >>>> - /** >>>> - * Set when the node is in the trivially colorable stack. When >>>> - * set, the adjacency to this node is ignored, to implement the >>>> - * "remove the edge from the graph" in simplification without >>>> - * having to actually modify the adjacency_list. >>>> - */ >>>> - bool in_stack; >>>> - >>>> - /** >>>> - * The q total, as defined in the Runeson/Nyström paper, for all the >>>> - * interfering nodes not in the stack. >>>> - */ >>>> - unsigned int q_total; >>>> - >>>> - /* For an implementation that needs register spilling, this is the >>>> - * approximate cost of spilling this node. >>>> - */ >>>> - float spill_cost; >>>> -}; >>>> - >>>> -struct ra_graph { >>>> - struct ra_regs *regs; >>>> - /** >>>> - * the variables that need register allocation. >>>> - */ >>>> - struct ra_node *nodes; >>>> - unsigned int count; /**< count of nodes. */ >>>> - >>>> - unsigned int *stack; >>>> - unsigned int stack_count; >>>> -}; >>>> - >>>> -/** >>>> - * Creates a set of registers for the allocator. >>>> - * >>>> - * mem_ctx is a ralloc context for the allocator. The reg set may >>>> be freed >>>> - * using ralloc_free(). >>>> - */ >>>> -struct ra_regs * >>>> -ra_alloc_reg_set(void *mem_ctx, unsigned int count) >>>> -{ >>>> - unsigned int i; >>>> - struct ra_regs *regs; >>>> - >>>> - regs = rzalloc(mem_ctx, struct ra_regs); >>>> - regs->count = count; >>>> - regs->regs = rzalloc_array(regs, struct ra_reg, count); >>>> - >>>> - for (i = 0; i < count; i++) { >>>> - regs->regs[i].conflicts = rzalloc_array(regs->regs, BITSET_WORD, >>>> - BITSET_WORDS(count)); >>>> - BITSET_SET(regs->regs[i].conflicts, i); >>>> - >>>> - regs->regs[i].conflict_list = ralloc_array(regs->regs, >>>> unsigned int, 4); >>>> - regs->regs[i].conflict_list_size = 4; >>>> - regs->regs[i].conflict_list[0] = i; >>>> - regs->regs[i].num_conflicts = 1; >>>> - } >>>> - >>>> - return regs; >>>> -} >>>> - >>>> -/** >>>> - * The register allocator by default prefers to allocate low >>>> register numbers, >>>> - * since it was written for hardware (gen4/5 Intel) that is limited >>>> in its >>>> - * multithreadedness by the number of registers used in a given shader. >>>> - * >>>> - * However, for hardware without that restriction, densely packed >>>> register >>>> - * allocation can put serious constraints on instruction >>>> scheduling. This >>>> - * function tells the allocator to rotate around the registers if >>>> possible as >>>> - * it allocates the nodes. >>>> - */ >>>> -void >>>> -ra_set_allocate_round_robin(struct ra_regs *regs) >>>> -{ >>>> - regs->round_robin = true; >>>> -} >>>> - >>>> -static void >>>> -ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned >>>> int r2) >>>> -{ >>>> - struct ra_reg *reg1 = ®s->regs[r1]; >>>> - >>>> - if (reg1->conflict_list_size == reg1->num_conflicts) { >>>> - reg1->conflict_list_size *= 2; >>>> - reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, >>>> - unsigned int, reg1->conflict_list_size); >>>> - } >>>> - reg1->conflict_list[reg1->num_conflicts++] = r2; >>>> - BITSET_SET(reg1->conflicts, r2); >>>> -} >>>> - >>>> -void >>>> -ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned >>>> int r2) >>>> -{ >>>> - if (!BITSET_TEST(regs->regs[r1].conflicts, r2)) { >>>> - ra_add_conflict_list(regs, r1, r2); >>>> - ra_add_conflict_list(regs, r2, r1); >>>> - } >>>> -} >>>> - >>>> -/** >>>> - * Adds a conflict between base_reg and reg, and also between reg and >>>> - * anything that base_reg conflicts with. >>>> - * >>>> - * This can simplify code for setting up multiple register classes >>>> - * which are aggregates of some base hardware registers, compared to >>>> - * explicitly using ra_add_reg_conflict. >>>> - */ >>>> -void >>>> -ra_add_transitive_reg_conflict(struct ra_regs *regs, >>>> - unsigned int base_reg, unsigned int reg) >>>> -{ >>>> - int i; >>>> - >>>> - ra_add_reg_conflict(regs, reg, base_reg); >>>> - >>>> - for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { >>>> - ra_add_reg_conflict(regs, reg, >>>> regs->regs[base_reg].conflict_list[i]); >>>> - } >>>> -} >>>> - >>>> -unsigned int >>>> -ra_alloc_reg_class(struct ra_regs *regs) >>>> -{ >>>> - struct ra_class *class; >>>> - >>>> - regs->classes = reralloc(regs->regs, regs->classes, struct >>>> ra_class *, >>>> - regs->class_count + 1); >>>> - >>>> - class = rzalloc(regs, struct ra_class); >>>> - regs->classes[regs->class_count] = class; >>>> - >>>> - class->regs = rzalloc_array(class, BITSET_WORD, >>>> BITSET_WORDS(regs->count)); >>>> - >>>> - return regs->class_count++; >>>> -} >>>> - >>>> -void >>>> -ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) >>>> -{ >>>> - struct ra_class *class = regs->classes[c]; >>>> - >>>> - BITSET_SET(class->regs, r); >>>> - class->p++; >>>> -} >>>> - >>>> -/** >>>> - * Returns true if the register belongs to the given class. >>>> - */ >>>> -static bool >>>> -reg_belongs_to_class(unsigned int r, struct ra_class *c) >>>> -{ >>>> - return BITSET_TEST(c->regs, r); >>>> -} >>>> - >>>> -/** >>>> - * Must be called after all conflicts and register classes have been >>>> - * set up and before the register set is used for allocation. >>>> - * To avoid costly q value computation, use the q_values paramater >>>> - * to pass precomputed q values to this function. >>>> - */ >>>> -void >>>> -ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) >>>> -{ >>>> - unsigned int b, c; >>>> - >>>> - for (b = 0; b < regs->class_count; b++) { >>>> - regs->classes[b]->q = ralloc_array(regs, unsigned int, >>>> regs->class_count); >>>> - } >>>> - >>>> - if (q_values) { >>>> - for (b = 0; b < regs->class_count; b++) { >>>> - for (c = 0; c < regs->class_count; c++) { >>>> - regs->classes[b]->q[c] = q_values[b][c]; >>>> - } >>>> - } >>>> - return; >>>> - } >>>> - >>>> - /* Compute, for each class B and C, how many regs of B an >>>> - * allocation to C could conflict with. >>>> - */ >>>> - for (b = 0; b < regs->class_count; b++) { >>>> - for (c = 0; c < regs->class_count; c++) { >>>> - unsigned int rc; >>>> - int max_conflicts = 0; >>>> - >>>> - for (rc = 0; rc < regs->count; rc++) { >>>> - int conflicts = 0; >>>> - int i; >>>> - >>>> - if (!reg_belongs_to_class(rc, regs->classes[c])) >>>> - continue; >>>> - >>>> - for (i = 0; i < regs->regs[rc].num_conflicts; i++) { >>>> - unsigned int rb = regs->regs[rc].conflict_list[i]; >>>> - if (BITSET_TEST(regs->classes[b]->regs, rb)) >>>> - conflicts++; >>>> - } >>>> - max_conflicts = MAX2(max_conflicts, conflicts); >>>> - } >>>> - regs->classes[b]->q[c] = max_conflicts; >>>> - } >>>> - } >>>> -} >>>> - >>>> -static void >>>> -ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned >>>> int n2) >>>> -{ >>>> - BITSET_SET(g->nodes[n1].adjacency, n2); >>>> - >>>> - if (n1 != n2) { >>>> - int n1_class = g->nodes[n1].class; >>>> - int n2_class = g->nodes[n2].class; >>>> - g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; >>>> - } >>>> - >>>> - if (g->nodes[n1].adjacency_count >= >>>> - g->nodes[n1].adjacency_list_size) { >>>> - g->nodes[n1].adjacency_list_size *= 2; >>>> - g->nodes[n1].adjacency_list = reralloc(g, >>>> g->nodes[n1].adjacency_list, >>>> - unsigned int, >>>> - >>>> g->nodes[n1].adjacency_list_size); >>>> - } >>>> - >>>> - g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; >>>> - g->nodes[n1].adjacency_count++; >>>> -} >>>> - >>>> -struct ra_graph * >>>> -ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) >>>> -{ >>>> - struct ra_graph *g; >>>> - unsigned int i; >>>> - >>>> - g = rzalloc(regs, struct ra_graph); >>>> - g->regs = regs; >>>> - g->nodes = rzalloc_array(g, struct ra_node, count); >>>> - g->count = count; >>>> - >>>> - g->stack = rzalloc_array(g, unsigned int, count); >>>> - >>>> - for (i = 0; i < count; i++) { >>>> - int bitset_count = BITSET_WORDS(count); >>>> - g->nodes[i].adjacency = rzalloc_array(g, BITSET_WORD, >>>> bitset_count); >>>> - >>>> - g->nodes[i].adjacency_list_size = 4; >>>> - g->nodes[i].adjacency_list = >>>> - ralloc_array(g, unsigned int, >>>> g->nodes[i].adjacency_list_size); >>>> - g->nodes[i].adjacency_count = 0; >>>> - g->nodes[i].q_total = 0; >>>> - >>>> - ra_add_node_adjacency(g, i, i); >>>> - g->nodes[i].reg = NO_REG; >>>> - } >>>> - >>>> - return g; >>>> -} >>>> - >>>> -void >>>> -ra_set_node_class(struct ra_graph *g, >>>> - unsigned int n, unsigned int class) >>>> -{ >>>> - g->nodes[n].class = class; >>>> -} >>>> - >>>> -void >>>> -ra_add_node_interference(struct ra_graph *g, >>>> - unsigned int n1, unsigned int n2) >>>> -{ >>>> - if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) { >>>> - ra_add_node_adjacency(g, n1, n2); >>>> - ra_add_node_adjacency(g, n2, n1); >>>> - } >>>> -} >>>> - >>>> -static bool >>>> -pq_test(struct ra_graph *g, unsigned int n) >>>> -{ >>>> - int n_class = g->nodes[n].class; >>>> - >>>> - return g->nodes[n].q_total < g->regs->classes[n_class]->p; >>>> -} >>>> - >>>> -static void >>>> -decrement_q(struct ra_graph *g, unsigned int n) >>>> -{ >>>> - unsigned int i; >>>> - int n_class = g->nodes[n].class; >>>> - >>>> - for (i = 0; i < g->nodes[n].adjacency_count; i++) { >>>> - unsigned int n2 = g->nodes[n].adjacency_list[i]; >>>> - unsigned int n2_class = g->nodes[n2].class; >>>> - >>>> - if (n != n2 && !g->nodes[n2].in_stack) { >>>> - assert(g->nodes[n2].q_total >= >>>> g->regs->classes[n2_class]->q[n_class]); >>>> - g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; >>>> - } >>>> - } >>>> -} >>>> - >>>> -/** >>>> - * Simplifies the interference graph by pushing all >>>> - * trivially-colorable nodes into a stack of nodes to be colored, >>>> - * removing them from the graph, and rinsing and repeating. >>>> - * >>>> - * If we encounter a case where we can't push any nodes on the >>>> stack, then >>>> - * we optimistically choose a node and push it on the stack. We >>>> heuristically >>>> - * push the node with the lowest total q value, since it has the fewest >>>> - * neighbors and therefore is most likely to be allocated. >>>> - */ >>>> -static void >>>> -ra_simplify(struct ra_graph *g) >>>> -{ >>>> - bool progress = true; >>>> - int i; >>>> - >>>> - while (progress) { >>>> - unsigned int best_optimistic_node = ~0; >>>> - unsigned int lowest_q_total = ~0; >>>> - >>>> - progress = false; >>>> - >>>> - for (i = g->count - 1; i >= 0; i--) { >>>> - if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) >>>> - continue; >>>> - >>>> - if (pq_test(g, i)) { >>>> - decrement_q(g, i); >>>> - g->stack[g->stack_count] = i; >>>> - g->stack_count++; >>>> - g->nodes[i].in_stack = true; >>>> - progress = true; >>>> - } else { >>>> - unsigned int new_q_total = g->nodes[i].q_total; >>>> - if (new_q_total < lowest_q_total) { >>>> - best_optimistic_node = i; >>>> - lowest_q_total = new_q_total; >>>> - } >>>> - } >>>> - } >>>> - >>>> - if (!progress && best_optimistic_node != ~0) { >>>> - decrement_q(g, best_optimistic_node); >>>> - g->stack[g->stack_count] = best_optimistic_node; >>>> - g->stack_count++; >>>> - g->nodes[best_optimistic_node].in_stack = true; >>>> - progress = true; >>>> - } >>>> - } >>>> -} >>>> - >>>> -/** >>>> - * Pops nodes from the stack back into the graph, coloring them with >>>> - * registers as they go. >>>> - * >>>> - * If all nodes were trivially colorable, then this must succeed. If >>>> - * not (optimistic coloring), then it may return false; >>>> - */ >>>> -static bool >>>> -ra_select(struct ra_graph *g) >>>> -{ >>>> - int i; >>>> - int start_search_reg = 0; >>>> - >>>> - while (g->stack_count != 0) { >>>> - unsigned int ri; >>>> - unsigned int r = -1; >>>> - int n = g->stack[g->stack_count - 1]; >>>> - struct ra_class *c = g->regs->classes[g->nodes[n].class]; >>>> - >>>> - /* Find the lowest-numbered reg which is not used by a member >>>> - * of the graph adjacent to us. >>>> - */ >>>> - for (ri = 0; ri < g->regs->count; ri++) { >>>> - r = (start_search_reg + ri) % g->regs->count; >>>> - if (!reg_belongs_to_class(r, c)) >>>> - continue; >>>> - >>>> - /* Check if any of our neighbors conflict with this register >>>> choice. */ >>>> - for (i = 0; i < g->nodes[n].adjacency_count; i++) { >>>> - unsigned int n2 = g->nodes[n].adjacency_list[i]; >>>> - >>>> - if (!g->nodes[n2].in_stack && >>>> - BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { >>>> - break; >>>> - } >>>> - } >>>> - if (i == g->nodes[n].adjacency_count) >>>> - break; >>>> - } >>>> - >>>> - /* set this to false even if we return here so that >>>> - * ra_get_best_spill_node() considers this node later. >>>> - */ >>>> - g->nodes[n].in_stack = false; >>>> - >>>> - if (ri == g->regs->count) >>>> - return false; >>>> - >>>> - g->nodes[n].reg = r; >>>> - g->stack_count--; >>>> - >>>> - if (g->regs->round_robin) >>>> - start_search_reg = r + 1; >>>> - } >>>> - >>>> - return true; >>>> -} >>>> - >>>> -bool >>>> -ra_allocate(struct ra_graph *g) >>>> -{ >>>> - ra_simplify(g); >>>> - return ra_select(g); >>>> -} >>>> - >>>> -unsigned int >>>> -ra_get_node_reg(struct ra_graph *g, unsigned int n) >>>> -{ >>>> - return g->nodes[n].reg; >>>> -} >>>> - >>>> -/** >>>> - * Forces a node to a specific register. This can be used to avoid >>>> - * creating a register class containing one node when handling data >>>> - * that must live in a fixed location and is known to not conflict >>>> - * with other forced register assignment (as is common with shader >>>> - * input data). These nodes do not end up in the stack during >>>> - * ra_simplify(), and thus at ra_select() time it is as if they were >>>> - * the first popped off the stack and assigned their fixed locations. >>>> - * Nodes that use this function do not need to be assigned a register >>>> - * class. >>>> - * >>>> - * Must be called before ra_simplify(). >>>> - */ >>>> -void >>>> -ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg) >>>> -{ >>>> - g->nodes[n].reg = reg; >>>> - g->nodes[n].in_stack = false; >>>> -} >>>> - >>>> -static float >>>> -ra_get_spill_benefit(struct ra_graph *g, unsigned int n) >>>> -{ >>>> - int j; >>>> - float benefit = 0; >>>> - int n_class = g->nodes[n].class; >>>> - >>>> - /* Define the benefit of eliminating an interference between n, n2 >>>> - * through spilling as q(C, B) / p(C). This is similar to the >>>> - * "count number of edges" approach of traditional graph coloring, >>>> - * but takes classes into account. >>>> - */ >>>> - for (j = 0; j < g->nodes[n].adjacency_count; j++) { >>>> - unsigned int n2 = g->nodes[n].adjacency_list[j]; >>>> - if (n != n2) { >>>> - unsigned int n2_class = g->nodes[n2].class; >>>> - benefit += ((float)g->regs->classes[n_class]->q[n2_class] / >>>> - g->regs->classes[n_class]->p); >>>> - } >>>> - } >>>> - >>>> - return benefit; >>>> -} >>>> - >>>> -/** >>>> - * Returns a node number to be spilled according to the cost/benefit >>>> using >>>> - * the pq test, or -1 if there are no spillable nodes. >>>> - */ >>>> -int >>>> -ra_get_best_spill_node(struct ra_graph *g) >>>> -{ >>>> - unsigned int best_node = -1; >>>> - float best_benefit = 0.0; >>>> - unsigned int n; >>>> - >>>> - /* Consider any nodes that we colored successfully or the node we >>>> failed to >>>> - * color for spilling. When we failed to color a node in >>>> ra_select(), we >>>> - * only considered these nodes, so spilling any other ones would >>>> not result >>>> - * in us making progress. >>>> - */ >>>> - for (n = 0; n < g->count; n++) { >>>> - float cost = g->nodes[n].spill_cost; >>>> - float benefit; >>>> - >>>> - if (cost <= 0.0) >>>> - continue; >>>> - >>>> - if (g->nodes[n].in_stack) >>>> - continue; >>>> - >>>> - benefit = ra_get_spill_benefit(g, n); >>>> - >>>> - if (benefit / cost > best_benefit) { >>>> - best_benefit = benefit / cost; >>>> - best_node = n; >>>> - } >>>> - } >>>> - >>>> - return best_node; >>>> -} >>>> - >>>> -/** >>>> - * Only nodes with a spill cost set (cost != 0.0) will be considered >>>> - * for register spilling. >>>> - */ >>>> -void >>>> -ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) >>>> -{ >>>> - g->nodes[n].spill_cost = cost; >>>> -} >>>> diff --git a/src/mesa/program/register_allocate.h >>>> b/src/mesa/program/register_allocate.h >>>> deleted file mode 100644 >>>> index dc68744..0000000 >>>> --- a/src/mesa/program/register_allocate.h >>>> +++ /dev/null >>>> @@ -1,79 +0,0 @@ >>>> -/* >>>> - * Copyright © 2010 Intel Corporation >>>> - * >>>> - * Permission is hereby granted, free of charge, to any person >>>> obtaining a >>>> - * copy of this software and associated documentation files (the >>>> "Software"), >>>> - * to deal in the Software without restriction, including without >>>> limitation >>>> - * the rights to use, copy, modify, merge, publish, distribute, >>>> sublicense, >>>> - * and/or sell copies of the Software, and to permit persons to whom >>>> the >>>> - * Software is furnished to do so, subject to the following conditions: >>>> - * >>>> - * The above copyright notice and this permission notice (including >>>> the next >>>> - * paragraph) shall be included in all copies or substantial >>>> portions of the >>>> - * Software. >>>> - * >>>> - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>>> EXPRESS OR >>>> - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>>> MERCHANTABILITY, >>>> - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO >>>> EVENT SHALL >>>> - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES >>>> OR OTHER >>>> - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>>> ARISING >>>> - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>>> OTHER DEALINGS >>>> - * IN THE SOFTWARE. >>>> - * >>>> - * Authors: >>>> - * Eric Anholt <e...@anholt.net> >>>> - * >>>> - */ >>>> - >>>> -#include <stdbool.h> >>>> - >>>> -struct ra_class; >>>> -struct ra_regs; >>>> - >>>> -/* @{ >>>> - * Register set setup. >>>> - * >>>> - * This should be done once at backend initializaion, as >>>> - * ra_set_finalize is O(r^2*c^2). The registers may be virtual >>>> - * registers, such as aligned register pairs that conflict with the >>>> - * two real registers from which they are composed. >>>> - */ >>>> -struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count); >>>> -void ra_set_allocate_round_robin(struct ra_regs *regs); >>>> -unsigned int ra_alloc_reg_class(struct ra_regs *regs); >>>> -void ra_add_reg_conflict(struct ra_regs *regs, >>>> - unsigned int r1, unsigned int r2); >>>> -void ra_add_transitive_reg_conflict(struct ra_regs *regs, >>>> - unsigned int base_reg, unsigned int reg); >>>> -void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned >>>> int reg); >>>> -void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a, >>>> - unsigned int class_b, unsigned int >>>> num_conflicts); >>>> -void ra_set_finalize(struct ra_regs *regs, unsigned int **conflicts); >>>> -/** @} */ >>>> - >>>> -/** @{ Interference graph setup. >>>> - * >>>> - * Each interference graph node is a virtual variable in the IL. It >>>> - * is up to the user to ra_set_node_class() for the virtual variable, >>>> - * and compute live ranges and ra_node_interfere() between conflicting >>>> - * live ranges. Note that an interference *must not* be added between >>>> - * two nodes if their classes haven't been assigned yet. The user >>>> - * should set the class of each node before building the interference >>>> - * graph. >>>> - */ >>>> -struct ra_graph *ra_alloc_interference_graph(struct ra_regs *regs, >>>> - unsigned int count); >>>> -void ra_set_node_class(struct ra_graph *g, unsigned int n, unsigned >>>> int c); >>>> -void ra_add_node_interference(struct ra_graph *g, >>>> - unsigned int n1, unsigned int n2); >>>> -/** @} */ >>>> - >>>> -/** @{ Graph-coloring register allocation */ >>>> -bool ra_allocate(struct ra_graph *g); >>>> - >>>> -unsigned int ra_get_node_reg(struct ra_graph *g, unsigned int n); >>>> -void ra_set_node_reg(struct ra_graph * g, unsigned int n, unsigned >>>> int reg); >>>> -void ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, >>>> float cost); >>>> -int ra_get_best_spill_node(struct ra_graph *g); >>>> -/** @} */ >>>> - >>>> diff --git a/src/util/Makefile.am b/src/util/Makefile.am >>>> index 4733a1a..8d5f90e 100644 >>>> --- a/src/util/Makefile.am >>>> +++ b/src/util/Makefile.am >>>> @@ -28,6 +28,9 @@ noinst_LTLIBRARIES = libmesautil.la >>>> libmesautil_la_CPPFLAGS = \ >>>> $(DEFINES) \ >>>> -I$(top_srcdir)/include \ >>>> + -I$(top_srcdir)/src \ >>>> + -I$(top_srcdir)/src/mapi \ >>>> + -I$(top_srcdir)/src/mesa \ >>>> $(VISIBILITY_CFLAGS) >>>> >>>> libmesautil_la_SOURCES = \ >>>> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources >>>> index c34475a..952b799 100644 >>>> --- a/src/util/Makefile.sources >>>> +++ b/src/util/Makefile.sources >>>> @@ -1,6 +1,8 @@ >>>> MESA_UTIL_FILES := \ >>>> hash_table.c \ >>>> ralloc.c \ >>>> + register_allocate.c \ >>>> + register_allocate.h \ >>>> rgtc.c >>>> >>>> MESA_UTIL_GENERATED_FILES = \ >>>> diff --git a/src/util/register_allocate.c b/src/util/register_allocate.c >>>> new file mode 100644 >>>> index 0000000..7faf672 >>>> --- /dev/null >>>> +++ b/src/util/register_allocate.c >>>> @@ -0,0 +1,654 @@ >>>> +/* >>>> + * Copyright © 2010 Intel Corporation >>>> + * >>>> + * Permission is hereby granted, free of charge, to any person >>>> obtaining a >>>> + * copy of this software and associated documentation files (the >>>> "Software"), >>>> + * to deal in the Software without restriction, including without >>>> limitation >>>> + * the rights to use, copy, modify, merge, publish, distribute, >>>> sublicense, >>>> + * and/or sell copies of the Software, and to permit persons to whom >>>> the >>>> + * Software is furnished to do so, subject to the following conditions: >>>> + * >>>> + * The above copyright notice and this permission notice (including >>>> the next >>>> + * paragraph) shall be included in all copies or substantial >>>> portions of the >>>> + * Software. >>>> + * >>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>>> EXPRESS OR >>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>>> MERCHANTABILITY, >>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO >>>> EVENT SHALL >>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES >>>> OR OTHER >>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>>> ARISING >>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>>> OTHER DEALINGS >>>> + * IN THE SOFTWARE. >>>> + * >>>> + * Authors: >>>> + * Eric Anholt <e...@anholt.net> >>>> + * >>>> + */ >>>> + >>>> +/** @file register_allocate.c >>>> + * >>>> + * Graph-coloring register allocator. >>>> + * >>>> + * The basic idea of graph coloring is to make a node in a graph for >>>> + * every thing that needs a register (color) number assigned, and make >>>> + * edges in the graph between nodes that interfere (can't be allocated >>>> + * to the same register at the same time). >>>> + * >>>> + * During the "simplify" process, any any node with fewer edges than >>>> + * there are registers means that that edge can get assigned a >>>> + * register regardless of what its neighbors choose, so that node is >>>> + * pushed on a stack and removed (with its edges) from the graph. >>>> + * That likely causes other nodes to become trivially colorable as >>>> well. >>>> + * >>>> + * Then during the "select" process, nodes are popped off of that >>>> + * stack, their edges restored, and assigned a color different from >>>> + * their neighbors. Because they were pushed on the stack only when >>>> + * they were trivially colorable, any color chosen won't interfere >>>> + * with the registers to be popped later. >>>> + * >>>> + * The downside to most graph coloring is that real hardware often has >>>> + * limitations, like registers that need to be allocated to a node in >>>> + * pairs, or aligned on some boundary. This implementation follows >>>> + * the paper "Retargetable Graph-Coloring Register Allocation for >>>> + * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström. >>>> + * >>>> + * In this system, there are register classes each containing various >>>> + * registers, and registers may interfere with other registers. For >>>> + * example, one might have a class of base registers, and a class of >>>> + * aligned register pairs that would each interfere with their pair of >>>> + * the base registers. Each node has a register class it needs to be >>>> + * assigned to. Define p(B) to be the size of register class B, and >>>> + * q(B,C) to be the number of registers in B that the worst choice >>>> + * register in C could conflict with. Then, this system replaces the >>>> + * basic graph coloring test of "fewer edges from this node than there >>>> + * are registers" with "For this node of class B, the sum of q(B,C) >>>> + * for each neighbor node of class C is less than pB". >>>> + * >>>> + * A nice feature of the pq test is that q(B,C) can be computed once >>>> + * up front and stored in a 2-dimensional array, so that the cost of >>>> + * coloring a node is constant with the number of registers. We do >>>> + * this during ra_set_finalize(). >>>> + */ >>>> + >>>> +#include <stdbool.h> >>>> + >>>> +#include "util/ralloc.h" >>>> +#include "main/imports.h" >>>> +#include "main/macros.h" >>>> +#include "main/mtypes.h" >>>> +#include "main/bitset.h" >>>> +#include "register_allocate.h" >>>> + >>>> +#define NO_REG ~0 >>>> + >>>> +struct ra_reg { >>>> + BITSET_WORD *conflicts; >>>> + unsigned int *conflict_list; >>>> + unsigned int conflict_list_size; >>>> + unsigned int num_conflicts; >>>> +}; >>>> + >>>> +struct ra_regs { >>>> + struct ra_reg *regs; >>>> + unsigned int count; >>>> + >>>> + struct ra_class **classes; >>>> + unsigned int class_count; >>>> + >>>> + bool round_robin; >>>> +}; >>>> + >>>> +struct ra_class { >>>> + /** >>>> + * Bitset indicating which registers belong to this class. >>>> + * >>>> + * (If bit N is set, then register N belongs to this class.) >>>> + */ >>>> + BITSET_WORD *regs; >>>> + >>>> + /** >>>> + * p(B) in Runeson/Nyström paper. >>>> + * >>>> + * This is "how many regs are in the set." >>>> + */ >>>> + unsigned int p; >>>> + >>>> + /** >>>> + * q(B,C) (indexed by C, B is this register class) in >>>> + * Runeson/Nyström paper. This is "how many registers of B could >>>> + * the worst choice register from C conflict with". >>>> + */ >>>> + unsigned int *q; >>>> +}; >>>> + >>>> +struct ra_node { >>>> + /** @{ >>>> + * >>>> + * List of which nodes this node interferes with. This should be >>>> + * symmetric with the other node. >>>> + */ >>>> + BITSET_WORD *adjacency; >>>> + unsigned int *adjacency_list; >>>> + unsigned int adjacency_list_size; >>>> + unsigned int adjacency_count; >>>> + /** @} */ >>>> + >>>> + unsigned int class; >>>> + >>>> + /* Register, if assigned, or NO_REG. */ >>>> + unsigned int reg; >>>> + >>>> + /** >>>> + * Set when the node is in the trivially colorable stack. When >>>> + * set, the adjacency to this node is ignored, to implement the >>>> + * "remove the edge from the graph" in simplification without >>>> + * having to actually modify the adjacency_list. >>>> + */ >>>> + bool in_stack; >>>> + >>>> + /** >>>> + * The q total, as defined in the Runeson/Nyström paper, for all the >>>> + * interfering nodes not in the stack. >>>> + */ >>>> + unsigned int q_total; >>>> + >>>> + /* For an implementation that needs register spilling, this is the >>>> + * approximate cost of spilling this node. >>>> + */ >>>> + float spill_cost; >>>> +}; >>>> + >>>> +struct ra_graph { >>>> + struct ra_regs *regs; >>>> + /** >>>> + * the variables that need register allocation. >>>> + */ >>>> + struct ra_node *nodes; >>>> + unsigned int count; /**< count of nodes. */ >>>> + >>>> + unsigned int *stack; >>>> + unsigned int stack_count; >>>> +}; >>>> + >>>> +/** >>>> + * Creates a set of registers for the allocator. >>>> + * >>>> + * mem_ctx is a ralloc context for the allocator. The reg set may >>>> be freed >>>> + * using ralloc_free(). >>>> + */ >>>> +struct ra_regs * >>>> +ra_alloc_reg_set(void *mem_ctx, unsigned int count) >>>> +{ >>>> + unsigned int i; >>>> + struct ra_regs *regs; >>>> + >>>> + regs = rzalloc(mem_ctx, struct ra_regs); >>>> + regs->count = count; >>>> + regs->regs = rzalloc_array(regs, struct ra_reg, count); >>>> + >>>> + for (i = 0; i < count; i++) { >>>> + regs->regs[i].conflicts = rzalloc_array(regs->regs, BITSET_WORD, >>>> + BITSET_WORDS(count)); >>>> + BITSET_SET(regs->regs[i].conflicts, i); >>>> + >>>> + regs->regs[i].conflict_list = ralloc_array(regs->regs, >>>> unsigned int, 4); >>>> + regs->regs[i].conflict_list_size = 4; >>>> + regs->regs[i].conflict_list[0] = i; >>>> + regs->regs[i].num_conflicts = 1; >>>> + } >>>> + >>>> + return regs; >>>> +} >>>> + >>>> +/** >>>> + * The register allocator by default prefers to allocate low >>>> register numbers, >>>> + * since it was written for hardware (gen4/5 Intel) that is limited >>>> in its >>>> + * multithreadedness by the number of registers used in a given shader. >>>> + * >>>> + * However, for hardware without that restriction, densely packed >>>> register >>>> + * allocation can put serious constraints on instruction >>>> scheduling. This >>>> + * function tells the allocator to rotate around the registers if >>>> possible as >>>> + * it allocates the nodes. >>>> + */ >>>> +void >>>> +ra_set_allocate_round_robin(struct ra_regs *regs) >>>> +{ >>>> + regs->round_robin = true; >>>> +} >>>> + >>>> +static void >>>> +ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned >>>> int r2) >>>> +{ >>>> + struct ra_reg *reg1 = ®s->regs[r1]; >>>> + >>>> + if (reg1->conflict_list_size == reg1->num_conflicts) { >>>> + reg1->conflict_list_size *= 2; >>>> + reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, >>>> + unsigned int, reg1->conflict_list_size); >>>> + } >>>> + reg1->conflict_list[reg1->num_conflicts++] = r2; >>>> + BITSET_SET(reg1->conflicts, r2); >>>> +} >>>> + >>>> +void >>>> +ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned >>>> int r2) >>>> +{ >>>> + if (!BITSET_TEST(regs->regs[r1].conflicts, r2)) { >>>> + ra_add_conflict_list(regs, r1, r2); >>>> + ra_add_conflict_list(regs, r2, r1); >>>> + } >>>> +} >>>> + >>>> +/** >>>> + * Adds a conflict between base_reg and reg, and also between reg and >>>> + * anything that base_reg conflicts with. >>>> + * >>>> + * This can simplify code for setting up multiple register classes >>>> + * which are aggregates of some base hardware registers, compared to >>>> + * explicitly using ra_add_reg_conflict. >>>> + */ >>>> +void >>>> +ra_add_transitive_reg_conflict(struct ra_regs *regs, >>>> + unsigned int base_reg, unsigned int reg) >>>> +{ >>>> + int i; >>>> + >>>> + ra_add_reg_conflict(regs, reg, base_reg); >>>> + >>>> + for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { >>>> + ra_add_reg_conflict(regs, reg, >>>> regs->regs[base_reg].conflict_list[i]); >>>> + } >>>> +} >>>> + >>>> +unsigned int >>>> +ra_alloc_reg_class(struct ra_regs *regs) >>>> +{ >>>> + struct ra_class *class; >>>> + >>>> + regs->classes = reralloc(regs->regs, regs->classes, struct >>>> ra_class *, >>>> + regs->class_count + 1); >>>> + >>>> + class = rzalloc(regs, struct ra_class); >>>> + regs->classes[regs->class_count] = class; >>>> + >>>> + class->regs = rzalloc_array(class, BITSET_WORD, >>>> BITSET_WORDS(regs->count)); >>>> + >>>> + return regs->class_count++; >>>> +} >>>> + >>>> +void >>>> +ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) >>>> +{ >>>> + struct ra_class *class = regs->classes[c]; >>>> + >>>> + BITSET_SET(class->regs, r); >>>> + class->p++; >>>> +} >>>> + >>>> +/** >>>> + * Returns true if the register belongs to the given class. >>>> + */ >>>> +static bool >>>> +reg_belongs_to_class(unsigned int r, struct ra_class *c) >>>> +{ >>>> + return BITSET_TEST(c->regs, r); >>>> +} >>>> + >>>> +/** >>>> + * Must be called after all conflicts and register classes have been >>>> + * set up and before the register set is used for allocation. >>>> + * To avoid costly q value computation, use the q_values paramater >>>> + * to pass precomputed q values to this function. >>>> + */ >>>> +void >>>> +ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) >>>> +{ >>>> + unsigned int b, c; >>>> + >>>> + for (b = 0; b < regs->class_count; b++) { >>>> + regs->classes[b]->q = ralloc_array(regs, unsigned int, >>>> regs->class_count); >>>> + } >>>> + >>>> + if (q_values) { >>>> + for (b = 0; b < regs->class_count; b++) { >>>> + for (c = 0; c < regs->class_count; c++) { >>>> + regs->classes[b]->q[c] = q_values[b][c]; >>>> + } >>>> + } >>>> + return; >>>> + } >>>> + >>>> + /* Compute, for each class B and C, how many regs of B an >>>> + * allocation to C could conflict with. >>>> + */ >>>> + for (b = 0; b < regs->class_count; b++) { >>>> + for (c = 0; c < regs->class_count; c++) { >>>> + unsigned int rc; >>>> + int max_conflicts = 0; >>>> + >>>> + for (rc = 0; rc < regs->count; rc++) { >>>> + int conflicts = 0; >>>> + int i; >>>> + >>>> + if (!reg_belongs_to_class(rc, regs->classes[c])) >>>> + continue; >>>> + >>>> + for (i = 0; i < regs->regs[rc].num_conflicts; i++) { >>>> + unsigned int rb = regs->regs[rc].conflict_list[i]; >>>> + if (BITSET_TEST(regs->classes[b]->regs, rb)) >>>> + conflicts++; >>>> + } >>>> + max_conflicts = MAX2(max_conflicts, conflicts); >>>> + } >>>> + regs->classes[b]->q[c] = max_conflicts; >>>> + } >>>> + } >>>> +} >>>> + >>>> +static void >>>> +ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned >>>> int n2) >>>> +{ >>>> + BITSET_SET(g->nodes[n1].adjacency, n2); >>>> + >>>> + if (n1 != n2) { >>>> + int n1_class = g->nodes[n1].class; >>>> + int n2_class = g->nodes[n2].class; >>>> + g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; >>>> + } >>>> + >>>> + if (g->nodes[n1].adjacency_count >= >>>> + g->nodes[n1].adjacency_list_size) { >>>> + g->nodes[n1].adjacency_list_size *= 2; >>>> + g->nodes[n1].adjacency_list = reralloc(g, >>>> g->nodes[n1].adjacency_list, >>>> + unsigned int, >>>> + >>>> g->nodes[n1].adjacency_list_size); >>>> + } >>>> + >>>> + g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; >>>> + g->nodes[n1].adjacency_count++; >>>> +} >>>> + >>>> +struct ra_graph * >>>> +ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) >>>> +{ >>>> + struct ra_graph *g; >>>> + unsigned int i; >>>> + >>>> + g = rzalloc(regs, struct ra_graph); >>>> + g->regs = regs; >>>> + g->nodes = rzalloc_array(g, struct ra_node, count); >>>> + g->count = count; >>>> + >>>> + g->stack = rzalloc_array(g, unsigned int, count); >>>> + >>>> + for (i = 0; i < count; i++) { >>>> + int bitset_count = BITSET_WORDS(count); >>>> + g->nodes[i].adjacency = rzalloc_array(g, BITSET_WORD, >>>> bitset_count); >>>> + >>>> + g->nodes[i].adjacency_list_size = 4; >>>> + g->nodes[i].adjacency_list = >>>> + ralloc_array(g, unsigned int, >>>> g->nodes[i].adjacency_list_size); >>>> + g->nodes[i].adjacency_count = 0; >>>> + g->nodes[i].q_total = 0; >>>> + >>>> + ra_add_node_adjacency(g, i, i); >>>> + g->nodes[i].reg = NO_REG; >>>> + } >>>> + >>>> + return g; >>>> +} >>>> + >>>> +void >>>> +ra_set_node_class(struct ra_graph *g, >>>> + unsigned int n, unsigned int class) >>>> +{ >>>> + g->nodes[n].class = class; >>>> +} >>>> + >>>> +void >>>> +ra_add_node_interference(struct ra_graph *g, >>>> + unsigned int n1, unsigned int n2) >>>> +{ >>>> + if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) { >>>> + ra_add_node_adjacency(g, n1, n2); >>>> + ra_add_node_adjacency(g, n2, n1); >>>> + } >>>> +} >>>> + >>>> +static bool >>>> +pq_test(struct ra_graph *g, unsigned int n) >>>> +{ >>>> + int n_class = g->nodes[n].class; >>>> + >>>> + return g->nodes[n].q_total < g->regs->classes[n_class]->p; >>>> +} >>>> + >>>> +static void >>>> +decrement_q(struct ra_graph *g, unsigned int n) >>>> +{ >>>> + unsigned int i; >>>> + int n_class = g->nodes[n].class; >>>> + >>>> + for (i = 0; i < g->nodes[n].adjacency_count; i++) { >>>> + unsigned int n2 = g->nodes[n].adjacency_list[i]; >>>> + unsigned int n2_class = g->nodes[n2].class; >>>> + >>>> + if (n != n2 && !g->nodes[n2].in_stack) { >>>> + assert(g->nodes[n2].q_total >= >>>> g->regs->classes[n2_class]->q[n_class]); >>>> + g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; >>>> + } >>>> + } >>>> +} >>>> + >>>> +/** >>>> + * Simplifies the interference graph by pushing all >>>> + * trivially-colorable nodes into a stack of nodes to be colored, >>>> + * removing them from the graph, and rinsing and repeating. >>>> + * >>>> + * If we encounter a case where we can't push any nodes on the >>>> stack, then >>>> + * we optimistically choose a node and push it on the stack. We >>>> heuristically >>>> + * push the node with the lowest total q value, since it has the fewest >>>> + * neighbors and therefore is most likely to be allocated. >>>> + */ >>>> +static void >>>> +ra_simplify(struct ra_graph *g) >>>> +{ >>>> + bool progress = true; >>>> + int i; >>>> + >>>> + while (progress) { >>>> + unsigned int best_optimistic_node = ~0; >>>> + unsigned int lowest_q_total = ~0; >>>> + >>>> + progress = false; >>>> + >>>> + for (i = g->count - 1; i >= 0; i--) { >>>> + if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) >>>> + continue; >>>> + >>>> + if (pq_test(g, i)) { >>>> + decrement_q(g, i); >>>> + g->stack[g->stack_count] = i; >>>> + g->stack_count++; >>>> + g->nodes[i].in_stack = true; >>>> + progress = true; >>>> + } else { >>>> + unsigned int new_q_total = g->nodes[i].q_total; >>>> + if (new_q_total < lowest_q_total) { >>>> + best_optimistic_node = i; >>>> + lowest_q_total = new_q_total; >>>> + } >>>> + } >>>> + } >>>> + >>>> + if (!progress && best_optimistic_node != ~0) { >>>> + decrement_q(g, best_optimistic_node); >>>> + g->stack[g->stack_count] = best_optimistic_node; >>>> + g->stack_count++; >>>> + g->nodes[best_optimistic_node].in_stack = true; >>>> + progress = true; >>>> + } >>>> + } >>>> +} >>>> + >>>> +/** >>>> + * Pops nodes from the stack back into the graph, coloring them with >>>> + * registers as they go. >>>> + * >>>> + * If all nodes were trivially colorable, then this must succeed. If >>>> + * not (optimistic coloring), then it may return false; >>>> + */ >>>> +static bool >>>> +ra_select(struct ra_graph *g) >>>> +{ >>>> + int i; >>>> + int start_search_reg = 0; >>>> + >>>> + while (g->stack_count != 0) { >>>> + unsigned int ri; >>>> + unsigned int r = -1; >>>> + int n = g->stack[g->stack_count - 1]; >>>> + struct ra_class *c = g->regs->classes[g->nodes[n].class]; >>>> + >>>> + /* Find the lowest-numbered reg which is not used by a member >>>> + * of the graph adjacent to us. >>>> + */ >>>> + for (ri = 0; ri < g->regs->count; ri++) { >>>> + r = (start_search_reg + ri) % g->regs->count; >>>> + if (!reg_belongs_to_class(r, c)) >>>> + continue; >>>> + >>>> + /* Check if any of our neighbors conflict with this register >>>> choice. */ >>>> + for (i = 0; i < g->nodes[n].adjacency_count; i++) { >>>> + unsigned int n2 = g->nodes[n].adjacency_list[i]; >>>> + >>>> + if (!g->nodes[n2].in_stack && >>>> + BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { >>>> + break; >>>> + } >>>> + } >>>> + if (i == g->nodes[n].adjacency_count) >>>> + break; >>>> + } >>>> + >>>> + /* set this to false even if we return here so that >>>> + * ra_get_best_spill_node() considers this node later. >>>> + */ >>>> + g->nodes[n].in_stack = false; >>>> + >>>> + if (ri == g->regs->count) >>>> + return false; >>>> + >>>> + g->nodes[n].reg = r; >>>> + g->stack_count--; >>>> + >>>> + if (g->regs->round_robin) >>>> + start_search_reg = r + 1; >>>> + } >>>> + >>>> + return true; >>>> +} >>>> + >>>> +bool >>>> +ra_allocate(struct ra_graph *g) >>>> +{ >>>> + ra_simplify(g); >>>> + return ra_select(g); >>>> +} >>>> + >>>> +unsigned int >>>> +ra_get_node_reg(struct ra_graph *g, unsigned int n) >>>> +{ >>>> + return g->nodes[n].reg; >>>> +} >>>> + >>>> +/** >>>> + * Forces a node to a specific register. This can be used to avoid >>>> + * creating a register class containing one node when handling data >>>> + * that must live in a fixed location and is known to not conflict >>>> + * with other forced register assignment (as is common with shader >>>> + * input data). These nodes do not end up in the stack during >>>> + * ra_simplify(), and thus at ra_select() time it is as if they were >>>> + * the first popped off the stack and assigned their fixed locations. >>>> + * Nodes that use this function do not need to be assigned a register >>>> + * class. >>>> + * >>>> + * Must be called before ra_simplify(). >>>> + */ >>>> +void >>>> +ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg) >>>> +{ >>>> + g->nodes[n].reg = reg; >>>> + g->nodes[n].in_stack = false; >>>> +} >>>> + >>>> +static float >>>> +ra_get_spill_benefit(struct ra_graph *g, unsigned int n) >>>> +{ >>>> + int j; >>>> + float benefit = 0; >>>> + int n_class = g->nodes[n].class; >>>> + >>>> + /* Define the benefit of eliminating an interference between n, n2 >>>> + * through spilling as q(C, B) / p(C). This is similar to the >>>> + * "count number of edges" approach of traditional graph coloring, >>>> + * but takes classes into account. >>>> + */ >>>> + for (j = 0; j < g->nodes[n].adjacency_count; j++) { >>>> + unsigned int n2 = g->nodes[n].adjacency_list[j]; >>>> + if (n != n2) { >>>> + unsigned int n2_class = g->nodes[n2].class; >>>> + benefit += ((float)g->regs->classes[n_class]->q[n2_class] / >>>> + g->regs->classes[n_class]->p); >>>> + } >>>> + } >>>> + >>>> + return benefit; >>>> +} >>>> + >>>> +/** >>>> + * Returns a node number to be spilled according to the cost/benefit >>>> using >>>> + * the pq test, or -1 if there are no spillable nodes. >>>> + */ >>>> +int >>>> +ra_get_best_spill_node(struct ra_graph *g) >>>> +{ >>>> + unsigned int best_node = -1; >>>> + float best_benefit = 0.0; >>>> + unsigned int n; >>>> + >>>> + /* Consider any nodes that we colored successfully or the node we >>>> failed to >>>> + * color for spilling. When we failed to color a node in >>>> ra_select(), we >>>> + * only considered these nodes, so spilling any other ones would >>>> not result >>>> + * in us making progress. >>>> + */ >>>> + for (n = 0; n < g->count; n++) { >>>> + float cost = g->nodes[n].spill_cost; >>>> + float benefit; >>>> + >>>> + if (cost <= 0.0) >>>> + continue; >>>> + >>>> + if (g->nodes[n].in_stack) >>>> + continue; >>>> + >>>> + benefit = ra_get_spill_benefit(g, n); >>>> + >>>> + if (benefit / cost > best_benefit) { >>>> + best_benefit = benefit / cost; >>>> + best_node = n; >>>> + } >>>> + } >>>> + >>>> + return best_node; >>>> +} >>>> + >>>> +/** >>>> + * Only nodes with a spill cost set (cost != 0.0) will be considered >>>> + * for register spilling. >>>> + */ >>>> +void >>>> +ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) >>>> +{ >>>> + g->nodes[n].spill_cost = cost; >>>> +} >>>> diff --git a/src/util/register_allocate.h b/src/util/register_allocate.h >>>> new file mode 100644 >>>> index 0000000..dc68744 >>>> --- /dev/null >>>> +++ b/src/util/register_allocate.h >>>> @@ -0,0 +1,79 @@ >>>> +/* >>>> + * Copyright © 2010 Intel Corporation >>>> + * >>>> + * Permission is hereby granted, free of charge, to any person >>>> obtaining a >>>> + * copy of this software and associated documentation files (the >>>> "Software"), >>>> + * to deal in the Software without restriction, including without >>>> limitation >>>> + * the rights to use, copy, modify, merge, publish, distribute, >>>> sublicense, >>>> + * and/or sell copies of the Software, and to permit persons to whom >>>> the >>>> + * Software is furnished to do so, subject to the following conditions: >>>> + * >>>> + * The above copyright notice and this permission notice (including >>>> the next >>>> + * paragraph) shall be included in all copies or substantial >>>> portions of the >>>> + * Software. >>>> + * >>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >>>> EXPRESS OR >>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >>>> MERCHANTABILITY, >>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO >>>> EVENT SHALL >>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES >>>> OR OTHER >>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, >>>> ARISING >>>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR >>>> OTHER DEALINGS >>>> + * IN THE SOFTWARE. >>>> + * >>>> + * Authors: >>>> + * Eric Anholt <e...@anholt.net> >>>> + * >>>> + */ >>>> + >>>> +#include <stdbool.h> >>>> + >>>> +struct ra_class; >>>> +struct ra_regs; >>>> + >>>> +/* @{ >>>> + * Register set setup. >>>> + * >>>> + * This should be done once at backend initializaion, as >>>> + * ra_set_finalize is O(r^2*c^2). The registers may be virtual >>>> + * registers, such as aligned register pairs that conflict with the >>>> + * two real registers from which they are composed. >>>> + */ >>>> +struct ra_regs *ra_alloc_reg_set(void *mem_ctx, unsigned int count); >>>> +void ra_set_allocate_round_robin(struct ra_regs *regs); >>>> +unsigned int ra_alloc_reg_class(struct ra_regs *regs); >>>> +void ra_add_reg_conflict(struct ra_regs *regs, >>>> + unsigned int r1, unsigned int r2); >>>> +void ra_add_transitive_reg_conflict(struct ra_regs *regs, >>>> + unsigned int base_reg, unsigned int reg); >>>> +void ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned >>>> int reg); >>>> +void ra_set_num_conflicts(struct ra_regs *regs, unsigned int class_a, >>>> + unsigned int class_b, unsigned int >>>> num_conflicts); >>>> +void ra_set_finalize(struct ra_regs *regs, unsigned int **conflicts); >>>> +/** @} */ >>>> + >>>> +/** @{ Interference graph setup. >>>> + * >>>> + * Each interference graph node is a virtual variable in the IL. It >>>> + * is up to the user to ra_set_node_class() for the virtual variable, >>>> + * and compute live ranges and ra_node_interfere() between conflicting >>>> + * live ranges. Note that an interference *must not* be added between >>>> + * two nodes if their classes haven't been assigned yet. The user >>>> + * should set the class of each node before building the interference >>>> + * graph. >>>> + */ >>>> +struct ra_graph *ra_alloc_interference_graph(struct ra_regs *regs, >>>> + unsigned int count); >>>> +void ra_set_node_class(struct ra_graph *g, unsigned int n, unsigned >>>> int c); >>>> +void ra_add_node_interference(struct ra_graph *g, >>>> + unsigned int n1, unsigned int n2); >>>> +/** @} */ >>>> + >>>> +/** @{ Graph-coloring register allocation */ >>>> +bool ra_allocate(struct ra_graph *g); >>>> + >>>> +unsigned int ra_get_node_reg(struct ra_graph *g, unsigned int n); >>>> +void ra_set_node_reg(struct ra_graph * g, unsigned int n, unsigned >>>> int reg); >>>> +void ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, >>>> float cost); >>>> +int ra_get_best_spill_node(struct ra_graph *g); >>>> +/** @} */ >>>> + >>>> >>> >>> _______________________________________________ >>> mesa-dev mailing list >>> mesa-dev@lists.freedesktop.org >>> https://urldefense.proofpoint.com/v1/url?u=http://lists.freedesktop.org/mailman/listinfo/mesa-dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=lGQMzzTgII0I7jefp2FHq7WtZ%2BTLs8wadB%2BiIj9xpBY%3D%0A&m=qAT%2B9yfjQX3a5v9SFwkJ8dYXl1%2Fd1P%2BDEtv3NijUfs4%3D%0A&s=ebd7a4e5c50b60a5f61299017fd0700dd2c9e55bbc8cdd325d30d89b9aaa6405 >>> >>> >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev