Am 15.03.2011 23:42, schrieb Mikael Morin:
On second thought, maybe you're right; the speed-up / likeliness-to-break ratio doesn't look so interesting after all, and selecting only pure/implicitely pure functions for optimization would get rid of the weird cases without (hopefully) being too restrictive on the candidates for optimization.
Since this appears to be the consensus, here is an updated version of the patch which does indeed that.
Regression-tested. OK for trunk? 2010-03-14 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/22572 * frontend_passes (expr_array): New static variable. (expr_size): Likewise. (expr_count): Likewise. (current_code): Likewise. (current_ns): Likewise. (gfc_run_passes): Allocate and free space for expressions. (compare_functions): New function. (cfe_expr): New function. (create_var): New function. (cfc_expr_0): New function. (cfe_code): New function. (optimize_namespace): Invoke gfc_code_walker with cfe_code and cfe_expr_0. 2010-03-14 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/22572 * gfortran.dg/function_optimize_1.f90: New test.
Index: frontend-passes.c =================================================================== --- frontend-passes.c (Revision 170960) +++ frontend-passes.c (Arbeitskopie) @@ -40,6 +40,21 @@ static bool optimize_trim (gfc_expr *); static int count_arglist; +/* Pointer to an array of gfc_expr ** we operate on, plus its size + and counter. */ + +static gfc_expr ***expr_array; +static int expr_size, expr_count; + +/* Pointer to the gfc_code we currently work on - to be able to insert + a statement before. */ + +static gfc_code **current_code; + +/* The namespace we are currently dealing with. */ + +gfc_namespace *current_ns; + /* Entry point - run all passes for a namespace. So far, only an optimization pass is run. */ @@ -48,9 +63,16 @@ gfc_run_passes (gfc_namespace *ns) { if (optimize) { + expr_size = 20; + expr_array = XNEWVEC(gfc_expr **, expr_size); + optimize_namespace (ns); if (gfc_option.dump_fortran_optimized) gfc_dump_parse_tree (ns, stdout); + + /* FIXME: The following should be XDELETEVEC(expr_array); + but we cannot do that because it depends on free. */ + gfc_free (expr_array); } } @@ -106,11 +128,233 @@ optimize_expr (gfc_expr **e, int *walk_subtrees AT return 0; } +/* Compare two functions for equality. We could use gfc_dep_compare_expr + except that we also consider impure functions equal, because anybody + changing the return value of the function within an expression would + violate the Fortran standard. */ + +static bool +compare_functions (gfc_expr **ep1, gfc_expr **ep2) +{ + gfc_expr *e1, *e2; + + e1 = *ep1; + e2 = *ep2; + + if (e1->expr_type != EXPR_FUNCTION || e2->expr_type != EXPR_FUNCTION) + return false; + + if ((e1->value.function.esym && e2->value.function.esym + && e1->value.function.esym == e2->value.function.esym) + || (e1->value.function.isym && e2->value.function.isym + && e1->value.function.isym == e2->value.function.isym)) + { + gfc_actual_arglist *args1, *args2; + + args1 = e1->value.function.actual; + args2 = e2->value.function.actual; + + /* Compare the argument lists for equality. */ + while (args1 && args2) + { + /* Bitwise xor, since C has no non-bitwise xor operator. */ + if ((args1->expr == NULL) ^ (args2->expr == NULL)) + return false; + + if (args1->expr != NULL && args2->expr != NULL + && gfc_dep_compare_expr (args1->expr, args2->expr) != 0) + return false; + + args1 = args1->next; + args2 = args2->next; + } + return args1 == NULL && args2 == NULL; + } + else + return false; + +} + +/* Callback function for gfc_expr_walker, called from cfe_expr_0. Put all + eligible function expressions into expr_array. We can't do allocatable + functions. */ + +static int +cfe_expr (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + if ((*e)->expr_type != EXPR_FUNCTION) + return 0; + + /* We don't do character functions (yet). */ + if ((*e)->ts.type == BT_CHARACTER) + return 0; + + /* If we don't know the shape at compile time, we do not create a temporary + variable to hold the intermediate result. FIXME: Change this later when + allocation on assignment works for intrinsics. */ + + if ((*e)->rank > 0 && (*e)->shape == NULL) + return 0; + + if ((*e)->value.function.esym) + { + if ((*e)->value.function.esym->attr.allocatable) + return 0; + + if (!(*e)->value.function.esym->attr.pure + && !(*e)->value.function.esym->attr.implicit_pure + && !(*e)->value.function.esym->attr.elemental) + return 0; + } + + if ((*e)->value.function.isym) + { + if ((*e)->value.function.isym->id == GFC_ISYM_CONVERSION) + return 0; + + if (! (*e)->value.function.isym->pure + && !(*e)->value.function.isym->elemental) + return 0; + } + + if (expr_count >= expr_size) + { + expr_size += expr_size; + expr_array = XRESIZEVEC(gfc_expr **, expr_array, expr_size); + } + expr_array[expr_count] = e; + expr_count ++; + return 0; +} + +/* Returns a new expression (a variable) to be used in place of the old one, + with an an assignment statement before the current statement to set + the value of the variable. */ + +static gfc_expr* +create_var (gfc_expr * e) +{ + char name[GFC_MAX_SYMBOL_LEN +1]; + static int num = 1; + gfc_symtree *symtree; + gfc_symbol *symbol; + gfc_expr *result; + gfc_code *n; + int i; + + sprintf(name, "__var_%d",num++); + if (gfc_get_sym_tree (name, current_ns, &symtree, false) != 0) + gcc_unreachable (); + + symbol = symtree->n.sym; + symbol->ts = e->ts; + symbol->as = gfc_get_array_spec (); + symbol->as->rank = e->rank; + symbol->as->type = AS_EXPLICIT; + for (i=0; i<e->rank; i++) + { + gfc_expr *p, *q; + + p = gfc_get_constant_expr (BT_INTEGER, gfc_default_integer_kind, + &(e->where)); + mpz_set_si (p->value.integer, 1); + symbol->as->lower[i] = p; + + q = gfc_get_constant_expr (BT_INTEGER, gfc_index_integer_kind, + &(e->where)); + mpz_set (q->value.integer, e->shape[i]); + symbol->as->upper[i] = q; + } + + symbol->attr.flavor = FL_VARIABLE; + symbol->attr.referenced = 1; + symbol->attr.dimension = e->rank > 0; + gfc_commit_symbol (symbol); + + result = gfc_get_expr (); + result->expr_type = EXPR_VARIABLE; + result->ts = e->ts; + result->rank = e->rank; + result->shape = gfc_copy_shape (e->shape, e->rank); + result->symtree = symtree; + result->where = e->where; + if (e->rank > 0) + { + result->ref = gfc_get_ref (); + result->ref->type = REF_ARRAY; + result->ref->u.ar.type = AR_FULL; + result->ref->u.ar.where = e->where; + result->ref->u.ar.as = symbol->as; + } + + /* Generate the new assignment. */ + n = XCNEW (gfc_code); + n->op = EXEC_ASSIGN; + n->loc = (*current_code)->loc; + n->next = *current_code; + n->expr1 = gfc_copy_expr (result); + n->expr2 = e; + *current_code = n; + + return result; +} + +/* Callback function for the code walker for doing common function + elimination. This builds up the list of functions in the expression + and goes through them to detect duplicates, which it then replaces + by variables. */ + +static int +cfe_expr_0 (gfc_expr **e, int *walk_subtrees, + void *data ATTRIBUTE_UNUSED) +{ + int i,j; + gfc_expr *newvar; + + expr_count = 0; + gfc_expr_walker (e, cfe_expr, NULL); + /* Walk backwards through all the functions to make sure we + catch the leaf functions first. */ + for (i=expr_count-1; i>=1; i--) + { + newvar = NULL; + for (j=i-1; j>=0; j--) + { + if (compare_functions(expr_array[i], expr_array[j])) + { + if (newvar == NULL) + newvar = create_var (*(expr_array[i])); + gfc_free (*(expr_array[j])); + *(expr_array[j]) = gfc_copy_expr (newvar); + } + } + if (newvar) + *(expr_array[i]) = newvar; + } + + /* We did all the necessary walking in this function. */ + *walk_subtrees = 0; + return 0; +} + +static int +cfe_code (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, + void *data ATTRIBUTE_UNUSED) +{ + current_code = c; + return 0; +} + /* Optimize a namespace, including all contained namespaces. */ static void optimize_namespace (gfc_namespace *ns) { + + current_ns = ns; + + gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL); for (ns = ns->contained; ns; ns = ns->sibling)
! { dg-do compile } ! { dg-options "-O -fdump-tree-original" } program main implicit none real, dimension(2,2) :: a, b, c, d integer :: i character(60) :: line real, external :: ext_func interface elemental function elem(x) real, intent(in) :: x real :: elem end function elem pure function mypure(x) real, intent(in) :: x integer :: mypure end function mypure end interface real :: x data a /2., 3., 5., 7./ data b /11., 13., 17., 23./ write (unit=line, fmt='(4F7.2)') matmul(a,b) + matmul(a,b) d = sin(a) + cos(a) + sin(a) + cos(a) x = ext_func(a) + 23 + ext_func(a) print *,d,x d = elem(x) + elem(x) print *,d i = mypure(x) - mypure(x) print *,i end program main ! { dg-final { scan-tree-dump-times "matmul_r4" 1 "original" } } ! { dg-final { scan-tree-dump-times "__builtin_sinf" 1 "original" } } ! { dg-final { scan-tree-dump-times "__builtin_cosf" 1 "original" } } ! { dg-final { scan-tree-dump-times "ext_func" 2 "original" } } ! { dg-final { scan-tree-dump-times "elem" 1 "original" } } ! { dg-final { scan-tree-dump-times "mypure" 1 "original" } } ! { dg-final { cleanup-tree-dump "original" } }