--- src/lssi.c | 367 ++++++++++++++++++++++++++++++ src/lssi.h | 56 +++++ src/state-item.c | 563 +++++++++++++++++++++++++++++++++++++++++++++++ src/state-item.h | 100 +++++++++ 4 files changed, 1086 insertions(+) create mode 100644 src/lssi.c create mode 100644 src/lssi.h create mode 100644 src/state-item.c create mode 100644 src/state-item.h
diff --git a/src/lssi.c b/src/lssi.c new file mode 100644 index 00000000..c4c51b32 --- /dev/null +++ b/src/lssi.c @@ -0,0 +1,367 @@ +/* Lookahead sensative state item searches for counterexample generation + + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <gl_linked_list.h> +#include <gl_xlist.h> +#include <stdlib.h> +#include "lssi.h" +#include "nullable.h" +#include "getargs.h" + +// lookahead sensative state item +struct lssi; +typedef struct lssi +{ + state_item_number si; + struct lssi *parent; + // this is the precise lookahead set (follow_L from the CupEx paper) + bitset lookahead; + bool free_lookahead; +} lssi; + +lssi * +new_lssi (state_item_number si, lssi *p, bitset l, bool free_l) +{ + lssi *ret = xmalloc (sizeof (lssi)); + ret->si = si; + ret->parent = p; + ret->lookahead = l; + ret->free_lookahead = free_l; + return ret; +} + +void +lssi_free (lssi *sn) +{ + if (sn == NULL) + return; + if (sn->free_lookahead) + bitset_free (sn->lookahead); + free (sn); +} + +static size_t +lssi_hasher (lssi *sn, size_t max) +{ + size_t hash = sn->si; + bitset_iterator biter; + symbol_number syn; + BITSET_FOR_EACH (biter, sn->lookahead, syn, 0) + hash += syn; + return hash % max; +} + +static bool +lssi_comparator (lssi *s1, lssi *s2) +{ + if (s1->si == s2->si) + { + if (s1->lookahead == s2->lookahead) + return true; + return bitset_equal_p (s1->lookahead, s2->lookahead); + } + return false; +} + +static inline bool +append_lssi (lssi *sn, Hash_table *visited, gl_list_t queue) +{ + if (hash_lookup (visited, sn)) + { + sn->free_lookahead = false; + lssi_free (sn); + return false; + } + if (!hash_insert (visited, sn)) + xalloc_die (); + gl_list_add_last (queue, sn); + return true; +} + +static void +lssi_print (lssi *l) +{ + print_state_item (state_items + l->si, stdout); + if (l->lookahead) + { + printf ("FOLLOWL = { "); + bitset_iterator biter; + symbol_number sin; + BITSET_FOR_EACH (biter, l->lookahead, sin, 0) + printf ("%s, \n", symbols[sin]->tag); + puts ("}"); + } +} + +/** + * Compute the set of state-items that can reach the given conflict item via + * a combination of transitions or production steps. + */ +bitset +eligible_state_items (state_item *target) +{ + bitset result = bitset_create (nstate_items, BITSET_FIXED); + gl_list_t queue = + gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1, + (const void **) &target); + while (gl_list_size (queue) > 0) + { + state_item *si = (state_item *) gl_list_get_at (queue, 0); + gl_list_remove_at (queue, 0); + if (bitset_test (result, si - state_items)) + continue; + bitset_set (result, si - state_items); + // search all reverse edges. + bitset rsi = si_revs[si - state_items]; + bitset_iterator biter; + state_item_number sin; + BITSET_FOR_EACH (biter, rsi, sin, 0) + gl_list_add_last (queue, &state_items[sin]); + } + gl_list_free (queue); + return result; +} + +/** + * Compute the shortest lookahead-sensitive path from the start state to + * this conflict. If optimized is true, only consider parser states + * that can reach the conflict state. + */ +gl_list_t +shortest_path_from_start (state_item_number target, symbol_number next_sym) +{ + bitset eligible = eligible_state_items (&state_items[target]); + Hash_table *visited = hash_initialize (32, + NULL, + (Hash_hasher) lssi_hasher, + (Hash_comparator) lssi_comparator, + (Hash_data_freer) lssi_free); + bitset il = bitset_create (nsyms, BITSET_FIXED); + bitset_set (il, 0); + lssi *init = new_lssi (0, NULL, il, true); + gl_list_t queue = gl_list_create (GL_LINKED_LIST, NULL, NULL, + NULL, + true, 1, (const void **) &init); + // breadth-first search + bool finished = false; + lssi *n; + while (gl_list_size (queue) > 0) + { + n = (lssi *) gl_list_get_at (queue, 0); + gl_list_remove_at (queue, 0); + state_item_number last = n->si; + if (target == last && bitset_test (n->lookahead, next_sym)) + { + finished = true; + break; + } + // Transitions don't change follow_L + if (si_trans[last] >= 0) + { + state_item_number nextSI = si_trans[last]; + if (bitset_test (eligible, nextSI)) + { + lssi *next = new_lssi (nextSI, n, n->lookahead, false); + append_lssi (next, visited, queue); + } + } + // For production steps, follow_L is based on the symbol after the + // non-terminal being produced. + // if no such symbol exists, follow_L is unchanged + // if the symbol is a terminal, follow_L only contains that terminal + // if the symbol is not nullable, follow_L is its FIRSTS set + // if the symbol is nullable, follow_L is its FIRSTS set unioned with + // this logic applied to the next symbol in the rule + bitset p = si_prods_lookup (last); + if (p) + { + state_item si = state_items[last]; + // Compute follow_L as above + bitset lookahead = bitset_create (nsyms, BITSET_FIXED); + item_number *pos = si.item + 1; + for (; !item_number_is_rule_number (*pos); ++pos) + { + item_number it = *pos; + if (ISTOKEN (it)) + { + bitset_set (lookahead, it); + break; + } + else + { + bitset_union (lookahead, lookahead, FIRSTS (it)); + if (!nullable[it - ntokens]) + break; + } + } + if (item_number_is_rule_number (*pos)) + bitset_union (lookahead, n->lookahead, lookahead); + + bool lookahead_used = false; + // Try all possible production steps within this parser state. + bitset_iterator biter; + state_item_number nextSI; + BITSET_FOR_EACH (biter, p, nextSI, 0) + { + if (!bitset_test (eligible, nextSI)) + continue; + lssi *next = new_lssi (nextSI, n, lookahead, + !lookahead_used); + lookahead_used = append_lssi (next, visited, queue) + || lookahead_used; + } + if (!lookahead_used) + bitset_free (lookahead); + } + } + if (!finished) + { + gl_list_free (queue); + fputs ("Cannot find shortest path to conflict state.", stderr); + exit (1); + } + gl_list_t ret = + gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true); + for (lssi *sn = n; sn != NULL; sn = sn->parent) + gl_list_add_first (ret, state_items + sn->si); + + hash_free (visited); + gl_list_free (queue); + + if (trace_flag & trace_cex) + { + puts ("REDUCE ITEM PATH:"); + gl_list_iterator_t it = gl_list_iterator (ret); + const void *sip; + while (gl_list_iterator_next (&it, &sip, NULL)) + print_state_item ((state_item *) sip, stdout); + } + return ret; +} + +/** + * Determine if the given terminal is in the given symbol set or can begin + * a nonterminal in the given symbol set. + */ +bool +intersect_symbol (symbol_number sym, bitset syms) +{ + if (!syms) + return true; + bitset_iterator biter; + symbol_number sn; + BITSET_FOR_EACH (biter, syms, sn, 0) + { + if (sym == sn) + return true; + if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym)) + return true; + } + return false; +} + +/** + * Determine if any symbol in ts is in syms + * or can begin a nonterminal syms. + */ +bool +intersect (bitset ts, bitset syms) +{ + if (!syms || !ts) + return true; + bitset_iterator biter; + symbol_number sn; + BITSET_FOR_EACH (biter, syms, sn, 0) + { + if (bitset_test (ts, sn)) + return true; + if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn))) + return true; + } + return false; +} + + +/** + * Compute a list of state_items that have a production to n with respect + * to its lookahead + */ +gl_list_t +lssi_reverse_production (const state_item *si, bitset lookahead) +{ + gl_list_t result = + gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true); + if (SI_TRANSITION (si)) + return result; + // A production step was made to the current lalr_item. + // Check that the next symbol in the parent lalr_item is + // compatible with the lookahead. + bitset_iterator biter; + state_item_number sin; + BITSET_FOR_EACH (biter, si_revs[si - state_items], sin, 0) + { + state_item *prevsi = state_items + sin; + if (!production_allowed (prevsi, si)) + continue; + bitset prev_lookahead = prevsi->lookahead; + if (item_number_is_rule_number (*(prevsi->item))) + { + // reduce item + // Check that some lookaheads can be preserved. + if (!intersect (prev_lookahead, lookahead)) + continue; + } + else + { + // shift item + if (lookahead) + { + // Check that lookahead is compatible with the first + // possible symbols in the rest of the production. + // Alternatively, if the rest of the production is + // nullable, the lookahead must be compatible with + // the lookahead of the corresponding item. + bool applicable = false; + bool nlable = true; + for (item_number *pos = prevsi->item + 1; + !applicable && nlable && item_number_is_symbol_number (*pos); + ++pos) + { + symbol_number next_sym = item_number_as_symbol_number (*pos); + if (ISTOKEN (next_sym)) + { + applicable = intersect_symbol (next_sym, lookahead); + nlable = false; + } + else + { + applicable = intersect (FIRSTS (next_sym), lookahead); + if (!applicable) + nlable = nullable[next_sym - ntokens]; + } + } + if (!applicable && !nlable) + continue; + } + } + gl_list_add_last (result, prevsi); + } + return result; +} diff --git a/src/lssi.h b/src/lssi.h new file mode 100644 index 00000000..c3a205ca --- /dev/null +++ b/src/lssi.h @@ -0,0 +1,56 @@ +/* Lookahead sensative state item searches for counterexample generation + + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef LSSI_H +#define LSSI_H + +#include "state-item.h" +/* + All state-item graph nodes should also include a precise follow set (follow_L). + However, ignoring follow_L saves a lot of memory and is a pretty good approximation. + These functions exist to enforce restrictions caused by follow_L sets. + */ + +/* + * find shortest lookahead-sensitive path of state-items to target such that + * next_sym is in the follow_L set of target in that position. +*/ +gl_list_t shortest_path_from_start (state_item_number target, + symbol_number next_sym); + +/** + * Determine if the given terminal is in the given symbol set or can begin + * a nonterminal in the given symbol set. + */ +bool intersect_symbol (symbol_number sym, bitset syms); + +/** + * Determine if any symbol in ts is in syms + * or can begin with a nonterminal in syms. + */ +bool intersect (bitset ts, bitset syms); + +/** + * Compute a set of sequences of state-items that can make production steps + * to this state-item such that the resulting possible lookahead symbols are + * as given. + */ +gl_list_t lssi_reverse_production (const state_item *si, bitset lookahead); + +#endif /* LSSI_H */ diff --git a/src/state-item.c b/src/state-item.c new file mode 100644 index 00000000..69cb923d --- /dev/null +++ b/src/state-item.c @@ -0,0 +1,563 @@ +/* Counterexample Generation Search Nodes + + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> +#include <stdlib.h> +#include <time.h> +#include <assert.h> +#include <gl_linked_list.h> +#include <gl_xlist.h> +#include "state-item.h" +#include "closure.h" +#include "nullable.h" +#include "getargs.h" + +size_t nstate_items; +state_item_number *state_item_map; +state_item *state_items; + +state_item_number *si_trans; +bitsetv si_revs; +Hash_table *si_prods; + +// hash functions for index -> bitset hash maps +typedef struct +{ + int key; + bitset l; +} hash_pair; + +static size_t +hash_pair_hasher (const hash_pair *sl, size_t max) +{ + return sl->key % max; +} + +static bool +hash_pair_comparator (const hash_pair *l, const hash_pair *r) +{ + return l->key == r->key; +} + +static void +hash_pair_free (hash_pair *hp) +{ + bitset_free (hp->l); + free (hp); +} + +static bitset +hash_pair_lookup (Hash_table *tab, int key) +{ + hash_pair *l = xmalloc (sizeof (hash_pair)); + l->key = key; + hash_pair *hp = (hash_pair *) hash_lookup (tab, l); + if (!hp) + return NULL; + return hp->l; +} + +static void +hash_pair_insert (Hash_table *tab, int key, bitset val) +{ + hash_pair *hp = xmalloc (sizeof (hash_pair)); + hp->key = key; + hp->l = val; + if (!hash_insert (tab, hp)) + xalloc_die (); +} + +static void +hash_pair_remove (Hash_table *tab, int key) +{ + hash_pair *hp = xmalloc (sizeof (hash_pair)); + hp->key = key; + hash_delete (tab, hp); +} + +/* return a state_item from a state's id and the offset of the item + within the state. + */ +state_item * +state_item_lookup (state_number s, state_item_number off) +{ + return &state_items[state_item_index_lookup (s, off)]; +} + +static inline void +state_item_set (state_item_number sidx, state *s, item_number off) +{ + state_item *si = state_items + sidx; + si->state = s; + si->item = &ritem[off]; + si->lookahead = NULL; + si_trans[sidx] = -1; +} + +/** + * Initialize state_items set + */ +static void +init_state_items () +{ + nstate_items = 0; + bitsetv production_items = bitsetv_create (nstates, nritems, BITSET_SPARSE); + for (int i = 0; i < nstates; ++i) + { + state *s = states[i]; + nstate_items += s->nitems; + closure (s->items, s->nitems); + for (size_t j = 0; j < nitemset; ++j) + if (itemset[j] > 0 + && item_number_is_rule_number (ritem[itemset[j] - 1])) + { + bitset_set (production_items[i], itemset[j]); + ++nstate_items; + } + } + state_item_map = xnmalloc (nstates + 1, sizeof (state_item_number)); + state_items = xnmalloc (nstate_items, sizeof (state_item)); + si_trans = xnmalloc (nstate_items, sizeof (state_item_number)); + si_revs = bitsetv_create (nstate_items, nstate_items, BITSET_SPARSE); + state_item_number sidx = 0; + for (int i = 0; i < nstates; ++i) + { + state_item_map[i] = sidx; + int rule_search_idx = 0; + state *s = states[i]; + reductions *red = s->reductions; + for (int j = 0; j < s->nitems; ++j) + { + state_item_set (sidx, s, s->items[j]); + state_item *si = state_items + sidx; + const rule *r = item_rule (si->item); + if (red->rules[rule_search_idx] < r) + ++rule_search_idx; + if (rule_search_idx < red->num && r == red->rules[rule_search_idx]) + { + bitsetv lookahead = red->lookahead_tokens; + if (lookahead) + si->lookahead = lookahead[rule_search_idx]; + } + ++sidx; + } + bitset_iterator biter; + item_number off; + BITSET_FOR_EACH (biter, production_items[i], off, 0) + { + state_item_set (sidx, s, off); + if (item_number_is_rule_number (ritem[off])) + { + bitsetv lookahead = red->lookahead_tokens; + if (lookahead) + state_items[sidx].lookahead = lookahead[rule_search_idx]; + ++rule_search_idx; + } + ++sidx; + } + + } + state_item_map[nstates] = nstate_items; +} + +static size_t +state_sym_hasher (const void *st, size_t max) +{ + return ((state *) st)->accessing_symbol % max; +} + +static bool +state_sym_comparator (const void *s1, const void *s2) +{ + return ((state *) s1)->accessing_symbol == ((state *) s2)->accessing_symbol; +} + +static state * +state_sym_lookup (symbol_number sym, Hash_table *h) +{ + state *s = xmalloc (sizeof (state)); + s->accessing_symbol = sym; + return hash_lookup (h, s); +} + +static void +init_trans () +{ + for (state_number i = 0; i < nstates; ++i) + { + // generate a hash set that maps from accepting symbols to the states + // this state transitions to + state *s = states[i]; + transitions *t = s->transitions; + Hash_table *transitions + = hash_initialize (t->num, NULL, (Hash_hasher) state_sym_hasher, + (Hash_comparator) state_sym_comparator, NULL); + for (int j = 0; j < t->num; ++j) + if (!TRANSITION_IS_DISABLED (t, j)) + if (!hash_insert (transitions, t->states[j])) + xalloc_die (); + for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j) + { + item_number *item = state_items[j].item; + if (item_number_is_rule_number (*item)) + continue; + state *dst = state_sym_lookup (*item, transitions); + if (!dst) + continue; + // find the item in the destination state that corresponds + // to the transition of item + for (int k = 0; k < dst->nitems; ++k) + { + if (item + 1 == ritem + dst->items[k]) + { + state_item_number dstSI = + state_item_index_lookup (dst->number, k); + + si_trans[j] = dstSI; + bitset_set (si_revs[dstSI], j); + break; + } + } + } + } +} + +bitset +si_prods_lookup (state_item_number si) +{ + return hash_pair_lookup (si_prods, si); +} + +static void +init_prods () +{ + si_prods = hash_initialize (nstate_items, + NULL, + (Hash_hasher) hash_pair_hasher, + (Hash_comparator) hash_pair_comparator, + (Hash_data_freer) hash_pair_free); + for (int i = 0; i < nstates; ++i) + { + state *state = states[i]; + // closure_map is a hash map from nonterminals to a set + // of the items that produce those nonterminals + Hash_table *closure_map + = hash_initialize (nsyms - ntokens, NULL, + (Hash_hasher) hash_pair_hasher, + (Hash_comparator) hash_pair_comparator, + NULL); + + // Add the nitems of state to skip to the production portion + // of that state's state_items + for (int j = state_item_map[i] + state->nitems; + j < state_item_map[i + 1]; ++j) + { + state_item *src = state_items + j; + item_number *item = src->item; + symbol_number lhs = item_rule (item)->lhs->number; + bitset itms = hash_pair_lookup (closure_map, lhs); + if (!itms) + { + itms = bitset_create (nstate_items, BITSET_SPARSE); + hash_pair_insert (closure_map, lhs, itms); + } + bitset_set (itms, j); + } + // For each item with a dot followed by a nonterminal, + // try to create a production edge. + for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j) + { + state_item *src = state_items + j; + item_number item = *(src->item); + // Skip reduce items and items with terminals after the dot + if (item_number_is_rule_number (item) || ISTOKEN (item)) + continue; + symbol_number sym = item_number_as_symbol_number (item); + bitset lb = hash_pair_lookup (closure_map, sym); + if (lb) + { + bitset copy = bitset_create (nstate_items, BITSET_SPARSE); + bitset_copy (copy, lb); + hash_pair *prod_hp = xmalloc (sizeof (hash_pair)); + prod_hp->key = j; + prod_hp->l = copy; + //update prods + if (!hash_insert (si_prods, prod_hp)) + xalloc_die (); + + //update revs + bitset_iterator biter; + state_item_number prod; + BITSET_FOR_EACH (biter, copy, prod, 0) + bitset_set (si_revs[prod], j); + } + } + + } +} + +/* Since lookaheads are only generated for reductions, + we need to propogate lookahead sets backwards as + the searches require each state_item to have a lookahead. + */ +static inline void +gen_lookaheads () +{ + for (state_item_number i = 0; i < nstate_items; ++i) + { + state_item *si = state_items + i; + if (item_number_is_symbol_number (*(si->item)) || !si->lookahead) + continue; + + bitset lookahead = si->lookahead; + gl_list_t queue = + gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1, + (const void **) &si); + + // For each reduction item, traverse through all state_items + // accessible through reverse transition steps, and set their + // lookaheads to the reduction items lookahead + while (gl_list_size (queue) > 0) + { + state_item *prev = (state_item *) gl_list_get_at (queue, 0); + gl_list_remove_at (queue, 0); + prev->lookahead = lookahead; + if (SI_TRANSITION (prev)) + { + bitset rsi = si_revs[prev - state_items]; + bitset_iterator biter; + state_item_number sin; + BITSET_FOR_EACH (biter, rsi, sin, 0) + gl_list_add_first (queue, &state_items[sin]); + } + } + } +} + +bitsetv firsts = NULL; + +void +init_firsts (void) +{ + firsts = bitsetv_create (nvars, nsyms, BITSET_FIXED); + for (rule_number i = 0; i < nrules; ++i) + { + rule *r = rules + i; + item_number *n = r->rhs; + // Iterate through nullable nonterminals to try to find a terminal. + while (item_number_is_symbol_number (*n) && ISVAR (*n) + && nullable[*n - ntokens]) + ++n; + if (item_number_is_rule_number (*n) || ISVAR (*n)) + continue; + + symbol_number lhs = r->lhs->number; + bitset_set (FIRSTS (lhs), *n); + } + bool change = true; + while (change) + { + change = false; + for (rule_number i = 0; i < nrules; ++i) + { + rule *r = rules + i; + symbol_number lhs = r->lhs->number; + bitset f_lhs = FIRSTS (lhs); + for (item_number *n = r->rhs; + item_number_is_symbol_number (*n) && + ISVAR (*n); + ++n) + { + bitset f = FIRSTS (*n); + if (!bitset_subset_p (f_lhs, f)) + { + change = true; + bitset_union (f_lhs, f_lhs, f); + } + if (!nullable[*n - ntokens]) + break; + } + } + } +} + +static inline void +disable_state_item (state_item_number sin) +{ + si_trans[sin] = -2; + hash_pair_remove (si_prods, sin); +} + +/* + To make searches more efficient, we can prune away paths that are + caused by disabled transitions. + */ +static void +prune_disabled_paths (void) +{ + for (int i = nstate_items - 1; i >= 0; --i) + { + state_item *si = state_items + i; + if (si_trans[i] == -1 && item_number_is_symbol_number (*si->item)) + { + // disable the transitions out of i + for (state_item_number j = si_trans[i]; j != -1; j = si_trans[j]) + disable_state_item (j); + + gl_list_t queue = + gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1, + (const void **) &si); + + // For each disabled transition, traverse through all state_items + // accessible through reverse transition steps, and set their + // lookaheads to the reduction items lookahead + while (gl_list_size (queue) > 0) + { + const state_item *prev = gl_list_get_at (queue, 0); + gl_list_remove_at (queue, 0); + state_item_number prev_num = prev - state_items; + disable_state_item (prev_num); + + bitset rsi = si_revs[prev_num]; + bitset_iterator biter; + state_item_number sin; + BITSET_FOR_EACH (biter, rsi, sin, 0) + { + if (SI_TRANSITION (prev)) + gl_list_add_first (queue, &state_items[sin]); + else + { + bitset p = si_prods_lookup (sin); + if (p) + bitset_reset (p, prev_num); + } + } + } + } + } +} + +void +print_state_item (const state_item *si, FILE *out) +{ + fprintf (out, "%d:", si->state->number); + item_print (si->item, NULL, out); + putc ('\n', out); +} + +/** + * Report set counts and the state_item graph if trace is enabled + */ +static void +state_items_report (void) +{ + printf ("# state items: %zu\n", nstate_items); + for (state_number i = 0; i < nstates; ++i) + { + printf ("State %d:\n", i); + for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j) + { + item_print (state_items[j].item, NULL, stdout); + puts (""); + if (si_trans[j] >= 0) + { + fputs (" -> ", stdout); + print_state_item (state_items + si_trans[j], stdout); + } + + bitset sets[2] = { si_prods_lookup (j), si_revs[j] }; + const char *txt[2] = { " => ", " <- " }; + for (int seti = 0; seti < 2; ++seti) + { + bitset b = sets[seti]; + if (b) + { + bitset_iterator biter; + state_item_number sin; + BITSET_FOR_EACH (biter, b, sin, 0) + { + fputs (txt[seti], stdout); + print_state_item (state_items + sin, stdout); + } + } + } + puts (""); + } + } + printf ("FIRSTS\n"); + for (symbol_number i = ntokens; i < nsyms; ++i) + { + printf (" %s firsts\n", symbols[i]->tag); + bitset_iterator iter; + symbol_number j; + BITSET_FOR_EACH (iter, FIRSTS (i), j, 0) + printf (" %s\n", symbols[j]->tag); + } + puts ("\n"); +} + +void +state_items_init (void) +{ + time_t start = time (NULL); + init_state_items (); + init_trans (); + init_prods (); + gen_lookaheads (); + init_firsts (); + prune_disabled_paths (); + if (trace_flag & trace_cex) + { + printf ("init: %f\n", difftime (time (NULL), start)); + state_items_report (); + } +} + +void +state_items_free (void) +{ + hash_free (si_prods); + bitsetv_free (si_revs); + free (si_trans); + free (state_items); + bitsetv_free (firsts); +} + +/** + * Determine, using precedence and associativity, whether the next + * production is allowed from the current production. + */ +bool +production_allowed (const state_item *si, const state_item *next) +{ + sym_content *s1 = item_rule (si->item)->lhs; + sym_content *s2 = item_rule (next->item)->lhs; + int prec1 = s1->prec; + int prec2 = s2->prec; + if (prec1 >= 0 && prec2 >= 0) + { + // Do not expand if lower precedence. + if (prec1 > prec2) + return false; + // Do not expand if same precedence, but left-associative. + if (prec1 == prec2 && s1->assoc == left_assoc) + return false; + } + return true; +} diff --git a/src/state-item.h b/src/state-item.h new file mode 100644 index 00000000..50996cd2 --- /dev/null +++ b/src/state-item.h @@ -0,0 +1,100 @@ +/* Counterexample Generation Search Nodes + + Copyright (C) 2019-2020 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef STATE_ITEM_H +#define STATE_ITEM_H + +#include <hash.h> +#include <gl_list.h> +#include <bitsetv.h> +#include "gram.h" +#include "state.h" + +/* + Initializes a graph connecting (state, production item) pairs to + pairs they can make a transition or production step to. This graph + is used to search for paths that represent counterexamples of some + conflict. + + state_items is an array of state state-item pairs ordered by state. + state_item_map maps state numbers to the first item which corresponds + to it in the array. A state's portion in state_items begins with its + items in the same order as it was in the state. This is then followed by + productions from the closure of the state in order by rule. + + There are two type of edges in this graph transitions and productions. + Transitions are the same as transitions from the parser except edges + are only between items from the same rule. These are stored as an + array "si_trans" (as most items will have transitions) which are indexed the + same way as state_items. + + Productions are edges from items with a nonterminal after the dot to + the production of that nonterminal in the same state. These edges are + stored as a hash map "si_prods" from a state_item to a set of what productions + it goes from/to + + The inverses of these edges are stored in an array of bitsets, "si_revs." + A state-item that begins with a dot will have reverse production edges, + and all others will have reverse transition edges. + + */ + +#define SI_DISABLED(sin) (si_trans[sin] == -2) +#define SI_PRODUCTION(si) ((si) == state_items || *((si)->item - 1) < 0) +#define SI_TRANSITION(si) ((si) != state_items && *((si)->item - 1) >= 0) + +typedef int state_item_number; + +typedef struct +{ + state *state; + item_number *item; + bitset lookahead; +} state_item; + +extern bitsetv firsts; +#define FIRSTS(sym) firsts[(sym) - ntokens] + +extern size_t nstate_items; +extern state_item_number *state_item_map; + +/** Array mapping state_item_numbers to state_items */ +extern state_item *state_items; + +/** state-item graph edges */ +extern state_item_number *si_trans; +extern bitsetv si_revs; + +state_item *state_item_lookup (state_number s, state_item_number off); + +static inline state_item_number +state_item_index_lookup (state_number s, state_item_number off) +{ + return state_item_map[s] + off; +} + +bitset si_prods_lookup (state_item_number si); + +void state_items_init (void); +void print_state_item (const state_item *si, FILE *out); +void state_items_free (void); + +bool production_allowed (const state_item *si, const state_item *next); + +#endif /* STATE_ITEM_H */ -- 2.20.1 (Apple Git-117)
