Author: cem Date: Tue Nov 19 04:30:23 2019 New Revision: 354847 URL: https://svnweb.freebsd.org/changeset/base/354847
Log: unifdef(1): Improve worst-case bound on symbol resolution Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N) and O(N^2). Because N is typically small and the former linear array also has great constant factors (as a property of CPU caching), this doesn't provide material benefit most or all of the time. While here, remove arbitrarily limit on number of macros understood. Modified: head/usr.bin/unifdef/unifdef.c Modified: head/usr.bin/unifdef/unifdef.c ============================================================================== --- head/usr.bin/unifdef/unifdef.c Tue Nov 19 04:23:57 2019 (r354846) +++ head/usr.bin/unifdef/unifdef.c Tue Nov 19 04:30:23 2019 (r354847) @@ -45,8 +45,11 @@ * it possible to handle all "dodgy" directives correctly. */ +#include <sys/param.h> #include <sys/stat.h> +#include <sys/tree.h> +#include <assert.h> #include <ctype.h> #include <err.h> #include <stdarg.h> @@ -149,7 +152,6 @@ static char const * const linestate_name[] = { */ #define MAXDEPTH 64 /* maximum #if nesting */ #define MAXLINE 4096 /* maximum length of line */ -#define MAXSYMS 16384 /* maximum number of symbols */ /* * Sometimes when editing a keyword the replacement text is longer, so @@ -158,6 +160,26 @@ static char const * const linestate_name[] = { #define EDITSLOP 10 /* + * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large + * parameters. + */ +struct macro { + RB_ENTRY(macro) entry; + const char *name; + const char *value; + bool ignore; /* -iDsym or -iUsym */ +}; + +static int +macro_cmp(struct macro *a, struct macro *b) +{ + return (strcmp(a->name, b->name)); +} + +static RB_HEAD(macrohd, macro) macro_tree = RB_INITIALIZER(¯o_tree); +RB_GENERATE_STATIC(macrohd, macro, entry, macro_cmp); + +/* * Globals. */ @@ -174,11 +196,6 @@ static bool symlist; /* -s: output symbol static bool symdepth; /* -S: output symbol depth */ static bool text; /* -t: this is a text file */ -static const char *symname[MAXSYMS]; /* symbol name */ -static const char *value[MAXSYMS]; /* -Dsym=value */ -static bool ignore[MAXSYMS]; /* -iDsym or -iUsym */ -static int nsyms; /* number of symbols */ - static FILE *input; /* input file pointer */ static const char *filename; /* input file name */ static int linenum; /* current line number */ @@ -227,12 +244,12 @@ static char *astrcat(const char *, const ch static void cleantemp(void); static void closeio(void); static void debug(const char *, ...); -static void debugsym(const char *, int); +static void debugsym(const char *, const struct macro *); static bool defundef(void); static void defundefile(const char *); static void done(void); static void error(const char *); -static int findsym(const char **); +static struct macro *findsym(const char **); static void flushline(bool); static void hashline(void); static void help(void); @@ -807,7 +824,7 @@ static Linetype parseline(void) { const char *cp; - int cursym; + struct macro *cursym; Linetype retval; Comment_state wascomment; @@ -829,15 +846,15 @@ parseline(void) if ((cp = matchsym("ifdef", keyword)) != NULL || (cp = matchsym("ifndef", keyword)) != NULL) { cp = skipcomment(cp); - if ((cursym = findsym(&cp)) < 0) + if ((cursym = findsym(&cp)) == NULL) retval = LT_IF; else { retval = (keyword[2] == 'n') ? LT_FALSE : LT_TRUE; - if (value[cursym] == NULL) + if (cursym->value == NULL) retval = (retval == LT_TRUE) ? LT_FALSE : LT_TRUE; - if (ignore[cursym]) + if (cursym->ignore) retval = (retval == LT_TRUE) ? LT_TRUEI : LT_FALSEI; } @@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const ch { const char *cp; char *ep; - int sym; + struct macro *sym; bool defparen; Linetype lt; @@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const ch debug("eval%d defined missing ')'", prec(ops)); return (LT_ERROR); } - if (sym < 0) { + if (sym == NULL) { debug("eval%d defined unknown", prec(ops)); lt = LT_IF; } else { - debug("eval%d defined %s", prec(ops), symname[sym]); - *valp = (value[sym] != NULL); + debug("eval%d defined %s", prec(ops), sym->name); + *valp = (sym->value != NULL); lt = *valp ? LT_TRUE : LT_FALSE; } constexpr = false; } else if (!endsym(*cp)) { debug("eval%d symbol", prec(ops)); sym = findsym(&cp); - if (sym < 0) { + if (sym == NULL) { lt = LT_IF; cp = skipargs(cp); - } else if (value[sym] == NULL) { + } else if (sym->value == NULL) { *valp = 0; lt = LT_FALSE; } else { - *valp = strtol(value[sym], &ep, 0); - if (*ep != '\0' || ep == value[sym]) + *valp = strtol(sym->value, &ep, 0); + if (*ep != '\0' || ep == sym->value) return (LT_ERROR); lt = *valp ? LT_TRUE : LT_FALSE; cp = skipargs(cp); @@ -1439,17 +1456,17 @@ matchsym(const char *s, const char *t) * Look for the symbol in the symbol table. If it is found, we return * the symbol table index, else we return -1. */ -static int +static struct macro * findsym(const char **strp) { const char *str; - int symind; + struct macro key, *res; str = *strp; *strp = skipsym(str); if (symlist) { if (*strp == str) - return (-1); + return (NULL); if (symdepth && firstsym) printf("%s%3d", zerosyms ? "" : "\n", depth); firstsym = zerosyms = false; @@ -1458,15 +1475,14 @@ findsym(const char **strp) (int)(*strp-str), str, symdepth ? "" : "\n"); /* we don't care about the value of the symbol */ - return (0); + return (NULL); } - for (symind = 0; symind < nsyms; ++symind) { - if (matchsym(symname[symind], str) != NULL) { - debugsym("findsym", symind); - return (symind); - } - } - return (-1); + + key.name = str; + res = RB_FIND(macrohd, ¯o_tree, &key); + if (res != NULL) + debugsym("findsym", res); + return (res); } /* @@ -1476,22 +1492,23 @@ static void indirectsym(void) { const char *cp; - int changed, sym, ind; + int changed; + struct macro *sym, *ind; do { changed = 0; - for (sym = 0; sym < nsyms; ++sym) { - if (value[sym] == NULL) + RB_FOREACH(sym, macrohd, ¯o_tree) { + if (sym->value == NULL) continue; - cp = value[sym]; + cp = sym->value; ind = findsym(&cp); - if (ind == -1 || ind == sym || + if (ind == NULL || ind == sym || *cp != '\0' || - value[ind] == NULL || - value[ind] == value[sym]) + ind->value == NULL || + ind->value == sym->value) continue; debugsym("indir...", sym); - value[sym] = value[ind]; + sym->value = ind->value; debugsym("...ectsym", sym); changed++; } @@ -1523,29 +1540,29 @@ addsym1(bool ignorethis, bool definethis, char *symval * Add a symbol to the symbol table. */ static void -addsym2(bool ignorethis, const char *sym, const char *val) +addsym2(bool ignorethis, const char *symname, const char *val) { - const char *cp = sym; - int symind; + const char *cp = symname; + struct macro *sym, *r; - symind = findsym(&cp); - if (symind < 0) { - if (nsyms >= MAXSYMS) - errx(2, "too many symbols"); - symind = nsyms++; + sym = findsym(&cp); + if (sym == NULL) { + sym = calloc(1, sizeof(*sym)); + sym->ignore = ignorethis; + sym->name = symname; + sym->value = val; + r = RB_INSERT(macrohd, ¯o_tree, sym); + assert(r == NULL); } - ignore[symind] = ignorethis; - symname[symind] = sym; - value[symind] = val; - debugsym("addsym", symind); + debugsym("addsym", sym); } static void -debugsym(const char *why, int symind) +debugsym(const char *why, const struct macro *sym) { - debug("%s %s%c%s", why, symname[symind], - value[symind] ? '=' : ' ', - value[symind] ? value[symind] : "undef"); + debug("%s %s%c%s", why, sym->name, + sym->value ? '=' : ' ', + sym->value ? sym->value : "undef"); } /* _______________________________________________ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"