Author: cem
Date: Tue Nov 19 04:30:23 2019
New Revision: 354847
URL: https://svnweb.freebsd.org/changeset/base/354847

Log:
  unifdef(1): Improve worst-case bound on symbol resolution
  
  Use RB_TREE to make some algorithms O(lg N) and O(N lg N) instead of O(N)
  and O(N^2).  Because N is typically small and the former linear array also has
  great constant factors (as a property of CPU caching), this doesn't provide
  material benefit most or all of the time.
  
  While here, remove arbitrarily limit on number of macros understood.

Modified:
  head/usr.bin/unifdef/unifdef.c

Modified: head/usr.bin/unifdef/unifdef.c
==============================================================================
--- head/usr.bin/unifdef/unifdef.c      Tue Nov 19 04:23:57 2019        
(r354846)
+++ head/usr.bin/unifdef/unifdef.c      Tue Nov 19 04:30:23 2019        
(r354847)
@@ -45,8 +45,11 @@
  *   it possible to handle all "dodgy" directives correctly.
  */
 
+#include <sys/param.h>
 #include <sys/stat.h>
+#include <sys/tree.h>
 
+#include <assert.h>
 #include <ctype.h>
 #include <err.h>
 #include <stdarg.h>
@@ -149,7 +152,6 @@ static char const * const linestate_name[] = {
  */
 #define        MAXDEPTH        64                      /* maximum #if nesting 
*/
 #define        MAXLINE         4096                    /* maximum length of 
line */
-#define        MAXSYMS         16384                   /* maximum number of 
symbols */
 
 /*
  * Sometimes when editing a keyword the replacement text is longer, so
@@ -158,6 +160,26 @@ static char const * const linestate_name[] = {
 #define        EDITSLOP        10
 
 /*
+ * C17/18 allow 63 characters per macro name, but up to 127 arbitrarily large
+ * parameters.
+ */
+struct macro {
+       RB_ENTRY(macro) entry;
+       const char      *name;
+       const char      *value;
+       bool            ignore;         /* -iDsym or -iUsym */
+};
+
+static int
+macro_cmp(struct macro *a, struct macro *b)
+{
+       return (strcmp(a->name, b->name));
+}
+
+static RB_HEAD(macrohd, macro) macro_tree = RB_INITIALIZER(&macro_tree);
+RB_GENERATE_STATIC(macrohd, macro, entry, macro_cmp);
+
+/*
  * Globals.
  */
 
@@ -174,11 +196,6 @@ static bool             symlist;           /* -s: output 
symbol
 static bool             symdepth;              /* -S: output symbol depth */
 static bool             text;                  /* -t: this is a text file */
 
-static const char      *symname[MAXSYMS];      /* symbol name */
-static const char      *value[MAXSYMS];                /* -Dsym=value */
-static bool             ignore[MAXSYMS];       /* -iDsym or -iUsym */
-static int              nsyms;                 /* number of symbols */
-
 static FILE            *input;                 /* input file pointer */
 static const char      *filename;              /* input file name */
 static int              linenum;               /* current line number */
@@ -227,12 +244,12 @@ static char            *astrcat(const char *, const ch
 static void             cleantemp(void);
 static void             closeio(void);
 static void             debug(const char *, ...);
-static void             debugsym(const char *, int);
+static void             debugsym(const char *, const struct macro *);
 static bool             defundef(void);
 static void             defundefile(const char *);
 static void             done(void);
 static void             error(const char *);
-static int              findsym(const char **);
+static struct macro    *findsym(const char **);
 static void             flushline(bool);
 static void             hashline(void);
 static void             help(void);
@@ -807,7 +824,7 @@ static Linetype
 parseline(void)
 {
        const char *cp;
-       int cursym;
+       struct macro *cursym;
        Linetype retval;
        Comment_state wascomment;
 
@@ -829,15 +846,15 @@ parseline(void)
        if ((cp = matchsym("ifdef", keyword)) != NULL ||
            (cp = matchsym("ifndef", keyword)) != NULL) {
                cp = skipcomment(cp);
-               if ((cursym = findsym(&cp)) < 0)
+               if ((cursym = findsym(&cp)) == NULL)
                        retval = LT_IF;
                else {
                        retval = (keyword[2] == 'n')
                            ? LT_FALSE : LT_TRUE;
-                       if (value[cursym] == NULL)
+                       if (cursym->value == NULL)
                                retval = (retval == LT_TRUE)
                                    ? LT_FALSE : LT_TRUE;
-                       if (ignore[cursym])
+                       if (cursym->ignore)
                                retval = (retval == LT_TRUE)
                                    ? LT_TRUEI : LT_FALSEI;
                }
@@ -1037,7 +1054,7 @@ eval_unary(const struct ops *ops, long *valp, const ch
 {
        const char *cp;
        char *ep;
-       int sym;
+       struct macro *sym;
        bool defparen;
        Linetype lt;
 
@@ -1102,27 +1119,27 @@ eval_unary(const struct ops *ops, long *valp, const ch
                        debug("eval%d defined missing ')'", prec(ops));
                        return (LT_ERROR);
                }
-               if (sym < 0) {
+               if (sym == NULL) {
                        debug("eval%d defined unknown", prec(ops));
                        lt = LT_IF;
                } else {
-                       debug("eval%d defined %s", prec(ops), symname[sym]);
-                       *valp = (value[sym] != NULL);
+                       debug("eval%d defined %s", prec(ops), sym->name);
+                       *valp = (sym->value != NULL);
                        lt = *valp ? LT_TRUE : LT_FALSE;
                }
                constexpr = false;
        } else if (!endsym(*cp)) {
                debug("eval%d symbol", prec(ops));
                sym = findsym(&cp);
-               if (sym < 0) {
+               if (sym == NULL) {
                        lt = LT_IF;
                        cp = skipargs(cp);
-               } else if (value[sym] == NULL) {
+               } else if (sym->value == NULL) {
                        *valp = 0;
                        lt = LT_FALSE;
                } else {
-                       *valp = strtol(value[sym], &ep, 0);
-                       if (*ep != '\0' || ep == value[sym])
+                       *valp = strtol(sym->value, &ep, 0);
+                       if (*ep != '\0' || ep == sym->value)
                                return (LT_ERROR);
                        lt = *valp ? LT_TRUE : LT_FALSE;
                        cp = skipargs(cp);
@@ -1439,17 +1456,17 @@ matchsym(const char *s, const char *t)
  * Look for the symbol in the symbol table. If it is found, we return
  * the symbol table index, else we return -1.
  */
-static int
+static struct macro *
 findsym(const char **strp)
 {
        const char *str;
-       int symind;
+       struct macro key, *res;
 
        str = *strp;
        *strp = skipsym(str);
        if (symlist) {
                if (*strp == str)
-                       return (-1);
+                       return (NULL);
                if (symdepth && firstsym)
                        printf("%s%3d", zerosyms ? "" : "\n", depth);
                firstsym = zerosyms = false;
@@ -1458,15 +1475,14 @@ findsym(const char **strp)
                       (int)(*strp-str), str,
                       symdepth ? "" : "\n");
                /* we don't care about the value of the symbol */
-               return (0);
+               return (NULL);
        }
-       for (symind = 0; symind < nsyms; ++symind) {
-               if (matchsym(symname[symind], str) != NULL) {
-                       debugsym("findsym", symind);
-                       return (symind);
-               }
-       }
-       return (-1);
+
+       key.name = str;
+       res = RB_FIND(macrohd, &macro_tree, &key);
+       if (res != NULL)
+               debugsym("findsym", res);
+       return (res);
 }
 
 /*
@@ -1476,22 +1492,23 @@ static void
 indirectsym(void)
 {
        const char *cp;
-       int changed, sym, ind;
+       int changed;
+       struct macro *sym, *ind;
 
        do {
                changed = 0;
-               for (sym = 0; sym < nsyms; ++sym) {
-                       if (value[sym] == NULL)
+               RB_FOREACH(sym, macrohd, &macro_tree) {
+                       if (sym->value == NULL)
                                continue;
-                       cp = value[sym];
+                       cp = sym->value;
                        ind = findsym(&cp);
-                       if (ind == -1 || ind == sym ||
+                       if (ind == NULL || ind == sym ||
                            *cp != '\0' ||
-                           value[ind] == NULL ||
-                           value[ind] == value[sym])
+                           ind->value == NULL ||
+                           ind->value == sym->value)
                                continue;
                        debugsym("indir...", sym);
-                       value[sym] = value[ind];
+                       sym->value = ind->value;
                        debugsym("...ectsym", sym);
                        changed++;
                }
@@ -1523,29 +1540,29 @@ addsym1(bool ignorethis, bool definethis, char *symval
  * Add a symbol to the symbol table.
  */
 static void
-addsym2(bool ignorethis, const char *sym, const char *val)
+addsym2(bool ignorethis, const char *symname, const char *val)
 {
-       const char *cp = sym;
-       int symind;
+       const char *cp = symname;
+       struct macro *sym, *r;
 
-       symind = findsym(&cp);
-       if (symind < 0) {
-               if (nsyms >= MAXSYMS)
-                       errx(2, "too many symbols");
-               symind = nsyms++;
+       sym = findsym(&cp);
+       if (sym == NULL) {
+               sym = calloc(1, sizeof(*sym));
+               sym->ignore = ignorethis;
+               sym->name = symname;
+               sym->value = val;
+               r = RB_INSERT(macrohd, &macro_tree, sym);
+               assert(r == NULL);
        }
-       ignore[symind] = ignorethis;
-       symname[symind] = sym;
-       value[symind] = val;
-       debugsym("addsym", symind);
+       debugsym("addsym", sym);
 }
 
 static void
-debugsym(const char *why, int symind)
+debugsym(const char *why, const struct macro *sym)
 {
-       debug("%s %s%c%s", why, symname[symind],
-           value[symind] ? '=' : ' ',
-           value[symind] ? value[symind] : "undef");
+       debug("%s %s%c%s", why, sym->name,
+           sym->value ? '=' : ' ',
+           sym->value ? sym->value : "undef");
 }
 
 /*
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to