Author: emaste
Date: Mon Oct 10 19:09:35 2016
New Revision: 307003
URL: https://svnweb.freebsd.org/changeset/base/307003

Log:
  makewhatis: make output reproducible
  
  The mandoc search database generation uses each page's inode number as
  a hash key to index hard linked pages only once.  However, it also
  processed the pages ordered by hash key resulting in effectively non-
  deterministic output.
  
  Instead:
  
  1) provide fts_open() with a comparison function to process directories
     and files in a deterministic order
  2) in addition to the existing hash, insert pages into a linked list
     which will be sorted (by virtue of 1)
  3) iterate over pages by the list in 2, instead of hash order
  
  I will work on upstreaming this change.
  
  Reviewed by:  bapt
  MFC after:    1 month
  Sponsored by: The FreeBSD Foundation
  Differential Revision:        https://reviews.freebsd.org/D8213

Modified:
  head/contrib/mdocml/mandocdb.c

Modified: head/contrib/mdocml/mandocdb.c
==============================================================================
--- head/contrib/mdocml/mandocdb.c      Mon Oct 10 18:36:26 2016        (r307002)
+++ head/contrib/mdocml/mandocdb.c      Mon Oct 10 19:09:35 2016        (r307003)
@@ -103,6 +103,7 @@ struct      mpage {
        char            *arch;    /* architecture from file content */
        char            *title;   /* title from file content */
        char            *desc;    /* description from file content */
+       struct mpage    *next;    /* singly linked list */
        struct mlink    *mlinks;  /* singly linked list */
        int              form;    /* format from file content */
        int              name_head_done;
@@ -146,6 +147,7 @@ static      void     dbadd_mlink_name(const stru
 static int      dbopen(int);
 static void     dbprune(void);
 static void     filescan(const char *);
+static int      fts_compare(const FTSENT *const *, const FTSENT *const *);
 static void     mlink_add(struct mlink *, const struct stat *);
 static void     mlink_check(struct mpage *, struct mlink *);
 static void     mlink_free(struct mlink *);
@@ -204,6 +206,7 @@ static      struct ohash     strings; /* table o
 static sqlite3         *db = NULL; /* current database */
 static sqlite3_stmt    *stmts[STMT__MAX]; /* current statements */
 static uint64_t         name_mask;
+static struct mpage    *mpage_head;
 
 static const struct mdoc_handler mdocs[MDOC_MAX] = {
        { NULL, 0 },  /* Ap */
@@ -571,6 +574,20 @@ usage:
        return (int)MANDOCLEVEL_BADARG;
 }
 
+static int
+fts_compare(const FTSENT *const *a, const FTSENT *const *b)
+{
+
+       /*
+        * The mpage list is processed in the opposite order to which pages are
+        * added, so traverse the hierarchy in reverse alpha order, resulting
+        * in database inserts in alpha order. This is not required for correct
+        * operation, but is helpful when inspecting the database during
+        * development.
+        */
+       return -strcmp((*a)->fts_name, (*b)->fts_name);
+}
+
 /*
  * Scan a directory tree rooted at "basedir" for manpages.
  * We use fts(), scanning directory parts along the way for clues to our
@@ -600,8 +617,8 @@ treescan(void)
        argv[0] = ".";
        argv[1] = (char *)NULL;
 
-       f = fts_open((char * const *)argv,
-           FTS_PHYSICAL | FTS_NOCHDIR, NULL);
+       f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR,
+           fts_compare);
        if (f == NULL) {
                exitcode = (int)MANDOCLEVEL_SYSERR;
                say("", "&fts_open");
@@ -966,6 +983,8 @@ mlink_add(struct mlink *mlink, const str
                mpage = mandoc_calloc(1, sizeof(struct mpage));
                mpage->inodev.st_ino = inodev.st_ino;
                mpage->inodev.st_dev = inodev.st_dev;
+               mpage->next = mpage_head;
+               mpage_head = mpage;
                ohash_insert(&mpages, slot, mpage);
        } else
                mlink->next = mpage->mlinks;
@@ -989,20 +1008,18 @@ mpages_free(void)
 {
        struct mpage    *mpage;
        struct mlink    *mlink;
-       unsigned int     slot;
 
-       mpage = ohash_first(&mpages, &slot);
-       while (NULL != mpage) {
+       while (NULL != (mpage = mpage_head)) {
                while (NULL != (mlink = mpage->mlinks)) {
                        mpage->mlinks = mlink->next;
                        mlink_free(mlink);
                }
+               mpage_head = mpage->next;
                free(mpage->sec);
                free(mpage->arch);
                free(mpage->title);
                free(mpage->desc);
                free(mpage);
-               mpage = ohash_next(&mpages, &slot);
        }
 }
 
@@ -1123,16 +1140,14 @@ mpages_merge(struct mparse *mp)
        char                    *sodest;
        char                    *cp;
        int                      fd;
-       unsigned int             pslot;
 
        if ( ! nodb)
                SQL_EXEC("BEGIN TRANSACTION");
 
-       mpage = ohash_first(&mpages, &pslot);
-       while (mpage != NULL) {
+       for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
                mlinks_undupe(mpage);
                if ((mlink = mpage->mlinks) == NULL) {
-                       mpage = ohash_next(&mpages, &pslot);
+                       mpage = mpage->next;
                        continue;
                }
 
@@ -1256,7 +1271,6 @@ mpages_merge(struct mparse *mp)
 nextpage:
                ohash_delete(&strings);
                ohash_delete(&names);
-               mpage = ohash_next(&mpages, &pslot);
        }
 
        if (0 == nodb)
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"

Reply via email to