commit 0ea9cff075677315e44a096b6e4d61a1252b95e9
Author:     Laslo Hunhold <d...@frign.de>
AuthorDate: Sat Jan 8 15:45:39 2022 +0100
Commit:     Laslo Hunhold <d...@frign.de>
CommitDate: Sat Jan 8 16:43:02 2022 +0100

    gen/util: Add properties-handling and clean up old range-list-functions
    
    As already announced, we will generate separate data-tables for
    separate properties. To keep it all strict-aliasing-compliant, we use
    a single properties-struct, which currently only contains one entry
    ("break_property") but will gain more entries as needed.
    Playing around with void-pointers quickly turns into undefined behaviour,
    which is why it makes no sense to have separate definitions for each
    property-type.
    Each "user" only uses a certain subset of the fields in the struct.
    Given that the unused ones will just be zero, they make no difference
    to the compression.
    
    To avoid code-duplication, the generation of break-property-tables
    is handled by a single function, which is then called in the respective
    generation tool.
    
    Signed-off-by: Laslo Hunhold <d...@frign.de>

diff --git a/gen/properties.c b/gen/properties.c
index df30d7b..db08806 100644
--- a/gen/properties.c
+++ b/gen/properties.c
@@ -12,22 +12,6 @@
 #define FILE_EMOJI    "data/emoji-data.txt"
 #define FILE_GRAPHEME "data/GraphemeBreakProperty.txt"
 
-struct properties {
-       uint_least8_t char_break_property;
-};
-
-struct property_spec {
-       const char *enumname;
-       const char *file;
-       const char *ucdname;
-};
-
-struct property_payload {
-       struct properties *prop;
-       const struct property_spec *spec;
-       uint_least8_t speclen;
-};
-
 static const struct property_spec char_break_property[] = {
        {
                .enumname = "OTHER",
@@ -106,302 +90,14 @@ static const struct property_spec char_break_property[] = {
        },
 };
 
-static int
-break_property_callback(char *file, char **field, size_t nfields,
-                        char *comment, void *payload)
-{
-       /* prop always has the length 0x110000 */
-       struct property_payload *p = (struct property_payload *)payload;
-       struct range r;
-       uint_least8_t i;
-       uint_least32_t cp;
-
-       (void)comment;
-
-       if (nfields < 2) {
-               return 1;
-       }
-
-       for (i = 0; i < p->speclen; i++) {
-               /* identify fitting file and identifier */
-               if (p->spec[i].file &&
-                   !strcmp(p->spec[i].file, file) &&
-                   !strcmp(p->spec[i].ucdname, field[1])) {
-                       /* parse range in first field */
-                       if (range_parse(field[0], &r)) {
-                               return 1;
-                       }
-
-                       /* apply to all codepoints in the range */
-                       for (cp = r.lower; cp <= r.upper; cp++) {
-                               if (p->spec == char_break_property) {
-                                       if (p->prop[cp].char_break_property != 
0) {
-                                               fprintf(stderr, 
"break_property_callback: "
-                                                       "Character break 
property overlap.\n");
-                                               exit(1);
-                                       }
-                                       p->prop[cp].char_break_property = i;
-                               } else {
-                                       fprintf(stderr, 
"break_property_callback: "
-                                                       "Unknown 
specification.\n");
-                                       exit(1);
-                               }
-                       }
-
-                       break;
-               }
-       }
-
-       return 0;
-}
-
-struct compressed_properties {
-       size_t *offset;
-       struct properties *data;
-       size_t datalen;
-};
-
-static void
-compress_properties(const struct properties *prop,
-                    struct compressed_properties *comp)
-{
-       uint_least32_t cp, i;
-
-       /* initialization */
-       if (!(comp->offset = malloc((size_t)0x110000 * 
sizeof(*(comp->offset))))) {
-               fprintf(stderr, "malloc: %s\n", strerror(errno));
-               exit(1);
-       }
-       comp->data = NULL;
-       comp->datalen = 0;
-
-       for (cp = 0; cp < 0x110000; cp++) {
-               for (i = 0; i < comp->datalen; i++) {
-                       if (!memcmp(&(prop[cp]), &(comp->data[i]), 
sizeof(*prop))) {
-                               /* found a match! */
-                               comp->offset[cp] = i;
-                               break;
-                       }
-               }
-               if (i == comp->datalen) {
-                       /*
-                        * found no matching properties-struct, so
-                        * add current properties to data and add the
-                        * offset in the offset-table
-                        */
-                       if (!(comp->data = reallocarray(comp->data,
-                                                       ++(comp->datalen),
-                                                       
sizeof(*(comp->data))))) {
-                               fprintf(stderr, "reallocarray: %s\n",
-                                       strerror(errno));
-                               exit(1);
-                       }
-                       memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
-                              sizeof(*prop));
-                       comp->offset[cp] = comp->datalen - 1;
-               }
-       }
-}
-
-struct major_minor_properties {
-       size_t *major;
-       size_t *minor;
-       size_t minorlen;
-};
-
-static double
-get_major_minor_properties(const struct compressed_properties *comp,
-                           struct major_minor_properties *mm)
-{
-       size_t i, j, compression_count = 0;
-
-       /*
-        * we currently have an array comp->offset which maps the
-        * codepoints 0..0x110000 to offsets into comp->data.
-        * To improve cache-locality instead and allow a bit of
-        * compressing, instead of directly mapping a codepoint
-        * 0xAAAABB with comp->offset, we generate two arrays major
-        * and minor such that
-        *    comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
-        * This yields a major-array of length 2^16 and a minor array
-        * of variable length depending on how many common subsequences
-        * can be filtered out.
-        */
-
-       /* initialize */
-       if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
-               fprintf(stderr, "malloc: %s\n", strerror(errno));
-               exit(1);
-       }
-       mm->minor = NULL;
-       mm->minorlen = 0;
-
-       printf("#include <stdint.h>\n\n");
-
-       for (i = 0; i < (size_t)0x1100; i++) {
-               /*
-                * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
-                * and check if its corresponding offset-data already
-                * exists in minor (because then we just point there
-                * and need less storage)
-                */
-               for (j = 0; j + 0xFF < mm->minorlen; j++) {
-                       if (!memcmp(&(comp->offset[i << 8]),
-                                   &(mm->minor[j]),
-                                   sizeof(*(comp->offset)) * 0x100)) {
-                               break;
-                       }
-               }
-               if (j + 0xFF < mm->minorlen) {
-                       /* found an index */
-                       compression_count++;
-                       mm->major[i] = j;
-               } else {
-                       /*
-                        * add "new" sequence to minor and point to it
-                        * in major
-                        */
-                       mm->minorlen += 0x100;
-                       if (!(mm->minor = reallocarray(mm->minor,
-                                                      mm->minorlen,
-                                                      sizeof(*(mm->minor))))) {
-                               fprintf(stderr, "reallocarray: %s\n",
-                                       strerror(errno));
-                               exit(1);
-                       }
-                       memcpy(&(mm->minor[mm->minorlen - 0x100]),
-                              &(comp->offset[i << 8]),
-                              sizeof(*(mm->minor)) * 0x100);
-                       mm->major[i] = mm->minorlen - 0x100;
-               }
-       }
-
-       /* return compression ratio */
-       return (double)compression_count / 0x1100 * 100;
-}
-
-static void
-print_lookup_table(char *name, size_t *data, size_t datalen)
-{
-       char *type;
-       size_t i, maxval;
-
-       for (i = 0, maxval = 0; i < datalen; i++) {
-               if (data[i] > maxval) {
-                       maxval = data[i];
-               }
-       }
-
-       type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t"  :
-              (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
-              (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
-                                             "uint_least64_t";
-
-       printf("static const %s %s[] = {\n\t", type, name);
-       for (i = 0; i < datalen; i++) {
-               printf("%zu", data[i]);
-               if (i + 1 == datalen) {
-                       printf("\n");
-               } else if ((i + 1) % 8 != 0) {
-                       printf(", ");
-               } else {
-                       printf(",\n\t");
-               }
-
-       }
-       printf("};\n");
-}
-
-static uint_least8_t
-get_value(const void *payload, size_t offset)
-{
-       return ((const struct properties *)payload)[offset].char_break_property;
-}
-
-static void
-print_derived_lookup_table(char *name, size_t *offset, size_t offsetlen,
-                           uint_least8_t (*get_value)(const void *, size_t),
-                           const void *payload)
-{
-       size_t i;
-
-       printf("static const uint_least8_t %s[] = {\n\t", name);
-       for (i = 0; i < offsetlen; i++) {
-               printf("%"PRIuLEAST8, get_value(payload, offset[i]));
-               if (i + 1 == offsetlen) {
-                       printf("\n");
-               } else if ((i + 1) % 8 != 0) {
-                       printf(", ");
-               } else {
-                       printf(",\n\t");
-               }
-
-       }
-       printf("};\n");
-}
-
-static void
-print_enum(const struct property_spec *spec, size_t speclen,
-           const char *enumname, const char *enumprefix)
-{
-       size_t i;
-
-       printf("enum %s {\n", enumname);
-       for (i = 0; i < speclen; i++) {
-               printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
-       }
-       printf("\tNUM_%sS,\n};\n\n", enumprefix);
-}
-
 int
 main(int argc, char *argv[])
 {
-       struct compressed_properties comp;
-       struct major_minor_properties mm;
-       struct property_payload payload;
-       struct properties *prop;
-
        (void)argc;
 
-       /* allocate property buffer for all codepoints */
-       if (!(prop = calloc(0x110000, sizeof(*prop)))) {
-               fprintf(stderr, "calloc: %s\n", strerror(errno));
-               exit(1);
-       }
-
-       /* extract properties */
-       payload.prop = prop;
-       payload.spec = char_break_property;
-       payload.speclen = LEN(char_break_property);
-
-       parse_file_with_callback(FILE_EMOJI, break_property_callback, &payload);
-       parse_file_with_callback(FILE_GRAPHEME, break_property_callback, 
&payload);
-
-       /*
-        * deduplicate by generating an array of offsets into prop where
-        * common data points at the same offset
-        */
-       compress_properties(prop, &comp);
-
-       /* generate major-minor-offset-tables */
-       fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv[0],
-               get_major_minor_properties(&comp, &mm));
-
-       /* print data */
-       print_enum(char_break_property, LEN(char_break_property),
-                  "char_break_property", "CHAR_BREAK_PROP");
-
-       print_lookup_table("major", mm.major, 0x1100);
-       printf("\n");
-       print_derived_lookup_table("minor", mm.minor, mm.minorlen, get_value,
-                                  comp.data);
-
-       /* free data */
-       free(prop);
-       free(comp.data);
-       free(comp.offset);
-       free(mm.major);
-       free(mm.minor);
+       properties_generate_break_property(char_break_property,
+                                          LEN(char_break_property),
+                                          "char", argv[0]);
 
        return 0;
 }
diff --git a/gen/util.c b/gen/util.c
index c6460de..ea08588 100644
--- a/gen/util.c
+++ b/gen/util.c
@@ -1,5 +1,7 @@
 /* See LICENSE file for copyright and license details. */
+#include <ctype.h>
 #include <errno.h>
+#include <inttypes.h>
 #include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -8,10 +10,28 @@
 
 #include "util.h"
 
-struct property_list_payload
-{
-       struct property *prop;
-       size_t numprops;
+struct range {
+       uint_least32_t lower;
+       uint_least32_t upper;
+};
+
+struct properties_payload {
+       struct properties *prop;
+       const struct property_spec *spec;
+       uint_least8_t speclen;
+       int (*set_value)(struct properties_payload *, uint_least32_t, 
uint_least8_t);
+};
+
+struct properties_compressed {
+       size_t *offset;
+       struct properties *data;
+       size_t datalen;
+};
+
+struct properties_major_minor {
+       size_t *major;
+       size_t *minor;
+       size_t minorlen;
 };
 
 struct segment_test_payload
@@ -63,7 +83,7 @@ hextocp(const char *str, size_t len, uint_least32_t *cp)
        return 0;
 }
 
-int
+static int
 range_parse(const char *str, struct range *range)
 {
        char *p;
@@ -85,28 +105,9 @@ range_parse(const char *str, struct range *range)
        return 0;
 }
 
-static void
-range_list_append(struct range **range, size_t *nranges, const struct range 
*new)
-{
-       if (*nranges > 0 && (*range)[*nranges - 1].upper == new->lower) {
-               /* we can merge with previous entry */
-               (*range)[*nranges - 1].upper = new->upper;
-       } else {
-               /* need to append new entry */
-               if ((*range = realloc(*range, (++(*nranges)) *
-                                     sizeof(**range))) == NULL) {
-                       fprintf(stderr, "range_list_append: realloc: %s.\n",
-                               strerror(errno));
-                       exit(1);
-               }
-               (*range)[*nranges - 1].lower = new->lower;
-               (*range)[*nranges - 1].upper = new->upper;
-       }
-}
-
 void
-parse_file_with_callback(char *fname, int (*callback)(char *, char **,
-                         size_t, char *, void *), void *payload)
+parse_file_with_callback(const char *fname, int (*callback)(const char *,
+                         char **, size_t, char *, void *), void *payload)
 {
        FILE *fp;
        char *line = NULL, **field = NULL, *comment;
@@ -197,12 +198,14 @@ parse_file_with_callback(char *fname, int (*callback)(char *, char **,
 }
 
 static int
-property_list_callback(char *fname, char **field, size_t nfields,
-                       char *comment, void *payload)
+properties_callback(const char *file, char **field, size_t nfields,
+                    char *comment, void *payload)
 {
-       struct property *prop = ((struct property_list_payload *)payload)->prop;
+       /* prop always has the length 0x110000 */
+       struct properties_payload *p = (struct properties_payload *)payload;
        struct range r;
-       size_t i, numprops = ((struct property_list_payload 
*)payload)->numprops;
+       uint_least8_t i;
+       uint_least32_t cp;
 
        (void)comment;
 
@@ -210,14 +213,22 @@ property_list_callback(char *fname, char **field, size_t nfields,
                return 1;
        }
 
-       for (i = 0; i < numprops; i++) {
-               if (!strcmp(field[1], prop[i].identifier) &&
-                   !strcmp(fname, prop[i].fname)) {
+       for (i = 0; i < p->speclen; i++) {
+               /* identify fitting file and identifier */
+               if (p->spec[i].file &&
+                   !strcmp(p->spec[i].file, file) &&
+                   !strcmp(p->spec[i].ucdname, field[1])) {
+                       /* parse range in first field */
                        if (range_parse(field[0], &r)) {
                                return 1;
                        }
-                       range_list_append(&(prop[i].table),
-                                         &(prop[i].tablelen), &r);
+
+                       /* apply to all codepoints in the range */
+                       for (cp = r.lower; cp <= r.upper; cp++) {
+                               if (p->set_value(payload, cp, i)) {
+                                       exit(1);
+                               }
+                       }
                        break;
                }
        }
@@ -225,73 +236,290 @@ property_list_callback(char *fname, char **field, size_t nfields,
        return 0;
 }
 
-void
-property_list_parse(struct property *prop, size_t numprops)
+static void
+properties_compress(const struct properties *prop,
+                    struct properties_compressed *comp)
 {
-       struct property_list_payload pl = {
-               .prop = prop,
-               .numprops = numprops
-       };
-       size_t i;
+       uint_least32_t cp, i;
 
-       /* make sure to parse each file only once */
-       for (i = 0; i < numprops; i++) {
-               if (prop[i].tablelen > 0) {
-                       /* property's file was already parsed */
-                       continue;
+       /* initialization */
+       if (!(comp->offset = malloc((size_t)0x110000 * 
sizeof(*(comp->offset))))) {
+               fprintf(stderr, "malloc: %s\n", strerror(errno));
+               exit(1);
+       }
+       comp->data = NULL;
+       comp->datalen = 0;
+
+       for (cp = 0; cp < 0x110000; cp++) {
+               for (i = 0; i < comp->datalen; i++) {
+                       if (!memcmp(&(prop[cp]), &(comp->data[i]), 
sizeof(*prop))) {
+                               /* found a match! */
+                               comp->offset[cp] = i;
+                               break;
+                       }
+               }
+               if (i == comp->datalen) {
+                       /*
+                        * found no matching properties-struct, so
+                        * add current properties to data and add the
+                        * offset in the offset-table
+                        */
+                       if (!(comp->data = reallocarray(comp->data,
+                                                       ++(comp->datalen),
+                                                       
sizeof(*(comp->data))))) {
+                               fprintf(stderr, "reallocarray: %s\n",
+                                       strerror(errno));
+                               exit(1);
+                       }
+                       memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
+                              sizeof(*prop));
+                       comp->offset[cp] = comp->datalen - 1;
                }
+       }
+}
 
-               parse_file_with_callback(prop[i].fname,
-                                        property_list_callback, &pl);
+static double
+properties_get_major_minor(const struct properties_compressed *comp,
+                           struct properties_major_minor *mm)
+{
+       size_t i, j, compression_count = 0;
+
+       /*
+        * we currently have an array comp->offset which maps the
+        * codepoints 0..0x110000 to offsets into comp->data.
+        * To improve cache-locality instead and allow a bit of
+        * compressing, instead of directly mapping a codepoint
+        * 0xAAAABB with comp->offset, we generate two arrays major
+        * and minor such that
+        *    comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
+        * This yields a major-array of length 2^16 and a minor array
+        * of variable length depending on how many common subsequences
+        * can be filtered out.
+        */
+
+       /* initialize */
+       if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
+               fprintf(stderr, "malloc: %s\n", strerror(errno));
+               exit(1);
+       }
+       mm->minor = NULL;
+       mm->minorlen = 0;
+
+       printf("#include <stdint.h>\n\n");
+
+       for (i = 0; i < (size_t)0x1100; i++) {
+               /*
+                * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
+                * and check if its corresponding offset-data already
+                * exists in minor (because then we just point there
+                * and need less storage)
+                */
+               for (j = 0; j + 0xFF < mm->minorlen; j++) {
+                       if (!memcmp(&(comp->offset[i << 8]),
+                                   &(mm->minor[j]),
+                                   sizeof(*(comp->offset)) * 0x100)) {
+                               break;
+                       }
+               }
+               if (j + 0xFF < mm->minorlen) {
+                       /* found an index */
+                       compression_count++;
+                       mm->major[i] = j;
+               } else {
+                       /*
+                        * add "new" sequence to minor and point to it
+                        * in major
+                        */
+                       mm->minorlen += 0x100;
+                       if (!(mm->minor = reallocarray(mm->minor,
+                                                      mm->minorlen,
+                                                      sizeof(*(mm->minor))))) {
+                               fprintf(stderr, "reallocarray: %s\n",
+                                       strerror(errno));
+                               exit(1);
+                       }
+                       memcpy(&(mm->minor[mm->minorlen - 0x100]),
+                              &(comp->offset[i << 8]),
+                              sizeof(*(mm->minor)) * 0x100);
+                       mm->major[i] = mm->minorlen - 0x100;
+               }
        }
+
+       /* return compression ratio */
+       return (double)compression_count / 0x1100 * 100;
 }
 
-void
-property_list_print(const struct property *prop, size_t numprops,
-                    const char *identifier, const char *progname)
+static void
+properties_print_lookup_table(char *name, size_t *data, size_t datalen)
 {
-       size_t i, j;
+       char *type;
+       size_t i, maxval;
 
-       printf("/* Automatically generated by %s */\n"
-              "#include <stdint.h>\n\n#include \"../gen/types.h\"\n\n",
-              progname);
+       for (i = 0, maxval = 0; i < datalen; i++) {
+               if (data[i] > maxval) {
+                       maxval = data[i];
+               }
+       }
+
+       type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t"  :
+              (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
+              (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
+                                             "uint_least64_t";
+
+       printf("static const %s %s[] = {\n\t", type, name);
+       for (i = 0; i < datalen; i++) {
+               printf("%zu", data[i]);
+               if (i + 1 == datalen) {
+                       printf("\n");
+               } else if ((i + 1) % 8 != 0) {
+                       printf(", ");
+               } else {
+                       printf(",\n\t");
+               }
 
-       /* print enum */
-       printf("enum %s {\n", identifier);
-       for (i = 0; i < numprops; i++) {
-               printf("\t%s,\n", prop[i].enumname);
        }
-       printf("};\n\n");
-
-       /* print table */
-       printf("static const struct range_list %s[] = {\n", identifier);
-       for (i = 0; i < numprops; i++) {
-               printf("\t[%s] = {\n\t\t.data = (struct range[]){\n",
-                      prop[i].enumname);
-               for (j = 0; j < prop[i].tablelen; j++) {
-                       printf("\t\t\t{ UINT32_C(0x%06X), UINT32_C(0x%06X) 
},\n",
-                              prop[i].table[j].lower,
-                              prop[i].table[j].upper);
+       printf("};\n");
+}
+
+static void
+properties_print_derived_lookup_table(char *name, size_t *offset, size_t 
offsetlen,
+                                      uint_least8_t (*get_value)(const struct 
properties *,
+                                      size_t), const void *payload)
+{
+       size_t i;
+
+       printf("static const uint_least8_t %s[] = {\n\t", name);
+       for (i = 0; i < offsetlen; i++) {
+               printf("%"PRIuLEAST8, get_value(payload, offset[i]));
+               if (i + 1 == offsetlen) {
+                       printf("\n");
+               } else if ((i + 1) % 8 != 0) {
+                       printf(", ");
+               } else {
+                       printf(",\n\t");
                }
-               printf("\t\t},\n\t\t.len = %zu,\n\t},\n", prop[i].tablelen);
+
        }
        printf("};\n");
 }
 
-void
-property_list_free(struct property *prop, size_t numprops)
+static void
+properties_print_enum(const struct property_spec *spec, size_t speclen,
+                      const char *enumname, const char *enumprefix)
 {
        size_t i;
 
-       for (i = 0; i < numprops; i++) {
-               free(prop[i].table);
-               prop[i].table = NULL;
-               prop[i].tablelen = 0;
+       printf("enum %s {\n", enumname);
+       for (i = 0; i < speclen; i++) {
+               printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
+       }
+       printf("\tNUM_%sS,\n};\n\n", enumprefix);
+}
+
+static int
+set_value_bp(struct properties_payload *payload, uint_least32_t cp,
+             uint_least8_t value)
+{
+       if (payload->prop[cp].break_property != 0) {
+               fprintf(stderr, "set_value_bp: "
+                       "Character break property overlap.\n");
+               return 1;
+       }
+       payload->prop[cp].break_property = value;
+
+       return 0;
+}
+
+static uint_least8_t
+get_value_bp(const struct properties *prop, size_t offset)
+{
+       return prop[offset].break_property;
+}
+
+void
+properties_generate_break_property(const struct property_spec *spec,
+                                   uint_least8_t speclen, const char *prefix,
+                                  const char *argv0)
+{
+       struct properties_compressed comp;
+       struct properties_major_minor mm;
+       struct properties_payload payload;
+       struct properties *prop;
+       size_t i, j, prefixlen = strlen(prefix);
+       char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
+
+       /* allocate property buffer for all 0x110000 codepoints */
+       if (!(prop = calloc(0x110000, sizeof(*prop)))) {
+               fprintf(stderr, "calloc: %s\n", strerror(errno));
+               exit(1);
+       }
+
+       /* generate data */
+       payload.prop = prop;
+       payload.spec = spec;
+       payload.speclen = speclen;
+       payload.set_value = set_value_bp;
+
+       /* parse each file exactly once and ignore NULL-fields */
+       for (i = 0; i < speclen; i++) {
+               for (j = 0; j < i; j++) {
+                       if (spec[i].file && spec[j].file &&
+                           !strcmp(spec[i].file, spec[j].file)) {
+                               /* file has already been parsed */
+                               break;
+                       }
+               }
+               if (i == j && spec[i].file) {
+                       /* file has not been processed yet */
+                       parse_file_with_callback(spec[i].file,
+                                                properties_callback,
+                                                &payload);
+               }
+       }
+
+       /* compress data */
+       properties_compress(prop, &comp);
+
+       fprintf(stderr, "%s: compression-ratio: %.2f%%\n", argv0,
+               properties_get_major_minor(&comp, &mm));
+
+       /* prepare names */
+       if ((size_t)snprintf(buf1, LEN(buf1), "%s_break_property", prefix) >= 
LEN(buf1)) {
+               fprintf(stderr, "snprintf: String truncated.\n");
+               exit(1);
        }
+       if (LEN(prefix_uc) + 1 < prefixlen) {
+               fprintf(stderr, "snprintf: Buffer too small.\n");
+               exit(1);
+       }
+       for (i = 0; i < prefixlen; i++) {
+               prefix_uc[i] = (char)toupper(prefix[i]);
+       }
+       prefix_uc[prefixlen] = '\0';
+       if ((size_t)snprintf(buf2, LEN(buf2), "%s_BREAK_PROP", prefix_uc) >= 
LEN(buf2) ||
+           (size_t)snprintf(buf3, LEN(buf3), "%s_break_major", prefix) >= 
LEN(buf3)   ||
+           (size_t)snprintf(buf4, LEN(buf4), "%s_break_minor", prefix) >= 
LEN(buf4)) {
+               fprintf(stderr, "snprintf: String truncated.\n");
+               exit(1);
+       }
+
+       /* print data */
+       properties_print_enum(spec, speclen, buf1, buf2);
+       properties_print_lookup_table(buf3, mm.major, 0x1100);
+       printf("\n");
+       properties_print_derived_lookup_table(buf4, mm.minor, mm.minorlen,
+                                             get_value_bp, comp.data);
+
+       /* free data */
+       free(prop);
+       free(comp.data);
+       free(comp.offset);
+       free(mm.major);
+       free(mm.minor);
 }
 
 static int
-segment_test_callback(char *fname, char **field, size_t nfields,
+segment_test_callback(const char *fname, char **field, size_t nfields,
                       char *comment, void *payload)
 {
        struct segment_test *t,
diff --git a/gen/util.h b/gen/util.h
index 5b701ef..033c572 100644
--- a/gen/util.h
+++ b/gen/util.h
@@ -7,17 +7,14 @@
 
 #define LEN(x) (sizeof (x) / sizeof *(x))
 
-struct range {
-       uint_least32_t lower;
-       uint_least32_t upper;
+struct property_spec {
+       const char *enumname;
+       const char *file;
+       const char *ucdname;
 };
 
-struct property {
-       char         *enumname;
-       char         *identifier;
-       char         *fname;
-       struct range *table;
-       size_t        tablelen;
+struct properties {
+       uint_least8_t break_property;
 };
 
 struct segment_test {
@@ -28,15 +25,12 @@ struct segment_test {
        char *descr;
 };
 
-int range_parse(const char *, struct range *);
+void parse_file_with_callback(const char *, int (*callback)(const char *,
+                              char **, size_t, char *, void *), void *payload);
 
-void parse_file_with_callback(char *, int (*callback)(char *, char **,
-                              size_t, char *, void *), void *payload);
-
-void property_list_parse(struct property *, size_t);
-void property_list_print(const struct property *, size_t, const char *,
-                         const char *);
-void property_list_free(struct property *, size_t);
+void properties_generate_break_property(const struct property_spec *,
+                                        uint_least8_t, const char *,
+                                        const char *);
 
 void segment_test_list_parse(char *, struct segment_test **, size_t *);
 void segment_test_list_print(const struct segment_test *, size_t,
diff --git a/src/character.c b/src/character.c
index 239463d..462e572 100644
--- a/src/character.c
+++ b/src/character.c
@@ -106,7 +106,8 @@ static enum char_break_property
 get_break_prop(uint_least32_t cp)
 {
        if (likely(cp <= 0x10FFFF)) {
-               return (enum char_break_property)minor[major[cp >> 8] + (cp & 
0xff)];
+               return (enum char_break_property)
+                      char_break_minor[char_break_major[cp >> 8] + (cp & 
0xff)];
        } else {
                return CHAR_BREAK_PROP_OTHER;
        }

Reply via email to