I would prefer the “libgrapheme_” prefix, so that it is obvious that the functions belong to the libgrapheme library.
Regards, Mattias Andrée On Sat, 18 Dec 2021 19:54:57 +0100 <g...@suckless.org> wrote: > commit 4483b44e8444d4a57bcbb31dbe9eac3e6b80c1ad > Author: Laslo Hunhold <d...@frign.de> > AuthorDate: Sat Dec 18 19:49:34 2021 +0100 > Commit: Laslo Hunhold <d...@frign.de> > CommitDate: Sat Dec 18 19:49:34 2021 +0100 > > Rename API functions to improve readability > > I thought about how to address the fact that "isbreak" and "nextbreak" > kind of break the snake case, but "grapheme_character_is_break" sounds > convoluted. > > The solution is to loosen the naming a bit and not require the > "component" (in this case "character") to immediately follow the > "grapheme_" prefix. Instead, the "is" and "next" keywords are brought > to the front, which improves the readability substantially and the > functions are well-grouped into "is" and "next" functions. > > Analogously, it makes more sense to "decode_utf8" than "utf8_decode", > so this was changed as well, including going back to > GRAPHEME_INVALID_CODEPOINT, which just rolls off the tongue better. 
> > Signed-off-by: Laslo Hunhold <d...@frign.de> > > diff --git a/Makefile b/Makefile > index 8f6d694..cdda874 100644 > --- a/Makefile > +++ b/Makefile > @@ -25,10 +25,10 @@ TEST =\ > test/utf8-encode\ > > MAN3 =\ > - man/lg_grapheme_isbreak.3\ > - man/lg_grapheme_nextbreak.3\ > - man/lg_utf8_decode.3\ > - man/lg_utf8_encode.3\ > + man/grapheme_decode_utf8.3\ > + man/grapheme_encode_utf8.3\ > + man/grapheme_is_character_break.3\ > + man/grapheme_next_character_break.3\ > > MAN7 = man/libgrapheme.7 > > diff --git a/grapheme.h b/grapheme.h > index b9c381c..ea8a02d 100644 > --- a/grapheme.h > +++ b/grapheme.h > @@ -17,13 +17,13 @@ typedef struct grapheme_internal_segmentation_state { > uint_least16_t flags; > } GRAPHEME_STATE; > > -#define GRAPHEME_CODEPOINT_INVALID UINT32_C(0xFFFD) > +#define GRAPHEME_INVALID_CODEPOINT UINT32_C(0xFFFD) > > -size_t grapheme_character_nextbreak(const char *); > +size_t grapheme_next_character_break(const char *); > > -bool grapheme_character_isbreak(uint_least32_t, uint_least32_t, > GRAPHEME_STATE *); > +bool grapheme_is_character_break(uint_least32_t, uint_least32_t, > GRAPHEME_STATE *); > > -size_t grapheme_utf8_decode(const char *, size_t, uint_least32_t *); > -size_t grapheme_utf8_encode(uint_least32_t, char *, size_t); > +size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *); > +size_t grapheme_encode_utf8(uint_least32_t, char *, size_t); > > #endif /* GRAPHEME_H */ > diff --git a/man/grapheme_utf8_decode.3 b/man/grapheme_decode_utf8.3 > similarity index 84% > rename from man/grapheme_utf8_decode.3 > rename to man/grapheme_decode_utf8.3 > index 6a1f5c2..26e3afb 100644 > --- a/man/grapheme_utf8_decode.3 > +++ b/man/grapheme_decode_utf8.3 > @@ -1,16 +1,16 @@ > .Dd 2021-12-17 > -.Dt GRAPHEME_UTF8_DECODE 3 > +.Dt GRAPHEME_DECODE_UTF8 3 > .Os suckless.org > .Sh NAME > -.Nm grapheme_utf8_decode > +.Nm grapheme_decode_utf8 > .Nd decode first codepoint in UTF-8-encoded string > .Sh SYNOPSIS > .In grapheme.h > .Ft size_t > 
-.Fn grapheme_utf8_decode "const char *str" "size_t len" "uint_least32_t *cp" > +.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp" > .Sh DESCRIPTION > The > -.Fn grapheme_utf8_decode > +.Fn grapheme_decode_utf8 > function decodes the next codepoint in the UTF-8-encoded string > .Va str > of length > @@ -18,7 +18,7 @@ of length > If the UTF-8-sequence is invalid (overlong encoding, unexpected byte, > string ends unexpectedly, empty string, etc.) the decoding is stopped > at the last processed byte and the decoded codepoint set to > -.Dv GRAPHEME_CODEPOINT_INVALID. > +.Dv GRAPHEME_INVALID_CODEPOINT. > .Pp > If > .Va cp > @@ -39,7 +39,7 @@ is 0 (see > for an example). > .Sh RETURN VALUES > The > -.Fn grapheme_utf8_decode > +.Fn grapheme_decode_utf8 > function returns the number of processed bytes and 0 if > .Va str > is > @@ -65,7 +65,7 @@ print_cps(const char *str, size_t len) > uint_least32_t cp; > > for (off = 0; off < len; off += ret) { > - if ((ret = grapheme_utf8_decode(str + off, > + if ((ret = grapheme_decode_utf8(str + off, > len - off, &cp)) > (len - off)) > { > /* > * string ended unexpectedly in the middle of a > @@ -86,7 +86,7 @@ print_cps_nul_terminated(const char *str) > size_t ret, off; > uint_least32_t cp; > > - for (off = 0; (ret = grapheme_utf8_decode(str + off, > + for (off = 0; (ret = grapheme_decode_utf8(str + off, > (size_t)-1, &cp)) > 0 && > cp != 0; off += ret) { > printf("%"PRIxLEAST32"\\n", cp); > @@ -94,8 +94,8 @@ print_cps_nul_terminated(const char *str) > } > .Ed > .Sh SEE ALSO > -.Xr grapheme_utf8_encode 3 , > -.Xr grapheme_character_isbreak 3 , > +.Xr grapheme_encode_utf8 3 , > +.Xr grapheme_is_character_break 3 , > .Xr libgrapheme 7 > .Sh AUTHORS > .An Laslo Hunhold Aq Mt d...@frign.de > diff --git a/man/grapheme_utf8_encode.3 b/man/grapheme_encode_utf8.3 > similarity index 82% > rename from man/grapheme_utf8_encode.3 > rename to man/grapheme_encode_utf8.3 > index c56f2ca..42dbbe5 100644 > --- 
a/man/grapheme_utf8_encode.3 > +++ b/man/grapheme_encode_utf8.3 > @@ -1,16 +1,16 @@ > .Dd 2021-12-17 > -.Dt GRAPHEME_UTF8_ENCODE 3 > +.Dt GRAPHEME_ENCODE_UTF8 3 > .Os suckless.org > .Sh NAME > -.Nm grapheme_utf8_encode > +.Nm grapheme_encode_utf8 > .Nd encode codepoint into UTF-8 string > .Sh SYNOPSIS > .In grapheme.h > .Ft size_t > -.Fn grapheme_utf8_encode "uint_least32_t cp" "char *" "size_t" > +.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *" "size_t" > .Sh DESCRIPTION > The > -.Fn grapheme_utf8_encode > +.Fn grapheme_encode_utf8 > function encodes the codepoint > .Va cp > into a UTF-8-string. > @@ -24,7 +24,7 @@ is large enough it writes the UTF-8-string to the memory > pointed to by > .Va str . > .Sh RETURN VALUES > The > -.Fn grapheme_utf8_encode > +.Fn grapheme_encode_utf8 > function returns the length (in bytes) of the UTF-8-string resulting > from encoding > .Va cp . > @@ -45,7 +45,7 @@ cps_to_utf8(const uint_least32_t *cp, size_t cplen, char > *str, size_t len) > size_t i, off, ret; > > for (i = 0, off = 0; i < cplen; i++, off += ret) { > - if ((ret = grapheme_utf8_encode(cp[i], str + off, > + if ((ret = grapheme_encode_utf8(cp[i], str + off, > len - off)) > (len - off)) { > /* buffer too small */ > break; > @@ -61,7 +61,7 @@ cps_bytelen(const uint_least32_t *cp, size_t cplen) > size_t i, len; > > for (i = 0, len = 0; i < cplen; i++) { > - len += grapheme_utf8_encode(cp[i], NULL, 0); > + len += grapheme_encode_utf8(cp[i], NULL, 0); > } > > return len; > @@ -80,7 +80,7 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen) > } > > for (i = 0, off = 0; i < cplen; i++, off += ret) { > - if ((ret = grapheme_utf8_encode(cp[i], str + off, > + if ((ret = grapheme_encode_utf8(cp[i], str + off, > len - off)) > (len - off)) { > /* buffer too small */ > break; > @@ -92,7 +92,7 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen) > } > .Ed > .Sh SEE ALSO > -.Xr grapheme_utf8_decode 3 , > +.Xr grapheme_decode_utf8 3 , > .Xr libgrapheme 7 > .Sh 
AUTHORS > .An Laslo Hunhold Aq Mt d...@frign.de > diff --git a/man/grapheme_character_isbreak.3 > b/man/grapheme_is_character_break.3 > similarity index 75% > rename from man/grapheme_character_isbreak.3 > rename to man/grapheme_is_character_break.3 > index 8d813ec..507842c 100644 > --- a/man/grapheme_character_isbreak.3 > +++ b/man/grapheme_is_character_break.3 > @@ -1,16 +1,16 @@ > .Dd 2021-12-18 > -.Dt GRAPHEME_CHARACTER_ISBREAK 3 > +.Dt GRAPHEME_IS_CHARACTER_BREAK 3 > .Os suckless.org > .Sh NAME > -.Nm grapheme_character_isbreak > +.Nm grapheme_is_character_break > .Nd test for a grapheme cluster break between two codepoints > .Sh SYNOPSIS > .In grapheme.h > .Ft size_t > -.Fn grapheme_character_isbreak "uint_least32_t cp1" "uint_least32_t cp2" > "GRAPHEME_STATE *state" > +.Fn grapheme_is_character_break "uint_least32_t cp1" "uint_least32_t cp2" > "GRAPHEME_STATE *state" > .Sh DESCRIPTION > The > -.Fn grapheme_character_isbreak > +.Fn grapheme_is_character_break > function determines if there is a grapheme cluster break (see > .Xr libgrapheme 7 ) > between the two codepoints > @@ -26,11 +26,11 @@ If > .Va state > is > .Dv NULL > -.Fn grapheme_character_isbreak > +.Fn grapheme_is_character_break > behaves as if it was called with a fully reset state. 
> .Sh RETURN VALUES > The > -.Fn grapheme_character_isbreak > +.Fn grapheme_is_character_break > function returns > .Va true > if there is a grapheme cluster break between the codepoints > @@ -56,13 +56,13 @@ main(void) > size_t i; > > for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) { > - if (grapheme_character_isbreak(s[i], s[i + 1], &state)) { > + if (grapheme_is_character_break(s[i], s[i + 1], &state)) { > printf("break in s1 at offset %zu\n", i); > } > } > memset(&state, 0, sizeof(state)); /* reset state */ > for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) { > - if (grapheme_character_isbreak(s[i], s[i + 1], &state)) { > + if (grapheme_is_character_break(s[i], s[i + 1], &state)) { > printf("break in s2 at offset %zu\n", i); > } > } > @@ -71,10 +71,10 @@ main(void) > } > .Ed > .Sh SEE ALSO > -.Xr grapheme_character_nextbreak 3 , > +.Xr grapheme_next_character_break 3 , > .Xr libgrapheme 7 > .Sh STANDARDS > -.Fn grapheme_character_isbreak > +.Fn grapheme_is_character_break > is compliant with the Unicode 14.0.0 specification. 
> .Sh AUTHORS > .An Laslo Hunhold Aq Mt d...@frign.de > diff --git a/man/grapheme_character_nextbreak.3 > b/man/grapheme_next_character_break.3 > similarity index 80% > rename from man/grapheme_character_nextbreak.3 > rename to man/grapheme_next_character_break.3 > index 2cc0365..1e96383 100644 > --- a/man/grapheme_character_nextbreak.3 > +++ b/man/grapheme_next_character_break.3 > @@ -1,16 +1,16 @@ > .Dd 2021-12-18 > -.Dt GRAPHEME_CHARACTER_NEXTBREAK 3 > +.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3 > .Os suckless.org > .Sh NAME > -.Nm grapheme_character_nextbreak > +.Nm grapheme_next_character_break > .Nd determine byte-offset to next grapheme cluster break > .Sh SYNOPSIS > .In grapheme.h > .Ft size_t > -.Fn grapheme_character_nextbreak "const char *str" > +.Fn grapheme_next_character_break "const char *str" > .Sh DESCRIPTION > The > -.Fn grapheme_character_nextbreak > +.Fn grapheme_next_character_break > function computes the offset (in bytes) to the next grapheme > cluster break (see > .Xr libgrapheme 7 ) > @@ -21,11 +21,11 @@ If a grapheme cluster begins at > this offset is equal to the length of said grapheme cluster. > .Pp > For non-UTF-8 input data > -.Xr grapheme_character_isbreak 3 > +.Xr grapheme_is_character_break 3 > can be used instead. 
> .Sh RETURN VALUES > The > -.Fn grapheme_character_nextbreak > +.Fn grapheme_next_character_break > function returns the offset (in bytes) to the next grapheme cluster > break in > .Va str > @@ -54,7 +54,7 @@ main(void) > > /* print each grapheme cluster with byte-length */ > for (; *s != '\\0';) { > - len = grapheme_character_nextbreak(s); > + len = grapheme_next_character_break(s); > printf("%2zu bytes | %.*s\\n", len, (int)len, s, len); > s += len; > } > @@ -63,10 +63,10 @@ main(void) > } > .Ed > .Sh SEE ALSO > -.Xr grapheme_character_isbreak 3 , > +.Xr grapheme_is_character_break 3 , > .Xr libgrapheme 7 > .Sh STANDARDS > -.Fn grapheme_character_nextbreak > +.Fn grapheme_next_character_break > is compliant with the Unicode 14.0.0 specification. > .Sh AUTHORS > .An Laslo Hunhold Aq Mt d...@frign.de > diff --git a/man/libgrapheme.7 b/man/libgrapheme.7 > index dc3e83e..47412ea 100644 > --- a/man/libgrapheme.7 > +++ b/man/libgrapheme.7 > @@ -15,10 +15,10 @@ see > .Sx MOTIVATION ) > according to the Unicode specification. > .Sh SEE ALSO > -.Xr grapheme_character_isbreak 3 , > -.Xr grapheme_character_nextbreak 3 , > -.Xr grapheme_utf8_decode 3 , > -.Xr grapheme_utf8_encode 3 > +.Xr grapheme_is_character_break 3 , > +.Xr grapheme_next_character_break 3 , > +.Xr grapheme_decode_utf8 3 , > +.Xr grapheme_encode_utf8 3 > .Sh STANDARDS > .Nm > is compliant with the Unicode 14.0.0 specification. 
> diff --git a/src/character.c b/src/character.c > index be49a34..015b4e0 100644 > --- a/src/character.c > +++ b/src/character.c > @@ -14,7 +14,7 @@ enum { > }; > > bool > -grapheme_character_isbreak(uint_least32_t a, uint_least32_t b, > GRAPHEME_STATE *state) > +grapheme_is_character_break(uint_least32_t a, uint_least32_t b, > GRAPHEME_STATE *state) > { > struct grapheme_internal_heisenstate *p[2] = { 0 }; > uint_least16_t flags = 0; > @@ -179,7 +179,7 @@ hasbreak: > } > > size_t > -grapheme_character_nextbreak(const char *str) > +grapheme_next_character_break(const char *str) > { > uint_least32_t cp0, cp1; > size_t ret, len = 0; > @@ -190,7 +190,7 @@ grapheme_character_nextbreak(const char *str) > } > > /* > - * grapheme_utf8_decode, when it encounters an unexpected byte, > + * grapheme_decode_utf8, when it encounters an unexpected byte, > * does not count it to the error and instead assumes that the > * unexpected byte is the beginning of a new sequence. > * This way, when the string ends with a null byte, we never > @@ -202,17 +202,17 @@ grapheme_character_nextbreak(const char *str) > */ > > /* get first codepoint */ > - len += grapheme_utf8_decode(str, (size_t)-1, &cp0); > - if (cp0 == GRAPHEME_CODEPOINT_INVALID) { > + len += grapheme_decode_utf8(str, (size_t)-1, &cp0); > + if (cp0 == GRAPHEME_INVALID_CODEPOINT) { > return len; > } > > while (cp0 != 0) { > /* get next codepoint */ > - ret = grapheme_utf8_decode(str + len, (size_t)-1, &cp1); > + ret = grapheme_decode_utf8(str + len, (size_t)-1, &cp1); > > - if (cp1 == GRAPHEME_CODEPOINT_INVALID || > - grapheme_character_isbreak(cp0, cp1, &state)) { > + if (cp1 == GRAPHEME_INVALID_CODEPOINT || > + grapheme_is_character_break(cp0, cp1, &state)) { > /* we read an invalid cp or have a breakpoint */ > break; > } else { > diff --git a/src/utf8.c b/src/utf8.c > index 851f075..fe7775c 100644 > --- a/src/utf8.c > +++ b/src/utf8.c > @@ -48,13 +48,13 @@ static const struct { > }; > > size_t > -grapheme_utf8_decode(const 
char *s, size_t n, uint_least32_t *cp) > +grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp) > { > size_t off, i; > > if (s == NULL || n == 0) { > /* a sequence must be at least 1 byte long */ > - *cp = GRAPHEME_CODEPOINT_INVALID; > + *cp = GRAPHEME_INVALID_CODEPOINT; > return 0; > } > > @@ -79,14 +79,14 @@ grapheme_utf8_decode(const char *s, size_t n, > uint_least32_t *cp) > * this also includes the cases where bits higher than > * the 8th are set on systems with CHAR_BIT > 8 > */ > - *cp = GRAPHEME_CODEPOINT_INVALID; > + *cp = GRAPHEME_INVALID_CODEPOINT; > return 1; > } > if (1 + off > n) { > /* > * input is not long enough, set cp as invalid > */ > - *cp = GRAPHEME_CODEPOINT_INVALID; > + *cp = GRAPHEME_INVALID_CODEPOINT; > > /* > * count the following continuation bytes, but nothing > @@ -125,7 +125,7 @@ grapheme_utf8_decode(const char *s, size_t n, > uint_least32_t *cp) > * higher than the 8th are set on systems > * with CHAR_BIT > 8 > */ > - *cp = GRAPHEME_CODEPOINT_INVALID; > + *cp = GRAPHEME_INVALID_CODEPOINT; > return 1 + (i - 1); > } > /* > @@ -144,14 +144,14 @@ grapheme_utf8_decode(const char *s, size_t n, > uint_least32_t *cp) > * not representable in UTF-16 (>0x10FFFF) (RFC-3629 > * specifies the latter two conditions) > */ > - *cp = GRAPHEME_CODEPOINT_INVALID; > + *cp = GRAPHEME_INVALID_CODEPOINT; > } > > return 1 + off; > } > > size_t > -grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n) > +grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n) > { > size_t off, i; > > @@ -162,7 +162,7 @@ grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n) > * (0xD800..0xDFFF) or not representable in UTF-16 > * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8. 
> */ > - cp = GRAPHEME_CODEPOINT_INVALID; > + cp = GRAPHEME_INVALID_CODEPOINT; > } > > /* determine necessary sequence type */ > diff --git a/test/character-performance.c b/test/character-performance.c > index a7fdf8f..1005ab4 100644 > --- a/test/character-performance.c > +++ b/test/character-performance.c > @@ -45,7 +45,7 @@ main(int argc, char *argv[]) > for (i = 0; i < NUM_ITERATIONS; i++) { > memset(&state, 0, sizeof(state)); > for (j = 0; j < bufsiz - 1; j++) { > - (void)grapheme_character_isbreak(buf[j], buf[j+1], > &state); > + (void)grapheme_is_character_break(buf[j], buf[j+1], > &state); > } > if (i % (NUM_ITERATIONS / 10) == 0) { > printf("."); > diff --git a/test/character.c b/test/character.c > index 02dbde9..d156980 100644 > --- a/test/character.c > +++ b/test/character.c > @@ -21,7 +21,7 @@ main(int argc, char *argv[]) > memset(&state, 0, sizeof(state)); > for (j = 0, k = 0, len = 1; j < character_test[i].cplen; j++) { > if ((j + 1) == character_test[i].cplen || > - grapheme_character_isbreak(character_test[i].cp[j], > + grapheme_is_character_break(character_test[i].cp[j], > character_test[i].cp[j + > 1], > &state)) { > /* check if our resulting length matches */ > diff --git a/test/utf8-decode.c b/test/utf8-decode.c > index 7d5e389..ad1495f 100644 > --- a/test/utf8-decode.c > +++ b/test/utf8-decode.c > @@ -21,7 +21,7 @@ static const struct { > .arr = NULL, > .len = 0, > .exp_len = 0, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid lead byte > @@ -31,7 +31,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xFD }, > .len = 1, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* valid 1-byte sequence > @@ -61,7 +61,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xC3 }, > .len = 1, > .exp_len = 2, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 2-byte 
sequence (second byte malformed) > @@ -71,7 +71,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xC3, 0xFF }, > .len = 2, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 2-byte sequence (overlong encoded) > @@ -81,7 +81,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xC1, 0xBF }, > .len = 2, > .exp_len = 2, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* valid 3-byte sequence > @@ -101,7 +101,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0 }, > .len = 1, > .exp_len = 3, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (second byte malformed) > @@ -111,7 +111,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF }, > .len = 3, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (short string, second byte malformed) > @@ -121,7 +121,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0, 0x7F }, > .len = 2, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (third byte missing) > @@ -131,7 +131,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0, 0xBF }, > .len = 2, > .exp_len = 3, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (third byte malformed) > @@ -141,7 +141,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F }, > .len = 3, > .exp_len = 2, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (overlong encoded) > @@ -151,7 +151,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF }, > .len = 3, > 
.exp_len = 3, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 3-byte sequence (UTF-16 surrogate half) > @@ -161,7 +161,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 }, > .len = 3, > .exp_len = 3, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* valid 4-byte sequence > @@ -181,7 +181,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3 }, > .len = 1, > .exp_len = 4, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (second byte malformed) > @@ -191,7 +191,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF }, > .len = 4, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (short string 1, second byte > malformed) > @@ -201,7 +201,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0x7F }, > .len = 2, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (short string 2, second byte > malformed) > @@ -211,7 +211,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF }, > .len = 3, > .exp_len = 1, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > > { > @@ -222,7 +222,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0xBF }, > .len = 2, > .exp_len = 4, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (third byte malformed) > @@ -232,7 +232,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF }, > .len = 4, > .exp_len = 2, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence 
(short string, third byte malformed) > @@ -242,7 +242,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F }, > .len = 3, > .exp_len = 2, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (fourth byte missing) > @@ -252,7 +252,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF }, > .len = 3, > .exp_len = 4, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (fourth byte malformed) > @@ -262,7 +262,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F }, > .len = 4, > .exp_len = 3, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (overlong encoded) > @@ -272,7 +272,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF }, > .len = 4, > .exp_len = 4, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > { > /* invalid 4-byte sequence (UTF-16-unrepresentable) > @@ -282,7 +282,7 @@ static const struct { > .arr = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 }, > .len = 4, > .exp_len = 4, > - .exp_cp = GRAPHEME_CODEPOINT_INVALID, > + .exp_cp = GRAPHEME_INVALID_CODEPOINT, > }, > }; > > @@ -298,7 +298,7 @@ main(int argc, char *argv[]) > size_t len; > uint_least32_t cp; > > - len = grapheme_utf8_decode(dec_test[i].arr, > + len = grapheme_decode_utf8(dec_test[i].arr, > dec_test[i].len, &cp); > > if (len != dec_test[i].exp_len || > diff --git a/test/utf8-encode.c b/test/utf8-encode.c > index 6dd5637..ded2af0 100644 > --- a/test/utf8-encode.c > +++ b/test/utf8-encode.c > @@ -62,7 +62,7 @@ main(int argc, char *argv[]) > char arr[4]; > size_t len; > > - len = grapheme_utf8_encode(enc_test[i].cp, arr, LEN(arr)); > + len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr)); > > if (len != enc_test[i].exp_len 
|| > memcmp(arr, enc_test[i].exp_arr, len)) { >