Re: [hackers] [libgrapheme] Rename API functions to improve readability || Laslo Hunhold

Mattias Andrée Sat, 18 Dec 2021 11:45:33 -0800

I would prefer the “libgrapheme_” prefix, so that it
is obvious that the functions belong to the libgrapheme
library.



Regards,
Mattias Andrée


On Sat, 18 Dec 2021 19:54:57 +0100
<g...@suckless.org> wrote:

> commit 4483b44e8444d4a57bcbb31dbe9eac3e6b80c1ad
> Author:     Laslo Hunhold <d...@frign.de>
> AuthorDate: Sat Dec 18 19:49:34 2021 +0100
> Commit:     Laslo Hunhold <d...@frign.de>
> CommitDate: Sat Dec 18 19:49:34 2021 +0100
> 
>     Rename API functions to improve readability
>     
>     I thought about how to address the fact that "isbreak" and "nextbreak"
>     kind of break the snake case, but "grapheme_character_is_break" sounds
>     convoluted.
>     
>     The solution is to loosen the naming a bit and not require the
>     "component" (in this case "character") to immediately follow the
>     "grapheme_" prefix. Instead, the "is" and "next" keywords are brought
>     to the front, which improves the readability substantially and the
>     functions are well-grouped into "is" and "next" functions.
>     
>     Analogously, it makes more sense to "decode_utf8" than "utf8_decode",
>     so this was changed as well, including going back to
>     GRAPHEME_INVALID_CODEPOINT, which just rolls off the tongue better.
>     
>     Signed-off-by: Laslo Hunhold <d...@frign.de>
> 
> diff --git a/Makefile b/Makefile
> index 8f6d694..cdda874 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -25,10 +25,10 @@ TEST =\
>       test/utf8-encode\
>  
>  MAN3 =\
> -     man/lg_grapheme_isbreak.3\
> -     man/lg_grapheme_nextbreak.3\
> -     man/lg_utf8_decode.3\
> -     man/lg_utf8_encode.3\
> +     man/grapheme_decode_utf8.3\
> +     man/grapheme_encode_utf8.3\
> +     man/grapheme_is_character_break.3\
> +     man/grapheme_next_character_break.3\
>  
>  MAN7 = man/libgrapheme.7
>  
> diff --git a/grapheme.h b/grapheme.h
> index b9c381c..ea8a02d 100644
> --- a/grapheme.h
> +++ b/grapheme.h
> @@ -17,13 +17,13 @@ typedef struct grapheme_internal_segmentation_state {
>       uint_least16_t flags;
>  } GRAPHEME_STATE;
>  
> -#define GRAPHEME_CODEPOINT_INVALID UINT32_C(0xFFFD)
> +#define GRAPHEME_INVALID_CODEPOINT UINT32_C(0xFFFD)
>  
> -size_t grapheme_character_nextbreak(const char *);
> +size_t grapheme_next_character_break(const char *);
>  
> -bool grapheme_character_isbreak(uint_least32_t, uint_least32_t, GRAPHEME_STATE *);
> +bool grapheme_is_character_break(uint_least32_t, uint_least32_t, GRAPHEME_STATE *);
>  
> -size_t grapheme_utf8_decode(const char *, size_t, uint_least32_t *);
> -size_t grapheme_utf8_encode(uint_least32_t, char *, size_t);
> +size_t grapheme_decode_utf8(const char *, size_t, uint_least32_t *);
> +size_t grapheme_encode_utf8(uint_least32_t, char *, size_t);
>  
>  #endif /* GRAPHEME_H */
> diff --git a/man/grapheme_utf8_decode.3 b/man/grapheme_decode_utf8.3
> similarity index 84%
> rename from man/grapheme_utf8_decode.3
> rename to man/grapheme_decode_utf8.3
> index 6a1f5c2..26e3afb 100644
> --- a/man/grapheme_utf8_decode.3
> +++ b/man/grapheme_decode_utf8.3
> @@ -1,16 +1,16 @@
>  .Dd 2021-12-17
> -.Dt GRAPHEME_UTF8_DECODE 3
> +.Dt GRAPHEME_DECODE_UTF8 3
>  .Os suckless.org
>  .Sh NAME
> -.Nm grapheme_utf8_decode
> +.Nm grapheme_decode_utf8
>  .Nd decode first codepoint in UTF-8-encoded string
>  .Sh SYNOPSIS
>  .In grapheme.h
>  .Ft size_t
> -.Fn grapheme_utf8_decode "const char *str" "size_t len" "uint_least32_t *cp"
> +.Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
>  .Sh DESCRIPTION
>  The
> -.Fn grapheme_utf8_decode
> +.Fn grapheme_decode_utf8
>  function decodes the next codepoint in the UTF-8-encoded string
>  .Va str
>  of length
> @@ -18,7 +18,7 @@ of length
>  If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
>  string ends unexpectedly, empty string, etc.) the decoding is stopped
>  at the last processed byte and the decoded codepoint set to
> -.Dv GRAPHEME_CODEPOINT_INVALID.
> +.Dv GRAPHEME_INVALID_CODEPOINT.
>  .Pp
>  If
>  .Va cp
> @@ -39,7 +39,7 @@ is 0 (see
>  for an example).
>  .Sh RETURN VALUES
>  The
> -.Fn grapheme_utf8_decode
> +.Fn grapheme_decode_utf8
>  function returns the number of processed bytes and 0 if
>  .Va str
>  is
> @@ -65,7 +65,7 @@ print_cps(const char *str, size_t len)
>       uint_least32_t cp;
>  
>       for (off = 0; off < len; off += ret) {
> -             if ((ret = grapheme_utf8_decode(str + off,
> +             if ((ret = grapheme_decode_utf8(str + off,
>                                               len - off, &cp)) > (len - off)) {
>                       /*
>                        * string ended unexpectedly in the middle of a
> @@ -86,7 +86,7 @@ print_cps_nul_terminated(const char *str)
>       size_t ret, off;
>       uint_least32_t cp;
>  
> -     for (off = 0; (ret = grapheme_utf8_decode(str + off,
> +     for (off = 0; (ret = grapheme_decode_utf8(str + off,
>                                                 (size_t)-1, &cp)) > 0 &&
>            cp != 0; off += ret) {
>               printf("%"PRIxLEAST32"\\n", cp);
> @@ -94,8 +94,8 @@ print_cps_nul_terminated(const char *str)
>  }
>  .Ed
>  .Sh SEE ALSO
> -.Xr grapheme_utf8_encode 3 ,
> -.Xr grapheme_character_isbreak 3 ,
> +.Xr grapheme_encode_utf8 3 ,
> +.Xr grapheme_is_character_break 3 ,
>  .Xr libgrapheme 7
>  .Sh AUTHORS
>  .An Laslo Hunhold Aq Mt d...@frign.de
> diff --git a/man/grapheme_utf8_encode.3 b/man/grapheme_encode_utf8.3
> similarity index 82%
> rename from man/grapheme_utf8_encode.3
> rename to man/grapheme_encode_utf8.3
> index c56f2ca..42dbbe5 100644
> --- a/man/grapheme_utf8_encode.3
> +++ b/man/grapheme_encode_utf8.3
> @@ -1,16 +1,16 @@
>  .Dd 2021-12-17
> -.Dt GRAPHEME_UTF8_ENCODE 3
> +.Dt GRAPHEME_ENCODE_UTF8 3
>  .Os suckless.org
>  .Sh NAME
> -.Nm grapheme_utf8_encode
> +.Nm grapheme_encode_utf8
>  .Nd encode codepoint into UTF-8 string
>  .Sh SYNOPSIS
>  .In grapheme.h
>  .Ft size_t
> -.Fn grapheme_utf8_encode "uint_least32_t cp" "char *" "size_t"
> +.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *" "size_t"
>  .Sh DESCRIPTION
>  The
> -.Fn grapheme_utf8_encode
> +.Fn grapheme_encode_utf8
>  function encodes the codepoint
>  .Va cp
>  into a UTF-8-string.
> @@ -24,7 +24,7 @@ is large enough it writes the UTF-8-string to the memory pointed to by
>  .Va str .
>  .Sh RETURN VALUES
>  The
> -.Fn grapheme_utf8_encode
> +.Fn grapheme_encode_utf8
>  function returns the length (in bytes) of the UTF-8-string resulting
>  from encoding
>  .Va cp .
> @@ -45,7 +45,7 @@ cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
>       size_t i, off, ret;
>  
>       for (i = 0, off = 0; i < cplen; i++, off += ret) {
> -             if ((ret = grapheme_utf8_encode(cp[i], str + off,
> +             if ((ret = grapheme_encode_utf8(cp[i], str + off,
>                                               len - off)) > (len - off)) {
>                       /* buffer too small */
>                       break;
> @@ -61,7 +61,7 @@ cps_bytelen(const uint_least32_t *cp, size_t cplen)
>       size_t i, len;
>  
>       for (i = 0, len = 0; i < cplen; i++) {
> -             len += grapheme_utf8_encode(cp[i], NULL, 0);
> +             len += grapheme_encode_utf8(cp[i], NULL, 0);
>       }
>  
>       return len;
> @@ -80,7 +80,7 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
>       }
>  
>       for (i = 0, off = 0; i < cplen; i++, off += ret) {
> -             if ((ret = grapheme_utf8_encode(cp[i], str + off,
> +             if ((ret = grapheme_encode_utf8(cp[i], str + off,
>                                               len - off)) > (len - off)) {
>                       /* buffer too small */
>                       break;
> @@ -92,7 +92,7 @@ cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
>  }
>  .Ed
>  .Sh SEE ALSO
> -.Xr grapheme_utf8_decode 3 ,
> +.Xr grapheme_decode_utf8 3 ,
>  .Xr libgrapheme 7
>  .Sh AUTHORS
>  .An Laslo Hunhold Aq Mt d...@frign.de
> diff --git a/man/grapheme_character_isbreak.3 b/man/grapheme_is_character_break.3
> similarity index 75%
> rename from man/grapheme_character_isbreak.3
> rename to man/grapheme_is_character_break.3
> index 8d813ec..507842c 100644
> --- a/man/grapheme_character_isbreak.3
> +++ b/man/grapheme_is_character_break.3
> @@ -1,16 +1,16 @@
>  .Dd 2021-12-18
> -.Dt GRAPHEME_CHARACTER_ISBREAK 3
> +.Dt GRAPHEME_IS_CHARACTER_BREAK 3
>  .Os suckless.org
>  .Sh NAME
> -.Nm grapheme_character_isbreak
> +.Nm grapheme_is_character_break
>  .Nd test for a grapheme cluster break between two codepoints
>  .Sh SYNOPSIS
>  .In grapheme.h
>  .Ft size_t
> -.Fn grapheme_character_isbreak "uint_least32_t cp1" "uint_least32_t cp2" "GRAPHEME_STATE *state"
> +.Fn grapheme_is_character_break "uint_least32_t cp1" "uint_least32_t cp2" "GRAPHEME_STATE *state"
>  .Sh DESCRIPTION
>  The
> -.Fn grapheme_character_isbreak
> +.Fn grapheme_is_character_break
>  function determines if there is a grapheme cluster break (see
>  .Xr libgrapheme 7 )
>  between the two codepoints
> @@ -26,11 +26,11 @@ If
>  .Va state
>  is
>  .Dv NULL
> -.Fn grapheme_character_isbreak
> +.Fn grapheme_is_character_break
>  behaves as if it was called with a fully reset state.
>  .Sh RETURN VALUES
>  The
> -.Fn grapheme_character_isbreak
> +.Fn grapheme_is_character_break
>  function returns
>  .Va true
>  if there is a grapheme cluster break between the codepoints
> @@ -56,13 +56,13 @@ main(void)
>       size_t i;
>  
>       for (i = 0; i + 1 < sizeof(s1) / sizeof(*s1); i++) {
> -             if (grapheme_character_isbreak(s[i], s[i + 1], &state)) {
> +             if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
>                       printf("break in s1 at offset %zu\n", i);
>               }
>       }
>       memset(&state, 0, sizeof(state)); /* reset state */
>       for (i = 0; i + 1 < sizeof(s2) / sizeof(*s2); i++) {
> -             if (grapheme_character_isbreak(s[i], s[i + 1], &state)) {
> +             if (grapheme_is_character_break(s[i], s[i + 1], &state)) {
>                       printf("break in s2 at offset %zu\n", i);
>               }
>       }
> @@ -71,10 +71,10 @@ main(void)
>  }
>  .Ed
>  .Sh SEE ALSO
> -.Xr grapheme_character_nextbreak 3 ,
> +.Xr grapheme_next_character_break 3 ,
>  .Xr libgrapheme 7
>  .Sh STANDARDS
> -.Fn grapheme_character_isbreak
> +.Fn grapheme_is_character_break
>  is compliant with the Unicode 14.0.0 specification.
>  .Sh AUTHORS
>  .An Laslo Hunhold Aq Mt d...@frign.de
> diff --git a/man/grapheme_character_nextbreak.3 b/man/grapheme_next_character_break.3
> similarity index 80%
> rename from man/grapheme_character_nextbreak.3
> rename to man/grapheme_next_character_break.3
> index 2cc0365..1e96383 100644
> --- a/man/grapheme_character_nextbreak.3
> +++ b/man/grapheme_next_character_break.3
> @@ -1,16 +1,16 @@
>  .Dd 2021-12-18
> -.Dt GRAPHEME_CHARACTER_NEXTBREAK 3
> +.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3
>  .Os suckless.org
>  .Sh NAME
> -.Nm grapheme_character_nextbreak
> +.Nm grapheme_next_character_break
>  .Nd determine byte-offset to next grapheme cluster break
>  .Sh SYNOPSIS
>  .In grapheme.h
>  .Ft size_t
> -.Fn grapheme_character_nextbreak "const char *str"
> +.Fn grapheme_next_character_break "const char *str"
>  .Sh DESCRIPTION
>  The
> -.Fn grapheme_character_nextbreak
> +.Fn grapheme_next_character_break
>  function computes the offset (in bytes) to the next grapheme
>  cluster break (see
>  .Xr libgrapheme 7 )
> @@ -21,11 +21,11 @@ If a grapheme cluster begins at
>  this offset is equal to the length of said grapheme cluster.
>  .Pp
>  For non-UTF-8 input data
> -.Xr grapheme_character_isbreak 3
> +.Xr grapheme_is_character_break 3
>  can be used instead.
>  .Sh RETURN VALUES
>  The
> -.Fn grapheme_character_nextbreak
> +.Fn grapheme_next_character_break
>  function returns the offset (in bytes) to the next grapheme cluster
>  break in
>  .Va str
> @@ -54,7 +54,7 @@ main(void)
>  
>       /* print each grapheme cluster with byte-length */
>       for (; *s != '\\0';) {
> -             len = grapheme_character_nextbreak(s);
> +             len = grapheme_next_character_break(s);
>               printf("%2zu bytes | %.*s\\n", len, (int)len, s, len);
>               s += len;
>       }
> @@ -63,10 +63,10 @@ main(void)
>  }
>  .Ed
>  .Sh SEE ALSO
> -.Xr grapheme_character_isbreak 3 ,
> +.Xr grapheme_is_character_break 3 ,
>  .Xr libgrapheme 7
>  .Sh STANDARDS
> -.Fn grapheme_character_nextbreak
> +.Fn grapheme_next_character_break
>  is compliant with the Unicode 14.0.0 specification.
>  .Sh AUTHORS
>  .An Laslo Hunhold Aq Mt d...@frign.de
> diff --git a/man/libgrapheme.7 b/man/libgrapheme.7
> index dc3e83e..47412ea 100644
> --- a/man/libgrapheme.7
> +++ b/man/libgrapheme.7
> @@ -15,10 +15,10 @@ see
>  .Sx MOTIVATION )
>  according to the Unicode specification.
>  .Sh SEE ALSO
> -.Xr grapheme_character_isbreak 3 ,
> -.Xr grapheme_character_nextbreak 3 ,
> -.Xr grapheme_utf8_decode 3 ,
> -.Xr grapheme_utf8_encode 3
> +.Xr grapheme_is_character_break 3 ,
> +.Xr grapheme_next_character_break 3 ,
> +.Xr grapheme_decode_utf8 3 ,
> +.Xr grapheme_encode_utf8 3
>  .Sh STANDARDS
>  .Nm
>  is compliant with the Unicode 14.0.0 specification.
> diff --git a/src/character.c b/src/character.c
> index be49a34..015b4e0 100644
> --- a/src/character.c
> +++ b/src/character.c
> @@ -14,7 +14,7 @@ enum {
>  };
>  
>  bool
> -grapheme_character_isbreak(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *state)
> +grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *state)
>  {
>       struct grapheme_internal_heisenstate *p[2] = { 0 };
>       uint_least16_t flags = 0;
> @@ -179,7 +179,7 @@ hasbreak:
>  }
>  
>  size_t
> -grapheme_character_nextbreak(const char *str)
> +grapheme_next_character_break(const char *str)
>  {
>       uint_least32_t cp0, cp1;
>       size_t ret, len = 0;
> @@ -190,7 +190,7 @@ grapheme_character_nextbreak(const char *str)
>       }
>  
>       /*
> -      * grapheme_utf8_decode, when it encounters an unexpected byte,
> +      * grapheme_decode_utf8, when it encounters an unexpected byte,
>        * does not count it to the error and instead assumes that the
>        * unexpected byte is the beginning of a new sequence.
>        * This way, when the string ends with a null byte, we never
> @@ -202,17 +202,17 @@ grapheme_character_nextbreak(const char *str)
>        */
>  
>       /* get first codepoint */
> -     len += grapheme_utf8_decode(str, (size_t)-1, &cp0);
> -     if (cp0 == GRAPHEME_CODEPOINT_INVALID) {
> +     len += grapheme_decode_utf8(str, (size_t)-1, &cp0);
> +     if (cp0 == GRAPHEME_INVALID_CODEPOINT) {
>               return len;
>       }
>  
>       while (cp0 != 0) {
>               /* get next codepoint */
> -             ret = grapheme_utf8_decode(str + len, (size_t)-1, &cp1);
> +             ret = grapheme_decode_utf8(str + len, (size_t)-1, &cp1);
>  
> -             if (cp1 == GRAPHEME_CODEPOINT_INVALID ||
> -                 grapheme_character_isbreak(cp0, cp1, &state)) {
> +             if (cp1 == GRAPHEME_INVALID_CODEPOINT ||
> +                 grapheme_is_character_break(cp0, cp1, &state)) {
>                       /* we read an invalid cp or have a breakpoint */
>                       break;
>               } else {
> diff --git a/src/utf8.c b/src/utf8.c
> index 851f075..fe7775c 100644
> --- a/src/utf8.c
> +++ b/src/utf8.c
> @@ -48,13 +48,13 @@ static const struct {
>  };
>  
>  size_t
> -grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
> +grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
>  {
>       size_t off, i;
>  
>       if (s == NULL || n == 0) {
>               /* a sequence must be at least 1 byte long */
> -             *cp = GRAPHEME_CODEPOINT_INVALID;
> +             *cp = GRAPHEME_INVALID_CODEPOINT;
>               return 0;
>       }
>  
> @@ -79,14 +79,14 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
>                * this also includes the cases where bits higher than
>                * the 8th are set on systems with CHAR_BIT > 8
>                */
> -             *cp = GRAPHEME_CODEPOINT_INVALID;
> +             *cp = GRAPHEME_INVALID_CODEPOINT;
>               return 1;
>       }
>       if (1 + off > n) {
>               /*
>                * input is not long enough, set cp as invalid
>                */
> -             *cp = GRAPHEME_CODEPOINT_INVALID;
> +             *cp = GRAPHEME_INVALID_CODEPOINT;
>  
>               /*
>                * count the following continuation bytes, but nothing
> @@ -125,7 +125,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
>                        * higher than the 8th are set on systems
>                        * with CHAR_BIT > 8
>                        */
> -                     *cp = GRAPHEME_CODEPOINT_INVALID;
> +                     *cp = GRAPHEME_INVALID_CODEPOINT;
>                       return 1 + (i - 1);
>               }
>               /*
> @@ -144,14 +144,14 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
>                * not representable in UTF-16 (>0x10FFFF) (RFC-3629
>                * specifies the latter two conditions)
>                */
> -             *cp = GRAPHEME_CODEPOINT_INVALID;
> +             *cp = GRAPHEME_INVALID_CODEPOINT;
>       }
>  
>       return 1 + off;
>  }
>  
>  size_t
> -grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n)
> +grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
>  {
>       size_t off, i;
>  
> @@ -162,7 +162,7 @@ grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n)
>                * (0xD800..0xDFFF) or not representable in UTF-16
>                * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8.
>                */
> -             cp = GRAPHEME_CODEPOINT_INVALID;
> +             cp = GRAPHEME_INVALID_CODEPOINT;
>       }
>  
>       /* determine necessary sequence type */
> diff --git a/test/character-performance.c b/test/character-performance.c
> index a7fdf8f..1005ab4 100644
> --- a/test/character-performance.c
> +++ b/test/character-performance.c
> @@ -45,7 +45,7 @@ main(int argc, char *argv[])
>       for (i = 0; i < NUM_ITERATIONS; i++) {
>               memset(&state, 0, sizeof(state));
>               for (j = 0; j < bufsiz - 1; j++) {
> -                     (void)grapheme_character_isbreak(buf[j], buf[j+1], &state);
> +                     (void)grapheme_is_character_break(buf[j], buf[j+1], &state);
>               }
>               if (i % (NUM_ITERATIONS / 10) == 0) {
>                       printf(".");
> diff --git a/test/character.c b/test/character.c
> index 02dbde9..d156980 100644
> --- a/test/character.c
> +++ b/test/character.c
> @@ -21,7 +21,7 @@ main(int argc, char *argv[])
>               memset(&state, 0, sizeof(state));
>               for (j = 0, k = 0, len = 1; j < character_test[i].cplen; j++) {
>                       if ((j + 1) == character_test[i].cplen ||
> -                         grapheme_character_isbreak(character_test[i].cp[j],
> +                         grapheme_is_character_break(character_test[i].cp[j],
>                                                      character_test[i].cp[j + 1],
>                                                      &state)) {
>                               /* check if our resulting length matches */
> diff --git a/test/utf8-decode.c b/test/utf8-decode.c
> index 7d5e389..ad1495f 100644
> --- a/test/utf8-decode.c
> +++ b/test/utf8-decode.c
> @@ -21,7 +21,7 @@ static const struct {
>               .arr     = NULL,
>               .len     = 0,
>               .exp_len = 0,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid lead byte
> @@ -31,7 +31,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xFD },
>               .len     = 1,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* valid 1-byte sequence
> @@ -61,7 +61,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xC3 },
>               .len     = 1,
>               .exp_len = 2,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 2-byte sequence (second byte malformed)
> @@ -71,7 +71,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xC3, 0xFF },
>               .len     = 2,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 2-byte sequence (overlong encoded)
> @@ -81,7 +81,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xC1, 0xBF },
>               .len     = 2,
>               .exp_len = 2,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* valid 3-byte sequence
> @@ -101,7 +101,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0 },
>               .len     = 1,
>               .exp_len = 3,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (second byte malformed)
> @@ -111,7 +111,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
>               .len     = 3,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (short string, second byte malformed)
> @@ -121,7 +121,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0, 0x7F },
>               .len     = 2,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (third byte missing)
> @@ -131,7 +131,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0, 0xBF },
>               .len     = 2,
>               .exp_len = 3,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (third byte malformed)
> @@ -141,7 +141,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
>               .len     = 3,
>               .exp_len = 2,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (overlong encoded)
> @@ -151,7 +151,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
>               .len     = 3,
>               .exp_len = 3,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 3-byte sequence (UTF-16 surrogate half)
> @@ -161,7 +161,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
>               .len     = 3,
>               .exp_len = 3,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* valid 4-byte sequence
> @@ -181,7 +181,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3 },
>               .len     = 1,
>               .exp_len = 4,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (second byte malformed)
> @@ -191,7 +191,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
>               .len     = 4,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (short string 1, second byte malformed)
> @@ -201,7 +201,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F },
>               .len     = 2,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (short string 2, second byte malformed)
> @@ -211,7 +211,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
>               .len     = 3,
>               .exp_len = 1,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>  
>       {
> @@ -222,7 +222,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF },
>               .len     = 2,
>               .exp_len = 4,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (third byte malformed)
> @@ -232,7 +232,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
>               .len     = 4,
>               .exp_len = 2,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (short string, third byte malformed)
> @@ -242,7 +242,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
>               .len     = 3,
>               .exp_len = 2,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (fourth byte missing)
> @@ -252,7 +252,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
>               .len     = 3,
>               .exp_len = 4,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (fourth byte malformed)
> @@ -262,7 +262,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
>               .len     = 4,
>               .exp_len = 3,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (overlong encoded)
> @@ -272,7 +272,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
>               .len     = 4,
>               .exp_len = 4,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>       {
>               /* invalid 4-byte sequence (UTF-16-unrepresentable)
> @@ -282,7 +282,7 @@ static const struct {
>               .arr     = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
>               .len     = 4,
>               .exp_len = 4,
> -             .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
> +             .exp_cp  = GRAPHEME_INVALID_CODEPOINT,
>       },
>  };
>  
> @@ -298,7 +298,7 @@ main(int argc, char *argv[])
>               size_t len;
>               uint_least32_t cp;
>  
> -             len = grapheme_utf8_decode(dec_test[i].arr,
> +             len = grapheme_decode_utf8(dec_test[i].arr,
>                                          dec_test[i].len, &cp);
>  
>               if (len != dec_test[i].exp_len ||
> diff --git a/test/utf8-encode.c b/test/utf8-encode.c
> index 6dd5637..ded2af0 100644
> --- a/test/utf8-encode.c
> +++ b/test/utf8-encode.c
> @@ -62,7 +62,7 @@ main(int argc, char *argv[])
>               char arr[4];
>               size_t len;
>  
> -             len = grapheme_utf8_encode(enc_test[i].cp, arr, LEN(arr));
> +             len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
>  
>               if (len != enc_test[i].exp_len ||
>                   memcmp(arr, enc_test[i].exp_arr, len)) {
>

Re: [hackers] [libgrapheme] Rename API functions to improve readability || Laslo Hunhold

Reply via email to