Author: rinrab
Date: Wed May 20 08:15:53 2026
New Revision: 1934423
Log:
Implement routines to do alignment of UTF-8 string to the left or to the right
side. We'll use them in the cmdline code to properly format tables where we
want them to handle Unicode strings properly.
* subversion/include/private/svn_utf_private.h
(svn_utf__cstring_utf8_align_right,
svn_utf__cstring_utf8_align_left): Declare symbols.
* subversion/libsvn_subr/utf8proc.c
(utf8_skipn,
svn_utf__cstring_utf8_align_right,
svn_utf__cstring_utf8_align_left): New functions.
* subversion/tests/libsvn_subr/utf-test.c
(test_utf8_align): Add test to check new API.
(test_funcs): Run the test.
Modified:
subversion/trunk/subversion/include/private/svn_utf_private.h
subversion/trunk/subversion/libsvn_subr/utf8proc.c
subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
Modified: subversion/trunk/subversion/include/private/svn_utf_private.h
==============================================================================
--- subversion/trunk/subversion/include/private/svn_utf_private.h Wed May
20 06:45:03 2026 (r1934422)
+++ subversion/trunk/subversion/include/private/svn_utf_private.h Wed May
20 08:15:53 2026 (r1934423)
@@ -288,6 +288,35 @@ svn_utf__utf32_to_utf8(const svn_string_
apr_pool_t *scratch_pool);
+/* Return a newly allocated copy of @a cstr, allocated in @a pool and
+ * aligned to the right: the text is padded with spaces on its left side so
+ * that it occupies @a padding display columns. Widths are measured per
+ * UTF-8 character, using wcwidth-style column widths, not per byte.
+ *
+ * If @a cstr is wider than @a padding, it is truncated and only its
+ * rightmost part is kept.
+ *
+ * Please note that a character which is wider than one column may not fit
+ * the requested width exactly; in that case the result will not be
+ * perfectly aligned.
+ */
+char *
+svn_utf__cstring_utf8_align_right(const char *cstr,
+ int padding,
+ apr_pool_t *pool);
+
+/* Return a newly allocated copy of @a cstr, allocated in @a pool and
+ * aligned to the left: the text is padded with spaces on its right side so
+ * that it occupies @a padding display columns. Widths are measured per
+ * UTF-8 character, using wcwidth-style column widths, not per byte.
+ *
+ * If @a cstr is wider than @a padding, it is truncated and only its
+ * leftmost part is kept.
+ *
+ * Please note that a character which is wider than one column may not fit
+ * the requested width exactly; in that case the result will not be
+ * perfectly aligned.
+ *
+ * Similar to svn_utf__cstring_utf8_align_right() but doing alignment to the
+ * left side.
+ */
+char *
+svn_utf__cstring_utf8_align_left(const char *cstr,
+ int padding,
+ apr_pool_t *pool);
+
#ifdef __cplusplus
}
#endif /* __cplusplus */
Modified: subversion/trunk/subversion/libsvn_subr/utf8proc.c
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf8proc.c Wed May 20 06:45:03
2026 (r1934422)
+++ subversion/trunk/subversion/libsvn_subr/utf8proc.c Wed May 20 08:15:53
2026 (r1934423)
@@ -638,3 +638,71 @@ svn_utf_cstring_utf8_width(const char *c
return width;
}
+
+/* Advance CSTR past its leading characters until at least N display
+ * columns have been consumed or the terminating NUL is reached, and return
+ * the resulting position.  Column widths come from utf8proc_charwidth().
+ *
+ * Characters are always consumed whole, so the skipped prefix can be wider
+ * than N when a multi-column character straddles the limit.
+ *
+ * Return NULL if utf8proc_iterate() reports a decoding error.
+ */
+static const char *
+utf8_skipn(const char *cstr, apr_size_t n)
+{
+  apr_size_t columns = 0;
+
+  while (*cstr && columns < n)
+    {
+      apr_int32_t ucs;
+
+      /* The decoder only reads the bytes, so keep the const qualifier
+         instead of casting it away. */
+      int nbytes = utf8proc_iterate((const apr_byte_t *)cstr, -1, &ucs);
+
+      if (nbytes < 0)
+        return NULL;
+
+      cstr += nbytes;
+      columns += utf8proc_charwidth(ucs);
+    }
+
+  return cstr;
+}
+
+/* Return a copy of CSTR, allocated in POOL, right-aligned in a field of
+ * PADDING display columns.
+ *
+ * If CSTR is narrower than PADDING, spaces are inserted on the left.  If
+ * it is wider, leading characters are dropped and the tail is kept.  Whole
+ * characters are kept or dropped, so a multi-column character at the cut
+ * point can leave the result wider than PADDING.  A negative PADDING is
+ * treated as zero.
+ */
+char *
+svn_utf__cstring_utf8_align_right(const char *cstr, int padding,
+                                  apr_pool_t *pool)
+{
+  int width = svn_utf_cstring_utf8_width(cstr);
+
+  /* A negative field width cannot hold anything. */
+  if (padding < 0)
+    padding = 0;
+
+  if (width > padding)
+    {
+      /* Too wide: keep the tail.  Drop leading characters for as long as
+         the dropped prefix is at most EXCESS columns wide, so that what
+         remains still covers PADDING columns.  Cutting on character
+         boundaries keeps the result valid UTF-8 even when the characters
+         differ in byte length. */
+      int excess = width - padding;
+      int dropped = 0;
+      const char *tail = cstr;
+
+      while (*tail)
+        {
+          apr_int32_t ucs;
+          int nbytes = utf8proc_iterate((const apr_byte_t *)tail, -1, &ucs);
+
+          /* Undecodable input: stop at the last good boundary. */
+          if (nbytes <= 0)
+            break;
+
+          dropped += utf8proc_charwidth(ucs);
+          if (dropped > excess)
+            break;
+
+          tail += nbytes;
+        }
+
+      return apr_pstrdup(pool, tail);
+    }
+  else
+    {
+      /* NOTE(review): if svn_utf_cstring_utf8_width() can return a
+         negative value (e.g. for invalid input), SPACES ends up larger
+         than PADDING -- confirm the intended behaviour. */
+      apr_size_t size = strlen(cstr);
+      apr_size_t spaces = (apr_size_t)(padding - width);
+
+      /* One extra byte for the terminating NUL. */
+      char *result = apr_palloc(pool, size + spaces + 1);
+
+      memset(result, ' ', spaces);
+      memcpy(result + spaces, cstr, size);
+      result[size + spaces] = '\0';
+      return result;
+    }
+}
+
+/* Return a copy of CSTR, allocated in POOL, left-aligned in a field of
+ * PADDING display columns.
+ *
+ * If CSTR is narrower than PADDING, spaces are appended on the right.  If
+ * it is wider, it is cut after the leading characters that cover PADDING
+ * columns.  Whole characters are kept, so a multi-column character at the
+ * cut point can leave the result wider than PADDING.  A negative PADDING
+ * is treated as zero.
+ */
+char *
+svn_utf__cstring_utf8_align_left(const char *cstr, int padding,
+                                 apr_pool_t *pool)
+{
+  int width = svn_utf_cstring_utf8_width(cstr);
+
+  /* A negative field width cannot hold anything; clamping also keeps the
+     conversion to apr_size_t below from wrapping to a huge value. */
+  if (padding < 0)
+    padding = 0;
+
+  if (width > padding)
+    {
+      const char *end = utf8_skipn(cstr, (apr_size_t)padding);
+
+      /* utf8_skipn() returns NULL when it cannot decode the input; there
+         is no safe cut point then, so hand back an unmodified copy. */
+      if (end == NULL)
+        return apr_pstrdup(pool, cstr);
+
+      return apr_pstrmemdup(pool, cstr, (apr_size_t)(end - cstr));
+    }
+  else
+    {
+      /* NOTE(review): if svn_utf_cstring_utf8_width() can return a
+         negative value (e.g. for invalid input), SPACES ends up larger
+         than PADDING -- confirm the intended behaviour. */
+      apr_size_t size = strlen(cstr);
+      apr_size_t spaces = (apr_size_t)(padding - width);
+
+      /* One extra byte for the terminating NUL. */
+      char *result = apr_palloc(pool, size + spaces + 1);
+
+      memcpy(result, cstr, size);
+      memset(result + size, ' ', spaces);
+      result[size + spaces] = '\0';
+      return result;
+    }
+}
Modified: subversion/trunk/subversion/tests/libsvn_subr/utf-test.c
==============================================================================
--- subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Wed May 20
06:45:03 2026 (r1934422)
+++ subversion/trunk/subversion/tests/libsvn_subr/utf-test.c Wed May 20
08:15:53 2026 (r1934423)
@@ -1028,6 +1028,49 @@ test_utf8_width(apr_pool_t *pool)
return SVN_NO_ERROR;
}
+/* Check svn_utf__cstring_utf8_align_left() and
+ * svn_utf__cstring_utf8_align_right() on ASCII, two-byte and four-byte
+ * (double-width) input, covering padding, exact fit and truncation.
+ * Each expected string contains (padding - width) pad spaces. */
+static svn_error_t *
+test_utf8_align(apr_pool_t *pool)
+{
+  /* ASCII: three columns, three bytes */
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_left("abc", 5, pool),
+                         "abc  ");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_left("abc", 3, pool),
+                         "abc");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_left("abc", 2, pool),
+                         "ab");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_right("abc", 5, pool),
+                         "  abc");
+  SVN_TEST_STRING_ASSERT(svn_utf__cstring_utf8_align_right("abc", 2, pool),
+                         "bc");
+
+  /* two byte symbols: two columns, four bytes */
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_left("\xc5\xaf\xc5\xa1", 4, pool),
+    "\xc5\xaf\xc5\xa1  ");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_left("\xc5\xaf\xc5\xa1", 1, pool),
+    "\xc5\xaf");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_right("\xc5\xaf\xc5\xa1", 4, pool),
+    "  \xc5\xaf\xc5\xa1");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_right("\xc5\xaf\xc5\xa1", 1, pool),
+    "\xc5\xa1");
+
+  /* an emoji: one character, two columns, four bytes */
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 2, pool),
+    "\xf0\x9f\xa5\xba");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 3, pool),
+    " \xf0\x9f\xa5\xba");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_left("\xf0\x9f\xa5\xba", 2, pool),
+    "\xf0\x9f\xa5\xba");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_left("\xf0\x9f\xa5\xba", 3, pool),
+    "\xf0\x9f\xa5\xba ");
+
+  /* this is technically wrong (?): a two-column character cannot fit in a
+     one-column field, but it is never split, so it is returned whole and
+     the result is wider than requested. */
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_right("\xf0\x9f\xa5\xba", 1, pool),
+    "\xf0\x9f\xa5\xba");
+  SVN_TEST_STRING_ASSERT(
+    svn_utf__cstring_utf8_align_left("\xf0\x9f\xa5\xba", 1, pool),
+    "\xf0\x9f\xa5\xba");
+
+  return SVN_NO_ERROR;
+}
+
/* The test table. */
@@ -1060,6 +1103,8 @@ static struct svn_test_descriptor_t test
"test svn_utf__xfrm"),
SVN_TEST_PASS2(test_utf8_width,
"test svn_utf_cstring_utf8_width"),
+ SVN_TEST_PASS2(test_utf8_align,
+ "test utf8 alignment"),
SVN_TEST_NULL
};