On Fri, Feb 28, 2025 at 02:14:34PM +0100, Branko Čibej wrote:
> This is probably the minimal patch to disable this tweak.
Thanks, your diff looks good to me.
> There's a bunch of
> code that needs to be replaced or removed, though.
These bits of code are easy to spot and remove.
Here is a diff for that:
[[[
Remove optimized unaligned-access code paths.

With newer compilers that use vector instructions, these manual optimizations
can lead to build errors and no longer provide benefits over the compiler's
own optimizations.
* subversion/libsvn_diff/diff_file.c
  (contains_eol): Remove.
  (find_identical_prefix, find_identical_suffix): Remove code under
   #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_fs/tree.c
  (hash_func): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/dag_cache.c
  (cache_lookup): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/string_table.c
  (copy_masks): Remove.
  (table_copy_string): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/eol.c
  (svn_eol__find_eol_start): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/hash.c
  (hashfunc_compatible): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/string.c
  (svn_cstring__match_length, svn_cstring__reverse_match_length): Remove
   code under #if SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/utf_validate.c
  (first_non_fsm_start_char): Remove code under #if SVN_UNALIGNED_ACCESS_IS_OK.
Reported by: Sam James from Gentoo
]]]
Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c (revision 1924098)
+++ subversion/libsvn_diff/diff_file.c (working copy)
@@ -355,23 +355,6 @@ is_one_at_eof(struct file_info file[], apr_size_t
return FALSE;
}
-/* Quickly determine whether there is a eol char in CHUNK.
- * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
- */
-
-#if SVN_UNALIGNED_ACCESS_IS_OK
-static svn_boolean_t contains_eol(apr_uintptr_t chunk)
-{
- apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
- apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
-
- r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
- n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-
- return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
-}
-#endif
-
/* Find the prefix which is identical between all elements of the FILE array.
* Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
* set to TRUE if one of the FILEs reached its end while scanning prefix,
@@ -396,10 +379,6 @@ find_identical_prefix(svn_boolean_t *reached_one_e
is_match = is_match && *file[0].curp == *file[i].curp;
while (is_match)
{
-#if SVN_UNALIGNED_ACCESS_IS_OK
- apr_ssize_t max_delta, delta;
-#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
-
/* ### TODO: see if we can take advantage of
diff options like ignore_eol_style or ignore_space. */
/* check for eol, and count */
@@ -419,53 +398,6 @@ find_identical_prefix(svn_boolean_t *reached_one_e
INCREMENT_POINTERS(file, file_len, pool);
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
- /* Try to advance as far as possible with machine-word granularity.
- * Determine how far we may advance with chunky ops without reaching
- * endp for any of the files.
- * Signedness is important here if curp gets close to endp.
- */
- max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
- for (i = 1; i < file_len; i++)
- {
- delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
- if (delta < max_delta)
- max_delta = delta;
- }
-
- is_match = TRUE;
- for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
- {
- apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
- if (contains_eol(chunk))
- break;
-
- for (i = 1; i < file_len; i++)
- if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
- {
- is_match = FALSE;
- break;
- }
-
- if (! is_match)
- break;
- }
-
- if (delta /* > 0*/)
- {
- /* We either found a mismatch or an EOL at or shortly behind curp+delta
- * or we cannot proceed with chunky ops without exceeding endp.
- * In any way, everything up to curp + delta is equal and not an EOL.
- */
- for (i = 0; i < file_len; i++)
- file[i].curp += delta;
-
- /* Skipped data without EOL markers, so last char was not a CR. */
- had_cr = FALSE;
- }
-#endif
-
*reached_one_eof = is_one_at_eof(file, file_len);
if (*reached_one_eof)
break;
@@ -611,11 +543,6 @@ find_identical_suffix(apr_off_t *suffix_lines, str
while (is_match)
{
svn_boolean_t reached_prefix;
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* Initialize the minimum pointer positions. */
- const char *min_curp[4];
- svn_boolean_t can_read_word;
-#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
/* ### TODO: see if we can take advantage of
diff options like ignore_eol_style or ignore_space. */
@@ -636,60 +563,6 @@ find_identical_suffix(apr_off_t *suffix_lines, str
DECREMENT_POINTERS(file_for_suffix, file_len, pool);
-#if SVN_UNALIGNED_ACCESS_IS_OK
- for (i = 0; i < file_len; i++)
- min_curp[i] = file_for_suffix[i].buffer;
-
- /* If we are in the same chunk that contains the last part of the common
- prefix, use the min_curp[0] pointer to make sure we don't get a
- suffix that overlaps the already determined common prefix. */
- if (file_for_suffix[0].chunk == suffix_min_chunk0)
- min_curp[0] += suffix_min_offset0;
-
- /* Scan quickly by reading with machine-word granularity. */
- for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
- can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
- > min_curp[i]);
-
- while (can_read_word)
- {
- apr_uintptr_t chunk;
-
- /* For each file curp is positioned at the current byte, but we
- want to examine the current byte and the ones before the current
- location as one machine word. */
-
- chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
- - sizeof(apr_uintptr_t));
- if (contains_eol(chunk))
- break;
-
- for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
- is_match = (chunk
- == *(const apr_uintptr_t *)
- (file_for_suffix[i].curp + 1
- - sizeof(apr_uintptr_t)));
-
- if (! is_match)
- break;
-
- for (i = 0; i < file_len; i++)
- {
- file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
- can_read_word = can_read_word
- && ( (file_for_suffix[i].curp + 1
- - sizeof(apr_uintptr_t))
- > min_curp[i]);
- }
-
- /* We skipped some bytes, so there are no closing EOLs */
- had_nl = FALSE;
- }
-
- /* The > min_curp[i] check leaves at least one final byte for checking
- in the non block optimized case below. */
-#endif
-
reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
&& (file_for_suffix[0].curp - file_for_suffix[0].buffer)
== suffix_min_offset0;
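As an aside, contains_eol() above (and the near-identical loop in eol.c
further down) is the classic SWAR zero-byte test. A self-contained sketch of
the idea, with the SVN__*_MASK constants written out as plain 64-bit
literals and assuming an 8-byte word (the real code used apr_uintptr_t):

[[[
#include <stdint.h>
#include <stdbool.h>

/* TRUE iff the 8-byte word CHUNK contains a '\r' or '\n' byte.
 * XORing with a repeated-'\r' (or -'\n') mask turns a matching byte
 * into 0x00; the "| ((x & 0x7f..) + 0x7f..)" step then leaves bit 7
 * of a byte clear only if that byte was zero. */
static bool contains_eol(uint64_t chunk)
{
  const uint64_t r_mask = 0x0d0d0d0d0d0d0d0dULL; /* eight '\r' bytes */
  const uint64_t n_mask = 0x0a0a0a0a0a0a0a0aULL; /* eight '\n' bytes */
  const uint64_t lower7 = 0x7f7f7f7f7f7f7f7fULL;
  const uint64_t bit7   = 0x8080808080808080ULL;

  uint64_t r_test = chunk ^ r_mask;
  uint64_t n_test = chunk ^ n_mask;

  r_test |= (r_test & lower7) + lower7;
  n_test |= (n_test & lower7) + lower7;

  return (r_test & n_test & bit7) != bit7;
}
]]]

The plain byte loop that remains is exactly the kind of code current
GCC/Clang auto-vectorize, which is the point of the log message.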
Index: subversion/libsvn_fs_fs/tree.c
===================================================================
--- subversion/libsvn_fs_fs/tree.c (revision 1924098)
+++ subversion/libsvn_fs_fs/tree.c (working copy)
@@ -220,11 +220,6 @@ hash_func(svn_revnum_t revision,
apr_size_t i;
apr_uint32_t hash_value = (apr_uint32_t)revision;
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* "randomizing" / distributing factor used in our hash function */
- const apr_uint32_t factor = 0xd1f3da69;
-#endif
-
/* Calculate the hash value
(HASH_VALUE has been initialized to REVISION).
@@ -233,35 +228,8 @@ hash_func(svn_revnum_t revision,
make as much of *PATH influence the result as possible to get an "even"
spread across the hash buckets (maximizes our cache retention rate and
thus the hit rates).
-
- When chunked access is possible (independent of the PATH pointer's
- value!), we read 4 bytes at once and multiply the hash value with a
- FACTOR that mirror / pattern / shift all 4 input bytes to various bits
- of the result. The final result will be taken from the MSBs.
-
- When chunked access is not possible (not supported by CPU or odd bytes
- at the end of *PATH), we use the simple traditional "* 33" hash
- function that works very well with texts / paths and that e.g. APR uses.
-
- Please note that the bytewise and the chunked calculation are *NOT*
- interchangeable as they will yield different results for the same input.
- For any given machine and *PATH, we must use a fixed combination of the
- two functions.
*/
- i = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* We relax the dependency chain between iterations by processing
- two chunks from the input per hash_value self-multiplication.
- The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
- per 2 chunks instead of 1 chunk.
- */
- for (; i + 8 <= path_len; i += 8)
- hash_value = hash_value * factor * factor
- + ( *(const apr_uint32_t*)(path + i) * factor
- + *(const apr_uint32_t*)(path + i + 4));
-#endif
-
- for (; i < path_len; ++i)
+ for (i = 0; i < path_len; ++i)
/* Help GCC to minimize the HASH_VALUE update latency by splitting the
MUL 33 of the naive implementation: h = h * 33 + path[i]. This
shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
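For reference, the removed chunked path in hash_func() (the same scheme
appears in dag_cache.c below) computed something like the following sketch,
with memcpy() standing in for the raw unaligned loads:

[[[
#include <stdint.h>
#include <string.h>
#include <stddef.h>

static uint32_t chunked_hash(uint32_t hash_value,
                             const char *path, size_t path_len)
{
  const uint32_t factor = 0xd1f3da69;
  size_t i = 0;

  /* Two 4-byte chunks per self-multiplication of HASH_VALUE; this is
   * the "1 MUL + 1 ADD latency per 2 chunks" trick from the removed
   * comment. */
  for (; i + 8 <= path_len; i += 8)
    {
      uint32_t c0, c1;
      memcpy(&c0, path + i, sizeof(c0));
      memcpy(&c1, path + i + 4, sizeof(c1));
      hash_value = hash_value * factor * factor
                   + (c0 * factor + c1);
    }

  /* Odd tail bytes use the traditional "* 33" hash. */
  for (; i < path_len; ++i)
    hash_value = hash_value * 33 + (unsigned char)path[i];

  return hash_value;
}
]]]

As the removed comment warned, this yields different values than the pure
bytewise loop, so the patch changes hash values on platforms that had
SVN_UNALIGNED_ACCESS_IS_OK set. That should be harmless here since the value
only picks an in-process cache bucket.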
Index: subversion/libsvn_fs_x/dag_cache.c
===================================================================
--- subversion/libsvn_fs_x/dag_cache.c (revision 1924098)
+++ subversion/libsvn_fs_x/dag_cache.c (working copy)
@@ -312,11 +312,6 @@ cache_lookup(svn_fs_x__dag_cache_t *cache,
apr_size_t path_len = path->len;
apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* "randomizing" / distributing factor used in our hash function */
- const apr_uint32_t factor = 0xd1f3da69;
-#endif
-
/* optimistic lookup: hit the same bucket again? */
cache_entry_t *result = &cache->buckets[cache->last_hit];
if ( (result->change_set == change_set)
@@ -332,20 +327,7 @@ cache_lookup(svn_fs_x__dag_cache_t *cache,
/* need to do a full lookup. Calculate the hash value
(HASH_VALUE has been initialized to REVISION). */
- i = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* We relax the dependency chain between iterations by processing
- two chunks from the input per hash_value self-multiplication.
- The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
- per 2 chunks instead of 1 chunk.
- */
- for (; i + 8 <= path_len; i += 8)
- hash_value = hash_value * factor * factor
- + ( *(const apr_uint32_t*)(path->data + i) * factor
- + *(const apr_uint32_t*)(path->data + i + 4));
-#endif
-
- for (; i < path_len; ++i)
+ for (i = 0; i < path_len; ++i)
/* Help GCC to minimize the HASH_VALUE update latency by splitting the
MUL 33 of the naive implementation: h = h * 33 + path[i]. This
shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
Index: subversion/libsvn_fs_x/string_table.c
===================================================================
--- subversion/libsvn_fs_x/string_table.c (revision 1924098)
+++ subversion/libsvn_fs_x/string_table.c (working copy)
@@ -473,21 +473,6 @@ svn_fs_x__string_table_create(const string_table_b
return result;
}
-/* Masks used by table_copy_string. copy_mask[I] is used if the target
- content to be preserved starts at byte I within the current chunk.
- This is used to work around alignment issues.
- */
-#if SVN_UNALIGNED_ACCESS_IS_OK
-static const char *copy_masks[8] = { "\xff\xff\xff\xff\xff\xff\xff\xff",
- "\x00\xff\xff\xff\xff\xff\xff\xff",
- "\x00\x00\xff\xff\xff\xff\xff\xff",
- "\x00\x00\x00\xff\xff\xff\xff\xff",
- "\x00\x00\x00\x00\xff\xff\xff\xff",
- "\x00\x00\x00\x00\x00\xff\xff\xff",
- "\x00\x00\x00\x00\x00\x00\xff\xff",
- "\x00\x00\x00\x00\x00\x00\x00\xff" };
-#endif
-
static void
table_copy_string(char *buffer,
apr_size_t len,
@@ -499,40 +484,10 @@ table_copy_string(char *buffer,
{
assert(header->head_length <= len);
{
-#if SVN_UNALIGNED_ACCESS_IS_OK
- /* the sections that we copy tend to be short but we can copy
- *all* of it chunky because we made sure that source and target
- buffer have some extra padding to prevent segfaults. */
- apr_uint64_t mask;
- apr_size_t to_copy = len - header->head_length;
- apr_size_t copied = 0;
-
- const char *source = table->data + header->tail_start;
- char *target = buffer + header->head_length;
- len = header->head_length;
-
- /* copy whole chunks */
- while (to_copy >= copied + sizeof(apr_uint64_t))
- {
- *(apr_uint64_t *)(target + copied)
- = *(const apr_uint64_t *)(source + copied);
- copied += sizeof(apr_uint64_t);
- }
-
- /* copy the remainder assuming that we have up to 8 extra bytes
- of addressable buffer on the source and target sides.
- Now, we simply copy 8 bytes and use a mask to filter & merge
- old with new data. */
- mask = *(const apr_uint64_t *)copy_masks[to_copy - copied];
- *(apr_uint64_t *)(target + copied)
- = (*(apr_uint64_t *)(target + copied) & mask)
- | (*(const apr_uint64_t *)(source + copied) & ~mask);
-#else
memcpy(buffer + header->head_length,
table->data + header->tail_start,
len - header->head_length);
len = header->head_length;
-#endif
}
header = &table->short_strings[header->head_string];
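The copy_masks trick deserves a footnote since it is the least obvious of
the removals: the tail of each string was copied as one full 8-byte store,
blending new source bytes with preserved target bytes. A portable sketch of
that blend (assuming, as the removed comment did, that both buffers have at
least 8 addressable bytes of padding, and TO_COPY < 8):

[[[
#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Copy the first TO_COPY bytes of SOURCE into TARGET by writing one
 * whole 8-byte word: bytes masked 0x00 take the SOURCE value, bytes
 * masked 0xff keep the old TARGET value.  Building the mask with
 * memset() keeps this endianness-neutral, like the byte-string mask
 * table the removed code used. */
static void masked_tail_copy(char *target, const char *source,
                             size_t to_copy)
{
  uint64_t mask, src, dst;

  memset(&mask, 0xff, sizeof(mask));
  memset(&mask, 0x00, to_copy);        /* first TO_COPY bytes: source */
  memcpy(&src, source, sizeof(src));   /* relies on the padding */
  memcpy(&dst, target, sizeof(dst));

  dst = (dst & mask) | (src & ~mask);
  memcpy(target, &dst, sizeof(dst));
}
]]]

With plain memcpy() in table_copy_string() this whole dance simply
disappears.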
Index: subversion/libsvn_subr/eol.c
===================================================================
--- subversion/libsvn_subr/eol.c (revision 1924098)
+++ subversion/libsvn_subr/eol.c (working copy)
@@ -33,34 +33,6 @@
char *
svn_eol__find_eol_start(char *buf, apr_size_t len)
{
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
- /* Scan the input one machine word at a time. */
- for (; len > sizeof(apr_uintptr_t)
- ; buf += sizeof(apr_uintptr_t), len -= sizeof(apr_uintptr_t))
- {
- /* This is a variant of the well-known strlen test: */
- apr_uintptr_t chunk = *(const apr_uintptr_t *)buf;
-
- /* A byte in SVN__R_TEST is \0, iff it was \r in *BUF.
- * Similarly, SVN__N_TEST is an indicator for \n. */
- apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
- apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
-
- /* A byte in SVN__R_TEST can only be < 0x80, iff it has been \0 before
- * (i.e. \r in *BUF). Ditto for SVN__N_TEST. */
- r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
- n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-
- /* Check whether at least one of the words contains a byte <0x80
- * (if one is detected, there was a \r or \n in CHUNK). */
- if ((r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET)
- break;
- }
-
-#endif
-
- /* The remaining odd bytes will be examined the naive way: */
for (; len > 0; ++buf, --len)
{
if (*buf == '\n' || *buf == '\r')
Index: subversion/libsvn_subr/hash.c
===================================================================
--- subversion/libsvn_subr/hash.c (revision 1924098)
+++ subversion/libsvn_subr/hash.c (working copy)
@@ -636,18 +636,8 @@ hashfunc_compatible(const char *char_key, apr_ssiz
if (*klen == APR_HASH_KEY_STRING)
*klen = strlen(char_key);
-#if SVN_UNALIGNED_ACCESS_IS_OK
for (p = key, i = *klen; i >= 4; i-=4, p+=4)
{
- apr_uint32_t chunk = *(const apr_uint32_t *)p;
-
- /* the ">> 17" part gives upper bits in the chunk a chance to make
- some impact as well */
- hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
- }
-#else
- for (p = key, i = *klen; i >= 4; i-=4, p+=4)
- {
hash = hash * 33 * 33 * 33 * 33
+ p[0] * 33 * 33 * 33
+ p[1] * 33 * 33
@@ -654,7 +644,7 @@ hashfunc_compatible(const char *char_key, apr_ssiz
+ p[2] * 33
+ p[3];
}
-#endif
+
for (; i; i--, p++)
hash = hash * 33 + *p;
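Note that nothing changes functionally in hashfunc_compatible() on platforms
without SVN_UNALIGNED_ACCESS_IS_OK: the surviving unrolled loop is
bit-for-bit the naive bytewise hash, since four steps of h = h * 33 + p[i]
expand to exactly those weights. A quick check of that identity:

[[[
#include <stdint.h>
#include <assert.h>

static uint32_t bytewise(uint32_t h, const unsigned char p[4])
{
  /* four steps of the classic h = h * 33 + p[i] */
  for (int i = 0; i < 4; ++i)
    h = h * 33 + p[i];
  return h;
}

static uint32_t unrolled(uint32_t h, const unsigned char p[4])
{
  /* the form kept in hashfunc_compatible() */
  return h * 33 * 33 * 33 * 33
         + p[0] * 33 * 33 * 33
         + p[1] * 33 * 33
         + p[2] * 33
         + p[3];
}

int main(void)
{
  const unsigned char p[4] = { 'p', 'a', 't', 'h' };
  assert(bytewise(0x811c9dc5u, p) == unrolled(0x811c9dc5u, p));
  return 0;
}
]]]

The removed chunked branch, by contrast, mixed in "+ chunk + (chunk >> 17)"
and therefore produced different values on unaligned-access platforms.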
Index: subversion/libsvn_subr/string.c
===================================================================
--- subversion/libsvn_subr/string.c (revision 1924098)
+++ subversion/libsvn_subr/string.c (working copy)
@@ -1508,20 +1508,6 @@ svn_cstring__match_length(const char *a,
{
apr_size_t pos = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
- /* Chunky processing is so much faster ...
- *
- * We can't make this work on architectures that require aligned access
- * because A and B will probably have different alignment. So, skipping
- * the first few chars until alignment is reached is not an option.
- */
- for (; max_len - pos >= sizeof(apr_size_t); pos += sizeof(apr_size_t))
- if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
- break;
-
-#endif
-
for (; pos < max_len; ++pos)
if (a[pos] != b[pos])
break;
@@ -1536,22 +1522,6 @@ svn_cstring__reverse_match_length(const char *a,
{
apr_size_t pos = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
- /* Chunky processing is so much faster ...
- *
- * We can't make this work on architectures that require aligned access
- * because A and B will probably have different alignment. So, skipping
- * the first few chars until alignment is reached is not an option.
- */
- for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
- if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
- break;
-
- pos -= sizeof(apr_size_t);
-
-#endif
-
/* If we find a mismatch at -pos, pos-1 characters matched.
*/
while (++pos <= max_len)
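Should the word-at-a-time comparison in svn_cstring__match_length() and its
reverse twin ever turn out to matter, it can be approximated portably with
memcmp() on word-sized blocks instead of raw unaligned loads; a sketch:

[[[
#include <stddef.h>
#include <string.h>

/* Length of the common prefix of A and B, at most MAX_LEN. */
static size_t match_length(const char *a, const char *b, size_t max_len)
{
  size_t pos = 0;

  /* whole words while they fit and still match */
  for (; max_len - pos >= sizeof(size_t); pos += sizeof(size_t))
    if (memcmp(a + pos, b + pos, sizeof(size_t)) != 0)
      break;

  /* bytewise tail; also pins down the exact mismatch position */
  for (; pos < max_len; ++pos)
    if (a[pos] != b[pos])
      break;

  return pos;
}
]]]

In practice the plain loop plus compiler auto-vectorization should make even
that unnecessary.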
Index: subversion/libsvn_subr/utf_validate.c
===================================================================
--- subversion/libsvn_subr/utf_validate.c (revision 1924098)
+++ subversion/libsvn_subr/utf_validate.c (working copy)
@@ -258,17 +258,6 @@ static const char machine [9][14] = {
static const char *
first_non_fsm_start_char(const char *data, apr_size_t max_len)
{
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
- /* Scan the input one machine word at a time. */
- for (; max_len > sizeof(apr_uintptr_t)
- ; data += sizeof(apr_uintptr_t), max_len -= sizeof(apr_uintptr_t))
- if (*(const apr_uintptr_t *)data & SVN__BIT_7_SET)
- break;
-
-#endif
-
- /* The remaining odd bytes will be examined the naive way: */
for (; max_len > 0; ++data, --max_len)
if ((unsigned char)*data >= 0x80)
break;
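Finally, for utf_validate.c: the removed fast path just skipped over whole
words of pure ASCII before handing off to the state machine. The same idea,
written with a portable load:

[[[
#include <stdint.h>
#include <string.h>
#include <stddef.h>

/* Return the first position in DATA (at most MAX_LEN bytes) that may
 * start a non-ASCII sequence, i.e. the first byte >= 0x80. */
static const char *first_non_ascii(const char *data, size_t max_len)
{
  const uint64_t bit7 = 0x8080808080808080ULL; /* bit 7 of every byte */

  for (; max_len > sizeof(uint64_t);
       data += sizeof(uint64_t), max_len -= sizeof(uint64_t))
    {
      uint64_t chunk;
      memcpy(&chunk, data, sizeof(chunk));
      if (chunk & bit7)  /* some byte in this word is >= 0x80 */
        break;
    }

  for (; max_len > 0; ++data, --max_len)
    if ((unsigned char)*data >= 0x80)
      break;

  return data;
}
]]]

Again, the naive loop that stays behind vectorizes fine on its own these
days.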