在 2018/7/27 1:17, Zhaoxiu Zeng 写道: > 在 2018/7/23 2:37, Greg Kroah-Hartman 写道: >> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote: >>> From: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com> >>> >>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy >>> and fast. >>> For the Sunday algorithm, to see >>> http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm >> >> So you say, but what does this really buy us? Why make this change? >> How was it tested? What is the downside of not taking this? >> >> thanks, >> >> greg k-h >> > > I use the following program to test on fc28. > Compile with O2, the new version is almost 2X faster than the original. > The code size of the original is 0x80, the newer is 0xB0. > > > #include <stdio.h> > #include <stdlib.h> > #include <stdint.h> > #include <string.h> > #include <time.h> > #include <unistd.h> > > /** > * strstr - Find the first substring in a %NUL terminated string > * @s1: The string to be searched > * @s2: The string to search for > */ > char *strstr1(const char *s1, const char *s2) > { > size_t l1, l2; > > l2 = strlen(s2); > if (!l2) > return (char *)s1; > l1 = strlen(s1); > while (l1 >= l2) { > l1--; > if (!memcmp(s1, s2, l2)) > return (char *)s1; > s1++; > } > return NULL; > } > > char *strstr2(const char *s1, const char *s2) > { > size_t l2; > const char *pchk = s1; > > #if 1//def __HAVE_ARCH_STRLEN > l2 = strlen(s2); > #else > for (l2 = 0; s2[l2] != 0; l2++) > /* nothing */; > #endif > if (!l2) > return (char *)s1; > > /* > * Sunday matching > */ > while (1) { > size_t i; > char k; > > /* compare */ > for (i = 0; i < l2; i++) { > if (s1[i] != s2[i]) > break; > } > if (i >= l2) > return (char *)s1; > > /* if s1 terminate early? */ > if (pchk < s1 + i) > pchk = s1 + i; > do { > k = *pchk; > if (k == 0) > return NULL; > } while (pchk++ != s1 + l2); > > /* find the k's last presence in s2 (k = s1[l2]) */ > for (i = l2; --i != (size_t)-1; ) { > if (k == s2[i]) > break; > } > > /* shift */ > s1 += l2 - i; > } > } > > #ifdef __i386__ > # define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx" > #elif __x86_64__ > # define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx" > #else > # error unknown platform > #endif > > #define RDTSC_START(cycles) \ > do { \ > register unsigned cyc_high, cyc_low; \ > asm volatile("CPUID\n\t" \ > "RDTSC\n\t" \ > "mov %%edx, %0\n\t" \ > "mov %%eax, %1\n\t" \ > : "=r" (cyc_high), "=r" (cyc_low) \ > :: RDTSC_CLOBBER); \ > (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ > } while (0) > > #define RDTSC_STOP(cycles) \ > do { \ > register unsigned cyc_high, cyc_low; \ > asm volatile("RDTSCP\n\t" \ > "mov %%edx, %0\n\t" \ > "mov %%eax, %1\n\t" \ > "CPUID\n\t" \ > : "=r" (cyc_high), "=r" (cyc_low) \ > :: RDTSC_CLOBBER); \ > (cycles) = ((uint64_t)cyc_high << 32) | cyc_low; \ > } while (0) > > typedef unsigned long long TIME_T; > #define start_time(start) RDTSC_START(start) > #define stop_time(stop) RDTSC_STOP(stop) > #define diff_time(start, stop) (stop - start) > > #define MAX_HAYSTACK_LENGTH 64 > #define MAX_NEEDLE_LENGTH 16 > #define TEST_LOOPS 1000 > > char haystack[MAX_HAYSTACK_LENGTH + 1]; > char needle[MAX_NEEDLE_LENGTH + 1]; > > int main(int argc, char **argv) > { > size_t i, j, n; > void *p1, *p2; > > srand(time(0)); > > for (i = 0; i < MAX_HAYSTACK_LENGTH; ) { > haystack[i] = rand(); > if (haystack[i] != 0) > i++; > } > haystack[i] = 0; > > for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) { > TIME_T start, stop; > unsigned long long elapsed1, elapsed2; > > j = rand() % MAX_NEEDLE_LENGTH; > if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j)) > n = i; > else > n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j); > memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j); > needle[MAX_NEEDLE_LENGTH] = 0; > > elapsed1 = ~0UL; > for (n = 0; n < TEST_LOOPS; n++) { > start_time(start); > asm volatile("":::"memory"); > p1 = strstr1(haystack, needle + j); > asm volatile("":::"memory"); > stop_time(stop); > > if (elapsed1 > diff_time(start, stop)) > elapsed1 = diff_time(start, stop); > } > > elapsed2 = ~0UL; > for (n = 0; n < TEST_LOOPS; n++) { > start_time(start); > asm volatile("":::"memory"); > p2 = strstr2(haystack, needle + j); > asm volatile("":::"memory"); > stop_time(stop); > > if (elapsed2 > diff_time(start, stop)) > elapsed2 = diff_time(start, stop); > } > > if (p1 != p2) > fprintf(stderr, "Error: %p != %p\n", p1, p2); > else > fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, > elapsed2); > } > > return 0; > } > > The test result: > [zzx@fedora strstr]$ ./test > 0, 3: 68, 68 > 1, 13: 74, 66 > 2, 2: 88, 94 > 3, 5: 96, 88 > 4, 8: 108, 80 > 5, 13: 110, 76 > 6, 12: 132, 82 > 7, 9: 142, 82 > 8, 11: 152, 88 > 9, 13: 146, 90 > 10, 14: 154, 94 > 11, 15: 132, 104 > 12, 1: 196, 114 > 13, 8: 206, 108 > 14, 14: 186, 110 > 15, 13: 194, 112 > 16, 0: 260, 124 > 17, 10: 258, 124 > 18, 15: 208, 136 > 19, 11: 276, 136 > 20, 13: 256, 128 > 21, 12: 292, 144 > 22, 11: 300, 142 > 23, 13: 278, 144 > 24, 7: 334, 152 > 25, 1: 346, 166 > 26, 6: 368, 168 > 27, 15: 278, 182 > 28, 1: 374, 168 > 29, 3: 382, 188 > 30, 8: 398, 172 > 31, 4: 402, 188 > 32, 0: 430, 180 > 33, 10: 398, 184 > 34, 10: 410, 192 > 35, 8: 434, 180 > 36, 7: 444, 198 > 37, 6: 454, 204 > 38, 1: 464, 220 > 39, 2: 472, 206 > 40, 4: 482, 210 > 41, 15: 388, 262 > 42, 2: 502, 210 > 43, 5: 510, 220 > 44, 8: 520, 214 > 45, 0: 564, 236 > 46, 3: 550, 242 > 47, 8: 552, 262 > 48, 10: 528, 270 > 49, 2: 572, 256 > 50, 3: 586, 246 > 51, 7: 590, 278 > 52, 14: 506, 308 > 53, 14: 514, 298 > 54, 4: 602, 240 > 55, 5: 626, 284 > 56, 15: 506, 316 > 57, 10: 606, 326 > 58, 4: 606, 240 > 59, 0: 596, 240 > 60, 13: 570, 316 > 61, 13: 576, 328 > 62, 5: 618, 284 > 63, 13: 576, 328 > > Thanks! >
The original strnstr might has a bug too! For example, assume s1 is "123\0abc...." and s2 is "abc\0", call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.