在 2018/7/27 1:17, Zhaoxiu Zeng 写道:
> 在 2018/7/23 2:37, Greg Kroah-Hartman 写道:
>> On Mon, Jul 23, 2018 at 01:37:15AM +0800, Zhaoxiu Zeng wrote:
>>> From: Zhaoxiu Zeng <zhaoxiu.z...@gmail.com>
>>>
>>> The Sunday algorithm is a variation of Boyer-Moore algorithm, it is easy 
>>> and fast.
>>> For the Sunday algorithm, to see
>>>     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/sundayen.htm
>>
>> So you say, but what does this really buy us?  Why make this change?
>> How was it tested?  What is the downside of not taking this?
>>
>> thanks,
>>
>> greg k-h
>>
> 
> I use the following program to test on fc28.
> Compile with O2, the new version is almost 2X faster than the original.
> The code size of the original is 0x80, the newer is 0xB0.
> 
> 
> #include <stdio.h>
> #include <stdlib.h>
> #include <stdint.h>
> #include <string.h>
> #include <time.h>
> #include <unistd.h>
> 
> /**
>  * strstr - Find the first substring in a %NUL terminated string
>  * @s1: The string to be searched
>  * @s2: The string to search for
>  */
> char *strstr1(const char *s1, const char *s2)
> {
>       size_t l1, l2;
> 
>       l2 = strlen(s2);
>       if (!l2)
>               return (char *)s1;
>       l1 = strlen(s1);
>       while (l1 >= l2) {
>               l1--;
>               if (!memcmp(s1, s2, l2))
>                       return (char *)s1;
>               s1++;
>       }
>       return NULL;
> }
> 
> char *strstr2(const char *s1, const char *s2)
> {
>       size_t l2;
>       const char *pchk = s1;
> 
> #if 1//def __HAVE_ARCH_STRLEN
>       l2 = strlen(s2);
> #else
>       for (l2 = 0; s2[l2] != 0; l2++)
>               /* nothing */;
> #endif
>       if (!l2)
>               return (char *)s1;
> 
>       /*
>        * Sunday matching
>        */
>       while (1) {
>               size_t i;
>               char k;
> 
>               /* compare */
>               for (i = 0; i < l2; i++) {
>                       if (s1[i] != s2[i])
>                               break;
>               }
>               if (i >= l2)
>                       return (char *)s1;
> 
>               /* if s1 terminate early? */
>               if (pchk < s1 + i)
>                       pchk = s1 + i;
>               do {
>                       k = *pchk;
>                       if (k == 0)
>                               return NULL;
>               } while (pchk++ != s1 + l2);
> 
>               /* find the k's last presence in s2 (k = s1[l2]) */
>               for (i = l2; --i != (size_t)-1; ) {
>                       if (k == s2[i])
>                               break;
>               }
> 
>               /* shift */
>               s1 += l2 - i;
>       }
> }
> 
> #ifdef __i386__
> #  define RDTSC_CLOBBER "%eax", "%ebx", "%ecx", "%edx"
> #elif __x86_64__
> #  define RDTSC_CLOBBER "%rax", "%rbx", "%rcx", "%rdx"
> #else
> # error unknown platform
> #endif
> 
> #define RDTSC_START(cycles)                                \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("CPUID\n\t"                           \
>                      "RDTSC\n\t"                           \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> #define RDTSC_STOP(cycles)                                 \
>     do {                                                   \
>         register unsigned cyc_high, cyc_low;               \
>         asm volatile("RDTSCP\n\t"                          \
>                      "mov %%edx, %0\n\t"                   \
>                      "mov %%eax, %1\n\t"                   \
>                      "CPUID\n\t"                           \
>                      : "=r" (cyc_high), "=r" (cyc_low)     \
>                      :: RDTSC_CLOBBER);                    \
>         (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;   \
>     } while (0)
> 
> typedef unsigned long long TIME_T;
> #define start_time(start)             RDTSC_START(start)
> #define stop_time(stop)                       RDTSC_STOP(stop)
> #define diff_time(start, stop)        (stop - start)
> 
> #define MAX_HAYSTACK_LENGTH 64
> #define MAX_NEEDLE_LENGTH 16
> #define TEST_LOOPS 1000
> 
> char haystack[MAX_HAYSTACK_LENGTH + 1];
> char needle[MAX_NEEDLE_LENGTH + 1];
> 
> int main(int argc, char **argv)
> {
>     size_t i, j, n;
>     void *p1, *p2;
> 
>     srand(time(0));
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; ) {
>         haystack[i] = rand();
>         if (haystack[i] != 0)
>             i++;
>     }
>     haystack[i] = 0;
> 
>     for (i = 0; i < MAX_HAYSTACK_LENGTH; i++) {
>         TIME_T start, stop;
>         unsigned long long elapsed1, elapsed2;
> 
>         j = rand() % MAX_NEEDLE_LENGTH;
>         if (i <= MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j))
>             n = i;
>         else
>             n = MAX_HAYSTACK_LENGTH - (MAX_NEEDLE_LENGTH - j);
>         memcpy(needle + j, haystack + n, MAX_NEEDLE_LENGTH - j);
>         needle[MAX_NEEDLE_LENGTH] = 0;
> 
>         elapsed1 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p1 = strstr1(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed1 > diff_time(start, stop))
>                 elapsed1 = diff_time(start, stop);
>         }
> 
>         elapsed2 = ~0UL;
>         for (n = 0; n < TEST_LOOPS; n++) {
>             start_time(start);
>             asm volatile("":::"memory");
>             p2 = strstr2(haystack, needle + j);
>             asm volatile("":::"memory");
>             stop_time(stop);
> 
>             if (elapsed2 > diff_time(start, stop))
>                 elapsed2 = diff_time(start, stop);
>         }
> 
>         if (p1 != p2)
>             fprintf(stderr, "Error: %p != %p\n", p1, p2);
>         else
>             fprintf(stdout, "%3d, %3d:\t%lld,\t%lld\n", i, j, elapsed1, 
> elapsed2);
>     }
> 
>     return 0;
> }
> 
> The test result:
> [zzx@fedora strstr]$ ./test
>   0,   3:     68,     68
>   1,  13:     74,     66
>   2,   2:     88,     94
>   3,   5:     96,     88
>   4,   8:     108,    80
>   5,  13:     110,    76
>   6,  12:     132,    82
>   7,   9:     142,    82
>   8,  11:     152,    88
>   9,  13:     146,    90
>  10,  14:     154,    94
>  11,  15:     132,    104
>  12,   1:     196,    114
>  13,   8:     206,    108
>  14,  14:     186,    110
>  15,  13:     194,    112
>  16,   0:     260,    124
>  17,  10:     258,    124
>  18,  15:     208,    136
>  19,  11:     276,    136
>  20,  13:     256,    128
>  21,  12:     292,    144
>  22,  11:     300,    142
>  23,  13:     278,    144
>  24,   7:     334,    152
>  25,   1:     346,    166
>  26,   6:     368,    168
>  27,  15:     278,    182
>  28,   1:     374,    168
>  29,   3:     382,    188
>  30,   8:     398,    172
>  31,   4:     402,    188
>  32,   0:     430,    180
>  33,  10:     398,    184
>  34,  10:     410,    192
>  35,   8:     434,    180
>  36,   7:     444,    198
>  37,   6:     454,    204
>  38,   1:     464,    220
>  39,   2:     472,    206
>  40,   4:     482,    210
>  41,  15:     388,    262
>  42,   2:     502,    210
>  43,   5:     510,    220
>  44,   8:     520,    214
>  45,   0:     564,    236
>  46,   3:     550,    242
>  47,   8:     552,    262
>  48,  10:     528,    270
>  49,   2:     572,    256
>  50,   3:     586,    246
>  51,   7:     590,    278
>  52,  14:     506,    308
>  53,  14:     514,    298
>  54,   4:     602,    240
>  55,   5:     626,    284
>  56,  15:     506,    316
>  57,  10:     606,    326
>  58,   4:     606,    240
>  59,   0:     596,    240
>  60,  13:     570,    316
>  61,  13:     576,    328
>  62,   5:     618,    284
>  63,  13:     576,    328
> 
> Thanks!
> 

The original strnstr might has a bug too!
For example, assume s1 is "123\0abc...." and s2 is "abc\0",
call strnstr(s1, s2, 7) will return &s1[4], but the correct result is NULL.

Reply via email to