Our strstr.c is based on musl but since the import a few minor tweaks
happened and this brings us back in sync. For memmem.c our libc has a very
simple implementation. This switches the code to the memmem.c from musl
which is O(n) like strstr.c. For memmem() similar configure checks are
done as for strstr to verify the runtime so it makes sense to provide a
good version in libc.

-- 
:wq Claudio

Index: string/memmem.c
===================================================================
RCS file: /cvs/src/lib/libc/string/memmem.c,v
retrieving revision 1.4
diff -u -p -r1.4 memmem.c
--- string/memmem.c     31 Aug 2015 02:53:57 -0000      1.4
+++ string/memmem.c     15 Apr 2020 07:38:09 -0000
@@ -1,64 +1,184 @@
-/*     $OpenBSD: memmem.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */
-/*-
- * Copyright (c) 2005 Pascal Gloor <pascal.gl...@spale.com>
+/*     $OpenBSD$ */
+
+/*
+ * Copyright (c) 2005-2020 Rich Felker, et al.
  *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
  *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <string.h>
+#include <stdint.h>
+
+static char *
+twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+       uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
+       for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
+               if (hw == nw) return (char *)h-2;
+       return hw == nw ? (char *)h-2 : 0;
+}
+
+static char *
+threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+       uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
+       uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
+       for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
+               if (hw == nw) return (char *)h-3;
+       return hw == nw ? (char *)h-3 : 0;
+}
+
+static char *
+fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
+{
+       uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
+       uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
+       for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
+               if (hw == nw) return (char *)h-4;
+       return hw == nw ? (char *)h-4 : 0;
+}
+
+#define MAX(a,b) ((a)>(b)?(a):(b))
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+#define BITOP(a,b,op) \
+ ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
 
 /*
- * Find the first occurrence of the byte string s in byte string l.
+ * Maxime Crochemore and Dominique Perrin, Two-way string-matching,
+ * Journal of the ACM, 38(3):651-675, July 1991.
  */
+static char *
+twoway_memmem(const unsigned char *h, const unsigned char *z,
+    const unsigned char *n, size_t l)
+{
+       size_t i, ip, jp, k, p, ms, p0, mem, mem0;
+       size_t byteset[32 / sizeof(size_t)] = { 0 };
+       size_t shift[256];
+
+       /* Computing length of needle and fill shift table */
+       for (i=0; i<l; i++)
+               BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
+
+       /* Compute maximal suffix */
+       ip = -1; jp = 0; k = p = 1;
+       while (jp+k<l) {
+               if (n[ip+k] == n[jp+k]) {
+                       if (k == p) {
+                               jp += p;
+                               k = 1;
+                       } else k++;
+               } else if (n[ip+k] > n[jp+k]) {
+                       jp += k;
+                       k = 1;
+                       p = jp - ip;
+               } else {
+                       ip = jp++;
+                       k = p = 1;
+               }
+       }
+       ms = ip;
+       p0 = p;
+
+       /* And with the opposite comparison */
+       ip = -1; jp = 0; k = p = 1;
+       while (jp+k<l) {
+               if (n[ip+k] == n[jp+k]) {
+                       if (k == p) {
+                               jp += p;
+                               k = 1;
+                       } else k++;
+               } else if (n[ip+k] < n[jp+k]) {
+                       jp += k;
+                       k = 1;
+                       p = jp - ip;
+               } else {
+                       ip = jp++;
+                       k = p = 1;
+               }
+       }
+       if (ip+1 > ms+1) ms = ip;
+       else p = p0;
+
+       /* Periodic needle? */
+       if (memcmp(n, n+p, ms+1)) {
+               mem0 = 0;
+               p = MAX(ms, l-ms-1) + 1;
+       } else mem0 = l-p;
+       mem = 0;
+
+       /* Search loop */
+       for (;;) {
+               /* If remainder of haystack is shorter than needle, done */
+               if (z-h < l) return 0;
+
+               /* Check last byte first; advance by shift on mismatch */
+               if (BITOP(byteset, h[l-1], &)) {
+                       k = l-shift[h[l-1]];
+                       if (k) {
+                               if (k < mem) k = mem;
+                               h += k;
+                               mem = 0;
+                               continue;
+                       }
+               } else {
+                       h += l;
+                       mem = 0;
+                       continue;
+               }
+
+               /* Compare right half */
+               for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
+               if (k < l) {
+                       h += k-ms;
+                       mem = 0;
+                       continue;
+               }
+               /* Compare left half */
+               for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
+               if (k <= mem) return (char *)h;
+               h += p;
+               mem = mem0;
+       }
+}
 
 void *
-memmem(const void *l, size_t l_len, const void *s, size_t s_len)
+memmem(const void *h0, size_t k, const void *n0, size_t l)
 {
-       const char *cur, *last;
-       const char *cl = l;
-       const char *cs = s;
-
-       /* a zero length needle should just return the haystack */
-       if (s_len == 0)
-               return (void *)cl;
-
-       /* "s" must be smaller or equal to "l" */
-       if (l_len < s_len)
-               return NULL;
-
-       /* special case where s_len == 1 */
-       if (s_len == 1)
-               return memchr(l, *cs, l_len);
-
-       /* the last position where its possible to find "s" in "l" */
-       last = cl + l_len - s_len;
-
-       for (cur = cl; cur <= last; cur++)
-               if (cur[0] == cs[0] && memcmp(cur, cs, s_len) == 0)
-                       return (void *)cur;
+       const unsigned char *h = h0, *n = n0;
+
+       /* Return immediately on empty needle */
+       if (!l) return (void *)h;
+
+       /* Return immediately when needle is longer than haystack */
+       if (k<l) return 0;
+
+       /* Use faster algorithms for short needles */
+       h = memchr(h0, *n, k);
+       if (!h || l==1) return (void *)h;
+       k -= h - (const unsigned char *)h0;
+       if (k<l) return 0;
+       if (l==2) return twobyte_memmem(h, k, n);
+       if (l==3) return threebyte_memmem(h, k, n);
+       if (l==4) return fourbyte_memmem(h, k, n);
 
-       return NULL;
+       return twoway_memmem(h, h+k, n, l);
 }
 DEF_WEAK(memmem);
Index: string/strstr.c
===================================================================
RCS file: /cvs/src/lib/libc/string/strstr.c,v
retrieving revision 1.8
diff -u -p -r1.8 strstr.c
--- string/strstr.c     30 Apr 2018 07:44:56 -0000      1.8
+++ string/strstr.c     15 Apr 2020 07:35:15 -0000
@@ -24,13 +24,8 @@
  */
 
 #include <string.h>
-#include <stdlib.h>
 #include <stdint.h>
 
-#ifdef DEBUG
-#include <stdio.h>
-#endif
-
 static char *
 twobyte_strstr(const unsigned char *h, const unsigned char *n)
 {
@@ -146,11 +141,8 @@ twoway_strstr(const unsigned char *h, co
                /* Check last byte first; advance by shift on mismatch */
                if (BITOP(byteset, h[l-1], &)) {
                        k = l-shift[h[l-1]];
-#ifdef DEBUG
-                       printf("adv by %zu (on %c) at [%s] (%zu;l=%zu)\n", k, 
h[l-1], h, shift[h[l-1]], l);
-#endif
                        if (k) {
-                               if (mem0 && mem && k < p) k = l-p;
+                               if (k < mem) k = mem;
                                h += k;
                                mem = 0;
                                continue;

Reply via email to