Date: Sunday, September 15, 2013 @ 15:03:44
  Author: allan
Revision: 194390

upgpkg: glibc 2.18-4

fix CVEs 2012-4412 (strcoll), 2012-4424 (strcoll) and 2013-4432 (malloc)

Added:
  glibc/trunk/glibc-2.18-malloc-corrupt-CVE-2013-4332.patch
  glibc/trunk/glibc-2.18-strcoll-CVE-2012-4412+4424.patch
Modified:
  glibc/trunk/PKGBUILD

-----------------------------------------------+
 PKGBUILD                                      |   17 
 glibc-2.18-malloc-corrupt-CVE-2013-4332.patch |   54 +
 glibc-2.18-strcoll-CVE-2012-4412+4424.patch   | 1004 ++++++++++++++++++++++++
 3 files changed, 1072 insertions(+), 3 deletions(-)

Modified: PKGBUILD
===================================================================
--- PKGBUILD    2013-09-15 12:35:01 UTC (rev 194389)
+++ PKGBUILD    2013-09-15 13:03:44 UTC (rev 194390)
@@ -6,7 +6,7 @@
 
 pkgname=glibc
 pkgver=2.18
-pkgrel=3
+pkgrel=4
 pkgdesc="GNU C Library"
 arch=('i686' 'x86_64')
 url="http://www.gnu.org/software/libc";
@@ -21,6 +21,8 @@
 install=glibc.install
 source=(http://ftp.gnu.org/gnu/libc/${pkgname}-${pkgver}.tar.xz{,.sig}
         glibc-2.18-readdir_r-CVE-2013-4237.patch
+       glibc-2.18-malloc-corrupt-CVE-2013-4332.patch
+       glibc-2.18-strcoll-CVE-2012-4412+4424.patch
         glibc-2.18-strstr-hackfix.patch
         nscd.service
         nscd.tmpfiles
@@ -29,6 +31,8 @@
 md5sums=('88fbbceafee809e82efd52efa1e3c58f'
          'SKIP'
          '154da6bf5a5248f42a7bf5bf08e01a47'
+         'b79561ab9dce900e9bbeaf0d49927c2b'
+         'c7264b99d0f7e51922a4d3126182c40a'
          '4441f6dfe7d75ced1fa75e54dd21d36e'
          'd5fab2cd3abea65aa5ae696ea4a47d6b'
          'da662ca76e7c8d7efbc7986ab7acea2d'
@@ -35,13 +39,20 @@
          '07ac979b6ab5eeb778d55f041529d623'
          '476e9113489f93b348b21e144b6a8fcf')
 
-
 prepare() {
   cd ${srcdir}/${pkgname}-${pkgver}
   
   # upstream commit 91ce4085
   patch -p1 -i $srcdir/glibc-2.18-readdir_r-CVE-2013-4237.patch
-  
+
+  # upstream commits 1159a193, 55e17aad and b73ed247
+  patch -p1 -i $srcdir/glibc-2.18-malloc-corrupt-CVE-2013-4332.patch
+
+  # upstream commit 1326ba1a and two not yet committed patches
+  # https://sourceware.org/ml/libc-alpha/2013-08/msg00394.html
+  # https://sourceware.org/ml/libc-alpha/2013-08/msg00462.html
+  patch -p1 -i $srcdir/glibc-2.18-strcoll-CVE-2012-4412+4424.patch
+
   # hack fix for strstr issues on x86
   patch -p1 -i $srcdir/glibc-2.18-strstr-hackfix.patch
 

Added: glibc-2.18-malloc-corrupt-CVE-2013-4332.patch
===================================================================
--- glibc-2.18-malloc-corrupt-CVE-2013-4332.patch                               
(rev 0)
+++ glibc-2.18-malloc-corrupt-CVE-2013-4332.patch       2013-09-15 13:03:44 UTC 
(rev 194390)
@@ -0,0 +1,54 @@
+diff --git a/malloc/malloc.c b/malloc/malloc.c
+index dd295f5..7f43ba3 100644
+--- a/malloc/malloc.c
++++ b/malloc/malloc.c
+@@ -3082,6 +3082,13 @@ __libc_pvalloc(size_t bytes)
+   size_t page_mask = GLRO(dl_pagesize) - 1;
+   size_t rounded_bytes = (bytes + page_mask) & ~(page_mask);
+ 
++  /* Check for overflow.  */
++  if (bytes > SIZE_MAX - 2*pagesz - MINSIZE)
++    {
++      __set_errno (ENOMEM);
++      return 0;
++    }
++
+   void *(*hook) (size_t, size_t, const void *) =
+     force_reg (__memalign_hook);
+   if (__builtin_expect (hook != NULL, 0))
+diff --git a/malloc/malloc.c b/malloc/malloc.c
+index 7f43ba3..3148c5f 100644
+--- a/malloc/malloc.c
++++ b/malloc/malloc.c
+@@ -3046,6 +3046,13 @@ __libc_valloc(size_t bytes)
+ 
+   size_t pagesz = GLRO(dl_pagesize);
+ 
++  /* Check for overflow.  */
++  if (bytes > SIZE_MAX - pagesz - MINSIZE)
++    {
++      __set_errno (ENOMEM);
++      return 0;
++    }
++
+   void *(*hook) (size_t, size_t, const void *) =
+     force_reg (__memalign_hook);
+   if (__builtin_expect (hook != NULL, 0))
+diff --git a/malloc/malloc.c b/malloc/malloc.c
+index 3148c5f..f7718a9 100644
+--- a/malloc/malloc.c
++++ b/malloc/malloc.c
+@@ -3015,6 +3015,13 @@ __libc_memalign(size_t alignment, size_t bytes)
+   /* Otherwise, ensure that it is at least a minimum chunk size */
+   if (alignment <  MINSIZE) alignment = MINSIZE;
+ 
++  /* Check for overflow.  */
++  if (bytes > SIZE_MAX - alignment - MINSIZE)
++    {
++      __set_errno (ENOMEM);
++      return 0;
++    }
++
+   arena_get(ar_ptr, bytes + alignment + MINSIZE);
+   if(!ar_ptr)
+     return 0;

Added: glibc-2.18-strcoll-CVE-2012-4412+4424.patch
===================================================================
--- glibc-2.18-strcoll-CVE-2012-4412+4424.patch                         (rev 0)
+++ glibc-2.18-strcoll-CVE-2012-4412+4424.patch 2013-09-15 13:03:44 UTC (rev 
194390)
@@ -0,0 +1,1004 @@
+diff --git a/string/strcoll_l.c b/string/strcoll_l.c
+index ecda08f..bb34a72 100644
+--- a/string/strcoll_l.c
++++ b/string/strcoll_l.c
+@@ -41,11 +41,434 @@
+ 
+ #include "../locale/localeinfo.h"
+ 
++/* Track status while looking for sequences in a string.  */
++typedef struct
++{
++  int len;                    /* Length of the current sequence.  */
++  size_t val;                 /* Position of the sequence relative to the
++                                 previous non-ignored sequence.  */
++  size_t idxnow;              /* Current index in sequences.  */
++  size_t idxmax;              /* Maximum index in sequences.  */
++  size_t idxcnt;              /* Current count of indices.  */
++  size_t backw;                       /* Current Backward sequence index.  */
++  size_t backw_stop;          /* Index where the backward sequences stop.  */
++  const USTRING_TYPE *us;     /* The string.  */
++  int32_t *idxarr;            /* Array to cache weight indices.  */
++  unsigned char *rulearr;     /* Array to cache rules.  */
++  unsigned char rule;         /* Saved rule for the first sequence.  */
++  int32_t idx;                        /* Index to weight of the current 
sequence.  */
++  int32_t save_idx;           /* Save looked up index of a forward
++                                 sequence after the last backward
++                                 sequence.  */
++  const USTRING_TYPE *back_us;        /* Beginning of the backward sequence.  
*/
++} coll_seq;
++
++/* Get next sequence.  The weight indices are cached, so we don't need to
++   traverse the string.  */
++static void
++get_next_seq_cached (coll_seq *seq, int nrules, int pass,
++                   const unsigned char *rulesets,
++                   const USTRING_TYPE *weights)
++{
++  size_t val = seq->val = 0;
++  int len = seq->len;
++  size_t backw_stop = seq->backw_stop;
++  size_t backw = seq->backw;
++  size_t idxcnt = seq->idxcnt;
++  size_t idxmax = seq->idxmax;
++  size_t idxnow = seq->idxnow;
++  unsigned char *rulearr = seq->rulearr;
++  int32_t *idxarr = seq->idxarr;
++
++  while (len == 0)
++    {
++      ++val;
++      if (backw_stop != ~0ul)
++      {
++        /* There is something pushed.  */
++        if (backw == backw_stop)
++          {
++            /* The last pushed character was handled.  Continue
++               with forward characters.  */
++            if (idxcnt < idxmax)
++              {
++                idxnow = idxcnt;
++                backw_stop = ~0ul;
++              }
++            else
++              {
++                /* Nothing any more.  The backward sequence
++                   ended with the last sequence in the string.  */
++                idxnow = ~0ul;
++                break;
++              }
++          }
++        else
++          idxnow = --backw;
++      }
++      else
++      {
++        backw_stop = idxcnt;
++
++        while (idxcnt < idxmax)
++          {
++            if ((rulesets[rulearr[idxcnt] * nrules + pass]
++                 & sort_backward) == 0)
++              /* No more backward characters to push.  */
++              break;
++            ++idxcnt;
++          }
++
++        if (backw_stop == idxcnt)
++          {
++            /* No sequence at all or just one.  */
++            if (idxcnt == idxmax)
++              /* Note that LEN is still zero.  */
++              break;
++
++            backw_stop = ~0ul;
++            idxnow = idxcnt++;
++          }
++        else
++          /* We pushed backward sequences.  */
++          idxnow = backw = idxcnt - 1;
++      }
++      len = weights[idxarr[idxnow]++];
++    }
++
++  /* Update the structure.  */
++  seq->val = val;
++  seq->len = len;
++  seq->backw_stop = backw_stop;
++  seq->backw = backw;
++  seq->idxcnt = idxcnt;
++  seq->idxnow = idxnow;
++}
++
++/* Get next sequence.  Traverse the string as required.  */
++static void
++get_next_seq (coll_seq *seq, int nrules, const unsigned char *rulesets,
++            const USTRING_TYPE *weights, const int32_t *table,
++            const USTRING_TYPE *extra, const int32_t *indirect)
++{
++#include WEIGHT_H
++  size_t val = seq->val = 0;
++  int len = seq->len;
++  size_t backw_stop = seq->backw_stop;
++  size_t backw = seq->backw;
++  size_t idxcnt = seq->idxcnt;
++  size_t idxmax = seq->idxmax;
++  size_t idxnow = seq->idxnow;
++  unsigned char *rulearr = seq->rulearr;
++  int32_t *idxarr = seq->idxarr;
++  const USTRING_TYPE *us = seq->us;
++
++  while (len == 0)
++    {
++      ++val;
++      if (backw_stop != ~0ul)
++      {
++        /* There is something pushed.  */
++        if (backw == backw_stop)
++          {
++            /* The last pushed character was handled.  Continue
++               with forward characters.  */
++            if (idxcnt < idxmax)
++              {
++                idxnow = idxcnt;
++                backw_stop = ~0ul;
++              }
++            else
++              /* Nothing any more.  The backward sequence ended with
++                 the last sequence in the string.  Note that LEN
++                 is still zero.  */
++              break;
++          }
++        else
++          idxnow = --backw;
++      }
++      else
++      {
++        backw_stop = idxmax;
++
++        while (*us != L('\0'))
++          {
++            int32_t tmp = findidx (&us, -1);
++            rulearr[idxmax] = tmp >> 24;
++            idxarr[idxmax] = tmp & 0xffffff;
++            idxcnt = idxmax++;
++
++            if ((rulesets[rulearr[idxcnt] * nrules]
++                 & sort_backward) == 0)
++              /* No more backward characters to push.  */
++              break;
++            ++idxcnt;
++          }
++
++        if (backw_stop >= idxcnt)
++          {
++            /* No sequence at all or just one.  */
++            if (idxcnt == idxmax || backw_stop > idxcnt)
++              /* Note that LEN is still zero.  */
++              break;
++
++            backw_stop = ~0ul;
++            idxnow = idxcnt;
++          }
++        else
++          /* We pushed backward sequences.  */
++          idxnow = backw = idxcnt - 1;
++      }
++      len = weights[idxarr[idxnow]++];
++    }
++
++  /* Update the structure.  */
++  seq->val = val;
++  seq->len = len;
++  seq->backw_stop = backw_stop;
++  seq->backw = backw;
++  seq->idxcnt = idxcnt;
++  seq->idxmax = idxmax;
++  seq->idxnow = idxnow;
++  seq->us = us;
++}
++
++/* Get next sequence.  Traverse the string as required.  This function does 
not
++   set or use any index or rule cache.  */
++static void
++get_next_seq_nocache (coll_seq *seq, int nrules, const unsigned char 
*rulesets,
++                    const USTRING_TYPE *weights, const int32_t *table,
++                    const USTRING_TYPE *extra, const int32_t *indirect,
++                    int pass)
++{
++#include WEIGHT_H
++  size_t val = seq->val = 0;
++  int len = seq->len;
++  size_t backw_stop = seq->backw_stop;
++  size_t backw = seq->backw;
++  size_t idxcnt = seq->idxcnt;
++  size_t idxmax = seq->idxmax;
++  int32_t idx = seq->idx;
++  const USTRING_TYPE *us = seq->us;
++
++  while (len == 0)
++    {
++      ++val;
++      if (backw_stop != ~0ul)
++      {
++        /* There is something pushed.  */
++        if (backw == backw_stop)
++          {
++            /* The last pushed character was handled.  Continue
++               with forward characters.  */
++            if (idxcnt < idxmax)
++              {
++                idx = seq->save_idx;
++                backw_stop = ~0ul;
++              }
++            else
++              {
++                /* Nothing anymore.  The backward sequence ended with
++                   the last sequence in the string.  Note that len is
++                   still zero.  */
++                idx = 0;
++                break;
++              }
++          }
++        else
++          {
++            /* XXX Traverse BACKW sequences from the beginning of
++               BACKW_STOP to get the next sequence.  Is ther a quicker way
++               to do this?  */
++            size_t i = backw_stop;
++            us = seq->back_us;
++            while (i < backw)
++              {
++                int32_t tmp = findidx (&us, -1);
++                idx = tmp & 0xffffff;
++                i++;
++              }
++            --backw;
++            us = seq->us;
++          }
++      }
++      else
++      {
++        backw_stop = idxmax;
++        int32_t prev_idx = idx;
++
++        while (*us != L('\0'))
++          {
++            int32_t tmp = findidx (&us, -1);
++            unsigned char rule = tmp >> 24;
++            prev_idx = idx;
++            idx = tmp & 0xffffff;
++            idxcnt = idxmax++;
++
++            /* Save the rule for the first sequence.  */
++            if (__glibc_unlikely (idxcnt == 0))
++              seq->rule = rule;
++
++            if ((rulesets[rule * nrules + pass]
++                 & sort_backward) == 0)
++              /* No more backward characters to push.  */
++              break;
++            ++idxcnt;
++          }
++
++        if (backw_stop >= idxcnt)
++          {
++            /* No sequence at all or just one.  */
++            if (idxcnt == idxmax || backw_stop > idxcnt)
++              /* Note that len is still zero.  */
++              break;
++
++            backw_stop = ~0ul;
++          }
++        else
++          {
++            /* We pushed backward sequences.  If the stream ended with the
++               backward sequence, then we process the last sequence we
++               found.  Otherwise we process the sequence before the last
++               one since the last one was a forward sequence.  */
++            seq->back_us = seq->us;
++            seq->us = us;
++            backw = idxcnt;
++            if (idxmax > idxcnt)
++              {
++                backw--;
++                seq->save_idx = idx;
++                idx = prev_idx;
++              }
++            if (backw > backw_stop)
++              backw--;
++          }
++      }
++
++      len = weights[idx++];
++      /* Skip over indices of previous levels.  */
++      for (int i = 0; i < pass; i++)
++      {
++        idx += len;
++        len = weights[idx];
++        idx++;
++      }
++    }
++
++  /* Update the structure.  */
++  seq->val = val;
++  seq->len = len;
++  seq->backw_stop = backw_stop;
++  seq->backw = backw;
++  seq->idxcnt = idxcnt;
++  seq->idxmax = idxmax;
++  seq->us = us;
++  seq->idx = idx;
++}
++
++/* Compare two sequences.  This version does not use the index and rules
++   cache.  */
++static int
++do_compare_nocache (coll_seq *seq1, coll_seq *seq2, int position,
++                  const USTRING_TYPE *weights)
++{
++  int seq1len = seq1->len;
++  int seq2len = seq2->len;
++  size_t val1 = seq1->val;
++  size_t val2 = seq2->val;
++  int idx1 = seq1->idx;
++  int idx2 = seq2->idx;
++  int result = 0;
++
++  /* Test for position if necessary.  */
++  if (position && val1 != val2)
++    {
++      result = val1 > val2 ? 1 : -1;
++      goto out;
++    }
++
++  /* Compare the two sequences.  */
++  do
++    {
++      if (weights[idx1] != weights[idx2])
++      {
++        /* The sequences differ.  */
++        result = weights[idx1] - weights[idx2];
++        goto out;
++      }
++
++      /* Increment the offsets.  */
++      ++idx1;
++      ++idx2;
++
++      --seq1len;
++      --seq2len;
++    }
++  while (seq1len > 0 && seq2len > 0);
++
++  if (position && seq1len != seq2len)
++    result = seq1len - seq2len;
++
++out:
++  seq1->len = seq1len;
++  seq2->len = seq2len;
++  seq1->idx = idx1;
++  seq2->idx = idx2;
++  return result;
++}
++
++/* Compare two sequences using the index cache.  */
++static int
++do_compare (coll_seq *seq1, coll_seq *seq2, int position,
++          const USTRING_TYPE *weights)
++{
++  int seq1len = seq1->len;
++  int seq2len = seq2->len;
++  size_t val1 = seq1->val;
++  size_t val2 = seq2->val;
++  int32_t *idx1arr = seq1->idxarr;
++  int32_t *idx2arr = seq2->idxarr;
++  int idx1now = seq1->idxnow;
++  int idx2now = seq2->idxnow;
++  int result = 0;
++
++  /* Test for position if necessary.  */
++  if (position && val1 != val2)
++    {
++      result = val1 > val2 ? 1 : -1;
++      goto out;
++    }
++
++  /* Compare the two sequences.  */
++  do
++    {
++      if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
++      {
++        /* The sequences differ.  */
++        result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
++        goto out;
++      }
++
++      /* Increment the offsets.  */
++      ++idx1arr[idx1now];
++      ++idx2arr[idx2now];
++
++      --seq1len;
++      --seq2len;
++    }
++  while (seq1len > 0 && seq2len > 0);
++
++  if (position && seq1len != seq2len)
++    result = seq1len - seq2len;
++
++out:
++  seq1->len = seq1len;
++  seq2->len = seq2len;
++  return result;
++}
++
+ int
+-STRCOLL (s1, s2, l)
+-     const STRING_TYPE *s1;
+-     const STRING_TYPE *s2;
+-     __locale_t l;
++STRCOLL (const STRING_TYPE *s1, const STRING_TYPE *s2, __locale_t l)
+ {
+   struct __locale_data *current = l->__locales[LC_COLLATE];
+   uint_fast32_t nrules = current->values[_NL_ITEM_INDEX 
(_NL_COLLATE_NRULES)].word;
+@@ -56,34 +479,6 @@ STRCOLL (s1, s2, l)
+   const USTRING_TYPE *weights;
+   const USTRING_TYPE *extra;
+   const int32_t *indirect;
+-  uint_fast32_t pass;
+-  int result = 0;
+-  const USTRING_TYPE *us1;
+-  const USTRING_TYPE *us2;
+-  size_t s1len;
+-  size_t s2len;
+-  int32_t *idx1arr;
+-  int32_t *idx2arr;
+-  unsigned char *rule1arr;
+-  unsigned char *rule2arr;
+-  size_t idx1max;
+-  size_t idx2max;
+-  size_t idx1cnt;
+-  size_t idx2cnt;
+-  size_t idx1now;
+-  size_t idx2now;
+-  size_t backw1_stop;
+-  size_t backw2_stop;
+-  size_t backw1;
+-  size_t backw2;
+-  int val1;
+-  int val2;
+-  int position;
+-  int seq1len;
+-  int seq2len;
+-  int use_malloc;
+-
+-#include WEIGHT_H
+ 
+   if (nrules == 0)
+     return STRCMP (s1, s2);
+@@ -98,7 +493,6 @@ STRCOLL (s1, s2, l)
+     current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
+   indirect = (const int32_t *)
+     current->values[_NL_ITEM_INDEX 
(CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+-  use_malloc = 0;
+ 
+   assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
+   assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
+@@ -106,18 +500,13 @@ STRCOLL (s1, s2, l)
+   assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
+ 
+   /* We need this a few times.  */
+-  s1len = STRLEN (s1);
+-  s2len = STRLEN (s2);
++  size_t s1len = STRLEN (s1);
++  size_t s2len = STRLEN (s2);
+ 
+   /* Catch empty strings.  */
+-  if (__builtin_expect (s1len == 0, 0) || __builtin_expect (s2len == 0, 0))
++  if (__glibc_unlikely (s1len == 0) || __glibc_unlikely (s2len == 0))
+     return (s1len != 0) - (s2len != 0);
+ 
+-  /* We need the elements of the strings as unsigned values since they
+-     are used as indeces.  */
+-  us1 = (const USTRING_TYPE *) s1;
+-  us2 = (const USTRING_TYPE *) s2;
+-
+   /* Perform the first pass over the string and while doing this find
+      and store the weights for each character.  Since we want this to
+      be as fast as possible we are using `alloca' to store the temporary
+@@ -127,411 +516,124 @@ STRCOLL (s1, s2, l)
+ 
+      Please note that the localedef programs makes sure that `position'
+      is not used at the first level.  */
+-  if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
+-    {
+-      idx1arr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 1));
+-      idx2arr = &idx1arr[s1len];
+-      rule1arr = (unsigned char *) &idx2arr[s2len];
+-      rule2arr = &rule1arr[s1len];
+-
+-      if (idx1arr == NULL)
+-      /* No memory.  Well, go with the stack then.
+-
+-         XXX Once this implementation is stable we will handle this
+-         differently.  Instead of precomputing the indeces we will
+-         do this in time.  This means, though, that this happens for
+-         every pass again.  */
+-      goto try_stack;
+-      use_malloc = 1;
+-    }
+-  else
+-    {
+-    try_stack:
+-      idx1arr = (int32_t *) alloca (s1len * sizeof (int32_t));
+-      idx2arr = (int32_t *) alloca (s2len * sizeof (int32_t));
+-      rule1arr = (unsigned char *) alloca (s1len);
+-      rule2arr = (unsigned char *) alloca (s2len);
+-    }
+ 
+-  idx1cnt = 0;
+-  idx2cnt = 0;
+-  idx1max = 0;
+-  idx2max = 0;
+-  idx1now = 0;
+-  idx2now = 0;
+-  backw1_stop = ~0ul;
+-  backw2_stop = ~0ul;
+-  backw1 = ~0ul;
+-  backw2 = ~0ul;
+-  seq1len = 0;
+-  seq2len = 0;
+-  position = rulesets[0] & sort_position;
+-  while (1)
+-    {
+-      val1 = 0;
+-      val2 = 0;
+-
+-      /* Get the next non-IGNOREd element for string `s1'.  */
+-      if (seq1len == 0)
+-      do
+-        {
+-          ++val1;
+-
+-          if (backw1_stop != ~0ul)
+-            {
+-              /* The is something pushed.  */
+-              if (backw1 == backw1_stop)
+-                {
+-                  /* The last pushed character was handled.  Continue
+-                     with forward characters.  */
+-                  if (idx1cnt < idx1max)
+-                    {
+-                      idx1now = idx1cnt;
+-                      backw1_stop = ~0ul;
+-                    }
+-                  else
+-                    /* Nothing anymore.  The backward sequence ended with
+-                       the last sequence in the string.  Note that seq1len
+-                       is still zero.  */
+-                    break;
+-                }
+-              else
+-                idx1now = --backw1;
+-            }
+-          else
+-            {
+-              backw1_stop = idx1max;
+-
+-              while (*us1 != L('\0'))
+-                {
+-                  int32_t tmp = findidx (&us1, -1);
+-                  rule1arr[idx1max] = tmp >> 24;
+-                  idx1arr[idx1max] = tmp & 0xffffff;
+-                  idx1cnt = idx1max++;
+-
+-                  if ((rulesets[rule1arr[idx1cnt] * nrules]
+-                       & sort_backward) == 0)
+-                    /* No more backward characters to push.  */
+-                    break;
+-                  ++idx1cnt;
+-                }
+-
+-              if (backw1_stop >= idx1cnt)
+-                {
+-                  /* No sequence at all or just one.  */
+-                  if (idx1cnt == idx1max || backw1_stop > idx1cnt)
+-                    /* Note that seq1len is still zero.  */
+-                    break;
+-
+-                  backw1_stop = ~0ul;
+-                  idx1now = idx1cnt;
+-                }
+-              else
+-                /* We pushed backward sequences.  */
+-                idx1now = backw1 = idx1cnt - 1;
+-            }
+-        }
+-      while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
+-
+-      /* And the same for string `s2'.  */
+-      if (seq2len == 0)
+-      do
+-        {
+-          ++val2;
+-
+-          if (backw2_stop != ~0ul)
+-            {
+-              /* The is something pushed.  */
+-              if (backw2 == backw2_stop)
+-                {
+-                  /* The last pushed character was handled.  Continue
+-                     with forward characters.  */
+-                  if (idx2cnt < idx2max)
+-                    {
+-                      idx2now = idx2cnt;
+-                      backw2_stop = ~0ul;
+-                    }
+-                  else
+-                    /* Nothing anymore.  The backward sequence ended with
+-                       the last sequence in the string.  Note that seq2len
+-                       is still zero.  */
+-                    break;
+-                }
+-              else
+-                idx2now = --backw2;
+-            }
+-          else
+-            {
+-              backw2_stop = idx2max;
+-
+-              while (*us2 != L('\0'))
+-                {
+-                  int32_t tmp = findidx (&us2, -1);
+-                  rule2arr[idx2max] = tmp >> 24;
+-                  idx2arr[idx2max] = tmp & 0xffffff;
+-                  idx2cnt = idx2max++;
+-
+-                  if ((rulesets[rule2arr[idx2cnt] * nrules]
+-                       & sort_backward) == 0)
+-                    /* No more backward characters to push.  */
+-                    break;
+-                  ++idx2cnt;
+-                }
+-
+-              if (backw2_stop >= idx2cnt)
+-                {
+-                  /* No sequence at all or just one.  */
+-                  if (idx2cnt == idx2max || backw2_stop > idx2cnt)
+-                    /* Note that seq1len is still zero.  */
+-                    break;
+-
+-                  backw2_stop = ~0ul;
+-                  idx2now = idx2cnt;
+-                }
+-              else
+-                /* We pushed backward sequences.  */
+-                idx2now = backw2 = idx2cnt - 1;
+-            }
+-        }
+-      while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
+-
+-      /* See whether any or both strings are empty.  */
+-      if (seq1len == 0 || seq2len == 0)
+-      {
+-        if (seq1len == seq2len)
+-          /* Both ended.  So far so good, both strings are equal at the
+-             first level.  */
+-          break;
+-
+-        /* This means one string is shorter than the other.  Find out
+-           which one and return an appropriate value.  */
+-        result = seq1len == 0 ? -1 : 1;
+-        goto free_and_return;
+-      }
++  coll_seq seq1, seq2;
++  bool use_malloc = false;
++  int result = 0;
+ 
+-      /* Test for position if necessary.  */
+-      if (position && val1 != val2)
+-      {
+-        result = val1 - val2;
+-        goto free_and_return;
+-      }
++  memset (&seq1, 0, sizeof (seq1));
++  seq2 = seq1;
+ 
+-      /* Compare the two sequences.  */
+-      do
+-      {
+-        if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
+-          {
+-            /* The sequences differ.  */
+-            result = weights[idx1arr[idx1now]] - weights[idx2arr[idx2now]];
+-            goto free_and_return;
+-          }
++  size_t size_max = SIZE_MAX / (sizeof (int32_t) + 1);
+ 
+-        /* Increment the offsets.  */
+-        ++idx1arr[idx1now];
+-        ++idx2arr[idx2now];
++  /* If the strings are long enough to cause overflow in the size request, 
then
++     skip the allocation and proceed with the non-cached routines.  */
++  if (MIN (s1len, s2len) > size_max
++      || MAX (s1len, s2len) > size_max - MIN (s1len, s2len))
++    goto begin_collate;
+ 
+-        --seq1len;
+-        --seq2len;
+-      }
+-      while (seq1len > 0 && seq2len > 0);
++  if (! __libc_use_alloca ((s1len + s2len) * (sizeof (int32_t) + 1)))
++    {
++      seq1.idxarr = (int32_t *) malloc ((s1len + s2len) * (sizeof (int32_t) + 
1));
+ 
+-      if (position && seq1len != seq2len)
++      /* If we failed to allocate memory, we leave everything as NULL so that
++       we use the nocache version of traversal and comparison functions.  */
++      if (seq1.idxarr != NULL)
+       {
+-        result = seq1len - seq2len;
+-        goto free_and_return;
++        seq2.idxarr = &seq1.idxarr[s1len];
++        seq1.rulearr = (unsigned char *) &seq2.idxarr[s2len];
++        seq2.rulearr = &seq1.rulearr[s1len];
++        use_malloc = true;
+       }
+     }
++  else
++    {
++      seq1.idxarr = (int32_t *) alloca (s1len * sizeof (int32_t));
++      seq2.idxarr = (int32_t *) alloca (s2len * sizeof (int32_t));
++      seq1.rulearr = (unsigned char *) alloca (s1len);
++      seq2.rulearr = (unsigned char *) alloca (s2len);
++    }
+ 
+-  /* Now the remaining passes over the weights.  We now use the
+-     indeces we found before.  */
+-  for (pass = 1; pass < nrules; ++pass)
++  int rule;
++
++ begin_collate:
++  rule = 0;
++  /* Cache values in the first pass and if needed, use them in subsequent
++     passes.  */
++  for (int pass = 0; pass < nrules; ++pass)
+     {
++      seq1.idxcnt = 0;
++      seq1.idx = 0;
++      seq2.idx = 0;
++      seq1.backw_stop = ~0ul;
++      seq1.backw = ~0ul;
++      seq2.idxcnt = 0;
++      seq2.backw_stop = ~0ul;
++      seq2.backw = ~0ul;
++
++      /* We need the elements of the strings as unsigned values since they
++       are used as indices.  */
++      seq1.us = (const USTRING_TYPE *) s1;
++      seq2.us = (const USTRING_TYPE *) s2;
++
+       /* We assume that if a rule has defined `position' in one section
+        this is true for all of them.  */
+-      idx1cnt = 0;
+-      idx2cnt = 0;
+-      backw1_stop = ~0ul;
+-      backw2_stop = ~0ul;
+-      backw1 = ~0ul;
+-      backw2 = ~0ul;
+-      position = rulesets[rule1arr[0] * nrules + pass] & sort_position;
++      int position = rulesets[rule * nrules + pass] & sort_position;
+ 
+       while (1)
+       {
+-        val1 = 0;
+-        val2 = 0;
+-
+-        /* Get the next non-IGNOREd element for string `s1'.  */
+-        if (seq1len == 0)
+-          do
+-            {
+-              ++val1;
+-
+-              if (backw1_stop != ~0ul)
+-                {
+-                  /* The is something pushed.  */
+-                  if (backw1 == backw1_stop)
+-                    {
+-                      /* The last pushed character was handled.  Continue
+-                         with forward characters.  */
+-                      if (idx1cnt < idx1max)
+-                        {
+-                          idx1now = idx1cnt;
+-                          backw1_stop = ~0ul;
+-                        }
+-                      else
+-                        {
+-                          /* Nothing anymore.  The backward sequence
+-                             ended with the last sequence in the string.  */
+-                          idx1now = ~0ul;
+-                          break;
+-                        }
+-                    }
+-                  else
+-                    idx1now = --backw1;
+-                }
+-              else
+-                {
+-                  backw1_stop = idx1cnt;
+-
+-                  while (idx1cnt < idx1max)
+-                    {
+-                      if ((rulesets[rule1arr[idx1cnt] * nrules + pass]
+-                           & sort_backward) == 0)
+-                        /* No more backward characters to push.  */
+-                        break;
+-                      ++idx1cnt;
+-                    }
+-
+-                  if (backw1_stop == idx1cnt)
+-                    {
+-                      /* No sequence at all or just one.  */
+-                      if (idx1cnt == idx1max)
+-                        /* Note that seq1len is still zero.  */
+-                        break;
+-
+-                      backw1_stop = ~0ul;
+-                      idx1now = idx1cnt++;
+-                    }
+-                  else
+-                    /* We pushed backward sequences.  */
+-                    idx1now = backw1 = idx1cnt - 1;
+-                }
+-            }
+-          while ((seq1len = weights[idx1arr[idx1now]++]) == 0);
+-
+-        /* And the same for string `s2'.  */
+-        if (seq2len == 0)
+-          do
+-            {
+-              ++val2;
+-
+-              if (backw2_stop != ~0ul)
+-                {
+-                  /* The is something pushed.  */
+-                  if (backw2 == backw2_stop)
+-                    {
+-                      /* The last pushed character was handled.  Continue
+-                         with forward characters.  */
+-                      if (idx2cnt < idx2max)
+-                        {
+-                          idx2now = idx2cnt;
+-                          backw2_stop = ~0ul;
+-                        }
+-                      else
+-                        {
+-                          /* Nothing anymore.  The backward sequence
+-                             ended with the last sequence in the string.  */
+-                          idx2now = ~0ul;
+-                          break;
+-                        }
+-                    }
+-                  else
+-                    idx2now = --backw2;
+-                }
+-              else
+-                {
+-                  backw2_stop = idx2cnt;
+-
+-                  while (idx2cnt < idx2max)
+-                    {
+-                      if ((rulesets[rule2arr[idx2cnt] * nrules + pass]
+-                           & sort_backward) == 0)
+-                        /* No more backward characters to push.  */
+-                        break;
+-                      ++idx2cnt;
+-                    }
+-
+-                  if (backw2_stop == idx2cnt)
+-                    {
+-                      /* No sequence at all or just one.  */
+-                      if (idx2cnt == idx2max)
+-                        /* Note that seq2len is still zero.  */
+-                        break;
+-
+-                      backw2_stop = ~0ul;
+-                      idx2now = idx2cnt++;
+-                    }
+-                  else
+-                    /* We pushed backward sequences.  */
+-                    idx2now = backw2 = idx2cnt - 1;
+-                }
+-            }
+-          while ((seq2len = weights[idx2arr[idx2now]++]) == 0);
++        if (__glibc_unlikely (seq1.idxarr == NULL))
++          {
++            get_next_seq_nocache (&seq1, nrules, rulesets, weights, table,
++                                  extra, indirect, pass);
++            get_next_seq_nocache (&seq2, nrules, rulesets, weights, table,
++                                  extra, indirect, pass);
++          }
++        else if (pass == 0)
++          {
++            get_next_seq (&seq1, nrules, rulesets, weights, table, extra,
++                          indirect);
++            get_next_seq (&seq2, nrules, rulesets, weights, table, extra,
++                          indirect);
++          }
++        else
++          {
++            get_next_seq_cached (&seq1, nrules, pass, rulesets, weights);
++            get_next_seq_cached (&seq2, nrules, pass, rulesets, weights);
++          }
+ 
+         /* See whether any or both strings are empty.  */
+-        if (seq1len == 0 || seq2len == 0)
++        if (seq1.len == 0 || seq2.len == 0)
+           {
+-            if (seq1len == seq2len)
++            if (seq1.len == seq2.len)
+               /* Both ended.  So far so good, both strings are equal
+                  at this level.  */
+               break;
+ 
+             /* This means one string is shorter than the other.  Find out
+                which one and return an appropriate value.  */
+-            result = seq1len == 0 ? -1 : 1;
++            result = seq1.len == 0 ? -1 : 1;
+             goto free_and_return;
+           }
+ 
+-        /* Test for position if necessary.  */
+-        if (position && val1 != val2)
+-          {
+-            result = val1 - val2;
+-            goto free_and_return;
+-          }
+-
+-        /* Compare the two sequences.  */
+-        do
+-          {
+-            if (weights[idx1arr[idx1now]] != weights[idx2arr[idx2now]])
+-              {
+-                /* The sequences differ.  */
+-                result = (weights[idx1arr[idx1now]]
+-                          - weights[idx2arr[idx2now]]);
+-                goto free_and_return;
+-              }
+-
+-            /* Increment the offsets.  */
+-            ++idx1arr[idx1now];
+-            ++idx2arr[idx2now];
+-
+-            --seq1len;
+-            --seq2len;
+-          }
+-        while (seq1len > 0 && seq2len > 0);
+-
+-        if (position && seq1len != seq2len)
+-          {
+-            result = seq1len - seq2len;
+-            goto free_and_return;
+-          }
++        if (__glibc_unlikely (seq1.idxarr == NULL))
++          result = do_compare_nocache (&seq1, &seq2, position, weights);
++        else
++          result = do_compare (&seq1, &seq2, position, weights);
++        if (result != 0)
++          goto free_and_return;
+       }
++
++      if (__glibc_likely (seq1.rulearr != NULL))
++      rule = seq1.rulearr[0];
++      else
++      rule = seq1.rule;
+     }
+ 
+   /* Free the memory if needed.  */
+  free_and_return:
+   if (use_malloc)
+-    free (idx1arr);
++    free (seq1.idxarr);
+ 
+   return result;
+ }

Reply via email to