Hi Marc,

Thanks for the additional information. It did help to locate the bug.
Please apply the attached patch.

Regards,
Sergey

From aa2df3934c858977cd9033af2345f3566d136bd7 Mon Sep 17 00:00:00 2001
From: Sergey Poznyakoff <g...@gnu.org.ua>
Date: Sun, 23 May 2010 14:50:12 +0300
Subject: [PATCH] Fix improper handling of conversion errors in levenshtein.c (debian #582692)

* include/dico/utf8.h (utf8_mbstr_to_wc)
(utf8_mbstr_to_norm_wc): Change signature.
* lib/utf8.c (utf8_mbstr_to_wc)
(utf8_mbstr_to_norm_wc): Take additional return argument.
Return error code. All callers updated.
* lib/levenshtein.c (dico_levenshtein_distance): conv
returns non-zero (not necessarily negative) value on errors.
---
 include/dico/utf8.h |    4 ++--
 lib/levenshtein.c   |    6 +++---
 lib/utf8.c          |   44 +++++++++++++++++++++++---------------------
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/include/dico/utf8.h b/include/dico/utf8.h
index 1f24b78..3462443 100644
--- a/include/dico/utf8.h
+++ b/include/dico/utf8.h
@@ -54,8 +54,8 @@ size_t utf8_wc_hash_string (const unsigned *ws, size_t n_buckets);
 int utf8_wc_strcmp (const unsigned *a, const unsigned *b);
 int utf8_wc_to_mbstr(const unsigned *wordbuf, size_t wordlen, char **sptr);
 
-int utf8_mbstr_to_wc(const char *str, unsigned **wptr);
-int utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr);
+int utf8_mbstr_to_wc(const char *str, unsigned **wptr, size_t *plen);
+int utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr, size_t *plen);
 
 int utf8_quote (const char *str, char **sptr);
 unsigned *utf8_wc_quote (const unsigned *s);
diff --git a/lib/levenshtein.c b/lib/levenshtein.c
index 366df04..125756f 100644
--- a/lib/levenshtein.c
+++ b/lib/levenshtein.c
@@ -42,12 +42,12 @@ dico_levenshtein_distance(const char *astr, const char *bstr, int flags)
     unsigned *row[3];
     int i, j, idx, nrows;
     int dist;
-    int (*conv) (const char *, unsigned **) =
+    int (*conv) (const char *, unsigned **, size_t *) =
 	(flags & DICO_LEV_NORM) ? utf8_mbstr_to_norm_wc : utf8_mbstr_to_wc;
     
-    if (conv(astr, &a) < 0) 
+    if (conv(astr, &a, NULL))
 	return -1;
-    if (conv(bstr, &b) < 0) {
+    if (conv(bstr, &b, NULL)) {
 	free(a);
 	return -1;
     }
diff --git a/lib/utf8.c b/lib/utf8.c
index 105cef6..bfe9951 100644
--- a/lib/utf8.c
+++ b/lib/utf8.c
@@ -1992,33 +1992,33 @@ utf8_wc_to_mbstr(const unsigned *wordbuf, size_t wordlen, char **sptr)
 }
 
 int
-utf8_mbstr_to_wc(const char *str, unsigned **wptr)
+utf8_mbstr_to_wc(const char *str, unsigned **wptr, size_t *plen)
 {
-  size_t sc = strlen(str);
-  size_t len, i;
-  unsigned *w = calloc(sizeof(w[0]), sc+1);
+    ssize_t sc = strlen(str);
+    size_t len, i;
+    unsigned *w = calloc(sizeof(w[0]), sc+1);
 
-  if (!w)
-    return -1;
-  for (i = 0, len = strlen(str); len; i++)
-    {
-      int rc = utf8_mbtowc (w + i, (unsigned char *)str, len);
-      if (rc <= 0)
-	{
-	  free(w);
-	  return -1;
+    if (!w)
+	return -1;
+    for (i = 0, len = strlen(str); len; i++) {
+	int rc = utf8_mbtowc (w + i, (unsigned char *)str, len);
+	if (rc <= 0) {
+	    free(w);
+	    return -1;
 	}
-      str += rc;
-      len -= rc;
+	str += rc;
+	len -= rc;
     }
-  *wptr = w;
-  return sc;
+    *wptr = w;
+    if (plen)
+	*plen = sc;
+    return 0;
 }
 
 #define ISWS(c) ((c)==' '||(c)=='\t'||(c)=='\n')
 
 int
-utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr)
+utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr, size_t *plen)
 {
     int inws = 0;
     size_t len = strlen(str);
@@ -2032,7 +2032,7 @@ utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr)
 	unsigned wc;
 	int rc = utf8_mbtowc(&wc, (unsigned char *)str, len);
 	if (rc <= 0)
-	    return 1;
+	    return -1;
 	str += rc;
 	len -= rc;
 	if (rc == 1 && ISWS(wc)) {
@@ -2047,6 +2047,8 @@ utf8_mbstr_to_norm_wc(const char *str, unsigned **nptr)
     }
     base[i++] = 0;
     *nptr = realloc(base, i * sizeof(base[0]));
+    if (plen)
+      *plen = i;
     return 0;
 }
     
@@ -2056,8 +2058,8 @@ utf8_quote (const char *str, char **sptr)
   int rc;
   unsigned *ws, *ret;
   
-  rc = utf8_mbstr_to_wc (str, &ws);
-  if (rc < 0)
+  rc = utf8_mbstr_to_wc (str, &ws, NULL);
+  if (rc)
     return rc;
   ret = utf8_wc_quote (ws);
   if (ret)
-- 
1.6.0.3

Reply via email to