I attach the fix for the bug. The regex side was already fixed by Paul, thank you.
From 884c46aadbe6a2f7203f84d4173a515ca4ccf8de Mon Sep 17 00:00:00 2001
From: Norihiro Tanaka <[email protected]>
Date: Thu, 24 Sep 2020 10:39:46 +0900
Subject: [PATCH] grep: fix ignore-case Turkish bug
* src/grep.c (fgrep_icase_charlen): Do not assume that converting a
single-byte character to uppercase yields a single-byte character.
* tests/turkish-eyes: Add new test cases for this change.
---
src/grep.c | 29 +++++++++++++++--------------
tests/turkish-eyes | 16 ++++++++++++++++
2 files changed, 31 insertions(+), 14 deletions(-)
diff --git a/src/grep.c b/src/grep.c
index 1453b14..1efaf3b 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2310,23 +2310,24 @@ contains_encoding_error (char const *pat, size_t patlen)
static int
fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
{
- int n = localeinfo.sbclen[to_uchar (*pat)];
- if (n < 0)
+ if (!localeinfo.multibyte)
+ return localeinfo.sbclen[to_uchar (*pat)];
+
+ wchar_t wc;
+ size_t wn = mbrtowc (&wc, pat, patlen, mbs);
+ wchar_t folded[CASE_FOLDED_BUFSIZE];
+
+ if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded))
+ return -1;
+
+ for (int i = wn; 0 < --i; )
{
- wchar_t wc;
- wchar_t folded[CASE_FOLDED_BUFSIZE];
- size_t wn = mbrtowc (&wc, pat, patlen, mbs);
- if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded))
+ unsigned char c = pat[i];
+ if (toupper (c) != c)
return -1;
- for (int i = wn; 0 < --i; )
- {
- unsigned char c = pat[i];
- if (toupper (c) != c)
- return -1;
- }
- n = wn;
}
- return n;
+
+ return wn;
}
/* Return true if the -F patterns PAT, of size PATLEN, contain only
diff --git a/tests/turkish-eyes b/tests/turkish-eyes
index ba1ea33..d1e7978 100755
--- a/tests/turkish-eyes
+++ b/tests/turkish-eyes
@@ -43,4 +43,20 @@ for opt in -E -F -G; do
compare out in || fail=1
done
+printf "$I\n" > in || framework_failure_
+search_str=i
+
+for opt in -E -F -G; do
+ LC_ALL=$L grep $opt -io "$search_str" in > out || fail=1
+ compare out in || fail=1
+done
+
+printf "$i\n" > in || framework_failure_
+search_str=I
+
+for opt in -E -F -G; do
+ LC_ALL=$L grep $opt -io "$search_str" in > out || fail=1
+ compare out in || fail=1
+done
+
Exit $fail
--
1.7.1