Log message for revision 108734: Treat fullwidth space characters defined in Unicode as valid whitespace. Patch by Manabu TERADA.
Changed:
  U   Zope/trunk/src/Products/ZCTextIndex/QueryParser.py
  U   Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py

-=-
Modified: Zope/trunk/src/Products/ZCTextIndex/QueryParser.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/QueryParser.py 2010-02-03 14:15:23 UTC (rev 108733)
+++ Zope/trunk/src/Products/ZCTextIndex/QueryParser.py 2010-02-03 15:13:48 UTC (rev 108734)
@@ -94,6 +94,11 @@
 )
 """, re.VERBOSE)
 
+# Use unicode regex to treat fullwidth space characters defined in Unicode
+# as valid whitespace.
+_tokenizer_unicode_regex = re.compile(
+    _tokenizer_regex.pattern, _tokenizer_regex.flags|re.UNICODE)
+
 class QueryParser:
 
     implements(IQueryParser)
@@ -109,7 +114,13 @@
 
     def parseQuery(self, query):
         # Lexical analysis.
-        tokens = _tokenizer_regex.findall(query)
+        try:
+            # Try to use unicode and treat fullwidth whitespace as valid one.
+            if not isinstance(query, unicode):
+                query = query.decode('utf-8')
+            tokens = _tokenizer_unicode_regex.findall(query)
+        except UnicodeDecodeError:
+            tokens = _tokenizer_regex.findall(query)
         self._tokens = tokens
         # classify tokens
         self._tokentypes = [_keywords.get(token.upper(), _ATOM)

Modified: Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py
===================================================================
--- Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py 2010-02-03 14:15:23 UTC (rev 108733)
+++ Zope/trunk/src/Products/ZCTextIndex/tests/testQueryParser.py 2010-02-03 15:13:48 UTC (rev 108734)
@@ -210,6 +210,18 @@
         self.expect("foo* bar",
                     AndNode([GlobNode("foo*"), AtomNode("bar")]))
 
+    def test024(self):
+        # Split by UTF-8 fullwidth space
+        from Products.ZCTextIndex.ParseTree import AndNode
+        from Products.ZCTextIndex.ParseTree import AtomNode
+        self.expect("foo\xe3\x80\x80bar", AndNode([AtomNode("foo"), AtomNode("bar")]))
+
+    def test025(self):
+        # Split by Unicode fullwidth space
+        from Products.ZCTextIndex.ParseTree import AndNode
+        from Products.ZCTextIndex.ParseTree import AtomNode
+        self.expect(u"foo\u3000bar", AndNode([AtomNode(u"foo"), AtomNode(u"bar")]))
+
     def test101(self):
         self.failure("")

_______________________________________________
Zope-Checkins maillist - Zope-Checkins@zope.org
https://mail.zope.org/mailman/listinfo/zope-checkins