Thiemo Mättig (WMDE) has uploaded a new change for review. https://gerrit.wikimedia.org/r/207747
Change subject: Rework regex for quoted keyword:"..." extraction ...................................................................... Rework regex for quoted keyword:"..." extraction I'm touching this code for two reasons: 1. A trim() call that removes the quotes was duplicated in three places. We can remove the quotes right away in a single place. I think this makes the code easier to read and maintain. 2. A simple trim() will lead to unexpected results in cases like keyword:"\"foo\"". The escaped quotes should be part of the search string, but trim() will remove them because trim() cannot be limited to removing only one character from each end. Change-Id: Id6f58010c5a69bd20090bc2c400cf2b93c8da543 --- M includes/Searcher.php 1 file changed, 5 insertions(+), 6 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/47/207747/1 diff --git a/includes/Searcher.php b/includes/Searcher.php index 990ab3c..46b8db2 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -499,12 +499,13 @@ $escaper = $this->escaper; $fuzzyQuery = $this->fuzzyQuery; $this->extractSpecialSyntaxFromTerm( - '/(?<key>[a-z\\-]{7,15}):\s*(?<value>"(?:[^"]|(?<=\\\)")+"|[^ "]+) ?/', + '/(?<key>[a-z\\-]{7,15}):\s*(?:"(?:<quoted>(?:[^"]|(?<=\\\)")+)"|(?<unquoted>\S+)) ?/', function ( $matches ) use ( $searcher, $escaper, &$filters, &$notFilters, &$boostTemplates, &$searchContainedSyntax, &$fuzzyQuery, &$highlightSource ) { $key = $matches['key']; - $value = $matches['value']; // Note that if the user supplied quotes they are not removed - $value = str_replace( '\"', '"', $value ); + $value = isset( $matches['quoted'] ) + ? 
str_replace( '\"', '"', $matches['quoted'] ) + : $matches['unquoted']; $filterDestination = &$filters; $keepText = true; if ( $key[ 0 ] === '-' ) { @@ -514,14 +515,13 @@ } switch ( $key ) { case 'boost-templates': - $boostTemplates = Searcher::parseBoostTemplates( trim( $value, '"' ) ); + $boostTemplates = Searcher::parseBoostTemplates( $value ); if ( $boostTemplates === null ) { $boostTemplates = Searcher::getDefaultBoostTemplates(); } $searchContainedSyntax = true; return ''; case 'hastemplate': - $value = trim( $value, '"' ); // We emulate template syntax here as best as possible, // so things in NS_MAIN are prefixed with ":" and things // in NS_TEMPLATE don't have a prefix at all. Since we @@ -743,7 +743,6 @@ * @return \Elastica\Filter\Query for matching $title to $field */ public function matchPage( $field, $title, $underscores = false ) { - $title = trim( $title, '"' ); // Somtimes title is wrapped in quotes - throw them away. if ( $underscores ) { $title = str_replace( ' ', '_', $title ); } else { -- To view, visit https://gerrit.wikimedia.org/r/207747 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id6f58010c5a69bd20090bc2c400cf2b93c8da543 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits