jenkins-bot has submitted this change and it was merged. Change subject: Rework regex for quoted keyword:"..." extraction ......................................................................
Rework regex for quoted keyword:"..." extraction I'm touching this code for two reasons: 1. A trim() to remove the quotes was found in three places. We can remove the quotes right away in a single place. I think this makes the code easier to read and maintain. 2. A simple trim() will lead to unexpected results in cases like keyword:"\"foo\"". The escaped quotes should be part of the search string, but trim() will remove them because trim() cannot be limited to removing only one character from each end. Change-Id: Id6f58010c5a69bd20090bc2c400cf2b93c8da543 --- M includes/Searcher.php 1 file changed, 5 insertions(+), 6 deletions(-) Approvals: Cindy-the-browser-test-bot: Looks good to me, but someone else must approve EBernhardson: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Searcher.php b/includes/Searcher.php index f09225f..76583da 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -532,13 +532,14 @@ $fuzzyQuery = $this->fuzzyQuery; $isEmptyQuery = false; $this->extractSpecialSyntaxFromTerm( - '/(?<key>[a-z\\-]{7,15}):\s*(?<value>"(?:[^"]|(?<=\\\)")+"|[^ "]+) ?/', + '/(?<key>[a-z\\-]{7,15}):\s*(?:"(?<quoted>(?:[^"]|(?<=\\\)")+)"|(?<unquoted>\S+)) ?/', function ( $matches ) use ( $searcher, $escaper, &$filters, &$notFilters, &$boostTemplates, &$searchContainedSyntax, &$fuzzyQuery, &$highlightSource, &$isEmptyQuery ) { global $wgCirrusSearchMaxIncategoryOptions; $key = $matches['key']; - $value = $matches['value']; // Note that if the user supplied quotes they are not removed - $value = str_replace( '\"', '"', $value ); + $value = $matches['quoted'] !== '' + ? 
str_replace( '\"', '"', $matches['quoted'] ) + : $matches['unquoted']; $filterDestination = &$filters; $keepText = true; if ( $key[ 0 ] === '-' ) { @@ -548,14 +549,13 @@ } switch ( $key ) { case 'boost-templates': - $boostTemplates = Searcher::parseBoostTemplates( trim( $value, '"' ) ); + $boostTemplates = Searcher::parseBoostTemplates( $value ); if ( $boostTemplates === null ) { $boostTemplates = Searcher::getDefaultBoostTemplates(); } $searchContainedSyntax = true; return ''; case 'hastemplate': - $value = trim( $value, '"' ); // We emulate template syntax here as best as possible, // so things in NS_MAIN are prefixed with ":" and things // in NS_TEMPLATE don't have a prefix at all. Since we @@ -916,7 +916,6 @@ * @return \Elastica\Filter\Query for matching $title to $field */ public function matchPage( $field, $title, $underscores = false ) { - $title = trim( $title, '"' ); // Somtimes title is wrapped in quotes - throw them away. if ( $underscores ) { $title = str_replace( ' ', '_', $title ); } else { -- To view, visit https://gerrit.wikimedia.org/r/207747 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Id6f58010c5a69bd20090bc2c400cf2b93c8da543 Gerrit-PatchSet: 5 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Cindy-the-browser-test-bot <bernhardsone...@gmail.com> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: Thiemo Mättig (WMDE) <thiemo.maet...@wikimedia.de> Gerrit-Reviewer: WMDE-Fisch <christoph.fisc...@wikimedia.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits