EBernhardson has submitted this change and it was merged. Change subject: Handle urls using title= query string ......................................................................
Handle urls using title= query string Change-Id: I96011f355c3cbd1162e00e2f10c7ed2e86ba9b93 --- M src/RelevanceScoring/Import/ImportedResult.php M tests/unit/RelevanceScoring/Import/ImportedResultTest.php 2 files changed, 25 insertions(+), 6 deletions(-) Approvals: EBernhardson: Verified; Looks good to me, approved diff --git a/src/RelevanceScoring/Import/ImportedResult.php b/src/RelevanceScoring/Import/ImportedResult.php index 9603a48..d3d2c2d 100644 --- a/src/RelevanceScoring/Import/ImportedResult.php +++ b/src/RelevanceScoring/Import/ImportedResult.php @@ -35,13 +35,24 @@ // make the bold assumption wikimedia wikis all // prefix with /wiki/ $prefix = '/wiki/'; - if ($prefix !== substr($path, 0, strlen($prefix))) { - throw new \Exception("Invalid url: $url"); + if ($prefix === substr($path, 0, strlen($prefix))) { + $titlePart = substr($path, strlen($prefix)); + $title = urldecode(strtr($titlePart, '_', ' ')); + if (!empty($title)) { + return new self($source, $title, $snippet, $position); + } } - $titlePart = substr($path, strlen($prefix)); - $title = urldecode(strtr($titlePart, '_', ' ')); - return new self($source, $title, $snippet, $position); + $query = parse_url($url, PHP_URL_QUERY); + if ($query) { + parse_str($query, $decoded); + if (!empty($decoded['title'])) { + $title = strtr($decoded['title'], '_', ' '); + return new self($source, $title, $snippet, $position); + } + } + + throw new \Exception("Invalid url: $url"); } public function getSource() diff --git a/tests/unit/RelevanceScoring/Import/ImportedResultTest.php b/tests/unit/RelevanceScoring/Import/ImportedResultTest.php index 506096c..aef9a7e 100644 --- a/tests/unit/RelevanceScoring/Import/ImportedResultTest.php +++ b/tests/unit/RelevanceScoring/Import/ImportedResultTest.php @@ -18,6 +18,14 @@ 'Fuller\'s Brewery', 'https://en.wikipedia.org/wiki/Fuller%27s_Brewery', ), + 'oddly formed url with query string' => array( + 'Talk:SL-1', + 'https://en.wikipedia.org/wiki?title=Talk:SL-1', + ), + 'query string with encoded parts' => array( + 'Foo & Bar', + 'https://en.wikipedia.org/w/index.php?title=Foo_%26_Bar', + ), ); } @@ -26,7 +34,7 @@ */ public function testCreateFromURL($title, $url) { - $result = ImportedResult::createFromURL('unitTest', $url, 1); + $result = ImportedResult::createFromURL('unitTest', $url, '', 1); $this->assertEquals('unitTest', $result->getSource()); $this->assertEquals($title, $result->getTitle()); -- To view, visit https://gerrit.wikimedia.org/r/286238 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I96011f355c3cbd1162e00e2f10c7ed2e86ba9b93 Gerrit-PatchSet: 2 Gerrit-Project: wikimedia/discovery/discernatron Gerrit-Branch: master Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits