EBernhardson has submitted this change and it was merged.

Change subject: Handle urls using title= query string
......................................................................


Handle urls using title= query string

Change-Id: I96011f355c3cbd1162e00e2f10c7ed2e86ba9b93
---
M src/RelevanceScoring/Import/ImportedResult.php
M tests/unit/RelevanceScoring/Import/ImportedResultTest.php
2 files changed, 25 insertions(+), 6 deletions(-)

Approvals:
  EBernhardson: Verified; Looks good to me, approved



diff --git a/src/RelevanceScoring/Import/ImportedResult.php b/src/RelevanceScoring/Import/ImportedResult.php
index 9603a48..d3d2c2d 100644
--- a/src/RelevanceScoring/Import/ImportedResult.php
+++ b/src/RelevanceScoring/Import/ImportedResult.php
@@ -35,13 +35,24 @@
          // make the bold assumption wikimedia wikis all
         // prefix with /wiki/
         $prefix = '/wiki/';
-        if ($prefix !== substr($path, 0, strlen($prefix))) {
-            throw new \Exception("Invalid url: $url");
+        if ($prefix === substr($path, 0, strlen($prefix))) {
+            $titlePart = substr($path, strlen($prefix));
+            $title = urldecode(strtr($titlePart, '_', ' '));
+            if (!empty($title)) {
+                return new self($source, $title, $snippet, $position);
+            }
         }
-        $titlePart = substr($path, strlen($prefix));
-        $title = urldecode(strtr($titlePart, '_', ' '));
 
-        return new self($source, $title, $snippet, $position);
+        $query = parse_url($url, PHP_URL_QUERY);
+        if ($query) {
+            parse_str($query, $decoded);
+            if (!empty($decoded['title'])) {
+                $title = strtr($decoded['title'], '_', ' ');
+                return new self($source, $title, $snippet, $position);
+            }
+        }
+
+        throw new \Exception("Invalid url: $url");
     }
 
     public function getSource()
diff --git a/tests/unit/RelevanceScoring/Import/ImportedResultTest.php b/tests/unit/RelevanceScoring/Import/ImportedResultTest.php
index 506096c..aef9a7e 100644
--- a/tests/unit/RelevanceScoring/Import/ImportedResultTest.php
+++ b/tests/unit/RelevanceScoring/Import/ImportedResultTest.php
@@ -18,6 +18,14 @@
                 'Fuller\'s Brewery',
                 'https://en.wikipedia.org/wiki/Fuller%27s_Brewery',
             ),
+            'oddly formed url with query string' => array(
+                'Talk:SL-1',
+                'https://en.wikipedia.org/wiki?title=Talk:SL-1',
+            ),
+            'query string with encoded parts' => array(
+                'Foo & Bar',
+                'https://en.wikipedia.org/w/index.php?title=Foo_%26_Bar',
+            ),
         );
     }
 
@@ -26,7 +34,7 @@
      */
     public function testCreateFromURL($title, $url)
     {
-        $result = ImportedResult::createFromURL('unitTest', $url, 1);
+        $result = ImportedResult::createFromURL('unitTest', $url, '', 1);
 
         $this->assertEquals('unitTest', $result->getSource());
         $this->assertEquals($title, $result->getTitle());

-- 
To view, visit https://gerrit.wikimedia.org/r/286238
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I96011f355c3cbd1162e00e2f10c7ed2e86ba9b93
Gerrit-PatchSet: 2
Gerrit-Project: wikimedia/discovery/discernatron
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>
Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to