jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/385093 )
Change subject: Fix: handle question marks as a query parameter separator
..
Fix: handle question marks as a query parameter separator
Treat unencoded question marks as a query parameter separator not part
of the path. This is inline with the behavior of
https://en.wikipedia.org/wiki/?.
Change-Id: Ife6de3af8841bfeca6a704a3729169ac3472a37e
---
M src/common/data-clients/restbase-title-encoder.ts
M src/common/routers/api.test.ts
M src/common/routers/api.ts
3 files changed, 44 insertions(+), 8 deletions(-)
Approvals:
Jhernandez: Looks good to me, approved
jenkins-bot: Verified
diff --git a/src/common/data-clients/restbase-title-encoder.ts
b/src/common/data-clients/restbase-title-encoder.ts
index 6dfab56..1269a29 100644
--- a/src/common/data-clients/restbase-title-encoder.ts
+++ b/src/common/data-clients/restbase-title-encoder.ts
@@ -6,6 +6,24 @@
// RESTBase doesn't understand page titles with slashes in them. An encoded
// path cannot be blindly reencoded with encodeURIComponent() because that
may
// doubly encode characters which gives different meaning when its unencoded
- // only once. Instead, target slashes specifically.
+ // only once. Instead, target slashes specifically. Ampersands (&) and
percent
+ // signs (%) are understood, question marks are not supported unencoded by
+ // Marvin or wikipedia.org's /wiki/... endpoint.
+ //
+ // Supported:
+ // - http://localhost:3000/wiki//
+ // - http://localhost:3000/wiki/%2f
+ // - http://localhost:3000/wiki/%3f
+ // - https://en.wikipedia.org/wiki//
+ // - https://en.wikipedia.org/wiki/%2f
+ // - https://en.wikipedia.org/wiki/%3f
+ // - https://en.wikipedia.org/api/rest_v1/page/mobile-sections/%2f
+ // - https://en.wikipedia.org/api/rest_v1/page/mobile-sections/%3f
+ //
+ // Unsupported:
+ // - http://localhost:3000/wiki/?
+ // - https://en.wikipedia.org/wiki/?
+ // - https://en.wikipedia.org/api/rest_v1/page/mobile-sections//
+ // - https://en.wikipedia.org/api/rest_v1/page/mobile-sections/?
return title.replace("/", "%2f");
}
diff --git a/src/common/routers/api.test.ts b/src/common/routers/api.test.ts
index 1af1f34..47a93f0 100644
--- a/src/common/routers/api.test.ts
+++ b/src/common/routers/api.test.ts
@@ -121,20 +121,32 @@
params: { title: "/", revision: "123456789" }
},
{
+name: "wiki (title is a question mark)",
+route: wiki,
+path: "/wiki/%3f",
+params: { title: "%3f", revision: undefined }
+ },
+ {
+name: "wiki (title is a question mark, revision)",
+route: wiki,
+path: "/wiki/%3f/123456789/",
+params: { title: "%3f", revision: "123456789" }
+ },
+ {
name: "wiki (title with every supported character class)",
route: wiki,
-path: "/wiki/ %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+",
+path: "/wiki/ %!\"$&'()*,-./0:;=@A\\^_`a~\x80+",
params: {
- title: " %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+",
+ title: " %!\"$&'()*,-./0:;=@A\\^_`a~\x80+",
revision: undefined
}
},
{
name: "wiki (title with every supported character class, revision)",
route: wiki,
-path: "/wiki/ %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+/123456789",
+path: "/wiki/ %!\"$&'()*,-./0:;=@A\\^_`a~\x80+/123456789",
params: {
- title: " %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+",
+ title: " %!\"$&'()*,-./0:;=@A\\^_`a~\x80+",
revision: "123456789"
}
},
@@ -169,10 +181,16 @@
params: { title: "title/text" }
},
{
+name: "summary (title is a question mark)",
+route: summary,
+path: "/page/summary/%3f",
+params: { title: "%3f" }
+ },
+ {
name: "summary (title with every supported character class)",
route: summary,
-path: "/page/summary/ %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+",
-params: { title: " %!\"$&'()*,-./0:;=?@A\\^_`a~\x80+" }
+path: "/page/summary/ %!\"$&'()*,-./0:;=@A\\^_`a~\x80+",
+params: { title: " %!\"$&'()*,-./0:;=@A\\^_`a~\x80+" }
},
{
name: "random (wiki)",
diff --git a/src/common/routers/api.ts b/src/common/routers/api.ts
index f3db4a5..5484e5f 100644
--- a/src/common/routers/api.ts
+++ b/src/common/routers/api.ts
@@ -21,7 +21,7 @@
});
const TITLE_CHARACTER_REGEX_STRING =
- "[ %!\"$&'\\(\\)*,\\-.\\/0-9:;=?@A-Z^_`a-z~\\x80-\\xFF+]";
+ "[ %!\"$&'\\(\\)*,\\-.\\/0-9:;=@A-Z^_`a-z~\\x80-\\xFF+]";
export const wiki: Route = newRoute({
// https://www.mediawiki.org/wiki/Manual:$wgLegalTitleChars
--
To view, visit https://gerrit.wikimedia.org/r/385093
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ife6de3af8841bfeca6a704