Ejegg has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/391416 )
Change subject: SECURITY: Handle -{}- syntax in attributes safely ...................................................................... SECURITY: Handle -{}- syntax in attributes safely Previously, if one had an attribute with the contents "-{}-foo-{}-", foo would get replaced by language converter as if it wasn't in an attribute. This lead to an XSS attack. This breaks doing manual conversions in url href's (or any other attribute that goes through an escaping method other than Sanitizer's). e.g. http://{sr-el:foo';sr-ec:bar}.com won't work anymore. See also T87332 Bug: T119158 Change-Id: Idbc45cac12c309b0ccb4adeff6474fa527b48edb --- M languages/LanguageConverter.php M tests/parser/parserTests.txt 2 files changed, 39 insertions(+), 10 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/16/391416/1 diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 4200978..87eb80d 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -377,9 +377,12 @@ $scriptfix = '<script[^>]*+>[^<]*+(?:(?:(?!<\/script>).)[^<]*+)*+<\/script>|'; // disable conversion of <pre> tags $prefix = '<pre[^>]*+>[^<]*+(?:(?:(?!<\/pre>).)[^<]*+)*+<\/pre>|'; + // The "|.*+)" at the end, is in case we missed some part of html syntax, + // we will fail securely (hopefully) by matching the rest of the string. + $htmlFullTag = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)|'; - $reg = '/' . $codefix . $scriptfix . $prefix . - '<[^>]++>|&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s'; + $reg = '/' . $codefix . $scriptfix . $prefix . $htmlFullTag . + '&[a-zA-Z#][a-z0-9]++;' . $marker . $htmlfix . '|\004$/s'; $startPos = 0; $sourceBlob = ''; $literalBlob = ''; @@ -660,29 +663,41 @@ $out = ''; $length = strlen( $text ); $shouldConvert = !$this->guessVariant( $text, $variant ); + $continue = 1; - while ( $startPos < $length ) { - $pos = strpos( $text, '-{', $startPos ); + $noScript = '<script.*?>.*?<\/script>(*SKIP)(*FAIL)'; + $noStyle = '<style.*?>.*?<\/style>(*SKIP)(*FAIL)'; + $noHtml = '<(?:[^>=]*+(?>[^>=]*+=\s*+(?:"[^"]*"|\'[^\']*\'|[^\'">\s]*+))*+[^>=]*+>|.*+)(*SKIP)(*FAIL)'; + while ( $startPos < $length && $continue ) { + $continue = preg_match( + // Only match -{ outside of html. + "/$noScript|$noStyle|$noHtml|-\{/", + $text, + $m, + PREG_OFFSET_CAPTURE, + $startPos + ); - if ( $pos === false ) { + if ( !$continue ) { // No more markup, append final segment $fragment = substr( $text, $startPos ); $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; return $out; } - // Markup found + // Offset of the match of the regex pattern. + $pos = $m[0][1]; + // Append initial segment $fragment = substr( $text, $startPos, $pos - $startPos ); $out .= $shouldConvert ? $this->autoConvert( $fragment, $variant ) : $fragment; - - // Advance position + // -{ marker found, not in attribute + // Advance position up to -{ marker. $startPos = $pos; - // Do recursive conversion + // Note: This passes $startPos by reference, and advances it. $out .= $this->recursiveConvertRule( $text, $variant, $startPos, $depth + 1 ); } - return $out; } diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 0838f82..4a52c46 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -16763,6 +16763,20 @@ !! end !! test +Language converter glossary rules inside attributes (T119158) +!! options +language=sr variant=sr-el +!! wikitext +-{H|abc=>sr-el:" onload="alert(1)" data-foo="}- + +[[File:Foobar.jpg|alt=-{}-abc-{}-]] +!! html +<p> +</p><p><a href="/wiki/%D0%94%D0%B0%D1%82%D0%BE%D1%82%D0%B5%D0%BA%D0%B0:Foobar.jpg" class="image"><img alt="" onload="alert(1)" data-foo="" src="http://example.com/images/3/3a/Foobar.jpg" width="1941" height="220"></a> +</p> +!! end + +!! test Self closed html pairs (bug 5487) !! options !! wikitext -- To view, visit https://gerrit.wikimedia.org/r/391416 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Idbc45cac12c309b0ccb4adeff6474fa527b48edb Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: fundraising/REL1_27 Gerrit-Owner: Ejegg <ej...@ejegg.com> Gerrit-Reviewer: Brian Wolff <bawolff...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits