MaxSem has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/357962 )

Change subject: WIP: deprecate $wgExperimentalHtmlIds
......................................................................

WIP: deprecate $wgExperimentalHtmlIds

Bug: T167470
Change-Id: I443d449fc2b0b7c62512f10d42545f4d9e4b8b31
---
M includes/DefaultSettings.php
M includes/Sanitizer.php
M includes/api/ApiMain.php
M includes/parser/Parser.php
M tests/parser/ParserTestRunner.php
M tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js
6 files changed, 13 insertions(+), 83 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/62/357962/1

diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index 5b7ca3e..79b4cc6 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -3370,13 +3370,7 @@
 $wgDisableOutputCompression = false;
 
 /**
- * Should we allow a broader set of characters in id attributes, per HTML5?  If
- * not, use only HTML 4-compatible IDs.  This option is for testing -- when the
- * functionality is ready, it will be on by default with no option.
- *
- * Currently this appears to work fine in all browsers, but it's disabled by
- * default because it normalizes id's a bit too aggressively, breaking 
preexisting
- * content (particularly Cite).  See T29733, T29694, T29474.
+ * @deprecated since 1.30 This setting no longer has any effect.
  */
 $wgExperimentalHtmlIds = false;
 
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index c4883ba..0ecb29a 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -1139,10 +1139,8 @@
 
        /**
         * Given a value, escape it so that it can be used in an id attribute 
and
-        * return it.  This will use HTML5 validation if $wgExperimentalHtmlIds 
is
-        * true, allowing anything but ASCII whitespace.  Otherwise it will use
-        * HTML 4 rules, which means a narrow subset of ASCII, with bad 
characters
-        * escaped with lots of dots.
+        * return it.  This will use HTML 4 rules, which means a narrow subset of 
ASCII,
+        * with bad characters escaped with lots of dots.
         *
         * To ensure we don't have to bother escaping anything, we also strip 
', ",
         * & even if $wgExperimentalIds is true.  TODO: Is this the best tactic?
@@ -1161,29 +1159,14 @@
         * @param string|array $options String or array of strings (default is 
array()):
         *   'noninitial': This is a non-initial fragment of an id, not a full 
id,
         *       so don't pay attention if the first character isn't valid at 
the
-        *       beginning of an id.  Only matters if $wgExperimentalHtmlIds is
-        *       false.
-        *   'legacy': Behave the way the old HTML 4-based ID escaping worked 
even
-        *       if $wgExperimentalHtmlIds is used, so we can generate extra
-        *       anchors and links won't break.
+        *       beginning of an id.
+        *   'legacy': Behave the way the old HTML 4-based ID escaping worked.
         * @return string
         */
        static function escapeId( $id, $options = [] ) {
-               global $wgExperimentalHtmlIds;
                $options = (array)$options;
 
                $id = Sanitizer::decodeCharReferences( $id );
-
-               if ( $wgExperimentalHtmlIds && !in_array( 'legacy', $options ) 
) {
-                       $id = preg_replace( '/[ \t\n\r\f_\'"&#%]+/', '_', $id );
-                       $id = trim( $id, '_' );
-                       if ( $id === '' ) {
-                               // Must have been all whitespace to start with.
-                               return '_';
-                       } else {
-                               return $id;
-                       }
-               }
 
                // HTML4-style escaping
                static $replace = [
@@ -1211,11 +1194,8 @@
         * @param string|array $options String or array of strings (default is 
array()):
         *   'noninitial': This is a non-initial fragment of an id, not a full 
id,
         *       so don't pay attention if the first character isn't valid at 
the
-        *       beginning of an id.  Only matters if $wgExperimentalHtmlIds is
-        *       false.
-        *   'legacy': Behave the way the old HTML 4-based ID escaping worked 
even
-        *       if $wgExperimentalHtmlIds is used, so we can generate extra
-        *       anchors and links won't break.
+        *       beginning of an id.
+        *   'legacy': Behave the way the old HTML 4-based ID escaping worked.
         * @return string
         */
        static function escapeIdReferenceList( $referenceString, $options = [] 
) {
diff --git a/includes/api/ApiMain.php b/includes/api/ApiMain.php
index 00f976e..61ab9cd 100644
--- a/includes/api/ApiMain.php
+++ b/includes/api/ApiMain.php
@@ -1927,10 +1927,9 @@
                        $header = $this->msg( 'api-help-datatypes-header' 
)->parse();
 
                        // Add an additional span with sanitized ID
-                       if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) 
) {
-                               $header = Html::element( 'span', [ 'id' => 
Sanitizer::escapeId( 'main/datatypes' ) ] ) .
-                                       $header;
-                       }
+                       $header = Html::element( 'span', [ 'id' => 
Sanitizer::escapeId( 'main/datatypes' ) ] ) .
+                               $header;
+
                        $help['datatypes'] .= Html::rawElement( 'h' . min( 6, 
$level ),
                                [ 'id' => 'main/datatypes', 'class' => 
'apihelp-header' ],
                                $header
@@ -1949,10 +1948,6 @@
                        }
 
                        // Add an additional span with sanitized ID
-                       if ( !$this->getConfig()->get( 'ExperimentalHtmlIds' ) 
) {
-                               $header = Html::element( 'span', [ 'id' => 
Sanitizer::escapeId( 'main/credits' ) ] ) .
-                                       $header;
-                       }
                        $header = $this->msg( 'api-credits-header' )->parse();
                        $help['credits'] .= Html::rawElement( 'h' . min( 6, 
$level ),
                                [ 'id' => 'main/credits', 'class' => 
'apihelp-header' ],
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 34f6232..027eecc 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -4052,7 +4052,7 @@
         * @private
         */
        public function formatHeadings( $text, $origText, $isMain = true ) {
-               global $wgMaxTocLevel, $wgExperimentalHtmlIds;
+               global $wgMaxTocLevel;
 
                # Inhibit editsection links if requested in the page
                if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
@@ -4246,44 +4246,16 @@
                        # Save headline for section edit hint before it's 
escaped
                        $headlineHint = $safeHeadline;
 
-                       if ( $wgExperimentalHtmlIds ) {
-                               # For reverse compatibility, provide an id 
that's
-                               # HTML4-compatible, like we used to.
-                               # It may be worth noting, academically, that 
it's possible for
-                               # the legacy anchor to conflict with a 
non-legacy headline
-                               # anchor on the page.  In this case likely the 
"correct" thing
-                               # would be to either drop the legacy anchors or 
make sure
-                               # they're numbered first.  However, this would 
require people
-                               # to type in section names like 
"abc_.D7.93.D7.90.D7.A4"
-                               # manually, so let's not bother worrying about 
it.
-                               $legacyHeadline = Sanitizer::escapeId( 
$safeHeadline,
-                                       [ 'noninitial', 'legacy' ] );
-                               $safeHeadline = Sanitizer::escapeId( 
$safeHeadline );
-
-                               if ( $legacyHeadline == $safeHeadline ) {
-                                       # No reason to have both (in fact, we 
can't)
-                                       $legacyHeadline = false;
-                               }
-                       } else {
-                               $legacyHeadline = false;
-                               $safeHeadline = Sanitizer::escapeId( 
$safeHeadline,
-                                       'noninitial' );
-                       }
+                       $safeHeadline = Sanitizer::escapeId( $safeHeadline, 
'noninitial' );
 
                        # HTML names must be case-insensitively unique (T12721).
                        # This does not apply to Unicode characters per
                        # 
https://www.w3.org/TR/html5/infrastructure.html#case-sensitivity-and-string-comparison
                        # @todo FIXME: We may be changing them depending on the 
current locale.
                        $arrayKey = strtolower( $safeHeadline );
-                       if ( $legacyHeadline === false ) {
-                               $legacyArrayKey = false;
-                       } else {
-                               $legacyArrayKey = strtolower( $legacyHeadline );
-                       }
 
                        # Create the anchor for linking from the TOC to the 
section
                        $anchor = $safeHeadline;
-                       $legacyAnchor = $legacyHeadline;
                        if ( isset( $refers[$arrayKey] ) ) {
                                // @codingStandardsIgnoreStart
                                for ( $i = 2; isset( $refers["${arrayKey}_$i"] 
); ++$i );
@@ -4292,15 +4264,6 @@
                                $refers["${arrayKey}_$i"] = true;
                        } else {
                                $refers[$arrayKey] = true;
-                       }
-                       if ( $legacyHeadline !== false && isset( 
$refers[$legacyArrayKey] ) ) {
-                               // @codingStandardsIgnoreStart
-                               for ( $i = 2; isset( 
$refers["${legacyArrayKey}_$i"] ); ++$i );
-                               // @codingStandardsIgnoreEnd
-                               $legacyAnchor .= "_$i";
-                               $refers["${legacyArrayKey}_$i"] = true;
-                       } else {
-                               $refers[$legacyArrayKey] = true;
                        }
 
                        # Don't number the heading if it is the only one (looks 
silly)
@@ -4381,7 +4344,7 @@
                        }
                        $head[$headlineCount] = Linker::makeHeadline( $level,
                                $matches['attrib'][$headlineCount], $anchor, 
$headline,
-                               $editlink, $legacyAnchor );
+                               $editlink, false );
 
                        $headlineCount++;
                }
diff --git a/tests/parser/ParserTestRunner.php 
b/tests/parser/ParserTestRunner.php
index f44b0d5..981c513 100644
--- a/tests/parser/ParserTestRunner.php
+++ b/tests/parser/ParserTestRunner.php
@@ -215,7 +215,6 @@
                $setup['wgNoFollowLinks'] = true;
                $setup['wgNoFollowDomainExceptions'] = [ 'no-nofollow.org' ];
                $setup['wgExternalLinkTarget'] = false;
-               $setup['wgExperimentalHtmlIds'] = false;
                $setup['wgLocaltimezone'] = 'UTC';
                $setup['wgHtml5'] = true;
                $setup['wgDisableLangConversion'] = false;
diff --git a/tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js 
b/tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js
index da04c8d..3300b22 100644
--- a/tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js
+++ b/tests/qunit/suites/resources/mediawiki/mediawiki.util.test.js
@@ -93,7 +93,6 @@
        } );
 
        QUnit.test( 'escapeId', function ( assert ) {
-               mw.config.set( 'wgExperimentalHtmlIds', false );
                $.each( {
                        '+': '.2B',
                        '&': '.26',

-- 
To view, visit https://gerrit.wikimedia.org/r/357962
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I443d449fc2b0b7c62512f10d42545f4d9e4b8b31
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to