jenkins-bot has submitted this change and it was merged. Change subject: General cleanup of CSSParser ......................................................................
General cleanup of CSSParser * Add phpdoc comments * Update method signatures to reduce duplication * Rename some variables to be a bit more clear for new readers * Try to keep lines <80 chars (my own personal peeve I know) * Use === instead of == * Fix a few other small code style issues Change-Id: I52594fd34646af53fc91ec470fcf1d0be9c2b156 --- M CSSParser.php 1 file changed, 169 insertions(+), 96 deletions(-) Approvals: Jforrester: Looks good to me, approved jenkins-bot: Verified diff --git a/CSSParser.php b/CSSParser.php index 889d0cd..2e7bcb9 100644 --- a/CSSParser.php +++ b/CSSParser.php @@ -12,7 +12,9 @@ */ class CSSParser { + /** @var array $tokens */ private $tokens; + /** @var int $index */ private $index; /** @@ -25,6 +27,7 @@ * @param string $css */ function __construct( $css ) { + $this->index = 0; preg_match_all( '/( [ \n\t]+ (?# Sequences of whitespace ) @@ -46,51 +49,65 @@ (?# Unicode range literals ) | u\+ [0-9a-f?]{1,6} (?# Unicode mask literals ) - | .) + | . (?# Any unmatched token is reduced to single characters ) - /xis', $css, $match ); + )/xis', $css, $match ); - $space = false; - foreach ( $match[0] as $t ) { - if ( preg_match( '/^(?:[ \n\t]|\/\*)/', $t ) ) { - + $inWhitespaceRun = false; + foreach ( $match[0] as $token ) { + if ( preg_match( '/^(?: [ \n\t] | \/\* )/x', $token ) ) { // Fold any sequence of whitespace to a single space token - - if ( !$space ) { - $space = true; + if ( !$inWhitespaceRun ) { + $inWhitespaceRun = true; $this->tokens[] = ' '; continue; } } else { + // Decode any hexadecimal escape character into its + // corresponding UTF-8 sequence - output is UTF-8 so the + // escaping is unnecessary and this prevents trying to + // obfuscate ASCII in identifiers to prevent matches. + $token = preg_replace_callback( + '/\\\\([0-9a-f]{1,6})[ \n\t]?/', + function( $match ) { + return html_entity_decode( + '&#x' . $match[1] . ';', ENT_NOQUOTES, 'UTF-8' ); + }, + $token + ); - // decode any hexadecimal escape character into their corresponding UTF-8 - // sequence - our output is UTF-8 so the escaping is unnecessary and this - // prevents trying to obfuscate ASCII in identifiers to prevent matches. - - $t = preg_replace_callback( '/\\\\([0-9a-f]{1,6})[ \n\t]?/', function( $match ) { - return html_entity_decode( '&#x'.$match[1].';', ENT_NOQUOTES, 'UTF-8' ); - }, $t ); - - // close unclosed string literals - if ( preg_match( '/^([\'"])(.*)\n$/', $t, $match ) ) { - $t = $match[1] . $match[2] . $match[1]; + // Close unclosed string literals + if ( preg_match( '/^ ([\'"]) (.*) \n $/x', $token, $match ) ) { + $token = $match[1] . $match[2] . $match[1]; } - $space = false; - $this->tokens[] = $t; - + $inWhitespaceRun = false; + $this->tokens[] = $token; } } - $this->index = 0; } - private function peek( $i ) { - if ( $this->index+$i >= count( $this->tokens ) ) { + /** + * Get a token from the input stream without advancing the current + * position. + * + * @param int $offset Offset from current stream location + * @return string|null Token or null if offset is past the end of the + * input stream + */ + private function peek( $offset = 0 ) { + if ( ( $this->index + $offset ) >= count( $this->tokens ) ) { return null; } - return $this->tokens[$this->index+$i]; + return $this->tokens[$this->index + $offset]; } + /** + * Take a list of tokens from the input stream. + * + * @param int $num Number of tokens to take + * @return array List of tokens + */ private function consume( $num = 1 ) { if ( $num > 0 ) { if ( $this->index+$num >= count( $this->tokens ) ) { @@ -103,7 +120,22 @@ return []; } + /** + * Take tokens from the input stream up to but not including a delimiter + * from the provided list. + * + * The next token in the stream should always be validated after using + * this function as it may return early if the end of the token stream is + * reached. + * + * @param array $delim List of delimiters + * @return array List of tokens + */ private function consumeTo( $delim ) { + if ( !in_array( null, $delim ) ) { + // Make sure we don't hit an infinte loop on malformed input + $delim[] = null; + } $consume = 0; while ( !in_array( $this->peek( $consume ), $delim ) ) { $consume++; @@ -111,6 +143,11 @@ return $this->consume( $consume ); } + /** + * Take consecutive whitespace tokens from the input stream. + * + * @return array List of whitespace tokens + */ private function consumeWS() { $consume = 0; while ( $this->peek( $consume ) === ' ' ) { @@ -120,21 +157,23 @@ } /** - * Parses: - * decl : WS* IDENT WS* ':' TOKEN* ';' - * | WS* IDENT <error> ';' -> skip - * ; + * Parse a CSS declaration. * - * Returns: - * [ name => value ] + * Grammar: + * + * decl : WS* IDENT WS* ':' TOKEN* ';' + * | WS* IDENT <error> ';' -> skip + * ; + * + * @return array [ name => value ] */ private function parseDecl() { $this->consumeWS(); $name = $this->consume()[0]; $this->consumeWS(); - if ( $this->peek( 0 )!=':' ) { - $this->consumeTo( [';', '}', null] ); - if ( $this->peek( 0 ) ) { + if ( $this->peek() != ':' ) { + $this->consumeTo( [ ';', '}' ] ); + if ( $this->peek() ) { $this->consume(); $this->consumeWS(); } @@ -142,8 +181,8 @@ } $this->consume(); $this->consumeWS(); - $value = $this->consumeTo( [';', '}', null] ); - if ( $this->peek( 0 ) == ';' ) { + $value = $this->consumeTo( [ ';', '}' ] ); + if ( $this->peek() === ';' ) { $this->consume(); $this->consumeWS(); } @@ -151,17 +190,20 @@ } /** - * Parses: - * decls : '}' - * | decl decls - * ; + * Parse a list of CSS declarations. * - * Returns: - * [ decl* ] + * Grammar: + * + * decls : '}' + * | decl decls + * ; + * + * @return array List of decls + * @see parseDecl */ private function parseDecls() { $decls = []; - while ( $this->peek( 0 ) !== null and $this->peek( 0 ) != '}' ) { + while ( $this->peek() !== null && $this->peek() != '}' ) { $decl = $this->parseDecl(); if ( $decl ) { foreach ( $decl as $k => $d ) { @@ -173,25 +215,29 @@ } /** - * Parses: - * rule : WS* selectors ';' - * | WS* selectors '{' decls - * ; - * selectors : TOKEN* - * | selectors ',' TOKEN* - * ; + * Parse a CSS rule. * - * Returns: - * [ selectors => [ selector* ], decls => [ decl* ] ] + * Grammar: + * + * rule : WS* selectors ';' + * | WS* selectors '{' decls + * ; + * selectors : TOKEN* + * | selectors ',' TOKEN* + * ; + * + * @param string $baseSelectors Selector to prepend to all rules to + * enforce scoping. + * @return array|null [ selectors => [ selector* ], decls => [ decl* ] ] */ public function parseRule( $baseSelectors ) { $selectors = []; $text = ''; $this->consumeWS(); - while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) ) { - if ( $this->peek( 0 ) == ',' ) { - if ( $text != '' ) { - $selectors[] = $baseSelectors . $text; + while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) { + if ( $this->peek() === ',' ) { + if ( $text !== '' ) { + $selectors[] = "{$baseSelectors}{$text}"; } $this->consume(); $this->consumeWS(); @@ -200,12 +246,15 @@ $text .= $this->consume()[0]; } } - if ( $text != '' ) { - $selectors[] = $baseSelectors . $text; + if ( $text !== '' ) { + $selectors[] = "{$baseSelectors}{$text}"; } - if ( $this->peek( 0 ) == '{' ) { + if ( $this->peek() === '{' ) { $this->consume(); - return [ "selectors"=>$selectors, "decls"=>$this->parseDecls() ]; + return [ + 'selectors' => $selectors, + 'decls' => $this->parseDecls() + ]; } if ( $this->peek( 0 ) ) { $this->consume(); @@ -214,80 +263,104 @@ } /** - * Parses the token array, and returns a tree representing the CSS suitable - * for feeding CSSRenderer objects. + * Parses the token array, and returns a tree representing the CSS + * suitable for feeding CSSRenderer objects. * - * @param array $end An array of string representing tokens that can end the parse. Defaults - * to ending only at the end of the string. + * Grammar: + * + * anyrule : ATIDENT='@media' WS* TOKEN* '{' rules '}' + * | ATIDENT WS* TOKEN* ';' + * | ATIDENT WS* TOKEN* '{' decls '}' + * | rule + * ; + * rules : anyrule + * | rules anyrule + * ; + * + * Output: + * + * [ [ name=>ATIDENT? , text=>body? , rules=>rules? ]* ] + * + * @param string $baseSelectors Selector to prepend to all rules to + * enforce scoping. + * @param array $end An array of string representing tokens that can end + * the parse. Defaults to ending only at the end of the string. * @return array A tree describing the CSS rule blocks. - * - * Parses: - * anyrule : ATIDENT='@media' WS* TOKEN* '{' rules '}' - * | ATIDENT WS* TOKEN* ';' - * | ATIDENT WS* TOKEN* '{' decls '}' - * | rule - * ; - * rules : anyrule - * | rules anyrule - * ; - * - * Returns: - * [ [ name=>ATIDENT? , text=>body? , rules=>rules? ]* ] */ public function rules( $baseSelectors = '', $end = [ null ] ) { $atrules = []; $rules = []; $this->consumeWS(); - while ( !in_array( $this->peek( 0 ), $end ) ) { - if ( in_array( strtolower( $this->peek( 0 ) ), [ '@media' ] ) ) { + while ( !in_array( $this->peek(), $end ) ) { + if ( strtolower( $this->peek() ) === '@media' ) { $at = $this->consume()[0]; $this->consumeWS(); + $text = ''; - while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) ) { + while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) { $text .= $this->consume()[0]; } - if ( $this->peek( 0 ) == '{' ) { + + if ( $this->peek() === '{' ) { $this->consume(); $r = $this->rules( $baseSelectors, [ '}', null ] ); if ( $r ) { - $atrules[] = [ "name"=>$at, "text"=>$text, "rules"=>$r ]; + $atrules[] = [ + 'name' => $at, + 'text' => $text, + 'rules' => $r, + ]; } + } else { - $atrules[] = [ "name"=>$at, "text"=>$text ]; + $atrules[] = [ + 'name' => $at, + 'text' => $text, + ]; } - } elseif ( $this->peek( 0 )[0] == '@' ) { + } elseif ( $this->peek()[0] === '@' ) { $at = $this->consume()[0]; $text = ''; - while ( !in_array( $this->peek( 0 ), ['{', ';', null] ) ) { + while ( !in_array( $this->peek(), [ '{', ';', null ] ) ) { $text .= $this->consume()[0]; } - if ( $this->peek( 0 ) == '{' ) { + if ( $this->peek() === '{' ) { $this->consume(); $decl = $this->parseDecls(); if ( $decl ) { - $atrules[] = [ "name"=>$at, "text"=>$text, "rules"=>[ "selectors"=>'', "decls"=>$decl ] ]; + $atrules[] = [ + 'name' => $at, + 'text' => $text, + 'rules' => [ + 'selectors' => '', + 'decls' => $decl, + ], + ]; } } else { - $atrules[] = [ "name"=>$at, "text"=>$text ]; + $atrules[] = [ + 'name' => $at, + 'text' => $text, + ]; } - } elseif ( $this->peek( 0 ) == '}' ) { - + } elseif ( $this->peek() === '}' ) { $this->consume(); - } else { $rules[] = $this->parseRule( $baseSelectors ); } $this->consumeWS(); } if ( $rules ) { - $atrules[] = [ "name"=>'', "rules"=>$rules ]; + $atrules[] = [ + 'name' => '', + 'rules' => $rules, + ]; } $this->consumeWS(); - if ( $this->peek( 0 ) !== null ) { + if ( $this->peek() !== null ) { $this->consume(); } return $atrules; } - } -- To view, visit https://gerrit.wikimedia.org/r/284628 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I52594fd34646af53fc91ec470fcf1d0be9c2b156 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/TemplateStyles Gerrit-Branch: master Gerrit-Owner: BryanDavis <bda...@wikimedia.org> Gerrit-Reviewer: Jforrester <jforres...@wikimedia.org> Gerrit-Reviewer: coren <m...@uberbox.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits