Umherirrender has submitted this change and it was merged. Change subject: [Cargo] #cargo_query Fix issues with quotes and other parsing ......................................................................
[Cargo] #cargo_query Fix issues with quotes and other parsing CargoUtils.php ============== (1) Fix smartSplit so that parenthesis, separators and "the other quote" (single quote in a double quoted string or double quote in a single quoted string) inside a quoted string are not considered lexically. (2) Add 3 functions to CargoUtils to prepare regex expressions to match table names and field names, allowing for '$' as part of the identifier. CargoSQLQuery.php ================= (3) Change the quote elimination logic in validateValues so that "the other quote" inside a quoted string is not considered lexically. Earier double quotes were made into single quotes so if a double quote happened inside a single quoted string (or the other way around) things would get out of synch. (4) Eliminate quoted strings before all checks within validateValues, but for $limitStr -- i.e. $fieldsStr, $whereStr, $joinStr, $groupByStr, $havingStr and $orderByStr. (5) Fix sqlFunctionRegex within getAndValidateSQLFunctions to allow for multiple white spaces '\s' between the function name and the parenthesis. (6) Change setDescriptionsForFields, handleVirtualFields and addTablePrefixes use more resilent regex when mathing identifiers. (7) Bug T120583: Change the HOLDS check to match the field "book", but not the field "bookworm" when the field that allows for multiple values is "book". Makes the match case-insensitive since table and column names are usually not case sensitive. (8) Change the decomposition of table / field from explode('.') to a regex, adding resilience. (9) Fix the string literal identification in setDescriptionsForFields to non-greedy. (10) Add support for double quoted literal strings in setDescriptionsForFields. (11) Fix an issue where quoted strings were being scanned for function calls throwing and exception when an open parenthesis followed a word inside a quoted string. Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3 --- M CargoSQLQuery.php M CargoUtils.php 2 files changed, 198 insertions(+), 112 deletions(-) Approvals: Umherirrender: Verified; Looks good to me, approved Yaron Koren: Checked; Looks good to me, approved diff --git a/CargoSQLQuery.php b/CargoSQLQuery.php index b406a2f..69c30ea 100644 --- a/CargoSQLQuery.php +++ b/CargoSQLQuery.php @@ -95,6 +95,7 @@ '/\-\-/' => '--', '/#/' => '#', ); + // HTML-decode the string - this is necessary if the query // contains a call to {{PAGENAME}} and the page name has any // special characters, because {{PAGENAME]] unfortunately @@ -105,9 +106,12 @@ throw new MWException( "Error in \"where\" parameter: the string \"$displayString\" cannot be used within #cargo_query." ); } } - $simplifiedWhereStr = str_replace( array( '\"', "\'" ), '', $whereStr ); - $simplifiedWhereStr = preg_replace( '/"[^"]*"/', '', $simplifiedWhereStr ); - $simplifiedWhereStr = preg_replace( "/'[^']*'/", '', $simplifiedWhereStr ); + $noQuotesFieldsStr = CargoUtils::removeQuotedStrings( $fieldsStr ); + $noQuotesWhereStr = CargoUtils::removeQuotedStrings( $whereStr ); + $noQuotesJoinOnStr = CargoUtils::removeQuotedStrings( $joinOnStr ); + $noQuotesGroupByStr = CargoUtils::removeQuotedStrings( $groupByStr ); + $noQuotesHavingStr = CargoUtils::removeQuotedStrings( $havingStr ); + $noQuotesOrderByStr = CargoUtils::removeQuotedStrings( $orderByStr ); $regexps = array( '/\bselect\b/i' => 'SELECT', @@ -123,22 +127,22 @@ ); foreach ( $regexps as $regexp => $displayString ) { if ( preg_match( $regexp, $tablesStr ) || - preg_match( $regexp, $fieldsStr ) || - preg_match( $regexp, $simplifiedWhereStr ) || - preg_match( $regexp, $joinOnStr ) || - preg_match( $regexp, $groupByStr ) || - preg_match( $regexp, $havingStr ) || - preg_match( $regexp, $orderByStr ) || + preg_match( $regexp, $noQuotesFieldsStr ) || + preg_match( $regexp, $noQuotesWhereStr ) || + preg_match( $regexp, $noQuotesJoinOnStr ) || + preg_match( $regexp, $noQuotesGroupByStr ) || + preg_match( $regexp, $noQuotesHavingStr ) || + preg_match( $regexp, $noQuotesOrderByStr ) || preg_match( $regexp, $limitStr ) ) { throw new MWException( "Error: the string \"$displayString\" cannot be used within #cargo_query." ); } } - self::getAndValidateSQLFunctions( $simplifiedWhereStr ); - self::getAndValidateSQLFunctions( $joinOnStr ); - self::getAndValidateSQLFunctions( $groupByStr ); - self::getAndValidateSQLFunctions( $havingStr ); - self::getAndValidateSQLFunctions( $orderByStr ); + self::getAndValidateSQLFunctions( $noQuotesWhereStr ); + self::getAndValidateSQLFunctions( $noQuotesJoinOnStr ); + self::getAndValidateSQLFunctions( $noQuotesGroupByStr ); + self::getAndValidateSQLFunctions( $noQuotesHavingStr ); + self::getAndValidateSQLFunctions( $noQuotesOrderByStr ); self::getAndValidateSQLFunctions( $limitStr ); } @@ -340,7 +344,7 @@ global $wgCargoAllowedSQLFunctions; $sqlFunctionMatches = array(); - $sqlFunctionRegex = '/(\b|\W)(\w*?)\s?\(/'; + $sqlFunctionRegex = '/(\b|\W)(\w*?)\s*\(/'; preg_match_all( $sqlFunctionRegex, $str, $sqlFunctionMatches ); $sqlFunctions = array_map( 'strtoupper', $sqlFunctionMatches[2] ); // Throw an error if any of these functions @@ -376,23 +380,53 @@ $this->mFieldTables = array(); foreach ( $this->mAliasedFieldNames as $alias => $origFieldName ) { $tableName = null; - if ( strpos( $origFieldName, '.' ) !== false ) { - // This could probably be done better with - // regexps. - list( $tableName, $fieldName ) = explode( '.', $origFieldName, 2 ); - } else { - $fieldName = $origFieldName; - $tableName = null; - } + $fieldName = null; $description = new CargoFieldDescription(); + + $fieldPattern = '/^([\w$]+)([.]([\w$]+))?$/'; + $fieldPatternFound = preg_match( $fieldPattern, $origFieldName, $fieldPatternMatches ); + $stringPatternFound = false; + $hasFunctionCall = false; + + if ( $fieldPatternFound ) { + switch ( count( $fieldPatternMatches ) ) { + case 2: + $fieldName = $fieldPatternMatches[1]; + break; + case 4: + $tableName = $fieldPatternMatches[1]; + $fieldName = $fieldPatternMatches[3]; + break; + } + } else { + $stringPattern = '/^(([\'"]).*?\2)(.+)?$/'; + $stringPatternFound = preg_match( $stringPattern, $origFieldName, $stringPatternMatches ); + if ( $stringPatternFound ) { + // If the count is 3 we have a single quoted string + // If the count is 4 we have stuff after it + $stringPatternFound = count( $stringPatternMatches ) == 3; + } + + if ( ! $stringPatternFound ) { + $noQuotesOrigFieldName = CargoUtils::removeQuotedStrings( $origFieldName ); + + $functionCallPattern = '/\w\s*\(/'; + $hasFunctionCall = preg_match( $functionCallPattern, $noQuotesOrigFieldName ); + } + } + // If it's a pre-defined field, we probably know its // type. if ( $fieldName == '_ID' || $fieldName == '_rowID' || $fieldName == '_pageID' ) { $description->mType = 'Integer'; + } elseif ( $fieldName == '_pageTitle' ) { + // It's a string - do nothing. } elseif ( $fieldName == '_pageName' ) { $description->mType = 'Page'; - } elseif ( strpos( $origFieldName, '(' ) !== false ) { - $sqlFunctions = self::getAndValidateSQLFunctions( $origFieldName ); + } elseif ( $stringPatternFound ) { + // It's a quoted, literal string - do nothing. + } elseif ( $hasFunctionCall ) { + $sqlFunctions = self::getAndValidateSQLFunctions( $noQuotesOrigFieldName ); $firstFunction = $sqlFunctions[0]; if ( in_array( $firstFunction, array( 'COUNT', 'FLOOR', 'CEIL', 'ROUND' ) ) ) { $description->mType = 'Integer'; @@ -404,8 +438,6 @@ } // If it's anything else ('CONCAT', 'SUBSTRING', // etc. etc.), we don't have to do anything. - } elseif ( preg_match( "/^'.*'$/m", $fieldName ) ) { - // It's a quoted, literal string - do nothing. } else { // It's a standard field - though if it's // '_value', or ends in '__full', it's actually @@ -526,11 +558,6 @@ return false; } - function whereStringHasPattern( $searchPattern ) { - $searchPattern = "/(^|\s|\()$searchPattern\s/"; - return preg_match( $searchPattern, $this->mWhereStr, $matches ); - } - function handleVirtualFields() { // The array-field alias can be found in a number of different // clauses. Handling depends on which clause it is: @@ -565,61 +592,56 @@ foreach ( $virtualFields as $virtualField ) { $fieldName = $virtualField['fieldName']; $tableName = $virtualField['tableName']; + $foundLikeMatch1 = $foundMatch1 = $foundLikeMatch2 = $foundMatch2 = false; + $patternSuffix = '\s+(HOLDS)\s+(LIKE)?/i'; + $throwException = false; - // Is this field in the "where" string at all? If not, - // skip it. - $pattern1 = "$tableName\.$fieldName"; - $foundMatch1 = $this->whereStringHasPattern( $pattern1 ); - if ( !$foundMatch1 ) { - $pattern2 = $fieldName; - $foundMatch2 = $this->whereStringHasPattern( $pattern2 ); - if ( !$foundMatch2 ) { - continue; + $simplePattern1 = CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName ); + if ( preg_match( $simplePattern1, $this->mWhereStr ) ) { + $likePattern1 = CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName, false ). + $patternSuffix; + if ( preg_match( $likePattern1, $this->mWhereStr, $matches ) ) { + $foundMatch1 = count( $matches ) == 3; + $foundLikeMatch1 = count( $matches ) == 4; } + $throwException = $throwException || (! $foundMatch1 && ! $foundLikeMatch1); } - - $likePattern1 = "$tableName\.$fieldName\s*HOLDS\s*LIKE"; - $foundLikeMatch1 = $this->whereStringHasPattern( $likePattern1 ); - $foundLikeMatch2 = $foundMatch1 = $foundMatch2 = false; - if ( !$foundLikeMatch1 ) { - $likePattern2 = "$fieldName\s*HOLDS\s*LIKE"; - $foundLikeMatch2 = $this->whereStringHasPattern( $likePattern2 ); - } - - if ( !$foundLikeMatch1 && !$foundLikeMatch2 ) { - $holdsPattern1 = "$tableName\.$fieldName\s*HOLDS"; - $foundHoldsMatch1 = $this->whereStringHasPattern( $holdsPattern1 ); - if ( !$foundHoldsMatch1 ) { - $holdsPattern2 = "$fieldName\s*HOLDS"; - $foundHoldsMatch2 = $this->whereStringHasPattern( $holdsPattern2 ); + $simplePattern2 = CargoUtils::getSQLFieldPattern( $fieldName ); + if ( preg_match( $simplePattern2, $this->mWhereStr ) ) { + $likePattern2 = CargoUtils::getSQLFieldPattern( $fieldName, false ). + $patternSuffix; + if ( preg_match( $likePattern2, $this->mWhereStr, $matches ) ) { + $foundMatch2 = count( $matches ) == 3; + $foundLikeMatch2 = count( $matches ) == 4; } + $throwException = $throwException || (! $foundMatch2 && ! $foundLikeMatch2); + } + if ( $throwException ) { + throw new MWException( "Error: operator for the virtual field '" . + "$tableName.$fieldName' must be 'HOLDS' or 'HOLDS LIKE'." ); } - // If no "HOLDS", throw an error. - if ( !$foundLikeMatch1 && !$foundLikeMatch2 && !$foundHoldsMatch1 && !$foundHoldsMatch2 ) { - throw new MWException( "Error: operator for the virtual field '" - . "$tableName.$fieldName' must be 'HOLDS' or 'HOLDS LIKE'." ); - } - - $fieldTableName = $tableName . '__' . $fieldName; - $this->addFieldTableToTableNames( $fieldTableName, $tableName ); - $this->mCargoJoinConds[] = array( - 'joinType' => 'LEFT OUTER JOIN', - 'table1' => $tableName, - 'field1' => '_ID', - 'table2' => $fieldTableName, - 'field2' => '_rowID' - ); - if ( $foundLikeMatch1 ) { - $this->mWhereStr = preg_replace( "/$likePattern1/", "$fieldTableName._value LIKE", - $this->mWhereStr ); - } elseif ( $foundLikeMatch2 ) { - $this->mWhereStr = preg_replace( "/$likePattern2/", "$fieldTableName._value LIKE", - $this->mWhereStr ); - } elseif ( $foundHoldsMatch1 ) { - $this->mWhereStr = preg_replace( "/$holdsPattern1/", "$fieldTableName._value=", $this->mWhereStr ); - } elseif ( $foundHoldsMatch2 ) { - $this->mWhereStr = preg_replace( "/$holdsPattern2/", "$fieldTableName._value=", $this->mWhereStr ); + if ( $foundLikeMatch1 || $foundLikeMatch2 || $foundMatch1 || $foundMatch2 ) { + $fieldTableName = $tableName . '__' . $fieldName; + $this->addFieldTableToTableNames( $fieldTableName, $tableName ); + $this->mCargoJoinConds[] = array( + 'joinType' => 'LEFT OUTER JOIN', + 'table1' => $tableName, + 'field1' => '_ID', + 'table2' => $fieldTableName, + 'field2' => '_rowID' + ); + if ( $foundLikeMatch1 ) { + $this->mWhereStr = preg_replace( $likePattern1, "$fieldTableName._value LIKE ", + $this->mWhereStr ); + } elseif ( $foundLikeMatch2 ) { + $this->mWhereStr = preg_replace( $likePattern2, "$fieldTableName._value LIKE ", + $this->mWhereStr ); + } elseif ( $foundMatch1 ) { + $this->mWhereStr = preg_replace( $likePattern1, "$fieldTableName._value=", $this->mWhereStr ); + } elseif ( $foundMatch2 ) { + $this->mWhereStr = preg_replace( $likePattern2, "$fieldTableName._value=", $this->mWhereStr ); + } } } @@ -672,9 +694,9 @@ foreach ( $virtualFields as $virtualField ) { $fieldName = $virtualField['fieldName']; $tableName = $virtualField['tableName']; - $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/"; + $pattern1 = CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName ); $foundMatch1 = preg_match( $pattern1, $this->mGroupByStr, $matches ); - $pattern2 = "/(^|\s)$fieldName($|\s)/"; + $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName ); $foundMatch2 = false; if ( !$foundMatch1 ) { @@ -746,9 +768,9 @@ foreach ( $virtualFields as $virtualField ) { $fieldName = $virtualField['fieldName']; $tableName = $virtualField['tableName']; - $pattern1 = "/(^|\s)$tableName\.$fieldName($|\s)/"; + $pattern1 = CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName ); $foundMatch1 = preg_match( $pattern1, $this->mOrderByStr, $matches ); - $pattern2 = "/(^|\s)$fieldName($|\s)/"; + $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName ); $foundMatch2 = false; if ( !$foundMatch1 ) { @@ -842,19 +864,21 @@ foreach ( $coordinateFields as $coordinateField ) { $fieldName = $coordinateField['fieldName']; $tableName = $coordinateField['tableName']; - $pattern1 = "/(^|\s|\()$tableName\.$fieldName(\s*NEAR\s*)\(([^)]*)\)/"; + $patternSuffix = '(\s+NEAR\s*)\(([^)]*)\)/i'; + + $pattern1 = CargoUtils::getSQLTableAndFieldPattern( $tableName, $fieldName, false ) . $patternSuffix; $foundMatch1 = preg_match( $pattern1, $this->mWhereStr, $matches ); if ( !$foundMatch1 ) { - $pattern2 = "/(^|\s|\()$fieldName(\s*NEAR\s*)\(([^)]*)\)/"; + $pattern2 = CargoUtils::getSQLFieldPattern( $fieldName, false ) . $patternSuffix; $foundMatch2 = preg_match( $pattern2, $this->mWhereStr, $matches ); } if ( $foundMatch1 || $foundMatch2 ) { // If no "NEAR", throw an error. - if ( count( $matches ) != 3 ) { + if ( count( $matches ) != 4 ) { throw new MWException( "Error: operator for the virtual coordinates field " . "'$tableName.$fieldName' must be 'NEAR'." ); } - $coordinatesAndDistance = explode( ',', $matches[2] ); + $coordinatesAndDistance = explode( ',', $matches[3] ); if ( count( $coordinatesAndDistance ) != 3 ) { throw new MWException( "Error: value for the 'NEAR' operator must be of the form " . "\"(latitude, longitude, distance)\"." ); @@ -1034,10 +1058,7 @@ $tableNamePatterns = array(); $tableNameReplacements = array(); foreach ( $this->mTableNames as $tableName ) { - // Is there a way to do this with just one regexp? - $tableNamePatterns[] = "/^$tableName\./"; - $tableNamePatterns[] = "/(\W)$tableName\./"; - $tableNameReplacements[] = $cdb->tableName( $tableName ) . "."; + $tableNamePatterns[] = CargoUtils::getSQLTablePattern($tableName); $tableNameReplacements[] = "$1" . $cdb->tableName( $tableName ) . "."; } diff --git a/CargoUtils.php b/CargoUtils.php index 8799fa9..0ecd191 100644 --- a/CargoUtils.php +++ b/CargoUtils.php @@ -188,8 +188,10 @@ } /** - * Splits a string by the delimiter, but ignores delimiters contained - * within parentheses. + * Splits a string by the delimiter, but ensures that parenthesis, separators + * and "the other quote" (single quote in a double quoted string or double + * quote in a single quoted string) inside a quoted string are not considered + * lexically. */ static function smartSplit( $delimiter, $string ) { if ( $string == '' ) { @@ -199,8 +201,6 @@ $ignoreNextChar = false; $returnValues = array(); $numOpenParentheses = 0; - $numOpenSingleQuotes = 0; - $numOpenDoubleQuotes = 0; $curReturnValue = ''; for ( $i = 0; $i < strlen( $string ); $i++ ) { @@ -216,26 +216,18 @@ $numOpenParentheses++; } elseif ( $curChar == ')' ) { $numOpenParentheses--; - } elseif ( $curChar == '\'' ) { - if ( $numOpenSingleQuotes == 0 ) { - $numOpenSingleQuotes = 1; - } else { - $numOpenSingleQuotes = 0; + } elseif ( $curChar == '\'' || $curChar == '"' ) { + $pos = CargoUtils::findQuotedStringEnd($string, $curChar, $i + 1 ); + if ( $pos === false ) { + throw new MWException( "Error: unmatched quote in SQL string constant." ); } - } elseif ( $curChar == '"' ) { - if ( $numOpenDoubleQuotes == 0 ) { - $numOpenDoubleQuotes = 1; - } else { - $numOpenDoubleQuotes = 0; - } + $curReturnValue .= substr( $string, $i, $pos - $i ); + $i = $pos; } elseif ( $curChar == '\\' ) { $ignoreNextChar = true; } - if ( $curChar == $delimiter && - $numOpenParentheses == 0 && - $numOpenSingleQuotes == 0 && - $numOpenDoubleQuotes == 0 ) { + if ( $curChar == $delimiter && $numOpenParentheses == 0 ) { $returnValues[] = trim( $curReturnValue ); $curReturnValue = ''; } else { @@ -244,10 +236,83 @@ } $returnValues[] = trim( $curReturnValue ); + if ( $ignoreNextChar ) { + throw new MWException( "Error: incomplete escape sequence." ); + } + return $returnValues; } /** + * find the end of a quoted string + */ + public static function findQuotedStringEnd( $string, $quoteChar, $pos ) { + $ignoreNextChar = false; + for ( $i = $pos ; $i < strlen($string) ; $i++ ) { + $curChar = $string{$i}; + if ( $ignoreNextChar ) { + $ignoreNextChar = false; + } elseif ( $curChar == $quoteChar ) { + if ( $i + 1 < strlen($string) && $string{$i + 1} == $quoteChar ) { + $i++; + } else { + return $i; + } + } elseif ( $curChar == '\\' ) { + $ignoreNextChar = true; + } + } + if ( $ignoreNextChar ) { + throw new MWException( "Error: incomplete escape sequence." ); + } + return false; + } + + /** + * Delete text within quotes and raise exception if a quoted string + * is not closed. + */ + public static function removeQuotedStrings( $string ) { + $noQuotesPattern = '/("|\')([^\\1\\\\]|\\\\.)*?\\1/'; + $string = preg_replace( $noQuotesPattern, '', $string ); + if ( strpos($string,'"') !== false || strpos($string,"'") !== false ) { + throw new MWException( "Error: unclosed string literal." ); + } + return $string; + } + + /** + * Generates a Regular Expression to match $fieldName in a SQL string. + * Allows for $ as valid identifier character. + */ + public static function getSQLFieldPattern( $fieldName, $closePattern = true ) { + $fieldName = str_replace( '$', '\$', $fieldName ); + $pattern = '/([^\w$.]|^)'.$fieldName; + return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' ); + } + + /** + * Generates a Regular Expression to match $tableName.$fieldName in a + * SQL string. Allows for $ as valid identifier character. + */ + public static function getSQLTableAndFieldPattern( $tableName, $fieldName, $closePattern = true ) { + $fieldName = str_replace( '$', '\$', $fieldName ); + $tableName = str_replace( '$', '\$', $tableName ); + $pattern = '/([^\w$]|^)'.$tableName.'\.'.$fieldName; + return $pattern . ( $closePattern ? '([^\w$]|$)/i' : '' ); + } + + /** + * Generates a Regular Expression to match $tableName in a SQL string. + * Allows for $ as valid identifier character. + */ + public static function getSQLTablePattern( $tableName, $closePattern = true ) { + $tableName = str_replace( '$', '\$', $tableName ); + $pattern = '/([^\w$]|^)'.$tableName.'\.'; + return $pattern . ( $closePattern ? '/i' : '' ); + } + + /** * Parse a piece of wikitext differently depending on whether * we're in a special or regular page. * -- To view, visit https://gerrit.wikimedia.org/r/258753 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I56e9bee6237e1bfd66889bf9a63b2b3216ebd2d3 Gerrit-PatchSet: 6 Gerrit-Project: mediawiki/extensions/Cargo Gerrit-Branch: master Gerrit-Owner: Ed Hoo <edward....@gmail.com> Gerrit-Reviewer: Ed Hoo <edward....@gmail.com> Gerrit-Reviewer: Umherirrender <umherirrender_de...@web.de> Gerrit-Reviewer: Yaron Koren <yaro...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits