Re: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
Robin Vickery wrote: This is another rather hackish attempt at using the tokeniser. you beast ;-) apart from the fact that I don't use mysql this is really cool. I have no time to play right now (gotta catcha plana) but I have it on my to do list of things I need to take apart and understand properly. thanks! It turns this: ?php $emu = my_column {$banana}; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query($kookaburra WHERE (up = 'down') $taipan $koala . strtoupper($dropBear)); ? into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin ?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print = $file =\n; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), \n; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key = $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case 
T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval( return {$token[$i][1]};); case T_VARIABLE: case T_STRING_VARNAME: $def = findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) $token[$i][0] == T_CONCAT_EQUAL)) getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. 
return false; } function getLHS($token, $i) { // rewind until we hit an variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens
RE: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
[top-snip] nice! I'll give it a look this morning [/snip] This is another rather hackish attempt at using the tokeniser. It turns this: ?php $emu = my_column {$banana}; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query($kookaburra WHERE (up = 'down') $taipan $koala . strtoupper($dropBear)); ? into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin ?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print = $file =\n; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), \n; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key = $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval( return 
{$token[$i][1]};); case T_VARIABLE: case T_STRING_VARNAME: $def = findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) $token[$i][0] == T_CONCAT_EQUAL)) getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. return false; } function getLHS($token, $i) { // rewind until we hit an variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. 
return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens together to see what's going on. function renderTokens($token, $start, $end) { $output = ''; for ( $i=$start; $i$end; $i++) { $output .= is_array($token[$i]) ? $token[$i][1] : $token[$i]; }
Re: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
This is another rather hackish attempt at using the tokeniser. It turns this: ?php $emu = my_column {$banana}; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query($kookaburra WHERE (up = 'down') $taipan $koala . strtoupper($dropBear)); ? into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin ?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print = $file =\n; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), \n; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key = $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval( return {$token[$i][1]};); case T_VARIABLE: case T_STRING_VARNAME: $def = 
findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) $token[$i][0] == T_CONCAT_EQUAL)) getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. return false; } function getLHS($token, $i) { // rewind until we hit an variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. 
return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens together to see what's going on. function renderTokens($token, $start, $end) { $output = ''; for ( $i=$start; $i$end; $i++) { $output .= is_array($token[$i]) ? $token[$i][1] : $token[$i]; } return $output; } function getMysqlQueryArguments($i,
[PHP] RE: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
[snip] here is a new and improved version: <?php /* * Query Finder [/snip] Sweet, I'll give it a run here in a few minutes. -- PHP General Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php
[PHP] RE: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
[top-snip, because it makes sense in the context] On line 98 I changed from echo "Line " . str_pad($lineNo, 4, ' ', PAD_LEFT) . ": " . $fileLine; to echo "Line " . str_pad($lineNo, 4, ' ', PAD_LEFT) . ": " . ltrim($fileLine); to account for indented code; it lines everything up neatly. Also, it would be cool to be able to specify that the entire query for a filtered item would show up. Therefore, if I did: php queryFinder.php -nr -F "FROM MyDatabase" I would get queries even if they were spread across multiple lines. I will work on that today. [/top-snip] <?php /* * Query Finder * Jay Blanchard, Jochem Maas * August 2005 * NOT REALLY TOO EXTENSIBLE * * usage:call from command line, perform manual output to text file * i.e. php qryfind.php nameOfFileToSave.txt */ /* script help message */ if (in_array('--help', $_SERVER['argv']) || in_array('-h', $_SERVER['argv']) || in_array('-?', $_SERVER['argv'])) { echo ' Use this command to search for SQL query strings inside php files. call from command line, perform manual output to text file i.e. php '.basename(__FILE__).' nameOfFileToSave.txt arguments: -nr no recursion - do not search in subdirectories -f filtering - only show lines that contain the string passed as the arg to this flag --filtersame as -f -v show more output, e.g. prints every scanned file instead of only the ones in whichs query strings were found. 
--verbose same as -v show this message: -?, -h or --help '; exit; } /* cruise the directory looking for PHP files */ function findTheQueries($theDirectory, $filterString = null, $beVerbose = false, $NoRec = false) { static $arrQueryStarters, $arrQueryStartersCnt, $dirSep; if (!isset($arrQueryStarters)) { $arrQueryStarters = array('SELECT ', 'INSERT ', 'UPDATE ', 'FROM ', 'EXECUTE ', 'WHERE ', 'ORDER BY ', 'LEFT JOIN '); $arrQueryStartersCnt= count($arrQueryStarters); // Determine OS specific settings $uname = php_uname(); if (substr($uname, 0, 7) == Windows) { $dirSep = \\; } else if (substr($uname, 0, 3) == Mac) { $dirSep = /; } else { $dirSep = /; } } if (is_dir($theDirectory)) { /* * or you could just use glob('*.php') */ if ($dh = opendir($theDirectory)) { while (($theFile = readdir($dh)) !== false) { /* recurse subdirs */ if (is_dir($theDirectory.$dirSep.$theFile)) { if ($theFile != '.' $theFile != '..' !$NoRec) { findTheQueries($theDirectory.$dirSep.$theFile, $filterString, $beVerbose); } continue; } /* we only want to look at PHP files */ $fileParts = array_reverse(explode('.', $theFile)); if(php == $fileParts[0]){ /* always echo the file name, even if no queries */ $fileNameOutputLine = Filename: {$theDirectory}{$dirSep}{$theFile}\n; if ($beVerbose) { echo $fileNameOutputLine; unset($fileNameOutputLine); } $lineNo = 0; /* cruise the file looking for queries */ $openFile = fopen($theDirectory.$dirSep.$theFile, r); while(!feof($openFile)){ $fileLine = fgets($openFile, 4096); $lineNo++; /* loop through query starter array */ for($i = 0; $i $arrQueryStartersCnt; $i++){ if(strstr($fileLine, $arrQueryStarters[$i])) { if (!empty($filterString) !strstr($fileLine, $filterString)) { continue; } if (isset($fileNameOutputLine)) { echo $fileNameOutputLine; unset($fileNameOutputLine); } echo Line . str_pad($lineNo, 4, ' ', PAD_LEFT) . : . ltrim($fileLine); break; // if we find a line no need to find it again because it contains more than one keyword. 
} } } fclose($openFile); } } closedir($dh); } else { echo Could not open: $theDirectory\n; } } else { echo Bad directory: $theDirectory\n; } } /* * Determine command args */ $filterString = null; $beVerbose = false; $NoRec = false; foreach ($_SERVER['argv'] as $k = $v) { /* determine
[PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
Jay Blanchard wrote: [top-snip, because it makes sense in the context] On line 98 I changed from echo "Line " . str_pad($lineNo, 4, ' ', PAD_LEFT) . ": " . $fileLine; to echo "Line " . str_pad($lineNo, 4, ' ', PAD_LEFT) . ": " . ltrim($fileLine); to account for indented code; it lines everything up neatly. Also, it would be cool to be able to specify that the entire query for a filtered item would show up. Therefore, if I did: php queryFinder.php -nr -F "FROM MyDatabase" -f I would get queries even if they were spread across multiple lines. I will work on that today. this should work like this regardless of any extra filter given (obviously the filter should also be taken into account if set) this seems hard - I wanted to make it do this also... I had a think about it but I just couldn't see a simple way of doing it... how does the code 'know' one string belongs to another... I figured it's impossible to do properly unless you start searching/matching/analysing the tokenized code... which is when I decided to do something different ;-) interested to see where you go with this! [/top-snip] <?php /* * Query Finder * Jay Blanchard, Jochem Maas * August 2005 * NOT REALLY TOO EXTENSIBLE * * usage:call from command line, perform manual output to text file * i.e. php qryfind.php nameOfFileToSave.txt */ /* script help message */ if (in_array('--help', $_SERVER['argv']) || in_array('-h', $_SERVER['argv']) || in_array('-?', $_SERVER['argv'])) { echo ' Use this command to search for SQL query strings inside php files. call from command line, perform manual output to text file i.e. php '.basename(__FILE__).' nameOfFileToSave.txt arguments: -nr no recursion - do not search in subdirectories -f filtering - only show lines that contain the string passed as the arg to this flag --filtersame as -f -v show more output, e.g. prints every scanned file instead of only the ones in whichs query strings were found. 
--verbose same as -v show this message: -?, -h or --help '; exit; } /* cruise the directory looking for PHP files */ function findTheQueries($theDirectory, $filterString = null, $beVerbose = false, $NoRec = false) { static $arrQueryStarters, $arrQueryStartersCnt, $dirSep; if (!isset($arrQueryStarters)) { $arrQueryStarters = array('SELECT ', 'INSERT ', 'UPDATE ', 'FROM ', 'EXECUTE ', 'WHERE ', 'ORDER BY ', 'LEFT JOIN '); $arrQueryStartersCnt= count($arrQueryStarters); // Determine OS specific settings $uname = php_uname(); if (substr($uname, 0, 7) == Windows) { $dirSep = \\; } else if (substr($uname, 0, 3) == Mac) { $dirSep = /; } else { $dirSep = /; } } if (is_dir($theDirectory)) { /* * or you could just use glob('*.php') */ if ($dh = opendir($theDirectory)) { while (($theFile = readdir($dh)) !== false) { /* recurse subdirs */ if (is_dir($theDirectory.$dirSep.$theFile)) { if ($theFile != '.' $theFile != '..' !$NoRec) { findTheQueries($theDirectory.$dirSep.$theFile, $filterString, $beVerbose); } continue; } /* we only want to look at PHP files */ $fileParts = array_reverse(explode('.', $theFile)); if(php == $fileParts[0]){ /* always echo the file name, even if no queries */ $fileNameOutputLine = Filename: {$theDirectory}{$dirSep}{$theFile}\n; if ($beVerbose) { echo $fileNameOutputLine; unset($fileNameOutputLine); } $lineNo = 0; /* cruise the file looking for queries */ $openFile = fopen($theDirectory.$dirSep.$theFile, r); while(!feof($openFile)){ $fileLine = fgets($openFile, 4096); $lineNo++; /* loop through query starter array */ for($i = 0; $i $arrQueryStartersCnt; $i++){ if(strstr($fileLine, $arrQueryStarters[$i])) { if (!empty($filterString) !strstr($fileLine, $filterString)) { continue; } if (isset($fileNameOutputLine)) { echo $fileNameOutputLine; unset($fileNameOutputLine); } echo Line . str_pad($lineNo, 4, ' ', PAD_LEFT) . : . ltrim($fileLine); break; // if we find a line no need to