Re: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
Robin Vickery wrote: This is another rather hackish attempt at using the tokeniser. you beast ;-) apart from the fact that I don't use mysql this is really cool. I have no time to play right now (gotta catcha plana) but I have it on my to do list of things I need to take apart and understand properly. thanks! It turns this: <?php $emu = "my_column {$banana}"; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala" . strtoupper($dropBear)); ?> into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin <?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print "= $file =\n"; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), "\n"; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key => $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case 
T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval(" return {$token[$i][1]};"); case T_VARIABLE: case T_STRING_VARNAME: $def = findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i<$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) && $token[$i][0] == T_CONCAT_EQUAL)) && getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. 
return false; } function getLHS($token, $i) { // rewind until we hit a variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) && $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) && $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens
RE: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
[top-snip] nice! I'll give it a look this morning [/snip] This is another rather hackish attempt at using the tokeniser. It turns this: <?php $emu = "my_column {$banana}"; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala" . strtoupper($dropBear)); ?> into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin <?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print "= $file =\n"; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), "\n"; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key => $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval(" return 
{$token[$i][1]};"); case T_VARIABLE: case T_STRING_VARNAME: $def = findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i<$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) && $token[$i][0] == T_CONCAT_EQUAL)) && getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. return false; } function getLHS($token, $i) { // rewind until we hit a variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) && $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. 
return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) && $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens together to see what's going on. function renderTokens($token, $start, $end) { $output = ''; for ( $i=$start; $i<$end; $i++) { $output .= is_array($token[$i]) ? $token[$i][1] : $token[$i]; }
Re: [PHP] Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI
This is another rather hackish attempt at using the tokeniser. It turns this: <?php $emu = "my_column {$banana}"; $wallaby = 'my_table'; $kookaburra = 'SELECT * FROM'; $kookaburra .= $wallaby; $koala = 'ASC'; $taipan = ' ORDER BY' . $emu; $dropBear = 'group by something'; mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala" . strtoupper($dropBear)); ?> into this: SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something) Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries. -robin <?php //looks at all $dir/*.php files. $dir = '/path/to/php/files'; foreach (getPhpFileList($dir) as $file) { print "= $file =\n"; $token = tokeniseFile($file); // first find all calls to mysql_query() $mysqlCalls = getMysqlQueryCalls($token); foreach($mysqlCalls as $range) { $sql = resolveExpression($token, $range[0], $range[0], $range[1]); echo trim(preg_replace('/\s+/', ' ', $sql)), "\n"; // tidy it a little } } function getMysqlQueryCalls($token) { $callList = array(); for ($i=0; isset($token[$i]); $i++) { if (!is_array($token[$i])) continue; // not interested in atomic tokens if (strtolower($token[$i][1]) !== 'mysql_query') continue; $args = getMysqlQueryArguments($i, $token); if ($args !== false) $call[] = $args; } return $call; } function nameTokens($token, $start = 0, $end = null) { if (is_null($end)) $end = sizeof($token); $range = array_slice($token, $start, $end - $start); foreach ($range as $key => $tok) { if (is_array($tok)) $range[$key][0] = token_name($range[$key][0]); } return $range; } function resolveToken($token, $cursor, $i) { if(is_array($token[$i])) { switch ($token[$i][0]) { case T_DOLLAR_OPEN_CURLY_BRACES: case T_CURLY_OPEN: break; case T_STRING: case T_WHITESPACE: case T_ENCAPSED_AND_WHITESPACE: return $token[$i][1]; case T_CONSTANT_ENCAPSED_STRING: return eval(" return {$token[$i][1]};"); case T_VARIABLE: case T_STRING_VARNAME: $def = 
findLastDefinition($token, $cursor, $i); if ($def === false) { // can't find anything else to replace $var with // presume it's defined elsewhere or we're not clever // enough to find it. return preg_replace('/^\$*/', '$', $token[$i][1]); } if ($def[3] == '.=') return resolveToken($token, $def[0], $i) . resolveExpression($token, $def[0], $def[1], $def[2]); return resolveExpression($token, $def[0], $def[1], $def[2]); default: return '('.token_name($token[$i][0]) . ':' . $token[$i][1] . ')'; } } else { switch ($token[$i]) { case '(': case ')': return $token[$i]; default: return ''; } } } function ResolveExpression($token, $cursor, $start, $end) { $output = ''; // just try and resolve all the tokens in the expression, concat them and throw them back. for ( $i=$start; $i<$end; $i++) { $output .= resolveToken($token, $cursor, $i); } return $output; } function findLastDefinition($token, $i, $id) { // make sure all variables are in the form $name as ${name} ones are // just 'name' by the time they end up here. $name = preg_replace('/^\$*/', '$', $token[$id][1]); // rewind until we hit an assignment or run out of tokens while (isset($token[--$i])) { // if we catch an assignment and our $name is to the left then capture the right. if (($token[$i] == '=' || (is_array($token[$i]) && $token[$i][0] == T_CONCAT_EQUAL)) && getLHS($token, $i) == $name) { $RHS = getRHS($token, $i); $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i]; return $RHS; } } // we've run out of tokens, so seems like we can't find where this variable was defined. return false; } function getLHS($token, $i) { // rewind until we hit a variable name or run out of tokens while (isset($token[--$i])) { if (is_array($token[$i]) && $token[$i][0] == T_VARIABLE) return $token[$i][1]; } // run out of tokens, we can't get a left hand side. 
return false; } function getRHS($token, $i) { // save the cursor at the assignment operator, so if $name is referred to on // the RHS, and we have to look for it again, we don't look at this bit. $cursor = $i; // fast forward until we get to a ';' or run out of tokens. while (isset($token[++$i]) && $token[$i] != ';') { if (!isset($start)) $start = $i; } // if we've run out of tokens or the RHS is empty then give up. if (!isset($start) || !isset($token[$i])) return false; return array($cursor, $start, $i); } // just stick all the tokens together to see what's going on. function renderTokens($token, $start, $end) { $output = ''; for ( $i=$start; $i<$end; $i++) { $output .= is_array($token[$i]) ? $token[$i][1] : $token[$i]; } return $output; } function getMysqlQueryArguments($i,