gwynne Sat Jul 28 14:27:32 2007 UTC
Modified files:
  /phd/formats    xhtml.php
  /phd/include    PhDReader.class.php
Log:
  Beginnings of a new implementation. This code DOES NOT WORK, and is very incomplete. The last set of working code has been tagged in the repository.
http://cvs.php.net/viewvc.cgi/phd/formats/xhtml.php?r1=1.5&r2=1.6&diff_format=u Index: phd/formats/xhtml.php diff -u phd/formats/xhtml.php:1.5 phd/formats/xhtml.php:1.6 --- phd/formats/xhtml.php:1.5 Fri Jul 27 23:49:12 2007 +++ phd/formats/xhtml.php Sat Jul 28 14:27:32 2007 @@ -1,6 +1,6 @@ <?php -/* $Id: xhtml.php,v 1.5 2007/07/27 23:49:12 gwynne Exp $ +/* $Id: xhtml.php,v 1.6 2007/07/28 14:27:32 gwynne Exp $ +-------------------------------------------------------------------------+ | Copyright(c) 2007 | | Authors: | @@ -155,7 +155,7 @@ public function format_listing_hyperlink_function( $matches ) { $link = str_replace( '_', '-', $matches[ 1 ] ); - $link = "function${link}.html"; + $link = "function{$link}.html"; return '<a class="phpfunc" href="'.$link.'">'.$matches[ 1 ].'</a></span>'.$matches[ 2 ]; } http://cvs.php.net/viewvc.cgi/phd/include/PhDReader.class.php?r1=1.4&r2=1.5&diff_format=u Index: phd/include/PhDReader.class.php diff -u phd/include/PhDReader.class.php:1.4 phd/include/PhDReader.class.php:1.5 --- phd/include/PhDReader.class.php:1.4 Fri Jul 27 23:49:12 2007 +++ phd/include/PhDReader.class.php Sat Jul 28 14:27:32 2007 @@ -1,6 +1,6 @@ <?php -/* $Id: PhDReader.class.php,v 1.4 2007/07/27 23:49:12 gwynne Exp $ +/* $Id: PhDReader.class.php,v 1.5 2007/07/28 14:27:32 gwynne Exp $ +-------------------------------------------------------------------------+ | Copyright(c) 2007 | | Authors: | @@ -17,8 +17,34 @@ +-------------------------------------------------------------------------+ */ +// All XML namespaces used by PhD must be defined as constants here. NEVER hardcode namespace URLs. 
+define( 'XMLNS_XML', 'http://www.w3.org/XML/1998/namespace' ); +define( 'XMLNS_XLINK', 'http://www.w3.org/1999/xlink' ); +define( 'XMLNS_PHD', 'http://phd.php.net/namespace' ); + +// Special flag for non-iterative attributes +define( 'PHD_WANT_INLINE_ATTRIBUTES', 99 ); + abstract class PhDReader extends XMLReader { + // *** + // Properties + + // What END_ELEMENT node name, namespace, and depth to stop at, NULL if none. + protected $lastElementName = NULL; + protected $lastElementNS = NULL; + protected $lastElementDepth = NULL; + + // The current list of attributes for inline attribute access. + protected $attrList = NULL; + + // The current list of name->value replacements from <define/> elements. + protected $replacementList = array(); + + // The current input stream. This isn't so unlike the flex idea of multiple input streams. + // We use this to implement includes without doing all kinds of crazy things. + protected $inputSource = NULL; + protected $map = array(); public function __construct( $file, $encoding = "utf-8", $options = NULL ) { @@ -26,30 +52,155 @@ if ( !parent::open( $file, $encoding, $options ) ) { throw new Exception(); } + $this->inputSource = $this; } public function __destruct() { } - /* Format subclasses must implement these to make them real formats. */ + // *** + // Format subclasses must implement these to make them real formats. + // THIS IS THE OFFICIAL OUTPUT FORMAT INTERFACE. + + // proto string getFormatName( void ) + // Return the name of the format. abstract public function getFormatName(); - abstract protected function transformFromMap( $open, $name ); + + // proto array getNodeTypeList( void ) + // Return an array of node types that the format cares about + // processing. Nodes with types not in this array will be SKIPPED. In + // the special case of a NULL value being returned, PhD will pass all + // nodes through to the output file unmodified; the identity + // transformation format would do this. 
A format that returns an empty + // array will result in an empty output document! + abstract protected function getNodeTypeList(); - /* These are new functions, extending XMLReader. */ + // proto string transformNode( string name, int type ) + // Transform a given node, returning the binary string output. Binary + // strings ARE handled safely. This function will be called for all + // types of nodes returned by getNodeTypeList(). In the special case + // of a NULL (not empty!) value being returned, parsing will halt + // immediately WITHOUT an error. Use the usual means to trigger a + // processing error. It is always valid for this method to make the + // parser move around in the file. + abstract protected function transformNode( $name, $type ); - /* Seek to an ID within the file. */ + // proto bool isChunkBoundary( void ) + // Return TRUE if it's time to chunk the output, FALSE otherwise. + // Always return FALSE to avoid chunking. A chunk boundary always + // includes the current node. This will often be modified by themes. + abstract protected function isChunkBoundary(); + + // *** + // Public methods + + // Seek to an ID. This is used to start the parser somewhere that isn't at + // the beginning (duh). Be careful; this does not cause the parser to halt + // at the closing element of a successful seek. Use setRoot() for that. + // Don't forget to check the return value. public function seek( $id ) { - while( parent::read() ) { - if ( $this->nodeType == XMLREADER::ELEMENT && $this->hasAttributes && - $this->moveToAttributeNs( "id", "http://www.w3.org/XML/1998/namespace" ) && $this->value == $id ) { - return $this->moveToElement(); + while ( $this->inputSource->read() ) { + if ( $this->inputSource->nodeType == XMLReader::ELEMENT && $this->inputSource->hasAttributes && + $id == $this->inputSource->getAttributeNs( "id", XMLNS_XML ) ) { + return TRUE; } } return FALSE; } + + // Seek to an ID AND set it as the root element for the parser. 
The node + // must be an element for this to succeed. + public function setRoot( $id ) { + + if ( $this->inputSource->seek( $id ) ) { + if ( $this->inputSource->nodeType != XMLReader::ELEMENT ) { + return FALSE; + } + $this->lastElementName = $this->inputSource->name; + $this->lastElementNS = $this->inputSource->namespaceURI; + $this->lastElementDepth = $this->inputSource->depth; + return TRUE; + } + return FALSE; + + } + + // Run a transformation starting from the current node until the next chunk + // boundary. This is recommended for small to medium-sized chunks. If the + // chunk buffer grows beyond a reasonable limit (adjusted in config.php), + // the parser will start spooling to disk. + public function transformChunk() { + global $OPTIONS; + + if ( ( $chunkData = fopen( "php://temp/maxmemory:{$OPTIONS[ 'chunking_memory_limit' ]}", 'r+' ) ) === FALSE ) { + PhD_Error( "Couldn't create the chunk spooling stream. Why?" ); + } + $nodeTypeList = $this->getNodeTypeList(); + $attributeMode = ( in_array( PHD_WANT_INLINE_ATTRIBUTES, $nodeTypeList ) ? 1 : + in_array( XMLReader::ATTRIBUTE, $nodeTypeList ) ? 2 : 0 ); + + do { + $type = $this->inputSource->nodeType; + + // First, handle elements in the PhD namespace specially. + if ( $type == XMLReader::ELEMENT && $this->inputSource->namespaceURI == XMLNS_PHD ) { + if ( $this->handlePhDElement() ) { // this will probably recurse! + continue; + } + } + + // Next, check for a chunking boundary. + if ( $this->isChunkingBoundary() ) { + break; + } + + // If the format wanted inline attributes and this is an element node, gobble up the attrs for the format to use. + if ( $attributeMode == 1 && $type == XMLReader::ELEMENT && $this->inputSource->hasAttributes ) { + $this->gobbleAttributes(); + } + + // Next, pass the node to the format, if it wants it. 
+ if ( in_array( $type, $nodeTypeList ) ) { + fwrite( $chunkData, $this->transformNode( $this->inputSource->name, $type ) ); + } + + // Next, if this is an element node AND the format wants attribute nodes iteratively, iterate them as a subset + if ( $type == XMLReader::ELEMENT && $this->inputSource->hasAttributes && $attributeMode == 2 ) { + fwrite( $chunkData, $this->iterateAttributes() ); + } + + // Next, clear out the last set of attributes if any. + $this->attrList = NULL; + + // And finally, advance to the next node. + } while ( $this->inputSource->read() ); + + rewind( $chunkData ); + $finalData = stream_get_contents( $chunkData ); + fclose( $chunkData ); + return $finalData; + + } + + // This is probably the most problematic issue in all the engine: + // How to handle all our custom elements gracefully with a pull model + protected function handlePhDElement() { + + switch ( $this->name ) { + case 'include': { + $this->includeFile( strlen( $t = $this->inputSource->getAttribute( 'target' ) ) ? $t : + $this->inputSource->getAttributeNs( 'href', XMLNS_XLINK ) ); + return TRUE; + } + case 'define': { + $this->replacementList[ $this->inputSource->getAttribute( 'name' ) ] = $this->inputSource->readInnerXML(); + return TRUE; + } + case 'constant': { + $this->substituteConstant( /* Go to the next useful node in the file. */ public function nextNode() { @@ -130,7 +281,7 @@ if( isset( $this->map[ $name ] ) ) { return $this->transformFromMap( $type == XMLReader::ELEMENT, $name ); } - return call_user_func( array( $this, "format_${name}" ), $type == XMLReader::ELEMENT ); + return call_user_func( array( $this, "format_{$name}" ), $type == XMLReader::ELEMENT ); break; case XMLReader::TEXT: