gwynne Sun Jul 29 14:31:54 2007 UTC
Modified files: (Branch: GWYNNE_PLAYS_HERE)
  /phd            build.php
  /phd/formats    xhtml.php
  /phd/include    PhDReader.class.php
Log:
  Committed to my branch ONLY. This is my vision of some of what should be going on in an output format, though a lot of this should be in a theme really. Discussion versus Hannes' method is requested :).
http://cvs.php.net/viewvc.cgi/phd/build.php?r1=1.4&r2=1.4.2.1&diff_format=u Index: phd/build.php diff -u phd/build.php:1.4 phd/build.php:1.4.2.1 --- phd/build.php:1.4 Fri Jul 27 23:09:27 2007 +++ phd/build.php Sun Jul 29 14:31:54 2007 @@ -1,15 +1,36 @@ <?php +error_reporting( E_ALL | E_STRICT ); + require_once 'config.php'; require_once 'formats/xhtml.php'; -$phd = new PhDXHTMLReader( "${OPTIONS[ 'xml_root' ]}/.manual.xml" ); -$phd->seek( "function.dotnet-load" ); -echo date( DATE_RSS )." done seeking\n"; - -ob_start(); -while( $phd->nextNode() ) { - print $phd->transform(); +file_put_contents( dirname( __FILE__ ) . "/temp.xml", <<<~XML +<?xml version="1.0" encoding="utf-8"?> + +<book> + <part id="part1"> + <chapter id="chap1"> + Using the application <application>autoconf</application>, we can do some + fun stuff, since <command>php</command> does other fun things, and dolor + Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, + quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo + consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse + cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat + non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. + </chapter> + </part> +</book> +XML + ); + +$phd = new PhDReader_XHTML( dirname( __FILE__ ) . 
"/temp.xml", NULL, 2 ); + +while ( $phd->transformChunk( $chunk ) ) { + print "{$chunk}\n"; } +print "{$chunk}\n"; + $phd->close(); ?> http://cvs.php.net/viewvc.cgi/phd/formats/xhtml.php?r1=1.7.2.1&r2=1.7.2.2&diff_format=u Index: phd/formats/xhtml.php diff -u phd/formats/xhtml.php:1.7.2.1 phd/formats/xhtml.php:1.7.2.2 --- phd/formats/xhtml.php:1.7.2.1 Sun Jul 29 09:46:39 2007 +++ phd/formats/xhtml.php Sun Jul 29 14:31:54 2007 @@ -1,6 +1,6 @@ <?php -/* $Id: xhtml.php,v 1.7.2.1 2007/07/29 09:46:39 gwynne Exp $ +/* $Id: xhtml.php,v 1.7.2.2 2007/07/29 14:31:54 gwynne Exp $ +-------------------------------------------------------------------------+ | Copyright(c) 2007 | | Authors: | @@ -20,53 +20,7 @@ /* Grab the PhDReader parent class. */ require_once 'include/PhDReader.class.php'; -class PhDXHTMLReader extends PhDReader { - - protected $map = array( - 'application' => 'span', - 'classname' => 'span', - 'code' => 'code', - 'collab' => 'span', - 'collabname' => 'span', - 'command' => 'span', - 'computeroutput' => 'span', - 'constant' => 'span', - 'emphasis' => 'em', - 'enumname' => 'span', - 'envar' => 'span', - 'filename' => 'span', - 'glossterm' => 'span', - 'holder' => 'span', - 'informaltable' => 'table', - 'itemizedlist' => 'ul', - 'listitem' => 'li', - 'literal' => 'span', - 'mediaobject' => 'div', - 'methodparam' => 'span', - 'member' => 'li', - 'note' => 'div', - 'option' => 'span', - 'orderedlist' => 'ol', - 'para' => 'p', - 'parameter' => 'span', - 'partintro' => 'div', - 'productname' => 'span', - 'propname' => 'span', - 'property' => 'span', - 'proptype' => 'span', - 'section' => 'div', - 'simplelist' => 'ul', - 'simpara' => 'p', - 'title' => array( - /* DEFAULT */ 'h1', - 'refsect1' => 'h3', - 'example' => 'h4', - ), - 'year' => 'span', - ); - protected $CURRENT_FUNCTION_ID = ""; - protected $CURRENT_REFERENCE_ID = ""; - protected $functionList = array(); +class PhDReader_XHTML extends PhDReader { public function __construct( $file, $encoding = 'utf-8', 
$options = NULL ) { parent::__construct( $file, $encoding, $options ); @@ -80,51 +34,153 @@ return 'XHTML 1.0 Transitional'; } - - public function format_refentry( $open ) { - - if ( $open ) { - $this->CURRENT_FUNCTION_ID = $id = $this->getID(); - - return sprintf( '<div id="%s" class="refentry">', $id ); - } - $this->CURRENT_FUNCTION_ID = ""; - return "</div>"; - } - public function format_reference( $open ) { - if ( $open ) { - $this->CURRENT_REFERENCE_ID = $id = $this->getID(); - - return sprintf( '<div id="%s" class="reference">', $id ); - - } - $content = "</div>"; - $content .= '<ul class="funclist">'; - foreach( $this->functionList as $func => $desc ) { - $content .= sprintf( '<li><a href="function.%1$s.html" class="refentry">%1$s</a></li>', $func ); - } - $content .= "</ul>\n"; - $this->CURRENT_REFERENCE_ID = ""; - $this->functionList = array(); - return $content; - - } - - public function format_function( $open ) { - return sprintf( '<a href="function.%1$s.html">%1$s</a>', $this->readContent() ); - - } - - public function format_refsect1( $open ) { - - if ( $open ) { - return sprintf( '<div class="refsect %s">', $this->readAttribute( "role" ) ); - } - return "</div>\n"; + protected function transformNode( $name, $type, &$output ) { + + switch ( $type ) { + + case XMLReader::ELEMENT: + case XMLReader::END_ELEMENT: + return $this->processElement( $name, $type == XMLReader::ELEMENT, $output ); + break; + + case XMLReader::TEXT: + $output = $this->value; + return FALSE; + + case XMLReader::CDATA: + $output = $this->processCDATA( $this->value ); + return FALSE; + + case XMLReader::ENTITY_REF: + $output = '<div class="error">WARNING: UNRESOLVED ENTITY '.htmlspecialchars( $name ).'</div>'; + return FALSE; + + } + + } + + protected function processElement( $name, $isOpen, &$output ) { + static $handlerMap = NULL; + + if ( is_null( $handlerMap ) ) { + $spanName = array( '<span class="%n%">', FALSE, '</span>', FALSE ); + $divName = array( '<div class="%n%">', FALSE, 
'</div>', FALSE ); + $divNameChunked = array( '<div class="%n%">', FALSE, '</div>', TRUE ); + $oneToOne = array( '<%n%>', FALSE, '</%n%>', FALSE ); + + $handlerMap = array( + 'application' => $spanName, + 'classname' => $spanName, + 'code' => $oneToOne, + 'collab' => $spanName, + 'collabname' => $spanName, + 'command' => $spanName, + 'computerOutput' => $spanName, + 'constant' => $spanName, + 'emphasis' => $oneToOne, + 'enumname' => $spanName, + 'envar' => $spanName, + 'filename' => $spanName, + 'glossterm' => $spanName, + 'holder' => $spanName, + 'informatlable' => array( '<table>', FALSE, '</table>', FALSE ), + 'itemizedlist' => array( '<ul>', FALSE, '</ul>', FALSE ), + 'listitem' => array( '<li>', FALSE, '</li>', FALSE ), + 'literal' => $spanName, + 'mediaobject' => $divName, + 'methodparam' => $spanName, + 'member' => array( '<li>', FALSE, '</li>', FALSE ), + 'note' => $divName, + 'option' => $spanName, + 'orderedlist' => array( '<ol>', FALSE, '</ol>', FALSE ), + 'para' => array( '<p>', FALSE, '</p>', FALSE ), + 'parameter' => $spanName, + 'partintro' => $divName, + 'productname' => $spanName, + 'propname' => $spanName, + 'property' => $spanName, + 'proptype' => $spanName, + 'section' => $divNameChunked, + 'simplelist' => array( '<ul>', FALSE, '</ul>', FALSE ), + 'simpara' => array( '<p>', FALSE, '</p>', FALSE ), + 'title' => array( 'checkparentname', array( '__default' => 'h1', 'refsect1' => 'h3', 'example' => 'h4' ) ), + 'year' => $spanName, + 'refentry' => array( '<div id="%i%" class="refentry">', FALSE, '</div>', TRUE, TRUE ), + 'reference' => array( $this, 'format_reference' ), + 'function' => array( '<a href="function.%v%.html">', FALSE, '</a>', FALSE ), + 'refsect1' => array( '<div class="refsect_%r%">', FALSE, '</div>', FALSE ), + '__default' => array( $this, 'unknownElement' ), + ); + } + + $mapping = isset( $handlerMap[ $name ] ) ? 
$handlerMap[ $name ] : $handlerMap[ '__default' ]; + if ( is_array( $mapping ) ) { + if ( is_string( $mapping[ 0 ] ) ) { + switch ( $mapping[ 0 ] ) { + case 'checkparentname': + $output = '<div class="warning">NOT IMPLEMENTED YET.</div>'; + return FALSE; + default: + $id = $this->getID(); + $output = $this->formatMappingString( $name, $id, $isOpen ? $mapping[ 0 ] : $mapping[ 2 ] ); + if ( !empty( $mapping[ 4 ] ) ) { + $this->pushStack( $id ); + } + return ( $isOpen ? $mapping[ 1 ] : $mapping[ 3 ] ); + } + } else if ( is_callable( $mapping ) ) { + return call_user_func( $mapping, $name, $isOpen, &$output ); + } + } else if ( is_string( $mapping ) ) { + if ( $isOpen ) { + $output = $this->formatMappingString( $name, $this->getID(), $mapping ); + } else { + $output = ''; + } + return FALSE; + } + $output = '<div class="warning">Bad handler string for '.$name.'!</div>'; + return FALSE; - } + } + + protected function processCDATA( $content ) { + + return '<div class="phpcode">' . highlight_string( $content ) . '</div>'; + + } + + protected function formatMappingString( $name, $id, $string ) { + + // XXX Yes, this needs heavy optimization, it's example for now. + return str_replace( array( '%n%', '%i%', '%v%', '%r' ), + array( $name, $id, $this->readInnerXML(), $this->getAttribute( 'role' ) ), + $string ); + + } + + protected function format_reference( $name, $isOpen, $output ) { + if ( $isOpen ) { + $output = sprintf( '<div id="%s" class="reference">', $this->getID() ); + return FALSE; + } + $output = '</div>' . 
+ '<ul class="funclist">'; + foreach ( $this->popStack() as $func => $desc ) { + $output .= sptrinf( '<li><a href="function.%1$s.html" class="refentry">%1$s</a></li>', $func ); + } + $output .= '</ul>'; + return TRUE; + } + + protected function unknownElement( $name, $isOpen, $output ) { + $output = "Can't handle a {$name}.\n"; + return FALSE; + + } +/* public function format_link( $open ) { $this->moveToNextAttribute(); @@ -137,7 +193,7 @@ public function format_methodsynopsis( $open ) { - /* We read this element to END_ELEMENT so $open is useless */ + /* We read this element to END_ELEMENT so $open is useless * $content = '<div class="methodsynopsis">'; $root = $this->name; @@ -182,56 +238,7 @@ return sprintf( '<div class="refnamediv"><span class="refname">%s</span><span class="refpurpose">%s</span></div>', $refname, $refpurpose ); } - protected function transormFromMap($open, $tag, $name) { - - $tag = $this->map[ $name ]; - if($open) { - return sprintf( '<%s class="%s">', $tag, $name ); - } - return "</$tag>"; - - } - - public function format_listing_hyperlink_function( $matches ) { - - $link = str_replace( '_', '-', $matches[ 1 ] ); - $link = "function${link}.html"; - return '<a class="phpfunc" href="'.$link.'">'.$matches[ 1 ].'</a></span>'.$matches[ 2 ]; - - } - - public function highlight_php_code( $str ) { /* copy&paste from livedocs */ - - if ( is_array( $str ) ) { - $str = $str[ 0 ]; - } - - $tmp = str_replace( - array( - ' ', - '<font color="', // for PHP 4 - '<span style="color: ', // for PHP 5.0.0RC1 - '</font>', - "\n ", - ' ' - ), - array( - ' ', - '<span class="', - '<span class="', - '</span>', - "\n ", - ' ' - ), - highlight_string( $str, TRUE ) - ); - - $tmp = preg_replace_callback( '{([\w_]+)\s*</span>(\s*<span\s+class="keyword">\s*\()}m', - array( $this, 'format_listing_hyperlink_function' ), $tmp ); - return sprintf( '<div class="phpcode">%s</div>', $tmp ); - - } - +*/ } /* 
http://cvs.php.net/viewvc.cgi/phd/include/PhDReader.class.php?r1=1.6.2.1&r2=1.6.2.2&diff_format=u Index: phd/include/PhDReader.class.php diff -u phd/include/PhDReader.class.php:1.6.2.1 phd/include/PhDReader.class.php:1.6.2.2 --- phd/include/PhDReader.class.php:1.6.2.1 Sun Jul 29 09:46:39 2007 +++ phd/include/PhDReader.class.php Sun Jul 29 14:31:54 2007 @@ -1,6 +1,6 @@ <?php -/* $Id: PhDReader.class.php,v 1.6.2.1 2007/07/29 09:46:39 gwynne Exp $ +/* $Id: PhDReader.class.php,v 1.6.2.2 2007/07/29 14:31:54 gwynne Exp $ +-------------------------------------------------------------------------+ | Copyright(c) 2007 | | Authors: | @@ -22,27 +22,112 @@ const XMLNS_XLINK = "http://www.w3.org/1999/xlinK"; const XMLNS_PHD = "http://www.php.net/ns/phd"; - protected $map = array(); - protected $STACK = array(); + protected $stack = array(); public function __construct( $file, $encoding = "utf-8", $options = NULL ) { if ( !parent::open( $file, $encoding, $options ) ) { throw new Exception(); } + $this->read(); } - + public function __destruct() { } - /* Format subclasses must implement these to make them real formats. */ + // *** + // Format subclasses must implement these to make them real formats. + // THIS IS THE OFFICIAL OUTPUT FORMAT INTERFACE. + + // proto string getFormatName( void ) + // Return the name of the format. abstract public function getFormatName(); - abstract protected function transformFromMap( $open, $name ); + + // proto string transformNode( string name, int type, string &output ) + // Transform a given node, returning the binary string output. Binary + // strings ARE handled safely. This function will be called for all + // element, text, cdata, entity reference, and end element nodes. It + // is always valid for this method to make the parser move around in + // the file. Return TRUE to create a chunk boundary, FALSE otherwise. + abstract protected function transformNode( $name, $type, &$output ); - /* These are new functions, extending XMLReader. 
*/ + // *** + // Protected methods (intended for internal and subclass use only) + + // proto array getAllAttributes( void ) + // Return all the attributes in the current element node as name:ns => + // value pairs. Prefer the getAttribute*() methods defined by XMLReader + // when possible; use this only when you really do need all the + // attributes. An element without any attributes will result in an empty + // array, while a non-element node will result in a return of FALSE. + protected function getAttributes() { + + $type = $this->nodeType; + if ( $type != XMLReader::ELEMENT && $type != XMLReader::END_ELEMENT ) { + return FALSE; + } + $attrCount = $this->attributeCount; + $attrs = array(); + if ( $attrCount > 0 ) { + for ( $i = 0; $i < $attrCount; ++$i ) { + $this->moveToAttributeNo( $i ); + $attrs[ $this->name ] = $this->value; + } + $this->moveToElement(); + } + return $attrs; + + } - /* Seek to an ID within the file. */ + // proto string getID( void ) + // Get the ID of the current element. Works on element and end element + // nodes only, returning an empty string in all other cases. + protected function getID() { + + if ( $this->hasAttributes && $this->moveToAttributeNs( "id", self::XMLNS_XML ) ) { + $id = $this->value; + $this->moveToElement(); + return $id; + } + return ""; + + } + + // protected void pushStack( mixed value ) + // Push a value of any kind onto the parser stack. The stack is not used + // by the parser; it is intended as a cheap data store for formats and + // themes. + protected function pushStack( $value ) { + + array_push( $this->stack, $value ); + + } + + // protected mixed stackTop( void ) + // Return the top value on the stack. + protected function stackTop() { + + return count( $this->stack ) ? $this->stack[ 0 ] : NULL; + + } + + // protected mixed popStack( void ) + // Pop the top value off the stack and return it. 
+ protected function popStack() { + + return array_pop( $this->stack ); + + } + + // *** + // Public methods + + // proto bool seek( string id ) + // Seek to an ID. This is used to start the parser somewhere that isn't at + // the beginning (duh). Be careful; this does not cause the parser to halt + // at the closing element of a successful seek. Don't forget to check the + // return value. public function seek( $id ) { while( parent::read() ) { @@ -54,6 +139,70 @@ return FALSE; } + + // proto string transform( void ) + // Transform the whole tree as one giant chunk, IGNORING the output + // format's chunker. Returns the tree, or FALSE on error. + public function transform() { + + $allData = ''; + while ( ( $data = $this->transformChunk() ) !== FALSE ) { + $allData .= $data; + } + return $allData; + + } + + // proto bool transformChunk( string &outData ) + // Transform nodes until the output format says it's time to output a + // chunking boundary or the parser runs out of data. Returns TRUE on + // success, FALSE on EOF. $data contains the transformed data, if any. + public function transformChunk( &$outData ) { + global $OPTIONS; + + $hasMore = TRUE; + $data = fopen( "php://temp/maxmemory:{$OPTIONS[ 'chunking_memory_limit' ]}", "r+" ); + $isChunk = FALSE; + do { + $nodeName = $this->name; + $nodeType = $this->nodeType; + switch ( $nodeType ) { + case XMLReader::NONE: + break; + + case XMLReader::ELEMENT: + case XMLReader::END_ELEMENT: + case XMLReader::TEXT: + case XMLReader::CDATA: + case XMLReader::ENTITY_REF: + $isChunk = $this->transformNode( $nodeName, $nodeType, $output ); + fwrite( $data, $output ); + break; + + case XMLReader::ENTITY: + case XMLReader::PI: + case XMLReader::DOC_TYPE: + case XMLReader::DOC: + case XMLReader::DOC_FRAGMENT: + case XMLReader::NOTATION: + case XMLReader::WHITESPACE: + case XMLReader::SIGNIFICANT_WHITESPACE: + case XMLReader::END_ENTITY: + case XMLReader::XML_DECLARATION: + // Eat it for lunch. 
+ break; + } + $hasMore = $this->read(); + } while ( !$isChunk && $hasMore ); + + rewind( $data ); + $outData = stream_get_contents( $data ); + fclose( $data ); + return $hasMore; + + } + +/* public function getID() { if ( $this->hasAttributes && $this->moveToAttributeNs("id", self::XMLNS_XML) ) { $id = $this->value; @@ -63,7 +212,6 @@ return ""; } - /* Go to the next useful node in the file. */ public function nextNode() { while( $this->read() ) { @@ -84,14 +232,12 @@ } - /* Read a node with the right name? */ public function readNode( $nodeName ) { return $this->read() && !( $this->nodeType == XMLReader::END_ELEMENT && $this->name == $nodeName ); } - /* Get the content of a named node, or the current node. */ public function readContent( $node = NULL ) { $retval = ""; @@ -106,25 +252,23 @@ } - /* Get the attribute value by name, if exists. */ public function readAttribute( $attr ) { return $this->moveToAttribute( $attr ) ? $this->value : ""; } - /* Handle unmapped nodes. */ public function __call( $func, $args ) { if ( $this->nodeType == XMLReader::END_ELEMENT ) { - /* ignore */ return; + /* ignore * return; } trigger_error( "No mapper for $func", E_USER_WARNING ); /* NOTE: * The _content_ of the element will get processed even though we dont * know how to handle the elment itself - */ + * return ""; } @@ -141,7 +285,6 @@ return $tag; } - /* Perform a transformation. */ public function transform() { $type = $this->nodeType; @@ -178,8 +321,8 @@ case XMLReader::COMMENT: case XMLReader::WHITESPACE: case XMLReader::SIGNIFICANT_WHITESPACE: - /* swallow it */ - /* XXX This could lead to a recursion overflow if a lot of comment nodes get strung together. */ + // swallow it + // XXX This could lead to a recursion overflow if a lot of comment nodes get strung together. $this->read(); return $this->transform(); @@ -189,7 +332,7 @@ } } - +*/ } /*