gwynne Sat Jul 28 14:27:32 2007 UTC
Modified files:
/phd/formats xhtml.php
/phd/include PhDReader.class.php
Log:
Beginnings of a new implementation. This code DOES NOT WORK, and is very
incomplete. The last set of working code has been tagged in the repository.
http://cvs.php.net/viewvc.cgi/phd/formats/xhtml.php?r1=1.5&r2=1.6&diff_format=u
Index: phd/formats/xhtml.php
diff -u phd/formats/xhtml.php:1.5 phd/formats/xhtml.php:1.6
--- phd/formats/xhtml.php:1.5 Fri Jul 27 23:49:12 2007
+++ phd/formats/xhtml.php Sat Jul 28 14:27:32 2007
@@ -1,6 +1,6 @@
<?php
-/* $Id: xhtml.php,v 1.5 2007/07/27 23:49:12 gwynne Exp $
+/* $Id: xhtml.php,v 1.6 2007/07/28 14:27:32 gwynne Exp $
+-------------------------------------------------------------------------+
| Copyright(c) 2007 |
| Authors: |
@@ -155,7 +155,7 @@
public function format_listing_hyperlink_function( $matches ) {
$link = str_replace( '_', '-', $matches[ 1 ] );
- $link = "function${link}.html";
+ $link = "function{$link}.html";
return '<a class="phpfunc" href="'.$link.'">'.$matches[ 1
].'</a></span>'.$matches[ 2 ];
}
http://cvs.php.net/viewvc.cgi/phd/include/PhDReader.class.php?r1=1.4&r2=1.5&diff_format=u
Index: phd/include/PhDReader.class.php
diff -u phd/include/PhDReader.class.php:1.4 phd/include/PhDReader.class.php:1.5
--- phd/include/PhDReader.class.php:1.4 Fri Jul 27 23:49:12 2007
+++ phd/include/PhDReader.class.php Sat Jul 28 14:27:32 2007
@@ -1,6 +1,6 @@
<?php
-/* $Id: PhDReader.class.php,v 1.4 2007/07/27 23:49:12 gwynne Exp $
+/* $Id: PhDReader.class.php,v 1.5 2007/07/28 14:27:32 gwynne Exp $
+-------------------------------------------------------------------------+
| Copyright(c) 2007 |
| Authors: |
@@ -17,8 +17,34 @@
+-------------------------------------------------------------------------+
*/
+// All XML namespaces used by PhD must be defined as constants here. NEVER hardcode namespace URLs.
+define( 'XMLNS_XML', 'http://www.w3.org/XML/1998/namespace' );
+define( 'XMLNS_XLINK', 'http://www.w3.org/1999/xlink' );
+define( 'XMLNS_PHD', 'http://phd.php.net/namespace' );
+
+// Special flag for non-iterative attributes
+define( 'PHD_WANT_INLINE_ATTRIBUTES', 99 );
+
abstract class PhDReader extends XMLReader {
+ // ***
+ // Properties
+
+ // What END_ELEMENT node name, namespace, and depth to stop at, NULL if none.
+ protected $lastElementName = NULL;
+ protected $lastElementNS = NULL;
+ protected $lastElementDepth = NULL;
+
+ // The current list of attributes for inline attribute access.
+ protected $attrList = NULL;
+
+ // The current list of name->value replacements from <define/> elements.
+ protected $replacementList = array();
+
+ // The current input stream. This isn't so unlike the flex idea of multiple input streams.
+ // We use this to implement includes without doing all kinds of crazy things.
+ protected $inputSource = NULL;
+
protected $map = array();
public function __construct( $file, $encoding = "utf-8", $options =
NULL ) {
@@ -26,30 +52,155 @@
if ( !parent::open( $file, $encoding, $options ) ) {
throw new Exception();
}
+ $this->inputSource = $this;
}
public function __destruct() {
}
- /* Format subclasses must implement these to make them real formats. */
+ // ***
+ // Format subclasses must implement these to make them real formats.
+ // THIS IS THE OFFICIAL OUTPUT FORMAT INTERFACE.
+
+ // proto string getFormatName( void )
+ // Return the name of the format.
abstract public function getFormatName();
- abstract protected function transformFromMap( $open, $name );
+
+ // proto array getNodeTypeList( void )
+ // Return an array of node types that the format cares about
+ // processing. Nodes with types not in this array will be SKIPPED. In
+ // the special case of a NULL value being returned, PhD will pass all
+ // nodes through to the output file unmodified; the identity
+ // transformation format would do this. A format that returns an empty
+ // array will result in an empty output document!
+ abstract protected function getNodeTypeList();
- /* These are new functions, extending XMLReader. */
+ // proto string transformNode( string name, int type )
+ // Transform a given node, returning the binary string output. Binary
+ // strings ARE handled safely. This function will be called for all
+ // types of nodes returned by getNodeTypeList(). In the special case
+ // of a NULL (not empty!) value being returned, parsing will halt
+ // immediately WITHOUT an error. Use the usual means to trigger a
+ // processing error. It is always valid for this method to make the
+ // parser move around in the file.
+ abstract protected function transformNode( $name, $type );
- /* Seek to an ID within the file. */
+ // proto bool isChunkBoundary( void )
+ // Return TRUE if it's time to chunk the output, FALSE otherwise.
+ // Always return FALSE to avoid chunking. A chunk boundary always
+ // includes the current node. This will often be modified by themes.
+ abstract protected function isChunkBoundary();
+
+ // ***
+ // Public methods
+
+ // Seek to an ID. This is used to start the parser somewhere that isn't at
+ // the beginning (duh). Be careful; this does not cause the parser to halt
+ // at the closing element of a successful seek. Use setRoot() for that.
+ // Don't forget to check the return value.
public function seek( $id ) {
- while( parent::read() ) {
- if ( $this->nodeType == XMLREADER::ELEMENT &&
$this->hasAttributes &&
- $this->moveToAttributeNs( "id",
"http://www.w3.org/XML/1998/namespace" ) && $this->value == $id ) {
- return $this->moveToElement();
+ while ( $this->inputSource->read() ) {
+ if ( $this->inputSource->nodeType == XMLReader::ELEMENT
&& $this->inputSource->hasAttributes &&
+ $id == $this->inputSource->getAttributeNs(
"id", XMLNS_XML ) ) {
+ return TRUE;
}
}
return FALSE;
}
+
+ // Seek to the element whose xml:id equals $id AND make it the parser's root: its name, namespace URI and depth are recorded in the lastElement* properties (the END_ELEMENT to stop at).
+ // Returns TRUE on success, FALSE if the seek fails or the node reached is not an element.
+ public function setRoot( $id ) {
+
+ if ( $this->inputSource->seek( $id ) ) {
+ if ( $this->inputSource->nodeType != XMLReader::ELEMENT ) {
+ return FALSE;
+ }
+ $this->lastElementName = $this->inputSource->name;
+ $this->lastElementNS = $this->inputSource->namespaceURI;
+ $this->lastElementDepth = $this->inputSource->depth;
+ return TRUE;
+ }
+ return FALSE;
+
+ }
+
+ // Run a transformation starting from the current node until the next chunk
+ // boundary (or the end of input) and return the accumulated output as a
+ // string. This is recommended for small to medium-sized chunks. Output is
+ // buffered in a php://temp stream; once the buffer grows beyond
+ // $OPTIONS[ 'chunking_memory_limit' ] (adjusted in config.php), it is
+ // spooled to disk.
+ //
+ // NOTE(review): getNodeTypeList() is documented as possibly returning NULL
+ // (pass every node through unmodified) and transformNode() as possibly
+ // returning NULL (halt parsing without error). Neither case is handled by
+ // this loop yet - confirm the intended behaviour before relying on them.
+ public function transformChunk() {
+     global $OPTIONS;
+
+     if ( ( $chunkData = fopen( "php://temp/maxmemory:{$OPTIONS[ 'chunking_memory_limit' ]}", 'r+' ) ) === FALSE ) {
+         PhD_Error( "Couldn't create the chunk spooling stream. Why?" );
+     }
+     $nodeTypeList = $this->getNodeTypeList();
+
+     // 1 = format wants attributes gathered inline, 2 = format wants attribute
+     // nodes iterated, 0 = format does not want attributes. The inner ternary
+     // MUST be parenthesised: PHP's ?: is left-associative, so without the
+     // parentheses mode 1 could never be selected.
+     $attributeMode = ( in_array( PHD_WANT_INLINE_ATTRIBUTES, $nodeTypeList ) ? 1 :
+         ( in_array( XMLReader::ATTRIBUTE, $nodeTypeList ) ? 2 : 0 ) );
+
+     do {
+         $type = $this->inputSource->nodeType;
+
+         // First, handle elements in the PhD namespace specially.
+         if ( $type == XMLReader::ELEMENT && $this->inputSource->namespaceURI == XMLNS_PHD ) {
+             if ( $this->handlePhDElement() ) { // this will probably recurse!
+                 // 'continue' in a do/while jumps to the loop condition, so the reader still advances.
+                 continue;
+             }
+         }
+
+         // Next, check for a chunking boundary (the abstract hook is isChunkBoundary()).
+         if ( $this->isChunkBoundary() ) {
+             break;
+         }
+
+         // If the format wanted inline attributes and this is an element node, gobble up the attrs for the format to use.
+         if ( $attributeMode == 1 && $type == XMLReader::ELEMENT && $this->inputSource->hasAttributes ) {
+             $this->gobbleAttributes();
+         }
+
+         // Next, pass the node to the format, if it wants it.
+         if ( in_array( $type, $nodeTypeList ) ) {
+             fwrite( $chunkData, $this->transformNode( $this->inputSource->name, $type ) );
+         }
+
+         // Next, if this is an element node AND the format wants attribute nodes iteratively, iterate them as a subset
+         if ( $type == XMLReader::ELEMENT && $this->inputSource->hasAttributes && $attributeMode == 2 ) {
+             fwrite( $chunkData, $this->iterateAttributes() );
+         }
+
+         // Next, clear out the last set of attributes if any.
+         $this->attrList = NULL;
+
+         // And finally, advance to the next node.
+     } while ( $this->inputSource->read() );
+
+     // Read the whole buffered chunk back from the start and release the stream.
+     rewind( $chunkData );
+     $finalData = stream_get_contents( $chunkData );
+     fclose( $chunkData );
+     return $finalData;
+
+ }
+
+ // This is probably the most problematic issue in all the engine:
+ // How to handle all our custom elements gracefully with a pull model
+ protected function handlePhDElement() {
+
+ switch ( $this->name ) {
+ case 'include': {
+ $this->includeFile( strlen( $t =
$this->inputSource->getAttribute( 'target' ) ) ? $t :
+ $this->inputSource->getAttributeNs( 'href', XMLNS_XLINK )
);
+ return TRUE;
+ }
+ case 'define': {
+ $this->replacementList[ $this->inputSource->getAttribute(
'name' ) ] = $this->inputSource->readInnerXML();
+ return TRUE;
+ }
+ case 'constant': {
+ $this->substituteConstant(
/* Go to the next useful node in the file. */
public function nextNode() {
@@ -130,7 +281,7 @@
if( isset( $this->map[ $name ] ) ) {
return $this->transformFromMap( $type ==
XMLReader::ELEMENT, $name );
}
- return call_user_func( array( $this, "format_${name}"
), $type == XMLReader::ELEMENT );
+ return call_user_func( array( $this, "format_{$name}"
), $type == XMLReader::ELEMENT );
break;
case XMLReader::TEXT: