Revision: 58746
Author:   jhoffman
Date:     2017-03-02 12:16:37 +0100 (Thu, 02 Mar 2017)
Log Message:
-----------
Moved project from forge.onehippo.org/svn/ without history

Added Paths:
-----------
    attic/forge/wikipediaimport/
    attic/forge/wikipediaimport/branches/
    attic/forge/wikipediaimport/tags/
    attic/forge/wikipediaimport/trunk/
    attic/forge/wikipediaimport/trunk/README.TXT
    attic/forge/wikipediaimport/trunk/filter.json
    attic/forge/wikipediaimport/trunk/find_cycles.php
    attic/forge/wikipediaimport/trunk/get_category_tree.php
    attic/forge/wikipediaimport/trunk/hash_importer.php
    attic/forge/wikipediaimport/trunk/importer.php
    attic/forge/wikipediaimport/trunk/library/
    attic/forge/wikipediaimport/trunk/library/jcr/
    attic/forge/wikipediaimport/trunk/library/jcr/node.php
    attic/forge/wikipediaimport/trunk/library/jcr/parser.php
    attic/forge/wikipediaimport/trunk/library/jcr/property.php
    attic/forge/wikipediaimport/trunk/library/jcr/value.php
    attic/forge/wikipediaimport/trunk/library/wiki/
    attic/forge/wikipediaimport/trunk/library/wiki/parseRaw.inc.php
    attic/forge/wikipediaimport/trunk/library/wiki/table-converter.inc.php
    attic/forge/wikipediaimport/trunk/list_top_categories.php
    attic/forge/wikipediaimport/trunk/listcategories.php
    attic/forge/wikipediaimport/trunk/resources/
    attic/forge/wikipediaimport/trunk/resources/pom.xml
    attic/forge/wikipediaimport/trunk/resources/wikipedia-data.xml
    attic/forge/wikipediaimport/trunk/resources/wikipedia-namespace.xml
    attic/forge/wikipediaimport/trunk/resources/wikipedia.cnd
    attic/forge/wikipediaimport/trunk/run_tests.php
    attic/forge/wikipediaimport/trunk/source/
    attic/forge/wikipediaimport/trunk/source/Category.php
    attic/forge/wikipediaimport/trunk/source/CategoryCycleDetector.php
    attic/forge/wikipediaimport/trunk/source/CategoryFilter.php
    attic/forge/wikipediaimport/trunk/source/CategoryHandler.php
    attic/forge/wikipediaimport/trunk/source/HippoExtensionWriter.php
    attic/forge/wikipediaimport/trunk/source/IWikiHandler.php
    attic/forge/wikipediaimport/trunk/source/JcrDocument.php
    attic/forge/wikipediaimport/trunk/source/JcrDocumentWriter.php
    attic/forge/wikipediaimport/trunk/source/JcrHashFolder.php
    attic/forge/wikipediaimport/trunk/source/JcrHashFolderWriter.php
    attic/forge/wikipediaimport/trunk/source/JcrWikiFolder.php
    attic/forge/wikipediaimport/trunk/source/MavenProject.php
    attic/forge/wikipediaimport/trunk/source/OrderedList.php
    attic/forge/wikipediaimport/trunk/source/TestCase.php
    attic/forge/wikipediaimport/trunk/source/WikiDocument.php
    attic/forge/wikipediaimport/trunk/source/WikiParser.php
    attic/forge/wikipediaimport/trunk/source/WriterTestCase.php
    attic/forge/wikipediaimport/trunk/source/main/
    attic/forge/wikipediaimport/trunk/source/main/resources/
    attic/forge/wikipediaimport/trunk/test/
    attic/forge/wikipediaimport/trunk/test/CategoryCycleDetectorTest.php
    attic/forge/wikipediaimport/trunk/test/CategoryFilterTest.php
    attic/forge/wikipediaimport/trunk/test/CategoryTest.php
    attic/forge/wikipediaimport/trunk/test/HippoExtensionWriterTest.php
    attic/forge/wikipediaimport/trunk/test/JcrDocumentWriterTest.php
    attic/forge/wikipediaimport/trunk/test/JcrHashFolderWriterTest.php
    attic/forge/wikipediaimport/trunk/test/JcrXmlParserTest.php
    attic/forge/wikipediaimport/trunk/test/MavenProjectTest.php
    attic/forge/wikipediaimport/trunk/test/OrderedListTest.php
    attic/forge/wikipediaimport/trunk/test/TestCaseTest.php
    attic/forge/wikipediaimport/trunk/test/WikiDocumentTest.php
    attic/forge/wikipediaimport/trunk/test/WriterTestCaseTest.php

Added: attic/forge/wikipediaimport/trunk/README.TXT
===================================================================
--- attic/forge/wikipediaimport/trunk/README.TXT                                
(rev 0)
+++ attic/forge/wikipediaimport/trunk/README.TXT        2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,69 @@
+Wikipedia content import
+---------------------------------
+
+Contents
+1. Using the pre-generated jars
+  1.1 Placing the jar in your local maven repository
+  1.2 Add a dependency in your application's pom file
+2. Generating your own jars
+3. Known issues
+
+
+1. Using the pre-generated jars
+===============================
+When using the pre-generated jars, there are just a few steps you need to 
take.
+- You need to place the jar in your local maven repository
+- You need to add a dependency in your application's pom file
+
+=== 1.1 Placing the jar in your local maven repository ===
+Copy the following command to your command-line and let maven do the work for 
you. 
+Be sure to modify -Dfile option to point to the desired jar.
+
+mvn install:install-file \
+  -Dfile=wikipedia-content-1.0.0-100.jar \
+  -DgroupId=org.onehippo.addon.content.wikipedia \
+  -DartifactId=wikipedia-content \
+  -Dversion=1.0.0 \
+  -Dpackaging=jar \
+  -DgeneratePom=true 
+
+Alternatively you could copy the jar yourself to the correct location in your 
maven 
+repository if you know the layout.
+
+=== 1.2 Add a dependency in your application's pom file ===
+Open your application's pom file in your favorite text editor and add the 
following to
+your <dependencies> section.
+
+<!-- wikipedia content addon -->
+<dependency>
+       <groupId>org.onehippo.addon.content.wikipedia</groupId>
+       <artifactId>wikipedia-content</artifactId>
+       <version>1.0.0</version>
+       <type>jar</type>
+</dependency>
+
+That's it. You're done. Please note that the bigger data sets can take a very 
long time to import.
+They may also require more memory to be assigned to the JVM.
+
+
+2. Generating your own jars
+===========================
+First we need to generate some content. For this you will need an export from 
the wikipedia 
+content. You can download it here: 
http://en.wikipedia.org/wiki/Wikipedia_database
+The 'pages-current.xml.bz2 - Current revisions only, all pages' will do just 
fine. Extract it.
+We will need the *pages-articles.xml file.
+
+php importer.php <path-to-wikipedia-content-xml> <number-of-articles>
+
+This will generate a maven project in the target/ folder.  In this folder, run
+
+mvn clean install
+
+This will package your jar and place it in your repository. Now add your jar 
to your application
+as explained in section 1.2 and see the results.
+
+
+3. Known issues
+==============================
+- not all wiki syntax is converted to html
+- category hierarchy is constructed on any [[Category:...]] link, not just in 
the parents section


Property changes on: attic/forge/wikipediaimport/trunk/README.TXT
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: attic/forge/wikipediaimport/trunk/filter.json
===================================================================
--- attic/forge/wikipediaimport/trunk/filter.json                               
(rev 0)
+++ attic/forge/wikipediaimport/trunk/filter.json       2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1 @@
+["Data analysis","Concurrency control","Data management","Computer file 
systems","Computer storage media","Video storage","Search 
algorithms","Searching","Library cataloging and 
classification","Metadata","Information retrieval","Audio storage","Electronic 
documents","Ontology (computer science)","Semantic Web","Databases","Digital 
libraries","Film sound production","Sound production technology","Office 
software","Sound recording","Recording","Internet search engines","Internet 
search","Data search engines","Data security","Ontology languages","Query 
languages","Metadata registry","MusicBrainz","Voice technology","Mass 
digitization","Content management systems","Geographic region-oriented digital 
libraries","Search engine software","EPrint archives","Open access 
archives","Discipline-oriented digital libraries","Transaction 
processing","String similarity measures","Commercial digital libraries","World 
Digital Library","Document-oriented databases","Corpora","Language-oriented 
digital libraries","Aggregation-based digital libraries","Data warehousing","Web 
archives","Digital library projects","Data partitioning","Computer-aided 
software engineering tools","Theatrical sound production","Citation 
indices","Structured storage","UNESCO nomenclature","Concordances","Library of 
Congress Classification","Digital library software","Code search 
engines","Microformats","Ontology editors","Ontology learning (computer 
science)","ISO\/IEC 11179","Ranking functions","Legal citators","Human edited 
search engines","Semantic desktop","File systems","Open data"]
\ No newline at end of file

Added: attic/forge/wikipediaimport/trunk/find_cycles.php
===================================================================
--- attic/forge/wikipediaimport/trunk/find_cycles.php                           
(rev 0)
+++ attic/forge/wikipediaimport/trunk/find_cycles.php   2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,21 @@
+<?php
+
+/**
+ * Prints the cycles that CategoryCycleDetector finds in the stored
+ * categories as a JSON array, one JSON-encoded cycle per line.
+ *
+ * The full "<?php" open tag is used because the short "<?" form is only
+ * parsed when the short_open_tag ini setting is enabled.
+ */
+
+include 'source/Category.php';
+include 'source/CategoryCycleDetector.php';
+
+// NOTE(review): presumably loads the data written by Category::save() - confirm.
+Category::restore();
+
+$detector = new CategoryCycleDetector();
+$cycles = $detector->getCycles(Category::find());
+
+// Encode each cycle on its own indented line; the separators are added by
+// implode() so there is no trailing comma.
+$lines = array();
+foreach ($cycles as $cycle) {
+    $lines[] = "  " . json_encode($cycle);
+}
+
+echo "[\n" . implode(",\n", $lines) . "]\n";
+

Added: attic/forge/wikipediaimport/trunk/get_category_tree.php
===================================================================
--- attic/forge/wikipediaimport/trunk/get_category_tree.php                     
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/get_category_tree.php     2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,38 @@
+<?php
+
+/**
+ * Prints a JSON array with the names of all stored categories that are one
+ * of the selected root categories, or have one of them as a parent or
+ * grandparent.
+ *
+ * The full "<?php" open tag is used because the short "<?" form is only
+ * parsed when the short_open_tag ini setting is enabled.
+ */
+
+include_once "source/Category.php";
+
+// NOTE(review): presumably loads the data written by Category::save() - confirm.
+Category::restore();
+
+// Selected root categories, used as a set (name => true).
+$ancestors = array();
+$ancestors["Metadata"] = true;
+$ancestors["Information retrieval"] = true;
+
+// Sum of getCount() over the selected categories; only reported by the
+// disabled debug output at the bottom of this script.
+$count = 0;
+$categories = array();
+foreach (Category::find() as $category) {
+    // Collect the names of the category itself, its parents and its
+    // grandparents (two levels up, no further).
+    $test = array();
+    $test[] = $category->getName();
+    foreach ($category->getParents() as $parent) {
+        $test[] = $parent->getName();
+        foreach ($parent->getParents() as $ancestor) {
+            $test[] = $ancestor->getName();
+        }
+    }
+    // Select the category as soon as one of those names is a root category.
+    foreach ($test as $ancestor) {
+        if (isset($ancestors[$ancestor])) {
+            $categories[] = $category->getName();
+            $count += $category->getCount();
+            break;
+        }
+    }
+}
+
+echo json_encode($categories);
+
+/*
+foreach ($categories as $category) {
+    echo $category . "\n";
+}
+echo "count: " . $count . "\n";
+*/

Added: attic/forge/wikipediaimport/trunk/hash_importer.php
===================================================================
--- attic/forge/wikipediaimport/trunk/hash_importer.php                         
(rev 0)
+++ attic/forge/wikipediaimport/trunk/hash_importer.php 2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Usage: php hash_importer.php <path-to-wikipedia-content-xml>
+ *
+ * Creates a maven project in the folder "target" and parses the given
+ * wikipedia dump without an article limit, feeding the pages through a
+ * CategoryFilter (configured from filter.json) into a JcrHashFolderWriter.
+ */
+
+// check that a dump file was named and that it exists
+if (!isset($argv[1])) {
+    die ("Usage: php hash_importer.php <path-to-wikipedia-content-xml>\n");
+}
+if (!file_exists($argv[1])) {
+    die ("Wikipedia dump file $argv[1] does not exist.");
+}
+if (!file_exists("filter.json")) {
+    die ("Filter file filter.json does not exist");
+}
+
+// json_decode() returns null for malformed JSON; stop before any output is
+// generated instead of handing null to the filter.
+$filter = json_decode(file_get_contents("filter.json"));
+if (!is_array($filter)) {
+    die ("Filter file filter.json does not contain a JSON array");
+}
+
+include_once "source/MavenProject.php";
+
+mkdir("target") || die("Could not create folder target");
+define("TARGET", "target");
+
+$project = new MavenProject("target");
+$project->create();
+
+include_once "source/WikiParser.php";
+include_once "source/CategoryFilter.php";
+include_once "source/JcrHashFolderWriter.php";
+
+$parser = new WikiParser(new CategoryFilter(new JcrHashFolderWriter(), $filter));
+$parser->setFile($argv[1]);
+$parser->setLimit(0);
+$parser->parse();
+
+/* EOF */

Added: attic/forge/wikipediaimport/trunk/importer.php
===================================================================
--- attic/forge/wikipediaimport/trunk/importer.php                              
(rev 0)
+++ attic/forge/wikipediaimport/trunk/importer.php      2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,28 @@
+<?php
+
+/**
+ * Usage: php importer.php <path-to-wikipedia-content-xml> [<number-of-articles>]
+ *
+ * Creates a maven project in the folder "target" and parses the given
+ * wikipedia dump into it through a JcrDocumentWriter. The parser limit
+ * defaults to 1000 and is replaced by the second argument when that is a
+ * positive integer.
+ */
+
+// check that a dump file was named and that it exists
+if (!isset($argv[1])) {
+    die ("Usage: php importer.php <path-to-wikipedia-content-xml> [<number-of-articles>]\n");
+}
+if (!file_exists($argv[1])){
+    die ("File $argv[1] does not exist.");
+}
+
+include_once "source/MavenProject.php";
+
+mkdir("target") || die("Could not create folder target");
+define("TARGET", "target");
+
+$project = new MavenProject("target");
+$project->create();
+
+include_once "source/WikiParser.php";
+include_once "source/JcrDocumentWriter.php";
+
+$parser = new WikiParser(new JcrDocumentWriter());
+$parser->setFile($argv[1]);
+
+// a missing, non-numeric or non-positive second argument keeps the default
+$max = 1000;
+if (isset($argv[2]) && 0 < (int) $argv[2]){
+    $max = (int) $argv[2];
+}
+$parser->setLimit($max);
+
+$parser->parse();

Added: attic/forge/wikipediaimport/trunk/library/jcr/node.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/jcr/node.php                      
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/jcr/node.php      2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,104 @@
+<?
+
+include_once dirname(__FILE__) . '/value.php';
+include_once dirname(__FILE__) . '/property.php';
+
+/**
+ * In-memory node with a name, a set of properties and child nodes.
+ * Child nodes with the same name are kept in insertion order; the whole
+ * subtree can be serialized with toSystemView().
+ */
+class Node {
+  private $parent;
+
+  private $name;
+
+  const JCR_PRIMARY_TYPE = "jcr:primaryType";
+  const JCR_MIXIN_TYPES = "jcr:mixinTypes";
+
+  // child name => list of the children carrying that name
+  /* package */ public $_nodes = array();
+  // property name => property object
+  /* package */ public $_properties = array();
+  private $_mixins = array();
+
+  /**
+   * @param string $name node name
+   * @param string $type primary type, stored as a Name value in the
+   *                     jcr:primaryType property
+   */
+  public function __construct($name, $type) {
+    $this->name = $name;
+    $this->setProperty(self::JCR_PRIMARY_TYPE, value::newName($type));
+  }
+
+  public function getName() {
+    return $this->name;
+  }
+
+  public function getPrimaryNodeType() {
+    return $this->getProperty(self::JCR_PRIMARY_TYPE)->getValue()->getString();
+  }
+
+  /**
+   * Returns the zero-based position of this node among the children of its
+   * parent that share its name; 0 for a node without a parent.
+   */
+  public function getIndex() {
+    if ($this->parent === null) {
+      return 0;
+    }
+    $siblings = $this->parent->_nodes[$this->name];
+    $i = 0;
+    foreach ($siblings as $sibling) {
+      // Identity comparison: "==" would compare all properties recursively
+      // (including the parent links) and match equal-looking siblings.
+      if ($sibling === $this) {
+        return $i;
+      }
+      $i++;
+    }
+    return $i;
+  }
+
+  /**
+   * Returns the property with the given name, or null when it is not set.
+   */
+  public function getProperty($name) {
+    return isset($this->_properties[$name]) ? $this->_properties[$name] : null;
+  }
+
+  /**
+   * Sets (or replaces) a property. An array makes it multi-valued.
+   */
+  public function setProperty($name, $value) {
+    $this->_properties[$name] = new property($this, $name, $value);
+  }
+
+  /**
+   * Appends a mixin type name to the multi-valued jcr:mixinTypes property,
+   * creating that property when needed.
+   */
+  public function addMixin($name) {
+    if (!isset($this->_properties[self::JCR_MIXIN_TYPES])) {
+      $this->setProperty(self::JCR_MIXIN_TYPES, array());
+    }
+    $values = $this->getProperty(self::JCR_MIXIN_TYPES)->getValues();
+    $values[] = value::newName($name);
+    $this->setProperty(self::JCR_MIXIN_TYPES, $values);
+  }
+
+  /**
+   * Creates a child node with the given name and primary type and returns it.
+   */
+  public function addNode($name, $type) {
+    $child = new Node($name, $type);
+    return $this->addChild($child);
+  }
+
+  /**
+   * Appends an existing node as child of this node and returns it.
+   */
+  public function addChild(Node $child) {
+    $name = $child->name;
+    if (!isset($this->_nodes[$name])) {
+      $this->_nodes[$name] = array();
+    }
+    $this->_nodes[$name][] = $child;
+    $child->parent = $this;
+    return $child;
+  }
+
+  /**
+   * Returns an iterator over the children with the given name, or over all
+   * children when no name is given. An unknown name gives an empty iterator.
+   */
+  public function getNodes($name = null) {
+    $arr = array();
+    if ($name === null) {
+      foreach ($this->_nodes as $siblings) {
+        $arr = array_merge($arr, $siblings);
+      }
+    } else if (isset($this->_nodes[$name])) {
+      $arr = $this->_nodes[$name];
+    }
+    return new ArrayIterator($arr);
+  }
+
+  /**
+   * Serializes this node, its properties and its children as sv:node XML.
+   *
+   * @param bool   $root   true to emit the XML declaration and the sv
+   *                       namespace declaration (the indent is then reset)
+   * @param string $indent whitespace prefix for every emitted line
+   */
+  public function toSystemView($root = true, $indent = "") {
+    // the name ends up in an attribute value, so it has to be escaped
+    $name = htmlspecialchars($this->name, ENT_COMPAT, 'UTF-8');
+    if ($root) {
+        $indent = "";
+        $result  = "<?xml version=\"1.0\"?>\n";
+        $result .= "<sv:node sv:name=\"{$name}\" xmlns:sv=\"http://www.jcp.org/jcr/sv/1.0\">\n";
+    } else {
+        $result  = $indent . "<sv:node sv:name=\"{$name}\">\n";
+    }
+    foreach ($this->_properties as $property) {
+      // property::toSystemView() takes the indent as its only argument
+      $result .= $property->toSystemView($indent . "  ");
+    }
+    foreach ($this->getNodes() as $child) {
+      $result .= $child->toSystemView(false, $indent . "  ");
+    }
+    $result .= $indent . "</sv:node>\n";
+    return $result;
+  }
+}

Added: attic/forge/wikipediaimport/trunk/library/jcr/parser.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/jcr/parser.php                    
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/jcr/parser.php    2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,77 @@
+<?
+
+include_once dirname(__FILE__) . "/node.php";
+
+/**
+ * A parser for JCR system-view XML exports.
+ * Constructs a Node object that corresponds with the content.
+ */
+class JcrXmlParser {
+
+    /**
+     * Parses system-view XML into a Node tree.
+     *
+     * @param string $text the XML document
+     * @return Node the node built from the document element
+     * @throws Exception when no text is given, the text is not well-formed
+     *                   XML, or a node lacks its jcr:primaryType property
+     */
+    public function parse($text) {
+        if ($text == null || trim($text) == "") {
+            throw new Exception("No text provided for parsing");
+        }
+        $xmlDom = new DOMDocument();
+        if (!$xmlDom->loadXML($text) || $xmlDom->documentElement == null) {
+            throw new Exception("Text is not well-formed XML");
+        }
+
+        // documentElement rather than firstChild: the first child of the
+        // document can be a comment or processing instruction.
+        return $this->parseNode($xmlDom->documentElement);
+    }
+
+    /** Converts one sv:node element, with its properties and children. */
+    private function parseNode($xmlNode) {
+        $props = $this->getProperties($xmlNode);
+        $values = isset($props["jcr:primaryType"])
+            ? $this->getPropertyValues($props["jcr:primaryType"])
+            : array();
+        if (count($values) == 0) {
+            throw new Exception("Node without jcr:primaryType: " . $xmlNode->getAttribute("sv:name"));
+        }
+        $node = new Node($xmlNode->getAttribute("sv:name"), $values[0]);
+        foreach ($props as $name => $xmlProp) {
+            if ($name == "jcr:primaryType")
+                continue;
+            $values = $this->getPropertyValues($xmlProp);
+            if (count($values) == 0)
+                continue;
+            // more than one value makes the property multi-valued
+            if (count($values) > 1)
+                $node->setProperty($name, $values);
+            else
+                $node->setProperty($name, $values[0]);
+        }
+
+        $nodes = $this->getNodes($xmlNode);
+        foreach ($nodes as $childXmlNode) {
+            $node->addChild($this->parseNode($childXmlNode));
+        }
+        return $node;
+    }
+
+    /**
+     * Returns the sv:node child elements in document order. A plain list is
+     * used so that children sharing a name do not overwrite each other.
+     */
+    private function getNodes($xmlNode) {
+        $nodes = array();
+        for ($item = $xmlNode->firstChild; $item != null; $item = $item->nextSibling) {
+            if ($item->nodeType != XML_ELEMENT_NODE)
+                continue;
+            if ($item->nodeName == "sv:node")
+                $nodes[] = $item;
+        }
+        return $nodes;
+    }
+
+    /** Returns the sv:property child elements, keyed by their sv:name. */
+    private function getProperties($xmlNode) {
+        $props = array();
+        for ($item = $xmlNode->firstChild; $item != null; $item = $item->nextSibling) {
+            if ($item->nodeType != XML_ELEMENT_NODE)
+                continue;
+            if ($item->nodeName == "sv:property")
+                $props[$item->getAttribute("sv:name")] = $item;
+        }
+        return $props;
+    }
+
+    /** Returns the text of every sv:value child of a property element. */
+    private function getPropertyValues($xmlProp) {
+        $values = array();
+        for ($item = $xmlProp->firstChild; $item != null; $item = $item->nextSibling) {
+            if ($item->nodeType != XML_ELEMENT_NODE)
+                continue;
+            // textContent of the element itself: an empty <sv:value/> has no
+            // firstChild, and a value may consist of several text nodes.
+            if ($item->nodeName == "sv:value")
+                $values[] = $item->textContent;
+        }
+        return $values;
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/library/jcr/property.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/jcr/property.php                  
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/jcr/property.php  2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,85 @@
+<?
+
+include_once dirname(__FILE__) . '/value.php';
+
+/**
+ * A named single- or multi-valued property of a node. The data is either
+ * one value object or an array of value objects.
+ */
+class property {
+  private $parent;
+
+  private $data;
+  private $name;
+
+  /**
+   * @param object $parent owner; remove() unsets this property from its
+   *                       public $_properties array
+   * @param string $name   property name
+   * @param mixed  $data   a value (or plain scalar), or an array of them
+   *                       for a multi-valued property
+   */
+  public function __construct($parent, $name, $data) {
+    $this->parent = $parent;
+    $this->name = $name;
+    if (is_array($data)) {
+      $this->setValues($data);
+    } else {
+      $this->setValue($data);
+    }
+  }
+
+  /**
+   * @return value
+   * @throws Exception when the property is multi-valued
+   */
+  public function getValue() {
+    if (is_array($this->data)) {
+      throw new Exception("Multi-valued property");
+    }
+    return $this->data;
+  }
+
+  /**
+   * @return array list of value objects
+   * @throws Exception when the property is single-valued
+   */
+  public function getValues() {
+    if (!is_array($this->data)) {
+      throw new Exception("Single-valued property");
+    }
+    return $this->data;
+  }
+
+  /** Makes the property single-valued; plain data becomes a String value. */
+  public function setValue($value) {
+    $this->data = $this->wrap($value);
+  }
+
+  /** Makes the property multi-valued; plain entries become String values. */
+  public function setValues($values) {
+    $this->data = array();
+    foreach ($values as $value) {
+      $this->data[] = $this->wrap($value);
+    }
+  }
+
+  /**
+   * Returns the type of the value, or of the first value of a multi-valued
+   * property; an empty multi-valued property reports "String".
+   */
+  public function getType() {
+    if (is_array($this->data)) {
+      if (count($this->data) > 0) {
+        return $this->data[0]->getType();
+      } else {
+        return "String";
+      }
+    }
+    return $this->data->getType();
+  }
+
+  /** Removes this property from its parent. */
+  public function remove() {
+    // must go through $this->parent; a bare $parent is an undefined local
+    unset($this->parent->_properties[$this->name]);
+  }
+
+  /**
+   * Serializes the property as an sv:property element with one sv:value
+   * child per value.
+   *
+   * @param string $indent whitespace prefix for every emitted line
+   */
+  public function toSystemView($indent = "") {
+    $result = $indent . "<sv:property sv:name=\"{$this->name}\" sv:type=\"{$this->getType()}\">\n";
+    if (is_array($this->data)) {
+      foreach ($this->data as $value) {
+        $result .= $indent . "  <sv:value>" . $this->encode($value->getString()) . "</sv:value>\n";
+      }
+    } else {
+      $result .= $indent . "  <sv:value>" . $this->encode($this->data->getString()) . "</sv:value>\n";
+    }
+    $result   .= $indent . "</sv:property>\n";
+    return $result;
+  }
+
+  /** Returns value objects unchanged and wraps anything else as a String. */
+  private function wrap($value) {
+    if (is_a($value, "value")) {
+      return $value;
+    }
+    return value::newString($value);
+  }
+
+  private function encode($value) {
+    return htmlspecialchars($value, ENT_COMPAT, 'UTF-8');
+  }
+}

Added: attic/forge/wikipediaimport/trunk/library/jcr/value.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/jcr/value.php                     
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/jcr/value.php     2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,44 @@
+<?
+
+/**
+ * A typed value: a type name ("String", "Double", "Long", "Date" or "Name"
+ * when built through the factories) together with the raw data.
+ */
+class value {
+  private $type;
+  private $value;
+
+  /**
+   * @param string $type  type name
+   * @param mixed  $value raw data, stored as given
+   */
+  public function __construct($type, $value) {
+    $this->value = $value;
+    $this->type = $type;
+  }
+
+  /** Returns the type name given at construction. */
+  public function getType() {
+    return $this->type;
+  }
+
+  /** Returns the raw data exactly as it was given (no conversion). */
+  public function getString() {
+    return $this->value;
+  }
+
+  /** Returns its argument unchanged. */
+  public function toSystemView($value) {
+    return $value;
+  }
+
+  /** Factory for a "String" value. */
+  public static function newString($value) {
+    return new self("String", $value);
+  }
+
+  /** Factory for a "Double" value. */
+  public static function newDouble($value) {
+    return new self("Double", $value);
+  }
+
+  /** Factory for a "Long" value. */
+  public static function newLong($value) {
+    return new self("Long", $value);
+  }
+
+  /** Factory for a "Date" value. */
+  public static function newDate($value) {
+    return new self("Date", $value);
+  }
+
+  /** Factory for a "Name" value. */
+  public static function newName($value) {
+    return new self("Name", $value);
+  }
+
+}

Added: attic/forge/wikipediaimport/trunk/library/wiki/parseRaw.inc.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/wiki/parseRaw.inc.php             
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/wiki/parseRaw.inc.php     
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,134 @@
+<?php
+
+include(dirname(__FILE__) . "/table-converter.inc.php");
+
+/**
+ * Returns the part of $str between the first occurrence of $a and the
+ * first occurrence of $b after it, or false when either marker is missing.
+ */
+function getPartBetween($str, $a, $b){
+       $openAt = strpos($str, $a);
+       if ($openAt === false) {
+               return false;
+       }
+       // the wanted part begins right behind the opening marker
+       $begin = $openAt + strlen($a);
+       $closeAt = strpos($str, $b, $begin);
+       if ($closeAt === false) {
+               return false;
+       }
+       return substr($str, $begin, $closeAt - $begin);
+}
+/**
+ * Debugging callback for preg_replace_callback(): dumps the match array
+ * between two HTML markers and returns the full match, so the text being
+ * processed stays unchanged.
+ */
+function debug_preg($matches){
+       print "\n\n<h3 style='color=red'>PREG</h3><pre>\n\n";
+       var_dump($matches);
+       print "\n\n<hr style='color=red' />n\n";
+
+       $wholeMatch = $matches[0];
+       return $wholeMatch;
+}
+/**
+ * Converts wiki markup in $html to HTML with a fixed sequence of textual
+ * replacements and returns the result.
+ *
+ * In order: a few entities are replaced, bold/emphasis quotes become
+ * <strong>/<em>, [[...]] links without a colon are rendered through
+ * helper_interwikilinks(), {{...}} and the remaining [[...]] forms are
+ * replaced by descriptive text, [...] links are rendered through
+ * helper_externlinks(), a small set of escaped tags is restored, and
+ * finally headings, lists, rulers and line breaks are converted.
+ * The order matters: later patterns only see what earlier ones left.
+ */
+function simpleText($html){
+
+       $html = str_replace('&ndash;','-',$html);
+       $html = str_replace('&quot;','"',$html);
+       // turn a double-escaped &amp;nbsp; back into &nbsp;
+       $html = preg_replace('/\&amp;(nbsp);/','&${1};',$html);
+
+       //formatting
+       // bold
+       $html = 
preg_replace('/\'\'\'([^\n\']+)\'\'\'/','<strong>${1}</strong>',$html);
+       // emphasized
+       $html = preg_replace('/\'\'([^\'\n]+)\'\'?/','<em>${1}</em>',$html);
+       //interwiki links
+       $html = 
preg_replace_callback('/\[\[([^\|\n\]:]+)[\|]([^\]]+)\]\]/','helper_interwikilinks',$html);
+       // without text
+       $html = 
preg_replace_callback('/\[\[([^\|\n\]:]+)\]\]/','helper_interwikilinks',$html);
+       // 
+       //$html = preg_replace('/{{([^}]+)+}}/','Interwiki: 
${1}+${2}+${3}',$html);
+       $html = preg_replace('/{{([^\|\n\}]+)([\|]?([^\}]+))+\}\}/','Interwiki: 
${1} &raquo; ${3}',$html);
+       // Template
+       //$html = preg_replace('/{{([^}]*)}}/',' ',$html);
+       // categories
+       //$html = 
preg_replace('/\[\[([^\|\n\]]+)([\|]([^\]]+))?\]\]/','',$html);
+       $html = 
preg_replace('/\[\[([^\|\n\]]{2})([\:]([^\]]+))?\]\]/','Translation: ${1} 
&raquo; ${3}',$html);
+       $html = preg_replace('/\[\[([^\|\n\]]+)([\:]([^\]]+))?\]\]/','Category: 
${1} - ${2}',$html);
+       // image
+       $html = preg_replace('/\[\[([^\|\n\]]+)([\|]([^\]]+))+\]\]/','Image: 
${0}+${1}+${2}+${3}',$html);
+       
+       //links
+       //$html = preg_replace('/\[([^\[\]\|\n\': ]+)\]/','<a 
href="${1}">${1}</a>',$html);
+       $html = preg_replace_callback('/\[([^\[\]\|\n\': 
]+)\]/','helper_externlinks',$html);
+       // with text
+       //$html = preg_replace('/\[([^\[\]\|\n\' ]+)[\| ]([^\]\']+)\]/','<a 
href="${1}">${2}</a>',$html);
+       $html = preg_replace_callback('/\[([^\[\]\|\n\' ]+)[\| 
]([^\]\']+)\]/','helper_externlinks',$html);
+       
+       // allowed tags
+       $html = 
preg_replace('/&lt;(\/?)(small|sup|sub|u)&gt;/','<${1}${2}>',$html);
+       
+       // an escaped <br> (with the newlines around it) becomes one newline
+       $html = preg_replace('/\n*&lt;br *\/?&gt;\n*/',"\n",$html);
+       // escaped math/pre/code/nowiki tags are all mapped to <pre>
+       $html = 
preg_replace('/&lt;(\/?)(math|pre|code|nowiki)&gt;/','<${1}pre>',$html);
+       $html = preg_replace('/&lt;!--/','<!--',$html);
+       $html = preg_replace('/--&gt;/',' -->',$html);
+
+       // headings
+       // longest run of "=" first, so "==x==" is not matched inside "===x==="
+       for($i=7;$i>0;$i--){
+               $html = preg_replace(
+                       '/\n+[=]{'.$i.'}([^=]+)[=]{'.$i.'}\n*/',
+                       '<h'.$i.'>${1}</h'.$i.'>',
+                       $html
+               );
+       }
+       
+       //lists
+       // wrap a run of "*" lines that follows a non-list line in <ul>
+       $html = preg_replace(
+               '/(\n[ ]*[^#* ][^\n]*)\n(([ ]*[*]([^\n]*)\n)+)/',
+               '${1}<ul>'."\n".'${2}'.'</ul>'."\n",
+               $html
+       );
+       // wrap a run of "#" lines that follows a non-list line in <ol>
+       $html = preg_replace(
+               '/(\n[ ]*[^#* ][^\n]*)\n(([ ]*[#]([^\n]*)\n)+)/',
+               '${1}<ol>'."\n".'${2}'.'</ol>'."\n",
+               $html
+       );
+       // every line starting with "*" or "#" becomes a list item
+       $html = preg_replace('/\n[ ]*[\*#]+([^\n]*)/','<li>${1}</li>',$html);
+       
+       $html = preg_replace('/----/','<hr />',$html);
+
+       //$html = nl2br($html);
+       // line breaks
+       $html = preg_replace('/[\n\r]{4}/',"<br/><br/>",$html);
+       $html = preg_replace('/[\n\r]{2}/',"<br/>",$html);
+       
+       // drop a break that sits directly between two tags
+       $html = preg_replace('/[>]<br\/>[<]/',"><",$html);
+
+       return $html;
+}
+/**
+ * Converts the raw wiki text of a page to HTML.
+ *
+ * The text is entity-decoded, a few remaining entities are replaced,
+ * {{PAGENAME}} is substituted by $title, and the result is run through
+ * convertTables() and then simpleText().
+ */
+function parseRaw($title,$page){
+       // re-html
+       $decoded = html_entity_decode($page);
+       $decoded = str_replace(array('&ndash;', '&quot;'), array('-', '"'), $decoded);
+       $decoded = preg_replace('/\&amp;(nbsp);/','&${1};',$decoded);
+
+       $named = str_replace('{{PAGENAME}}',$title,$decoded);
+
+       // tables first, then the markup of the remaining text
+       return simpleText(convertTables($named));
+}
+/**
+ * Returns the content of the <text xml:space="preserve"> element of $page
+ * wrapped in <pre> tags. When the element is not found the <pre> is empty.
+ */
+function giveSource($page){
+       $source = getPartBetween($page, '<text xml:space="preserve">', '</text>');
+       return "<pre>" . $source . "</pre>";
+}
+/**
+ * preg_replace_callback() helper that renders a link: group 1 is the
+ * target, group 2 the link text. The target doubles as text when group 2
+ * is missing or empty.
+ */
+function helper_externlinks($matches){
+       $href = $matches[1];
+       $label = $href;
+       if (!empty($matches[2])) {
+               $label = $matches[2];
+       }
+       return '<a href="' . $href . '">' . $label . '</a>';
+}
+/**
+ * preg_replace_callback() helper that renders a [[page]] or [[page|text]]
+ * link as an anchor to "?page=<page>" with the css class "dunno".
+ * Group 1 is the page, group 2 the optional link text.
+ */
+function helper_interwikilinks($matches){
+       $page = $matches[1];
+       $label = $page;
+       if (!empty($matches[2])) {
+               $label = $matches[2];
+       }
+       $class = " class=\"dunno\" ";
+       return '<a ' . $class . ' href="?page=' . $page . '">' . $label . '</a>';
+}
+
+?>

Added: attic/forge/wikipediaimport/trunk/library/wiki/table-converter.inc.php
===================================================================
--- attic/forge/wikipediaimport/trunk/library/wiki/table-converter.inc.php      
                        (rev 0)
+++ attic/forge/wikipediaimport/trunk/library/wiki/table-converter.inc.php      
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,104 @@
+<?php
+
+/**
+ * Replaces every wiki table ("{|" ... "|}") in $text by the output of
+ * convertTable() and returns the resulting text. Tables may be nested;
+ * an inner table is converted first and its result becomes part of the
+ * text of the enclosing table.
+ *
+ * Every input line comes back terminated by "\n". The strings
+ * "position:relative" and "position:absolute" are removed from all lines.
+ * A table that is never closed is converted with what was collected for
+ * it, so the text after an unterminated "{|" is not lost.
+ */
+function convertTables($text){
+       $lines = explode("\n",$text);
+       // nesting depth of the table being collected; 0 = outside any table
+       $innertable = 0;
+       // collected text per nesting depth; index 0 is the final result
+       $innertabledata = array(0 => "");
+       foreach($lines as $line){
+               $line = str_replace("position:relative","",$line);
+               $line = str_replace("position:absolute","",$line);
+               if(substr($line,0,2) == '{|'){
+                       // a table starts: collect it one level deeper
+                       $innertable++;
+                       $innertabledata[$innertable] = "";
+               }
+               $innertabledata[$innertable] .= $line . "\n";
+               if($innertable && substr($line,0,2) == '|}'){
+                       // the table ends: convert it and hand it to the level above
+                       $innertableconverted = convertTable($innertabledata[$innertable]);
+                       $innertabledata[$innertable] = "";
+                       $innertable--;
+                       $innertabledata[$innertable] .= $innertableconverted."\n";
+               }
+       }
+       // tables still open at the end of the text
+       while($innertable > 0){
+               $innertableconverted = convertTable($innertabledata[$innertable]);
+               $innertable--;
+               $innertabledata[$innertable] .= $innertableconverted."\n";
+       }
+       return $innertabledata[0];
+}
+/**
+ * Converts the wiki markup of one table to an HTML table.
+ *
+ * Recognized line starts (after trimming):
+ *   "{|"  table start, the rest of the line is the attribute list
+ *   "|-"  row separator
+ *   "|}"  table end, conversion stops here
+ *   "|"   data cell, optionally "attributes| content"
+ *   "!"   header cell, optionally "attributes| content"
+ * Any other line is plain text that is appended to the open cell. Cell
+ * content and plain text are run through simpleText().
+ *
+ * @param string $intext the lines of the table, separated by "\n"
+ * @return string the HTML; all elements still open at the end are closed
+ */
+function convertTable($intext){
+       $lines = explode("\n",$intext);
+
+       // nothing is emitted and nothing is open until the first line is read
+       $table = "";
+       $tableopen = false;
+       $rowopen = false;
+       $tdopen = false;
+       $thopen = false;
+
+       foreach($lines as $line){
+               $line = trim($line);
+               if(substr($line,0,2) == '{|'){
+                       //begin of the table; only "{|" counts, so a line that
+                       //starts with a "{{template}}" does not restart the table
+                       $attributes = trim(substr($line,2));
+                       $tableopen = true;
+                       $table = "<table ".$attributes.">\n";
+               } else if(substr($line,0,1) == '|'){
+                       // table related
+                       $line = substr($line,1);
+                       if(substr($line,0,1) == '-'){
+                               // row break ("|-", any number of dashes)
+                               if($thopen)
+                                       $table .="</th>\n";
+                               if($tdopen)
+                                       $table .="</td>\n";
+                               if($rowopen)
+                                       $table .="\t</tr>\n";
+                               $table .= "\t<tr>\n";
+                               $rowopen = true;
+                               $tdopen = false;
+                               $thopen = false;
+                       }else if(substr($line,0,1) == '}'){
+                               // table end
+                               break;
+                       }else{
+                               // td; close the previous cell, whatever its kind
+                               $stuff = explode('| ',$line,2);
+                               if($thopen)
+                                       $table .="</th>\n";
+                               if($tdopen)
+                                       $table .="</td>\n";
+                               $thopen = false;
+                               if(count($stuff)==1)
+                                       $table .= "\t\t<td>".simpleText($stuff[0]);
+                               else
+                                       $table .= "\t\t<td ".$stuff[0].">".
+                                               simpleText($stuff[1]);
+                               $tdopen = true;
+                       }
+               } else if(substr($line,0,1) == '!'){
+                       // th; close the previous cell, whatever its kind
+                       $stuff = explode('| ',substr($line,1),2);
+                       if($thopen)
+                               $table .="</th>\n";
+                       if($tdopen)
+                               $table .="</td>\n";
+                       $tdopen = false;
+                       if(count($stuff)==1)
+                               $table .= "\t\t<th>".simpleText($stuff[0]);
+                       else
+                               $table .= "\t\t<th ".$stuff[0].">".
+                                       simpleText($stuff[1]);
+                       $thopen = true;
+               }else{
+                       // plain text
+                       $table .= simpleText($line) ."\n";
+               }
+       }
+       // close whatever is still open, innermost first
+       if($thopen)
+               $table .="</th>\n";
+       if($tdopen)
+               $table .="</td>\n";
+       if($rowopen)
+               $table .="\t</tr>\n";
+       if($tableopen)
+               $table .="</table>\n";
+       return $table;
+}
+
+?>

Added: attic/forge/wikipediaimport/trunk/list_top_categories.php
===================================================================
--- attic/forge/wikipediaimport/trunk/list_top_categories.php                   
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/list_top_categories.php   2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,39 @@
+<?php
+
+include dirname(__FILE__) . "/source/Category.php";
+include dirname(__FILE__) . "/source/OrderedList.php";
+
+// Prints the restored categories as a JSON array, one object per category,
+// in the order in which the OrderedList links its entries.
+
+// Category::restore("categories_test.json");
+Category::restore();
+
+$ordered = new OrderedList();
+foreach (Category::find() as $item) {
+    $ordered->add($item);
+}
+
+echo "[\n";
+
+$needsSeparator = false;
+$node = $ordered->getHead();
+while ($node != null) {
+    $category = $node->entry;
+
+    // Entries are separated by a comma; nothing precedes the first one.
+    if ($needsSeparator) {
+        echo ",\n";
+    }
+    $needsSeparator = true;
+
+    $record = array(
+        "name" => $category->getName(),
+        "count" => $category->getCount());
+
+    // The "parents" key is only written for categories that have parents.
+    if ($category->getParents() != null) {
+        $parentNames = array();
+        foreach ($category->getParents() as $parentCategory) {
+            $parentNames[] = $parentCategory->getName();
+        }
+        $record["parents"] = $parentNames;
+    }
+
+    echo json_encode($record);
+    $node = $node->next;
+}
+echo "]\n";

Added: attic/forge/wikipediaimport/trunk/listcategories.php
===================================================================
--- attic/forge/wikipediaimport/trunk/listcategories.php                        
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/listcategories.php        2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,40 @@
+<?php
+
+// Usage: php listcategories.php <wiki-file> [max]
+//
+// Runs a WikiParser with a CategoryHandler over <wiki-file> and then stores the
+// collected categories with Category::save(). The optional [max] is handed to
+// WikiParser::setLimit() and defaults to 1000.
+
+// the file argument is mandatory
+if (!isset($argv[1])) {
+    die ("Usage: php listcategories.php <wiki-file> [max]\n");
+}
+
+// check if the file exists
+if (!file_exists($argv[1])){
+    die ("File $argv[1] does not exist.");
+}
+
+include_once "source/WikiParser.php";
+include_once "source/CategoryHandler.php";
+
+$counter = new CategoryHandler();
+$parser = new WikiParser($counter);
+$parser->setFile($argv[1]);
+
+// a second argument that casts to a non-negative integer overrides the default
+$max = 1000;
+if (isset($argv[2]) && 0 <= (int) $argv[2]){
+    $max = (int) $argv[2];
+}
+$parser->setLimit($max);
+$parser->setShowProgress(false);
+
+$parser->parse();
+
+Category::save();
+
+/*
+echo "[\n";
+$first = true;
+foreach($counter->getCategories() as $category => $count) {
+    if ($first) {
+        $first = false;
+    } else {
+        echo ",\n";
+    }
+    echo json_encode(array("category" => $category, "count" => $count));
+}
+echo "\n]";
+*/
+
+/* EOF */

Added: attic/forge/wikipediaimport/trunk/resources/pom.xml
===================================================================
--- attic/forge/wikipediaimport/trunk/resources/pom.xml                         
(rev 0)
+++ attic/forge/wikipediaimport/trunk/resources/pom.xml 2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Copyright 2007-2008 Hippo.
+    
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    
+    <name>Wikipedia content</name>
+    <description>Wikipedia demo content</description>
+    <groupId>org.onehippo.addon.content.wikipedia</groupId>
+    <artifactId>wikipedia-content</artifactId>
+    <version>1.0.0</version>
+    <packaging>jar</packaging>
+    
+    <build>
+        <defaultGoal>install</defaultGoal>
+        <resources>
+            <resource>
+                <directory>${basedir}/src/main/resources</directory>
+                <targetPath>.</targetPath>
+                <includes>
+                    <include>*.xml</include>
+                    <include>*.cnd</include>
+                </includes>
+            </resource>
+        </resources>
+        <extensions>
+            <extension>
+                <groupId>org.jvnet.wagon-svn</groupId>
+                <artifactId>wagon-svn</artifactId>
+                <version>1.9</version>
+            </extension>
+        </extensions>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <configuration>
+                    <source>1.5</source>
+                    <target>1.5</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <distributionManagement>
+        <repository>
+            <uniqueVersion>false</uniqueVersion>
+            <id>wikipediaimport-maven-repo</id>
+            
<url>svn:http://forge.hippo-ecm.org/svn/wikipediaimport/maven2/</url>
+        </repository>
+    </distributionManagement>
+
+</project>


Property changes on: attic/forge/wikipediaimport/trunk/resources/pom.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: attic/forge/wikipediaimport/trunk/resources/wikipedia-data.xml
===================================================================
--- attic/forge/wikipediaimport/trunk/resources/wikipedia-data.xml              
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/resources/wikipedia-data.xml      
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright 2007-2009 Hippo
+
+  Licensed under the Apache License, Version 2.0 (the  "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS"
+  BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<sv:node xmlns:sv="http://www.jcp.org/jcr/sv/1.0" sv:name="content">
+  <sv:property sv:name="jcr:primaryType" sv:type="Name">
+    <sv:value>hippostd:folder</sv:value>
+  </sv:property>
+  <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+    <sv:value>hippo:harddocument</sv:value>
+  </sv:property>
+  <sv:property sv:name="hippo:paths" sv:type="String">
+  </sv:property>
+  <sv:property sv:name="hippo:related___pathreference" sv:type="String">
+  </sv:property>
+  <sv:property sv:name="hippostd:foldertype" sv:type="String">
+    <sv:value>new-folder</sv:value>
+  </sv:property>
+  <sv:node sv:name="documents">
+    <sv:property sv:name="jcr:primaryType" sv:type="Name">
+      <sv:value>hippostd:folder</sv:value>
+    </sv:property>
+    <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+      <sv:value>hippo:harddocument</sv:value>
+    </sv:property>
+    <sv:property sv:name="hippo:paths" sv:type="String">
+    </sv:property>
+    <sv:property sv:name="hippo:related___pathreference" sv:type="String">
+    </sv:property>
+    <sv:property sv:name="hippostd:foldertype" sv:type="String">
+      <sv:value>new-folder</sv:value>
+    </sv:property>
+    <sv:node sv:name="wikipedia">
+      <sv:property sv:name="jcr:primaryType" sv:type="Name">
+        <sv:value>hippostd:folder</sv:value>
+      </sv:property>
+      <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+        <sv:value>hippo:harddocument</sv:value>
+      </sv:property>
+      <sv:property sv:name="hippostd:foldertype" sv:type="String">
+        <sv:value>new-document</sv:value>
+        <sv:value>new-folder</sv:value>
+      </sv:property>
+    </sv:node>
+  </sv:node>
+</sv:node>
+


Property changes on: 
attic/forge/wikipediaimport/trunk/resources/wikipedia-data.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: attic/forge/wikipediaimport/trunk/resources/wikipedia-namespace.xml
===================================================================
--- attic/forge/wikipediaimport/trunk/resources/wikipedia-namespace.xml         
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/resources/wikipedia-namespace.xml 
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,221 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright 2007 Hippo
+
+  Licensed under the Apache License, Version 2.0 (the  "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS"
+  BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 
implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<sv:node sv:name="wikipedia"
+  xmlns:sv="http://www.jcp.org/jcr/sv/1.0">
+  <sv:property sv:name="jcr:primaryType" sv:type="Name">
+    <sv:value>hipposysedit:namespace</sv:value>
+  </sv:property>
+  <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+    <sv:value>mix:referenceable</sv:value>
+  </sv:property>
+  <sv:property sv:name="jcr:uuid" sv:type="String">
+    <sv:value>0570e669-aadb-42e4-8d66-acdd31d39371</sv:value>
+  </sv:property>
+  <sv:node sv:name="article">
+    <sv:property sv:name="jcr:primaryType" sv:type="Name">
+      <sv:value>hipposysedit:templatetype</sv:value>
+    </sv:property>
+    <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+      <sv:value>mix:referenceable</sv:value>
+      <sv:value>editor:editable</sv:value>
+    </sv:property>
+    <sv:property sv:name="jcr:uuid" sv:type="String">
+      <sv:value>788ed814-f9ee-4b58-968c-d0b59763d30c</sv:value>
+    </sv:property>
+    <sv:node sv:name="hipposysedit:nodetype">
+      <sv:property sv:name="jcr:primaryType" sv:type="Name">
+        <sv:value>hippo:handle</sv:value>
+      </sv:property>
+      <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+        <sv:value>hippo:hardhandle</sv:value>
+      </sv:property>
+      <sv:property sv:name="jcr:uuid" sv:type="String">
+        <sv:value>f47b9324-8174-4c89-b9c2-c32ec95f7429</sv:value>
+      </sv:property>
+      <sv:node sv:name="hipposysedit:nodetype">
+        <sv:property sv:name="jcr:primaryType" sv:type="Name">
+          <sv:value>hipposysedit:nodetype</sv:value>
+        </sv:property>
+        <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+          <sv:value>hipposysedit:remodel</sv:value>
+          <sv:value>hippo:harddocument</sv:value>
+        </sv:property>
+        <sv:property sv:name="jcr:uuid" sv:type="String">
+          <sv:value>ac873d39-0817-4dac-bc8c-87603e5ec91c</sv:value>
+        </sv:property>
+        <sv:property sv:name="hipposysedit:node" sv:type="Boolean">
+          <sv:value>true</sv:value>
+        </sv:property>
+        <sv:property sv:name="hipposysedit:supertype" sv:type="String">
+          <sv:value>hippo:document</sv:value>
+          <sv:value>hippostd:publishable</sv:value>
+          <sv:value>hippostd:publishableSummary</sv:value>
+        </sv:property>
+        <sv:property sv:name="hipposysedit:uri" sv:type="String">
+          <sv:value>http://forge.onehippo.org/wikipedia/nt/1.0</sv:value>
+        </sv:property>
+        <sv:node sv:name="title">
+          <sv:property sv:name="jcr:primaryType" sv:type="Name">
+            <sv:value>hipposysedit:field</sv:value>
+          </sv:property>
+          <sv:property sv:name="hipposysedit:path" sv:type="String">
+            <sv:value>wikipedia:title</sv:value>
+          </sv:property>
+          <sv:property sv:name="hipposysedit:type" sv:type="String">
+            <sv:value>String</sv:value>
+          </sv:property>
+        </sv:node>
+        <sv:node sv:name="body">
+          <sv:property sv:name="jcr:primaryType" sv:type="Name">
+            <sv:value>hipposysedit:field</sv:value>
+          </sv:property>
+          <sv:property sv:name="hipposysedit:path" sv:type="String">
+            <sv:value>wikipedia:html</sv:value>
+          </sv:property>
+          <sv:property sv:name="hipposysedit:type" sv:type="String">
+            <sv:value>Html</sv:value>
+          </sv:property>
+        </sv:node>
+      </sv:node>
+    </sv:node>
+    <sv:node sv:name="hipposysedit:prototypes">
+      <sv:property sv:name="jcr:primaryType" sv:type="Name">
+        <sv:value>hipposysedit:prototypeset</sv:value>
+      </sv:property>
+      <sv:node sv:name="hipposysedit:prototype">
+        <sv:property sv:name="jcr:primaryType" sv:type="Name">
+          <sv:value>wikipedia:article</sv:value>
+        </sv:property>
+        <sv:property sv:name="jcr:mixinTypes" sv:type="Name">
+          <sv:value>hippo:harddocument</sv:value>
+        </sv:property>
+        <sv:property sv:name="hippostd:state" sv:type="String">
+          <sv:value>unpublished</sv:value>
+        </sv:property>
+        <sv:property sv:name="hippostd:stateSummary" sv:type="String">
+          <sv:value>new</sv:value>
+        </sv:property>
+        <sv:property sv:name="wikipedia:html" sv:type="String">
+          <sv:value/>
+        </sv:property>
+        <sv:property sv:name="wikipedia:title" sv:type="String">
+          <sv:value>Title</sv:value>
+        </sv:property>
+      </sv:node>
+    </sv:node>
+    <sv:node sv:name="editor:templates">
+      <sv:property sv:name="jcr:primaryType" sv:type="Name">
+        <sv:value>editor:templateset</sv:value>
+      </sv:property>
+      <sv:node sv:name="_default_">
+        <sv:property sv:name="jcr:primaryType" sv:type="Name">
+          <sv:value>frontend:plugincluster</sv:value>
+        </sv:property>
+        <sv:property sv:name="frontend:references" sv:type="String">
+               <sv:value>engine</sv:value>
+               <sv:value>wicket.model</sv:value>
+        </sv:property>
+        <sv:property sv:name="frontend:services" sv:type="String">
+               <sv:value>wicket.id</sv:value>
+               <sv:value>wicket.dialog</sv:value>
+               <sv:value>wicket.model</sv:value>
+        </sv:property>
+        <sv:property sv:name="frontend:properties" sv:type="String">
+               <sv:value>mode</sv:value>
+        </sv:property>
+        <sv:property sv:name="mode" sv:type="String">
+          <sv:value>edit</sv:value>
+        </sv:property>
+        <sv:node sv:name="root">
+          <sv:property sv:name="jcr:primaryType" sv:type="Name">
+            <sv:value>frontend:plugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="item" sv:type="String">
+            <sv:value>${cluster.id}.field</sv:value>
+          </sv:property>
+          <sv:property sv:name="plugin.class" sv:type="String">
+            
<sv:value>org.hippoecm.frontend.service.render.ListViewPlugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="wicket.id" sv:type="String">
+            <sv:value>${wicket.id}</sv:value>
+          </sv:property>
+        </sv:node>
+        <sv:node sv:name="title">
+          <sv:property sv:name="jcr:primaryType" sv:type="Name">
+            <sv:value>frontend:plugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="caption" sv:type="String">
+            <sv:value>Title</sv:value>
+          </sv:property>
+          <sv:property sv:name="engine" sv:type="String">
+            <sv:value>${engine}</sv:value>
+          </sv:property>
+          <sv:property sv:name="field" sv:type="String">
+            <sv:value>title</sv:value>
+          </sv:property>
+          <sv:property sv:name="mode" sv:type="String">
+            <sv:value>${mode}</sv:value>
+          </sv:property>
+          <sv:property sv:name="plugin.class" sv:type="String">
+            
<sv:value>org.hippoecm.frontend.editor.plugins.field.PropertyFieldPlugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="template.size" sv:type="String">
+            <sv:value/>
+          </sv:property>
+          <sv:property sv:name="wicket.id" sv:type="String">
+            <sv:value>${cluster.id}.field</sv:value>
+          </sv:property>
+          <sv:property sv:name="wicket.model" sv:type="String">
+            <sv:value>${wicket.model}</sv:value>
+          </sv:property>
+        </sv:node>
+        <sv:node sv:name="body">
+          <sv:property sv:name="jcr:primaryType" sv:type="Name">
+            <sv:value>frontend:plugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="caption" sv:type="String">
+            <sv:value>Body</sv:value>
+          </sv:property>
+          <sv:property sv:name="engine" sv:type="String">
+            <sv:value>${engine}</sv:value>
+          </sv:property>
+          <sv:property sv:name="field" sv:type="String">
+            <sv:value>body</sv:value>
+          </sv:property>
+          <sv:property sv:name="mode" sv:type="String">
+            <sv:value>${mode}</sv:value>
+          </sv:property>
+          <sv:property sv:name="plugin.class" sv:type="String">
+            
<sv:value>org.hippoecm.frontend.editor.plugins.field.PropertyFieldPlugin</sv:value>
+          </sv:property>
+          <sv:property sv:name="template.height" sv:type="String">
+            <sv:value/>
+          </sv:property>
+          <sv:property sv:name="template.width" sv:type="String">
+            <sv:value/>
+          </sv:property>
+          <sv:property sv:name="wicket.id" sv:type="String">
+            <sv:value>${cluster.id}.field</sv:value>
+          </sv:property>
+          <sv:property sv:name="wicket.model" sv:type="String">
+            <sv:value>${wicket.model}</sv:value>
+          </sv:property>
+        </sv:node>
+      </sv:node>
+    </sv:node>
+  </sv:node>
+</sv:node>


Property changes on: 
attic/forge/wikipediaimport/trunk/resources/wikipedia-namespace.xml
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: attic/forge/wikipediaimport/trunk/resources/wikipedia.cnd
===================================================================
--- attic/forge/wikipediaimport/trunk/resources/wikipedia.cnd                   
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/resources/wikipedia.cnd   2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,23 @@
+/*
+ * Copyright 2008 Hippo
+ *
+ * Licensed under the Apache License, Version 2.0 (the  "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ 
+<hippo='http://www.onehippo.org/jcr/hippo/nt/2.0'>
+<hippostd='http://www.onehippo.org/jcr/hippostd/nt/2.0'>
+<wikipedia='http://forge.onehippo.org/wikipedia/nt/1.0'>
+
+[wikipedia:article] > hippo:document, hippostd:publishable, 
hippostd:publishableSummary, hippostd:taggable
+- wikipedia:title (string)
+- wikipedia:html (string)

Added: attic/forge/wikipediaimport/trunk/run_tests.php
===================================================================
--- attic/forge/wikipediaimport/trunk/run_tests.php                             
(rev 0)
+++ attic/forge/wikipediaimport/trunk/run_tests.php     2017-03-02 11:16:37 UTC 
(rev 58746)
@@ -0,0 +1,35 @@
+<?php
+
+// Usage: php run_tests.php [TestClass ...]
+//
+// Includes every non-hidden file in the test/ directory and, when the file
+// defines a class named after the file (name without extension) that extends
+// TestCase, instantiates it and calls run(). When class names are given on the
+// command line only those files are included.
+
+include_once "source/TestCase.php";
+
+$tests = null;
+if (count($argv) > 1) {
+    $tests = $argv;
+    array_shift($tests);
+}
+
+echo "Starting tests\n";
+$suite = opendir("test");
+if ($suite === false) {
+    die("Cannot open the test directory\n");
+}
+
+// readdir() returns false at the end of the directory; compare strictly so
+// that an entry with a falsy name (e.g. "0") does not end the loop early.
+while (($entry = readdir($suite)) !== false) {
+    if (substr($entry, 0, 1) == ".")
+        continue;
+
+    $clazzName = substr($entry, 0, strrpos($entry, "."));
+    if ($tests !== null && !in_array($clazzName, $tests, true))
+        continue;
+
+    include "test/" . $entry;
+    if (!class_exists($clazzName)) {
+        continue;
+    }
+
+    $clazz = new ReflectionClass($clazzName);
+    if ($clazz->isSubclassOf("TestCase")) {
+        echo "Running {$clazz->name}\n";
+        $test = $clazz->newInstance();
+        $test->run();
+    }
+}
+closedir($suite);
+echo "Done\n";
+

Added: attic/forge/wikipediaimport/trunk/source/Category.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/Category.php                       
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/Category.php       2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,116 @@
+<?php
+
+define("STATUS_HOLLOW", 0);
+define("STATUS_LOADED", 1);
+
+/**
+ * A wiki category: a name, the list of its parent categories and the number
+ * of documents counted for it.
+ *
+ * Instances are kept in a static registry keyed by name; load() is the only
+ * way to obtain one. save() and restore() write and read that registry as a
+ * file holding a JSON array with one category object per line.
+ */
+class Category {
+    private static $categories = array();
+
+    private $parents;
+    private $name;
+    private $count = 0;
+
+    private function __construct(&$name, &$parents = null) {
+        $this->name = &$name;
+        $this->parents = &$parents;
+    }
+
+    /** Returns the category name. */
+    public function getName() {
+        return $this->name;
+    }
+
+    /** Returns the parent categories; an empty array when none are set. */
+    public function getParents() {
+        if ($this->parents == null) {
+            return array();
+        }
+        return $this->parents;
+    }
+
+    /**
+     * Returns STATUS_HOLLOW while no parents are set (null or an empty list)
+     * and STATUS_LOADED otherwise.
+     */
+    public function getStatus() {
+        if ($this->parents == null) {
+            return STATUS_HOLLOW;
+        } else {
+            return STATUS_LOADED;
+        }
+    }
+
+    /** Replaces the list of parent categories. */
+    public function setParents(&$parents) {
+        $this->parents = $parents;
+    }
+
+    /** Returns the number of documents counted for this category. */
+    public function getCount() {
+        return $this->count;
+    }
+
+    /** Counts one more document; the document itself is not stored. */
+    public function addDocument($document) {
+        $this->count++;
+    }
+
+    /** Returns an iterator over all categories in the registry. */
+    public static function find() {
+        return new ArrayIterator(self::$categories);
+    }
+
+    /** Returns the category with the given name, creating it when unknown. */
+    public static function load($name) {
+        if (!isset(self::$categories[$name])) {
+            self::$categories[$name] = new Category($name);
+        }
+        return self::$categories[$name];
+    }
+
+    /**
+     * Writes the registry to $file as a JSON array, one category per line.
+     * The "parents" key is only written for categories that have parents.
+     *
+     * @throws RuntimeException when $file cannot be opened for writing
+     */
+    public static function save($file = "categories.json") {
+        $fd = fopen($file, "w");
+        if ($fd === false) {
+            throw new RuntimeException("Cannot open $file for writing");
+        }
+        fwrite($fd, "[\n");
+        $first = true;
+        foreach (self::$categories as $category) {
+            if ($first) {
+                $first = false;
+            } else {
+                fwrite($fd, ",\n");
+            }
+            $record = array("name" => $category->name, "count" => $category->count);
+            $parents = $category->getParents();
+            if ($parents != null) {
+                $parentNames = array();
+                foreach ($parents as $parent) {
+                    $parentNames[] = $parent->name;
+                }
+                $record["parents"] = $parentNames;
+            }
+            fwrite($fd, json_encode($record));
+        }
+        fwrite($fd, "\n]\n");
+        fclose($fd);
+    }
+
+    /**
+     * Reads a file written by save() and merges its entries into the registry.
+     *
+     * @throws RuntimeException when $file cannot be opened or holds a line
+     *                          that is not a category object
+     */
+    public static function restore($file = "categories.json") {
+        $fd = fopen($file, "r");
+        if ($fd === false) {
+            // feof() on an invalid handle never becomes true, so bail out
+            // here instead of entering the read loop.
+            throw new RuntimeException("Cannot open $file for reading");
+        }
+        while (($linebuffer = fgets($fd)) !== false) {
+            $linebuffer = trim($linebuffer);
+            // Skip the enclosing brackets and the blank line that save()
+            // writes before "]" (the only content line of an empty registry).
+            if ($linebuffer == "" || $linebuffer == "[" || $linebuffer == "]") {
+                continue;
+            }
+            // Every entry but the last one ends with the separator comma.
+            if (substr($linebuffer, -1) == ",") {
+                $linebuffer = substr($linebuffer, 0, -1);
+            }
+            $obj = json_decode($linebuffer);
+            if (!is_object($obj) || !isset($obj->name)) {
+                fclose($fd);
+                throw new RuntimeException("Malformed category entry in $file: $linebuffer");
+            }
+            $category = self::load($obj->name);
+            $category->count = isset($obj->count) ? $obj->count : 0;
+            if (isset($obj->parents)) {
+                $parentCats = array();
+                foreach ($obj->parents as $parent) {
+                    $parentCats[] = self::load($parent);
+                }
+                $category->parents = $parentCats;
+            }
+        }
+        fclose($fd);
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/CategoryCycleDetector.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/CategoryCycleDetector.php          
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/CategoryCycleDetector.php  
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,55 @@
+<?php
+
+/**
+ * Depth-first walk over category parent links that records every cycle it
+ * runs into.
+ *
+ * Categories only need getName() and getParents(). Names are compared
+ * strictly, so numeric-looking names such as "10" and "1e1" are kept apart.
+ */
+class CategoryCycleDetector {
+    private $visited = array();
+    private $stack = array();
+    private $cycles = array();
+
+    public $debug;
+
+    public function __construct($debug = false) {
+        $this->debug = $debug;
+    }
+
+    /**
+     * Visits all given categories and returns the cycles found so far, each
+     * as a list of category names starting at the name that was reached twice.
+     * Visited names and found cycles are kept between calls.
+     */
+    public function getCycles($categories) {
+        foreach ($categories as $category) {
+            $this->visit($category);
+        }
+        return $this->cycles;
+    }
+
+    private function visit($category) {
+        $name = $category->getName();
+
+        if ($this->debug)
+            echo json_encode($this->stack) . ",\n";
+
+        // A name that is already on the current path closes a cycle: record
+        // the part of the path from its first occurrence onwards.
+        $start = array_search($name, $this->stack, true);
+        if ($start !== false) {
+            $this->cycles[] = array_slice($this->stack, $start);
+            return;
+        }
+
+        if (isset($this->visited[$name])) {
+            return;
+        }
+
+        $this->visited[$name] = true;
+        array_push($this->stack, $name);
+        foreach ($category->getParents() as $parent) {
+            if ($this->debug)
+                echo $name . ": ". $parent->getName() . "\n";
+            $this->visit($parent);
+        }
+        array_pop($this->stack);
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/source/CategoryFilter.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/CategoryFilter.php                 
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/CategoryFilter.php 2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,30 @@
+<?php
+
+include_once dirname(__FILE__) . "/IWikiHandler.php";
+
+/**
+ * Handler that forwards a document to the upstream handler only when the
+ * document is in at least one of the configured categories.
+ */
+class CategoryFilter implements IWikiHandler {
+    private $upstream;
+    private $categories;
+
+    /**
+     * @param IWikiHandler $upstream   handler that receives matching documents
+     * @param array        $categories names of the categories to let through
+     */
+    public function __construct($upstream, $categories) {
+        $this->upstream = $upstream;
+        // keep the names as array keys so that handle() can use isset()
+        $this->categories = array();
+        foreach ($categories as $category) {
+            $this->categories[$category] = true;
+        }
+    }
+
+    /**
+     * Returns the upstream handler's result for a matching document and false
+     * for a document without categories or without a matching category.
+     */
+    public function handle(WikiDocument $document) {
+        $documentCategories = $document->getCategories();
+        if ($documentCategories == null)
+            return false;
+        foreach ($documentCategories as $category) {
+            if (isset($this->categories[$category])) {
+                return $this->upstream->handle($document);
+            }
+        }
+        // no category matched: answer explicitly instead of returning null
+        return false;
+    }
+
+    public function close() {
+        $this->upstream->close();
+    }
+}

Added: attic/forge/wikipediaimport/trunk/source/CategoryHandler.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/CategoryHandler.php                
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/CategoryHandler.php        
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,40 @@
+<?php
+
+include_once dirname(__FILE__) . "/Category.php";
+
+/**
+ * Handler that counts every document for each of its categories and, for
+ * documents named "Category:<name>", records the document's categories as the
+ * parents of category <name>.
+ */
+class CategoryHandler implements IWikiHandler {
+
+    /** Always returns true. */
+    public function handle(WikiDocument $document) {
+        // a document without categories is treated like an empty list
+        $categories = $document->getCategories();
+        if ($categories == null) {
+            $categories = array();
+        }
+
+        foreach ($categories as $category) {
+            Category::load($category)->addDocument($document);
+        }
+
+        // $categoryName is filled in by matchCategory(); the parameter is
+        // declared by reference, so no "&" is needed (or allowed) at the call.
+        if ($this->matchCategory($document, $categoryName)) {
+            $this->addCategory($categoryName, $categories);
+        }
+        return true;
+    }
+
+    public function close() {
+    }
+
+    /**
+     * Returns true when the document name starts with "Category:" and stores
+     * the rest of the name in $categoryName.
+     */
+    private function matchCategory($document, &$categoryName) {
+        $name = $document->getName();
+        if (strpos($name, "Category:") === 0) {
+            // 9 == strlen("Category:")
+            $categoryName = substr($name, 9);
+            return true;
+        }
+        return false;
+    }
+
+    /** Sets the categories named in $parents as the parents of $name. */
+    private function addCategory($name, $parents) {
+        $parentCats = array();
+        foreach($parents as $parent) {
+            $parentCats[] = Category::load($parent);
+        }
+        $category = Category::load($name);
+        $category->setParents($parentCats);
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/HippoExtensionWriter.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/HippoExtensionWriter.php           
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/HippoExtensionWriter.php   
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,45 @@
+<?php
+
+include_once "library/jcr/node.php";
+
+/**
+ * Builds the hippo:initialize folder node and writes its system view to
+ * TARGET/src/main/resources/hippoecm-extension.xml.
+ *
+ * The TARGET constant has to be defined before an instance is created.
+ */
+class HippoExtensionWriter {
+
+    private $folder;
+
+    /**
+     * Registers the wikipedia namespace item and the wikipedia-root content
+     * item, then writes the extension file.
+     */
+    public function __construct() {
+        if (!defined("TARGET")) {
+            die("No TARGET defined");
+        }
+
+        $this->folder = new node("hippo:initialize", "hippo:initializefolder");
+
+        $nsItem = $this->folder->addNode("wikipedia", "hippo:initializeitem");
+        $nsItem->setProperty("hippo:sequence", value::newDouble(5000));
+        $nsItem->setProperty("hippo:namespace", "http://forge.onehippo.org/wikipedia/nt/1.0");
+        $nsItem->setProperty("hippo:nodetypesresource", "wikipedia.cnd");
+        $nsItem->setProperty("hippo:contentresource", "wikipedia-namespace.xml");
+        $nsItem->setProperty("hippo:contentroot", "/hippo:namespaces");
+
+        $root = $this->folder->addNode("wikipedia-root", "hippo:initializeitem");
+        $root->setProperty("hippo:sequence", value::newDouble(20000));
+        $root->setProperty("hippo:contentresource", "wikipedia-data.xml");
+        $root->setProperty("hippo:contentroot", "/");
+        $this->save();
+    }
+
+    /**
+     * Adds an initialize item for a content resource and rewrites the file.
+     *
+     * @param string $name       name of the initialize item node
+     * @param string $resource   value of hippo:contentresource
+     * @param int    $sequenceId added to 20001 to form hippo:sequence
+     * @param string $root       appended to "/content/documents/wikipedia"
+     */
+    public function addContent($name, $resource, $sequenceId, $root = "") {
+        $item = $this->folder->addNode($name, "hippo:initializeitem");
+        $item->setProperty("hippo:sequence", value::newDouble(20001 + $sequenceId));
+        $item->setProperty("hippo:contentresource", $resource);
+        $item->setProperty("hippo:contentroot", "/content/documents/wikipedia" . $root);
+        $this->save();
+    }
+
+    /** Writes the whole folder node to the extension file. */
+    private function save() {
+        $path = TARGET . "/src/main/resources/hippoecm-extension.xml";
+        $file = fopen($path, "w");
+        if ($file === false) {
+            die("Cannot write $path");
+        }
+        fwrite($file, $this->folder->toSystemView());
+        fclose($file);
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/IWikiHandler.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/IWikiHandler.php                   
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/IWikiHandler.php   2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,10 @@
+<?php
+
+include_once dirname(__FILE__) . "/WikiDocument.php";
+
+/**
+ * Contract for objects that are handed wiki documents one at a time.
+ */
+interface IWikiHandler {
+
+    /**
+     * Receives one document.
+     *
+     * NOTE(review): what the caller does with the return value is not
+     * visible here -- confirm against the parser.
+     */
+    public function handle(WikiDocument $document);
+
+    /**
+     * Lets the handler finish its work.
+     *
+     * NOTE(review): presumably called once after the last document -- confirm.
+     */
+    public function close();
+}

Added: attic/forge/wikipediaimport/trunk/source/JcrDocument.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/JcrDocument.php                    
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/JcrDocument.php    2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,28 @@
+<?php
+
+include_once "library/jcr/node.php";
+
+/**
+ * Wraps a WikiDocument in a "hippo:handle" node that holds one
+ * "wikipedia:article" child carrying the title, HTML and categories.
+ */
+class JcrDocument {
+
+    private $handle;
+
+    /**
+     * Builds the handle node and its article child for the given document.
+     *
+     * @param WikiDocument $document source of title, HTML and categories
+     */
+    public function __construct(WikiDocument $document) {
+        $title = $document->getTitle();
+
+        $handle = new node($title, "hippo:handle");
+        $handle->addMixin("hippo:hardhandle");
+
+        // The article child shares its name with the handle.
+        $variant = $handle->addNode($title, "wikipedia:article");
+        $variant->addMixin("hippo:harddocument");
+
+        $properties = array(
+            "wikipedia:title" => $title,
+            "wikipedia:html" => $document->getHTML(),
+            "hippostd:state" => "unpublished",
+            "hippostd:stateSummary" => "changed",
+            "hippostd:tags" => $document->getCategories(),
+        );
+        foreach ($properties as $propertyName => $propertyValue) {
+            $variant->setProperty($propertyName, $propertyValue);
+        }
+
+        $this->handle = $handle;
+    }
+
+    /**
+     * @return node the "hippo:handle" node created in the constructor
+     */
+    public function getNode() {
+        return $this->handle;
+    }
+
+}
+
+

Added: attic/forge/wikipediaimport/trunk/source/JcrDocumentWriter.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/JcrDocumentWriter.php              
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/JcrDocumentWriter.php      
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,55 @@
+<?php
+
+include_once dirname(__FILE__) . "/HippoExtensionWriter.php";
+include_once dirname(__FILE__) . "/JcrDocument.php";
+include_once dirname(__FILE__) . "/JcrWikiFolder.php";
+
+/**
+ * IWikiHandler that stores every handled document in a JcrWikiFolder and
+ * starts a new, sequentially numbered folder file after PAGES_PER_FOLDER
+ * documents. Each folder file is registered with the HippoExtensionWriter.
+ */
+class JcrDocumentWriter implements IWikiHandler {
+    /** Number of documents written to one folder before rolling over. */
+    const PAGES_PER_FOLDER = 500;
+
+    private $folder;
+    private $files = 1;
+    private $pages = 0;
+    private $extensionWriter;
+
+    /**
+     * Creates the extension writer and opens the first folder. Terminates
+     * the script when the TARGET constant is not defined.
+     */
+    public function __construct() {
+        if (!defined("TARGET")) {
+            die("No TARGET defined");
+        }
+        $this->extensionWriter = new HippoExtensionWriter();
+        $this->newFolder();
+    }
+
+    /**
+     * Adds the document to the current folder and rolls over to the next
+     * folder once PAGES_PER_FOLDER documents have been stored.
+     *
+     * @param WikiDocument $document the document to store
+     * @return bool always true
+     */
+    public function handle(WikiDocument $document) {
+        $this->folder->addDocument(new JcrDocument($document));
+
+        // The counter must live on the instance: a local variable starts
+        // from scratch on every call and would never reach the threshold.
+        $this->pages++;
+        if ($this->pages == self::PAGES_PER_FOLDER) {
+            $this->nextFolder();
+            $this->pages = 0;
+        }
+        return true;
+    }
+
+    /**
+     * Closes the folder that is currently open, if any.
+     */
+    public function close() {
+        $this->closeFolder();
+    }
+
+    /**
+     * Opens the folder for the current file number and registers its
+     * resource file with the extension writer.
+     */
+    private function newFolder() {
+        $name = "wikipedia-content-{$this->files}";
+        $fileName = TARGET . "/src/main/resources/" . $name . ".xml";
+        $this->folder = new JcrWikiFolder(
+            "wikipedia-" . $this->files,
+            $fileName
+        );
+        $this->extensionWriter->addContent(
+            $name,
+            $name . ".xml",
+            $this->files
+        );
+    }
+
+    /**
+     * Closes and forgets the current folder; safe to call when none is open.
+     */
+    private function closeFolder() {
+        if ($this->folder !== null) {
+            $this->folder->close();
+            $this->folder = null;
+        }
+    }
+
+    /**
+     * Finishes the current folder and opens the next numbered one.
+     */
+    private function nextFolder() {
+        $this->closeFolder();
+        $this->files++;
+        $this->newFolder();
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/JcrHashFolder.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/JcrHashFolder.php                  
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/JcrHashFolder.php  2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,50 @@
+<?php
+
+include_once "library/jcr/node.php";
+
+/**
+ * A hash folder contains containers for content folders.
+ *
+ * It owns a "hippostd:folder" node and keeps the JcrWikiFolder children
+ * added to it indexed by their node name.
+ */
+class JcrHashFolder {
+
+    private $fileName;
+    private $folder;
+    private $children = array();
+
+    /**
+     * @param string      $name     name of the folder node
+     * @param string|null $fileName file written by save(); may be omitted
+     *                              when the folder is never saved itself
+     */
+    public function __construct($name, $fileName = null) {
+        $this->fileName = $fileName;
+
+        $this->folder = new node($name, "hippostd:folder");
+        $this->folder->addMixin("hippo:harddocument");
+        $this->folder->setProperty("hippostd:foldertype", array("new-folder"));
+    }
+
+    /**
+     * Looks up a child folder by node name.
+     *
+     * @param string $hash node name the child was registered under
+     * @return JcrWikiFolder|null the child, or null when none was added
+     */
+    public function getFolder($hash) {
+        // Callers probe for folders that do not exist yet; answer null
+        // instead of reading an undefined array key.
+        return isset($this->children[$hash]) ? $this->children[$hash] : null;
+    }
+
+    /**
+     * Attaches the child's node to this folder and indexes the child by
+     * its node name.
+     */
+    public function addFolder(JcrWikiFolder $folder) {
+        $this->folder->addChild($folder->getNode());
+        $this->children[$folder->getNode()->getName()] = $folder;
+    }
+
+    /**
+     * Writes the system-view XML of this folder to the configured file.
+     * Terminates the script when no file name was configured.
+     *
+     * @throws RuntimeException when the file cannot be written
+     */
+    public function save() {
+        if ($this->fileName == null) {
+            die ("No file name specified for JcrHashFolder");
+        }
+        $xml = $this->folder->toSystemView();
+        if (file_put_contents($this->fileName, $xml) === false) {
+            throw new RuntimeException("Unable to write " . $this->fileName);
+        }
+    }
+
+    /**
+     * Saves this folder when it has a file name, then closes every child.
+     */
+    public function close() {
+        if ($this->fileName != null) {
+            $this->save();
+        }
+        foreach ($this->children as $name => $folder) {
+            $folder->close();
+        }
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/JcrHashFolderWriter.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/JcrHashFolderWriter.php            
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/JcrHashFolderWriter.php    
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,57 @@
+<?php
+
+include_once dirname(__FILE__) . "/IWikiHandler.php";
+include_once dirname(__FILE__) . "/HippoExtensionWriter.php";
+include_once dirname(__FILE__) . "/JcrDocument.php";
+include_once dirname(__FILE__) . "/JcrWikiFolder.php";
+include_once dirname(__FILE__) . "/JcrHashFolder.php";
+
+/**
+ * IWikiHandler that distributes documents over a two-level folder tree
+ * derived from the MD5 hash of the document title: hex characters 0-1
+ * select the container (one resource file each), characters 2-3 select
+ * the content folder inside that container.
+ */
+class JcrHashFolderWriter implements IWikiHandler {
+    private $folders;
+    private $files = 1;
+    private $extensionWriter;
+
+    /**
+     * Terminates the script when the TARGET constant is not defined.
+     */
+    public function __construct() {
+        if (!defined("TARGET")) {
+            die("No TARGET defined");
+        }
+
+        $this->extensionWriter = new HippoExtensionWriter();
+        $this->folders = array();
+    }
+
+    /**
+     * Stores the document in the folder selected by its title hash,
+     * creating the container and the content folder on first use, and
+     * rewrites the container file.
+     *
+     * @param WikiDocument $document the document to store
+     * @return bool always true
+     */
+    public function handle(WikiDocument $document) {
+        $md5 = md5($document->getTitle());
+
+        $firstLevel = substr($md5, 0, 2);
+        if (!isset($this->folders[$firstLevel])) {
+            $name = "wikipedia-content-{$firstLevel}";
+            $this->folders[$firstLevel] = new JcrHashFolder($firstLevel,
+                TARGET . "/src/main/resources/" . $name . ".xml");
+            // The counter is incremented before use, so the first
+            // container is registered with sequence id 2.
+            $this->extensionWriter->addContent($name, $name . ".xml",
+                ++$this->files);
+        }
+        $container = $this->folders[$firstLevel];
+
+        $secondLevel = substr($md5, 2, 2);
+        $folder = $container->getFolder($secondLevel);
+        if ($folder == null) {
+            // Content folders have no file of their own; they are written
+            // as part of their container.
+            $folder = new JcrWikiFolder($secondLevel);
+            $container->addFolder($folder);
+        }
+
+        $folder->addDocument(new JcrDocument($document));
+        // NOTE(review): this rewrites the whole container file for every
+        // document; close() saves it again. Kept as-is because it keeps
+        // the files current if the import is interrupted.
+        $container->save();
+
+        return true;
+    }
+
+    /**
+     * Closes every container created so far.
+     */
+    public function close() {
+        foreach ($this->folders as $name => $container) {
+            $container->close();
+        }
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/source/JcrWikiFolder.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/JcrWikiFolder.php                  
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/JcrWikiFolder.php  2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,45 @@
+<?php
+
+include_once "library/jcr/node.php";
+
+/**
+ * A "hippostd:folder" node that holds document handles and can optionally
+ * write itself to a file as system-view XML.
+ */
+class JcrWikiFolder {
+
+    private $fileName;
+    private $folder;
+
+    /**
+     * @param string      $name     name of the folder node
+     * @param string|null $fileName file written by save(); when omitted the
+     *                              folder is never written on its own
+     */
+    public function __construct($name, $fileName = null) {
+        $this->fileName = $fileName;
+
+        $this->folder = new node($name, "hippostd:folder");
+        $this->folder->addMixin("hippo:harddocument");
+        $this->folder->setProperty("hippostd:foldertype",
+            array("new-document", "new-folder"));
+    }
+
+    /**
+     * @return node the underlying folder node
+     */
+    public function getNode() {
+        return $this->folder;
+    }
+
+    /**
+     * Adds the document's handle node to the folder and, when a file name
+     * is configured, rewrites the folder file.
+     *
+     * @return mixed whatever node::addChild() returns for the handle
+     */
+    public function addDocument(JcrDocument $document) {
+        $handle = $this->folder->addChild($document->getNode());
+        if ($this->fileName != null) {
+            $this->save();
+        }
+        return $handle;
+    }
+
+    /**
+     * Saves the folder when a file name is configured; otherwise a no-op.
+     */
+    public function close() {
+        if ($this->fileName != null) {
+            $this->save();
+        }
+    }
+
+    /**
+     * Writes the system-view XML of the folder to the configured file.
+     * Terminates the script when no file name was configured.
+     *
+     * @throws RuntimeException when the file cannot be written
+     */
+    public function save() {
+        if ($this->fileName == null) {
+            die("No file name specified for JcrWikiFolder");
+        }
+        $xml = $this->folder->toSystemView();
+        if (file_put_contents($this->fileName, $xml) === false) {
+            throw new RuntimeException("Unable to write " . $this->fileName);
+        }
+    }
+}
+

Added: attic/forge/wikipediaimport/trunk/source/MavenProject.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/MavenProject.php                   
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/MavenProject.php   2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,24 @@
+<?php
+
+/**
+ * Prepares the directory layout and static resources of the generated
+ * Maven project.
+ */
+class MavenProject {
+
+    private $target;
+
+    /**
+     * Terminates the script when the target path does not exist.
+     *
+     * @param string $target root directory of the generated project
+     */
+    public function __construct($target = "target") {
+        if (!file_exists($target)) {
+            die("path $target does not exist");
+        }
+        $this->target = $target;
+    }
+
+    /**
+     * Creates src/main/resources below the target when missing, then
+     * copies pom.xml and the wikipedia resource files from the bundled
+     * resources directory.
+     */
+    public function create() {
+        $resourceDir = $this->target . "/src/main/resources";
+        if (!file_exists($resourceDir)) {
+            mkdir($resourceDir, 0777, true);
+        }
+
+        $bundled = dirname(__FILE__) . "/../resources/";
+        copy($bundled . "pom.xml", $this->target . "/pom.xml");
+
+        $resourceNames = array(
+            "wikipedia.cnd",
+            "wikipedia-data.xml",
+            "wikipedia-namespace.xml",
+        );
+        foreach ($resourceNames as $resourceName) {
+            copy($bundled . $resourceName, $resourceDir . "/" . $resourceName);
+        }
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/source/OrderedList.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/OrderedList.php                    
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/OrderedList.php    2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,64 @@
+<?
+
+/**
+ * Node of a singly linked list: a payload plus a link to the next node.
+ */
+class ListEntry {
+    /** Payload stored in this node. */
+    public $entry;
+    /** Following node, or null at the end of the list. */
+    public $next;
+
+    function __construct($entry, $next) {
+        $this->next = $next;
+        $this->entry = $entry;
+    }
+}
+
+/**
+ * Bounded linked list kept in ascending getCount() order. Once more than
+ * $max entries are present the smallest one (the head) is dropped, so the
+ * list retains the entries with the highest counts.
+ */
+class OrderedList {
+    private $start = null;
+    private $count = 0;
+    private $max;
+
+    /**
+     * @param int $max maximum number of entries to retain
+     */
+    function __construct($max = 1000) {
+        $this->max = $max;
+    }
+
+    /**
+     * Three-way comparison of two entries by getCount().
+     *
+     * @return int 1 when $a counts higher, -1 when lower, 0 otherwise
+     */
+    function compare($a, $b) {
+        if ($a->getCount() > $b->getCount()) {
+            return 1;
+        }
+        if ($a->getCount() < $b->getCount()) {
+            return -1;
+        }
+        return 0;
+    }
+
+    /**
+     * Inserts an entry at its sorted position.
+     *
+     * @param object $entry any object exposing getCount()
+     */
+    function add($entry) {
+        // Empty list: the entry becomes the head.
+        if ($this->start == null) {
+            $this->start = new ListEntry($entry, null);
+            $this->count = 1;
+            return;
+        }
+
+        // Smaller than the current minimum: prepend while there is room,
+        // otherwise the entry is discarded.
+        if ($this->compare($entry, $this->start->entry) < 0) {
+            if ($this->count < $this->max) {
+                $this->start = new ListEntry($entry, $this->start);
+                $this->count++;
+            }
+            return;
+        }
+
+        // Walk forward until the following node no longer counts lower
+        // than the new entry, then splice the entry in after $node.
+        $node = $this->start;
+        while (true) {
+            if ($node->next == null) {
+                break;
+            }
+            if (!($this->compare($entry, $node->next->entry) > 0)) {
+                break;
+            }
+            $node = $node->next;
+        }
+        $node->next = new ListEntry($entry, $node->next);
+        $this->count++;
+
+        // Over capacity: drop the smallest entry, i.e. the head.
+        if ($this->count > $this->max) {
+            $this->start = $this->start->next;
+            $this->count--;
+        }
+    }
+
+    /**
+     * @return ListEntry|null first node of the list, null when empty
+     */
+    function getHead() {
+        return $this->start;
+    }
+}
+
+

Added: attic/forge/wikipediaimport/trunk/source/TestCase.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/TestCase.php                       
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/TestCase.php       2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,44 @@
+<?
+
+/**
+ * Minimal test-case base class. run() invokes every public method whose
+ * name starts with "test", wrapped in setUp()/tearDown(), and records the
+ * names of the methods that threw.
+ */
+class TestCase {
+
+    private $errors = array();
+    private $running = false;
+
+    /**
+     * Per-test preparation; overrides must call parent::setUp().
+     */
+    protected function setUp() {
+        $this->running = true;
+    }
+
+    /**
+     * Per-test cleanup; overrides must call parent::tearDown().
+     */
+    protected function tearDown() {
+        $this->running = false;
+    }
+
+    /**
+     * @return array names of the test methods that threw during run()
+     */
+    public final function getErrors() {
+        return $this->errors;
+    }
+
+    /**
+     * Runs all test methods of the concrete class.
+     *
+     * @throws Exception when an override of setUp() or tearDown() did not
+     *                   call its parent implementation
+     */
+    public function run() {
+        $reflection = new ReflectionClass(get_class($this));
+        $methods = $reflection->getMethods(ReflectionMethod::IS_PUBLIC);
+        foreach ($methods as $method) {
+            if (substr($method->name, 0, 4) != "test") {
+                continue;
+            }
+            $this->setUp();
+            if (!$this->running) {
+                throw new Exception("A class in the hierarchy of " .
+                    get_class($this) .
+                    " did not call parent::setUp in their override");
+            }
+            try {
+                $method->invoke($this);
+            } catch (Exception $e) {
+                $this->recordFailure($method->name, $e);
+            } catch (Throwable $e) {
+                // PHP 7+: Error and AssertionError are not Exceptions.
+                // Catching them keeps one failing test from skipping
+                // tearDown() and aborting the remaining tests. On PHP 5
+                // this clause simply never matches.
+                $this->recordFailure($method->name, $e);
+            }
+            $this->tearDown();
+            if ($this->running) {
+                throw new Exception("A class in the hierarchy of " .
+                    get_class($this) .
+                    " did not call parent::tearDown in their override");
+            }
+        }
+    }
+
+    /**
+     * Prints the stack trace of a failure and remembers the test name.
+     */
+    private function recordFailure($testName, $failure) {
+        echo $failure->getTraceAsString() . "\n";
+        $this->errors[] = $testName;
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/source/WikiDocument.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/WikiDocument.php                   
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/WikiDocument.php   2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,44 @@
+<?php
+
+require_once 'library/wiki/parseRaw.inc.php';
+
+/**
+ * A wiki page: its raw name and raw wiki text, with helpers that derive
+ * a sanitised title, an HTML body and the referenced categories.
+ */
+class WikiDocument {
+    private $name;
+    private $text;
+
+    /**
+     * @param string $name page name as found in the dump
+     * @param string $text raw wiki markup of the page
+     */
+    public function __construct($name, $text) {
+        $this->name = $name;
+        $this->text = $text;
+    }
+
+    /**
+     * @return string the unmodified page name
+     */
+    public function getName() {
+        return $this->name;
+    }
+
+    /**
+     * @return string the page name with the characters / ' : " * removed
+     */
+    public function getTitle() {
+        return str_replace(array('/',"'", ':', '"', '*'), '', $this->name);
+    }
+
+    /**
+     * @return string the unmodified wiki markup
+     */
+    public function getText() {
+        return $this->text;
+    }
+
+    /**
+     * Converts the wiki text with parseRaw()/simpleText() and strips
+     * everything up to and including the first line that contains "}}".
+     *
+     * @return string trimmed markup following that preface, or the whole
+     *                trimmed markup when it contains no "}}"
+     */
+    public function getHTML() {
+        $markup = simpleText(parseRaw('', $this->text));
+        $matches = array();
+        // No "}}" means there is no preface to cut off; return early
+        // rather than reading an undefined match.
+        if (!preg_match("/.*}}/", $markup, $matches)) {
+            return trim($markup);
+        }
+        $preface = strpos($markup, $matches[0]) + strlen($matches[0]);
+        return trim(substr($markup, $preface));
+    }
+
+    /**
+     * Extracts the names from all "[[Category:Name]]" links in the text.
+     * Links containing a "|" are not matched by the pattern.
+     *
+     * @return array category names in order of appearance
+     */
+    public function getCategories() {
+        $matches = array();
+        $result = array();
+        preg_match_all("/\[\[Category:[^|\]]+\]\]/", $this->text, $matches);
+        foreach ($matches[0] as $match) {
+            // 11 = strlen("[[Category:"), 13 = that plus the closing "]]"
+            $result[] = substr($match, 11, (strlen($match) - 13));
+        }
+        return $result;
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/source/WikiParser.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/WikiParser.php                     
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/WikiParser.php     2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,83 @@
+<?php
+
+include_once dirname(__FILE__) . "/IWikiHandler.php";
+include_once dirname(__FILE__) . "/WikiDocument.php";
+
+/**
+ * Line-based reader for a wiki XML dump. Every <page>...</page> block is
+ * turned into a WikiDocument and passed to the configured handler.
+ */
+class WikiParser {
+    private $handler;
+
+    private $fileName;
+    private $limit = 1000;
+    private $showProgress = true;
+
+    function __construct(IWikiHandler $handler) {
+        $this->handler = $handler;
+    }
+
+    /**
+     * @param string $str path of the dump file to read
+     */
+    function setFile($str) {
+        $this->fileName = $str;
+    }
+
+    /**
+     * @param int $limit maximum number of accepted articles; 0 = no limit
+     */
+    function setLimit($limit) {
+        $this->limit = $limit;
+    }
+
+    /**
+     * @param bool $show whether progress lines are echoed
+     */
+    function setShowProgress($show) {
+        $this->showProgress = $show;
+    }
+
+    /**
+     * Reads the dump, hands each page to the handler until the end of the
+     * file or the article limit is reached, then closes the handler.
+     *
+     * @throws RuntimeException when the dump file cannot be opened
+     */
+    function parse() {
+        $file = fopen($this->fileName, "r");
+        if ($file === false) {
+            // feof() never reports end-of-file for an invalid handle, so
+            // continuing here would loop forever.
+            throw new RuntimeException("Unable to open " . $this->fileName);
+        }
+        $articles = 0;
+
+        $inPage = false;
+        $pagebuffer = '';
+        while (!feof($file)
+                && ($this->limit == 0 || $articles < $this->limit)) {
+            // read a line
+            $linebuffer = fgets($file);
+            if ($linebuffer === false) {
+                break;
+            }
+            // check if it is a beginning of a page
+            if (strpos($linebuffer, '<page>') !== false) {
+                $inPage = true;
+            }
+            // in a page write the line to the buffer
+            if ($inPage) {
+                $pagebuffer .= $linebuffer;
+            }
+            // check for end of a page
+            if (strpos($linebuffer, '</page>') !== false) {
+                $inPage = false;
+            }
+            // if end of a page transform page xml
+            if (!$inPage && !empty($pagebuffer)) {
+                $document = $this->createDocument($pagebuffer);
+                $pagebuffer = '';
+                if ($document !== null && $this->handler->handle($document)) {
+                    $articles++;
+                    $this->reportProgress($articles);
+                }
+            }
+        }
+
+        $this->handler->close();
+
+        // clean
+        fclose($file);
+        if ($this->showProgress) {
+            echo "Done parsing [100%]\n";
+        }
+    }
+
+    /**
+     * Builds a WikiDocument from the XML of one page.
+     *
+     * @return WikiDocument|null null when the XML cannot be parsed or has
+     *                           no <title> element
+     */
+    private function createDocument($pageXml) {
+        $xmlDom = new DOMDocument();
+        if (!$xmlDom->loadXML($pageXml)) {
+            return null;
+        }
+        $title = $xmlDom->getElementsByTagName('title')->item(0);
+        if ($title === null) {
+            return null;
+        }
+        $text = $xmlDom->getElementsByTagName('text')->item(0);
+        $body = ($text === null) ? '' : $text->nodeValue;
+        return new WikiDocument($title->nodeValue, $body);
+    }
+
+    /**
+     * Echoes a progress line after every 100 accepted articles.
+     */
+    private function reportProgress($articles) {
+        if (!$this->showProgress || ($articles % 100 != 0)) {
+            return;
+        }
+        echo "$articles articles parsed ";
+        if ($this->limit > 0) {
+            echo "[" . (int) (($articles / $this->limit) * 100) . "%]";
+        }
+        echo "\n";
+    }
+}
+

Added: attic/forge/wikipediaimport/trunk/source/WriterTestCase.php
===================================================================
--- attic/forge/wikipediaimport/trunk/source/WriterTestCase.php                 
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/source/WriterTestCase.php 2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,38 @@
+<?
+
+include_once dirname(__FILE__) . "/TestCase.php";
+
+/**
+ * Base class for tests that write into the TARGET project directory. The
+ * directory is created before and removed after every test.
+ */
+class WriterTestCase extends TestCase {
+
+    /**
+     * Defines TARGET on first use and creates its resources directory.
+     */
+    protected function setUp() {
+        parent::setUp();
+        // setUp() runs before every test method, but a constant can only
+        // be defined once per process.
+        if (!defined("TARGET")) {
+            define("TARGET", "/tmp/wikipedia-test");
+        }
+        $resources = TARGET . "/src/main/resources";
+        if (!is_dir($resources)) {
+            mkdir($resources, 0777, true);
+        }
+    }
+
+    /**
+     * Removes the TARGET directory tree.
+     */
+    protected function tearDown() {
+        if (defined("TARGET")) {
+            $this->delete(TARGET);
+        }
+        parent::tearDown();
+    }
+
+    /**
+     * Recursively deletes a file or directory; missing paths are ignored.
+     */
+    private function delete($file) {
+        if (is_dir($file)) {
+            $handle = opendir($file);
+            if ($handle !== false) {
+                // Compare against false explicitly: an entry named "0" is
+                // falsy and would otherwise end the loop early.
+                while (($entry = readdir($handle)) !== false) {
+                    if ($entry == "." || $entry == "..") {
+                        continue;
+                    }
+                    $this->delete($file . "/" . $entry);
+                }
+                closedir($handle);
+            }
+            rmdir($file);
+        } else if (file_exists($file)) {
+            unlink($file);
+        }
+    }
+
+}
+
+

Added: attic/forge/wikipediaimport/trunk/test/CategoryCycleDetectorTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/CategoryCycleDetectorTest.php        
                        (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/CategoryCycleDetectorTest.php        
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,63 @@
+<?
+
+include_once "source/CategoryCycleDetector.php";
+
+/**
+ * Category stand-in for the cycle detector tests: a name plus a list of
+ * parents that is assigned after construction.
+ */
+class TestCategory {
+    private $name;
+    private $parents;
+
+    public function __construct($name) {
+        $this->name = $name;
+    }
+
+    /** @return mixed the name given to the constructor */
+    public function getName() {
+        return $this->name;
+    }
+
+    /** @return mixed the value passed to setParents(), null before that */
+    public function getParents() {
+        return $this->parents;
+    }
+
+    /** Replaces the parents of this category. */
+    public function setParents($parents) {
+        $this->parents = $parents;
+    }
+}
+
+/**
+ * Tests for CategoryCycleDetector::getCycles().
+ */
+class CategoryCycleDetectorTest extends TestCase {
+
+    /**
+     * Two categories that are each other's parent form one cycle a, b.
+     */
+    function testSimpleLoop() {
+        $catA = new TestCategory("a");
+        $catB = new TestCategory("b");
+        $catA->setParents(array($catB));
+        $catB->setParents(array($catA));
+
+        $detector = new CategoryCycleDetector();
+        $found = $detector->getCycles(array($catA, $catB));
+        assert(count($found) == 1);
+
+        $cycle = $found[0];
+        assert($cycle[0] == "a");
+        assert($cycle[1] == "b");
+    }
+
+    /**
+     * a -> b -> c -> a plus c -> b: expects the cycles a, b, c and b, c.
+     */
+    function testMultiLoop() {
+        $catA = new TestCategory("a");
+        $catB = new TestCategory("b");
+        $catC = new TestCategory("c");
+        $catA->setParents(array($catB));
+        $catB->setParents(array($catC));
+        $catC->setParents(array($catA, $catB));
+
+        $detector = new CategoryCycleDetector();
+        $found = $detector->getCycles(array($catA, $catB, $catC));
+        assert(count($found) == 2);
+
+        $long = $found[0];
+        assert($long[0] == "a");
+        assert($long[1] == "b");
+        assert($long[2] == "c");
+
+        $short = $found[1];
+        assert($short[0] == "b");
+        assert($short[1] == "c");
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/CategoryFilterTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/CategoryFilterTest.php               
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/CategoryFilterTest.php       
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,47 @@
+<?
+
+include_once "source/CategoryFilter.php";
+
+/**
+ * IWikiHandler double that only records which of its methods were called.
+ */
+class TestHandler implements IWikiHandler {
+
+    /** Set to true once handle() has been called. */
+    public $handled = false;
+    /** Set to true once close() has been called. */
+    public $closed = false;
+
+    /**
+     * Marks the handler as having handled a document.
+     *
+     * @return bool always true
+     */
+    public function handle(WikiDocument $document) {
+        $this->handled = true;
+        return true;
+    }
+
+    /**
+     * Marks the handler as closed.
+     */
+    public function close() {
+        $this->closed = true;
+    }
+}
+
+/**
+ * Tests for CategoryFilter wrapped around a recording TestHandler.
+ */
+class CategoryFilterTest extends TestCase {
+
+    /**
+     * A document in the accepted category reaches the wrapped handler.
+     */
+    public function testCategoryIsHandled() {
+        $delegate = new TestHandler();
+        $filter = new CategoryFilter($delegate, array("TestCategory"));
+        $tagged = new WikiDocument(
+            "test-document",
+            "[[Category:TestCategory]]"
+        );
+        $accepted = $filter->handle($tagged);
+        assert ($accepted);
+        assert ($delegate->handled);
+    }
+
+    /**
+     * A document without the category is rejected and not forwarded.
+     */
+    public function testOtherIsFiltered() {
+        $delegate = new TestHandler();
+        $filter = new CategoryFilter($delegate, array("TestCategory"));
+        $untagged = new WikiDocument("test-document", "test");
+        $accepted = $filter->handle($untagged);
+        assert (!$accepted);
+        assert (!$delegate->handled);
+    }
+
+    /**
+     * Closing the filter closes the wrapped handler.
+     */
+    public function testClose() {
+        $delegate = new TestHandler();
+        $filter = new CategoryFilter($delegate, array("TestCategory"));
+        $filter->close();
+        assert($delegate->closed);
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/test/CategoryTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/CategoryTest.php                     
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/CategoryTest.php     2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,38 @@
+<?
+
+include dirname(__FILE__) . "/../source/Category.php";
+
+/**
+ * Tests saving and restoring categories through "category_test.json" in
+ * the current working directory.
+ */
+class CategoryTest extends TestCase {
+
+    /**
+     * Removes the JSON file written by the test that just ran.
+     */
+    protected function tearDown() {
+        parent::tearDown();
+        unlink("category_test.json");
+    }
+
+    /**
+     * Loads category "xxx", saves, and checks that the saved file decodes
+     * to an array whose first element is named "xxx".
+     */
+    public function testReload() {
+        $category = Category::load("xxx");
+        Category::save("category_test.json");
+
+        $contents = file_get_contents("category_test.json");
+        $obj = json_decode($contents);
+        assert(is_array($obj));
+        assert($obj[0]->name == "xxx");
+    }
+
+    /**
+     * Restores a file containing "yyy" with count 12, checks the count,
+     * then saves again and compares the file with the expected text.
+     *
+     * NOTE(review): the expected output also lists "xxx" with count 0,
+     * which this test never creates -- presumably left over from
+     * testReload() via state kept inside Category; confirm that the test
+     * relies on that ordering.
+     */
+    public function testRestore() {
+        file_put_contents("category_test.json", "[\n{\"name\": \"yyy\", 
\"count\": 12}\n]");
+        Category::restore("category_test.json");
+        $category = Category::load("yyy");
+        assert($category->getCount() == 12);
+
+        Category::save("category_test.json");
+        $cmp = <<<HEREDOC
+[
+{"name":"xxx","count":0},
+{"name":"yyy","count":12}
+]
+
+HEREDOC;
+        assert($cmp == file_get_contents("category_test.json"));
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/HippoExtensionWriterTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/HippoExtensionWriterTest.php         
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/HippoExtensionWriterTest.php 
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,19 @@
+<?
+
+include_once dirname(__FILE__) . "/../source/WriterTestCase.php";
+include_once dirname(__FILE__) . "/../source/HippoExtensionWriter.php";
+include_once dirname(__FILE__) . "/../library/jcr/parser.php";
+
+/**
+ * Tests that HippoExtensionWriter writes the extension file.
+ */
+class HippoExtensionWriterTest extends WriterTestCase {
+
+    /**
+     * After addContent() the extension file contains exactly one node
+     * with the given name.
+     */
+    public function testSaveAfterAddContent() {
+        $extensionWriter = new HippoExtensionWriter();
+        $extensionWriter->addContent("test", "content.xml", 1);
+
+        $parser = new JcrXmlParser();
+        $path = TARGET . "/src/main/resources/hippoecm-extension.xml";
+        $root = $parser->parse(file_get_contents($path));
+        $matching = $root->getNodes("test");
+        assert (count($matching) == 1);
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/JcrDocumentWriterTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/JcrDocumentWriterTest.php            
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/JcrDocumentWriterTest.php    
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,39 @@
+<?php
+
+include_once "source/WriterTestCase.php";
+
+include_once "source/WikiParser.php";
+include_once "source/JcrDocumentWriter.php";
+include_once "library/jcr/parser.php";
+
+/**
+ * Tests the folder file produced by JcrDocumentWriter.
+ */
+class JcrDocumentWriterTest extends WriterTestCase {
+
+    /**
+     * Writes one document and verifies the folder, the handle and the
+     * article node (name, title and HTML) in wikipedia-content-1.xml.
+     */
+    function testWriter() {
+        $writer = new JcrDocumentWriter();
+        $source = new WikiDocument("test-title", "test-content");
+        $writer->handle($source);
+        $writer->close();
+
+        $xml = file_get_contents(
+            TARGET . "/src/main/resources/wikipedia-content-1.xml"
+        );
+        $parser = new JcrXmlParser();
+        $folder = $parser->parse($xml);
+        assert($folder->getName() == "wikipedia-1");
+        assert($folder->getPrimaryNodeType() == "hippostd:folder");
+        assert(
+            "hippo:harddocument" ==
+            $folder->getProperty("jcr:mixinTypes")->getValue()->getString()
+        );
+
+        // the folder holds exactly one handle
+        $handles = $folder->getNodes();
+        assert (count($handles) == 1);
+        $handle = $handles[0];
+        assert ($handle->getName() == "test-title");
+        assert ($handle->getPrimaryNodeType() == "hippo:handle");
+
+        // the handle holds exactly one article variant
+        $variants = $handle->getNodes();
+        assert (count($variants) == 1);
+        $article = $variants[0];
+        assert ($article->getName() == "test-title");
+        assert (
+            $article->getProperty("wikipedia:title")->getValue()->getString()
+            == "test-title"
+        );
+        assert (
+            $article->getProperty("wikipedia:html")->getValue()->getString()
+            == "test-content"
+        );
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/test/JcrHashFolderWriterTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/JcrHashFolderWriterTest.php          
                (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/JcrHashFolderWriterTest.php  
2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,64 @@
+<?
+
+include_once "library/jcr/parser.php";
+include_once "source/WriterTestCase.php";
+include_once "source/JcrHashFolderWriter.php";
+include_once "source/WikiDocument.php";
+
+/**
+ * Tests the hash-based folder layout written by JcrHashFolderWriter.
+ */
+class JcrHashFolderWriterTest extends WriterTestCase {
+
+    const DOCUMENT_NAME = "test-document";
+
+    /**
+     * Writes one document through a fresh writer and closes it.
+     *
+     * @return string MD5 hash of the document name
+     */
+    private function importDocument() {
+        $writer = new JcrHashFolderWriter();
+        $writer->handle(new WikiDocument(self::DOCUMENT_NAME, "content"));
+        $writer->close();
+
+        return md5(self::DOCUMENT_NAME);
+    }
+
+    /**
+     * The first-level container file exists, contains the second-level
+     * folder and is registered in the extension file.
+     */
+    public function testHashFolderIsCreated() {
+        $hash = $this->importDocument();
+        $name = "wikipedia-content-" . substr($hash, 0, 2);
+        $resources = TARGET . "/src/main/resources/";
+
+        $containerFile = $resources . $name . ".xml";
+        assert(file_exists($containerFile));
+
+        $parser = new JcrXmlParser();
+        $container = $parser->parse(file_get_contents($containerFile));
+        assert($container->getName() == substr($hash, 0, 2));
+        assert($container->getPrimaryNodeType() == "hippostd:folder");
+
+        $subFolders = $container->getNodes(substr($hash, 2, 2));
+        assert (count($subFolders) == 1);
+
+        $extensionFile = $resources . "hippoecm-extension.xml";
+        $extension = $parser->parse(file_get_contents($extensionFile));
+        $registered = $extension->getNodes($name);
+        assert (count($registered) == 1);
+    }
+
+    /**
+     * Same checks as above, and additionally the second-level folder
+     * contains the document.
+     */
+    public function testSubFolderIsCreated() {
+        $hash = $this->importDocument();
+        $name = "wikipedia-content-" . substr($hash, 0, 2);
+        $resources = TARGET . "/src/main/resources/";
+
+        $containerFile = $resources . $name . ".xml";
+        assert(file_exists($containerFile));
+
+        $parser = new JcrXmlParser();
+        $container = $parser->parse(file_get_contents($containerFile));
+        assert($container->getName() == substr($hash, 0, 2));
+        assert($container->getPrimaryNodeType() == "hippostd:folder");
+
+        $subFolders = $container->getNodes(substr($hash, 2, 2));
+        assert (count($subFolders) == 1);
+        $subFolder = $subFolders[0];
+
+        $documents = $subFolder->getNodes(self::DOCUMENT_NAME);
+        assert (count($documents) == 1);
+
+        $extensionFile = $resources . "hippoecm-extension.xml";
+        $extension = $parser->parse(file_get_contents($extensionFile));
+        $registered = $extension->getNodes($name);
+        assert (count($registered) == 1);
+    }
+
+}

Added: attic/forge/wikipediaimport/trunk/test/JcrXmlParserTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/JcrXmlParserTest.php                 
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/JcrXmlParserTest.php 2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,42 @@
+<?
+
+include_once "source/TestCase.php";
+include_once "library/jcr/parser.php";
+
+/**
+ * Tests JcrXmlParser on a small system-view document.
+ */
+class JcrXmlParserTest extends TestCase {
+
+    /**
+     * Parses a node with one child and checks the primary types, the
+     * single-valued property and the multi-valued property.
+     */
+    public function testParser() {
+        $parser = new JcrXmlParser();
+        // The fixture must be well-formed XML: the start tag ends directly
+        // after the namespace declaration, with no stray characters.
+        $node = $parser->parse(<<<HEREDOC
+<?xml version="1.0"?>
+<sv:node sv:name="test-node" xmlns:sv="http://www.jcp.org/jcr/sv/1.0">
+  <sv:property sv:name="jcr:primaryType">
+    <sv:value>jcr:test</sv:value>
+  </sv:property>
+  <sv:node sv:name="child-node">
+    <sv:property sv:name="jcr:primaryType">
+      <sv:value>jcr:child</sv:value>
+    </sv:property>
+    <sv:property sv:name="test-property">
+      <sv:value>test</sv:value>
+    </sv:property>
+    <sv:property sv:name="test-multi">
+      <sv:value>aap</sv:value>
+      <sv:value>noot</sv:value>
+    </sv:property>
+  </sv:node>
+</sv:node>
+HEREDOC
+        );
+        assert (
+            $node->getProperty("jcr:primaryType")->getValue()->getString()
+            == "jcr:test"
+        );
+        assert (count($node->getNodes()) == 1);
+        $children = $node->getNodes();
+        $child = $children[0];
+        assert (
+            $child->getProperty("jcr:primaryType")->getValue()->getString()
+            == "jcr:child"
+        );
+        assert (
+            $child->getProperty("test-property")->getValue()->getString()
+            == "test"
+        );
+        $values = $child->getProperty("test-multi")->getValues();
+        assert ($values[0]->getString() == "aap");
+        assert ($values[1]->getString() == "noot");
+    }
+}
+

Added: attic/forge/wikipediaimport/trunk/test/MavenProjectTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/MavenProjectTest.php                 
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/MavenProjectTest.php 2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,15 @@
+<?php
+
+include_once dirname(__FILE__) . "/../source/WriterTestCase.php";
+include_once dirname(__FILE__) . "/../source/MavenProject.php";
+
+/**
+ * Tests that MavenProject::create() populates the target directory.
+ */
+class MavenProjectTest extends WriterTestCase {
+
+    /**
+     * create() copies pom.xml and wikipedia.cnd into the project.
+     */
+    public function testResourcesAreCopied() {
+        $mavenProject = new MavenProject(TARGET);
+        $mavenProject->create();
+
+        assert(file_exists(TARGET . "/pom.xml"));
+        assert(file_exists(TARGET . "/src/main/resources/wikipedia.cnd"));
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/OrderedListTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/OrderedListTest.php                  
        (rev 0)
+++ attic/forge/wikipediaimport/trunk/test/OrderedListTest.php  2017-03-02 
11:16:37 UTC (rev 58746)
@@ -0,0 +1,58 @@
+<?php
+// Includes are anchored to this file so the test loads from any working directory.
+include_once dirname(__FILE__) . "/../source/TestCase.php";
+include_once dirname(__FILE__) . "/../source/OrderedList.php";
+
+class entry {
+    // Value the tests below order by; public because they read it directly.
+    public $count;
+    public function __construct($count) {
+        $this->count = $count;
+    }
+
+    public function getCount() {
+        return $this->count;
+    }
+}
+
+class OrderedListTest extends TestCase {
+    // Each test walks the list from getHead() through ->next, checking ->entry->count.
+    // Entries added as 2, 1 are expected back in ascending order: 1, 2.
+    public function testSimpleOrdering() {
+        $ordered = new OrderedList();
+        $ordered->add(new entry(2));
+        $ordered->add(new entry(1));
+        $first = $ordered->getHead();
+        assert($first->entry->count == 1);
+        $second = $first->next;
+        assert($second->entry->count == 2);
+        assert($second->next == null);
+    }
+
+    // Two entries with the same count are both expected to stay in the list.
+    public function testDoublures() {
+        $ordered = new OrderedList();
+        $ordered->add(new entry(1));
+        $ordered->add(new entry(1));
+        $first = $ordered->getHead();
+        assert($first->entry->count == 1);
+        $second = $first->next;
+        assert($second->entry->count == 1);
+        assert($second->next == null);
+    }
+
+    // With constructor argument 2, adding 1, 2, 3 is expected to leave only 2 and 3.
+    public function testMaximum() {
+        $ordered = new OrderedList(2);
+        $ordered->add(new entry(1));
+        $ordered->add(new entry(2));
+        $ordered->add(new entry(3));
+        $first = $ordered->getHead();
+        assert($first->entry->count == 2);
+        $second = $first->next;
+        assert($second->entry->count == 3);
+        assert($second->next == null);
+    }
+
+}
+

Added: attic/forge/wikipediaimport/trunk/test/TestCaseTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/TestCaseTest.php	(rev 0)
+++ attic/forge/wikipediaimport/trunk/test/TestCaseTest.php	2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,80 @@
+<?php
+// Include is anchored to this file so the test loads from any working directory.
+include_once dirname(__FILE__) . "/../source/TestCase.php";
+
+class TestClass extends TestCase {
+    // One flag per lifecycle step, so a caller can see which steps run() reached.
+    public $setup = false;
+    public $hasRun = false;
+    public $teardown = false;
+
+    public function testHello() {
+        $this->hasRun = true;
+    }
+
+    protected function setUp() {
+        parent::setUp();
+        $this->setup = true;
+    }
+
+    protected function tearDown() {
+        $this->teardown = true;
+        parent::tearDown();
+    }
+}
+
+class TestException extends TestCase {
+    public $teardown = false;
+    // Always throws; the flag above shows whether tearDown() still ran afterwards.
+    public function testThrow() {
+        throw new Exception("Something went horribly wrong!");
+    }
+
+    protected function tearDown() {
+        $this->teardown = true;
+        parent::tearDown();
+    }
+}
+
+class TestTeardown extends TestCase {
+    // Deliberately omits parent::tearDown(); TestCaseTest expects run() to throw.
+    protected function tearDown() {}
+
+    public function testTeardown() {
+    }
+}
+
+class TestCaseTest extends TestCase {
+    // A passing test must see setUp(), the test body and tearDown() all run.
+    public function testSubclass() {
+        $test = new TestClass();
+        $test->run();
+        assert ($test->setup);
+        assert ($test->hasRun);
+        assert ($test->teardown);
+    }
+    // tearDown() must still run, and the failure be listed, when a test throws.
+    public function testTeardownRunsAfterException() {
+        $test = new TestException();
+        @ob_start(); // hide whatever run() prints about the failing test
+        try {
+            $test->run();
+        } catch (Exception $e) {
+            ob_end_clean(); // a buffer left open would swallow all later output
+            throw $e;
+        }
+        ob_end_clean();
+        assert ($test->teardown);
+        assert (in_array("testThrow", $test->getErrors()));
+    }
+    // run() must throw when a subclass tearDown() skips parent::tearDown().
+    public function testTeardownMustCallSuper() {
+        $test = new TestTeardown();
+        try {
+            $test->run();
+        } catch (Exception $e) {
+            return; // this is OK
+        }
+        throw new Exception("Teardown did not have to call parent");
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/WikiDocumentTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/WikiDocumentTest.php	(rev 0)
+++ attic/forge/wikipediaimport/trunk/test/WikiDocumentTest.php	2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,20 @@
+<?php
+
+include_once dirname(__FILE__) . "/../source/TestCase.php";
+include_once dirname(__FILE__) . "/../source/WikiDocument.php";
+
+class WikiDocumentTest extends TestCase {
+    // Two category links give two categories; the first keeps its inner colon ("a:b").
+    public function testMultiCategory() {
+        $doc = new WikiDocument("xyz", "[[Category:a:b]] [[Category:c]]");
+        assert(count($doc->getCategories()) == 2);
+        $found = $doc->getCategories();
+        assert($found[0] == "a:b");
+    }
+    // Title and content given to the constructor come back from getTitle()/getHtml().
+    public function testBasic() {
+        $doc = new WikiDocument("test-title", "test-content");
+        assert ($doc->getTitle() == "test-title");
+        assert ($doc->getHtml() == "test-content");
+    }
+}

Added: attic/forge/wikipediaimport/trunk/test/WriterTestCaseTest.php
===================================================================
--- attic/forge/wikipediaimport/trunk/test/WriterTestCaseTest.php	(rev 0)
+++ attic/forge/wikipediaimport/trunk/test/WriterTestCaseTest.php	2017-03-02 11:16:37 UTC (rev 58746)
@@ -0,0 +1,22 @@
+<?php
+// Include is anchored to this file so the test loads from any working directory.
+include_once dirname(__FILE__) . "/../source/WriterTestCase.php";
+
+class WriterTestClass extends WriterTestCase {
+    public $defined = false;
+
+    // Records, from inside a running test, that the TARGET constant exists.
+    public function testDefined() {
+        // Only ever raise the flag; an undefined TARGET leaves it as it was.
+        $this->defined = defined("TARGET") ? true : $this->defined;
+    }
+}
+
+class WriterTestCaseTest extends TestCase {
+    // Running a WriterTestCase subclass must make TARGET visible to its tests.
+    public function testTargetIsDefined() {
+        $fixture = new WriterTestClass();
+        $fixture->run();
+        assert ($fixture->defined);
+    }
+}

_______________________________________________
Hippocms-svn mailing list
Hippocms-svn@lists.onehippo.org
https://lists.onehippo.org/mailman/listinfo/hippocms-svn

Reply via email to