Added: 
incubator/any23/trunk/test-resources/src/test/resources/html/default-language.html
URL: 
http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/default-language.html?rev=1371537&view=auto
==============================================================================
--- 
incubator/any23/trunk/test-resources/src/test/resources/html/default-language.html
 (added)
+++ 
incubator/any23/trunk/test-resources/src/test/resources/html/default-language.html
 Fri Aug 10 00:40:13 2012
@@ -0,0 +1,124 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN"
+  "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" version="XHTML+RDFa 
1.0" dir="ltr"
+  xmlns:admin="http://webns.net/mvcb/"
+  xmlns:content="http://purl.org/rss/1.0/modules/content/"
+  xmlns:dc="http://purl.org/dc/terms/"
+  xmlns:foaf="http://xmlns.com/foaf/0.1/"
+  xmlns:owl="http://www.w3.org/2002/07/owl#"
+  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
+  xmlns:rss="http://purl.org/rss/1.0/"
+  xmlns:tags="http://www.holygoat.co.uk/owl/redwood/0.1/tags/"
+  xmlns:sioc="http://rdfs.org/sioc/ns#"
+  xmlns:sioct="http://rdfs.org/sioc/types#"
+  xmlns:ctag="http://commontag.org/ns#"
+  xmlns:skos="http://www.w3.org/2004/02/skos/core#"
+  xmlns:xsd="http://www.w3.org/2001/XMLSchema#">
+
+<head profile="http://www.w3.org/1999/xhtml/vocab">
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<link rel="shortcut icon" 
href="http://drupalrdf.openspring.net/misc/favicon.ico" 
type="image/vnd.microsoft.icon" />
+<meta name="Generator" content="Drupal 7 (http://drupal.org)" />
+  <title>Welcome to mydomain.net</title>
+  <style type="text/css" media="all">@import 
url("http://drupalrdf.openspring.net/modules/system/system.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/modules/system/system-behavior.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/modules/system/system-menus.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/modules/system/system-messages.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/modules/node/node.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/modules/user/user.css?f0000000000000000000");
+@import 
url("http://drupalrdf.openspring.net/themes/garland/style.css?f0000000000000000000");</style>
+<style type="text/css" media="print">@import 
url("http://drupalrdf.openspring.net/themes/garland/print.css?f0000000000000000000");</style>
+
+<!--[if lt IE 7]>
+<link type="text/css" rel="stylesheet" 
href="http://drupalrdf.openspring.net/themes/garland/fix-ie.css?f0000000000000000000"
 media="all" />
+<![endif]-->
+  </head>
+<body class="html front not-logged-in one-sidebar sidebar-first page-node 
fluid-width" >
+  <div id="skip-link">
+    <a href="#main-content">Skip to main content</a>
+  </div>
+  <div class="region region-page-top">
+  </div>
+
+  <div id="wrapper">
+    <div id="container" class="clearfix">
+
+      <div id="header">
+        <div id="logo-floater">
+                              <div id="branding"><strong><a href="/" 
title="drupalrdf.openspring.net ">
+                          <img 
src="http://drupalrdf.openspring.net/themes/garland/logo.png" 
alt="drupalrdf.openspring.net " id="logo" />
+                        <span>drupalrdf.openspring.net</span>            
</a></strong></div>
+                          </div>
+
+                      </div> <!-- /#header -->
+
+              <div id="sidebar-first" class="sidebar">
+          <div class="region region-sidebar-first">
+  <div id="block-user-login" class="block block-user clearfix">
+
+    <h2 class="title">User login</h2>
+
+  <div class="content"><form action="/node?destination=node"  
accept-charset="UTF-8" method="post" id="user-login-form">
+<div><div class="form-item form-type-textfield form-item-name">
+  <label for="edit-name">Username <span class="form-required" title="This 
field is required.">*</span></label>
+ <input type="text" maxlength="60" name="name" id="edit-name" size="15" 
value="" class="form-text required" />
+</div>
+<div class="form-item form-type-password form-item-pass">
+  <label for="edit-pass">Password <span class="form-required" title="This 
field is required.">*</span></label>
+ <input type="password" name="pass" id="edit-pass"  maxlength="60"  size="15"  
class="form-text required" />
+</div>
+<input type="submit" name="op" id="edit-submit" value="Log in"  
class="form-submit" />
+<div class="item-list"><ul><li class="first"><a href="/user/register" 
title="Create a new user account.">Create new account</a></li>
+<li class="last"><a href="/user/password" title="Request new password via 
e-mail.">Request new password</a></li>
+</ul></div><input type="hidden" name="form_build_id" 
id="form-7764a35c4cc1da9c993a9754ff1f51c1" 
value="form-7764a35c4cc1da9c993a9754ff1f51c1"  />
+<input type="hidden" name="form_id" id="edit-user-login-block" 
value="user_login_block"  />
+
+</div></form>
+</div>
+</div>
+</div>
+        </div>
+
+      <div id="center"><div id="squeeze"><div class="right-corner"><div 
class="left-corner">
+                              <a id="main-content"></a>
+                                          <h1>Welcome to 
drupalrdf.openspring.net</h1>
+                                                                               
 <div class="clearfix">
+            <div class="region region-content">
+  <div id="block-system-main" class="block block-system clearfix">
+
+
+  <div class="content"><div id="first-time"><p>No front page content has been 
created yet.</p></div></div>
+</div>
+</div>
+          </div>
+                    <div class="region region-footer">
+  <div id="block-system-powered-by" class="block block-system clearfix">
+
+
+  <div class="content"><span>Powered by <a 
href="http://drupal.org">Drupal</a></span></div>
+</div>
+</div>
+      </div></div></div></div> <!-- /.left-corner, /.right-corner, /#squeeze, 
/#center -->
+
+
+    </div> <!-- /#container -->
+  </div> <!-- /#wrapper -->
+  </body>
+</html>
\ No newline at end of file

Added: 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-broken.html
URL: 
http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-broken.html?rev=1371537&view=auto
==============================================================================
--- 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-broken.html
 (added)
+++ 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-broken.html
 Fri Aug 10 00:40:13 2012
@@ -0,0 +1,20 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<span about="#me" xmlns:foaf="http://xmlns.com/foaf/0.1/">
+  <span rel="foaf:homepage" resource="http://richard.cyganiak.de/" />
+  <span property="foaf:nick" content="cygri" />
+</span>
\ No newline at end of file

Added: 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-works.html
URL: 
http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-works.html?rev=1371537&view=auto
==============================================================================
--- 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-works.html
 (added)
+++ 
incubator/any23/trunk/test-resources/src/test/resources/html/empty-span-works.html
 Fri Aug 10 00:40:13 2012
@@ -0,0 +1,20 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<span about="#me" xmlns:foaf="http://xmlns.com/foaf/0.1/">
+    <span rel="foaf:homepage" resource="http://richard.cyganiak.de/"></span>
+    <span property="foaf:nick" content="cygri"></span>
+</span>

Added: 
incubator/any23/trunk/test-resources/src/test/resources/html/encoding-test.html
URL: 
http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/encoding-test.html?rev=1371537&view=auto
==============================================================================
--- 
incubator/any23/trunk/test-resources/src/test/resources/html/encoding-test.html 
(added)
+++ 
incubator/any23/trunk/test-resources/src/test/resources/html/encoding-test.html 
Fri Aug 10 00:40:13 2012
@@ -0,0 +1,314 @@
+<!doctype html>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html lang="en" dir="ltr">
+<head>
+<title>Knud Möller - semanticweb.org</title>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+<meta name="generator" content="MediaWiki 1.16alpha" />
+<link rel="alternate" type="application/x-wiki" title="Edit" 
href="/index.php?title=Knud_M%C3%B6ller&amp;action=edit" />
+<link rel="edit" title="Edit" 
href="/index.php?title=Knud_M%C3%B6ller&amp;action=edit" />
+<link rel="shortcut icon" href="/favicon.ico" />
+<link rel="search" type="application/opensearchdescription+xml" 
href="/opensearch_desc.php" title="semanticweb.org (en)" />
+<link rel="alternate" type="application/rss+xml" title="semanticweb.org RSS 
Feed" href="/index.php?title=Special:RecentChanges&amp;feed=rss" />
+<link rel="alternate" type="application/atom+xml" title="semanticweb.org Atom 
Feed" href="/index.php?title=Special:RecentChanges&amp;feed=atom" />
+<link rel="stylesheet" href="/skins/vector/main-ltr.css?243" media="screen" />
+<link rel="stylesheet" href="/skins/common/shared.css?243" media="screen" />
+<link rel="stylesheet" href="/skins/common/commonPrint.css?243" media="print" 
/>
+<link rel="stylesheet" 
href="/index.php?title=MediaWiki:Common.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=18000&amp;action=raw&amp;maxage=18000"
 />
+<link rel="stylesheet" 
href="/index.php?title=MediaWiki:Print.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=18000&amp;action=raw&amp;maxage=18000"
 media="print" />
+<link rel="stylesheet" 
href="/index.php?title=MediaWiki:Vector.css&amp;usemsgcache=yes&amp;ctype=text%2Fcss&amp;smaxage=18000&amp;action=raw&amp;maxage=18000"
 />
+<link rel="stylesheet" 
href="/index.php?title=-&amp;action=raw&amp;maxage=18000&amp;gen=css" />
+<script>
+var skin="vector",
+stylepath="/skins",
+wgArticlePath="/wiki/$1",
+wgScriptPath="",
+wgScriptExtension=".php",
+wgScript="/index.php",
+wgVariantArticlePath=false,
+wgActionPaths={},
+wgServer="http://semanticweb.org",
+wgCanonicalNamespace="",
+wgCanonicalSpecialPageName=false,
+wgNamespaceNumber=0,
+wgPageName="Knud_Möller",
+wgTitle="Knud Möller",
+wgAction="view",
+wgArticleId=3008,
+wgIsArticle=true,
+wgUserName=null,
+wgUserGroups=null,
+wgUserLanguage="en",
+wgContentLanguage="en",
+wgBreakFrames=false,
+wgCurRevisionId=35464,
+wgVersion="1.16alpha",
+wgEnableAPI=true,
+wgEnableWriteAPI=true,
+wgSeparatorTransformTable=["", ""],
+wgDigitTransformTable=["", ""],
+wgMainPageTitle="Main Page",
+wgFormattedNamespaces={"-2": "Media", "-1": "Special", "0": "", "1": "Talk", 
"2": "User", "3": "User talk", "4": "semanticweb.org", "5": "semanticweb.org 
talk", "6": "File", "7": "File talk", "8": "MediaWiki", "9": "MediaWiki talk", 
"10": "Template", "11": "Template talk", "12": "Help", "13": "Help talk", "14": 
"Category", "15": "Category talk", "102": "Property", "103": "Property talk", 
"104": "Type", "105": "Type talk", "106": "Form", "107": "Form talk", "108": 
"Concept", "109": "Concept talk"},
+wgNamespaceIds={"media": -2, "special": -1, "": 0, "talk": 1, "user": 2, 
"user_talk": 3, "semanticweb.org": 4, "semanticweb.org_talk": 5, "file": 6, 
"file_talk": 7, "mediawiki": 8, "mediawiki_talk": 9, "template": 10, 
"template_talk": 11, "help": 12, "help_talk": 13, "category": 14, 
"category_talk": 15, "property": 102, "property_talk": 103, "type": 104, 
"type_talk": 105, "form": 106, "form_talk": 107, "concept": 108, 
"concept_talk": 109, "image": 6, "image_talk": 7},
+wgSiteName="semanticweb.org",
+wgRestrictionEdit=[],
+wgRestrictionMove=[];
+</script>
+<script src="/skins/common/wikibits.js?urid=243_1255941349"></script>
+<script src="/skins/common/ajax.js?urid=243_1219842883"></script>
+<!--[if lt IE 7]><style 
type="text/css">body{behavior:url("/skins/vector/csshover.htc")}</style><![endif]-->
+<!--[if lt IE 7]><script type="text/javascript" 
src="/skins/common/IEFixes.js?243"></script><meta http-equiv="imagetoolbar" 
content="no" /><![endif]-->
+<script 
src="/index.php?title=-&amp;action=raw&amp;gen=js&amp;useskin=vector&amp;urid=243"></script>
+
+               <link rel="alternate" type="application/rdf+xml" title="Knud 
Möller" 
href="/index.php?title=Special:ExportRDF/Knud_M%C3%B6ller&amp;xmlmime=rdf" />
+</head>
+       <body  class="mediawiki ltr ns-0 ns-subject page-Knud_Möller 
skin-vector" dir="ltr">
+               <div id="page-base" class="noprint"></div>
+               <div id="head-base" class="noprint"></div>
+               <!-- content -->
+               <div id="content">
+                       <a id="top"></a>
+                       <div id="mw-js-message" style="display:none;"></div>
+                                               <!-- firstHeading -->
+                       <h1 id="firstHeading" class="firstHeading">Knud 
Möller</h1>
+                       <!-- /firstHeading -->
+                       <!-- bodyContent -->
+                       <div id="bodyContent">
+                               <!-- tagline -->
+                               <h3 id="siteSub">From semanticweb.org</h3>
+                               <!-- /tagline -->
+                               <!-- subtitle -->
+                               <div id="contentSub"></div>
+                               <!-- /subtitle -->
+                                                                               
                                                <!-- jumpto -->
+                               <div id="jump-to-nav">
+                                       Jump to:<a href="#head">navigation</a>,
+                                       <a href="#p-search">search</a>
+                               </div>
+                               <!-- /jumpto -->
+                                                               <!-- bodytext 
-->
+                               <table cellspacing="0" cellpadding="5" 
style="position:relative; margin: 0 0 0.5em 1em; border-collapse: collapse; 
border: 1px solid #aaa; background: #fff; float: right; clear: right; width: 
20em">
+<tr>
+<th colspan="2" style="background: #86ba0c; color: white"><span 
style="font-size: 80%; float: right;"><a 
href="/wiki/Special:Ask/-5B-5BKnud-20M%C3%B6ller-5D-5D/-3FName/-3FAffiliation%3Dorganization/-3FEmail/-3FFoaf:phone%3Dworkphone/-3FHomepage/format%3Dvcard/searchlabel%3DvCard/limit%3D20"
 
title="Special:Ask/-5B-5BKnud-20Möller-5D-5D/-3FName/-3FAffiliation=organization/-3FEmail/-3FFoaf:phone=workphone/-3FHomepage/format=vcard/searchlabel=vCard/limit=20">vCard</a></span>
 Knud Moeller
+</th></tr>
+
+
+
+
+
+
+<tr>
+<td style="vertical-align: top;">Homepage:
+</td><td> <a href="http://www.deri.ie" class="external text" 
rel="nofollow">www.deri.ie</a>
+</td></tr>
+
+
+<tr>
+<td colspan="2" style="text-align: Left; background: #e4f8b6"><div 
style="font-size: 80%">PC member of: <a href="/wiki/SemanticDesktopWS2006" 
title="SemanticDesktopWS2006">SemanticDesktopWS2006</a></div>
+</td></tr>
+<tr>
+<td colspan="2" style="text-align: Left; background: #e4f8b6"><div 
style="font-size: 80%">OC member of: <a href="/wiki/ISWC2008" 
title="ISWC2008">ISWC2008</a>, <a href="/wiki/ISWC2007%2BASWC2007" 
title="ISWC2007+ASWC2007">ISWC2007+ASWC2007</a>, <a href="/wiki/SAAW2006" 
title="SAAW2006">SAAW2006</a></div>
+</td></tr>
+<tr>
+<td style="vertical-align: top;">See also:
+</td><td> <a href="http://sw.deri.org/%7Eknud/knudfoaf.rdf" class="external 
text" rel="nofollow">FOAF</a>
+</td></tr></table>
+<p>This person attended <a href="/wiki/ESWC2006" title="ESWC2006">ESWC2006</a>.
+</p>
+<!-- 
+NewPP limit report
+Preprocessor node count: 108/1000000
+Post-expand include size: 2183/2097152 bytes
+Template argument size: 429/2097152 bytes
+Expensive parser function count: 0/100
+-->
+
+<!-- Saved in parser cache with key 
ls3_wiki_semanticweb:pcache:idhash:3008-0!1!0!!en!2 and timestamp 
20100209103614 -->
+<div class="printfooter">
+Retrieved from "<a 
href="http://semanticweb.org/wiki/Knud_M%C3%B6ller">http://semanticweb.org/wiki/Knud_M%C3%B6ller</a>"</div>
+                               <!-- /bodytext -->
+                                                               <!-- catlinks 
-->
+                               <div id='catlinks' class='catlinks'><div 
id="mw-normal-catlinks"><a href="/wiki/Special:Categories" 
title="Special:Categories">Category</a>: <span dir='ltr'><a 
href="/wiki/Category:Person" 
title="Category:Person">Person</a></span></div></div>                           
   <!-- /catlinks -->
+                                                                               
                <div class="visualClear"></div>
+                       </div>
+                       <!-- /bodyContent -->
+               </div>
+               <!-- /content -->
+               <!-- header -->
+               <div id="head" class="noprint">
+                       
+<!-- 0 -->
+<div id="p-personal" class="">
+       <h5>Personal tools</h5>
+       <ul lang="en" xml:lang="en">
+                                       <li  id="pt-login"><a 
href="/index.php?title=Special:UserLogin&amp;returnto=Knud_M%C3%B6ller" 
title="You are encouraged to log in; however, it is not mandatory [o]" 
accesskey="o">Log in / create account</a></li>
+                       </ul>
+</div>
+
+<!-- /0 -->
+                       <div id="left-navigation">
+                               
+<!-- 0 -->
+<div id="p-namespaces" class="vectorTabs">
+       <h5>Namespaces</h5>
+       <ul lang="en" xml:lang="en">
+                                       <li  id="ca-nstab-main" 
class="selected"><a href="/wiki/Knud_M%C3%B6ller"  title="View the content page 
[c]" accesskey="c"><span>Page</span></a></li>
+                                       <li  id="ca-talk" class="new"><a 
href="/index.php?title=Talk:Knud_M%C3%B6ller&amp;action=edit&amp;redlink=1"  
title="Discussion about the content page [t]" 
accesskey="t"><span>Discussion</span></a></li>
+                       </ul>
+</div>
+
+<!-- /0 -->
+
+<!-- 1 -->
+<div id="p-variants" class="vectorMenu emptyPortlet">
+       <h5><span>Variants</span><a href="#"></a></h5>
+       <div class="menu">
+               <ul lang="en" xml:lang="en">
+                                       </ul>
+       </div>
+</div>
+
+<!-- /1 -->
+                       </div>
+                       <div id="right-navigation">
+                               
+<!-- 0 -->
+<div id="p-views" class="vectorTabs">
+       <h5>Views</h5>
+       <ul lang="en" xml:lang="en">
+                                       <li id="ca-view" class="selected"><a 
href="/wiki/Knud_M%C3%B6ller" ><span>Read</span></a></li>
+                                       <li id="ca-edit"><a 
href="/index.php?title=Knud_M%C3%B6ller&amp;action=edit"  title="You can edit 
this page.&#10;Please use the preview button before saving [e]" 
accesskey="e"><span>Edit</span></a></li>
+                                       <li id="ca-history" class="collapsible 
"><a href="/index.php?title=Knud_M%C3%B6ller&amp;action=history"  title="Past 
revisions of this page [h]" accesskey="h"><span>View history</span></a></li>
+                       </ul>
+</div>
+
+<!-- /0 -->
+
+<!-- 1 -->
+<div id="p-cactions" class="vectorMenu emptyPortlet">
+       <h5><span>Actions</span><a href="#"></a></h5>
+       <div class="menu">
+               <ul lang="en" xml:lang="en">
+                                       </ul>
+       </div>
+</div>
+
+<!-- /1 -->
+
+<!-- 2 -->
+<div id="p-search">
+       <h5 lang="en" xml:lang="en"><label for="searchInput">search the 
wiki</label></h5>
+       <form action="/index.php" id="searchform">
+               <input type='hidden' name="title" value="Special:Search"/>
+                               <input id="searchInput" name="search" 
type="text"  title="Search semanticweb.org [f]" accesskey="f"  value="" />
+               <input type='submit' name="go" class="searchButton" 
id="searchGoButton" value="Go" title="Go to a page with this exact name if 
exists" />
+               <input type="submit" name="fulltext" class="searchButton" 
id="mw-searchButton" value="Search" title="Search the pages for this text" />
+                       </form>
+</div>
+
+<!-- /2 -->
+                       </div>
+               </div>
+               <!-- /header -->
+               <!-- panel -->
+                       <div id="panel" class="noprint">
+                               <!-- logo -->
+                                       <div id="p-logo"><a 
style="background-image: url(/images/Semantic-Web-Logo-by-W3C.png);" 
href="/wiki/Main_Page"  title="Visit the main page"></a></div>
+                               <!-- /logo -->
+                               
+<!-- navigation -->
+<div class="portal" id='p-navigation'>
+       <h5 lang="en" xml:lang="en">Navigation</h5>
+       <div class="body">
+                               <ul>
+                                       <li id="n-mainpage"><a 
href="/wiki/Main_Page" title="Visit the main page [z]" accesskey="z">Main 
Page</a></li>
+                                       <li id="n-Tools"><a 
href="/wiki/Tools">Tools</a></li>
+                                       <li id="n-Ontologies"><a 
href="/wiki/Ontology">Ontologies</a></li>
+                                       <li id="n-People"><a 
href="/wiki/People">People</a></li>
+                                       <li id="n-Events"><a 
href="/wiki/Events">Events</a></li>
+                               </ul>
+                       </div>
+</div>
+
+<!-- /navigation -->
+
+<!-- services -->
+<div class="portal" id='p-services'>
+       <h5 lang="en" xml:lang="en">services</h5>
+       <div class="body">
+                               <ul>
+                                       <li id="n-Editing-help"><a 
href="/wiki/Help:Editing">Editing help</a></li>
+                                       <li id="n-browse"><a 
href="/wiki/Special:Browse">Browse wiki</a></li>
+                                       <li id="n-OWL.2FRDF-feeds"><a 
href="/wiki/Special:ExportRDF">OWL/RDF feeds</a></li>
+                                       <li id="n-recentchanges"><a 
href="/wiki/Special:RecentChanges" title="The list of recent changes in the 
wiki [r]" accesskey="r">Recent changes</a></li>
+                               </ul>
+                       </div>
+</div>
+
+<!-- /services -->
+
+<!-- SEARCH -->
+
+<!-- /SEARCH -->
+
+<!-- TOOLBOX -->
+<div class="portal" id="p-tb">
+       <h5 lang="en" xml:lang="en">Toolbox</h5>
+       <div class="body">
+               <ul>
+                                       <li id="t-whatlinkshere"><a 
href="/wiki/Special:WhatLinksHere/Knud_M%C3%B6ller" title="List of all wiki 
pages that link here [j]" accesskey="j">What links here</a></li>
+                                               <li 
id="t-recentchangeslinked"><a 
href="/wiki/Special:RecentChangesLinked/Knud_M%C3%B6ller" title="Recent changes 
in pages linked from this page [k]" accesskey="k">Related changes</a></li>
+                                                                               
                                                                                
                                                                                
                                                        <li id="t-upload"><a 
href="/wiki/Special:Upload" title="Upload files [u]" accesskey="u">Upload 
file</a></li>
+                                                                               
        <li id="t-specialpages"><a href="/wiki/Special:SpecialPages" 
title="List of all special pages [q]" accesskey="q">Special pages</a></li>
+                                                                       <li 
id="t-print"><a href="/index.php?title=Knud_M%C3%B6ller&amp;printable=yes" 
rel="alternate" title="Printable version of this page [p]" 
accesskey="p">Printable version</a></li>
+                                               <li id="t-permalink"><a 
href="/index.php?title=Knud_M%C3%B6ller&amp;oldid=35464" title="Permanent link 
to this revision of the page">Permanent link</a></li>
+                                               <li id="t-smwbrowselink"><a 
href="/wiki/Special:Browse/Knud_M%C3%B6ller" title="Special:Browse/Knud 
Möller">Browse properties</a></li>         </ul>
+       </div>
+</div>
+
+<!-- /TOOLBOX -->
+
+<!-- LANGUAGES -->
+
+<!-- /LANGUAGES -->
+                       </div>
+               <!-- /panel -->
+               <!-- footer -->
+               <div id="footer">
+                                                                               
        <ul id="footer-info">
+                                                                               
                                                        <li 
id="footer-info-lastmod"> This page was last modified on 11 November 2007, at 
05:27.</li>
+                                                                               
                                                                                
                        <li id="footer-info-viewcount">This page has been 
accessed 3,797 times.</li>
+                                                                               
                                        </ul>
+                                                                               
                                        <ul id="footer-places">
+                                                                               
                                                        <li 
id="footer-places-privacy"><a href="/wiki/semanticweb.org:Privacy_policy" 
title="semanticweb.org:Privacy policy">Privacy policy</a></li>
+                                                                               
                                                                                
                        <li id="footer-places-about"><a 
href="/wiki/semanticweb.org:About" title="semanticweb.org:About">About 
semanticweb.org</a></li>
+                                                                               
                                                                                
                        <li id="footer-places-disclaimer"><a 
href="/wiki/semanticweb.org:General_disclaimer" title="semanticweb.org:General 
disclaimer">Disclaimers</a></li>
+                                                                               
                                        </ul>
+                                                                               
<ul id="footer-icons" class="noprint">
+                                                               <li 
id="footer-icon-poweredby"><a href="http://semantic-mediawiki.org/"><img 
src="/extensions/SemanticMediaWiki/skins/images/smw_button.png" alt="Powered by 
Semantic MediaWiki"/></a></li>
+                                                                               
        </ul>
+                       <div style="clear:both"></div>
+               </div>
+               <!-- /footer -->
+               <!-- fixalpha -->
+               <script type="text/javascript"> if ( window.isMSIE55 ) 
fixalpha(); </script>
+               <!-- /fixalpha -->
+               
+<script>if (window.runOnloadHook) runOnloadHook();</script>
+               <!-- Served in 0.909 secs. -->                  </body>
+</html>

Added: 
incubator/any23/trunk/test-resources/src/test/resources/html/html-head-link-extractor.html
URL: 
http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/html-head-link-extractor.html?rev=1371537&view=auto
==============================================================================
--- 
incubator/any23/trunk/test-resources/src/test/resources/html/html-head-link-extractor.html
 (added)
+++ 
incubator/any23/trunk/test-resources/src/test/resources/html/html-head-link-extractor.html
 Fri Aug 10 00:40:13 2012
@@ -0,0 +1,46 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+    <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>
+    <!-- Fixture head: two rel="alternate" links (RDF+XML, REST XML), a shortcut icon, six rel="Stylesheet" links and seven external scripts; the body is empty. -->
+    <title>myExperiment &#45; Workflows &#45; Pathways and Gene annotations for QTL region - Mouse (Paul Fisher)
+        [Taverna 2 Workflow]</title>
+    <link rel="alternate" href="http://www.myexperiment.org/workflows/16.rdf" type="application/rdf+xml"
+          title="RDF+XML"/>
+    <link rel="alternate" href="http://www.myexperiment.org/workflows/16.xml" type="application/xml" title="REST XML"/>
+    <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon"/>
+    <link href="/stylesheets/reset-fonts-grids.css?1202402252" media="screen" rel="Stylesheet" type="text/css"/>
+    <link href="/stylesheets/base-min.css?1202402252" media="screen" rel="Stylesheet" type="text/css"/>
+    <link href="/stylesheets/acts_as_taggable_stylesheet.css?1214908649" media="screen" rel="Stylesheet"
+          type="text/css"/>
+    <link href="/stylesheets/star_rating.css?1231340673" media="screen" rel="Stylesheet" type="text/css"/>
+    <link href="/stylesheets/styles.css?1303293842" media="screen" rel="Stylesheet" type="text/css"/>
+    <link href="/stylesheets/gadgets.css?1273675163" media="screen" rel="Stylesheet" type="text/css"/>
+    <script src="/javascripts/prototype.js?1202402251" type="text/javascript"></script>
+    <script src="/javascripts/effects.js?1202402251" type="text/javascript"></script>
+    <script src="/javascripts/dragdrop.js?1202402251" type="text/javascript"></script>
+    <script src="/javascripts/controls.js?1202402251" type="text/javascript"></script>
+    <script src="/javascripts/qrcode.js?1202402251" type="text/javascript"></script>
+    <script src="/javascripts/application.js?1223293458" type="text/javascript"></script>
+    <script src="/javascripts/boxover.js?1202402251" type="text/javascript"></script>
+</head>
+<body>
+</body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/html/html-head-meta-extractor.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/html-head-meta-extractor.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/html/html-head-meta-extractor.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/html/html-head-meta-extractor.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,39 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+    <meta http-equiv="content-type" content="text/html;charset=UTF-8"/>
+    <!-- Fixture head: http-equiv metas, rel="DC"/"DCTERMS" schema links, and name/content metas (content-language, robots, DC.*); the body is empty. -->
+    <title>test to check meta extraction</title>
+    <meta http-equiv="Content-Type" content="application/xhtml+xml; charset=utf-8"/>
+    <meta http-equiv="Content-Style-Type" content="text/css"/>
+    <link rel="DC" href="http://purl.org/dc/elements/1.1/"/>
+    <link rel="DCTERMS" href="http://purl.org/dc/terms/"/>
+    <meta name="content-language" content="en"/>
+    <meta name="robots" content="index, follow"/>
+    <meta name="DC.title" content="XHTML+RDFa example"/>
+    <meta name="DC.subject" content="XHTML+RDFa, semantic web"/>
+    <meta name="DC.description"
+          content="Example for Extensible Hypertext Markup Language + Resource Description Framework – in – attributes."/>
+    <meta name="DC.format" content="application/xhtml+xml"/>
+    <meta name="DC.language" content="en"/>
+</head>
+<body>
+</body>
+</html>

Added: incubator/any23/trunk/test-resources/src/test/resources/html/html-turtle.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/html-turtle.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/html/html-turtle.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/html/html-turtle.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,124 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN"
+  "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" version="XHTML+RDFa 1.0" dir="ltr"
+  xmlns:xsd="http://www.w3.org/2001/XMLSchema#">
+
+<head profile="http://www.w3.org/1999/xhtml/vocab">
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+<link rel="shortcut icon" href="http://drupalrdf.openspring.net/misc/favicon.ico" type="image/vnd.microsoft.icon" />
+<meta name="Generator" content="Drupal 7 (http://drupal.org)" />
+  <title>Welcome to mydomain.net</title>
+  <style type="text/css" media="all">
+      @import url("http://drupalrdf.openspring.net/modules/system/system.css?f0000000000000000000");
+  <style type="text/css" media="print">@import url("http://drupalrdf.openspring.net/themes/garland/print.css?f0000000000000000000");</style>
+
+<!--[if lt IE 7]>
+<link type="text/css" rel="stylesheet" href="http://drupalrdf.openspring.net/themes/garland/fix-ie.css?f0000000000000000000" media="all" />
+<![endif]-->
+</head>
+<body class="html front not-logged-in one-sidebar sidebar-first page-node fluid-width" >
+<!-- Fixture body: RDF is embedded inline in three script elements, #graph1 (text/turtle), #graph2 (text/n3) and #graph3 (text/plain), each wrapped in a CDATA section. NOTE(review): the first style element in the head has no closing tag and the #wrapper subtree appears twice with the same ids; both are kept as committed, confirm whether intentional. -->
+
+<script type="text/turtle" id="graph1"><![CDATA[
+   @prefix foo:  <http://www.example.com/xyz#> .
+   @prefix gr:   <http://purl.org/goodrelations/v1#> .
+   @prefix xsd:  <http://www.w3.org/2001/XMLSchema#> .
+   @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+
+   foo:myCompany
+     a gr:BusinessEntity ;
+     rdfs:seeAlso <http://www.example.com/xyz> ;
+     gr:hasLegalName "Hepp Industries Ltd."^^xsd:string .
+]]>
+</script>
+
+<div id="skip-link">
+    <a href="#main-content">Skip to main content</a>
+</div>
+<div class="region region-page-top">
+</div>
+
+<script type="text/n3" id="graph2"><![CDATA[
+@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix ex: <http://example.org/stuff/1.0/> .
+
+<http://www.w3.org/TR/rdf-syntax-grammar>
+  dc:title "RDF/XML Syntax Specification (Revised)" ;
+  ex:editor [
+    ex:fullname "Dave Beckett";
+    ex:homePage <http://purl.org/net/dajobe/>
+  ] .
+]]>
+</script>
+
+<div id="wrapper">
+    <div id="container" class="clearfix">
+
+        <div id="header">
+            <div id="logo-floater">
+                <div id="branding"><strong><a href="/" title="drupalrdf.openspring.net ">
+                    <img src="http://drupalrdf.openspring.net/themes/garland/logo.png" alt="drupalrdf.openspring.net "
+                         id="logo"/>
+                    <span>drupalrdf.openspring.net</span> </a></strong></div>
+            </div>
+
+        </div>
+        <div id="sidebar-first" class="sidebar">
+            <div class="region region-sidebar-first">
+                <div id="block-user-login" class="block block-user clearfix">
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+
+<script type="text/plain" id="graph3"><![CDATA[
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+
+<http://en.wikipedia.org/wiki/Tony_Benn>
+  dc:title "Tony Benn";
+  dc:publisher "Wikipedia" .
+  <#graph1> rdfs:comment "Not sure this is true. It seems like just example data." .  
+]]>
+</script>
+
+<div id="wrapper">
+    <div id="container" class="clearfix">
+
+        <div id="header">
+            <div id="logo-floater">
+                <div id="branding"><strong><a href="/" title="drupalrdf.openspring.net ">
+                    <img src="http://drupalrdf.openspring.net/themes/garland/logo.png" alt="drupalrdf.openspring.net "
+                         id="logo"/>
+                    <span>drupalrdf.openspring.net</span> </a></strong></div>
+            </div>
+
+        </div>
+        <div id="sidebar-first" class="sidebar">
+            <div class="region region-sidebar-first">
+                <div id="block-user-login" class="block block-user clearfix">
+                </div>
+            </div>
+        </div>
+    </div>
+</div>
+</body>
+</html>
\ No newline at end of file

Added: incubator/any23/trunk/test-resources/src/test/resources/html/html-without-uf.html
URL: http://svn.apache.org/viewvc/incubator/any23/trunk/test-resources/src/test/resources/html/html-without-uf.html?rev=1371537&view=auto
==============================================================================
--- incubator/any23/trunk/test-resources/src/test/resources/html/html-without-uf.html (added)
+++ incubator/any23/trunk/test-resources/src/test/resources/html/html-without-uf.html Fri Aug 10 00:40:13 2012
@@ -0,0 +1,25 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<html>
+    <head>
+        <title>Hello World!</title>
+    </head>
+    <body>
+        <h1>Hello World!</h1>
+        <p>It's Tuesday. The sun is shining! I want more coffee.</p>
+    </body>
+</html>
\ No newline at end of file


Reply via email to