Sorin Marian Nasoi has proposed merging lp:~zorba-coders/zorba/project_tag_html 
into lp:zorba.

Commit message:
- updated project TAG

Requested reviews:
  Sorin Marian Nasoi (sorin.marian.nasoi)

For more details, see:
https://code.launchpad.net/~zorba-coders/zorba/project_tag_html/+merge/169642
-- 
https://code.launchpad.net/~zorba-coders/zorba/project_tag_html/+merge/169642
Your team Zorba Coders is subscribed to branch lp:zorba.
=== added file 'CMakeLists.txt'
--- CMakeLists.txt	1970-01-01 00:00:00 +0000
+++ CMakeLists.txt	2013-06-15 17:12:29 +0000
@@ -0,0 +1,38 @@
+# Copyright 2006-2010 The FLWOR Foundation.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required (VERSION 2.6)
+
+project (zorba_html_module)
+
+# Switch on CTest support for the tests registered further down the tree.
+enable_testing ()
+include (CTest)
+
+# Windows builds locate third-party libraries through proxy find-modules
+# (their logic lives in the macros of ProxyFindModule.cmake). Those proxies
+# probe, in this order:
+#   1. the path pointed to by ZORBA_THIRD_PARTY_REQUIREMENTS
+#   2. the Program Files directory of the user's computer
+#   3. the PATH environment variable
+# The Windows directory is appended before the generic one so that it comes
+# first in CMAKE_MODULE_PATH.
+if (WIN32)
+  list (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake_modules/Windows")
+endif ()
+list (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake_modules")
+
+# Zorba itself is mandatory; ZORBA_BUILD_DIR is offered as a search hint.
+find_package (Zorba REQUIRED HINTS "${ZORBA_BUILD_DIR}")
+message (STATUS "Zorba_USE_FILE ${Zorba_USE_FILE}")
+include ("${Zorba_USE_FILE}")
+
+# The module, its schema and its tests are declared in src/.
+add_subdirectory ("src")
+
+# Must come after every URI declaration made by the sub-directories.
+done_declaring_zorba_uris ()

=== renamed file 'CMakeLists.txt' => 'CMakeLists.txt.moved'
=== added directory 'cmake_modules'
=== renamed directory 'cmake_modules' => 'cmake_modules.moved'
=== added file 'cmake_modules/FindLibTidy.cmake'
--- cmake_modules/FindLibTidy.cmake	1970-01-01 00:00:00 +0000
+++ cmake_modules/FindLibTidy.cmake	2013-06-15 17:12:29 +0000
@@ -0,0 +1,60 @@
+# Copyright 2006-2008 The FLWOR Foundation.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# - Try to find the HTML Tidy lib
+#
+#  LIBTIDY_FOUND        - true if LIBTIDY was found
+#  LIBTIDY_INCLUDE_DIRS - Directory to include to get LIBTIDY headers
+#                         Note: always include LIBTIDY headers as e.g.,
+#                         tidy/tidy.h
+#  LIBTIDY_LIBRARIES    - Libraries to link against for the LIBTIDY
+#
+
+
+# When the include directory is already set on entry, suppress the
+# "Found ..." status lines printed at the end of this module.
+if (LIBTIDY_INCLUDE_DIR)
+  set (LibTidy_FIND_QUIETLY TRUE)
+endif ()
+
+# Header: tidy.h, also probing a "tidy" sub-directory of each search path.
+find_path (LIBTIDY_INCLUDE_DIR tidy.h PATH_SUFFIXES tidy)
+mark_as_advanced (LIBTIDY_INCLUDE_DIR)
+
+# Library: libtidy.
+find_library (LIBTIDY_LIBRARY NAMES tidy)
+mark_as_advanced (LIBTIDY_LIBRARY)
+
+# Publish LIBTIDY_FOUND, LIBTIDY_LIBRARIES and LIBTIDY_INCLUDE_DIRS by hand;
+# FindPackageHandleStandardArgs is not used by this module.
+if (NOT LIBTIDY_INCLUDE_DIR OR NOT LIBTIDY_LIBRARY)
+  # At least one of the two pieces is missing: report failure and clear the
+  # result variables.
+  set (LIBTIDY_FOUND 0)
+  set (LIBTIDY_LIBRARIES)
+  set (LIBTIDY_INCLUDE_DIRS)
+else ()
+  set (LIBTIDY_FOUND 1)
+  set (LIBTIDY_LIBRARIES ${LIBTIDY_LIBRARY})
+  set (LIBTIDY_INCLUDE_DIRS ${LIBTIDY_INCLUDE_DIR})
+  if (NOT LibTidy_FIND_QUIETLY)
+    message (STATUS "Found libtidy library      : " ${LIBTIDY_LIBRARY})
+    message (STATUS "Found libtidy include path : " ${LIBTIDY_INCLUDE_DIR})
+  endif ()
+endif ()

=== added directory 'cmake_modules/Windows'
=== added file 'cmake_modules/Windows/FindJansson.cmake'
--- cmake_modules/Windows/FindJansson.cmake	1970-01-01 00:00:00 +0000
+++ cmake_modules/Windows/FindJansson.cmake	2013-06-15 17:12:29 +0000
@@ -0,0 +1,30 @@
+# Copyright 2010 The FLWOR Foundation.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# - Try to find the Jansson lib on Windows
+#
+# This is a proxy module that calls the FindJansson.cmake module. Before
+# doing that, we try to guess where Jansson might be on the user's machine.
+# The user should provide ZORBA_THIRD_PARTY_REQUIREMENTS which is a path where
+# the Jansson directory can be found. The Jansson directory must have "jansson"
+# (case insensitive) in its name.
+#
+# This module helps the Windows user to avoid providing the following two
+# variables when building Zorba:
+# -D Jansson_INCLUDE="path_to_3rd_party_dir\*jansson*\src"
+# -D Jansson_LIBRARY="path_to_3rd_party_dir\*jansson*\bin\[Release\]jansson.lib"
+#
+# See the FindJansson.cmake module shipped with Zorba for more information.
+
+# Hand the search over to the FIND_PACKAGE_WIN32 proxy macro (defined outside
+# this file). It is asked to look for "jansson" and to report the outcome in
+# Jansson_FOUND.
+FIND_PACKAGE_WIN32(NAME Jansson FOUND_VAR Jansson_FOUND SEARCH_NAMES jansson)

=== added file 'cmake_modules/Windows/FindLibTidy.cmake'
--- cmake_modules/Windows/FindLibTidy.cmake	1970-01-01 00:00:00 +0000
+++ cmake_modules/Windows/FindLibTidy.cmake	2013-06-15 17:12:29 +0000
@@ -0,0 +1,37 @@
+# Copyright 2010 The FLWOR Foundation.
+# 
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# 
+# http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# - Try to find the HTML Tidy lib on Windows
+#
+# This is a proxy module that calls the FindLibTidy.cmake module. Before
+# doing that, we try to guess where LibTidy might be on the user's machine.
+# The user should provide ZORBA_THIRD_PARTY_REQUIREMENTS which is a path where
+# the LibTidy directory can be found. The LibTidy directory must have "tidy"
+# (case insensitive) in its name.
+#
+# This module helps the Windows user to avoid providing the following two
+# variables when building Zorba:
+# -D LIBTIDY_INCLUDE_DIR="path_to_3rd_party_dir\*tidy*\include"
+# -D LIBTIDY_LIBRARY="path_to_3rd_party_dir\*tidy*\lib\tidy.lib"
+#
+# See the FindLibTidy.cmake module shipped with Zorba for more information.
+
+# Let the proxy macro look for "tidy"; the outcome lands in LIBTIDY_FOUND.
+find_package_win32 (NAME LibTidy FOUND_VAR LIBTIDY_FOUND SEARCH_NAMES tidy)
+
+# When the library was located, also pick up the DLL needed at run time.
+# NOTE(review): FOUND_LOCATION is presumably set by FIND_PACKAGE_WIN32 -
+# confirm against ProxyFindModule.cmake.
+if (LIBTIDY_FOUND)
+  find_package_dlls_win32 (${FOUND_LOCATION} tidy.dll)
+endif ()

=== added directory 'src'
=== renamed directory 'src' => 'src.moved'
=== added file 'src/CMakeLists.txt'
--- src/CMakeLists.txt	1970-01-01 00:00:00 +0000
+++ src/CMakeLists.txt	2013-06-15 17:12:29 +0000
@@ -0,0 +1,43 @@
+# Copyright 2006-2008 The FLWOR Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# LibTidy
+#
+# Declare the HTML module (schema, XQuery module and tests) when LibTidy can
+# be found. Setting ZORBA_SUPPRESS_LIBTIDY skips the search altogether.
+IF (ZORBA_SUPPRESS_LIBTIDY)
+  MESSAGE (STATUS "ZORBA_SUPPRESS_LIBTIDY is true - not searching for LibTidy.")
+ELSE (ZORBA_SUPPRESS_LIBTIDY)
+
+  MESSAGE (STATUS "Looking for LibTidy")
+  FIND_PACKAGE (LibTidy)
+  
+  IF (LIBTIDY_FOUND)
+    MESSAGE (STATUS "Found LibTidy library -- " ${LIBTIDY_LIBRARIES})
+    SET (HTML_LINK_LIBRARIES ${LIBTIDY_LIBRARIES})
+
+    INCLUDE_DIRECTORIES (${LIBTIDY_INCLUDE_DIR})
+    INCLUDE_DIRECTORIES ("html.xq.src")
+    # The URI arguments are plain quoted strings; no ';' may follow them.
+    DECLARE_ZORBA_SCHEMA (FILE "html-options.xsd"
+      URI "http://www.zorba-xquery.com/modules/converters/html-options")
+    DECLARE_ZORBA_MODULE (URI "http://www.zorba-xquery.com/modules/converters/html" VERSION 1.0 FILE "html.xq" LINK_LIBRARIES "${LIBTIDY_LIBRARIES}")
+    ADD_TEST_DIRECTORY ("${PROJECT_SOURCE_DIR}/test")
+
+    # Extra test: the link crawler query only has to compile.
+    ADD_TEST(zorba_html_module/link_crawler_test_for_compilation "${ZORBA_EXE}" -f -q "${PROJECT_SOURCE_DIR}/test/Queries/link_crawler2.xq2" --compile-only)
+  ELSE (LIBTIDY_FOUND)
+    # The find module searches via LIBTIDY_INCLUDE_DIR and LIBTIDY_LIBRARY, so
+    # those are the variables a user has to provide.
+    MESSAGE (STATUS "LibTidy library not found -- if you want to use HTML Tidy functionality please set LIBTIDY_INCLUDE_DIR and LIBTIDY_LIBRARY cmake parameters.")
+    SET_PROPERTY (GLOBAL PROPERTY ZORBA_PROJECT_UNAVAILABLE 1)
+  ENDIF (LIBTIDY_FOUND)
+ENDIF (ZORBA_SUPPRESS_LIBTIDY)
+MESSAGE (STATUS "")
+

=== added file 'src/html-options.xsd'
--- src/html-options.xsd	1970-01-01 00:00:00 +0000
+++ src/html-options.xsd	2013-06-15 17:12:29 +0000
@@ -0,0 +1,40 @@
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+  xmlns:ho="http://www.zorba-xquery.com/modules/converters/html-options"
+  targetNamespace="http://www.zorba-xquery.com/modules/converters/html-options"
+  elementFormDefault="qualified" attributeFormDefault="unqualified"
+  >
+<!--
+:: Copyright 2006-2008 The FLWOR Foundation.
+::
+:: Licensed under the Apache License, Version 2.0 (the "License");
+:: you may not use this file except in compliance with the License.
+:: You may obtain a copy of the License at
+::
+:: http://www.apache.org/licenses/LICENSE-2.0
+::
+:: Unless required by applicable law or agreed to in writing, software
+:: distributed under the License is distributed on an "AS IS" BASIS,
+:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+:: See the License for the specific language governing permissions and
+:: limitations under the License.
+::
+-->
+
+
+  <!-- Root element: a list of zero or more tidyParam elements. -->
+  <xs:element name="options">
+    <xs:complexType>
+      <xs:sequence>
+        <!-- One option: string content plus required name and value attributes. -->
+        <xs:element name="tidyParam" minOccurs="0" maxOccurs="unbounded">
+          <xs:complexType>
+            <xs:simpleContent>
+              <xs:extension base="xs:string">
+                <xs:attribute name="name" type="xs:string" use="required"/>
+                <xs:attribute name="value" type="xs:string" use="required"/>
+              </xs:extension>
+            </xs:simpleContent>
+          </xs:complexType>
+        </xs:element>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+</xs:schema>
\ No newline at end of file

=== added file 'src/html.xq'
--- src/html.xq	1970-01-01 00:00:00 +0000
+++ src/html.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,130 @@
+xquery version "3.0";
+
+(:
+ : Copyright 2006-2009 The FLWOR Foundation.
+ :
+ : Licensed under the Apache License, Version 2.0 (the "License");
+ : you may not use this file except in compliance with the License.
+ : You may obtain a copy of the License at
+ :
+ : http://www.apache.org/licenses/LICENSE-2.0
+ :
+ : Unless required by applicable law or agreed to in writing, software
+ : distributed under the License is distributed on an "AS IS" BASIS,
+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ : See the License for the specific language governing permissions and
+ : limitations under the License.
+:)
+
+(:~
+ : <p>
+ : This module provides functions to <a href="http://www.w3.org/People/Raggett/tidy/" target="_blank">tidy</a> an HTML document. <br />
+ : The functions in this module take an HTML document (a string) as parameter,
+ : tidy it in order to result in valid XHTML, and return this XHTML document as a document-node.
+ : </p>
+ :
+ :
+ : @author Sorin Nasoi
+ : @library <a href="http://tidy.sourceforge.net/">Tidy C++ Library</a>
+ : @project Zorba/Data Converters/HTML
+ :
+ :)
+module namespace html = "http://www.zorba-xquery.com/modules/converters/html";
+
+(:~
+ : Import module for checking if html options element is validated.
+ :)
+import module namespace schema = "http://www.zorba-xquery.com/modules/schema";
+
+(: Schema describing the options element accepted by html:parse. :)
+import schema namespace html-options = "http://www.zorba-xquery.com/modules/converters/html-options";
+
+(: Standard XQuery error namespace, referenced by the @error tags below. :)
+declare namespace err = "http://www.w3.org/2005/xqt-errors";
+
+declare namespace ver = "http://www.zorba-xquery.com/options/versioning";
+declare option ver:module-version "1.0";
+
+(:~
+ : <p>This function tidies the given HTML string and returns
+ : a valid XHTML document node.</p>
+ :
+ : <p>This function automatically sets the following tidying parameters:
+ :   <ul>
+ :    <li>output-xml=yes</li>
+ :    <li>doctype=omit</li>
+ :    <li>quote-nbsp=no</li>
+ :    <li>char-encoding=utf8</li>
+ :    <li>newline=LF</li>
+ :    <li>tidy-mark=no</li>
+ :   </ul>
+ : </p>
+ :
+ : @param $html the HTML string to tidy
+ : @return the tidied XML document
+ :
+ : @error html:InternalError if an internal error occurred while tidying
+ :  the string.
+ :
+ : @example test/Queries/tidy_2.xq
+ :)
+declare function html:parse (
+  $html as xs:string
+) as document-node()
+{
+  (: The default options are validated so that they carry the type
+     expected by html:parse-internal. :)
+  let $validated-options := 
+    validate {
+          <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options">
+              <tidyParam name="output-xml" value="yes" />
+              <tidyParam name="doctype" value="omit" />
+              <tidyParam name="quote-nbsp" value="no" />
+              <tidyParam name="char-encoding" value="utf8" />
+              <tidyParam name="newline" value="LF" />
+              <tidyParam name="tidy-mark" value="no" />
+            </options>
+    }
+  return
+    html:parse-internal($html, $validated-options)
+};
+
+(:~
+ : <p>Tidies the given HTML string, using caller-supplied tidy options,
+ : and returns a valid XHTML document node.</p>
+ :
+ : <p>The options are given as name/value pairs. The permitted names and
+ : values are listed at <a href="http://tidy.sourceforge.net/docs/quickref.html">
+ : http://tidy.sourceforge.net/docs/quickref.html</a>.</p>
+ :
+ : @param $html the HTML string to tidy
+ : @param $options the name/value pairs configuring the tidy process; the
+ :        element is validated against the
+ :        "http://www.zorba-xquery.com/modules/converters/html-options" schema
+ :        unless it already is validated.
+ : @return the tidied XHTML document node
+ :
+ : @error err:XQDY0027 if $options can not be validated against the
+ :  html-options schema
+ : @error html:TidyOption if there was an error with one of the options
+ :  in the $options parameter that couldn't have been caught by validating
+ :  against the schema
+ : @error html:InternalError if an internal error occurred while tidying
+ :  the string.
+ :
+ : @example test/Queries/tidy_1.xq
+ :)
+declare function html:parse (
+  $html as xs:string,
+  $options as element(html-options:options)
+) as document-node()
+{
+  html:parse-internal(
+    $html,
+    (: validate only when the caller has not done so already :)
+    if (schema:is-validated($options))
+    then $options
+    else validate { $options }
+  )
+};
+
+(:~
+ : Private external function backing both html:parse variants. The native
+ : implementation is registered under the local name "parse-internal" by the
+ : C++ HtmlModule in html.xq.src.
+ :
+ : @param $html the HTML string to tidy
+ : @param $options the tidy options element
+ : @return the tidied document node
+ :)
+declare %private function html:parse-internal(
+  $html as xs:string,
+  $options as element(html-options:options)
+) as document-node() external;

=== added directory 'src/html.xq.src'
=== added file 'src/html.xq.src/html.cpp'
--- src/html.xq.src/html.cpp	1970-01-01 00:00:00 +0000
+++ src/html.xq.src/html.cpp	2013-06-15 17:12:29 +0000
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sstream>
+
+#include <zorba/empty_sequence.h>
+#include <zorba/singleton_item_sequence.h>
+#include <zorba/item.h>
+
+#include "html.h"
+#include "tidy_wrapper.h"
+
+namespace zorba
+{
+  namespace htmlmodule
+  {
+
+//*****************************************************************************
+//*****************************************************************************
+
+// Forwards the owning module to the HtmlFunction base class.
+ParseFunction::ParseFunction(const HtmlModule* aModule)
+: HtmlFunction(aModule)
+{
+}
+
+/**
+ * Native implementation of html:parse-internal.
+ *
+ * Reads the first argument as the HTML text and, when a second argument is
+ * present, the options item, then returns the item built by createHtmlItem()
+ * wrapped in a singleton sequence.
+ *
+ * @param aArgs    function arguments: [0] HTML text, [1] options (optional)
+ * @param aSctxCtx static context (unused)
+ * @param aDynCtx  dynamic context (unused)
+ */
+ItemSequence_t
+ParseFunction::evaluate(
+  const ExternalFunction::Arguments_t& aArgs,
+  const StaticContext*                          aSctxCtx,
+  const DynamicContext*                         aDynCtx) const
+{
+  Item lStringItem, lOptionsItem;
+
+  if (aArgs.size() >= 1)
+  {
+    Iterator_t lArg0Iter = aArgs[0]->getIterator();
+    lArg0Iter->open();
+    lArg0Iter->next(lStringItem);
+    lArg0Iter->close();
+  }
+
+  // Holds the text only in the non-streamable case. It is a plain local
+  // (no heap allocation, no smart pointer) declared at function scope so
+  // that it outlives the call to createHtmlItem() below.
+  std::istringstream lStringStream;
+  std::istream* lInput = 0;
+
+  if (lStringItem.isStreamable())
+  {
+    // The stream is owned by the item; it is used in place and must not be
+    // deleted here.
+    lInput = &lStringItem.getStream();
+  }
+  else
+  {
+    String lDocString = lStringItem.getStringValue();
+    lStringStream.str(lDocString.c_str());
+    lInput = &lStringStream;
+  }
+
+  if (aArgs.size() == 2)
+  {
+    Iterator_t lArg1Iter = aArgs[1]->getIterator();
+    lArg1Iter->open();
+    lArg1Iter->next(lOptionsItem);
+    lArg1Iter->close();
+  }
+
+  return ItemSequence_t(new SingletonItemSequence(
+    createHtmlItem(*lInput, lOptionsItem)));
+}
+
+//*****************************************************************************
+//*****************************************************************************
+
+// Lazily filled by HtmlModule::getItemFactory().
+ItemFactory* HtmlModule::theFactory = 0;
+
+// Deletes every function object created by getExternalFunction().
+HtmlModule::~HtmlModule()
+{
+  FuncMap_t::const_iterator lEntry = theFunctions.begin();
+  const FuncMap_t::const_iterator lEnd = theFunctions.end();
+  while (lEntry != lEnd)
+  {
+    delete lEntry->second;
+    ++lEntry;
+  }
+  theFunctions.clear();
+}
+
+// Returns the function object registered for aLocalname, creating it on
+// first use. Only "parse-internal" is known; any other name yields a null
+// pointer (and leaves a null entry in the map).
+ExternalFunction*
+HtmlModule::getExternalFunction(const String& aLocalname)
+{
+  ExternalFunction*& lSlot = theFunctions[aLocalname];
+  if (!lSlot && aLocalname == "parse-internal")
+  {
+    lSlot = new ParseFunction(this);
+  }
+  return lSlot;
+}
+
+// Disposes of this module instance; the object must not be used afterwards.
+void
+HtmlModule::destroy()
+{
+  if (dynamic_cast<HtmlModule*>(this))
+  {
+    delete this;
+  }
+}
+//*****************************************************************************
+//*****************************************************************************
+
+  } /* namespace htmlmodule */
+} /* namespace zorba */
+
+// Export decoration for the entry point below.
+#if defined(WIN32)
+#  define DLL_EXPORT __declspec(dllexport)
+#else
+#  define DLL_EXPORT __attribute__ ((visibility("default")))
+#endif
+
+// C-linkage factory returning a freshly allocated HtmlModule.
+extern "C" DLL_EXPORT zorba::ExternalModule* createModule()
+{
+  zorba::ExternalModule* lModule = new zorba::htmlmodule::HtmlModule();
+  return lModule;
+}

=== added file 'src/html.xq.src/html.h'
--- src/html.xq.src/html.h	1970-01-01 00:00:00 +0000
+++ src/html.xq.src/html.h	2013-06-15 17:12:29 +0000
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef ZORBA_HTMLMODULE_HTML_H
+#define ZORBA_HTMLMODULE_HTML_H
+
+#include <map>
+
+#include <zorba/zorba.h>
+#include <zorba/function.h>
+#include <zorba/external_module.h>
+
+namespace zorba
+{
+  namespace htmlmodule
+  {
+//*****************************************************************************
+//*****************************************************************************
+    /**
+     * External module for the HTML converter namespace. Owns the function
+     * objects it hands out and caches the Zorba item factory.
+     */
+    class HtmlModule : public ExternalModule
+    {
+    private:
+      // Cached item factory, filled on first call to getItemFactory().
+      static ItemFactory* theFactory;
+
+    protected:
+      // Strict weak ordering on String, used as the map comparator.
+      class ltstr
+      {
+      public:
+        bool operator()(const String& s1, const String& s2) const
+        {
+          return s1.compare(s2) < 0;
+        }
+      };
+
+      typedef std::map<String, ExternalFunction*, ltstr> FuncMap_t;
+
+      // Local name -> function object; entries are deleted in the destructor.
+      FuncMap_t theFunctions;
+
+    public:
+      virtual ~HtmlModule();
+
+      // Namespace URI of the XQuery module implemented here.
+      virtual String
+      getURI() const { return "http://www.zorba-xquery.com/modules/converters/html"; }
+
+      // Looks up (and lazily creates) the function with the given local name.
+      virtual ExternalFunction*
+      getExternalFunction(const String& aLocalname);
+
+      // Deletes this module instance.
+      virtual void
+      destroy();
+
+      // Returns the item factory, fetching it from Zorba on first use.
+      static ItemFactory*
+      getItemFactory()
+      {
+        if(!theFactory)
+        {
+          theFactory = Zorba::getInstance(0)->getItemFactory();
+        }
+        return theFactory;
+      }
+    };
+
+//*****************************************************************************
+//*****************************************************************************
+    // Common base of the module's external functions: remembers the owning
+    // module and reports that module's URI as its own.
+    class HtmlFunction : public ContextualExternalFunction
+    {
+    public:
+      HtmlFunction(const HtmlModule* aModule) : theModule(aModule) {}
+
+      ~HtmlFunction() {}
+
+      virtual String getURI() const
+      {
+        return theModule->getURI();
+      }
+
+    protected:
+      // Module this function belongs to (not owned).
+      const HtmlModule* theModule;
+    };
+
+//*****************************************************************************
+//*****************************************************************************
+    // External function registered under the local name "parse-internal".
+    class ParseFunction : public HtmlFunction
+    {
+    public:
+      ParseFunction(const HtmlModule* aModule);
+
+      virtual String getLocalName() const
+      {
+        return "parse-internal";
+      }
+
+      // Defined in html.cpp.
+      virtual ItemSequence_t evaluate(
+        const ExternalFunction::Arguments_t& aArgs,
+        const StaticContext*                 aSctxCtx,
+        const DynamicContext*                aDynCtx) const;
+    };
+
+
+
+
+  } /* namespace htmlmodule */ 
+} /* namespace zorba */
+
+#endif /* ZORBA_HTMLMODULE_HTML_H */

=== added file 'src/html.xq.src/tidy_wrapper.cpp'
--- src/html.xq.src/tidy_wrapper.cpp	1970-01-01 00:00:00 +0000
+++ src/html.xq.src/tidy_wrapper.cpp	2013-06-15 17:12:29 +0000
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+
+#include <tidy.h>
+#include <buffio.h>
+
+#include "tidy_wrapper.h"
+
+#include <zorba/item_factory.h>
+#include <zorba/item.h>
+#include <zorba/xmldatamanager.h>
+
+namespace zorba
+{
+  namespace htmlmodule
+  {
+    // Nothing is defined here: the helpers of tidy_wrapper.h are implemented
+    // in that header as static functions.
+
+  } /* namespace htmlmodule */
+} /* namespace zorba */
\ No newline at end of file

=== added file 'src/html.xq.src/tidy_wrapper.h'
--- src/html.xq.src/tidy_wrapper.h	1970-01-01 00:00:00 +0000
+++ src/html.xq.src/tidy_wrapper.h	2013-06-15 17:12:29 +0000
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2006-2008 The FLWOR Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ZORBA_HTMLMODULE_TIDY_WRAPPER_H
+#define ZORBA_HTMLMODULE_TIDY_WRAPPER_H
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <tidy.h>
+#include <buffio.h>
+
+#include <zorba/item_factory.h>
+#include <zorba/item.h>
+#include <zorba/iterator.h>
+#include <zorba/store_consts.h>
+#include <zorba/xmldatamanager.h>
+#include <zorba/error.h>
+#include <zorba/diagnostic_list.h>
+#include <zorba/user_exception.h>
+#include <zorba/zorba.h>
+
+namespace zorba
+{
+  namespace htmlmodule
+  {
+    /**
+     * Adapts a std::istream to the TidyInputSource callback interface.
+     * Bytes handed back through ungetByte() are kept in a small stack and
+     * returned again (last in, first out) before the stream is read further.
+     */
+    class TidyReader {
+    private:
+      std::istream* theStream;
+      // We need a buffer to support the unget function
+      std::vector<unsigned int> theBuffer;
+    public:
+      TidyReader(std::istream* aStream) : theStream(aStream) {}
+
+      // Builds the callback table; sourceData points back at this reader, so
+      // the reader must outlive every use of the returned structure.
+      TidyInputSource getInputSource()
+      {
+        TidyInputSource lResult;
+        lResult.sourceData = this;
+        lResult.getByte = &getByte;
+        lResult.ungetByte = &ungetByte;
+        lResult.eof = &isEof;
+        return lResult;
+      }
+      
+      // callback functions
+
+      // Next byte: from the unget stack when it holds anything, otherwise
+      // from the stream.
+      static int TIDY_CALL getByte(void* aData)
+      {
+        TidyReader* lReader = static_cast<TidyReader*>(aData);
+        if (lReader->theBuffer.empty())
+          return lReader->theStream->get();
+        else
+        {
+          int lResult = lReader->theBuffer.back();
+          lReader->theBuffer.pop_back();
+          return lResult;
+        }
+      }
+      
+      // Pushes a byte back so that the next getByte() returns it.
+      static void TIDY_CALL ungetByte(void* aData, byte aByte)
+      {
+        TidyReader* lReader = static_cast<TidyReader*>(aData);
+        lReader->theBuffer.push_back(aByte);
+      }
+      
+      // End of input is reached only when the stream is exhausted AND no
+      // pushed-back byte is still waiting to be delivered.
+      static Bool TIDY_CALL isEof(void* aData)
+      {
+        TidyReader* lReader = static_cast<TidyReader*>(aData);
+        return (lReader->theBuffer.empty() && lReader->theStream->eof())
+          ? yes : no;
+      }
+    };
+    
+    /**
+     * Throws an html:InternalError user exception carrying errMsg when a
+     * tidy call failed.
+     *
+     * Return codes 0 and 1 are accepted; anything above 1 and any negative
+     * code (tidy reports severe errors as negative values) is rejected.
+     *
+     * @param rc     return code of the tidy call
+     * @param errMsg description used for the exception
+     */
+    static void checkRC(int rc, const char* errMsg)
+    {
+      if (rc < 0 || rc > 1)
+      {
+        zorba::Item lError = Zorba::getInstance(0)->getItemFactory()
+          ->createQName(
+            "http://www.zorba-xquery.com/modules/converters/html",
+            "InternalError");
+        throw USER_EXCEPTION(lError, errMsg );
+      }
+    }
+    
+    /**
+     * Sets one tidy option by name.
+     *
+     * @param doc    tidy document to configure
+     * @param option option name
+     * @param value  option value
+     * @return yes when the option was set; no when tidyOptGetIdForName() does
+     *         not map the name to an id below N_TIDY_OPTIONS (the option is
+     *         then skipped without an error)
+     * @throws html:TidyOption user exception when tidy refuses the value
+     */
+    static Bool setTidyOption(TidyDoc doc, const char* option, const char* value)
+    {
+      TidyOptionId toID = tidyOptGetIdForName(option);
+      if (!(toID < N_TIDY_OPTIONS))
+      {
+        return no;
+      }
+
+      Bool ok = tidyOptSetValue(doc, toID, value);
+      if (ok != yes)
+      {
+        zorba::Item lError = Zorba::getInstance(0)->getItemFactory()
+          ->createQName(
+              "http://www.zorba-xquery.com/modules/converters/html",
+              "TidyOption");
+        std::ostringstream lErrorMsg;
+        lErrorMsg << "Error setting tidy option '" << option 
+          << "' with value '" << value << "'";
+        throw USER_EXCEPTION(lError, lErrorMsg.str());
+      }
+      return ok;
+    }
+
+    /**
+     * Applies every child element of aOptions to aDoc as a tidy option,
+     * taking the option name and value from the child's "name" and "value"
+     * attributes. Children that are not elements (e.g. comments or text) are
+     * skipped. Nothing happens when aOptions is the null item.
+     *
+     * @param aDoc     tidy document to configure
+     * @param aOptions options element, or a null item
+     * @return always yes; a rejected value surfaces as the exception thrown
+     *         by setTidyOption()
+     */
+    static Bool applyOptions(TidyDoc aDoc, zorba::Item &aOptions)
+    {
+      if (aOptions.isNull())
+        return yes;
+
+      zorba::Item lElementItem;
+      zorba::Iterator_t lElements = aOptions.getChildren();
+      lElements->open();
+      while (lElements->next(lElementItem))
+      {
+        // Skip non-element children instead of ending the whole loop, so
+        // that options following e.g. a comment are still applied.
+        if (lElementItem.getNodeKind() != store::StoreConsts::elementNode)
+          continue;
+
+        // Fresh per element: values never leak from one option to the next.
+        zorba::String lStrName, lStrValue;
+        zorba::Item lAttr, lAttrName;
+
+        zorba::Iterator_t lAttributes = lElementItem.getAttributes();
+        lAttributes->open();
+        while (lAttributes->next(lAttr))
+        {
+          lAttr.getNodeName(lAttrName);
+          if(lAttrName.getLocalName() == "name")
+            lStrName = lAttr.getStringValue();
+          else if(lAttrName.getLocalName() == "value")
+            lStrValue = lAttr.getStringValue();
+        }
+        // Closed before the option is set because setTidyOption() may throw.
+        lAttributes->close();
+
+        setTidyOption(aDoc, lStrName.c_str(), lStrValue.c_str());
+      }
+      lElements->close();
+      return yes;
+    }
+
+    // Owns the tidy document and its two buffers for the duration of one
+    // conversion and frees them in its destructor, exceptions included.
+    struct TidyResourceGuard
+    {
+      TidyBuffer output;
+      TidyBuffer errbuf;
+      TidyDoc    doc;
+
+      TidyResourceGuard()
+      {
+        tidyBufInit(&output);
+        tidyBufInit(&errbuf);
+        doc = tidyCreate();
+      }
+
+      ~TidyResourceGuard()
+      {
+        tidyBufFree(&output);
+        tidyBufFree(&errbuf);
+        tidyRelease(doc);
+      }
+
+    private:
+      // not copyable: the resources must be released exactly once
+      TidyResourceGuard(const TidyResourceGuard&);
+      TidyResourceGuard& operator=(const TidyResourceGuard&);
+    };
+
+    /**
+     * Runs tidy over the bytes of aStream using the given options and parses
+     * the tidied output as XML.
+     *
+     * @param aStream  HTML input
+     * @param aOptions tidy options element, or a null item for none
+     * @return the parsed item, or a null item when the tidied output cannot
+     *         be parsed as XML
+     * @throws html:TidyOption / html:InternalError user exceptions raised by
+     *         applyOptions() and checkRC(); the tidy resources are released
+     *         in that case as well
+     */
+    static zorba::Item createHtmlItem( std::istream& aStream , zorba::Item &aOptions)
+    {
+      std::string lResult;
+
+      {
+        // Everything tidy allocates lives in this scope and is released at
+        // its end, whether it is left normally or through an exception.
+        TidyResourceGuard lTidy;
+
+        TidyReader lReader(&aStream);
+        TidyInputSource lInputSource = lReader.getInputSource();
+
+        applyOptions(lTidy.doc, aOptions);
+
+        int rc = tidySetErrorBuffer(lTidy.doc, &lTidy.errbuf);
+        checkRC(rc, "Could not set error buffer");
+        rc = tidyParseSource(lTidy.doc, &lInputSource);
+        checkRC(rc, "Could not parse the source");
+        rc = tidyCleanAndRepair(lTidy.doc);
+        checkRC(rc, "Could not clean and repair");
+        rc = tidyRunDiagnostics(lTidy.doc);
+        if ( rc > 1 )
+          tidyOptSetBool(lTidy.doc, TidyForceOutput, yes);
+
+        // Tidy does not support streaming for output, it only supports
+        // something they call a "sink". Therefore we buffer it in a string.
+        rc = tidySaveBuffer(lTidy.doc, &lTidy.output);
+        checkRC(rc, "Could not save the buffer");
+
+        // Copy the output before the buffer is freed; an empty buffer may
+        // have no storage at all.
+        if (lTidy.output.bp)
+          lResult.assign((const char*) lTidy.output.bp, lTidy.output.size);
+      }
+
+      std::istringstream lStream(lResult);
+      XmlDataManager* lDM = Zorba::getInstance(0)->getXmlDataManager();
+      try
+      {
+        return lDM->parseXML(lStream);
+      } catch (ZorbaException&)
+      {
+        // Output that is not well-formed XML yields a null item.
+        return NULL;
+      }
+    }
+  } /* namespace htmlmodule */
+} /* namespace zorba */
+
+#endif //ZORBA_HTMLMODULE_TIDY_WRAPPER_H

=== added directory 'test'
=== renamed directory 'test' => 'test.moved'
=== added directory 'test/ExpQueryResults'
=== added file 'test/ExpQueryResults/tidy_1.xml.res'
--- test/ExpQueryResults/tidy_1.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/tidy_1.xml.res	2013-06-15 17:12:29 +0000
@@ -0,0 +1,8 @@
+<html>
+<head>
+<title>Foo</title>
+</head>
+<body>
+<p>Foo!</p>
+</body>
+</html>
\ No newline at end of file

=== added file 'test/ExpQueryResults/tidy_2.xml.res'
--- test/ExpQueryResults/tidy_2.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/tidy_2.xml.res	2013-06-15 17:12:29 +0000
@@ -0,0 +1,8 @@
+<html>
+<head>
+<title>Foo</title>
+</head>
+<body>
+<p>Foo!</p>
+</body>
+</html>
\ No newline at end of file

=== added file 'test/ExpQueryResults/tidy_3.xml.res'
--- test/ExpQueryResults/tidy_3.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/tidy_3.xml.res	2013-06-15 17:12:29 +0000
@@ -0,0 +1,41 @@
+<html>
+<head>
+<title>[ #426885 ] Definition list w/Center crashes</title>
+</head>
+<body>
+<center>
+<h1>Heading 1</h1>
+</center>
+<dl>
+<dt>
+<img src="redball.gif"/>
+<b>Term 1</b>
+</dt>
+<dt>
+<img src="redball.gif"/>
+<b>Term 2</b>
+</dt>
+<dd>
+<hr/>
+</dd>
+</dl>
+<center>
+<h1>Heading 2</h1>
+</center>
+<div style="margin-left: 2em">
+<dl>
+<dt>
+<img src="redball.gif"/>
+<b>Term 3</b>
+</dt>
+<dt>
+<img src="redball.gif"/>
+<b>Term 4</b>
+</dt>
+<dd>
+<hr/>
+</dd>
+</dl>
+</div>
+</body>
+</html>
\ No newline at end of file

=== added file 'test/ExpQueryResults/tidy_4.xml.res'
--- test/ExpQueryResults/tidy_4.xml.res	1970-01-01 00:00:00 +0000
+++ test/ExpQueryResults/tidy_4.xml.res	2013-06-15 17:12:29 +0000
@@ -0,0 +1,9 @@
+<html>
+<head>
+<title>[#427663] Line endings not supported correctly</title>
+</head>
+<body>
+<p>This is a carriage return^MThis is a Unix line-ending This is a
+DOS line ending^M</p>
+</body>
+</html>
\ No newline at end of file

=== added directory 'test/Queries'
=== added file 'test/Queries/link_crawler2.xq2'
--- test/Queries/link_crawler2.xq2	1970-01-01 00:00:00 +0000
+++ test/Queries/link_crawler2.xq2	2013-06-15 17:12:29 +0000
@@ -0,0 +1,263 @@
+(:
+ : Copyright 2006-2011 The FLWOR Foundation.
+ :
+ : Licensed under the Apache License, Version 2.0 (the "License");
+ : you may not use this file except in compliance with the License.
+ : You may obtain a copy of the License at
+ :
+ : http://www.apache.org/licenses/LICENSE-2.0
+ :
+ : Unless required by applicable law or agreed to in writing, software
+ : distributed under the License is distributed on an "AS IS" BASIS,
+ : WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ : See the License for the specific language governing permissions and
+ : limitations under the License.
+:)
+
+import module namespace http = "http://www.zorba-xquery.com/modules/http-client";
+import module namespace map = "http://www.zorba-xquery.com/modules/store/data-structures/unordered-map";
+import module namespace html = "http://www.zorba-xquery.com/modules/converters/html";
+import module namespace parse-xml = "http://www.zorba-xquery.com/modules/xml";
+import module namespace file = "http://expath.org/ns/file";
+
+declare namespace an = "http://www.zorba-xquery.com/annotations";
+declare namespace xhtml="http://www.w3.org/1999/xhtml";
+declare namespace output="http://www.w3.org/2010/xslt-xquery-serialization";
+declare namespace err="http://www.w3.org/2005/xqt-errors";
+declare namespace httpsch = "http://expath.org/ns/http-client";
+(: Start page of the crawl, and the URI prefix that makes a link "internal". :)
+declare variable $top-uri  as xs:string := "http://www.zorba-xquery.com/html/index/";
+declare variable $uri-host as xs:string := "http://www.zorba-xquery.com";
+
+
+(: QNames of the maps recording processed links, and the tidy options passed to html:parse. :)
+declare variable $local:processed-internal-links := xs:QName("processed-internal-links");
+declare variable $local:processed-external-links := xs:QName("processed-external-links");
+declare variable $local:tidy-options := <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
+                                         <tidyParam name="output-xml" value="yes" />
+                                         <tidyParam name="doctype" value="omit" />
+                                         <tidyParam name="quote-nbsp" value="no" />
+                                         <tidyParam name="char-encoding" value="utf8" />
+                                         <tidyParam name="newline" value="LF" />
+                                         <tidyParam name="tidy-mark" value="no" />
+                                         <tidyParam name="new-inline-tags" value="nav header section article footer xqdoc:custom d c options json-param" />
+                                        </options>;
+
+
+(: Creates the internal-links and external-links maps, each with xs:QName("xs:string") as its type argument. :)
+declare %an:sequential function local:create-containers()
+{
+  map:create($local:processed-internal-links, xs:QName("xs:string"));
+  map:create($local:processed-external-links, xs:QName("xs:string"));
+};
+(: Deletes every map reported by map:available-maps(), not only the two created by this script. :)
+declare %an:sequential function local:delete-containers(){
+  for $x in map:available-maps()
+  return map:delete($x);
+};
+(: A link counts as internal when it starts with $uri-host. :)
+declare function local:is-internal($x as xs:string) as xs:boolean
+{
+ starts-with($x, $uri-host)
+};
+(: Like fn:substring-before($s1, $s2), except that $s1 itself is returned when that result is the empty string. :)
+declare function local:my-substring-before($s1 as xs:string, $s2 as xs:string) as xs:string
+{
+let $sb := fn:substring-before($s1, $s2)
+return  if($sb = "") then  $s1 else $sb
+};
+(: Resolves the whitespace-normalized $href against $start-uri and cuts the result at "#"; on any error records $href as malformed/broken in the external-links map. :)
+declare %an:sequential function local:get-real-link($href as xs:string, $start-uri as xs:string) as xs:string?
+{
+   variable $absuri;
+   try{
+    $absuri := local:my-substring-before(resolve-uri(fn:normalize-space($href), $start-uri), "#");
+   }
+   catch *
+   { 
+     map:insert($local:processed-external-links, (<FROM>{$start-uri}</FROM>, 
+                                                  <MESSAGE>malformed</MESSAGE>,
+                                                  <RESULT>broken</RESULT>), $href);
+   }
+   $absuri (: NOTE(review): never assigned when the catch branch ran — confirm what is returned then :)
+};
+(: Returns the @value of the first Content-Type header of $http-call,
+   cut before the first ";" (whole value when nothing precedes a ";"). :)
+declare  function local:get-media-type ($http-call as node()) as xs:string
+{
+   local:my-substring-before($http-call/httpsch:header[@name = 'Content-Type'][1]/string(@value), ";")
+};
+(: True when $http-call is non-empty and its first item has @status 200; otherwise traces "alive" and returns false. :)
+declare function local:alive($http-call as item()*) as xs:boolean
+{
+ if((count($http-call) ge 1) and 
+    ($http-call[1]/@status eq 200)) 
+   then true() else fn:trace(false(), "alive")
+};
+(: True (and traced as "redirect") when $http-call is non-empty and its first item's @status idiv 100 equals 3. :)
+declare function local:is-redirect($http-call as item()*) as xs:boolean
+{
+ if((count($http-call) ge 1) and 
+    (($http-call[1]/@status idiv 100) eq 3)) 
+   then fn:trace(true(), "redirect") else false()
+};
+(: Collects the href/src values of all a, link, script, img and area elements (any namespace) below $content,
+   resolves each one against $uri with local:get-real-link and returns the distinct results. :)
+declare %an:sequential function local:get-out-links-parsed($content as node()*, $uri as xs:string) as xs:string*
+{  distinct-values( for $y in  ($content//*:a/string(@href),
+                              $content//*:link/string(@href),
+                              $content//*:script/string(@src),
+                              $content//*:img/string(@src),
+                              $content//*:area/string(@href)
+                              )
+return  local:get-real-link($y, $uri))
+};
+(: Regex-based variant for unparsed text: regex group 8 is the quoted href/src value of an
+   a/link/area/script/img tag; each value is resolved against $uri and the distinct results returned. :)
+declare %an:sequential function local:get-out-links-unparsed($content as xs:string, $uri as xs:string) as xs:string*{
+
+      distinct-values( 
+         let $search := fn:analyze-string($content, "(&lt;|&amp;lt;|<)(((a|link|area).+?href)|((script|img).+?src))=([""'])(.*?)\7")
+         for $other-uri2 in  $search//group[@nr=8]/string()
+         return local:get-real-link($other-uri2, $uri)
+     )
+};
+(: Inserts into $map-name, for $url, the STATUS, MESSAGE and RESULT ("Ok" / "redirect" / "broken") of $http-result,
+   or only RESULT "broken" when $http-result is empty; for a redirect the Location header is inserted too as REDIRECT. :)
+declare %an:sequential function local:map-insert-result($map-name as xs:QName, $url as xs:string, $http-result as item()*) 
+{
+  if(count($http-result) ge 1) 
+    then 
+      map:insert($map-name, (<STATUS>{fn:string($http-result[1]/@status)}</STATUS>,
+                             <MESSAGE>{fn:string($http-result[1]/@message)}</MESSAGE>,
+                             <RESULT>{if(local:alive($http-result)) 
+                                        then "Ok" 
+                                        else if(local:is-redirect($http-result))
+                                                then "redirect" 
+                                                else "broken"
+                             }</RESULT>), $url);
+    else map:insert($map-name, <RESULT>broken</RESULT>, $url);
+  if(local:is-redirect($http-result)) then
+    map:insert($map-name, <REDIRECT>{fn:string($http-result[1]/httpsch:header[@name = "Location"]/@value)}</REDIRECT>, $url);
+  else {}
+};
+(: Dispatches $x to the internal or the external handler, depending on local:is-internal($x). :)
+declare %an:sequential function local:process-link($x as xs:string, $baseUri as xs:string, $n as xs:integer) as item()*{
+ if(local:is-internal($x))
+       then local:process-internal-link($x, $baseUri, $n);
+       else local:process-external-link($x, $baseUri);
+
+};
+(: Handles an external link: exits with false() if $x already has an entry in the external map, otherwise records $baseUri as FROM, requests $x and stores the outcome. :)
+declare  %an:sequential function local:process-external-link($x as xs:string, $baseUri as xs:string){
+  if(not(empty(map:get($local:processed-external-links, $x))))
+         then   exit returning false();
+         else {}
+  fn:trace($x, "HEAD external link"); (: NOTE(review): trace says HEAD, but both requests below use GET — confirm intent :)
+  map:insert($local:processed-external-links, <FROM>{$baseUri}</FROM>, $x);
+  variable $http-call:=();
+  try{
+        $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
+        if((count($http-call) ge 1) and 
+            fn:not($http-call[1]/@status eq 200)) then
+        {
+           if(local:is-redirect($http-call)) then
+           {
+             local:map-insert-result($local:processed-external-links, $x, $http-call);
+           }
+           else {} 
+           $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
+           local:map-insert-result($local:processed-external-links, $x, $http-call); 
+        }
+        else
+        {}
+  }
+  catch * 
+  { $http-call:=();}
+  local:map-insert-result($local:processed-external-links, $x, $http-call); 
+};
+(: Handles an internal link: exits if $x already has a map entry, otherwise fetches it, records the outcome and, for a live
+   text/html response, extracts its links (tidy, then XML-fragment parse, then regex) and calls local:process-link on each. :)
+declare  %an:sequential function local:process-internal-link($x as xs:string, $baseUri as xs:string, $n as xs:integer){
+      (: if($n=3) then exit returning (); else {} :)
+      if(not(empty(map:get($local:processed-internal-links, $x))))
+            then exit returning false();
+              else {}
+      fn:trace($x, "GET internal link");
+      map:insert($local:processed-internal-links, <FROM>{$baseUri}</FROM>, $x);
+       variable $http-call:=();
+       try{
+          $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}" follow-redirect="false"/>, (), ());
+       }
+       catch * { }
+       if(local:is-redirect($http-call)) then
+       {
+         local:map-insert-result($local:processed-internal-links, $x, $http-call);
+         try{
+            $http-call:=http:send-request(<httpsch:request method="GET" href="{$x}"/>, (), ());
+         }
+         catch * { }
+       }
+       else {}
+       if( not(local:alive($http-call)))
+               then { local:map-insert-result($local:processed-internal-links, $x, $http-call); exit returning ();}
+               else {}
+       
+       if(not (local:get-media-type($http-call[1]) = "text/html"))
+               then { local:map-insert-result($local:processed-internal-links, $x, $http-call); exit returning ();}
+               else {}
+       variable $string-content := string($http-call[2]);
+       variable $content:=();
+
+       try{
+             $content:=html:parse($string-content,$local:tidy-options );
+             local:map-insert-result($local:processed-internal-links, $x, $http-call); 
+        }
+        catch *
+             {   
+                 map:insert($local:processed-internal-links, (<MESSAGE>{concat("cannot tidy: ", $err:description)}</MESSAGE>,
+                                                              <RESULT>broken</RESULT>), $x); 
+                 try{
+                       $content:=parse-xml:parse-xml-fragment ($string-content, "");
+                 }
+                 catch *
+                     { map:insert($local:processed-internal-links, <MESSAGE>{concat("cannot parse: ", $err:description)}</MESSAGE>, $x);}
+            }
+       variable $links :=();
+       if(empty($content))
+           then $links:=local:get-out-links-unparsed($string-content, fn:trace($x, "parse with regex, because tidy failed"));
+           else $links:=local:get-out-links-parsed($content, $x);
+       for $l in $links
+       return  local:process-link($l, $x, $n+1);
+};
+
+
+(: Builds one INTERNAL element per key of the internal-links map, followed by one EXTERNAL element per key of
+   the external-links map; each holds the key as LINK plus everything map:get returns for that key. :)
+declare function local:print-results() as element()*
+{
+    for $x in map:keys($local:processed-internal-links)/map:attribute/@value/string()
+    return <INTERNAL><LINK>{$x}</LINK>{map:get($local:processed-internal-links,$x)}</INTERNAL>, 
+     for $x in map:keys($local:processed-external-links)/map:attribute/@value/string()
+     return <EXTERNAL><LINK>{$x}</LINK>{map:get($local:processed-external-links,$x)}</EXTERNAL>
+};
+
+(:==========================================
+ Main: create the maps, crawl from $top-uri, collect the results, delete the maps, write link_crawler_result.xml
+===========================================:)
+variable $uri:= $top-uri;
+
+variable $result;
+
+local:create-containers();
+local:process-link($uri, "", 1);
+$result:=local:print-results() ;
+
+local:delete-containers();
+
+file:write(fn:resolve-uri("link_crawler_result.xml"),
+            <result>{$result}</result>,
+            <output:serialization-parameters>
+                <output:indent value="yes"/>
+            </output:serialization-parameters>)
+

=== added file 'test/Queries/tidy_1.xq'
--- test/Queries/tidy_1.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_1.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,14 @@
+(: tidy an HTML string using explicit, non-default tidy options :)
+
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<title>Foo</title><p>Foo!',
+            <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
+              <tidyParam name="output-xml" value="yes" />
+              <tidyParam name="doctype" value="omit" />
+              <tidyParam name="quote-nbsp" value="no" />
+              <tidyParam name="char-encoding" value="utf8" />
+              <tidyParam name="newline" value="LF" />
+              <tidyParam name="tidy-mark" value="no" />
+            </options>)
\ No newline at end of file

=== added file 'test/Queries/tidy_2.xq'
--- test/Queries/tidy_2.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_2.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,6 @@
+(: tidy an HTML string using the default tidy options :)
+
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<title>Foo</title><p>Foo!')
\ No newline at end of file

=== added file 'test/Queries/tidy_3.xq'
--- test/Queries/tidy_3.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_3.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,18 @@
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<HTML>
+<HEAD>
+<TITLE>[ #426885 ] Definition list w/Center crashes</TITLE>
+</HEAD>
+<BODY>
+<CENTER><H1>Heading 1</H1></CENTER>
+<DT><IMG src="redball.gif"><B>Term 1</B></DT>
+<DT><IMG src="redball.gif"><B>Term 2</B><HR></DT>
+<CENTER><H1>Heading 2</H1></CENTER>
+<UL>
+<DT><IMG src="redball.gif"><B>Term 3</B></DT>
+<DT><IMG src="redball.gif"><B>Term 4</B><HR></DT>
+</UL>
+</BODY>
+</HTML>')
\ No newline at end of file

=== added file 'test/Queries/tidy_4.xq'
--- test/Queries/tidy_4.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_4.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,14 @@
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+<html>
+<head>
+  <title>[#427663] Line endings not supported correctly</title>
+</head>
+<body>
+<p>This is a carriage return^MThis is a Unix line-ending
+This is a DOS line ending^M
+
+</body>
+</html>')
\ No newline at end of file

=== added file 'test/Queries/tidy_5_wrong_options.spec'
--- test/Queries/tidy_5_wrong_options.spec	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_5_wrong_options.spec	2013-06-15 17:12:29 +0000
@@ -0,0 +1,1 @@
+Error: http://www.zorba-xquery.com/modules/converters/html:TidyOption

=== added file 'test/Queries/tidy_5_wrong_options.xq'
--- test/Queries/tidy_5_wrong_options.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_5_wrong_options.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,9 @@
+(: tidy an HTML string, passing an invalid value ("maybe") for a tidy option :)
+
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<title>Foo</title><p>Foo!',
+            <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
+              <tidyParam name="output-xml" value="maybe" />
+            </options>)
\ No newline at end of file

=== added file 'test/Queries/tidy_6_wrong_options.spec'
--- test/Queries/tidy_6_wrong_options.spec	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_6_wrong_options.spec	2013-06-15 17:12:29 +0000
@@ -0,0 +1,1 @@
+Error: http://www.w3.org/2005/xqt-errors:XQDY0027

=== added file 'test/Queries/tidy_6_wrong_options.xq'
--- test/Queries/tidy_6_wrong_options.xq	1970-01-01 00:00:00 +0000
+++ test/Queries/tidy_6_wrong_options.xq	2013-06-15 17:12:29 +0000
@@ -0,0 +1,14 @@
+(: tidy an HTML string using correct tidy options/values but wrongly formatted as html-options (the first element is deliberately misspelled "tidyaram") :)
+
+import module namespace html="http://www.zorba-xquery.com/modules/converters/html";
+import schema namespace html-options="http://www.zorba-xquery.com/modules/converters/html-options";
+
+html:parse('<title>Foo</title><p>Foo!',
+            <options xmlns="http://www.zorba-xquery.com/modules/converters/html-options" >
+              <tidyaram name="output-xml" value="yes" />
+              <tidyParam name="doctype" value="omit" />
+              <tidyParam name="quote-nbsp" value="no" />
+              <tidyParam name="char-encoding" value="utf8" />
+              <tidyParam name="newline" value="LF" />
+              <tidyParam name="tidy-mark" value="no" />
+            </options>)

-- 
Mailing list: https://launchpad.net/~zorba-coders
Post to     : zorba-coders@lists.launchpad.net
Unsubscribe : https://launchpad.net/~zorba-coders
More help   : https://help.launchpad.net/ListHelp

Reply via email to