Hi Asiri,
I think I'd really prefer one filter per class. Same as what is done
in the HTML cleaner. Also please don't use any *Utils class and no
static please (these are both anti-patterns).
Thanks
-Vincent
On Oct 28, 2008, at 2:54 PM, asiri (SVN) wrote:
> Author: asiri
> Date: 2008-10-28 14:54:04 +0100 (Tue, 28 Oct 2008)
> New Revision: 13868
>
> Removed:
> sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/filter/
> Modified:
> sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/OfficeImporterPlugin.java
> sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/utils/HtmlFilterUtils.java
> sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> plugin/officeconverter/CleanHTMLTest.java
> Log:
> Moved all html filter code into a single utility class called
> HtmlFilterUtils. I thought of introducing some sort of a filter
> chain (may be chain of responsibility pattern) but it seemed like an
> over-kill for this scenario.
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
> xwiki/plugin/officeimporter/OfficeImporterPlugin.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/OfficeImporterPlugin.java 2008-10-28 11:33:41
> UTC (rev 13867)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/OfficeImporterPlugin.java 2008-10-28 13:54:04
> UTC (rev 13868)
> @@ -57,14 +57,9 @@
> import com.xpn.xwiki.doc.XWikiDocument;
> import com.xpn.xwiki.plugin.XWikiDefaultPlugin;
> import com.xpn.xwiki.plugin.XWikiPluginInterface;
> -import com.xpn.xwiki.plugin.officeimporter.filter.EmptyLinkFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.PinLiFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> -import
> com.xpn.xwiki.plugin.officeimporter.filter.UnderlineLinkFilter;
> -import
> com.xpn.xwiki.plugin.officeimporter.filter.XWikiSyntaxEscapeFilter;
> -import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> import com.xpn.xwiki.plugin.officeimporter.utils.DocumentType;
> +import com.xpn.xwiki.plugin.officeimporter.utils.HtmlFilterUtils;
> +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> import com.xpn.xwiki.web.Utils;
>
> /**
> @@ -471,9 +466,7 @@
> HTMLCleaner.ROLE), e);
> }
> Document document = htmlCleaner.clean(new
> StringReader(inputHTML));
> -
> - new UnderlineLinkFilter().filter(document);
> -
> + HtmlFilterUtils.filterUnderlinedLinks(document);
> XMLUtils.stripHTMLEnvelope(document);
> String cleanedHTML = XMLUtils.toString(document);
> return cleanedHTML;
> @@ -499,14 +492,12 @@
> HTMLCleaner.ROLE), e);
> }
> Document document = htmlCleaner.clean(new
> StringReader(inputHTML));
> -
> - new TagRemoveFilter().filter(document);
> - new UnderlineLinkFilter().filter(document);
> - new XWikiSyntaxEscapeFilter().filter(document);
> - new ImageTagFilter().filter(document);
> - new PinLiFilter().filter(document);
> - new EmptyLinkFilter().filter(document);
> -
> + HtmlFilterUtils.filterTags(document, new String[]{"style",
> "script"});
> + HtmlFilterUtils.filterUnderlinedLinks(document);
> + HtmlFilterUtils.filterSytaxChars(document);
> + HtmlFilterUtils.filterImageLinks(document);
> + HtmlFilterUtils.filterParagraphTagsInLineItemTags(document);
> + HtmlFilterUtils.filterEmptyLinks(document);
> XMLUtils.stripHTMLEnvelope(document);
> String cleanedHTML = XMLUtils.toString(document);
> return cleanedHTML;
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/
> xwiki/plugin/officeimporter/utils/HtmlFilterUtils.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/utils/HtmlFilterUtils.java 2008-10-28 11:33:41
> UTC (rev 13867)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/
> plugin/officeimporter/utils/HtmlFilterUtils.java 2008-10-28 13:54:04
> UTC (rev 13868)
> @@ -1,12 +1,247 @@
> package com.xpn.xwiki.plugin.officeimporter.utils;
>
> +import java.util.ArrayList;
> +import java.util.List;
> +
> +import org.w3c.dom.Document;
> +import org.w3c.dom.Element;
> +import org.w3c.dom.NamedNodeMap;
> +import org.w3c.dom.Node;
> +import org.w3c.dom.NodeList;
> +import org.w3c.dom.Text;
> +
> /**
> * A utility class containing a suite of filter methods used to
> manipulate Html documents.
> *
> * @version $Id$
> * @since 1.7M1
> */
> -public class HtmlFilterUtils
> +public abstract class HtmlFilterUtils
> {
> + /**
> + * Characters that need to be escaped when jumping from html to
> xwiki syntax.
> + */
> + private static final List<String> escapeChars = new
> ArrayList<String>();
>
> + /**
> + * Static initializer for escape chars.
> + */
> + static {
> + escapeChars.add("[");
> + escapeChars.add("]");
> + escapeChars.add("{");
> + escapeChars.add("}");
> + escapeChars.add("*");
> + escapeChars.add("~");
> + escapeChars.add("_");
> + escapeChars.add("-");
> + escapeChars.add("1");
> + escapeChars.add("#");
> + escapeChars.add("$");
> + }
> +
> + /**
> + * Removes empty links from html documents. If the label of the
> link is empty, simply remove the
> + * tag as in {@code <a/>} or {@code <a href=""/>}.
> If the label is not null but the href
> + * attribute is missing, replace the tag with it's label. Like
> changing {@code <a>something</a>}
> + * to {@code something}.
> + *
> + * @param document The html document.
> + */
> + public static void filterEmptyLinks(Document document)
> + {
> + Element root = document.getDocumentElement();
> + NodeList links = root.getElementsByTagName("a");
> + for (int i = 0; i < links.getLength(); i++) {
> + Node link = links.item(i);
> + if (link.getTextContent() == null ||
> link.getTextContent().trim().equals("")) {
> + link.getParentNode().removeChild(link);
> + i--;
> + continue;
> + }
> +
> + Node hrefAttr =
> link.getAttributes().getNamedItem("href");
> + if (hrefAttr == null ||
> hrefAttr.getTextContent().trim().equals("")) {
> + NodeList children = link.getChildNodes();
> + while (children.getLength() > 0) {
> +
> link.getParentNode().insertBefore(children.item(0), link);
> + }
> + link.getParentNode().removeChild(link);
> + i--;
> + }
> + }
> + }
> +
> + /**
> + * Replaces the {@code <img>} tags with corresponding {image}
> macro elements which are
> + * recognized by xwiki syntax 1.0. Handles image attributes
> like src, width, height, alt, align.
> + *
> + * @param document The html document.
> + */
> + public static void filterImageLinks(Document document)
> + {
> + Element root = document.getDocumentElement();
> + NodeList imgs = root.getElementsByTagName("img");
> + while (imgs.getLength() > 0) {
> + Node image = imgs.item(0);
> + String imageCode = generateImageMacroString(image);
> + Node parent = image.getParentNode();
> + Text newImg = document.createTextNode(imageCode);
> + parent.replaceChild(newImg, image);
> + }
> + }
> +
> + /**
> + * Converts a {@code <img>} element into a xwiki syntax 1.0
> {image} macro element.
> + *
> + * @param imageLink Node representing the image link.
> + * @return Converted {image} macro string.
> + */
> + private static String generateImageMacroString(Node imageLink)
> + {
> + NamedNodeMap attrs = imageLink.getAttributes();
> + if (attrs == null) {
> + return null;
> + }
> + StringBuffer sb = new StringBuffer();
> + sb.append("{image:");
> + if (attrs.getNamedItem("src") != null) {
> + String src = attrs.getNamedItem("src").getTextContent();
> + sb.append(src);
> + }
> + if (attrs.getNamedItem("width") != null) {
> + String width =
> attrs.getNamedItem("width").getTextContent();
> + sb.append("|width=" + width);
> + }
> + if (attrs.getNamedItem("height") != null) {
> + String height =
> attrs.getNamedItem("height").getTextContent();
> + sb.append("|height=" + height);
> + }
> + if (attrs.getNamedItem("alt") != null) {
> + String alt = attrs.getNamedItem("alt").getTextContent();
> + sb.append("|alt=" + alt);
> + }
> + if (attrs.getNamedItem("align") != null) {
> + String align =
> attrs.getNamedItem("align").getTextContent();
> + sb.append("|align=" + align);
> + }
> + sb.append("}");
> + return sb.toString();
> + }
> +
> + /**
> + * Removes the starting {@code <p>} tags found within
> {@code <li>} tags. This is useful since
> + * such formations are not properly handled in xwiki 1.0 syntax.
> + *
> + * @param document The html document.
> + */
> + public static void filterParagraphTagsInLineItemTags(Document
> document)
> + {
> + Element root = document.getDocumentElement();
> + NodeList lists = root.getElementsByTagName("li");
> + for (int i = 0; i < lists.getLength(); i++) {
> + Node list = lists.item(i);
> + Node firstChild = list.getFirstChild();
> + if (firstChild.getNodeName() != null &&
> firstChild.getNodeName().equals("p")) {
> + NodeList childchildren = firstChild.getChildNodes();
> + while (childchildren.getLength() > 0) {
> + list.insertBefore(childchildren.item(0),
> firstChild);
> + }
> + list.removeChild(firstChild);
> + }
> + }
> + }
> +
> + /**
> + * Removes all listed tags from the given html document.
> + *
> + * @param document The html document.
> + * @param tags Tags to be removed.
> + */
> + public static void filterTags(Document document, String[] tags)
> + {
> + Element root = document.getDocumentElement();
> + for (String tag : tags) {
> + NodeList toBeRemovedTags =
> root.getElementsByTagName(tag);
> + while (toBeRemovedTags.getLength() > 0) {
> + Node t = toBeRemovedTags.item(0);
> + t.getParentNode().removeChild(t);
> + }
> + }
> + }
> +
> + /**
> + * Strips off underline tags surrounding links like {@code
> <u><a href="something">link</a></u>}.
> + *
> + * @param document The html document.
> + */
> + public static void filterUnderlinedLinks(Document document)
> + {
> + Element root = document.getDocumentElement();
> + NodeList links = root.getElementsByTagName("a");
> + for (int i = 0; i < links.getLength(); i++) {
> + Node link = links.item(i);
> + Node parent = link.getParentNode();
> + String parentName = parent.getNodeName();
> + if (parentName != null && (parentName.equals("u") ||
> parentName.equals("del"))) {
> + parent.getParentNode().insertBefore(link, parent);
> + parent.getParentNode().removeChild(parent);
> + }
> + }
> + }
> +
> + /**
> + * Escapes the xwiki sytax characters from the given html
> document. Example : {@code [} will be
> + * replaced by {@code \]}.
> + *
> + * @param document The html document.
> + */
> + public static void filterSytaxChars(Document document)
> + {
> + Element root = document.getDocumentElement();
> + escapeNode(root);
> + }
> +
> + /**
> + * Escapes xwiki syntax characters within the given node's
> content.
> + *
> + * @param node The node which is to be examined.
> + */
> + private static void escapeNode(Node node)
> + {
> + NodeList nodes = node.getChildNodes();
> + for (int i = 0; i < nodes.getLength(); i++) {
> + Node next = nodes.item(i);
> + if (next instanceof Text) {
> + String text = next.getTextContent();
> + text = escapeText(text);
> + next.setTextContent(text);
> + } else {
> + if (next.hasChildNodes()) {
> + escapeNode(next);
> + }
> + }
> + }
> + }
> +
> + /**
> + * Escapes xwiki syntax characters within the given string.
> + *
> + * @param text The string to be examined.
> + * @return The syntax escaped string.
> + */
> + private static String escapeText(String text)
> + {
> + StringBuffer sb = new StringBuffer();
> + for (int i = 0; i < text.length(); i++) {
> + char x = text.charAt(i);
> + if (escapeChars.contains(String.valueOf(x))) {
> + sb.append("\\");
> + sb.append(String.valueOf(x));
> + } else {
> + sb.append(x);
> + }
> + }
> + return sb.toString();
> + }
> }
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/
> xwiki/plugin/officeconverter/CleanHTMLTest.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> plugin/officeconverter/CleanHTMLTest.java 2008-10-28 11:33:41 UTC
> (rev 13867)
> +++ sandbox/xwiki-plugin-officeimporter/src/test/java/com/xpn/xwiki/
> plugin/officeconverter/CleanHTMLTest.java 2008-10-28 13:54:04 UTC
> (rev 13868)
> @@ -27,13 +27,7 @@
> import org.xwiki.xml.XMLUtils;
> import org.xwiki.xml.html.HTMLCleaner;
>
> -import com.xpn.xwiki.plugin.officeimporter.filter.EmptyLinkFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.HTMLFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.PinLiFilter;
> -import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> -import
> com.xpn.xwiki.plugin.officeimporter.filter.UnderlineLinkFilter;
> -import
> com.xpn.xwiki.plugin.officeimporter.filter.XWikiSyntaxEscapeFilter;
> +import com.xpn.xwiki.plugin.officeimporter.utils.HtmlFilterUtils;
> import com.xpn.xwiki.plugin.officeimporter.utils.ImporterException;
> import com.xpn.xwiki.test.AbstractXWikiComponentTestCase;
>
> @@ -121,14 +115,12 @@
> private void test(String input, String expected) throws
> ImporterException
> {
> Document document = cleaner.clean(new StringReader(input));
> -
> - new TagRemoveFilter().filter(document);
> - new UnderlineLinkFilter().filter(document);
> - new XWikiSyntaxEscapeFilter().filter(document);
> - new ImageTagFilter().filter(document);
> - new PinLiFilter().filter(document);
> - new EmptyLinkFilter().filter(document);
> -
> + HtmlFilterUtils.filterTags(document, new String[]{"style",
> "script"});
> + HtmlFilterUtils.filterUnderlinedLinks(document);
> + HtmlFilterUtils.filterSytaxChars(document);
> + HtmlFilterUtils.filterImageLinks(document);
> + HtmlFilterUtils.filterParagraphTagsInLineItemTags(document);
> + HtmlFilterUtils.filterEmptyLinks(document);
> XMLUtils.stripHTMLEnvelope(document);
> String actual = XMLUtils.toString(document);
> assertEquals(HEAD + expected + FOOT, actual);
>
> _______________________________________________
> notifications mailing list
> [EMAIL PROTECTED]
> http://lists.xwiki.org/mailman/listinfo/notifications
_______________________________________________
devs mailing list
[email protected]
http://lists.xwiki.org/mailman/listinfo/devs