Hi Asiri,

For trimming please use:

* Commons Lang: 
http://commons.apache.org/lang/api/org/apache/commons/lang/StringUtils.html#stripStart(java.lang.String,%20java.lang.String)
* If you want the exact list of whitespace characters for HTML, see 
http://www.w3.org/TR/html4/struct/text.html#h-9.1
* You can also use \s in a regex

It really depends on which characters you want removed, but I'm sure your  
implementation isn't correct.

Thanks
-Vincent

On Nov 10, 2008, at 1:09 PM, asiri (SVN) wrote:

> Author: asiri
> Date: 2008-11-10 13:09:35 +0100 (Mon, 10 Nov 2008)
> New Revision: 14068
>
> Added:
>   sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/HtmlTableFilter.java
>   sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/utils/ImporterUtils.java
> Modified:
>   sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/HtmlListFilter.java
>   sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java
>   sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java
> Log:
> XAOFFICE-1 : Develop the initial feature set for office-importer  
> plugin.
>
> * Completing list support and table support, need to test and make  
> few more adjustments.
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ 
> xwiki/plugin/officeimporter/filter/HtmlListFilter.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/HtmlListFilter.java      2008-11-10 11:32:32  
> UTC (rev 14067)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/HtmlListFilter.java      2008-11-10 12:09:35  
> UTC (rev 14068)
> @@ -5,11 +5,15 @@
> import org.w3c.dom.NodeList;
>
> import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
> +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils;
>
> /**
> - * This is a temporary filter until we have a clear solution to the  
> issue at
> - * http://jira.xwiki.org/jira/browse/XWIKI-2812. The problem occurs  
> when the first child of a
> - * {@code <li>} tag is a {@code <p>} tag.
> + * Presently xwiki rendering module doesn't support complex list  
> items. Because of this reason this
> + * temporary filter is used to rip off any complex formatting  
> elements present in html lists. The
> + * JIRA issue is located at http://jira.xwiki.org/jira/browse/XWIKI-2812 
> .
> + *
> + * @version $Id$
> + * @since 1.7M1
>  */
> public class HtmlListFilter implements HtmlFilter
> {
> @@ -21,13 +25,23 @@
>         NodeList listItems = document.getElementsByTagName("li");
>         for (int i = 0; i < listItems.getLength(); i++) {
>             Node listItem = listItems.item(i);
> -            Node firstChild = listItem.getFirstChild();
> -            if (firstChild != null &&  
> firstChild.getNodeName().equals("p")) {
> -                NodeList grandChildren = firstChild.getChildNodes();
> -                while (grandChildren.getLength() > 0) {
> -                    listItem.insertBefore(grandChildren.item(0),  
> firstChild);
> +            Node counter = listItem.getFirstChild();
> +            while (counter != null) {
> +                if (counter.getNodeType() == Node.TEXT_NODE) {
> +                    String trimmed =  
> ImporterUtils.leftTrim(counter.getTextContent());
> +                    counter.setTextContent(trimmed);
> +                    if (trimmed.equals("")) {
> +                        counter = counter.getNextSibling();
> +                        continue;
> +                    }
> +                } else if (counter.getNodeName().equals("p")) {
> +                    NodeList children = counter.getChildNodes();
> +                    while (children.getLength() > 0) {
> +                        listItem.insertBefore(children.item(0),  
> counter);
> +                    }
> +                    listItem.removeChild(counter);
>                 }
> -                listItem.removeChild(firstChild);
> +                break;
>             }
>         }
>     }
>
> Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ 
> xwiki/plugin/officeimporter/filter/HtmlTableFilter.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/ 
> HtmlTableFilter.java                          (rev 0)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/filter/HtmlTableFilter.java     2008-11-10  
> 12:09:35 UTC (rev 14068)
> @@ -0,0 +1,65 @@
> +package com.xpn.xwiki.plugin.officeimporter.filter;
> +
> +import org.w3c.dom.Document;
> +import org.w3c.dom.Node;
> +import org.w3c.dom.NodeList;
> +
> +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
> +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils;
> +
> +/**
> + * Presently xwiki rendering module doesn't support complex table  
> cell items. This filter is used to
> + * rip-off or modify html tables so that they can be rendered  
> properly. The corresponding JIRA issue
> + * is located at http://jira.xwiki.org/jira/browse/XWIKI-2804.
> + *
> + * @version $Id$
> + * @since 1.7M1
> + */
> +public class HtmlTableFilter implements HtmlFilter
> +{
> +    /**
> +     * {@inheritDoc}
> +     */
> +    public void filter(Document document, ImporterContext context)
> +    {
> +        NodeList cellItems = document.getElementsByTagName("td");
> +        for (int i = 0; i < cellItems.getLength(); i++) {
> +            Node cellItem = cellItems.item(i);
> +            Node counter = cellItem.getFirstChild();
> +            while (counter != null) {
> +                if (counter.getNodeType() == Node.TEXT_NODE) {
> +                    String trimmed =  
> ImporterUtils.leftTrim(counter.getTextContent());
> +                    counter.setTextContent(trimmed);
> +                    if (trimmed.equals("")) {
> +                        counter = counter.getNextSibling();
> +                        continue;
> +                    }
> +                } else if (counter.getNodeName().equals("p")) {
> +                    NodeList children = counter.getChildNodes();
> +                    while (children.getLength() > 0) {
> +                        cellItem.insertBefore(children.item(0),  
> counter);
> +                    }
> +                    cellItem.removeChild(counter);
> +                }
> +                break;
> +            }
> +            counter = cellItem.getLastChild();
> +            while (counter != null) {
> +                if (counter.getNodeType() == Node.TEXT_NODE) {
> +                    String trimmed =  
> ImporterUtils.rightTrim(counter.getTextContent());
> +                    counter.setTextContent(trimmed);
> +                    if (trimmed.equals("")) {
> +                        counter = counter.getPreviousSibling();
> +                        continue;
> +                    }
> +                }
> +                break;
> +            }
> +            // Fill all empty cells with an empty character  
> (space / tab).
> +            // This is because the current xwiki 2.0 syntax doesn't  
> handle empty cells correctly.
> +            if (cellItem.getTextContent().equals("")) {
> +                cellItem.setTextContent("-");
> +            }
> +        }
> +    }
> +}
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ 
> xwiki/plugin/officeimporter/transformer/ 
> HtmlToXWikiTwoZeroTransformer.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java   
> 2008-11-10 11:32:32 UTC (rev 14067)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java   
> 2008-11-10 12:09:35 UTC (rev 14068)
> @@ -23,6 +23,7 @@
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter;
> +import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
> @@ -50,6 +51,7 @@
>         filterList.add(new ImageTagFilter());
>         filterList.add(new HtmlLinkFilter());
>         filterList.add(new HtmlListFilter());
> +        filterList.add(new HtmlTableFilter());
>     }
>
>     /**
>
> Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ 
> xwiki/plugin/officeimporter/transformer/ 
> HtmlToXWikiXhtmlTransformer.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java     
> 2008-11-10 11:32:32 UTC (rev 14067)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java     
> 2008-11-10 12:09:35 UTC (rev 14068)
> @@ -13,6 +13,7 @@
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter;
> +import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter;
> import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter;
> import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext;
> @@ -41,6 +42,7 @@
>         filterList.add(new ImageTagFilter());
>         filterList.add(new HtmlLinkFilter());
>         filterList.add(new HtmlListFilter());
> +        filterList.add(new HtmlTableFilter());
>     }
>
>     /**
>
> Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ 
> xwiki/plugin/officeimporter/utils/ImporterUtils.java
> ===================================================================
> --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/utils/ 
> ImporterUtils.java                            (rev 0)
> +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ 
> plugin/officeimporter/utils/ImporterUtils.java        2008-11-10 12:09:35  
> UTC (rev 14068)
> @@ -0,0 +1,36 @@
> +package com.xpn.xwiki.plugin.officeimporter.utils;
> +
> +/**
> + * Utility methods common for office importer.
> + *
> + * @version $Id$
> + * @since 1.7M1
> + */
> +public abstract class ImporterUtils
> +{
> +    public static String leftTrim(String s)
> +    {
> +        String content = s.trim();
> +        if (content.equals("")) {
> +            return "";
> +        } else {
> +            int index = s.indexOf(content);
> +            if(index == 0) {
> +                return s;
> +            } else {
> +                return s.substring(index);
> +            }
> +        }
> +    }
> +
> +    public static String rightTrim(String s)
> +    {
> +        String content = s.trim();
> +        if (content.equals("")) {
> +            return "";
> +        } else {
> +            int index = s.indexOf(content);
> +            return s.substring(0, index + content.length());
> +        }
> +    }
> +}
_______________________________________________
devs mailing list
devs@xwiki.org
http://lists.xwiki.org/mailman/listinfo/devs

Reply via email to