Hi Asiri, For trimming please use:
* Commons Lang: http://commons.apache.org/lang/api/org/apache/commons/lang/StringUtils.html#stripStart(java.lang.String,%20java.lang.String)
* If you want the exact list of whitespace characters for HTML, see http://www.w3.org/TR/html4/struct/text.html#h-9.1
* You can also use \s in a regex.

It really depends on which characters you want removed, but I'm fairly sure your current implementation isn't correct.

Thanks
-Vincent

On Nov 10, 2008, at 1:09 PM, asiri (SVN) wrote: > Author: asiri > Date: 2008-11-10 13:09:35 +0100 (Mon, 10 Nov 2008) > New Revision: 14068 > > Added: > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/HtmlTableFilter.java > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/utils/ImporterUtils.java > Modified: > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/HtmlListFilter.java > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java > sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java > Log: > XAOFFICE-1 : Develop the initial feature set for office-importer > plugin. > > * Completing list support and table support, need to test and make > few more adjustments. 
> > Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ > xwiki/plugin/officeimporter/filter/HtmlListFilter.java > =================================================================== > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/HtmlListFilter.java 2008-11-10 11:32:32 > UTC (rev 14067) > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/HtmlListFilter.java 2008-11-10 12:09:35 > UTC (rev 14068) > @@ -5,11 +5,15 @@ > import org.w3c.dom.NodeList; > > import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext; > +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils; > > /** > - * This is a temporary filter until we have a clear solution to the > issue at > - * http://jira.xwiki.org/jira/browse/XWIKI-2812. The problem occurs > when the first child of a > - * [EMAIL PROTECTED] <li>} tag is a [EMAIL PROTECTED] <p>} tag. > + * Presently xwiki rendering module doesn't support complex list > items. Because of this reason this > + * temporary filter is used to rip off any complex formatting > elements present in html lists. The > + * JIRA issue is located at http://jira.xwiki.org/jira/browse/XWIKI-2812 > . 
> + * > + * @version $Id$ > + * @since 1.7M1 > */ > public class HtmlListFilter implements HtmlFilter > { > @@ -21,13 +25,23 @@ > NodeList listItems = document.getElementsByTagName("li"); > for (int i = 0; i < listItems.getLength(); i++) { > Node listItem = listItems.item(i); > - Node firstChild = listItem.getFirstChild(); > - if (firstChild != null && > firstChild.getNodeName().equals("p")) { > - NodeList grandChildren = firstChild.getChildNodes(); > - while (grandChildren.getLength() > 0) { > - listItem.insertBefore(grandChildren.item(0), > firstChild); > + Node counter = listItem.getFirstChild(); > + while (counter != null) { > + if (counter.getNodeType() == Node.TEXT_NODE) { > + String trimmed = > ImporterUtils.leftTrim(counter.getTextContent()); > + counter.setTextContent(trimmed); > + if (trimmed.equals("")) { > + counter = counter.getNextSibling(); > + continue; > + } > + } else if (counter.getNodeName().equals("p")) { > + NodeList children = counter.getChildNodes(); > + while (children.getLength() > 0) { > + listItem.insertBefore(children.item(0), > counter); > + } > + listItem.removeChild(counter); > } > - listItem.removeChild(firstChild); > + break; > } > } > } > > Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ > xwiki/plugin/officeimporter/filter/HtmlTableFilter.java > =================================================================== > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/ > HtmlTableFilter.java (rev 0) > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/filter/HtmlTableFilter.java 2008-11-10 > 12:09:35 UTC (rev 14068) > @@ -0,0 +1,65 @@ > +package com.xpn.xwiki.plugin.officeimporter.filter; > + > +import org.w3c.dom.Document; > +import org.w3c.dom.Node; > +import org.w3c.dom.NodeList; > + > +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext; > +import com.xpn.xwiki.plugin.officeimporter.utils.ImporterUtils; > + > 
+/** > + * Presently xwiki rendering module doesn't support complex table > cell items. This filter is used to > + * rip-off or modify html tables so that they can be rendered > properly. The corresponding JIRA issue > + * is located at http://jira.xwiki.org/jira/browse/XWIKI-2804. > + * > + * @version $Id$ > + * @since 1.7M1 > + */ > +public class HtmlTableFilter implements HtmlFilter > +{ > + /** > + * [EMAIL PROTECTED] > + */ > + public void filter(Document document, ImporterContext context) > + { > + NodeList cellItems = document.getElementsByTagName("td"); > + for (int i = 0; i < cellItems.getLength(); i++) { > + Node cellItem = cellItems.item(i); > + Node counter = cellItem.getFirstChild(); > + while (counter != null) { > + if (counter.getNodeType() == Node.TEXT_NODE) { > + String trimmed = > ImporterUtils.leftTrim(counter.getTextContent()); > + counter.setTextContent(trimmed); > + if (trimmed.equals("")) { > + counter = counter.getNextSibling(); > + continue; > + } > + } else if (counter.getNodeName().equals("p")) { > + NodeList children = counter.getChildNodes(); > + while (children.getLength() > 0) { > + cellItem.insertBefore(children.item(0), > counter); > + } > + cellItem.removeChild(counter); > + } > + break; > + } > + counter = cellItem.getLastChild(); > + while (counter != null) { > + if (counter.getNodeType() == Node.TEXT_NODE) { > + String trimmed = > ImporterUtils.rightTrim(counter.getTextContent()); > + counter.setTextContent(trimmed); > + if (trimmed.equals("")) { > + counter = counter.getPreviousSibling(); > + continue; > + } > + } > + break; > + } > + // Fill all empty cells with an empty character > (space / tab). > + // This is because the current xwiki 2.0 syntax doesn't > handle empty cells correctly. 
> + if (cellItem.getTextContent().equals("")) { > + cellItem.setTextContent("-"); > + } > + } > + } > +} > > Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ > xwiki/plugin/officeimporter/transformer/ > HtmlToXWikiTwoZeroTransformer.java > =================================================================== > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java > 2008-11-10 11:32:32 UTC (rev 14067) > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiTwoZeroTransformer.java > 2008-11-10 12:09:35 UTC (rev 14068) > @@ -23,6 +23,7 @@ > import com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter; > +import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter; > import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext; > @@ -50,6 +51,7 @@ > filterList.add(new ImageTagFilter()); > filterList.add(new HtmlLinkFilter()); > filterList.add(new HtmlListFilter()); > + filterList.add(new HtmlTableFilter()); > } > > /** > > Modified: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ > xwiki/plugin/officeimporter/transformer/ > HtmlToXWikiXhtmlTransformer.java > =================================================================== > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java > 2008-11-10 11:32:32 UTC (rev 14067) > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/transformer/HtmlToXWikiXhtmlTransformer.java > 2008-11-10 12:09:35 UTC (rev 14068) > @@ -13,6 +13,7 @@ > import 
com.xpn.xwiki.plugin.officeimporter.filter.HtmlLinkFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.HtmlListFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.HtmlStyleFilter; > +import com.xpn.xwiki.plugin.officeimporter.filter.HtmlTableFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.ImageTagFilter; > import com.xpn.xwiki.plugin.officeimporter.filter.TagRemoveFilter; > import com.xpn.xwiki.plugin.officeimporter.utils.ImporterContext; > @@ -41,6 +42,7 @@ > filterList.add(new ImageTagFilter()); > filterList.add(new HtmlLinkFilter()); > filterList.add(new HtmlListFilter()); > + filterList.add(new HtmlTableFilter()); > } > > /** > > Added: sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/ > xwiki/plugin/officeimporter/utils/ImporterUtils.java > =================================================================== > --- sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/utils/ > ImporterUtils.java (rev 0) > +++ sandbox/xwiki-plugin-officeimporter/src/main/java/com/xpn/xwiki/ > plugin/officeimporter/utils/ImporterUtils.java 2008-11-10 12:09:35 > UTC (rev 14068) > @@ -0,0 +1,36 @@ > +package com.xpn.xwiki.plugin.officeimporter.utils; > + > +/** > + * Utility methods common for office importer. 
> + * > + * @version $Id$ > + * @since 1.7M1 > + */ > +public abstract class ImporterUtils > +{ > + public static String leftTrim(String s) > + { > + String content = s.trim(); > + if (content.equals("")) { > + return ""; > + } else { > + int index = s.indexOf(content); > + if(index == 0) { > + return s; > + } else { > + return s.substring(index); > + } > + } > + } > + > + public static String rightTrim(String s) > + { > + String content = s.trim(); > + if (content.equals("")) { > + return ""; > + } else { > + int index = s.indexOf(content); > + return s.substring(0, index + content.length()); > + } > + } > +} _______________________________________________ devs mailing list devs@xwiki.org http://lists.xwiki.org/mailman/listinfo/devs