Il giorno 23/nov/04, alle 07:31, Derek Hohls ha scritto:

Thanks; are there any examples demonstrating how this
approach can be integrated into Cocoon? I'm afraid
this hint is less than obvious to me, especially as it
seems htmlArea is not producing tags at all...

The simple class attached will take in a String as produced by HTMLArea, parse it using Neko and return a DOM. You must of course have the Neko parser on your classpath.

        Ugo

/*
 * $Id: HTMLParser.java,v 1.2 2004/02/25 15:47:39 ugo Exp $
 */

import java.io.IOException;
import java.io.StringReader;

import org.apache.xerces.parsers.DOMParser;
import org.cyberneko.html.HTMLConfiguration;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Description of HTMLParser.
 */
public class HTMLParser {
    private static final String xmlProlog = "<?xml";
    
    private static DOMParser parser;
    
    static {
        HTMLConfiguration parserconf = new HTMLConfiguration();
        
parserconf.setProperty("http://cyberneko.org/html/properties/names/elems";, 
"lower");
        
parserconf.setProperty("http://cyberneko.org/html/properties/names/attrs";, 
"lower");
        
parserconf.setProperty("http://cyberneko.org/html/properties/default-encoding";, 
"ISO-8859-1");
        parser = new DOMParser(parserconf);
    }
    
    private HTMLParser() {
    }
    
    public static synchronized Document parse(String input) throws 
SAXException, IOException {
        if (input.startsWith(xmlProlog)) {
            int pos = input.indexOf('>');
            if (pos > 0) {
                input = input.substring(pos + 1);
            }
            else {
                // TODO: log this
                return null;
            }
        }
        InputSource is = new InputSource(new StringReader(input));
        parser.parse(is);
        return parser.getDocument();
    }
}


--
Ugo Cei - http://beblogging.com/

Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to