Author: rwesten
Date: Tue May 15 07:05:23 2012
New Revision: 1338567

URL: http://svn.apache.org/viewvc?rev=1338567&view=rev
Log:
STANBOL-583: changed the implementation so that the XML-escaped text is streamed 
directly to the HTTP request. While these changes are a clear improvement, they 
have not solved the remaining issues (failing unit test of the CELI NER engine).

Modified:
    
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java

Modified: 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
URL: 
http://svn.apache.org/viewvc/incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java?rev=1338567&r1=1338566&r2=1338567&view=diff
==============================================================================
--- 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
 (original)
+++ 
incubator/stanbol/branches/celi-enhancement-engines/engines/celi/src/main/java/org/apache/stanbol/enhancer/engines/celi/ner/impl/NERserviceClientHTTP.java
 Tue May 15 07:05:23 2012
@@ -1,11 +1,15 @@
 package org.apache.stanbol.enhancer.engines.celi.ner.impl;
 
+import java.io.BufferedWriter;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
+import java.io.Writer;
 import java.net.HttpURLConnection;
 import java.net.URL;
+import java.nio.charset.Charset;
+import java.util.Collections;
 import java.util.List;
 import java.util.Vector;
 
@@ -23,11 +27,30 @@ import org.slf4j.LoggerFactory;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
-
 public class NERserviceClientHTTP {
-
-       private URL serviceEP;
-       private String licenseKey;
+    /**
+     * The UTF-8 {@link Charset}
+     */
+    private static final Charset UTF8 = Charset.forName("UTF-8");
+    /**
+     * The content type "text/xml; charset={@link #UTF8}"
+     */
+       private static final String     CONTENT_TYPE = "text/xml; 
charset="+UTF8.name();
+       /**
+        * The XML version, encoding; SOAP envelope, header and starting element 
of the body;
+        * processTextRequest and text starting element.
+        */
+    private static final String REQUEST_PREFIX = "<?xml version=\"1.0\" 
encoding=\""+UTF8.name()+"\"?>" +
+               "<soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; " +
+               
"xmlns:v0u0=\"http://linguagrid.org/ns/namedentityrecognition/v0u0\";><soapenv:Header/>"
 +
+               "<soapenv:Body><v0u0:processTextRequest><v0u0:text>";
+    /**
+     * closes the text, processTextRequest, SOAP body and envelope
+     */
+    private static final String REQUEST_SUFFIX = 
"</v0u0:text></v0u0:processTextRequest></soapenv:Body></soapenv:Envelope>";
+       
+       private final URL serviceEP;
+       private final String licenseKey;
        
        private final Logger log = LoggerFactory.getLogger(getClass());
                
@@ -35,8 +58,50 @@ public class NERserviceClientHTTP {
                this.serviceEP=serviceUrl;
                this.licenseKey=licenseKey;
        }
-       
-       public InputStream doPostRequest(URL url, String body) throws 
IOException {
+       /**
+        * creates a POST request to the {@link #serviceEP} by using the
+        * {@link #licenseKey} so that one can write the request data to the
+        * returned {@link HttpURLConnection#getOutputStream()}
+        * @param hasBody
+        * @return
+        * @throws IOException
+        */
+       private HttpURLConnection createPostRequest() throws IOException {
+               HttpURLConnection urlConn = (HttpURLConnection) 
serviceEP.openConnection();
+               urlConn.setRequestMethod("POST");
+               urlConn.setDoInput(true);
+               urlConn.setDoOutput(true);
+               urlConn.setUseCaches(false);
+               if(CONTENT_TYPE != null){
+                       urlConn.setRequestProperty("Content-Type", 
CONTENT_TYPE);
+               }
+               if(this.licenseKey!=null){
+                       String encoded = 
Base64.encode(this.licenseKey.getBytes(UTF8));
+                       urlConn.setRequestProperty("Authorization", "Basic 
"+encoded);
+               }
+               return urlConn;
+       }
+       /**
+        * performs the request
+        * @param urlConn
+        * @return
+        * @throws IOException
+        */
+       private InputStream doRequest(HttpURLConnection urlConn) throws 
IOException {
+               //close connection
+               urlConn.disconnect();
+               
+               // get response data
+               return urlConn.getInputStream();
+               
+       }
+       /**
+        * use {@link #createPostRequest()} and {@link 
#doRequest(HttpURLConnection)
+        * to avoid creating in-memory copies of the parsed text with
+        * StringEscapeUtils#escapeXml(String).
+        */
+       @Deprecated
+       private InputStream doPostRequest(URL url, String body) throws 
IOException {
                
                HttpURLConnection urlConn = (HttpURLConnection) 
url.openConnection();
                urlConn.setRequestMethod("POST");
@@ -69,25 +134,32 @@ public class NERserviceClientHTTP {
                return urlConn.getInputStream();
        }
 
-
        public List<NamedEntity> extractEntities(String text) {
-
+           if(text == null || text.isEmpty()){
+               //no text -> no extractions
+               return Collections.emptyList();
+           }
                List<NamedEntity> extractedNE = new Vector<NamedEntity>();
 
                try {
-                       String txt = StringEscapeUtils.escapeXml(text);
-                       String xmldata = "<?xml version=\"1.0\" 
encoding=\"UTF-8\"?><soapenv:Envelope 
xmlns:soapenv=\"http://schemas.xmlsoap.org/soap/envelope/\"; 
xmlns:v0u0=\"http://linguagrid.org/ns/namedentityrecognition/v0u0\";><soapenv:Header/><soapenv:Body><v0u0:processTextRequest><v0u0:text>"
-                                       + txt + 
"</v0u0:text></v0u0:processTextRequest></soapenv:Body></soapenv:Envelope>";
-                       
-                       InputStream resultStream = doPostRequest(serviceEP, 
xmldata);
+                   //create the POST request
+                       HttpURLConnection con = createPostRequest();
+                       //write content
+                       BufferedWriter writer = new BufferedWriter(new 
OutputStreamWriter(con.getOutputStream(),UTF8));
+                       writer.write(REQUEST_PREFIX);
+                       StringEscapeUtils.escapeXml(writer, text);
+                       writer.write(REQUEST_SUFFIX);
+                       writer.close();
+                       //now perform the request
+                       InputStream stream = doRequest(con);
 
-                       // Create SoapMessage
+                       // Create SoapMessage and parse the results
                        MessageFactory msgFactory = 
MessageFactory.newInstance();
                        SOAPMessage message = msgFactory.createMessage();
                        SOAPPart soapPart = message.getSOAPPart();
 
                        // Load the SOAP text into a stream source
-                       StreamSource source = new StreamSource(resultStream);
+                       StreamSource source = new StreamSource(stream);
 
                        // Set contents of message
                        soapPart.setContent(source);
@@ -117,4 +189,4 @@ public class NERserviceClientHTTP {
 
                return extractedNE;
        }
-}
+}
\ No newline at end of file


Reply via email to