Author: burton
Date: Sun Feb  6 00:29:56 2005
New Revision: 151555

URL: http://svn.apache.org/viewcvs?view=rev&rev=151555
Log:
Fixed potential bug (but worried about regression) with accented text in XML

Modified:
    jakarta/commons/sandbox/feedparser/trunk/TODO
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
    jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
    jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml

Modified: jakarta/commons/sandbox/feedparser/trunk/TODO
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/TODO?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/TODO (original)
+++ jakarta/commons/sandbox/feedparser/trunk/TODO Sun Feb  6 00:29:56 2005
@@ -3,12 +3,15 @@
 
     - Get viewcvs linked to the app
 
-    - Nightly builds
+    - Nightly builds?
 
     - 0.5 public release
 
         http://jakarta.apache.org/commons/releases/index.html
 
+
+
+
     - maven?
 
 - (DONE) All FeedParser exceptions should include the URL of the feed if
@@ -26,6 +29,16 @@
 
 - (DONE) Rework the factory mechanism to support multiple FeedParsers... should
   be an interface.
+
+- How do I want to maintain a public changelog?
+
+- How do I want to maintain a public TODO?
+
+- Fix the feedparsing bug where we'll drop chars:
+
+    current-broken-drop-accents.atom
+
+    The bug is in getCorrectInputStream
 
 - Networking layer should support per-request UserAgent settings.  This should
   just be a request header I think

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java (original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/FeedParserImpl.java Sun Feb  6 00:29:56 2005
@@ -106,10 +106,10 @@
         if (encoding == null)
             encoding = "UTF-8";
 
-        if (encoding.startsWith("UTF")) {
+        if ( encoding.startsWith( "UTF" ) ) {
 
-            String result = XMLCleanser.cleanse(bytes, encoding);
-            bytes = FeedFilter.parse(result, encoding);
+            String result = XMLCleanser.cleanse( bytes, encoding );
+            bytes = FeedFilter.parse( result, encoding );
 
         } else {
 
@@ -120,7 +120,7 @@
         //remove prefix whitespace, intern HTML entities, etc.
 
         //build an input stream from the our bytes for parsing...
-        is = new ByteArrayInputStream(bytes);
+        is = new ByteArrayInputStream( bytes );
 
         return is;
 

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java (original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/network/NetworkException.java Sun Feb  6 00:29:56 2005
@@ -22,7 +22,7 @@
 /**
  * 
  * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
- * @version $Id: NetworkException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
+ * @version $Id$
  */
 public class NetworkException extends IOException {
 
@@ -109,6 +109,17 @@
 
     public int getResponseCode() {
 
+        //FIXME: 
+        //        java.lang.NumberFormatException: For input string: "fie"
+        //         at java.lang.NumberFormatException.forInputString(NumberFormatException.java:48)
+        //         at java.lang.Integer.parseInt(Integer.java:468)
+        //         at java.lang.Integer.parseInt(Integer.java:518)
+        //         at org.peerfear.newsmonster.network.NetworkException.getResponseCode(NetworkException.java:142)
+        //         at ksa.robot.FeedTask._doTaskLogFailure(FeedTask.java:264)
+        //         at ksa.robot.FeedTask.run(FeedTask.java:202)
+        //         at ksa.robot.TaskThread.doProcessTask(TaskThread.java:298)
+        //         at ksa.robot.TaskThread.run(TaskThread.java:111)
+        
         if ( _urlConnection == null ) {
             return -1;
         } 

Modified: jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java (original)
+++ jakarta/commons/sandbox/feedparser/trunk/src/java/org/apache/commons/feedparser/tools/XMLCleanser.java Sun Feb  6 00:29:56 2005
@@ -20,10 +20,10 @@
  * Class that can cleanse a string so that nothing can be present to break an
  * XML parser.  This is a VERY non-portable class as it is meant to work just
  * with Xalan/Xerces and may remove more text and replace things that are
- * non-XML centric.  
+ * non-XML centric.
  *
  * @author <a href="mailto:[EMAIL PROTECTED]">Kevin A. Burton</a>
- * @version $Id: XMLCleanser.java,v 1.2 2004/09/03 19:46:47 burton Exp $
+ * @version $Id$
  */
 public class XMLCleanser {
 
@@ -125,26 +125,43 @@
     }
 
     /*
-     * This is a utility function for determining whether a specified 
-     * character is a character according to production 2 of the 
-     * XML 1.0 specification.
+     * This is a utility function for determining whether a specified character
+     * is a character according to production 2 of the XML 1.0 specification.
      *
      * @param c <code>char</code> to check for XML compliance.
-     * @return <code>boolean</code> - true if it's a character, 
-     *                                false otherwise.
+
+     * @return <code>boolean</code> - true if it's a character, false otherwise.
      */
-    public static boolean isXMLCharacter(char c) {
+    public static boolean isXMLCharacter( char c ) {
 
+        // A parsed entity contains text, a sequence of characters, which may
+        // represent markup or character data. A character is an atomic unit of
+        // text as specified by ISO/IEC 10646 [ISO/IEC 10646]. Legal characters
+        // are tab, carriage return, line feed, and the legal graphic characters
+        // of Unicode and ISO/IEC 10646. The use of "compatibility characters",
+        // as defined in section 6.8 of [Unicode], is discouraged.
+
+        // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+        // [#x10000-#x10FFFF] /* any Unicode character, excluding the surrogate
+        // blocks, FFFE, and FFFF. */
+        
         if (c == '\n') return true;
         if (c == '\r') return true;
         if (c == '\t') return true;
+
+        //NOTE: this was BROKEN!  The range between 0x80 and 0xFF is valid XML
+        //and would end up dropping latin characters in UTF-8.  Why did I want
+        //to return false here again?
         
-        if (c < 0x20) return false;  if (c < 0x80) return true;
-        if (c < 0xFF) return false; if (c <= 0xD7FF) return true;
+        //if (c < 0x20) return false;  if (c < 0x80) return true;
+        //if (c < 0xFF) return false; if (c <= 0xD7FF) return true;
+
+        if (c < 0x20) return false;  if (c <= 0xD7FF) return true;
         if (c < 0xE000) return false;  if (c <= 0xFFFD) return true;
         if (c < 0x10000) return false;  if (c <= 0x10FFFF) return true;
         
         return false;
+
     }
 
 }

Modified: jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml
URL: http://svn.apache.org/viewcvs/jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml?view=diff&r1=151554&r2=151555
==============================================================================
--- jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml (original)
+++ jakarta/commons/sandbox/feedparser/trunk/xdocs/navigation.xml Sun Feb  6 00:29:56 2005
@@ -19,6 +19,10 @@
 
             <item name="Wiki"                          
                   href="http://wiki.apache.org/jakarta-commons/FeedParser" />
+
+            <item name="ViewCVS"
+                  href="http://svn.apache.org/viewcvs.cgi/jakarta/commons/sandbox/feedparser/trunk" />
+
         </menu>
         &common-menus;
     </body>



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to