martinc     2004/03/11 23:34:46

  Modified:    fileupload/src/java/org/apache/commons/fileupload
                        DefaultFileItem.java
               fileupload/src/test/org/apache/commons/fileupload
                        DefaultFileItemTest.java TestAll.java
  Added:       fileupload/src/java/org/apache/commons/fileupload
                        ParameterParser.java
               fileupload/src/test/org/apache/commons/fileupload
                        ParameterParserTest.java
  Log:
  Add support for character sets specified for individual parts.
  
  PR: 20813
  Submitted by: Oleg Kalnichevski
  
  Revision  Changes    Path
  1.25      +46 -7     
jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/DefaultFileItem.java
  
  Index: DefaultFileItem.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/DefaultFileItem.java,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- DefaultFileItem.java      25 Feb 2004 21:07:12 -0000      1.24
  +++ DefaultFileItem.java      12 Mar 2004 07:34:45 -0000      1.25
  @@ -25,6 +25,7 @@
   import java.io.InputStream;
   import java.io.OutputStream;
   import java.io.UnsupportedEncodingException;
  +import java.util.Map;
   
   
   /**
  @@ -69,6 +70,15 @@
   
   
       /**
  +     * Default content charset to be used when no explicit charset
  +     * parameter is provided by the sender. Media subtypes of the 
  +     * "text" type are defined to have a default charset value of 
  +     * "ISO-8859-1" when received via HTTP.
  +     */
  +    public static final String DEFAULT_CHARSET = "ISO-8859-1";
  +
  +
  +    /**
        * The content type passed by the browser, or <code>null</code> if
        * not defined.
        */
  @@ -172,10 +182,10 @@
   
   
       /**
  -     * Returns the content type passed by the browser or <code>null</code> if
  +     * Returns the content type passed by the agent or <code>null</code> if
        * not defined.
        *
  -     * @return The content type passed by the browser or <code>null</code> if
  +     * @return The content type passed by the agent or <code>null</code> if
        *         not defined.
        */
       public String getContentType()
  @@ -185,6 +195,23 @@
   
   
       /**
  +     * Returns the content charset passed by the agent or <code>null</code> if
  +     * not defined.
  +     * 
  +     * @return The content charset passed by the agent or <code>null</code> if
  +     *         not defined.
  +     */
  +    public String getCharSet()
  +    {
  +        ParameterParser parser = new ParameterParser();
  +        parser.setLowerCaseNames(true);
  +        // Parameter parser can handle null input
  +        Map params = parser.parse(getContentType(), ';');
  +        return (String)params.get("charset");
  +    }
  +
  +
  +    /**
        * Returns the original filename in the client's filesystem.
        *
        * @return The original filename in the client's filesystem.
  @@ -287,17 +314,17 @@
        * encoding.  This method uses {@link #get()} to retrieve the
        * contents of the file.
        *
  -     * @param encoding The character encoding to use.
  +     * @param charset The charset to use.
        *
        * @return The contents of the file, as a string.
        *
        * @exception UnsupportedEncodingException if the requested character
        *                                         encoding is not available.
        */
  -    public String getString(String encoding)
  +    public String getString(final String charset)
           throws UnsupportedEncodingException
       {
  -        return new String(get(), encoding);
  +        return new String(get(), charset);
       }
   
   
  @@ -309,8 +336,20 @@
        * @return The contents of the file, as a string.
        */
       public String getString()
  +    //@TODO: Consider making this method throw UnsupportedEncodingException 
       {
  -        return new String(get());
  +        byte[] rawdata = get();
  +        String charset = getCharSet();
  +        if (charset == null) {
  +            charset = DEFAULT_CHARSET;
  +        }
  +        try
  +        {
  +            return new String(rawdata, charset);
  +        }
  +        catch(UnsupportedEncodingException e) {
  +            return new String(rawdata);
  +        }
       }
   
   
  
  
  
  1.1                  
jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/ParameterParser.java
  
  Index: ParameterParser.java
  ===================================================================
  /*
   * Copyright 2001-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.commons.fileupload;
  
  import java.util.HashMap;
  import java.util.Map;
  
  /**
   * A simple parser intended to parse sequences of name/value pairs.
   * Parameter values are expected to be enclosed in quotes if they
   * contain unsafe characters, such as '=' characters or separators.
   * Parameter values are optional and can be omitted.
   *
   * <p>
   *  <code>param1 = value; param2 = "anything goes; really"; param3</code>
   * </p>
   *
   * <p>
   * The scanning state (input array, current position and token bounds) is
   * kept in instance fields and overwritten by every <code>parse</code> call,
   * so a single instance must not be used by several threads at once.
   * </p>
   *
   * @author <a href="mailto:[EMAIL PROTECTED]">Oleg Kalnichevski</a>
   */

  public class ParameterParser
  {
      /** String to be parsed */
      private char[] chars = null;

      /** Current position in the string */
      private int pos = 0;

      /** End position (exclusive) of the region being parsed */
      private int len = 0;

      /** Start of a token */
      private int i1 = 0;

      /** End of a token (exclusive) */
      private int i2 = 0;

      /**
       * Whether names stored in the map should be converted
       * to lower case
       */
      private boolean lowerCaseNames = false;

      /**
       * Default ParameterParser constructor
       */
      public ParameterParser()
      {
          super();
      }

      /**
       * Are there any characters left to parse?
       *
       * @return <tt>true</tt> if there are unparsed characters,
       *         <tt>false</tt> otherwise.
       */
      private boolean hasChar()
      {
          return this.pos < this.len;
      }

      /**
       * A helper method to process the parsed token. This method removes
       * leading and trailing blanks as well as enclosing quotation marks,
       * when necessary. The token bounds <code>i1</code>/<code>i2</code> are
       * adjusted in place.
       *
       * @param quoted <tt>true</tt> if quotation marks are expected,
       *               <tt>false</tt> otherwise.
       * @return the token, or <code>null</code> if nothing is left after
       *         trimming
       */
      private String getToken(boolean quoted)
      {
          // Trim leading white spaces
          while ((i1 < i2) && (Character.isWhitespace(chars[i1])))
          {
              i1++;
          }
          // Trim trailing white spaces
          while ((i2 > i1) && (Character.isWhitespace(chars[i2 - 1])))
          {
              i2--;
          }
          // Strip away quotation marks only when both the opening and the
          // closing quote are present; an unbalanced quote is kept verbatim
          if (quoted
              && ((i2 - i1) >= 2)
              && (chars[i1] == '"')
              && (chars[i2 - 1] == '"'))
          {
              i1++;
              i2--;
          }
          if (i2 > i1)
          {
              return new String(chars, i1, i2 - i1);
          }
          return null;
      }

      /**
       * Tests if the given character is present in the array of characters
       *
       * @param ch the character to test for presence in the array of characters
       * @param charray the array of characters to test against
       *
       * @return <tt>true</tt> if the character is present in the array of
       *   characters, <tt>false</tt> otherwise.
       */
      private boolean isOneOf(char ch, final char[] charray)
      {
          for (int i = 0; i < charray.length; i++)
          {
              if (ch == charray[i])
              {
                  return true;
              }
          }
          return false;
      }

      /**
       * Parses out a token until any of the given terminators
       * is encountered. The terminator itself is not consumed.
       *
       * @param terminators the array of terminating characters. Any of these
       * characters when encountered signify the end of the token
       *
       * @return the token, or <code>null</code> if it is empty
       */
      private String parseToken(final char[] terminators)
      {
          i1 = pos;
          i2 = pos;
          while (hasChar())
          {
              if (isOneOf(chars[pos], terminators))
              {
                  break;
              }
              i2++;
              pos++;
          }
          return getToken(false);
      }

      /**
       * Parses out a token until any of the given terminators
       * is encountered outside the quotation marks. The terminator itself
       * is not consumed. Backslash escapes are honoured when looking for
       * the closing quote but are left untouched in the returned token.
       *
       * @param terminators the array of terminating characters. Any of these
       * characters when encountered outside the quotation marks signify the end
       * of the token
       *
       * @return the token, or <code>null</code> if it is empty
       */
      private String parseQuotedToken(final char[] terminators)
      {
          i1 = pos;
          i2 = pos;
          boolean quoted = false;
          boolean charEscaped = false;
          while (hasChar())
          {
              char ch = chars[pos];
              if (!quoted && isOneOf(ch, terminators))
              {
                  break;
              }
              if (!charEscaped && ch == '"')
              {
                  quoted = !quoted;
              }
              // A backslash escapes the next character, unless the backslash
              // was itself escaped by a preceding one
              charEscaped = (!charEscaped && ch == '\\');
              i2++;
              pos++;
          }
          return getToken(true);
      }

      /**
       * Returns <tt>true</tt> if parameter names are to be
       * converted to lower case when name/value pairs are parsed
       *
       * @return <tt>true</tt> if parameter names are to be
       * converted to lower case when name/value pairs are parsed.
       * Otherwise returns <tt>false</tt>
       */
      public boolean isLowerCaseNames()
      {
          return this.lowerCaseNames;
      }

      /**
       * Sets the flag if parameter names are to be converted to
       * lower case when name/value pairs are parsed
       *
       * @param b <tt>true</tt> if parameter names are to be
       * converted to lower case when name/value pairs are parsed.
       * <tt>false</tt> otherwise.
       */
      public void setLowerCaseNames(boolean b)
      {
          this.lowerCaseNames = b;
      }

      /**
       * Extracts a map of name/value pairs from the given string.
       * Names are expected to be unique
       *
       * @param str the string that contains a sequence of name/value pairs;
       * <code>null</code> yields an empty map
       * @param separator the name/value pairs separator
       *
       * @return a map of name/value pairs
       */
      public Map parse(final String str, char separator)
      {
          if (str == null)
          {
              return new HashMap();
          }
          return parse(str.toCharArray(), separator);
      }

      /**
       * Extracts a map of name/value pairs from the given array of
       * characters. Names are expected to be unique
       *
       * @param chars the array of characters that contains a sequence of
       * name/value pairs; <code>null</code> yields an empty map
       * @param separator the name/value pairs separator
       *
       * @return a map of name/value pairs
       */
      public Map parse(final char[] chars, char separator)
      {
          if (chars == null)
          {
              return new HashMap();
          }
          return parse(chars, 0, chars.length, separator);
      }

      /**
       * Extracts a map of name/value pairs from the given array of
       * characters. Names are expected to be unique; if a name occurs more
       * than once the last value wins. Parameters without a value are
       * stored with a <code>null</code> value, and pairs without a name
       * are dropped.
       *
       * @param chars the array of characters that contains a sequence of
       * name/value pairs; <code>null</code> yields an empty map
       * @param offset - the initial offset.
       * @param length - the number of characters to parse, starting at
       * <code>offset</code>. No bounds validation is performed.
       * @param separator the name/value pairs separator
       *
       * @return a map of name/value pairs
       */
      public Map parse(
          final char[] chars,
          int offset,
          int length,
          char separator)
      {

          if (chars == null)
          {
              return new HashMap();
          }
          HashMap params = new HashMap();
          this.chars = chars;
          this.pos = offset;
          // "len" is compared against the absolute position, so it must be
          // the end index of the region rather than the bare character count
          this.len = offset + length;

          // The terminator sets do not change while parsing
          final char[] nameTerminators = new char[] { '=', separator };
          final char[] valueTerminators = new char[] { separator };

          while (hasChar())
          {
              String paramName = parseToken(nameTerminators);
              String paramValue = null;
              if (hasChar() && (chars[pos] == '='))
              {
                  pos++; // skip '='
                  paramValue = parseQuotedToken(valueTerminators);
              }
              if (hasChar() && (chars[pos] == separator))
              {
                  pos++; // skip separator
              }
              if ((paramName != null) && (paramName.length() > 0))
              {
                  if (this.lowerCaseNames)
                  {
                      // Use a fixed locale: with the platform default a
                      // Turkish locale would map 'I' to a dotless 'i' and
                      // break lookups such as "filename"
                      paramName =
                          paramName.toLowerCase(java.util.Locale.ENGLISH);
                  }
                  params.put(paramName, paramValue);
              }
          }
          return params;
      }
  }
  
  
  
  1.4       +144 -0    
jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/DefaultFileItemTest.java
  
  Index: DefaultFileItemTest.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/DefaultFileItemTest.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- DefaultFileItemTest.java  25 Feb 2004 21:07:12 -0000      1.3
  +++ DefaultFileItemTest.java  12 Mar 2004 07:34:46 -0000      1.4
  @@ -225,4 +225,148 @@
       {
           return new DefaultFileItemFactory(threshold, repository);
       }
  +
  +
  +    static final String CHARSET_ISO88591 = "ISO-8859-1";
  +    static final String CHARSET_ASCII = "US-ASCII";
  +    static final String CHARSET_UTF8 = "UTF-8";
  +    static final String CHARSET_KOI8_R = "KOI8_R";
  +    static final String CHARSET_WIN1251 = "Cp1251";
  +
  +    static final int SWISS_GERMAN_STUFF_UNICODE [] = 
  +    {
  +        0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4
  +    };
  +    
  +    static final int SWISS_GERMAN_STUFF_ISO8859_1 [] = 
  +    {
  +        0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4
  +    };
  +    
  +    static final int SWISS_GERMAN_STUFF_UTF8 [] = 
  +    {
  +        0x47, 0x72, 0xC3, 0xBC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xC3, 0xA4,
  +        0x6D, 0xC3, 0xA4
  +    };
  +
  +    static final int RUSSIAN_STUFF_UNICODE [] = 
  +    {
  +        0x412, 0x441, 0x435, 0x43C, 0x5F, 0x43F, 0x440, 0x438, 
  +        0x432, 0x435, 0x442 
  +    }; 
  +
  +    static final int RUSSIAN_STUFF_UTF8 [] = 
  +    {
  +        0xD0, 0x92, 0xD1, 0x81, 0xD0, 0xB5, 0xD0, 0xBC, 0x5F, 
  +        0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 
  +        0xB5, 0xD1, 0x82
  +    };
  +
  +    static final int RUSSIAN_STUFF_KOI8R [] = 
  +    {
  +        0xF7, 0xD3, 0xC5, 0xCD, 0x5F, 0xD0, 0xD2, 0xC9, 0xD7, 
  +        0xC5, 0xD4
  +    };
  +
  +    static final int RUSSIAN_STUFF_WIN1251 [] = 
  +    {
  +        0xC2, 0xF1, 0xE5, 0xEC, 0x5F, 0xEF, 0xF0, 0xE8, 0xE2, 
  +        0xE5, 0xF2
  +    };
  +
  +
  +    private static String constructString(int[] unicodeChars)
  +    {
  +        StringBuffer buffer = new StringBuffer();
  +        if (unicodeChars != null)
  +        {
  +            for (int i = 0; i < unicodeChars.length; i++)
  +            {
  +                buffer.append((char) unicodeChars[i]);
  +            }
  +        }
  +        return buffer.toString();
  +    }
  +
  +    /**
  +     * Test construction of content charset.
  +     */
  +    public void testContentCharSet() throws Exception
  +    {
  +        FileItemFactory factory = createFactory(null);
  +
  +        String teststr = constructString(SWISS_GERMAN_STUFF_UNICODE);
  +
  +        FileItem item =
  +            factory.createItem(
  +                "doesnotmatter",
  +                "text/plain; charset=" + CHARSET_ISO88591,
  +                true,
  +                null);
  +        OutputStream outstream = item.getOutputStream();
  +        for (int i = 0; i < SWISS_GERMAN_STUFF_ISO8859_1.length; i++)
  +        {
  +            outstream.write(SWISS_GERMAN_STUFF_ISO8859_1[i]);
  +        }
  +        outstream.close();
  +        assertEquals(teststr, teststr, item.getString());
  +
  +        item =
  +            factory.createItem(
  +                "doesnotmatter",
  +                "text/plain; charset=" + CHARSET_UTF8,
  +                true,
  +                null);
  +        outstream = item.getOutputStream();
  +        for (int i = 0; i < SWISS_GERMAN_STUFF_UTF8.length; i++)
  +        {
  +            outstream.write(SWISS_GERMAN_STUFF_UTF8[i]);
  +        }
  +        outstream.close();
  +        assertEquals(teststr, teststr, item.getString());
  +
  +        teststr = constructString(RUSSIAN_STUFF_UNICODE);
  +
  +        item =
  +            factory.createItem(
  +                "doesnotmatter",
  +                "text/plain; charset=" + CHARSET_KOI8_R,
  +                true,
  +                null);
  +        outstream = item.getOutputStream();
  +        for (int i = 0; i < RUSSIAN_STUFF_KOI8R.length; i++)
  +        {
  +            outstream.write(RUSSIAN_STUFF_KOI8R[i]);
  +        }
  +        outstream.close();
  +        assertEquals(teststr, teststr, item.getString());
  +
  +        item =
  +            factory.createItem(
  +                "doesnotmatter",
  +                "text/plain; charset=" + CHARSET_WIN1251,
  +                true,
  +                null);
  +        outstream = item.getOutputStream();
  +        for (int i = 0; i < RUSSIAN_STUFF_WIN1251.length; i++)
  +        {
  +            outstream.write(RUSSIAN_STUFF_WIN1251[i]);
  +        }
  +        outstream.close();
  +        assertEquals(teststr, teststr, item.getString());
  +
  +        item =
  +            factory.createItem(
  +                "doesnotmatter",
  +                "text/plain; charset=" + CHARSET_UTF8,
  +                true,
  +                null);
  +        outstream = item.getOutputStream();
  +        for (int i = 0; i < RUSSIAN_STUFF_UTF8.length; i++)
  +        {
  +            outstream.write(RUSSIAN_STUFF_UTF8[i]);
  +        }
  +        outstream.close();
  +        assertEquals(teststr, teststr, item.getString());
  +    }
   }
  
  
  
  1.5       +1 -0      
jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/TestAll.java
  
  Index: TestAll.java
  ===================================================================
  RCS file: 
/home/cvs/jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/TestAll.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- TestAll.java      25 Feb 2004 21:07:12 -0000      1.4
  +++ TestAll.java      12 Mar 2004 07:34:46 -0000      1.5
  @@ -30,6 +30,7 @@
   
       public static Test suite() {
           TestSuite suite = new TestSuite();
  +        suite.addTest(new TestSuite(ParameterParserTest.class));
           suite.addTest(new TestSuite(MultipartStreamTest.class));
           suite.addTest(new TestSuite(FileUploadTest.class));
           suite.addTest(new TestSuite(DeferredFileOutputStreamTest.class));
  
  
  
  1.1                  
jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/ParameterParserTest.java
  
  Index: ParameterParserTest.java
  ===================================================================
  /*
   * Copyright 2001-2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  package org.apache.commons.fileupload;
  
  import junit.framework.Test;
  import junit.framework.TestCase;
  import junit.framework.TestSuite;
  import java.util.Map;
  
  /**
   * Unit tests for {@link ParameterParser}.
   *
   * @author <a href="mailto:[EMAIL PROTECTED]">Oleg Kalnichevski</a>
   */
  public class ParameterParserTest extends TestCase
  {

      // ------------------------------------------------------------ Constructor

      /**
       * Creates a test case with the given name.
       *
       * @param testName the name handed to the JUnit base class
       */
      public ParameterParserTest(String testName)
      {
          super(testName);
      }

      // ------------------------------------------------------------------- Main

      /**
       * Runs this test class with the JUnit text runner.
       *
       * @param args ignored
       */
      public static void main(String[] args)
      {
          junit.textui.TestRunner.main(
              new String[] { ParameterParserTest.class.getName() });
      }

      // ------------------------------------------------------- TestCase Methods

      /**
       * Builds the suite holding every test of this class.
       *
       * @return the test suite
       */
      public static Test suite()
      {
          return new TestSuite(ParameterParserTest.class);
      }

      /**
       * Checks plain, quoted, unbalanced-quote, value-less and name-less
       * pairs with both ';' and ',' as separator.
       */
      public void testParsing()
      {
          ParameterParser parser = new ParameterParser();

          // Mixed input with ';' as separator
          Map semicolonSeparated = parser.parse(
              "test; test1 =  stuff   ; test2 =  \"stuff; stuff\"; test3=\"stuff",
              ';');
          assertEquals(null, semicolonSeparated.get("test"));
          assertEquals("stuff", semicolonSeparated.get("test1"));
          assertEquals("stuff; stuff", semicolonSeparated.get("test2"));
          assertEquals("\"stuff", semicolonSeparated.get("test3"));

          // Empty entries and empty values with ',' as separator
          Map commaSeparated = parser.parse(
              "  test  , test1=stuff   ,  , test2=, test3, ",
              ',');
          assertEquals(null, commaSeparated.get("test"));
          assertEquals("stuff", commaSeparated.get("test1"));
          assertEquals(null, commaSeparated.get("test2"));
          assertEquals(null, commaSeparated.get("test3"));

          // A lone name without value
          Map nameOnly = parser.parse("  test", ';');
          assertEquals(null, nameOnly.get("test"));

          // Blank input produces no entries
          Map blank = parser.parse("  ", ';');
          assertEquals(0, blank.size());

          // A value without a name is dropped
          Map valueOnly = parser.parse(" = stuff ", ';');
          assertEquals(0, valueOnly.size());
      }

      /**
       * Checks that a mixed-case parameter name of a content type header
       * is found under its lower-case form when lower-casing is enabled.
       */
      public void testContentTypeParsing()
      {
          ParameterParser parser = new ParameterParser();
          parser.setLowerCaseNames(true);

          Map contentTypeParams = parser.parse("text/plain; Charset=UTF-8", ';');

          assertEquals("UTF-8", contentTypeParams.get("charset"));
      }

      /**
       * Checks that escaped quotes do not end a quoted value and that
       * an escaped backslash does not escape the closing quote.
       */
      public void testParsingEscapedChars()
      {
          ParameterParser parser = new ParameterParser();

          // Escaped quote inside the value: the separator stays quoted
          Map escapedQuote =
              parser.parse("param = \"stuff\\\"; more stuff\"", ';');
          assertEquals(1, escapedQuote.size());
          assertEquals("stuff\\\"; more stuff", escapedQuote.get("param"));

          // Escaped backslash before the closing quote: the quote ends the value
          Map escapedBackslash =
              parser.parse("param = \"stuff\\\\\"; anotherparam", ';');
          assertEquals(2, escapedBackslash.size());
          assertEquals("stuff\\\\", escapedBackslash.get("param"));
          assertNull(escapedBackslash.get("anotherparam"));
      }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to