martinc 2004/03/11 23:34:46 Modified: fileupload/src/java/org/apache/commons/fileupload DefaultFileItem.java fileupload/src/test/org/apache/commons/fileupload DefaultFileItemTest.java TestAll.java Added: fileupload/src/java/org/apache/commons/fileupload ParameterParser.java fileupload/src/test/org/apache/commons/fileupload ParameterParserTest.java Log: Add support for character sets specified for individual parts. PR: 20813 Submitted by: Oleg Kalnichevski Revision Changes Path 1.25 +46 -7 jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/DefaultFileItem.java Index: DefaultFileItem.java =================================================================== RCS file: /home/cvs/jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/DefaultFileItem.java,v retrieving revision 1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- DefaultFileItem.java 25 Feb 2004 21:07:12 -0000 1.24 +++ DefaultFileItem.java 12 Mar 2004 07:34:45 -0000 1.25 @@ -25,6 +25,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.UnsupportedEncodingException; +import java.util.Map; /** @@ -69,6 +70,15 @@ /** + * Default content charset to be used when no explicit charset + * parameter is provided by the sender. Media subtypes of the + * "text" type are defined to have a default charset value of + * "ISO-8859-1" when received via HTTP. + */ + public static final String DEFAULT_CHARSET = "ISO-8859-1"; + + + /** * The content type passed by the browser, or <code>null</code> if * not defined. */ @@ -172,10 +182,10 @@ /** - * Returns the content type passed by the browser or <code>null</code> if + * Returns the content type passed by the agent or <code>null</code> if * not defined. * - * @return The content type passed by the browser or <code>null</code> if + * @return The content type passed by the agent or <code>null</code> if * not defined. 
*/ public String getContentType() @@ -185,6 +195,23 @@ /** + * Returns the content charset passed by the agent or <code>null</code> if + * not defined. + * + * @return The content charset passed by the agent or <code>null</code> if + * not defined. + */ + public String getCharSet() + { + ParameterParser parser = new ParameterParser(); + parser.setLowerCaseNames(true); + // Parameter parser can handle null input + Map params = parser.parse(getContentType(), ';'); + return (String)params.get("charset"); + } + + + /** * Returns the original filename in the client's filesystem. * * @return The original filename in the client's filesystem. @@ -287,17 +314,17 @@ * encoding. This method uses {@link #get()} to retrieve the * contents of the file. * - * @param encoding The character encoding to use. + * @param charset The charset to use. * * @return The contents of the file, as a string. * * @exception UnsupportedEncodingException if the requested character * encoding is not available. */ - public String getString(String encoding) + public String getString(final String charset) throws UnsupportedEncodingException { - return new String(get(), encoding); + return new String(get(), charset); } @@ -309,8 +336,20 @@ * @return The contents of the file, as a string. 
*/ public String getString() + //@TODO: Consider making this method throw UnsupportedEncodingException { - return new String(get()); + byte[] rawdata = get(); + String charset = getCharSet(); + if (charset == null) { + charset = DEFAULT_CHARSET; + } + try + { + return new String(rawdata, charset); + } + catch(UnsupportedEncodingException e) { + return new String(rawdata); + } } 1.1 jakarta-commons/fileupload/src/java/org/apache/commons/fileupload/ParameterParser.java Index: ParameterParser.java =================================================================== /* * Copyright 2001-2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.fileupload; import java.util.HashMap; import java.util.Map; /** * A simple parser intended to parse sequences of name/value pairs. * Parameter values are expected to be enclosed in quotes if they * contain unsafe characters, such as '=' characters or separators. * Parameter values are optional and can be omitted. 
* * <p> * <code>param1 = value; param2 = "anything goes; really"; param3</code> * </p> * * @author <a href="mailto:[EMAIL PROTECTED]">Oleg Kalnichevski</a> */ public class ParameterParser { /** String to be parsed */ private char[] chars = null; /** Current position in the string */ private int pos = 0; /** Maximum position in the string */ private int len = 0; /** Start of a token */ private int i1 = 0; /** End of a token */ private int i2 = 0; /** * Whether names stored in the map should be converted * to lower case */ private boolean lowerCaseNames = false; /** * Default ParameterParser constructor */ public ParameterParser() { super(); } /** * Are there any characters left to parse? * * @return <tt>true</tt> if there are unparsed characters, * <tt>false</tt> otherwise. */ private boolean hasChar() { return this.pos < this.len; } /** * A helper method to process the parsed token. This method removes * leading and trailing blanks as well as enclosing quotation marks, * when necessary. * * @param quoted <tt>true</tt> if quotation marks are expected, * <tt>false</tt> otherwise. * @return the token */ private String getToken(boolean quoted) { // Trim leading white spaces while ((i1 < i2) && (Character.isWhitespace(chars[i1]))) { i1++; } // Trim trailing white spaces while ((i2 > i1) && (Character.isWhitespace(chars[i2 - 1]))) { i2--; } // Strip away quotation marks if necessary if (quoted) { if (((i2 - i1) >= 2) && (chars[i1] == '"') && (chars[i2 - 1] == '"')) { i1++; i2--; } } String result = null; if (i2 > i1) { result = new String(chars, i1, i2 - i1); } return result; } /** * Tests if the given character is present in the array of characters * * @param ch the character to test for presence in the array of characters * @param charray the array of characters to test against * * @return <tt>true</tt> if the character is present in the array of * characters, <tt>false</tt> otherwise. 
*/ private boolean isOneOf(char ch, final char[] charray) { boolean result = false; for (int i = 0; i < charray.length; i++) { if (ch == charray[i]) { result = true; break; } } return result; } /** * Parses out a token until any of the given terminators * is encountered. * * @param terminators the array of terminating characters. Any of these * characters when encountered signify the end of the token * * @return the token */ private String parseToken(final char[] terminators) { char ch; i1 = pos; i2 = pos; while (hasChar()) { ch = chars[pos]; if (isOneOf(ch, terminators)) { break; } i2++; pos++; } return getToken(false); } /** * Parses out a token until any of the given terminators * is encountered outside the quotation marks. * * @param terminators the array of terminating characters. Any of these * characters when encountered outside the quotation marks signify the end * of the token * * @return the token */ private String parseQuotedToken(final char[] terminators) { char ch; i1 = pos; i2 = pos; boolean quoted = false; boolean charEscaped = false; while (hasChar()) { ch = chars[pos]; if (!quoted && isOneOf(ch, terminators)) { break; } if (!charEscaped && ch == '"') { quoted = !quoted; } charEscaped = (!charEscaped && ch == '\\'); i2++; pos++; } return getToken(true); } /** * Returns <tt>true</tt> if parameter names are to be * converted to lower case when name/value pairs are parsed * * @return <tt>true</tt> if parameter names are to be * converted to lower case when name/value pairs are parsed. * Otherwise returns <tt>false</tt> */ public boolean isLowerCaseNames() { return this.lowerCaseNames; } /** * Sets the flag if parameter names are to be converted to * lower case when name/value pairs are parsed * * @param b <tt>true</tt> if parameter names are to be * converted to lower case when name/value pairs are parsed. * <tt>false</tt> otherwise. 
*/ public void setLowerCaseNames(boolean b) { this.lowerCaseNames = b; } /** * Extracts a map of name/value pairs from the given string. * Names are expected to be unique * * @param str the string that contains a sequence of name/value pairs * @param separator the name/value pairs separator * * @return a map of name/value pairs */ public Map parse(final String str, char separator) { if (str == null) { return new HashMap(); } return parse(str.toCharArray(), separator); } /** * Extracts a map of name/value pairs from the given array of * characters. Names are expected to be unique * * @param chars the array of characters that contains a sequence of * name/value pairs * @param separator the name/value pairs separator * * @return a map of name/value pairs */ public Map parse(final char[] chars, char separator) { if (chars == null) { return new HashMap(); } return parse(chars, 0, chars.length, separator); } /** * Extracts a map of name/value pairs from the given array of * characters. Names are expected to be unique * * @param chars the array of characters that contains a sequence of * name/value pairs * @param offset - the initial offset. * @param length - the length. 
* @param separator the name/value pairs separator * * @return a map of name/value pairs */ public Map parse( final char[] chars, int offset, int length, char separator) { if (chars == null) { return new HashMap(); } HashMap params = new HashMap(); this.chars = chars; this.pos = offset; this.len = length; String paramName = null; String paramValue = null; while (hasChar()) { paramName = parseToken(new char[] { '=', separator }); paramValue = null; if (hasChar() && (chars[pos] == '=')) { pos++; // skip '=' paramValue = parseQuotedToken(new char[] { separator }); } if (hasChar() && (chars[pos] == separator)) { pos++; // skip separator } if ((paramName != null) && (paramName.length() > 0)) { if (this.lowerCaseNames) { paramName = paramName.toLowerCase(); } params.put(paramName, paramValue); } } return params; } } 1.4 +144 -0 jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/DefaultFileItemTest.java Index: DefaultFileItemTest.java =================================================================== RCS file: /home/cvs/jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/DefaultFileItemTest.java,v retrieving revision 1.3 retrieving revision 1.4 diff -u -r1.3 -r1.4 --- DefaultFileItemTest.java 25 Feb 2004 21:07:12 -0000 1.3 +++ DefaultFileItemTest.java 12 Mar 2004 07:34:46 -0000 1.4 @@ -225,4 +225,148 @@ { return new DefaultFileItemFactory(threshold, repository); } + + + static final String CHARSET_ISO88591 = "ISO-8859-1"; + static final String CHARSET_ASCII = "US-ASCII"; + static final String CHARSET_UTF8 = "UTF-8"; + static final String CHARSET_KOI8_R = "KOI8_R"; + static final String CHARSET_WIN1251 = "Cp1251"; + + static final int SWISS_GERMAN_STUFF_UNICODE [] = + { + 0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4 + }; + + static final int SWISS_GERMAN_STUFF_ISO8859_1 [] = + { + 0x47, 0x72, 0xFC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xE4, 0x6D, 0xE4 + }; + + static final int SWISS_GERMAN_STUFF_UTF8 [] = + { + 0x47, 0x72, 0xC3, 
0xBC, 0x65, 0x7A, 0x69, 0x5F, 0x7A, 0xC3, 0xA4, + 0x6D, 0xC3, 0xA4 + }; + + static final int RUSSIAN_STUFF_UNICODE [] = + { + 0x412, 0x441, 0x435, 0x43C, 0x5F, 0x43F, 0x440, 0x438, + 0x432, 0x435, 0x442 + }; + + static final int RUSSIAN_STUFF_UTF8 [] = + { + 0xD0, 0x92, 0xD1, 0x81, 0xD0, 0xB5, 0xD0, 0xBC, 0x5F, + 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, + 0xB5, 0xD1, 0x82 + }; + + static final int RUSSIAN_STUFF_KOI8R [] = + { + 0xF7, 0xD3, 0xC5, 0xCD, 0x5F, 0xD0, 0xD2, 0xC9, 0xD7, + 0xC5, 0xD4 + }; + + static final int RUSSIAN_STUFF_WIN1251 [] = + { + 0xC2, 0xF1, 0xE5, 0xEC, 0x5F, 0xEF, 0xF0, 0xE8, 0xE2, + 0xE5, 0xF2 + }; + + + private static String constructString(int[] unicodeChars) + { + StringBuffer buffer = new StringBuffer(); + if (unicodeChars != null) + { + for (int i = 0; i < unicodeChars.length; i++) + { + buffer.append((char) unicodeChars[i]); + } + } + return buffer.toString(); + } + + /** + * Test construction of content charset. + */ + public void testContentCharSet() throws Exception + { + FileItemFactory factory = createFactory(null); + + String teststr = constructString(SWISS_GERMAN_STUFF_UNICODE); + + FileItem item = + factory.createItem( + "doesnotmatter", + "text/plain; charset=" + CHARSET_ISO88591, + true, + null); + OutputStream outstream = item.getOutputStream(); + for (int i = 0; i < SWISS_GERMAN_STUFF_ISO8859_1.length; i++) + { + outstream.write(SWISS_GERMAN_STUFF_ISO8859_1[i]); + } + outstream.close(); + assertEquals(teststr, teststr, item.getString()); + + item = + factory.createItem( + "doesnotmatter", + "text/plain; charset=" + CHARSET_UTF8, + true, + null); + outstream = item.getOutputStream(); + for (int i = 0; i < SWISS_GERMAN_STUFF_UTF8.length; i++) + { + outstream.write(SWISS_GERMAN_STUFF_UTF8[i]); + } + outstream.close(); + assertEquals(teststr, teststr, item.getString()); + + teststr = constructString(RUSSIAN_STUFF_UNICODE); + + item = + factory.createItem( + "doesnotmatter", + "text/plain; charset=" + 
CHARSET_KOI8_R, + true, + null); + outstream = item.getOutputStream(); + for (int i = 0; i < RUSSIAN_STUFF_KOI8R.length; i++) + { + outstream.write(RUSSIAN_STUFF_KOI8R[i]); + } + outstream.close(); + assertEquals(teststr, teststr, item.getString()); + + item = + factory.createItem( + "doesnotmatter", + "text/plain; charset=" + CHARSET_WIN1251, + true, + null); + outstream = item.getOutputStream(); + for (int i = 0; i < RUSSIAN_STUFF_WIN1251.length; i++) + { + outstream.write(RUSSIAN_STUFF_WIN1251[i]); + } + outstream.close(); + assertEquals(teststr, teststr, item.getString()); + + item = + factory.createItem( + "doesnotmatter", + "text/plain; charset=" + CHARSET_UTF8, + true, + null); + outstream = item.getOutputStream(); + for (int i = 0; i < RUSSIAN_STUFF_UTF8.length; i++) + { + outstream.write(RUSSIAN_STUFF_UTF8[i]); + } + outstream.close(); + assertEquals(teststr, teststr, item.getString()); + } } 1.5 +1 -0 jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/TestAll.java Index: TestAll.java =================================================================== RCS file: /home/cvs/jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/TestAll.java,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- TestAll.java 25 Feb 2004 21:07:12 -0000 1.4 +++ TestAll.java 12 Mar 2004 07:34:46 -0000 1.5 @@ -30,6 +30,7 @@ public static Test suite() { TestSuite suite = new TestSuite(); + suite.addTest(new TestSuite(ParameterParserTest.class)); suite.addTest(new TestSuite(MultipartStreamTest.class)); suite.addTest(new TestSuite(FileUploadTest.class)); suite.addTest(new TestSuite(DeferredFileOutputStreamTest.class)); 1.1 jakarta-commons/fileupload/src/test/org/apache/commons/fileupload/ParameterParserTest.java Index: ParameterParserTest.java =================================================================== /* * Copyright 2001-2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you 
may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.fileupload; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; import java.util.Map; /** * Unit tests for {@link ParameterParser}. * * @author <a href="mailto:[EMAIL PROTECTED]">Oleg Kalnichevski</a> */ public class ParameterParserTest extends TestCase { // ------------------------------------------------------------ Constructor public ParameterParserTest(String testName) { super(testName); } // ------------------------------------------------------------------- Main public static void main(String args[]) { String[] testCaseName = { ParameterParserTest.class.getName()}; junit.textui.TestRunner.main(testCaseName); } // ------------------------------------------------------- TestCase Methods public static Test suite() { return new TestSuite(ParameterParserTest.class); } public void testParsing() { String s = "test; test1 = stuff ; test2 = \"stuff; stuff\"; test3=\"stuff"; ParameterParser parser = new ParameterParser(); Map params = parser.parse(s, ';'); assertEquals(null, params.get("test")); assertEquals("stuff", params.get("test1")); assertEquals("stuff; stuff", params.get("test2")); assertEquals("\"stuff", params.get("test3")); s = " test , test1=stuff , , test2=, test3, "; params = parser.parse(s, ','); assertEquals(null, params.get("test")); assertEquals("stuff", params.get("test1")); assertEquals(null, params.get("test2")); assertEquals(null, params.get("test3")); s = " test"; params = parser.parse(s, ';'); 
assertEquals(null, params.get("test")); s = " "; params = parser.parse(s, ';'); assertEquals(0, params.size()); s = " = stuff "; params = parser.parse(s, ';'); assertEquals(0, params.size()); } public void testContentTypeParsing() { String s = "text/plain; Charset=UTF-8"; ParameterParser parser = new ParameterParser(); parser.setLowerCaseNames(true); Map params = parser.parse(s, ';'); assertEquals("UTF-8", params.get("charset")); } public void testParsingEscapedChars() { String s = "param = \"stuff\\\"; more stuff\""; ParameterParser parser = new ParameterParser(); Map params = parser.parse(s, ';'); assertEquals(1, params.size()); assertEquals("stuff\\\"; more stuff", params.get("param")); s = "param = \"stuff\\\\\"; anotherparam"; params = parser.parse(s, ';'); assertEquals(2, params.size()); assertEquals("stuff\\\\", params.get("param")); assertNull(params.get("anotherparam")); } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]