Modified: websites/production/commons/content/proper/commons-csv/testapidocs/src-html/org/apache/commons/csv/CSVLexerTest.html ============================================================================== --- websites/production/commons/content/proper/commons-csv/testapidocs/src-html/org/apache/commons/csv/CSVLexerTest.html (original) +++ websites/production/commons/content/proper/commons-csv/testapidocs/src-html/org/apache/commons/csv/CSVLexerTest.html Thu Mar 21 15:20:15 2013 @@ -8,9 +8,9 @@ <FONT color="green">005</FONT> * The ASF licenses this file to You under the Apache License, Version 2.0<a name="line.5"></a> <FONT color="green">006</FONT> * (the "License"); you may not use this file except in compliance with<a name="line.6"></a> <FONT color="green">007</FONT> * the License. You may obtain a copy of the License at<a name="line.7"></a> -<FONT color="green">008</FONT> * <a name="line.8"></a> +<FONT color="green">008</FONT> *<a name="line.8"></a> <FONT color="green">009</FONT> * http://www.apache.org/licenses/LICENSE-2.0<a name="line.9"></a> -<FONT color="green">010</FONT> * <a name="line.10"></a> +<FONT color="green">010</FONT> *<a name="line.10"></a> <FONT color="green">011</FONT> * Unless required by applicable law or agreed to in writing, software<a name="line.11"></a> <FONT color="green">012</FONT> * distributed under the License is distributed on an "AS IS" BASIS,<a name="line.12"></a> <FONT color="green">013</FONT> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<a name="line.13"></a> @@ -20,154 +20,272 @@ <FONT color="green">017</FONT> <a name="line.17"></a> <FONT color="green">018</FONT> package org.apache.commons.csv;<a name="line.18"></a> <FONT color="green">019</FONT> <a name="line.19"></a> -<FONT color="green">020</FONT> import java.io.IOException;<a name="line.20"></a> -<FONT color="green">021</FONT> import java.io.StringReader;<a name="line.21"></a> -<FONT color="green">022</FONT> <a name="line.22"></a> -<FONT color="green">023</FONT> import junit.framework.TestCase;<a name="line.23"></a> -<FONT color="green">024</FONT> import org.apache.commons.csv.CSVLexer.Token;<a name="line.24"></a> -<FONT color="green">025</FONT> <a name="line.25"></a> -<FONT color="green">026</FONT> import static org.apache.commons.csv.CSVLexer.Token.Type.*;<a name="line.26"></a> -<FONT color="green">027</FONT> <a name="line.27"></a> -<FONT color="green">028</FONT> public class CSVLexerTest extends TestCase {<a name="line.28"></a> -<FONT color="green">029</FONT> <a name="line.29"></a> -<FONT color="green">030</FONT> private CSVLexer getLexer(String input, CSVFormat format) {<a name="line.30"></a> -<FONT color="green">031</FONT> return new CSVLexer(format, new ExtendedBufferedReader(new StringReader(input)));<a name="line.31"></a> -<FONT color="green">032</FONT> }<a name="line.32"></a> +<FONT color="green">020</FONT> import static org.apache.commons.csv.Token.Type.COMMENT;<a name="line.20"></a> +<FONT color="green">021</FONT> import static org.apache.commons.csv.Token.Type.EOF;<a name="line.21"></a> +<FONT color="green">022</FONT> import static org.apache.commons.csv.Token.Type.EORECORD;<a name="line.22"></a> +<FONT color="green">023</FONT> import static org.apache.commons.csv.Token.Type.TOKEN;<a name="line.23"></a> +<FONT color="green">024</FONT> import static org.junit.Assert.assertEquals;<a name="line.24"></a> +<FONT color="green">025</FONT> import static org.junit.Assert.assertFalse;<a name="line.25"></a> +<FONT color="green">026</FONT> import static org.junit.Assert.assertTrue;<a name="line.26"></a> +<FONT color="green">027</FONT> import static org.junit.Assert.fail;<a name="line.27"></a> +<FONT color="green">028</FONT> <a name="line.28"></a> +<FONT color="green">029</FONT> import java.io.IOException;<a name="line.29"></a> +<FONT color="green">030</FONT> import java.io.StringReader;<a name="line.30"></a> +<FONT color="green">031</FONT> <a name="line.31"></a> +<FONT color="green">032</FONT> import org.junit.Test;<a name="line.32"></a> <FONT color="green">033</FONT> <a name="line.33"></a> -<FONT color="green">034</FONT> private void assertTokenEquals(Token.Type expectedType, String expectedContent, Token token) {<a name="line.34"></a> -<FONT color="green">035</FONT> assertEquals("Token type", expectedType, token.type);<a name="line.35"></a> -<FONT color="green">036</FONT> assertEquals("Token content", expectedContent, token.content.toString());<a name="line.36"></a> -<FONT color="green">037</FONT> }<a name="line.37"></a> -<FONT color="green">038</FONT> <a name="line.38"></a> -<FONT color="green">039</FONT> // Single line (without comment)<a name="line.39"></a> -<FONT color="green">040</FONT> public void testNextToken1() throws IOException {<a name="line.40"></a> -<FONT color="green">041</FONT> String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";<a name="line.41"></a> -<FONT color="green">042</FONT> CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);<a name="line.42"></a> -<FONT color="green">043</FONT> assertTokenEquals(TOKEN, "abc", parser.nextToken(new Token()));<a name="line.43"></a> -<FONT color="green">044</FONT> assertTokenEquals(TOKEN, "def", parser.nextToken(new Token()));<a name="line.44"></a> -<FONT color="green">045</FONT> assertTokenEquals(TOKEN, "hijk", parser.nextToken(new Token()));<a name="line.45"></a> -<FONT color="green">046</FONT> assertTokenEquals(TOKEN, "lmnop", parser.nextToken(new Token()));<a name="line.46"></a> -<FONT color="green">047</FONT> assertTokenEquals(TOKEN, "qrst", parser.nextToken(new Token()));<a name="line.47"></a> -<FONT color="green">048</FONT> assertTokenEquals(TOKEN, "uv", parser.nextToken(new Token()));<a name="line.48"></a> -<FONT color="green">049</FONT> assertTokenEquals(TOKEN, "wxy", parser.nextToken(new Token()));<a name="line.49"></a> -<FONT color="green">050</FONT> assertTokenEquals(TOKEN, "z", parser.nextToken(new Token()));<a name="line.50"></a> -<FONT color="green">051</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.51"></a> -<FONT color="green">052</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.52"></a> -<FONT color="green">053</FONT> }<a name="line.53"></a> -<FONT color="green">054</FONT> <a name="line.54"></a> -<FONT color="green">055</FONT> // multiline including comments (and empty lines)<a name="line.55"></a> -<FONT color="green">056</FONT> public void testNextToken2() throws IOException {<a name="line.56"></a> -<FONT color="green">057</FONT> /* file: 1,2,3,<a name="line.57"></a> -<FONT color="green">058</FONT> * a,b x,c<a name="line.58"></a> -<FONT color="green">059</FONT> *<a name="line.59"></a> -<FONT color="green">060</FONT> * # this is a comment<a name="line.60"></a> -<FONT color="green">061</FONT> * d,e,<a name="line.61"></a> -<FONT color="green">062</FONT> *<a name="line.62"></a> -<FONT color="green">063</FONT> */<a name="line.63"></a> -<FONT color="green">064</FONT> String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";<a name="line.64"></a> -<FONT color="green">065</FONT> CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');<a name="line.65"></a> -<FONT color="green">066</FONT> <a name="line.66"></a> -<FONT color="green">067</FONT> CSVLexer parser = getLexer(code, format);<a name="line.67"></a> -<FONT color="green">068</FONT> <a name="line.68"></a> -<FONT color="green">069</FONT> <a name="line.69"></a> -<FONT color="green">070</FONT> assertTokenEquals(TOKEN, "1", parser.nextToken(new Token()));<a name="line.70"></a> -<FONT color="green">071</FONT> assertTokenEquals(TOKEN, "2", parser.nextToken(new Token()));<a name="line.71"></a> -<FONT color="green">072</FONT> assertTokenEquals(TOKEN, "3", parser.nextToken(new Token()));<a name="line.72"></a> -<FONT color="green">073</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token()));<a name="line.73"></a> -<FONT color="green">074</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.74"></a> -<FONT color="green">075</FONT> assertTokenEquals(TOKEN, "b x", parser.nextToken(new Token()));<a name="line.75"></a> -<FONT color="green">076</FONT> assertTokenEquals(EORECORD, "c", parser.nextToken(new Token()));<a name="line.76"></a> -<FONT color="green">077</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token()));<a name="line.77"></a> -<FONT color="green">078</FONT> assertTokenEquals(TOKEN, "d", parser.nextToken(new Token()));<a name="line.78"></a> -<FONT color="green">079</FONT> assertTokenEquals(TOKEN, "e", parser.nextToken(new Token()));<a name="line.79"></a> -<FONT color="green">080</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token()));<a name="line.80"></a> -<FONT color="green">081</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.81"></a> -<FONT color="green">082</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.82"></a> -<FONT color="green">083</FONT> <a name="line.83"></a> -<FONT color="green">084</FONT> }<a name="line.84"></a> -<FONT color="green">085</FONT> <a name="line.85"></a> -<FONT color="green">086</FONT> // simple token with escaping<a name="line.86"></a> -<FONT color="green">087</FONT> public void testNextToken3() throws IOException {<a name="line.87"></a> -<FONT color="green">088</FONT> /* file: a,\,,b<a name="line.88"></a> -<FONT color="green">089</FONT> * \,,<a name="line.89"></a> -<FONT color="green">090</FONT> */<a name="line.90"></a> -<FONT color="green">091</FONT> String code = "a,\\,,b\n\\,,";<a name="line.91"></a> -<FONT color="green">092</FONT> CSVFormat format = CSVFormat.DEFAULT.withCommentStart('#');<a name="line.92"></a> -<FONT color="green">093</FONT> CSVLexer parser = getLexer(code, format);<a name="line.93"></a> -<FONT color="green">094</FONT> <a name="line.94"></a> -<FONT color="green">095</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.95"></a> -<FONT color="green">096</FONT> // an unquoted single backslash is not an escape char<a name="line.96"></a> -<FONT color="green">097</FONT> assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));<a name="line.97"></a> -<FONT color="green">098</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.98"></a> -<FONT color="green">099</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.99"></a> -<FONT color="green">100</FONT> // an unquoted single backslash is not an escape char<a name="line.100"></a> -<FONT color="green">101</FONT> assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));<a name="line.101"></a> -<FONT color="green">102</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.102"></a> -<FONT color="green">103</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.103"></a> -<FONT color="green">104</FONT> }<a name="line.104"></a> -<FONT color="green">105</FONT> <a name="line.105"></a> -<FONT color="green">106</FONT> // encapsulator tokenizer (sinle line)<a name="line.106"></a> -<FONT color="green">107</FONT> public void testNextToken4() throws IOException {<a name="line.107"></a> -<FONT color="green">108</FONT> /* file: a,"foo",b<a name="line.108"></a> -<FONT color="green">109</FONT> * a, " foo",b<a name="line.109"></a> -<FONT color="green">110</FONT> * a,"foo " ,b // whitespace after closing encapsulator<a name="line.110"></a> -<FONT color="green">111</FONT> * a, " foo " ,b<a name="line.111"></a> -<FONT color="green">112</FONT> */<a name="line.112"></a> -<FONT color="green">113</FONT> String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";<a name="line.113"></a> -<FONT color="green">114</FONT> CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);<a name="line.114"></a> -<FONT color="green">115</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.115"></a> -<FONT color="green">116</FONT> assertTokenEquals(TOKEN, "foo", parser.nextToken(new Token()));<a name="line.116"></a> -<FONT color="green">117</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.117"></a> -<FONT color="green">118</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.118"></a> -<FONT color="green">119</FONT> assertTokenEquals(TOKEN, " foo", parser.nextToken(new Token()));<a name="line.119"></a> -<FONT color="green">120</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.120"></a> -<FONT color="green">121</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.121"></a> -<FONT color="green">122</FONT> assertTokenEquals(TOKEN, "foo ", parser.nextToken(new Token()));<a name="line.122"></a> -<FONT color="green">123</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.123"></a> -<FONT color="green">124</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.124"></a> -<FONT color="green">125</FONT> assertTokenEquals(TOKEN, " foo ", parser.nextToken(new Token()));<a name="line.125"></a> -<FONT color="green">126</FONT> // assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.126"></a> -<FONT color="green">127</FONT> assertTokenEquals(EOF, "b", parser.nextToken(new Token()));<a name="line.127"></a> -<FONT color="green">128</FONT> }<a name="line.128"></a> -<FONT color="green">129</FONT> <a name="line.129"></a> -<FONT color="green">130</FONT> // encapsulator tokenizer (multi line, delimiter in string)<a name="line.130"></a> -<FONT color="green">131</FONT> public void testNextToken5() throws IOException {<a name="line.131"></a> -<FONT color="green">132</FONT> String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";<a name="line.132"></a> -<FONT color="green">133</FONT> CSVLexer parser = getLexer(code, CSVFormat.DEFAULT);<a name="line.133"></a> -<FONT color="green">134</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.134"></a> -<FONT color="green">135</FONT> assertTokenEquals(TOKEN, "foo\n", parser.nextToken(new Token()));<a name="line.135"></a> -<FONT color="green">136</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.136"></a> -<FONT color="green">137</FONT> assertTokenEquals(EORECORD, "foo\n baar ,,,", parser.nextToken(new Token()));<a name="line.137"></a> -<FONT color="green">138</FONT> assertTokenEquals(EOF, "\n\t \n", parser.nextToken(new Token()));<a name="line.138"></a> -<FONT color="green">139</FONT> <a name="line.139"></a> -<FONT color="green">140</FONT> }<a name="line.140"></a> -<FONT color="green">141</FONT> <a name="line.141"></a> -<FONT color="green">142</FONT> // change delimiters, comment, encapsulater<a name="line.142"></a> -<FONT color="green">143</FONT> public void testNextToken6() throws IOException {<a name="line.143"></a> -<FONT color="green">144</FONT> /* file: a;'b and \' more<a name="line.144"></a> -<FONT color="green">145</FONT> * '<a name="line.145"></a> -<FONT color="green">146</FONT> * !comment;;;;<a name="line.146"></a> -<FONT color="green">147</FONT> * ;;<a name="line.147"></a> -<FONT color="green">148</FONT> */<a name="line.148"></a> -<FONT color="green">149</FONT> String code = "a;'b and '' more\n'\n!comment;;;;\n;;";<a name="line.149"></a> -<FONT color="green">150</FONT> CSVFormat format = new CSVFormat(';', '\'', '!');<a name="line.150"></a> -<FONT color="green">151</FONT> CSVLexer parser = getLexer(code, format);<a name="line.151"></a> -<FONT color="green">152</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.152"></a> -<FONT color="green">153</FONT> assertTokenEquals(EORECORD, "b and ' more\n", parser.nextToken(new Token()));<a name="line.153"></a> -<FONT color="green">154</FONT> }<a name="line.154"></a> -<FONT color="green">155</FONT> <a name="line.155"></a> -<FONT color="green">156</FONT> // From SANDBOX-153<a name="line.156"></a> -<FONT color="green">157</FONT> public void testDelimiterIsWhitespace() throws IOException {<a name="line.157"></a> -<FONT color="green">158</FONT> String code = "one\ttwo\t\tfour \t five\t six";<a name="line.158"></a> -<FONT color="green">159</FONT> CSVLexer parser = getLexer(code, CSVFormat.TDF);<a name="line.159"></a> -<FONT color="green">160</FONT> assertTokenEquals(TOKEN, "one", parser.nextToken(new Token()));<a name="line.160"></a> -<FONT color="green">161</FONT> assertTokenEquals(TOKEN, "two", parser.nextToken(new Token()));<a name="line.161"></a> -<FONT color="green">162</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.162"></a> -<FONT color="green">163</FONT> assertTokenEquals(TOKEN, "four", parser.nextToken(new Token()));<a name="line.163"></a> -<FONT color="green">164</FONT> assertTokenEquals(TOKEN, "five", parser.nextToken(new Token()));<a name="line.164"></a> -<FONT color="green">165</FONT> assertTokenEquals(EOF, "six", parser.nextToken(new Token()));<a name="line.165"></a> -<FONT color="green">166</FONT> }<a name="line.166"></a> -<FONT color="green">167</FONT> }<a name="line.167"></a> +<FONT color="green">034</FONT> /**<a name="line.34"></a> +<FONT color="green">035</FONT> * <a name="line.35"></a> +<FONT color="green">036</FONT> * <a name="line.36"></a> +<FONT color="green">037</FONT> * @version $Id: CSVLexerTest.java 1409455 2012-11-14 21:54:16Z ggregory $<a name="line.37"></a> +<FONT color="green">038</FONT> */<a name="line.38"></a> +<FONT color="green">039</FONT> public class CSVLexerTest {<a name="line.39"></a> +<FONT color="green">040</FONT> <a name="line.40"></a> +<FONT color="green">041</FONT> private Lexer getLexer(final String input, final CSVFormat format) {<a name="line.41"></a> +<FONT color="green">042</FONT> return new CSVLexer(format, new ExtendedBufferedReader(new StringReader(input)));<a name="line.42"></a> +<FONT color="green">043</FONT> }<a name="line.43"></a> +<FONT color="green">044</FONT> <a name="line.44"></a> +<FONT color="green">045</FONT> private void assertTokenEquals(final Token.Type expectedType, final String expectedContent, final Token token) {<a name="line.45"></a> +<FONT color="green">046</FONT> assertEquals("Token type", expectedType, token.type);<a name="line.46"></a> +<FONT color="green">047</FONT> assertEquals("Token content", expectedContent, token.content.toString());<a name="line.47"></a> +<FONT color="green">048</FONT> }<a name="line.48"></a> +<FONT color="green">049</FONT> <a name="line.49"></a> +<FONT color="green">050</FONT> // Single line (without comment)<a name="line.50"></a> +<FONT color="green">051</FONT> @Test<a name="line.51"></a> +<FONT color="green">052</FONT> public void testNextToken1() throws IOException {<a name="line.52"></a> +<FONT color="green">053</FONT> final String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";<a name="line.53"></a> +<FONT color="green">054</FONT> final Lexer parser = getLexer(code, CSVFormat.newBuilder().withIgnoreSurroundingSpaces(true).build());<a name="line.54"></a> +<FONT color="green">055</FONT> assertTokenEquals(TOKEN, "abc", parser.nextToken(new Token()));<a name="line.55"></a> +<FONT color="green">056</FONT> assertTokenEquals(TOKEN, "def", parser.nextToken(new Token()));<a name="line.56"></a> +<FONT color="green">057</FONT> assertTokenEquals(TOKEN, "hijk", parser.nextToken(new Token()));<a name="line.57"></a> +<FONT color="green">058</FONT> assertTokenEquals(TOKEN, "lmnop", parser.nextToken(new Token()));<a name="line.58"></a> +<FONT color="green">059</FONT> assertTokenEquals(TOKEN, "qrst", parser.nextToken(new Token()));<a name="line.59"></a> +<FONT color="green">060</FONT> assertTokenEquals(TOKEN, "uv", parser.nextToken(new Token()));<a name="line.60"></a> +<FONT color="green">061</FONT> assertTokenEquals(TOKEN, "wxy", parser.nextToken(new Token()));<a name="line.61"></a> +<FONT color="green">062</FONT> assertTokenEquals(TOKEN, "z", parser.nextToken(new Token()));<a name="line.62"></a> +<FONT color="green">063</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.63"></a> +<FONT color="green">064</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.64"></a> +<FONT color="green">065</FONT> }<a name="line.65"></a> +<FONT color="green">066</FONT> <a name="line.66"></a> +<FONT color="green">067</FONT> // multiline including comments (and empty lines)<a name="line.67"></a> +<FONT color="green">068</FONT> @Test<a name="line.68"></a> +<FONT color="green">069</FONT> public void testNextToken2() throws IOException {<a name="line.69"></a> +<FONT color="green">070</FONT> final String code =<a name="line.70"></a> +<FONT color="green">071</FONT> "1,2,3,\n"+ // 1<a name="line.71"></a> +<FONT color="green">072</FONT> "\n"+<a name="line.72"></a> +<FONT color="green">073</FONT> "\n"+<a name="line.73"></a> +<FONT color="green">074</FONT> "a,b x,c#no-comment\n"+ // 2<a name="line.74"></a> +<FONT color="green">075</FONT> "\n"+<a name="line.75"></a> +<FONT color="green">076</FONT> "\n"+<a name="line.76"></a> +<FONT color="green">077</FONT> "# foo \n"+ // 3<a name="line.77"></a> +<FONT color="green">078</FONT> "\n"+ // 4<a name="line.78"></a> +<FONT color="green">079</FONT> "d,e,#no-comment\n"+ // 5<a name="line.79"></a> +<FONT color="green">080</FONT> "\n"+<a name="line.80"></a> +<FONT color="green">081</FONT> "\n"+<a name="line.81"></a> +<FONT color="green">082</FONT> "# penultimate comment\n"+ // 6<a name="line.82"></a> +<FONT color="green">083</FONT> "\n"+<a name="line.83"></a> +<FONT color="green">084</FONT> "\n"+<a name="line.84"></a> +<FONT color="green">085</FONT> "# Final comment\n"; // 7<a name="line.85"></a> +<FONT color="green">086</FONT> final CSVFormat format = CSVFormat.newBuilder().withCommentStart('#').build();<a name="line.86"></a> +<FONT color="green">087</FONT> assertTrue("Should ignore empty lines", format.getIgnoreEmptyLines());<a name="line.87"></a> +<FONT color="green">088</FONT> <a name="line.88"></a> +<FONT color="green">089</FONT> final Lexer parser = getLexer(code, format);<a name="line.89"></a> +<FONT color="green">090</FONT> <a name="line.90"></a> +<FONT color="green">091</FONT> <a name="line.91"></a> +<FONT color="green">092</FONT> assertTokenEquals(TOKEN, "1", parser.nextToken(new Token()));<a name="line.92"></a> +<FONT color="green">093</FONT> assertTokenEquals(TOKEN, "2", parser.nextToken(new Token()));<a name="line.93"></a> +<FONT color="green">094</FONT> assertTokenEquals(TOKEN, "3", parser.nextToken(new Token()));<a name="line.94"></a> +<FONT color="green">095</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 1<a name="line.95"></a> +<FONT color="green">096</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.96"></a> +<FONT color="green">097</FONT> assertTokenEquals(TOKEN, "b x", parser.nextToken(new Token()));<a name="line.97"></a> +<FONT color="green">098</FONT> assertTokenEquals(EORECORD, "c#no-comment", parser.nextToken(new Token())); // 2<a name="line.98"></a> +<FONT color="green">099</FONT> assertTokenEquals(COMMENT, "foo", parser.nextToken(new Token())); // 3<a name="line.99"></a> +<FONT color="green">100</FONT> // 4 empty line, ignored // 4<a name="line.100"></a> +<FONT color="green">101</FONT> assertTokenEquals(TOKEN, "d", parser.nextToken(new Token()));<a name="line.101"></a> +<FONT color="green">102</FONT> assertTokenEquals(TOKEN, "e", parser.nextToken(new Token()));<a name="line.102"></a> +<FONT color="green">103</FONT> assertTokenEquals(EORECORD, "#no-comment", parser.nextToken(new Token())); // 5<a name="line.103"></a> +<FONT color="green">104</FONT> assertTokenEquals(COMMENT, "penultimate comment", parser.nextToken(new Token())); // 6<a name="line.104"></a> +<FONT color="green">105</FONT> assertTokenEquals(COMMENT, "Final comment", parser.nextToken(new Token())); // 7<a name="line.105"></a> +<FONT color="green">106</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.106"></a> +<FONT color="green">107</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.107"></a> +<FONT color="green">108</FONT> <a name="line.108"></a> +<FONT color="green">109</FONT> }<a name="line.109"></a> +<FONT color="green">110</FONT> <a name="line.110"></a> +<FONT color="green">111</FONT> // multiline including comments (and empty lines)<a name="line.111"></a> +<FONT color="green">112</FONT> @Test<a name="line.112"></a> +<FONT color="green">113</FONT> public void testNextToken2EmptyLines() throws IOException {<a name="line.113"></a> +<FONT color="green">114</FONT> final String code =<a name="line.114"></a> +<FONT color="green">115</FONT> "1,2,3,\n"+ // 1<a name="line.115"></a> +<FONT color="green">116</FONT> "\n"+ // 1b<a name="line.116"></a> +<FONT color="green">117</FONT> "\n"+ // 1c<a name="line.117"></a> +<FONT color="green">118</FONT> "a,b x,c#no-comment\n"+ // 2<a name="line.118"></a> +<FONT color="green">119</FONT> "#foo\n"+ // 3<a name="line.119"></a> +<FONT color="green">120</FONT> "\n"+ // 4<a name="line.120"></a> +<FONT color="green">121</FONT> "\n"+ // 4b<a name="line.121"></a> +<FONT color="green">122</FONT> "d,e,#no-comment\n"+ // 5<a name="line.122"></a> +<FONT color="green">123</FONT> "\n"+ // 5b<a name="line.123"></a> +<FONT color="green">124</FONT> "\n"+ // 5c<a name="line.124"></a> +<FONT color="green">125</FONT> "# penultimate comment\n"+ // 6<a name="line.125"></a> +<FONT color="green">126</FONT> "\n"+ // 6b<a name="line.126"></a> +<FONT color="green">127</FONT> "\n"+ // 6c<a name="line.127"></a> +<FONT color="green">128</FONT> "# Final comment\n"; // 7<a name="line.128"></a> +<FONT color="green">129</FONT> final CSVFormat format = CSVFormat.newBuilder().withCommentStart('#').withIgnoreEmptyLines(false).build();<a name="line.129"></a> +<FONT color="green">130</FONT> assertFalse("Should not ignore empty lines", format.getIgnoreEmptyLines());<a name="line.130"></a> +<FONT color="green">131</FONT> <a name="line.131"></a> +<FONT color="green">132</FONT> final Lexer parser = getLexer(code, format);<a name="line.132"></a> +<FONT color="green">133</FONT> <a name="line.133"></a> +<FONT color="green">134</FONT> <a name="line.134"></a> +<FONT color="green">135</FONT> assertTokenEquals(TOKEN, "1", parser.nextToken(new Token()));<a name="line.135"></a> +<FONT color="green">136</FONT> assertTokenEquals(TOKEN, "2", parser.nextToken(new Token()));<a name="line.136"></a> +<FONT color="green">137</FONT> assertTokenEquals(TOKEN, "3", parser.nextToken(new Token()));<a name="line.137"></a> +<FONT color="green">138</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 1<a name="line.138"></a> +<FONT color="green">139</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 1b<a name="line.139"></a> +<FONT color="green">140</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 1c<a name="line.140"></a> +<FONT color="green">141</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.141"></a> +<FONT color="green">142</FONT> assertTokenEquals(TOKEN, "b x", parser.nextToken(new Token()));<a name="line.142"></a> +<FONT color="green">143</FONT> assertTokenEquals(EORECORD, "c#no-comment", parser.nextToken(new Token())); // 2<a name="line.143"></a> +<FONT color="green">144</FONT> assertTokenEquals(COMMENT, "foo", parser.nextToken(new Token())); // 3<a name="line.144"></a> +<FONT color="green">145</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 4<a name="line.145"></a> +<FONT color="green">146</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 4b<a name="line.146"></a> +<FONT color="green">147</FONT> assertTokenEquals(TOKEN, "d", parser.nextToken(new Token()));<a name="line.147"></a> +<FONT color="green">148</FONT> assertTokenEquals(TOKEN, "e", parser.nextToken(new Token()));<a name="line.148"></a> +<FONT color="green">149</FONT> assertTokenEquals(EORECORD, "#no-comment", parser.nextToken(new Token())); // 5<a name="line.149"></a> +<FONT color="green">150</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 5b<a name="line.150"></a> +<FONT color="green">151</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 5c<a name="line.151"></a> +<FONT color="green">152</FONT> assertTokenEquals(COMMENT, "penultimate comment", parser.nextToken(new Token())); // 6<a name="line.152"></a> +<FONT color="green">153</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 6b<a name="line.153"></a> +<FONT color="green">154</FONT> assertTokenEquals(EORECORD, "", parser.nextToken(new Token())); // 6c<a name="line.154"></a> +<FONT color="green">155</FONT> assertTokenEquals(COMMENT, "Final comment", parser.nextToken(new Token())); // 7<a name="line.155"></a> +<FONT color="green">156</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.156"></a> +<FONT color="green">157</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.157"></a> +<FONT color="green">158</FONT> <a name="line.158"></a> +<FONT color="green">159</FONT> }<a name="line.159"></a> +<FONT color="green">160</FONT> <a name="line.160"></a> +<FONT color="green">161</FONT> // simple token with escaping not enabled<a name="line.161"></a> +<FONT color="green">162</FONT> @Test<a name="line.162"></a> +<FONT color="green">163</FONT> public void testNextToken3() throws IOException {<a name="line.163"></a> +<FONT color="green">164</FONT> /* file: a,\,,b<a name="line.164"></a> +<FONT color="green">165</FONT> * \,,<a name="line.165"></a> +<FONT color="green">166</FONT> */<a name="line.166"></a> +<FONT color="green">167</FONT> final String code = "a,\\,,b\\\n\\,,";<a name="line.167"></a> +<FONT color="green">168</FONT> final CSVFormat format = CSVFormat.DEFAULT;<a name="line.168"></a> +<FONT color="green">169</FONT> assertFalse(format.isEscaping());<a name="line.169"></a> +<FONT color="green">170</FONT> final Lexer parser = getLexer(code, format);<a name="line.170"></a> +<FONT color="green">171</FONT> <a name="line.171"></a> +<FONT color="green">172</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.172"></a> +<FONT color="green">173</FONT> // an unquoted single backslash is not an escape char<a name="line.173"></a> +<FONT color="green">174</FONT> assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));<a name="line.174"></a> +<FONT color="green">175</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.175"></a> +<FONT color="green">176</FONT> assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));<a name="line.176"></a> +<FONT color="green">177</FONT> // an unquoted single backslash is not an escape char<a name="line.177"></a> +<FONT color="green">178</FONT> assertTokenEquals(TOKEN, "\\", parser.nextToken(new Token()));<a name="line.178"></a> +<FONT color="green">179</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.179"></a> +<FONT color="green">180</FONT> assertTokenEquals(EOF, "", parser.nextToken(new Token()));<a name="line.180"></a> +<FONT color="green">181</FONT> }<a name="line.181"></a> +<FONT color="green">182</FONT> <a name="line.182"></a> +<FONT color="green">183</FONT> // simple token with escaping enabled<a name="line.183"></a> +<FONT color="green">184</FONT> @Test<a name="line.184"></a> +<FONT color="green">185</FONT> public void testNextToken3Escaping() throws IOException {<a name="line.185"></a> +<FONT color="green">186</FONT> /* file: a,\,,b<a name="line.186"></a> +<FONT color="green">187</FONT> * \,,<a name="line.187"></a> +<FONT color="green">188</FONT> */<a name="line.188"></a> +<FONT color="green">189</FONT> final String code = "a,\\,,b\\\\\n\\,,\\\nc,d\\\r\ne";<a name="line.189"></a> +<FONT color="green">190</FONT> final CSVFormat format = CSVFormat.newBuilder().withEscape('\\').withIgnoreEmptyLines(false).build();<a name="line.190"></a> +<FONT color="green">191</FONT> assertTrue(format.isEscaping());<a name="line.191"></a> +<FONT color="green">192</FONT> final Lexer parser = getLexer(code, format);<a name="line.192"></a> +<FONT color="green">193</FONT> <a name="line.193"></a> +<FONT color="green">194</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.194"></a> +<FONT color="green">195</FONT> assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));<a name="line.195"></a> +<FONT color="green">196</FONT> assertTokenEquals(EORECORD, "b\\", parser.nextToken(new Token()));<a name="line.196"></a> +<FONT color="green">197</FONT> assertTokenEquals(TOKEN, ",", parser.nextToken(new Token()));<a name="line.197"></a> +<FONT color="green">198</FONT> assertTokenEquals(TOKEN, "\nc", parser.nextToken(new Token()));<a name="line.198"></a> +<FONT color="green">199</FONT> assertTokenEquals(EORECORD, "d\r", parser.nextToken(new Token()));<a name="line.199"></a> +<FONT color="green">200</FONT> assertTokenEquals(EOF, "e", parser.nextToken(new Token()));<a name="line.200"></a> +<FONT color="green">201</FONT> }<a name="line.201"></a> +<FONT color="green">202</FONT> <a name="line.202"></a> +<FONT color="green">203</FONT> // simple token with escaping enabled<a name="line.203"></a> +<FONT color="green">204</FONT> @Test<a name="line.204"></a> +<FONT color="green">205</FONT> public void testNextToken3BadEscaping() throws IOException {<a name="line.205"></a> +<FONT color="green">206</FONT> final String code = "a,b,c\\";<a name="line.206"></a> +<FONT color="green">207</FONT> final CSVFormat format = CSVFormat.newBuilder().withEscape('\\').build();<a name="line.207"></a> +<FONT color="green">208</FONT> assertTrue(format.isEscaping());<a name="line.208"></a> +<FONT color="green">209</FONT> final Lexer parser = getLexer(code, format);<a name="line.209"></a> +<FONT color="green">210</FONT> <a name="line.210"></a> +<FONT color="green">211</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.211"></a> +<FONT color="green">212</FONT> assertTokenEquals(TOKEN, "b", parser.nextToken(new Token()));<a name="line.212"></a> +<FONT color="green">213</FONT> try {<a name="line.213"></a> +<FONT color="green">214</FONT> final Token tkn = parser.nextToken(new Token());<a name="line.214"></a> +<FONT color="green">215</FONT> fail("Expected IOE, found "+tkn);<a name="line.215"></a> +<FONT color="green">216</FONT> } catch (final IOException e) {<a name="line.216"></a> +<FONT color="green">217</FONT> }<a name="line.217"></a> +<FONT color="green">218</FONT> }<a name="line.218"></a> +<FONT color="green">219</FONT> <a name="line.219"></a> +<FONT color="green">220</FONT> // encapsulator tokenizer (single line)<a name="line.220"></a> +<FONT color="green">221</FONT> @Test<a name="line.221"></a> +<FONT color="green">222</FONT> public void testNextToken4() throws IOException {<a name="line.222"></a> +<FONT color="green">223</FONT> /* file: a,"foo",b<a name="line.223"></a> +<FONT color="green">224</FONT> * a, " foo",b<a name="line.224"></a> +<FONT color="green">225</FONT> * a,"foo " ,b // whitespace after closing encapsulator<a name="line.225"></a> +<FONT color="green">226</FONT> * a, " foo " ,b<a name="line.226"></a> +<FONT color="green">227</FONT> */<a name="line.227"></a> +<FONT color="green">228</FONT> final String code = "a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";<a name="line.228"></a> +<FONT color="green">229</FONT> final Lexer parser = getLexer(code, CSVFormat.newBuilder().withIgnoreSurroundingSpaces(true).build());<a name="line.229"></a> +<FONT color="green">230</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.230"></a> +<FONT color="green">231</FONT> assertTokenEquals(TOKEN, "foo", parser.nextToken(new Token()));<a name="line.231"></a> +<FONT color="green">232</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.232"></a> +<FONT color="green">233</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.233"></a> +<FONT color="green">234</FONT> assertTokenEquals(TOKEN, " foo", parser.nextToken(new Token()));<a name="line.234"></a> +<FONT color="green">235</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.235"></a> +<FONT color="green">236</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.236"></a> +<FONT color="green">237</FONT> assertTokenEquals(TOKEN, "foo ", parser.nextToken(new Token()));<a name="line.237"></a> +<FONT color="green">238</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.238"></a> +<FONT color="green">239</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.239"></a> +<FONT color="green">240</FONT> assertTokenEquals(TOKEN, " foo ", parser.nextToken(new Token()));<a name="line.240"></a> +<FONT color="green">241</FONT> // assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.241"></a> +<FONT color="green">242</FONT> assertTokenEquals(EOF, "b", parser.nextToken(new Token()));<a name="line.242"></a> +<FONT color="green">243</FONT> }<a name="line.243"></a> +<FONT color="green">244</FONT> <a name="line.244"></a> +<FONT color="green">245</FONT> // encapsulator tokenizer (multi line, delimiter in string)<a name="line.245"></a> +<FONT color="green">246</FONT> @Test<a name="line.246"></a> +<FONT color="green">247</FONT> public void testNextToken5() throws IOException {<a name="line.247"></a> +<FONT color="green">248</FONT> final String code = "a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";<a name="line.248"></a> +<FONT color="green">249</FONT> final Lexer parser = getLexer(code, CSVFormat.DEFAULT);<a name="line.249"></a> +<FONT color="green">250</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.250"></a> +<FONT color="green">251</FONT> assertTokenEquals(TOKEN, "foo\n", parser.nextToken(new Token()));<a name="line.251"></a> +<FONT color="green">252</FONT> assertTokenEquals(EORECORD, "b", parser.nextToken(new Token()));<a name="line.252"></a> +<FONT color="green">253</FONT> assertTokenEquals(EORECORD, "foo\n baar ,,,", parser.nextToken(new Token()));<a name="line.253"></a> +<FONT color="green">254</FONT> assertTokenEquals(EOF, "\n\t \n", parser.nextToken(new Token()));<a name="line.254"></a> +<FONT color="green">255</FONT> <a name="line.255"></a> +<FONT color="green">256</FONT> }<a name="line.256"></a> +<FONT color="green">257</FONT> <a name="line.257"></a> +<FONT color="green">258</FONT> // change delimiters, comment, encapsulater<a name="line.258"></a> +<FONT color="green">259</FONT> @Test<a name="line.259"></a> +<FONT color="green">260</FONT> public void testNextToken6() throws IOException {<a name="line.260"></a> +<FONT color="green">261</FONT> /* file: a;'b and \' more<a name="line.261"></a> +<FONT color="green">262</FONT> * '<a name="line.262"></a> +<FONT color="green">263</FONT> * !comment;;;;<a name="line.263"></a> +<FONT color="green">264</FONT> * ;;<a name="line.264"></a> +<FONT color="green">265</FONT> */<a name="line.265"></a> +<FONT color="green">266</FONT> final String code = "a;'b and '' more\n'\n!comment;;;;\n;;";<a name="line.266"></a> +<FONT color="green">267</FONT> final CSVFormat format = CSVFormat.newBuilder().withDelimiter(';').withQuoteChar('\'').withCommentStart('!').build();<a name="line.267"></a> +<FONT color="green">268</FONT> final Lexer parser = getLexer(code, format);<a name="line.268"></a> +<FONT color="green">269</FONT> assertTokenEquals(TOKEN, "a", parser.nextToken(new Token()));<a name="line.269"></a> +<FONT color="green">270</FONT> assertTokenEquals(EORECORD, "b and ' more\n", parser.nextToken(new Token()));<a name="line.270"></a> +<FONT color="green">271</FONT> }<a name="line.271"></a> +<FONT color="green">272</FONT> <a name="line.272"></a> +<FONT color="green">273</FONT> // From CSV-1<a name="line.273"></a> +<FONT color="green">274</FONT> @Test<a name="line.274"></a> +<FONT color="green">275</FONT> public void testDelimiterIsWhitespace() throws IOException {<a name="line.275"></a> +<FONT color="green">276</FONT> final String code = "one\ttwo\t\tfour \t five\t six";<a name="line.276"></a> +<FONT color="green">277</FONT> final Lexer parser = getLexer(code, CSVFormat.TDF);<a name="line.277"></a> +<FONT color="green">278</FONT> assertTokenEquals(TOKEN, "one", parser.nextToken(new Token()));<a name="line.278"></a> +<FONT color="green">279</FONT> assertTokenEquals(TOKEN, "two", parser.nextToken(new Token()));<a name="line.279"></a> +<FONT color="green">280</FONT> assertTokenEquals(TOKEN, "", parser.nextToken(new Token()));<a name="line.280"></a> +<FONT color="green">281</FONT> assertTokenEquals(TOKEN, "four", parser.nextToken(new Token()));<a name="line.281"></a> +<FONT color="green">282</FONT> assertTokenEquals(TOKEN, "five", parser.nextToken(new Token()));<a name="line.282"></a> +<FONT color="green">283</FONT> assertTokenEquals(EOF, "six", parser.nextToken(new Token()));<a name="line.283"></a> +<FONT color="green">284</FONT> }<a name="line.284"></a> +<FONT color="green">285</FONT> }<a name="line.285"></a> @@ -230,4 +348,4 @@ </PRE> </BODY> -</HTML> +</HTML> \ No newline at end of file