pitrou commented on a change in pull request #11836: URL: https://github.com/apache/arrow/pull/11836#discussion_r761091386
########## File path: cpp/src/arrow/csv/writer_test.cc ##########
@@ -69,25 +70,49 @@ std::vector<WriterTestParams> GenerateTestCases() { { "a": 124, "b\"": "a\"\"b\"" }, { "d": 0 }, { "e": 86400000 }, - { "f": 1078016523 }])"; - std::string expected_without_header = std::string("1,,-1,,,") + "\n" + // line 1 - R"(1,"abc""efg",2324,,,)" + "\n" + // line 2 - R"(,"abcd",5467,,,)" + "\n" + // line 3 - R"(,,,,,)" + "\n" + // line 4 - R"(546,"",517,,,)" + "\n" + // line 5 - R"(124,"a""""b""",,,,)" + "\n" + // line 6 - R"(,,,1970-01-01,,)" + "\n" + // line 7 - R"(,,,,1970-01-02,)" + "\n" + // line 8 - R"(,,,,,2004-02-29 01:02:03)" + "\n"; // line 9 + { "f": 1078016523 }, + { "b\"": "NA" }])"; + std::string expected_without_header = std::string("1,,-1,,,") + "\n" + // line 1 + R"(1,"abc""efg",2324,,,)" + "\n" + // line 2 + R"(,"abcd",5467,,,)" + "\n" + // line 3 + R"(,,,,,)" + "\n" + // line 4 + R"(546,"",517,,,)" + "\n" + // line 5 + R"(124,"a""""b""",,,,)" + "\n" + // line 6 + R"(,,,1970-01-01,,)" + "\n" + // line 7 + R"(,,,,1970-01-02,)" + "\n" + // line 8 + R"(,,,,,2004-02-29 01:02:03)" + "\n" + // line 9 + R"(,"NA",,,,)" + "\n"; // line 10 + std::string expected_header = std::string(R"("a","b""","c ","d","e","f")") + "\n"; + auto schema_custom_na = schema({field("g", uint64()), field("h", utf8())}); + + auto populated_batch_custom_na = R"([{"g": 42, "h": "NA"}, + {}])"; + + std::string expected_custom_na = std::string(R"(42,"NA")") + "\n" + // line 1 + R"(NA,NA)" + "\n"; // line 2 + + std::string expected_custom_quoted_na = std::string(R"(42,"NA")") + "\n" + // line 1 + R"(""NA"",""NA"")" + "\n"; // line 2

Review comment:
Actually, no, the problem really seems to be in this PR, since otherwise the CSV writer works fine:

```python
>>> tab = pa.table({'a': [0,1,2], 'b': ['foo', '"bar"', '""baz""']})
>>> bio = io.BytesIO()
>>> csv.write_csv(tab, bio)
>>> bio.getvalue()
b'"a","b"\n0,"foo"\n1,"""bar"""\n2,"""""baz"""""\n'
>>> bio.seek(0)
0
>>> 
tt
pyarrow.Table
a: int64
b: string
----
a: [[0,1,2]]
b: [["foo",""bar"","""baz"""]]
>>> tab == tt
True
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org