paleolimbot commented on code in PR #45459:
URL: https://github.com/apache/arrow/pull/45459#discussion_r1975891793
##########
cpp/src/parquet/reader_test.cc:
##########
@@ -1857,4 +1863,172 @@ TEST(PageIndexReaderTest, ReadFileWithoutPageIndex) {
ASSERT_EQ(nullptr, row_group_index_reader);
}
+class TestGeometryLogicalType : public ::testing::Test {
+ public:
+ const int kNumRows = 1000;
+
+ void WriteTestData(ParquetDataPageVersion data_page_version,
+ bool enable_write_page_index, bool write_arrow) {
+ // Make schema
+ schema::NodeVector fields;
+ fields.push_back(PrimitiveNode::Make("g", Repetition::REQUIRED,
+ GeometryLogicalType::Make("srid:1234"),
+ Type::BYTE_ARRAY));
+ auto schema = std::static_pointer_cast<GroupNode>(
+ GroupNode::Make("schema", Repetition::REQUIRED, fields));
+
+ // Write small batches and small data pages
+ auto writer_props_builder = WriterProperties::Builder();
+ writer_props_builder.write_batch_size(64)->data_pagesize(128)->data_page_version(
+ data_page_version);
+ if (enable_write_page_index) {
+ writer_props_builder.enable_write_page_index();
+ }
+
+ std::shared_ptr<WriterProperties> writer_props = writer_props_builder.build();
+
+ ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create());
+ std::shared_ptr<ParquetFileWriter> file_writer =
+ ParquetFileWriter::Open(out_file, schema, writer_props);
+ RowGroupWriter* rg_writer = file_writer->AppendRowGroup();
+
+ // write WKB points to columns
+ auto* writer = static_cast<ByteArrayWriter*>(rg_writer->NextColumn());
+ if (!write_arrow) {
+ WriteTestDataUsingWriteBatch(writer);
+ } else {
+ WriteTestDataUsingWriteArrow(writer);
+ }
+
+ rg_writer->Close();
+ file_writer->Close();
+
+ ASSERT_OK_AND_ASSIGN(file_buf, out_file->Finish());
+ }
+
+ void WriteTestDataUsingWriteBatch(ByteArrayWriter* writer) {
+ std::vector<uint8_t> buffer(test::kWkbPointSize * kNumRows);
+ uint8_t* ptr = buffer.data();
+ std::vector<ByteArray> values(kNumRows);
+ for (int k = 0; k < kNumRows; k++) {
+ test::GenerateWKBPoint(ptr, k, k + 1);
+ values[k].len = test::kWkbPointSize;
+ values[k].ptr = ptr;
+ ptr += test::kWkbPointSize;
+ }
+ writer->WriteBatch(kNumRows, nullptr, nullptr, values.data());
+ }
+
+ void WriteTestDataUsingWriteArrow(ByteArrayWriter* writer) {
+ ::arrow::BinaryBuilder builder;
+ std::vector<uint8_t> buffer(test::kWkbPointSize * kNumRows);
+ uint8_t* ptr = buffer.data();
+ for (int k = 0; k < kNumRows; k++) {
+ test::GenerateWKBPoint(ptr, k, k + 1);
+ ASSERT_OK(builder.Append(ptr, test::kWkbPointSize));
+ ptr += test::kWkbPointSize;
+ }
+ std::shared_ptr<::arrow::BinaryArray> array;
+ ASSERT_OK(builder.Finish(&array));
+
+ std::shared_ptr<ArrowWriterProperties> properties =
+ ArrowWriterProperties::Builder().build();
+ MemoryPool* pool = ::arrow::default_memory_pool();
+ auto ctx = std::make_unique<ArrowWriteContext>(pool, properties.get());
+ ASSERT_OK(writer->WriteArrow(nullptr, nullptr, kNumRows, *array, ctx.get(), true));
+ }
+
+ void TestWriteAndRead(ParquetDataPageVersion data_page_version,
+ bool enable_write_page_index, bool write_arrow) {
+ WriteTestData(data_page_version, enable_write_page_index, write_arrow);
+
+ auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf);
+
+ ReaderProperties reader_props;
+ reader_props.enable_buffered_stream();
+ reader_props.set_buffer_size(64);
+ auto file_reader = ParquetFileReader::Open(in_file, reader_props);
+
+ // Check that the geometry statistics are correctly written and read
+ auto metadata = file_reader->metadata();
+ auto page_index_reader = file_reader->GetPageIndexReader();
+ int num_row_groups = metadata->num_row_groups();
+ for (int i = 0; i < num_row_groups; i++) {
+ auto row_group_metadata = metadata->RowGroup(i);
+ auto column_chunk_metadata = row_group_metadata->ColumnChunk(0);
+ auto geometry_stats = column_chunk_metadata->geometry_statistics();
+ CheckGeospatialStatistics(geometry_stats);
Review Comment:
I think I successfully got everything renamed to `Geospatial` or `Geo`!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]