This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new ca9bb59b4 ORC-1920: [C++] Support `Geometry` and `Geography` types
ca9bb59b4 is described below

commit ca9bb59b4bfd15da89e6270d077d2464495db0d9
Author: ffacs <[email protected]>
AuthorDate: Sun Jul 13 17:31:32 2025 -0700

    ORC-1920: [C++] Support `Geometry` and `Geography` types
    
    What changes were proposed in this pull request?
    Support Geometry and Geography types for c++ side
    
    Why are the changes needed?
    Add support for Geometry and Geography types
    
    How was this patch tested?
    UT passed
    
    Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #2269 from ffacs/geo.
    
    Authored-by: ffacs <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 c++/include/orc/Geospatial.hh    | 196 ++++++++++++++++++++++
 c++/include/orc/Statistics.hh    |  30 +++-
 c++/include/orc/Type.hh          |  24 ++-
 c++/include/orc/meson.build      |   1 +
 c++/src/CMakeLists.txt           |   1 +
 c++/src/ColumnPrinter.cc         |   2 +
 c++/src/ColumnReader.cc          |   2 +
 c++/src/ColumnWriter.cc          |  65 ++++++++
 c++/src/Geospatial.cc            | 307 +++++++++++++++++++++++++++++++++++
 c++/src/Geospatial.hh            |  86 ++++++++++
 c++/src/Reader.cc                |   2 +
 c++/src/SchemaEvolution.cc       |   7 +-
 c++/src/Statistics.cc            |  47 ++++++
 c++/src/Statistics.hh            | 122 ++++++++++++++
 c++/src/TypeImpl.cc              | 139 ++++++++++++++++
 c++/src/TypeImpl.hh              |  26 +++
 c++/src/Writer.cc                |  35 ++++
 c++/src/meson.build              |   1 +
 c++/test/CMakeLists.txt          |   2 +
 c++/test/TestColumnStatistics.cc | 342 +++++++++++++++++++++++++++++++++++++++
 c++/test/TestStatistics.cc       | 230 ++++++++++++++++++++++++++
 c++/test/TestUtil.cc             |  65 ++++++++
 c++/test/TestUtil.hh             |  44 +++++
 c++/test/TestWriter.cc           | 137 +++++++++++++++-
 c++/test/meson.build             |   2 +
 tools/src/CSVFileImport.cc       |   2 +
 26 files changed, 1912 insertions(+), 5 deletions(-)

diff --git a/c++/include/orc/Geospatial.hh b/c++/include/orc/Geospatial.hh
new file mode 100644
index 000000000..d3b9e2828
--- /dev/null
+++ b/c++/include/orc/Geospatial.hh
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains code adapted from the Apache Arrow project.
+ *
+ * Original source:
+ * https://github.com/apache/arrow/blob/main/cpp/src/parquet/geospatial/statistics.h
+ *
+ * The original code is licensed under the Apache License, Version 2.0.
+ *
+ * Modifications may have been made from the original source.
+ */
+
+#ifndef ORC_GEOSPATIAL_HH
+#define ORC_GEOSPATIAL_HH
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <limits>
+#include <ostream>
+#include <string>
+
+namespace orc::geospatial {
+
+  // Positive infinity. An empty BoundingBox starts with min = +INF and max = -INF.
+  // Declared `inline constexpr`, like MAX_DIMENSIONS below, so that the two
+  // header-level constants follow the same convention.
+  inline constexpr double INF = std::numeric_limits<double>::infinity();
+  // The maximum number of dimensions supported (X, Y, Z, M)
+  inline constexpr int MAX_DIMENSIONS = 4;
+
+  // Supported combinations of geometry dimensions (numeric value == WKB type code / 1000)
+  enum class Dimensions {
+    XY = 0,    // X and Y only
+    XYZ = 1,   // X, Y, and Z
+    XYM = 2,   // X, Y, and M
+    XYZM = 3,  // X, Y, Z, and M
+    VALUE_MIN = 0,  // smallest valid value (same as XY); used for range checks
+    VALUE_MAX = 3   // largest valid value (same as XYZM); used for range checks
+  };
+
+  // Supported geometry types according to ISO WKB (numeric value == WKB type code % 1000)
+  enum class GeometryType {
+    POINT = 1,
+    LINESTRING = 2,
+    POLYGON = 3,
+    MULTIPOINT = 4,
+    MULTILINESTRING = 5,
+    MULTIPOLYGON = 6,
+    GEOMETRYCOLLECTION = 7,
+    VALUE_MIN = 1,  // smallest valid value (same as POINT); used for range checks
+    VALUE_MAX = 7   // largest valid value (same as GEOMETRYCOLLECTION); used for range checks
+  };
+
+  // BoundingBox represents the minimum bounding rectangle (or box) for a geometry.
+  // It supports up to 4 dimensions (X, Y, Z, M).
+  struct BoundingBox {
+    using XY = std::array<double, 2>;    // {x, y}
+    using XYZ = std::array<double, 3>;   // {x, y, z}
+    using XYM = std::array<double, 3>;   // {x, y, m}
+    using XYZM = std::array<double, 4>;  // {x, y, z, m}; also the storage type of min/max
+
+    // Default constructor: initializes to an empty bounding box (min = +INF, max = -INF everywhere).
+    BoundingBox() : min{INF, INF, INF, INF}, max{-INF, -INF, -INF, -INF} {}
+    // Constructor with explicit min/max values.
+    BoundingBox(const XYZM& mins, const XYZM& maxes) : min(mins), max(maxes) {}
+    BoundingBox(const BoundingBox& other) = default;
+    BoundingBox& operator=(const BoundingBox&) = default;
+
+    // Update the bounding box to include a 2D coordinate.
+    void updateXY(const XY& coord) {
+      updateInternal(coord);
+    }
+    // Update the bounding box to include a 3D coordinate (XYZ).
+    void updateXYZ(const XYZ& coord) {
+      updateInternal(coord);
+    }
+    // Update the bounding box to include a 3D coordinate (XYM).
+    void updateXYM(const XYM& coord) {
+      std::array<int, 3> dims = {0, 1, 3};  // coord index -> box slot: x->0, y->1, m->3 (z slot 2 is skipped)
+      for (int i = 0; i < 3; ++i) {
+        auto dim = dims[i];
+        if (!std::isnan(min[dim]) && !std::isnan(max[dim])) {  // NaN (invalidated) bounds stay NaN
+          min[dim] = std::min(min[dim], coord[i]);
+          max[dim] = std::max(max[dim], coord[i]);
+        }
+      }
+    }
+    // Update the bounding box to include a 4D coordinate (XYZM).
+    void updateXYZM(const XYZM& coord) {
+      updateInternal(coord);
+    }
+
+    // Reset the bounding box to its initial empty state.
+    void reset() {
+      for (int i = 0; i < MAX_DIMENSIONS; ++i) {
+        min[i] = INF;
+        max[i] = -INF;
+      }
+    }
+
+    // Invalidate the bounding box (set all values to NaN).
+    void invalidate() {
+      for (int i = 0; i < MAX_DIMENSIONS; ++i) {
+        min[i] = std::numeric_limits<double>::quiet_NaN();
+        max[i] = std::numeric_limits<double>::quiet_NaN();
+      }
+    }
+
+    // Check if the bound for a given dimension is empty.
+    bool boundEmpty(int dim) const {
+      return std::isinf(min[dim] - max[dim]);  // e.g. the reset state: +INF - (-INF) is infinite
+    }
+
+    // Check if the bound for a given dimension is valid (not NaN).
+    bool boundValid(int dim) const {
+      return !std::isnan(min[dim]) && !std::isnan(max[dim]);
+    }
+
+    // Get the lower bound (min values).
+    const XYZM& lowerBound() const {
+      return min;
+    }
+    // Get the upper bound (max values).
+    const XYZM& upperBound() const {
+      return max;
+    }
+
+    // Get validity for each dimension.
+    std::array<bool, MAX_DIMENSIONS> dimensionValid() const {
+      return {boundValid(0), boundValid(1), boundValid(2), boundValid(3)};
+    }
+    // Get emptiness for each dimension.
+    std::array<bool, MAX_DIMENSIONS> dimensionEmpty() const {
+      return {boundEmpty(0), boundEmpty(1), boundEmpty(2), boundEmpty(3)};
+    }
+
+    // Merge another bounding box into this one; a NaN bound on either side makes that dimension NaN.
+    void merge(const BoundingBox& other) {
+      for (int i = 0; i < MAX_DIMENSIONS; ++i) {
+        if (std::isnan(min[i]) || std::isnan(max[i]) || 
std::isnan(other.min[i]) ||
+            std::isnan(other.max[i])) {
+          min[i] = std::numeric_limits<double>::quiet_NaN();
+          max[i] = std::numeric_limits<double>::quiet_NaN();
+        } else {
+          min[i] = std::min(min[i], other.min[i]);
+          max[i] = std::max(max[i], other.max[i]);
+        }
+      }
+    }
+
+    // Convert the bounding box to a string representation.
+    std::string toString() const;
+
+    XYZM min;  // Minimum values for each dimension
+    XYZM max;  // Maximum values for each dimension
+
+   private:
+    // Internal update function for XY, XYZ, or XYZM coordinates (coord index i maps to box slot i).
+    template <typename Coord>
+    void updateInternal(const Coord& coord) {
+      for (size_t i = 0; i < coord.size(); ++i) {
+        if (!std::isnan(min[i]) && !std::isnan(max[i])) {  // NaN (invalidated) bounds stay NaN
+          min[i] = std::min(min[i], coord[i]);
+          max[i] = std::max(max[i], coord[i]);
+        }
+      }
+    }
+  };
+
+  inline bool operator==(const BoundingBox& lhs, const BoundingBox& rhs) {
+    return lhs.min == rhs.min && lhs.max == rhs.max;  // element-wise double comparison: any NaN bound makes the boxes unequal
+  }
+  inline bool operator!=(const BoundingBox& lhs, const BoundingBox& rhs) {
+    return !(lhs == rhs);  // exact negation of operator== above
+  }
+  inline std::ostream& operator<<(std::ostream& os, const BoundingBox& obj) {
+    os << obj.toString();  // streams the same text that BoundingBox::toString() returns
+    return os;
+  }
+
+}  // namespace orc::geospatial
+
+#endif  // ORC_GEOSPATIAL_HH
diff --git a/c++/include/orc/Statistics.hh b/c++/include/orc/Statistics.hh
index 4ba8c35f7..58169abe5 100644
--- a/c++/include/orc/Statistics.hh
+++ b/c++/include/orc/Statistics.hh
@@ -19,12 +19,11 @@
 #ifndef ORC_STATISTICS_HH
 #define ORC_STATISTICS_HH
 
+#include "orc/Geospatial.hh"
 #include "orc/Type.hh"
 #include "orc/Vector.hh"
 #include "orc/orc-config.hh"
 
-#include <sstream>
-
 namespace orc {
 
   /**
@@ -367,6 +366,33 @@ namespace orc {
     virtual int32_t getMaximumNanos() const = 0;
   };
 
+  /**
+   * Statistics for Geometry and Geography columns
+   */
+  class GeospatialColumnStatistics : public ColumnStatistics {
+   public:
+    virtual ~GeospatialColumnStatistics();
+
+    /**
+     * Get bounding box
+     * @return reference to the bounding box held by these statistics
+     */
+    virtual const geospatial::BoundingBox& getBoundingBox() const = 0;
+
+    /**
+     * Get geospatial types
+     * @return a sorted vector of unique geometry type IDs
+     */
+    virtual std::vector<int32_t> getGeospatialTypes() const = 0;
+
+    /**
+     * Update stats with a new value
+     * @param value pointer to the bytes of the new value
+     * @param length length of the value in bytes
+     */
+    virtual void update(const char* value, size_t length) = 0;
+  };
+
   class Statistics {
    public:
     virtual ~Statistics();
diff --git a/c++/include/orc/Type.hh b/c++/include/orc/Type.hh
index 82e0e3cc8..4bb794ff3 100644
--- a/c++/include/orc/Type.hh
+++ b/c++/include/orc/Type.hh
@@ -25,6 +25,18 @@
 
 namespace orc {
 
+  namespace geospatial {
+    enum EdgeInterpolationAlgorithm {  // algorithm attribute of GEOGRAPHY types; SPHERICAL is the createGeographyType default
+      SPHERICAL = 0,
+      VINCENTY = 1,
+      THOMAS = 2,
+      ANDOYER = 3,
+      KARNEY = 4
+    };
+    std::string AlgoToString(EdgeInterpolationAlgorithm algo);  // presumably the textual name of `algo`; definition not shown here
+    EdgeInterpolationAlgorithm AlgoFromString(const std::string& algo);  // presumably the inverse; handling of unknown names not visible here
+  }  // namespace geospatial
+
   enum TypeKind {
     BOOLEAN = 0,
     BYTE = 1,
@@ -44,7 +56,9 @@ namespace orc {
     DATE = 15,
     VARCHAR = 16,
     CHAR = 17,
-    TIMESTAMP_INSTANT = 18
+    TIMESTAMP_INSTANT = 18,
+    GEOMETRY = 19,
+    GEOGRAPHY = 20
   };
 
   class Type {
@@ -59,6 +73,10 @@ namespace orc {
     virtual uint64_t getMaximumLength() const = 0;
     virtual uint64_t getPrecision() const = 0;
     virtual uint64_t getScale() const = 0;
+    // for geospatial types only
+    virtual const std::string& getCrs() const = 0;
+    // for geography type only
+    virtual geospatial::EdgeInterpolationAlgorithm getAlgorithm() const = 0;
     virtual Type& setAttribute(const std::string& key, const std::string& 
value) = 0;
     virtual bool hasAttributeKey(const std::string& key) const = 0;
     virtual Type& removeAttribute(const std::string& key) = 0;
@@ -115,6 +133,10 @@ namespace orc {
   std::unique_ptr<Type> createListType(std::unique_ptr<Type> elements);
   std::unique_ptr<Type> createMapType(std::unique_ptr<Type> key, 
std::unique_ptr<Type> value);
   std::unique_ptr<Type> createUnionType();
+  std::unique_ptr<Type> createGeometryType(const std::string& crs = 
"OGC:CRS84");
+  std::unique_ptr<Type> createGeographyType(
+      const std::string& crs = "OGC:CRS84",
+      geospatial::EdgeInterpolationAlgorithm algo = geospatial::SPHERICAL);
 
 }  // namespace orc
 #endif
diff --git a/c++/include/orc/meson.build b/c++/include/orc/meson.build
index 2e9e18199..e2524051f 100644
--- a/c++/include/orc/meson.build
+++ b/c++/include/orc/meson.build
@@ -34,6 +34,7 @@ install_headers(
         'ColumnPrinter.hh',
         'Common.hh',
         'Exceptions.hh',
+        'Geospatial.hh',
         'Int128.hh',
         'MemoryPool.hh',
         'OrcFile.hh',
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index e378429f1..09a0b148e 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -171,6 +171,7 @@ set(SOURCE_FILES
   ConvertColumnReader.cc
   CpuInfoUtil.cc
   Exceptions.cc
+  Geospatial.cc
   Int128.cc
   LzoDecompressor.cc
   MemoryPool.cc
diff --git a/c++/src/ColumnPrinter.cc b/c++/src/ColumnPrinter.cc
index 8b16ecbd0..6535c612c 100644
--- a/c++/src/ColumnPrinter.cc
+++ b/c++/src/ColumnPrinter.cc
@@ -254,6 +254,8 @@ namespace orc {
           break;
 
         case BINARY:
+        case GEOMETRY:
+        case GEOGRAPHY:
           result = std::make_unique<BinaryColumnPrinter>(buffer, param);
           break;
 
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index 0fd17de1b..89ff0e024 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -1747,6 +1747,8 @@ namespace orc {
       case CHAR:
       case STRING:
       case VARCHAR:
+      case GEOMETRY:
+      case GEOGRAPHY:
         switch 
(static_cast<int64_t>(stripe.getEncoding(type.getColumnId()).kind())) {
           case proto::ColumnEncoding_Kind_DICTIONARY:
           case proto::ColumnEncoding_Kind_DICTIONARY_V2:
diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc
index d31b1c65d..c99890b88 100644
--- a/c++/src/ColumnWriter.cc
+++ b/c++/src/ColumnWriter.cc
@@ -17,8 +17,11 @@
  */
 
 #include "orc/Int128.hh"
+#include "orc/Statistics.hh"
+#include "orc/Type.hh"
 #include "orc/Writer.hh"
 
+#include <memory>
 #include "ByteRLE.hh"
 #include "ColumnWriter.hh"
 #include "RLE.hh"
@@ -2871,6 +2874,65 @@ namespace orc {
     }
   }
 
+  class GeospatialColumnWriter : public BinaryColumnWriter {  // writer built for both GEOMETRY and GEOGRAPHY columns
+   public:
+    GeospatialColumnWriter(const Type& type, const StreamsFactory& factory,
+                           const WriterOptions& options)
+        : BinaryColumnWriter(type, factory, options),
+          isGeometry_(type.getKind() == TypeKind::GEOMETRY) {}
+
+    virtual void add(ColumnVectorBatch& rowBatch, uint64_t offset, uint64_t 
numValues,
+                     const char* incomingMask) override {
+      ColumnWriter::add(rowBatch, offset, numValues, incomingMask);  // explicitly qualified: BinaryColumnWriter::add is not invoked
+
+      const StringVectorBatch* strBatch = dynamic_cast<const 
StringVectorBatch*>(&rowBatch);
+      if (strBatch == nullptr) {
+        throw InvalidArgument("Failed to cast to StringVectorBatch");
+      }
+      auto data = &strBatch->data[offset];
+      auto length = &strBatch->length[offset];
+      const char* notNull = strBatch->hasNulls ? strBatch->notNull.data() + 
offset : nullptr;
+
+      bool hasNull = false;
+      GeospatialColumnStatisticsImpl* geoStats = nullptr;
+      if (isGeometry_) {  // statistics are only collected for GEOMETRY; geoStats stays null for GEOGRAPHY
+        geoStats = 
dynamic_cast<GeospatialColumnStatisticsImpl*>(colIndexStatistics.get());  // NOTE(review): a failed cast silently disables statistics; confirm intended
+      }
+
+      uint64_t count = 0;  // non-null values seen in this batch (only counted when geoStats is set)
+      for (uint64_t i = 0; i < numValues; ++i) {
+        if (notNull == nullptr || notNull[i]) {
+          uint64_t len = static_cast<uint64_t>(length[i]);
+          directDataStream->write(data[i], len);
+
+          // update stats
+          if (geoStats) {
+            ++count;
+            geoStats->update(data[i], len);
+          }
+
+          if (enableBloomFilter) {
+            bloomFilter->addBytes(data[i], length[i]);
+          }
+        } else if (!hasNull) {  // first null in this batch: flag it once
+          hasNull = true;
+          if (geoStats) {
+            geoStats->setHasNull(hasNull);
+          }
+        }
+      }
+
+      directLengthEncoder->add(length, numValues, notNull);
+
+      if (geoStats) {
+        geoStats->increase(count);
+      }
+    }
+
+   private:
+    bool isGeometry_;  // true when the column kind is TypeKind::GEOMETRY
+  };
+
   std::unique_ptr<ColumnWriter> buildWriter(const Type& type, const 
StreamsFactory& factory,
                                             const WriterOptions& options) {
     switch (static_cast<int64_t>(type.getKind())) {
@@ -2941,6 +3003,9 @@ namespace orc {
         return std::make_unique<MapColumnWriter>(type, factory, options);
       case UNION:
         return std::make_unique<UnionColumnWriter>(type, factory, options);
+      case GEOMETRY:
+      case GEOGRAPHY:
+        return std::make_unique<GeospatialColumnWriter>(type, factory, 
options);
       default:
         throw NotImplementedYet(
             "Type is not supported yet for creating "
diff --git a/c++/src/Geospatial.cc b/c++/src/Geospatial.cc
new file mode 100644
index 000000000..6d7d26870
--- /dev/null
+++ b/c++/src/Geospatial.cc
@@ -0,0 +1,307 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains code adapted from the Apache Arrow project.
+ *
+ * Original source:
+ * https://github.com/apache/arrow/blob/main/cpp/src/parquet/geospatial/statistics.cc
+ *
+ * The original code is licensed under the Apache License, Version 2.0.
+ *
+ * Modifications may have been made from the original source.
+ */
+
+#include "orc/Geospatial.hh"
+#include "orc/Exceptions.hh"
+
+#include "Geospatial.hh"
+
+#include <algorithm>
+#include <cstring>
+#include <optional>
+#include <sstream>
+
+namespace orc::geospatial {
+
+  template <typename T>
+  inline std::enable_if_t<std::is_trivially_copyable_v<T>, T> safeLoadAs(const 
uint8_t* unaligned) {  // memcpy-based load of a T; reads sizeof(T) bytes with no bounds check
+    std::remove_const_t<T> ret;
+    std::memcpy(&ret, unaligned, sizeof(T));
+    return ret;
+  }
+
+  template <typename U, typename T>
+  inline std::enable_if_t<std::is_trivially_copyable_v<T> && 
std::is_trivially_copyable_v<U> &&
+                              sizeof(T) == sizeof(U),
+                          U>
+  safeCopy(T value) {  // returns the bytes of `value` reinterpreted as a same-sized U, via memcpy
+    std::remove_const_t<U> ret;
+    std::memcpy(&ret, static_cast<const void*>(&value), sizeof(T));
+    return ret;
+  }
+
+  // Reports whether the host stores the least significant byte first.
+  // The first byte of a known 32-bit pattern is inspected through memcpy, which
+  // is well defined, instead of reading an inactive union member (undefined
+  // behavior in C++).
+  static bool isLittleEndian() {
+    const uint32_t probe = 0x01020304;
+    uint8_t firstByte = 0;
+    std::memcpy(&firstByte, &probe, sizeof(firstByte));
+    // On a little-endian host the lowest-order byte (0x04) comes first in memory.
+    return firstByte == 4;
+  }
+
+// Map ORC_BYTE_SWAP64 / ORC_BYTE_SWAP32 to the compiler's byte-swap intrinsic.
+// MSVC spells its intrinsics in all lowercase (_byteswap_uint64, _byteswap_ulong);
+// the mixed-case spellings do not exist and break the MSVC build.
+// NOTE(review): this #include sits inside namespace orc::geospatial; consider
+// hoisting it to the top-of-file include block.
+#if defined(_MSC_VER)
+#include <intrin.h>  // IWYU pragma: keep
+#define ORC_BYTE_SWAP64 _byteswap_uint64
+#define ORC_BYTE_SWAP32 _byteswap_ulong
+#else
+#define ORC_BYTE_SWAP64 __builtin_bswap64
+#define ORC_BYTE_SWAP32 __builtin_bswap32
+#endif
+
+  // Swap the byte order (i.e. endianness) of a 32-bit unsigned integer
+  static inline uint32_t byteSwap(uint32_t value) {
+    return static_cast<uint32_t>(ORC_BYTE_SWAP32(value));
+  }
+  static inline double byteSwap(double value) {  // swaps the 8 bytes of the double's representation
+    const uint64_t swapped = ORC_BYTE_SWAP64(safeCopy<uint64_t>(value));  // view the bits as uint64_t, then swap
+    return safeCopy<double>(swapped);
+  }
+
+  std::string BoundingBox::toString() const {  // formats all eight bounds in x, y, z, m order
+    std::stringstream ss;
+    ss << "BoundingBox{xMin=" << min[0] << ", xMax=" << max[0] << ", yMin=" << 
min[1]
+       << ", yMax=" << max[1] << ", zMin=" << min[2] << ", zMax=" << max[2] << 
", mMin=" << min[3]
+       << ", mMax=" << max[3] << "}";
+    return ss.str();
+  }
+
+  /// \brief Object to keep track of the low-level consumption of a well-known binary
+  /// geometry
+  ///
+  /// Briefly, ISO well-known binary is an endian byte (0x01 or 0x00), followed by
+  /// geometry type + dimensions encoded as a (uint32_t), followed by
+  /// geometry-specific data. Coordinate sequences are represented by a uint32_t
+  /// (the number of coordinates) plus a sequence of doubles (number of coordinates
+  /// multiplied by the number of dimensions).
+  ///
+  /// The buffer does not own the bytes it points at. Every read advances the
+  /// cursor and shrinks the remaining size; a read that would run past the end
+  /// throws ParseError instead of touching out-of-range memory.
+  class WKBBuffer {
+   public:
+    WKBBuffer() : data_(nullptr), size_(0) {}
+    /// A negative size is treated as an empty buffer rather than wrapping around
+    /// to a huge unsigned value.
+    WKBBuffer(const uint8_t* data, int64_t size)
+        : data_(data), size_(size > 0 ? static_cast<size_t>(size) : 0) {}
+
+    /// Reads one byte; throws ParseError if the buffer is exhausted.
+    uint8_t readUInt8() {
+      return readChecked<uint8_t>();
+    }
+
+    /// Reads a 32-bit unsigned integer, byte-swapping it when `swap` is true;
+    /// throws ParseError if fewer than four bytes remain.
+    uint32_t readUInt32(bool swap) {
+      auto value = readChecked<uint32_t>();
+      return swap ? byteSwap(value) : value;
+    }
+
+    /// Reads `nCoords` coordinates of type `Coord` and passes each one to `visit`.
+    /// When `swap` is true every component is byte-swapped first.
+    /// Throws ParseError, before reading anything, if the buffer does not hold
+    /// the whole sequence.
+    template <typename Coord, typename Visit>
+    void readCoords(uint32_t nCoords, bool swap, Visit&& visit) {
+      // Validate the whole sequence up front so the unchecked reads below can
+      // never run past the end of the buffer. Comparing coordinate counts rather
+      // than byte totals keeps the check free of multiplication overflow.
+      if (size_ / sizeof(Coord) < nCoords) {
+        std::stringstream ss;
+        ss << "Can't read coordinate sequence of " << nCoords << " coordinates (" << sizeof(Coord)
+           << " bytes each) from WKBBuffer with " << size_ << " remaining";
+        throw ParseError(ss.str());
+      }
+
+      if (swap) {
+        Coord coord;
+        for (uint32_t i = 0; i < nCoords; i++) {
+          coord = readUnchecked<Coord>();
+          for (auto& c : coord) {
+            c = byteSwap(c);
+          }
+
+          std::forward<Visit>(visit)(coord);
+        }
+      } else {
+        for (uint32_t i = 0; i < nCoords; i++) {
+          std::forward<Visit>(visit)(readUnchecked<Coord>());
+        }
+      }
+    }
+
+    /// Number of bytes not yet consumed.
+    size_t size() const {
+      return size_;
+    }
+
+   private:
+    const uint8_t* data_;  // next unread byte (not owned)
+    size_t size_;          // bytes remaining from data_
+
+    // Bounds-checked read of a T; throws ParseError when fewer than sizeof(T)
+    // bytes remain.
+    template <typename T>
+    T readChecked() {
+      if (size_ < sizeof(T)) {
+        std::stringstream ss;
+        ss << "Can't read " << sizeof(T) << " bytes from WKBBuffer with " << size_ << " remaining";
+        throw ParseError(ss.str());
+      }
+
+      return readUnchecked<T>();
+    }
+
+    // Reads a T and advances the cursor. The caller must already have verified
+    // that at least sizeof(T) bytes remain.
+    template <typename T>
+    T readUnchecked() {
+      T out = safeLoadAs<T>(data_);
+      data_ += sizeof(T);
+      size_ -= sizeof(T);
+      return out;
+    }
+  };
+
+  using GeometryTypeAndDimensions = std::pair<GeometryType, Dimensions>;
+
+  namespace {
+
+    std::optional<GeometryTypeAndDimensions> parseGeometryType(uint32_t 
wkbGeometryType) {  // splits a WKB type code into (geometry type, dimensions); nullopt when either part is out of range
+      // The number 1000 can be used because WKB geometry types are constructed
+      // on purpose such that this relationship is true (e.g., LINESTRING ZM 
maps
+      // to 3002).
+      uint32_t geometryTypeComponent = wkbGeometryType % 1000;
+      uint32_t dimensionsComponent = wkbGeometryType / 1000;
+
+      auto minGeometryTypeValue = 
static_cast<uint32_t>(GeometryType::VALUE_MIN);
+      auto maxGeometryTypeValue = 
static_cast<uint32_t>(GeometryType::VALUE_MAX);
+      auto minDimensionValue = static_cast<uint32_t>(Dimensions::VALUE_MIN);
+      auto maxDimensionValue = static_cast<uint32_t>(Dimensions::VALUE_MAX);
+
+      if (geometryTypeComponent < minGeometryTypeValue ||
+          geometryTypeComponent > maxGeometryTypeValue || dimensionsComponent 
< minDimensionValue ||
+          dimensionsComponent > maxDimensionValue) {
+        return std::nullopt;  // unknown geometry type or dimension code
+      }
+
+      return std::make_optional(
+          
GeometryTypeAndDimensions{static_cast<GeometryType>(geometryTypeComponent),
+                                    
static_cast<Dimensions>(dimensionsComponent)});
+    }
+
+  }  // namespace
+
+  std::vector<int32_t> WKBGeometryBounder::geometryTypes() const {  // returns the recorded type codes in ascending order
+    std::vector<int32_t> out(geospatialTypes_.begin(), geospatialTypes_.end());  // copy out of the unordered set
+    std::sort(out.begin(), out.end());
+    return out;
+  }
+
+  void WKBGeometryBounder::mergeGeometry(std::string_view bytesWkb) {  // string_view convenience overload
+    if (!isValid_) {
+      return;  // an invalidated bounder ignores further input
+    }
+    mergeGeometry(reinterpret_cast<const uint8_t*>(bytesWkb.data()), 
bytesWkb.size());
+  }
+
+  void WKBGeometryBounder::mergeGeometry(const uint8_t* bytesWkb, size_t 
bytesSize) {  // parses one WKB value and merges its bounds and type; invalidates the bounder on malformed input
+    if (!isValid_) {
+      return;  // an invalidated bounder ignores further input
+    }
+    WKBBuffer src{bytesWkb, static_cast<int64_t>(bytesSize)};
+    try {
+      mergeGeometryInternal(&src, /*record_wkb_type=*/true);
+    } catch (const ParseError&) {
+      invalidate();  // parse errors are swallowed here and recorded as an invalid bounder
+      return;
+    }
+    if (src.size() != 0) {
+      // Expected zero bytes after consuming WKB
+      invalidate();
+    }
+  }
+
+  void WKBGeometryBounder::mergeGeometryInternal(WKBBuffer* src, bool 
recordWkbType) {  // consumes one geometry (recursively for multi-part kinds) from src and merges its coordinates
+    uint8_t endian = src->readUInt8();
+    bool swap = endian != 0x00;  // big-endian host: swap unless the payload is marked 0x00
+    if (isLittleEndian()) {
+      swap = endian != 0x01;  // little-endian host: swap unless the payload is marked 0x01
+    }
+
+    uint32_t wkbGeometryType = src->readUInt32(swap);
+    auto geometryTypeAndDimensions = parseGeometryType(wkbGeometryType);
+    if (!geometryTypeAndDimensions.has_value()) {
+      invalidate();  // unsupported type code
+      return;
+    }
+    auto& [geometry_type, dimensions] = geometryTypeAndDimensions.value();
+
+    // Keep track of geometry types encountered if at the top level
+    if (recordWkbType) {
+      geospatialTypes_.insert(static_cast<int32_t>(wkbGeometryType));  // the full code is stored, dimension part included
+    }
+
+    switch (geometry_type) {
+      case GeometryType::POINT:
+        mergeSequence(src, dimensions, 1, swap);  // a point is a single coordinate with no count prefix
+        break;
+
+      case GeometryType::LINESTRING: {
+        uint32_t nCoords = src->readUInt32(swap);
+        mergeSequence(src, dimensions, nCoords, swap);
+        break;
+      }
+      case GeometryType::POLYGON: {
+        uint32_t n_parts = src->readUInt32(swap);  // number of coordinate sequences that follow
+        for (uint32_t i = 0; i < n_parts; i++) {
+          uint32_t nCoords = src->readUInt32(swap);
+          mergeSequence(src, dimensions, nCoords, swap);
+        }
+        break;
+      }
+
+      // These are all encoded the same in WKB, even though this encoding would
+      // allow for parts to be of a different geometry type or different 
dimensions.
+      // For the purposes of bounding, this does not cause us problems. We pass
+      // record_wkb_type = false because we do not want the child geometry to 
be
+      // added to the geometry_types list (e.g., for a MultiPoint, we only want
+      // the code for MultiPoint to be added, not the code for Point).
+      case GeometryType::MULTIPOINT:
+      case GeometryType::MULTILINESTRING:
+      case GeometryType::MULTIPOLYGON:
+      case GeometryType::GEOMETRYCOLLECTION: {
+        uint32_t n_parts = src->readUInt32(swap);
+        for (uint32_t i = 0; i < n_parts; i++) {
+          mergeGeometryInternal(src, /*record_wkb_type*/ false);  // NOTE(review): recursion depth follows input nesting; no explicit limit
+        }
+        break;
+      }
+    }
+  }
+
+  void WKBGeometryBounder::mergeSequence(WKBBuffer* src, Dimensions 
dimensions, uint32_t nCoords,
+                                         bool swap) {  // reads nCoords coordinates of the given layout and folds each into box_
+    switch (dimensions) {
+      case Dimensions::XY:
+        src->readCoords<BoundingBox::XY>(nCoords, swap,
+                                         [&](BoundingBox::XY coord) { 
box_.updateXY(coord); });
+        break;
+      case Dimensions::XYZ:
+        src->readCoords<BoundingBox::XYZ>(nCoords, swap,
+                                          [&](BoundingBox::XYZ coord) { 
box_.updateXYZ(coord); });
+        break;
+      case Dimensions::XYM:
+        src->readCoords<BoundingBox::XYM>(nCoords, swap,
+                                          [&](BoundingBox::XYM coord) { 
box_.updateXYM(coord); });
+        break;
+      case Dimensions::XYZM:
+        src->readCoords<BoundingBox::XYZM>(
+            nCoords, swap, [&](BoundingBox::XYZM coord) { 
box_.updateXYZM(coord); });
+        break;
+      default:
+        invalidate();  // dimension value outside the four known layouts
+    }
+  }
+
+}  // namespace orc::geospatial
diff --git a/c++/src/Geospatial.hh b/c++/src/Geospatial.hh
new file mode 100644
index 000000000..aebb72747
--- /dev/null
+++ b/c++/src/Geospatial.hh
@@ -0,0 +1,86 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ORC_GEOSPATIAL_IMPL_HH
+#define ORC_GEOSPATIAL_IMPL_HH
+
+#include "orc/Geospatial.hh"
+
+#include <unordered_set>
+#include <vector>
+
+namespace orc {
+  namespace geospatial {
+    class WKBBuffer;
+
+    class WKBGeometryBounder {  // accumulates a BoundingBox and the set of WKB type codes over merged geometries
+     public:
+      void mergeGeometry(std::string_view bytesWkb);  // NOTE(review): this header does not include <string_view> or <cstdint> itself
+      void mergeGeometry(const uint8_t* bytesWkb, size_t bytesSize);
+
+      void mergeBox(const BoundingBox& box) {  // merges into box_ only; isValid_ is neither checked nor changed
+        box_.merge(box);
+      }
+      void mergeGeometryTypes(const std::vector<int>& geospatialTypes) {  // adds type codes; isValid_ is neither checked nor changed
+        geospatialTypes_.insert(geospatialTypes.begin(), 
geospatialTypes.end());
+      }
+      void merge(const WKBGeometryBounder& other) {  // result is invalid if either side is invalid
+        if (!isValid() || !other.isValid()) {
+          invalidate();
+          return;
+        }
+        box_.merge(other.box_);
+        geospatialTypes_.insert(other.geospatialTypes_.begin(), 
other.geospatialTypes_.end());
+      }
+
+      // Get the bounding box for the merged geometries.
+      const BoundingBox& bounds() const {
+        return box_;
+      }
+
+      // Get the set of geometry types encountered during merging.
+      // Returns a sorted vector of geometry type IDs.
+      std::vector<int32_t> geometryTypes() const;
+
+      void reset() {  // back to the initial state: valid, empty box, no recorded types
+        isValid_ = true;
+        box_.reset();
+        geospatialTypes_.clear();
+      }
+      bool isValid() const {
+        return isValid_;
+      }
+      void invalidate() {  // marks the bounder invalid, sets every bound to NaN and drops the recorded types
+        isValid_ = false;
+        box_.invalidate();
+        geospatialTypes_.clear();
+      }
+
+     private:
+      BoundingBox box_;  // union of all coordinates merged so far
+      std::unordered_set<int32_t> geospatialTypes_;  // WKB type codes recorded for top-level geometries
+      bool isValid_ = true;  // cleared by invalidate(), restored by reset()
+
+      void mergeGeometryInternal(WKBBuffer* src, bool recordWkbType);
+      void mergeSequence(WKBBuffer* src, Dimensions dimensions, uint32_t 
nCoords, bool swap);
+    };
+  }  // namespace geospatial
+}  // namespace orc
+
+#endif
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 17bf83520..349ae1b40 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -873,6 +873,8 @@ namespace orc {
       case proto::Type_Kind_CHAR:
       case proto::Type_Kind_STRING:
       case proto::Type_Kind_VARCHAR:
+      case proto::Type_Kind_GEOMETRY:
+      case proto::Type_Kind_GEOGRAPHY:
         return 4;
       default:
         return 0;
diff --git a/c++/src/SchemaEvolution.cc b/c++/src/SchemaEvolution.cc
index 7cf3b5c51..442c43c22 100644
--- a/c++/src/SchemaEvolution.cc
+++ b/c++/src/SchemaEvolution.cc
@@ -18,6 +18,7 @@
 
 #include "SchemaEvolution.hh"
 #include "orc/Exceptions.hh"
+#include "orc/Type.hh"
 
 namespace orc {
 
@@ -113,7 +114,9 @@ namespace orc {
         case TIMESTAMP:
         case TIMESTAMP_INSTANT:
         case DATE:
-        case BINARY: {
+        case BINARY:
+        case GEOMETRY:
+        case GEOGRAPHY: {
           // Not support
           break;
         }
@@ -235,6 +238,8 @@ namespace orc {
         case FLOAT:
         case DOUBLE:
         case BINARY:
+        case GEOMETRY:
+        case GEOGRAPHY:
         case TIMESTAMP:
         case LIST:
         case MAP:
diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc
index c1a23cad1..a86247f10 100644
--- a/c++/src/Statistics.cc
+++ b/c++/src/Statistics.cc
@@ -44,6 +44,8 @@ namespace orc {
       return new DateColumnStatisticsImpl(s, statContext);
     } else if (s.has_binary_statistics()) {
       return new BinaryColumnStatisticsImpl(s, statContext);
+    } else if (s.has_geospatial_statistics()) {
+      return new GeospatialColumnStatisticsImpl(s);
     } else {
       return new ColumnStatisticsImpl(s);
     }
@@ -148,6 +150,10 @@ namespace orc {
     // PASS
   }
 
+  GeospatialColumnStatistics::~GeospatialColumnStatistics() {
+    // PASS: intentionally empty
+  }
+
   ColumnStatisticsImpl::~ColumnStatisticsImpl() {
     // PASS
   }
@@ -188,6 +194,10 @@ namespace orc {
     // PASS
   }
 
+  GeospatialColumnStatisticsImpl::~GeospatialColumnStatisticsImpl() {
+    // PASS: intentionally empty
+  }
+
   ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& 
pb) {
     stats_.setNumberOfValues(pb.number_of_values());
     stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true);
@@ -391,6 +401,40 @@ namespace orc {
     }
   }
 
+  /**
+   * Rebuild geospatial column statistics from their protobuf form.
+   *
+   * The value count and has-null flag are restored exactly as the generic
+   * ColumnStatisticsImpl constructor restores them. When the message carries
+   * no geospatial statistics the bounder is invalidated; otherwise the stored
+   * bounding box and geometry type codes are merged into the freshly reset
+   * bounder.
+   * @param pb the serialized column statistics
+   */
+  GeospatialColumnStatisticsImpl::GeospatialColumnStatisticsImpl(
+      const proto::ColumnStatistics& pb) {
+    reset();
+    // Without these two calls the deserialized object would keep whatever
+    // reset() left in stats_ instead of the count and null flag from the file.
+    stats_.setNumberOfValues(pb.number_of_values());
+    stats_.setHasNull(pb.has_has_null() ? pb.has_null() : true);
+    if (!pb.has_geospatial_statistics()) {
+      bounder_.invalidate();
+    } else {
+      const proto::GeospatialStatistics& stats = pb.geospatial_statistics();
+      // Every bound starts out as NaN; only the bounds actually present in the
+      // message are overwritten below.
+      geospatial::BoundingBox::XYZM min;
+      geospatial::BoundingBox::XYZM max;
+      for (int i = 0; i < geospatial::MAX_DIMENSIONS; i++) {
+        min[i] = max[i] = std::numeric_limits<double>::quiet_NaN();
+      }
+      if (stats.has_bbox()) {
+        const auto& protoBBox = stats.bbox();
+        min[0] = protoBBox.xmin();
+        min[1] = protoBBox.ymin();
+        max[0] = protoBBox.xmax();
+        max[1] = protoBBox.ymax();
+        // Z and M are optional and are only taken when both ends are present.
+        if (protoBBox.has_zmin() && protoBBox.has_zmax()) {
+          min[2] = protoBBox.zmin();
+          max[2] = protoBBox.zmax();
+        }
+        if (protoBBox.has_mmin() && protoBBox.has_mmax()) {
+          min[3] = protoBBox.mmin();
+          max[3] = protoBBox.mmax();
+        }
+      }
+      bounder_.mergeBox(geospatial::BoundingBox(min, max));
+      std::vector<int32_t> types = {stats.geospatial_types().begin(),
+                                    stats.geospatial_types().end()};
+      bounder_.mergeGeometryTypes(types);
+    }
+  }
+
   std::unique_ptr<MutableColumnStatistics> createColumnStatistics(const Type& 
type) {
     switch (static_cast<int64_t>(type.getKind())) {
       case BOOLEAN:
@@ -422,6 +466,9 @@ namespace orc {
         return std::make_unique<TimestampColumnStatisticsImpl>();
       case DECIMAL:
         return std::make_unique<DecimalColumnStatisticsImpl>();
+      case GEOGRAPHY:
+      case GEOMETRY:
+        return std::make_unique<GeospatialColumnStatisticsImpl>();
       default:
         throw NotImplementedYet("Not supported type: " + type.toString());
     }
diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh
index b7ed5d1e5..94b1e5d2b 100644
--- a/c++/src/Statistics.hh
+++ b/c++/src/Statistics.hh
@@ -24,6 +24,7 @@
 #include "orc/OrcFile.hh"
 #include "orc/Reader.hh"
 
+#include "Geospatial.hh"
 #include "Timezone.hh"
 #include "TypeImpl.hh"
 
@@ -1683,6 +1684,127 @@ namespace orc {
     }
   };
 
+  /**
+   * Mutable column statistics for GEOMETRY and GEOGRAPHY columns.
+   *
+   * Value count and null tracking are delegated to stats_, while the bounding
+   * box and the set of geometry type codes are accumulated by bounder_ from
+   * the WKB values passed to update().
+   */
+  class GeospatialColumnStatisticsImpl : public GeospatialColumnStatistics,
+                                         public MutableColumnStatistics {
+   private:
+    geospatial::WKBGeometryBounder bounder_;
+    InternalCharStatistics stats_;
+
+   public:
+    GeospatialColumnStatisticsImpl() {
+      reset();
+    }
+    explicit GeospatialColumnStatisticsImpl(const proto::ColumnStatistics& stats);
+    virtual ~GeospatialColumnStatisticsImpl();
+
+    uint64_t getNumberOfValues() const override {
+      return stats_.getNumberOfValues();
+    }
+
+    void setNumberOfValues(uint64_t value) override {
+      stats_.setNumberOfValues(value);
+    }
+
+    // Add `count` to the number of values recorded so far.
+    void increase(uint64_t count) override {
+      const uint64_t current = stats_.getNumberOfValues();
+      stats_.setNumberOfValues(current + count);
+    }
+
+    bool hasNull() const override {
+      return stats_.hasNull();
+    }
+
+    void setHasNull(bool hasNull) override {
+      stats_.setHasNull(hasNull);
+    }
+
+    // Fold another geospatial statistics object into this one. The
+    // reference dynamic_cast throws if `other` is of a different type.
+    void merge(const MutableColumnStatistics& other) override {
+      const auto& rhs = dynamic_cast<const GeospatialColumnStatisticsImpl&>(other);
+      stats_.merge(rhs.stats_);
+      bounder_.merge(rhs.bounder_);
+    }
+
+    void reset() override {
+      stats_.reset();
+      bounder_.reset();
+    }
+
+    // Feed one serialized geometry (`length` bytes starting at `value`) to
+    // the bounder.
+    void update(const char* value, size_t length) override {
+      const std::string_view wkb(value, length);
+      bounder_.mergeGeometry(wkb);
+    }
+
+    // Serialize into `pbStats`. A bounding box is written only when both X
+    // and Y are valid and non-empty; Z and M are added individually on top.
+    void toProtoBuf(proto::ColumnStatistics& pbStats) const override {
+      pbStats.set_has_null(stats_.hasNull());
+      pbStats.set_number_of_values(stats_.getNumberOfValues());
+
+      proto::GeospatialStatistics* pbGeo = pbStats.mutable_geospatial_statistics();
+      const auto& box = bounder_.bounds();
+      const auto hasData = [&box](int dim) {
+        return box.boundValid(dim) && !box.boundEmpty(dim);
+      };
+      if (hasData(0) && hasData(1)) {
+        auto* pbBox = pbGeo->mutable_bbox();
+        pbBox->set_xmin(box.min[0]);
+        pbBox->set_xmax(box.max[0]);
+        pbBox->set_ymin(box.min[1]);
+        pbBox->set_ymax(box.max[1]);
+        if (hasData(2)) {
+          pbBox->set_zmin(box.min[2]);
+          pbBox->set_zmax(box.max[2]);
+        }
+        if (hasData(3)) {
+          pbBox->set_mmin(box.min[3]);
+          pbBox->set_mmax(box.max[3]);
+        }
+      }
+      for (auto typeCode : bounder_.geometryTypes()) {
+        pbGeo->add_geospatial_types(typeCode);
+      }
+    }
+
+    // Human-readable summary: one entry per dimension (x, y, z, m) followed
+    // by the list of geometry type codes.
+    std::string toString() const override {
+      if (!bounder_.isValid()) {
+        return "<GeoStatistics> invalid";
+      }
+
+      const auto& box = bounder_.bounds();
+      const auto valid = box.dimensionValid();
+      const auto empty = box.dimensionEmpty();
+      const auto low = box.lowerBound();
+      const auto high = box.upperBound();
+      const char axisNames[] = "xyzm";
+
+      std::stringstream out;
+      out << "<GeoStatistics>";
+      for (int dim = 0; dim < 4; dim++) {
+        out << " " << axisNames[dim] << ": ";
+        if (!valid[dim]) {
+          out << "invalid";
+          continue;
+        }
+        if (empty[dim]) {
+          out << "empty";
+          continue;
+        }
+        out << "[" << low[dim] << ", " << high[dim] << "]";
+      }
+
+      out << " geometry_types: [";
+      const char* separator = "";
+      const std::vector<int32_t> typeCodes = bounder_.geometryTypes();
+      for (int32_t typeCode : typeCodes) {
+        out << separator << typeCode;
+        separator = ", ";
+      }
+      out << "]";
+
+      return out.str();
+    }
+
+    const geospatial::BoundingBox& getBoundingBox() const override {
+      return bounder_.bounds();
+    }
+
+    std::vector<int32_t> getGeospatialTypes() const override {
+      return bounder_.geometryTypes();
+    }
+  };
+
   ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s,
                                             const StatContext& statContext);
 
diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc
index cbc7b8279..18c4985ab 100644
--- a/c++/src/TypeImpl.cc
+++ b/c++/src/TypeImpl.cc
@@ -19,8 +19,10 @@
 #include "TypeImpl.hh"
 #include "Adaptor.hh"
 #include "orc/Exceptions.hh"
+#include "orc/Type.hh"
 
 #include <iostream>
+#include <memory>
 #include <sstream>
 
 namespace orc {
@@ -62,6 +64,33 @@ namespace orc {
     subtypeCount_ = 0;
   }
 
+  // CRS-only constructor (used for geometry types): identical to the
+  // three-argument form with the SPHERICAL edge interpolation algorithm.
+  TypeImpl::TypeImpl(TypeKind kind, const std::string& crs)
+      : TypeImpl(kind, crs, geospatial::EdgeInterpolationAlgorithm::SPHERICAL) {}
+
+  // Constructor carrying both a CRS and an edge interpolation algorithm
+  // (used for geography types).
+  TypeImpl::TypeImpl(TypeKind kind, const std::string& crs,
+                     geospatial::EdgeInterpolationAlgorithm algo) {
+    // Geospatial attributes supplied by the caller.
+    kind_ = kind;
+    crs_ = crs;
+    edgeInterpolationAlgorithm_ = algo;
+    // No parent, no column ids and no subtypes yet.
+    parent_ = nullptr;
+    columnId_ = -1;
+    maximumColumnId_ = -1;
+    subtypeCount_ = 0;
+    // Length, precision and scale are zeroed for this kind of type.
+    maxLength_ = 0;
+    precision_ = 0;
+    scale_ = 0;
+  }
+
   uint64_t TypeImpl::assignIds(uint64_t root) const {
     columnId_ = static_cast<int64_t>(root);
     uint64_t current = root + 1;
@@ -120,6 +149,14 @@ namespace orc {
     return scale_;
   }
 
+  const std::string& TypeImpl::getCrs() const {  // reference to the stored crs_, no copy
+    return crs_;
+  }
+
+  geospatial::EdgeInterpolationAlgorithm TypeImpl::getAlgorithm() const {  // by value
+    return edgeInterpolationAlgorithm_;
+  }
+
   Type& TypeImpl::setAttribute(const std::string& key, const std::string& 
value) {
     attributes_[key] = value;
     return *this;
@@ -189,6 +226,45 @@ namespace orc {
     return true;
   }
 
+  namespace geospatial {
+    /**
+     * Lower-case name of an edge interpolation algorithm, as printed inside
+     * a geography type string.
+     * @throws InvalidArgument if `algo` is not one of the known enumerators
+     */
+    std::string AlgoToString(EdgeInterpolationAlgorithm algo) {
+      switch (algo) {
+        case EdgeInterpolationAlgorithm::SPHERICAL:
+          return "spherical";
+        case EdgeInterpolationAlgorithm::VINCENTY:
+          return "vincenty";
+        case EdgeInterpolationAlgorithm::THOMAS:
+          return "thomas";
+        case EdgeInterpolationAlgorithm::ANDOYER:
+          return "andoyer";
+        case EdgeInterpolationAlgorithm::KARNEY:
+          return "karney";
+        default:
+          throw InvalidArgument("Unknown algo");
+      }
+    }
+
+    /**
+     * Inverse of AlgoToString.
+     * @param algo lower-case algorithm name
+     * @throws InvalidArgument if the name is not recognized
+     */
+    EdgeInterpolationAlgorithm AlgoFromString(const std::string& algo) {
+      // "speherial" is the misspelling this function originally required; it
+      // is still accepted so type strings produced with it keep parsing.
+      if (algo == "spherical" || algo == "speherial") {
+        return EdgeInterpolationAlgorithm::SPHERICAL;
+      }
+      if (algo == "vincenty") {
+        return EdgeInterpolationAlgorithm::VINCENTY;
+      }
+      if (algo == "thomas") {
+        return EdgeInterpolationAlgorithm::THOMAS;
+      }
+      if (algo == "andoyer") {
+        return EdgeInterpolationAlgorithm::ANDOYER;
+      }
+      if (algo == "karney") {
+        return EdgeInterpolationAlgorithm::KARNEY;
+      }
+      throw InvalidArgument("Unknown algo: " + algo);
+    }
+
+  }  // namespace geospatial
+
   std::string TypeImpl::toString() const {
     switch (static_cast<int64_t>(kind_)) {
       case BOOLEAN:
@@ -271,6 +347,17 @@ namespace orc {
         result << "char(" << maxLength_ << ")";
         return result.str();
       }
+      case GEOMETRY: {
+        std::stringstream result;
+        result << "geometry(" << crs_ << ")";
+        return result.str();
+      }
+      case GEOGRAPHY: {
+        std::stringstream result;
+        result << "geography(" << crs_ << ","
+               << geospatial::AlgoToString(edgeInterpolationAlgorithm_) << ")";
+        return result.str();
+      }
       default:
         throw NotImplementedYet("Unknown type");
     }
@@ -322,6 +409,8 @@ namespace orc {
       case BINARY:
       case CHAR:
       case VARCHAR:
+      case GEOMETRY:
+      case GEOGRAPHY:
         return encoded ? std::make_unique<EncodedStringVectorBatch>(capacity, 
memoryPool)
                        : std::make_unique<StringVectorBatch>(capacity, 
memoryPool);
 
@@ -419,6 +508,15 @@ namespace orc {
     return std::make_unique<TypeImpl>(UNION);
   }
 
+  // Factory for a GEOMETRY type with the given coordinate reference system.
+  std::unique_ptr<Type> createGeometryType(const std::string& crs) {
+    std::unique_ptr<Type> geometryType = std::make_unique<TypeImpl>(GEOMETRY, crs);
+    return geometryType;
+  }
+
+  // Factory for a GEOGRAPHY type with the given coordinate reference system
+  // and edge interpolation algorithm.
+  std::unique_ptr<Type> createGeographyType(const std::string& crs,
+                                            geospatial::EdgeInterpolationAlgorithm algo) {
+    std::unique_ptr<Type> geographyType = std::make_unique<TypeImpl>(GEOGRAPHY, crs, algo);
+    return geographyType;
+  }
+
   std::string printProtobufMessage(const google::protobuf::Message& message);
   std::unique_ptr<Type> convertType(const proto::Type& type, const 
proto::Footer& footer) {
     std::unique_ptr<Type> ret;
@@ -443,6 +541,16 @@ namespace orc {
         ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()), 
type.maximum_length());
         break;
 
+      case proto::Type_Kind_GEOMETRY:
+        ret = std::make_unique<TypeImpl>(static_cast<TypeKind>(type.kind()), 
type.crs());
+        break;
+
+      case proto::Type_Kind_GEOGRAPHY:
+        ret = std::make_unique<TypeImpl>(
+            static_cast<TypeKind>(type.kind()), type.crs(),
+            
static_cast<geospatial::EdgeInterpolationAlgorithm>(type.algorithm()));
+        break;
+
       case proto::Type_Kind_DECIMAL:
         ret = std::make_unique<TypeImpl>(DECIMAL, type.precision(), 
type.scale());
         break;
@@ -523,6 +631,13 @@ namespace orc {
       case CHAR:
         result = std::make_unique<TypeImpl>(fileType->getKind(), 
fileType->getMaximumLength());
         break;
+      case GEOMETRY:
+        result = std::make_unique<TypeImpl>(fileType->getKind(), 
fileType->getCrs());
+        break;
+      case GEOGRAPHY:
+        result = std::make_unique<TypeImpl>(fileType->getKind(), 
fileType->getCrs(),
+                                            fileType->getAlgorithm());
+        break;
 
       case LIST:
         result = std::make_unique<TypeImpl>(fileType->getKind());
@@ -710,6 +825,22 @@ namespace orc {
     return std::make_unique<TypeImpl>(DECIMAL, precision, scale);
   }
 
+  // Parse the "(crs,algorithm)" suffix of a geography type. input[start] must
+  // be '(' and `end` is used as the exclusive end of the algorithm name.
+  // Throws std::logic_error on a malformed suffix; an unknown algorithm name
+  // is reported by geospatial::AlgoFromString.
+  std::unique_ptr<Type> TypeImpl::parseGeographyType(const std::string& input, size_t start,
+                                                     size_t end) {
+    if (input[start] != '(') {
+      throw std::logic_error("Missing ( after geography.");
+    }
+    const size_t crsBegin = start + 1;
+    const size_t comma = input.find(',', crsBegin);
+    // A comma must exist and leave at least one character before `end`.
+    if (comma == std::string::npos || comma + 1 >= end) {
+      throw std::logic_error("Geography type must specify CRS.");
+    }
+    const size_t algoBegin = comma + 1;
+    std::string crs = input.substr(crsBegin, comma - crsBegin);
+    std::string algoName = input.substr(algoBegin, end - algoBegin);
+    geospatial::EdgeInterpolationAlgorithm algo = geospatial::AlgoFromString(algoName);
+    return std::make_unique<TypeImpl>(GEOGRAPHY, crs, algo);
+  }
+
   void validatePrimitiveType(std::string category, const std::string& input, 
const size_t pos) {
     if (input[pos] == '<' || input[pos] == '(') {
       std::ostringstream oss;
@@ -780,6 +911,14 @@ namespace orc {
       uint64_t maxLength =
           static_cast<uint64_t>(atoi(input.substr(start + 1, end - start + 
1).c_str()));
       return std::make_unique<TypeImpl>(CHAR, maxLength);
+    } else if (category == "geometry") {
+      if (input[start] != '(') {
+        throw std::logic_error("Missing ( after geometry.");
+      }
+      // input[start] is '(' and `end` is treated as exclusive, the same way
+      // parseGeographyType treats it, so the CRS is the end - start - 1
+      // characters in between. The previous length (end - start + 1) pulled
+      // the closing ')' and one more character into the CRS.
+      std::string crs = input.substr(start + 1, end - start - 1);
+      return std::make_unique<TypeImpl>(GEOMETRY, crs);
+    } else if (category == "geography") {
+      return parseGeographyType(input, start, end);
     } else {
       throw std::logic_error("Unknown type " + category);
     }
diff --git a/c++/src/TypeImpl.hh b/c++/src/TypeImpl.hh
index 647d5a5d2..2db175aba 100644
--- a/c++/src/TypeImpl.hh
+++ b/c++/src/TypeImpl.hh
@@ -24,6 +24,7 @@
 #include "Adaptor.hh"
 #include "wrap/orc-proto-wrapper.hh"
 
+#include <memory>
 #include <vector>
 
 namespace orc {
@@ -41,6 +42,9 @@ namespace orc {
     uint64_t precision_;
     uint64_t scale_;
     std::map<std::string, std::string> attributes_;
+    std::string crs_;
+    geospatial::EdgeInterpolationAlgorithm edgeInterpolationAlgorithm_ =
+        geospatial::EdgeInterpolationAlgorithm::SPHERICAL;
 
    public:
     /**
@@ -58,6 +62,16 @@ namespace orc {
      */
     TypeImpl(TypeKind kind, uint64_t precision, uint64_t scale);
 
+    /**
+     * Create geometry type.
+     */
+    TypeImpl(TypeKind kind, const std::string& crs);
+
+    /**
+     * Create geography type.
+     */
+    TypeImpl(TypeKind kind, const std::string& crs, 
geospatial::EdgeInterpolationAlgorithm algo);
+
     uint64_t getColumnId() const override;
 
     uint64_t getMaximumColumnId() const override;
@@ -76,6 +90,10 @@ namespace orc {
 
     uint64_t getScale() const override;
 
+    const std::string& getCrs() const override;
+
+    geospatial::EdgeInterpolationAlgorithm getAlgorithm() const override;
+
     Type& setAttribute(const std::string& key, const std::string& value) 
override;
 
     bool hasAttributeKey(const std::string& key) const override;
@@ -176,6 +194,14 @@ namespace orc {
     static std::unique_ptr<Type> parseDecimalType(const std::string& input, 
size_t start,
                                                   size_t end);
 
+    /**
+     * Parse geography type from string
+     * @param input the input string of a geography type
+     * @param start start position of the input string
+     * @param end end position of the input string
+     */
+    static std::unique_ptr<Type> parseGeographyType(const std::string& input, 
size_t start,
+                                                    size_t end);
     /**
      * Parse type for a category
      * @param category type name
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 775e6d245..c235169cc 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -24,6 +24,7 @@
 #include "Utils.hh"
 
 #include <memory>
+#include <stdexcept>
 
 namespace orc {
 
@@ -702,6 +703,40 @@ namespace orc {
         protoType.set_kind(proto::Type_Kind_CHAR);
         break;
       }
+      case GEOMETRY: {
+        protoType.set_kind(proto::Type_Kind_GEOMETRY);
+        protoType.set_crs(t.getCrs());
+        break;
+      }
+      case GEOGRAPHY: {
+        protoType.set_kind(proto::Type_Kind_GEOGRAPHY);
+        protoType.set_crs(t.getCrs());
+        // Map each in-memory algorithm onto the protobuf enumerator of the
+        // same name.
+        switch (t.getAlgorithm()) {
+          case geospatial::EdgeInterpolationAlgorithm::SPHERICAL: {
+            protoType.set_algorithm(proto::Type_EdgeInterpolationAlgorithm_SPHERICAL);
+            break;
+          }
+          case geospatial::EdgeInterpolationAlgorithm::VINCENTY: {
+            protoType.set_algorithm(proto::Type_EdgeInterpolationAlgorithm_VINCENTY);
+            break;
+          }
+          case geospatial::EdgeInterpolationAlgorithm::THOMAS: {
+            // This case used to write VINCENTY, so a THOMAS geography column
+            // came back from the file with the wrong algorithm.
+            protoType.set_algorithm(proto::Type_EdgeInterpolationAlgorithm_THOMAS);
+            break;
+          }
+          case geospatial::EdgeInterpolationAlgorithm::ANDOYER: {
+            protoType.set_algorithm(proto::Type_EdgeInterpolationAlgorithm_ANDOYER);
+            break;
+          }
+          case geospatial::EdgeInterpolationAlgorithm::KARNEY: {
+            protoType.set_algorithm(proto::Type_EdgeInterpolationAlgorithm_KARNEY);
+            break;
+          }
+          default:
+            throw std::invalid_argument("Unknown Algorithm.");
+        }
+        break;
+      }
       default:
         throw std::logic_error("Unknown type.");
     }
diff --git a/c++/src/meson.build b/c++/src/meson.build
index 3d77d3242..0794dec84 100644
--- a/c++/src/meson.build
+++ b/c++/src/meson.build
@@ -151,6 +151,7 @@ source_files += files(
     'ConvertColumnReader.cc',
     'CpuInfoUtil.cc',
     'Exceptions.cc',
+    'Geospatial.cc',
     'Int128.cc',
     'LzoDecompressor.cc',
     'MemoryPool.cc',
diff --git a/c++/test/CMakeLists.txt b/c++/test/CMakeLists.txt
index f7328abb3..3261fedde 100644
--- a/c++/test/CMakeLists.txt
+++ b/c++/test/CMakeLists.txt
@@ -56,12 +56,14 @@ add_executable (orc-test
   TestRleEncoder.cc
   TestRLEV2Util.cc
   TestSargsApplier.cc
+  TestStatistics.cc
   TestSearchArgument.cc
   TestSchemaEvolution.cc
   TestStripeIndexStatistics.cc
   TestTimestampStatistics.cc
   TestTimezone.cc
   TestType.cc
+  TestUtil.cc
   TestWriter.cc
   TestCache.cc
   ${SIMD_TEST_SRCS}
diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc
index 5cf2d9e41..642a8019d 100644
--- a/c++/test/TestColumnStatistics.cc
+++ b/c++/test/TestColumnStatistics.cc
@@ -17,6 +17,7 @@
  */
 
 #include "Statistics.hh"
+#include "TestUtil.hh"
 #include "orc/OrcFile.hh"
 #include "wrap/gmock.h"
 #include "wrap/gtest-wrapper.h"
@@ -531,4 +532,345 @@ namespace orc {
     collectionStats->merge(*other);
     EXPECT_FALSE(collectionStats->hasTotalChildren());
   }
+
+  // Freshly constructed statistics: no geometry types, and every dimension
+  // is valid but empty.
+  TEST(ColumnStatistics, TestGeospatialDefaults) {
+    auto geoStats = std::make_unique<GeospatialColumnStatisticsImpl>();
+    EXPECT_TRUE(geoStats->getGeospatialTypes().empty());
+    auto box = geoStats->getBoundingBox();
+    for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
+      EXPECT_TRUE(box.boundEmpty(dim));
+      EXPECT_TRUE(box.boundValid(dim));
+    }
+    EXPECT_EQ("<GeoStatistics> x: empty y: empty z: empty m: empty geometry_types: []",
+              geoStats->toString());
+  }
+
+  // Feeds a sequence of WKB points into the statistics and, after each one,
+  // compares the bounding box and type list with a hand-maintained model.
+  TEST(ColumnStatistics, TestGeospatialUpdate) {
+    auto geoStats = std::make_unique<GeospatialColumnStatisticsImpl>();
+    EXPECT_TRUE(geoStats->getGeospatialTypes().empty());
+    const auto& initialBox = geoStats->getBoundingBox();
+    for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
+      EXPECT_TRUE(initialBox.boundEmpty(dim));
+      EXPECT_TRUE(initialBox.boundValid(dim));
+    }
+    EXPECT_EQ(geoStats->getGeospatialTypes().size(), 0);
+
+    // Expected state, adjusted by hand before every check.
+    geospatial::BoundingBox::XYZM expectedMin;
+    geospatial::BoundingBox::XYZM expectedMax;
+    std::array<bool, geospatial::MAX_DIMENSIONS> expectedEmpty;
+    std::array<bool, geospatial::MAX_DIMENSIONS> expectedValid;
+    std::vector<int32_t> expectedTypes;
+    for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
+      expectedMin[dim] = geospatial::INF;
+      expectedMax[dim] = -geospatial::INF;
+      expectedEmpty[dim] = true;
+      expectedValid[dim] = true;
+    }
+
+    // Compare the statistics with the expected state above.
+    auto checkState = [&]() {
+      const auto& box = geoStats->getBoundingBox();
+      EXPECT_EQ(expectedEmpty, box.dimensionEmpty());
+      EXPECT_EQ(expectedValid, box.dimensionValid());
+      EXPECT_EQ(expectedTypes, geoStats->getGeospatialTypes());
+      for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
+        if (box.boundValid(dim)) {
+          EXPECT_EQ(expectedMin[dim], box.lowerBound()[dim]);
+          EXPECT_EQ(expectedMax[dim], box.upperBound()[dim]);
+        } else {
+          EXPECT_TRUE(std::isnan(box.lowerBound()[dim]));
+          EXPECT_TRUE(std::isnan(box.upperBound()[dim]));
+        }
+      }
+    };
+    // Hand one serialized value to the statistics.
+    auto feed = [&](const std::string& wkb) { geoStats->update(wkb.c_str(), wkb.size()); };
+
+    // XY point (type code 1).
+    feed(MakeWKBPoint({10, 11}, false, false));
+    expectedMin[0] = expectedMax[0] = 10;
+    expectedMin[1] = expectedMax[1] = 11;
+    expectedEmpty[0] = expectedEmpty[1] = false;
+    expectedTypes.push_back(1);
+    checkState();
+
+    // XYZ point (type code 1001).
+    feed(MakeWKBPoint({11, 12, 13}, true, false));
+    expectedMax[0] = 11;
+    expectedMax[1] = 12;
+    expectedMin[2] = expectedMax[2] = 13;
+    expectedEmpty[2] = false;
+    expectedTypes.push_back(1001);
+    checkState();
+
+    // XYM point (type code 2001).
+    feed(MakeWKBPoint({9, 10, 0, 11}, false, true));
+    expectedMin[0] = 9;
+    expectedMin[1] = 10;
+    expectedMin[3] = expectedMax[3] = 11;
+    expectedEmpty[3] = false;
+    expectedTypes.push_back(2001);
+    checkState();
+
+    // XYZM point (type code 3001).
+    feed(MakeWKBPoint({8, 9, 10, 12}, true, true));
+    expectedMin[0] = 8;
+    expectedMin[1] = 9;
+    expectedMin[2] = 10;
+    expectedMax[3] = 12;
+    expectedTypes.push_back(3001);
+    checkState();
+
+    // A point whose coordinates are all NaN leaves the expected state as is.
+    const double nan = std::numeric_limits<double>::quiet_NaN();
+    feed(MakeWKBPoint({nan, nan, nan, nan}, true, false));
+    checkState();
+
+    // An empty buffer is not valid WKB: every dimension turns invalid and
+    // the type list is cleared.
+    feed(std::string());
+    expectedValid[0] = expectedValid[1] = expectedValid[2] = expectedValid[3] = false;
+    expectedTypes.clear();
+    checkState();
+
+    // Another XY point after the invalid value changes nothing.
+    feed(MakeWKBPoint({10, 11}, false, false));
+    checkState();
+  }
+
+  // Serializes the statistics after each update and inspects the protobuf.
+  TEST(ColumnStatistics, TestGeospatialToProto) {
+    auto geoStats = std::make_unique<GeospatialColumnStatisticsImpl>();
+    proto::ColumnStatistics pbStats;
+    // Re-serialize from scratch and hand back the geospatial part.
+    auto serialize = [&]() -> const proto::GeospatialStatistics& {
+      pbStats.Clear();
+      geoStats->toProtoBuf(pbStats);
+      EXPECT_TRUE(pbStats.has_geospatial_statistics());
+      return pbStats.geospatial_statistics();
+    };
+
+    // Empty statistics: the message exists but has neither types nor a box.
+    {
+      const auto& pbGeo = serialize();
+      EXPECT_EQ(0, pbGeo.geospatial_types().size());
+      EXPECT_FALSE(pbGeo.has_bbox());
+    }
+
+    // After one XY point only the X and Y bounds are written.
+    {
+      std::string xy = MakeWKBPoint({10, 11}, false, false);
+      geoStats->update(xy.c_str(), xy.size());
+      const auto& pbGeo = serialize();
+      EXPECT_EQ(1, pbGeo.geospatial_types().size());
+      EXPECT_EQ(1, pbGeo.geospatial_types(0));
+      EXPECT_TRUE(pbGeo.has_bbox());
+      const auto& pbBox = pbGeo.bbox();
+      EXPECT_TRUE(pbBox.has_xmin());
+      EXPECT_TRUE(pbBox.has_xmax());
+      EXPECT_TRUE(pbBox.has_ymin());
+      EXPECT_TRUE(pbBox.has_ymax());
+      EXPECT_FALSE(pbBox.has_zmin());
+      EXPECT_FALSE(pbBox.has_zmax());
+      EXPECT_FALSE(pbBox.has_mmin());
+      EXPECT_FALSE(pbBox.has_mmax());
+      EXPECT_EQ(10, pbBox.xmin());
+      EXPECT_EQ(10, pbBox.xmax());
+      EXPECT_EQ(11, pbBox.ymin());
+      EXPECT_EQ(11, pbBox.ymax());
+    }
+
+    // After an additional XYZM point all four dimensions are written.
+    {
+      std::string xyzm = MakeWKBPoint({-10, -11, -12, -13}, true, true);
+      geoStats->update(xyzm.c_str(), xyzm.size());
+      const auto& pbGeo = serialize();
+      EXPECT_EQ(2, pbGeo.geospatial_types().size());
+      EXPECT_EQ(1, pbGeo.geospatial_types(0));
+      EXPECT_EQ(3001, pbGeo.geospatial_types(1));
+      EXPECT_TRUE(pbGeo.has_bbox());
+      const auto& pbBox = pbGeo.bbox();
+      EXPECT_TRUE(pbBox.has_xmin());
+      EXPECT_TRUE(pbBox.has_xmax());
+      EXPECT_TRUE(pbBox.has_ymin());
+      EXPECT_TRUE(pbBox.has_ymax());
+      EXPECT_TRUE(pbBox.has_zmin());
+      EXPECT_TRUE(pbBox.has_zmax());
+      EXPECT_TRUE(pbBox.has_mmin());
+      EXPECT_TRUE(pbBox.has_mmax());
+      EXPECT_EQ(-10, pbBox.xmin());
+      EXPECT_EQ(10, pbBox.xmax());
+      EXPECT_EQ(-11, pbBox.ymin());
+      EXPECT_EQ(11, pbBox.ymax());
+      EXPECT_EQ(-12, pbBox.zmin());
+      EXPECT_EQ(-12, pbBox.zmax());
+      EXPECT_EQ(-13, pbBox.mmin());
+      EXPECT_EQ(-13, pbBox.mmax());
+    }
+
+    // An empty (invalid) WKB value wipes both the types and the box.
+    {
+      std::string invalidWKB;
+      geoStats->update(invalidWKB.c_str(), invalidWKB.size());
+      const auto& pbGeo = serialize();
+      EXPECT_EQ(0, pbGeo.geospatial_types().size());
+      EXPECT_FALSE(pbGeo.has_bbox());
+    }
+  }
+
+  // Merges invalid, empty, XY, XYZ and XYZM statistics into one another and
+  // checks the accumulated bounding box and type list after every merge.
+  TEST(ColumnStatistics, TestGeospatialMerge) {
+    // Statistics that have seen exactly one serialized value.
+    auto statsOf = [](const std::string& wkb) {
+      auto stats = std::make_unique<GeospatialColumnStatisticsImpl>();
+      stats->update(wkb.c_str(), wkb.size());
+      return stats;
+    };
+
+    // A zero-length value leaves the statistics invalid.
+    auto invalidStats = std::make_unique<GeospatialColumnStatisticsImpl>();
+    invalidStats->update("0", 0);
+
+    // Starts out empty and serves as the merge target below.
+    auto emptyStats = std::make_unique<GeospatialColumnStatisticsImpl>();
+
+    auto xyStats = statsOf(MakeWKBPoint({10, 11}, false, false));
+    auto xyzStats = statsOf(MakeWKBPoint({12, 13, 14}, true, false));
+    auto xyzmStats = statsOf(MakeWKBPoint({-10, -11, -12, -13}, true, true));
+
+    // invalid merge invalid
+    invalidStats->merge(*invalidStats);
+    std::array<bool, 4> expectedValid = {false, false, false, false};
+    EXPECT_EQ(invalidStats->getBoundingBox().dimensionValid(), expectedValid);
+    EXPECT_EQ(invalidStats->getGeospatialTypes().size(), 0);
+
+    // empty merge empty
+    emptyStats->merge(*emptyStats);
+    expectedValid = {true, true, true, true};
+    std::array<bool, 4> expectedEmpty = {true, true, true, true};
+    EXPECT_EQ(emptyStats->getBoundingBox().dimensionValid(), expectedValid);
+    EXPECT_EQ(emptyStats->getBoundingBox().dimensionEmpty(), expectedEmpty);
+    EXPECT_EQ(emptyStats->getGeospatialTypes().size(), 0);
+
+    // empty merge xy
+    emptyStats->merge(*xyStats);
+    expectedEmpty = {false, false, true, true};
+    {
+      const auto& merged = emptyStats->getBoundingBox();
+      EXPECT_EQ(merged.dimensionValid(), expectedValid);
+      EXPECT_EQ(merged.dimensionEmpty(), expectedEmpty);
+      EXPECT_EQ(10, merged.lowerBound()[0]);
+      EXPECT_EQ(10, merged.upperBound()[0]);
+      EXPECT_EQ(11, merged.lowerBound()[1]);
+      EXPECT_EQ(11, merged.upperBound()[1]);
+    }
+    EXPECT_EQ(emptyStats->getGeospatialTypes().size(), 1);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[0], 1);
+
+    // (empty + xy) merge xyz
+    emptyStats->merge(*xyzStats);
+    expectedEmpty = {false, false, false, true};
+    {
+      const auto& merged = emptyStats->getBoundingBox();
+      EXPECT_EQ(merged.dimensionValid(), expectedValid);
+      EXPECT_EQ(merged.dimensionEmpty(), expectedEmpty);
+      EXPECT_EQ(10, merged.lowerBound()[0]);
+      EXPECT_EQ(12, merged.upperBound()[0]);
+      EXPECT_EQ(11, merged.lowerBound()[1]);
+      EXPECT_EQ(13, merged.upperBound()[1]);
+      EXPECT_EQ(14, merged.lowerBound()[2]);
+      EXPECT_EQ(14, merged.upperBound()[2]);
+    }
+    EXPECT_EQ(emptyStats->getGeospatialTypes().size(), 2);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[0], 1);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[1], 1001);
+
+    // (empty + xy + xyz) merge xyzm
+    emptyStats->merge(*xyzmStats);
+    expectedEmpty = {false, false, false, false};
+    {
+      const auto& merged = emptyStats->getBoundingBox();
+      EXPECT_EQ(merged.dimensionValid(), expectedValid);
+      EXPECT_EQ(merged.dimensionEmpty(), expectedEmpty);
+      EXPECT_EQ(-10, merged.lowerBound()[0]);
+      EXPECT_EQ(12, merged.upperBound()[0]);
+      EXPECT_EQ(-11, merged.lowerBound()[1]);
+      EXPECT_EQ(13, merged.upperBound()[1]);
+      EXPECT_EQ(-12, merged.lowerBound()[2]);
+      EXPECT_EQ(14, merged.upperBound()[2]);
+      EXPECT_EQ(-13, merged.lowerBound()[3]);
+      EXPECT_EQ(-13, merged.upperBound()[3]);
+    }
+    EXPECT_EQ(emptyStats->getGeospatialTypes().size(), 3);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[0], 1);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[1], 1001);
+    EXPECT_EQ(emptyStats->getGeospatialTypes()[2], 3001);
+
+    // Merging the invalid statistics in invalidates the target as well.
+    emptyStats->merge(*invalidStats);
+    expectedValid = {false, false, false, false};
+    EXPECT_EQ(emptyStats->getBoundingBox().dimensionValid(), expectedValid);
+    EXPECT_EQ(emptyStats->getGeospatialTypes().size(), 0);
+  }
+
+  // Builds statistics from progressively richer protobuf messages and checks
+  // which dimensions come back valid, with which bounds and type codes.
+  TEST(ColumnStatistics, TestGeospatialFromProto) {
+    proto::ColumnStatistics pbStats;
+    // Deserialize the current content of pbStats.
+    auto fromProto = [&pbStats]() {
+      return std::make_unique<GeospatialColumnStatisticsImpl>(pbStats);
+    };
+    // Every bound written in this test is [0, 1]; check the first `dims` of them.
+    auto expectUnitBounds = [](const GeospatialColumnStatisticsImpl& stats, int dims) {
+      for (int dim = 0; dim < dims; dim++) {
+        EXPECT_EQ(0, stats.getBoundingBox().lowerBound()[dim]);
+        EXPECT_EQ(1, stats.getBoundingBox().upperBound()[dim]);
+      }
+    };
+
+    // No geospatial statistics at all: every dimension is invalid.
+    auto emptyStats0 = fromProto();
+    std::array<bool, 4> expectedValid = {false, false, false, false};
+    EXPECT_TRUE(emptyStats0->getGeospatialTypes().empty());
+    EXPECT_EQ(emptyStats0->getBoundingBox().dimensionValid(), expectedValid);
+
+    // Geospatial statistics present but empty: still invalid everywhere.
+    pbStats.mutable_geospatial_statistics();
+    auto emptyStats1 = fromProto();
+    EXPECT_TRUE(emptyStats1->getGeospatialTypes().empty());
+    EXPECT_EQ(emptyStats1->getBoundingBox().dimensionValid(), expectedValid);
+
+    // XY bounds.
+    auto* pbGeo = pbStats.mutable_geospatial_statistics();
+    pbGeo->mutable_bbox()->set_xmin(0);
+    pbGeo->mutable_bbox()->set_xmax(1);
+    pbGeo->mutable_bbox()->set_ymin(0);
+    pbGeo->mutable_bbox()->set_ymax(1);
+    pbGeo->mutable_geospatial_types()->Add(2);
+    auto xyStats = fromProto();
+    expectedValid = {true, true, false, false};
+    EXPECT_EQ(xyStats->getGeospatialTypes().size(), 1);
+    EXPECT_EQ(xyStats->getGeospatialTypes()[0], 2);
+    EXPECT_EQ(xyStats->getBoundingBox().dimensionValid(), expectedValid);
+    expectUnitBounds(*xyStats, 2);
+
+    // XYZ bounds.
+    pbGeo->mutable_bbox()->set_zmin(0);
+    pbGeo->mutable_bbox()->set_zmax(1);
+    pbGeo->mutable_geospatial_types()->Add(1003);
+    auto xyzStats = fromProto();
+    expectedValid = {true, true, true, false};
+    EXPECT_EQ(xyzStats->getGeospatialTypes().size(), 2);
+    EXPECT_EQ(xyzStats->getGeospatialTypes()[0], 2);
+    EXPECT_EQ(xyzStats->getGeospatialTypes()[1], 1003);
+    EXPECT_EQ(xyzStats->getBoundingBox().dimensionValid(), expectedValid);
+    expectUnitBounds(*xyzStats, 3);
+
+    // XYZM bounds.
+    pbGeo->mutable_bbox()->set_mmin(0);
+    pbGeo->mutable_bbox()->set_mmax(1);
+    pbGeo->mutable_geospatial_types()->Add(3003);
+    auto xyzmStats = fromProto();
+    expectedValid = {true, true, true, true};
+    EXPECT_EQ(xyzmStats->getGeospatialTypes().size(), 3);
+    EXPECT_EQ(xyzmStats->getGeospatialTypes()[0], 2);
+    EXPECT_EQ(xyzmStats->getGeospatialTypes()[1], 1003);
+    EXPECT_EQ(xyzmStats->getGeospatialTypes()[2], 3003);
+    EXPECT_EQ(xyzmStats->getBoundingBox().dimensionValid(), expectedValid);
+    expectUnitBounds(*xyzmStats, 4);
+  }
+
 }  // namespace orc
diff --git a/c++/test/TestStatistics.cc b/c++/test/TestStatistics.cc
new file mode 100644
index 000000000..61c5e08cb
--- /dev/null
+++ b/c++/test/TestStatistics.cc
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "orc/OrcFile.hh"
+
+#include "MemoryInputStream.hh"
+#include "MemoryOutputStream.hh"
+#include "TestUtil.hh"
+
+#include "wrap/gtest-wrapper.h"
+
+#include <cstdint>
+#include <memory>
+#include <utility>
+
+namespace orc {
+
+#define ENSURE_DYNAMIC_CAST_NOT_NULL(PTR) \
+  if (PTR == NULL) throw std::logic_error("dynamic_cast returns null");
+
+  const int DEFAULT_MEM_STREAM_SIZE = 1024 * 1024;  // 1 MiB, passed to MemoryOutputStream in the tests below
+
+  // Builds a Writer for `type` that writes to `stream`, using ZLIB compression, compression and
+  // memory block sizes of 256, a row index stride of 10, and the given stripe size and pool.
+  static std::unique_ptr<Writer> createWriter(uint64_t stripeSize, const Type& type,
+                                              MemoryPool* memoryPool, OutputStream* stream) {
+    WriterOptions writerOptions;
+    writerOptions.setMemoryPool(memoryPool);
+    writerOptions.setCompression(CompressionKind_ZLIB);
+    writerOptions.setCompressionBlockSize(256);
+    writerOptions.setMemoryBlockSize(256);
+    writerOptions.setRowIndexStride(10);
+    writerOptions.setStripeSize(stripeSize);
+    // Three arguments: this delegates to the createWriter overload declared elsewhere.
+    return createWriter(type, stream, writerOptions);
+  }
+
+  // Opens a Reader over the bytes currently held by `memStream`, with `memoryPool` set on the
+  // reader options.
+  static std::unique_ptr<Reader> createReader(MemoryPool* memoryPool,
+                                              MemoryOutputStream& memStream) {
+    ReaderOptions readerOptions;
+    readerOptions.setMemoryPool(*memoryPool);
+    auto input = std::unique_ptr<InputStream>(
+        new MemoryInputStream(memStream.getData(), memStream.getLength()));
+    // Two arguments: this delegates to the createReader overload declared elsewhere.
+    return createReader(std::move(input), readerOptions);
+  }
+
+  // Writes geometry points of mixed dimensionality (XY, XYZ, XYZM), with the first and last row
+  // of every batch null, and checks the file-level geospatial statistics of column 1.
+  TEST(Statistics, geometryStatsWithNull) {
+    std::unique_ptr<Type> const type(
+        Type::buildTypeFromString("struct<col1:geometry(OGC:CRS84)>"));
+
+    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+    MemoryPool* const pool = getDefaultPool();
+    uint64_t const stripeSize = 32;  // small stripe size to guarantee multiple stripes
+    std::unique_ptr<Writer> writer = createWriter(stripeSize, *type, pool, &memStream);
+
+    uint64_t const batchCount = 1000;
+    uint64_t const batches = 10;
+    std::unique_ptr<ColumnVectorBatch> const batch = writer->createRowBatch(batchCount);
+    auto* structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+    ENSURE_DYNAMIC_CAST_NOT_NULL(structBatch);
+    auto* strBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[0]);
+    ENSURE_DYNAMIC_CAST_NOT_NULL(strBatch);
+
+    // Row i (1 <= i < batchCount - 1) holds a point whose coordinates all equal i. i % 3 selects
+    // the dimensionality: 0 -> XY, 1 -> XYZ, 2 -> XYZM. The expected bounds are tracked per
+    // dimension while the points are generated.
+    std::vector<std::string> wkbs;
+    std::array<double, 4> mins;
+    std::array<double, 4> maxs;
+    mins.fill(geospatial::INF);
+    maxs.fill(-geospatial::INF);
+    for (uint64_t i = 1; i < batchCount - 1; ++i) {
+      double const coord = i * 1.0;
+      uint64_t const extraDims = i % 3;
+      std::vector<double> const point(2 + extraDims, coord);
+      wkbs.push_back(MakeWKBPoint(point, extraDims >= 1, extraDims == 2));
+      for (size_t dim = 0; dim < point.size(); ++dim) {
+        mins[dim] = std::min(mins[dim], coord);
+        maxs[dim] = std::max(maxs[dim], coord);
+      }
+    }
+    // Pointers are taken only after wkbs is complete, so growth of the vector cannot
+    // invalidate them.
+    for (uint64_t row = 1; row < batchCount - 1; ++row) {
+      std::string const& wkb = wkbs[row - 1];
+      strBatch->data[row] = const_cast<char*>(wkb.c_str());
+      strBatch->length[row] = static_cast<int32_t>(wkb.length());
+    }
+
+    structBatch->numElements = batchCount;
+    strBatch->numElements = batchCount;
+
+    // The first and last row are null at both the struct level and the column level.
+    structBatch->hasNulls = true;
+    strBatch->hasNulls = true;
+    structBatch->notNull[0] = '\0';
+    strBatch->notNull[0] = '\0';
+    structBatch->notNull[batchCount - 1] = '\0';
+    strBatch->notNull[batchCount - 1] = '\0';
+
+    // The same batch is written `batches` times.
+    for (uint64_t n = 0; n < batches; ++n) {
+      writer->add(*batch);
+    }
+    writer->close();
+
+    std::unique_ptr<Reader> reader = createReader(pool, memStream);
+
+    // check the file-level statistics of column 1 (the geometry column)
+    auto stats1 = reader->getColumnStatistics(1);
+    auto const* geoFileStats = dynamic_cast<const GeospatialColumnStatistics*>(stats1.get());
+    ENSURE_DYNAMIC_CAST_NOT_NULL(geoFileStats);
+    auto const& geoTypes = geoFileStats->getGeospatialTypes();
+    auto const& bbox = geoFileStats->getBoundingBox();
+    // Expected type codes: 1 (XY point), 1001 (XYZ point), 3001 (XYZM point).
+    EXPECT_EQ(geoTypes.size(), 3);
+    EXPECT_EQ(geoTypes[0], 1);
+    EXPECT_EQ(geoTypes[1], 1001);
+    EXPECT_EQ(geoTypes[2], 3001);
+    std::array<bool, 4> expectValid = {true, true, true, true};
+    std::array<bool, 4> expectEmpty = {false, false, false, false};
+    EXPECT_EQ(bbox.dimensionValid(), expectValid);
+    EXPECT_EQ(bbox.dimensionEmpty(), expectEmpty);
+    EXPECT_EQ(bbox.lowerBound(), mins);
+    EXPECT_EQ(bbox.upperBound(), maxs);
+  }
+
+  // Writes the same mix of XY, XYZ and XYZM points as the geometry test into a geography
+  // column, with the first and last row of every batch null, and checks that the file-level
+  // statistics of column 1 report no geospatial types and no valid bounding-box dimension.
+  TEST(Statistics, geographyStatsWithNull) {
+    // NOTE(review): "speherial" looks like a misspelling of "spherical"; it is kept as is because
+    // the accepted algorithm names are not visible from this test - confirm against the parser.
+    std::unique_ptr<Type> const type(
+        Type::buildTypeFromString("struct<col1:geography(OGC:CRS84,speherial)>"));
+
+    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+    MemoryPool* const pool = getDefaultPool();
+    uint64_t const stripeSize = 32;  // small stripe size to guarantee multiple stripes
+    std::unique_ptr<Writer> writer = createWriter(stripeSize, *type, pool, &memStream);
+
+    uint64_t const batchCount = 1000;
+    uint64_t const batches = 10;
+    std::unique_ptr<ColumnVectorBatch> const batch = writer->createRowBatch(batchCount);
+    StructVectorBatch* structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+    ENSURE_DYNAMIC_CAST_NOT_NULL(structBatch);
+
+    StringVectorBatch* strBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[0]);
+    ENSURE_DYNAMIC_CAST_NOT_NULL(strBatch);
+
+    // Row i (1 <= i < batchCount - 1) holds a point whose coordinates all equal i. i % 3 selects
+    // the dimensionality: 0 -> XY, 1 -> XYZ, 2 -> XYZM. No expected bounds are tracked because
+    // this test asserts that no bounding box is produced.
+    std::vector<std::string> wkbs;
+    for (uint64_t i = 1; i < batchCount - 1; ++i) {
+      uint64_t const extraDims = i % 3;
+      std::vector<double> const point(2 + extraDims, i * 1.0);
+      wkbs.push_back(MakeWKBPoint(point, extraDims >= 1, extraDims == 2));
+    }
+    // Pointers are taken only after wkbs is complete, so growth of the vector cannot
+    // invalidate them.
+    for (uint64_t i = 1; i < batchCount - 1; ++i) {
+      strBatch->data[i] = const_cast<char*>(wkbs[i - 1].c_str());
+      strBatch->length[i] = static_cast<int32_t>(wkbs[i - 1].length());
+    }
+
+    structBatch->numElements = batchCount;
+    strBatch->numElements = batchCount;
+
+    // The first and last row are null at both the struct level and the column level.
+    structBatch->hasNulls = true;
+    structBatch->notNull[0] = '\0';
+    structBatch->notNull[batchCount - 1] = '\0';
+    strBatch->hasNulls = true;
+    strBatch->notNull[0] = '\0';
+    strBatch->notNull[batchCount - 1] = '\0';
+
+    // The same batch is written `batches` times.
+    for (uint64_t i = 0; i < batches; ++i) {
+      writer->add(*batch);
+    }
+    writer->close();
+
+    std::unique_ptr<Reader> reader = createReader(pool, memStream);
+
+    // check the file-level statistics of column 1 (the geography column)
+    auto stats1 = reader->getColumnStatistics(1);
+    const GeospatialColumnStatistics* geoFileStats =
+        dynamic_cast<const GeospatialColumnStatistics*>(stats1.get());
+    ENSURE_DYNAMIC_CAST_NOT_NULL(geoFileStats);
+    EXPECT_EQ(geoFileStats->getGeospatialTypes().size(), 0);
+    std::array<bool, 4> expectValid = {false, false, false, false};
+    EXPECT_EQ(geoFileStats->getBoundingBox().dimensionValid(), expectValid);
+  }
+}  // namespace orc
\ No newline at end of file
diff --git a/c++/test/TestUtil.cc b/c++/test/TestUtil.cc
new file mode 100644
index 000000000..a76880340
--- /dev/null
+++ b/c++/test/TestUtil.cc
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "TestUtil.hh"
+#include <cassert>
+#include <cstring>
+
+namespace orc {
+  // Returns the WKB type code for `geometryType`: its numeric value plus 1000 when hasZ is set
+  // and plus 2000 when hasM is set.
+  uint32_t GeometryTypeToWKB(geospatial::GeometryType geometryType, bool hasZ, bool hasM) {
+    const uint32_t zOffset = hasZ ? 1000U : 0U;
+    const uint32_t mOffset = hasM ? 2000U : 0U;
+    return static_cast<uint32_t>(geometryType) + zOffset + mOffset;
+  }
+
+  // Serializes one point as WKB in the host byte order.
+  //
+  // Layout: 1-byte endianness marker, 4-byte geometry type, 8-byte X, 8-byte Y, then an 8-byte Z
+  // when hasZ is set and an 8-byte M when hasM is set.
+  //
+  // xyzm is indexed positionally: Z is always read from xyzm[2] and M from xyzm[3], so a point
+  // with hasM set needs four elements even when hasZ is false.
+  std::string MakeWKBPoint(const std::vector<double>& xyzm, bool hasZ, bool hasM) {
+    // Guard the copies below against reading past the end of xyzm (checked in debug builds).
+    assert(xyzm.size() >= (hasM ? 4U : (hasZ ? 3U : 2U)));
+
+    const size_t numBytes = static_cast<size_t>(kWkbPointXYSize) +
+                            (hasZ ? sizeof(double) : 0) + (hasM ? sizeof(double) : 0);
+    std::string wkb(numBytes, '\0');
+    char* ptr = wkb.data();
+
+    // Header and the two mandatory coordinates: kWkbPointXYSize bytes in total.
+    ptr[0] = static_cast<char>(kWkbNativeEndianness);
+    const uint32_t geomType = GeometryTypeToWKB(geospatial::GeometryType::POINT, hasZ, hasM);
+    std::memcpy(&ptr[1], &geomType, sizeof(geomType));
+    std::memcpy(&ptr[5], &xyzm[0], sizeof(double));
+    std::memcpy(&ptr[13], &xyzm[1], sizeof(double));
+    ptr += kWkbPointXYSize;
+
+    if (hasZ) {
+      std::memcpy(ptr, &xyzm[2], sizeof(double));
+      ptr += sizeof(double);
+    }
+
+    if (hasM) {
+      std::memcpy(ptr, &xyzm[3], sizeof(double));
+      ptr += sizeof(double);
+    }
+
+    // Every byte of the buffer must have been written.
+    assert(static_cast<size_t>(ptr - wkb.data()) == wkb.length());
+    return wkb;
+  }
+
+}  // namespace orc
\ No newline at end of file
diff --git a/c++/test/TestUtil.hh b/c++/test/TestUtil.hh
new file mode 100644
index 000000000..104fbc039
--- /dev/null
+++ b/c++/test/TestUtil.hh
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include "orc/Geospatial.hh"
+
+#include <cstdint>
+#include <vector>
+
+namespace orc {
+
+  /// \brief Number of bytes in a WKB Point with X and Y dimensions (uint8_t 
endian,
+  /// uint32_t geometry type, 2 * double coordinates)
+  static constexpr int kWkbPointXYSize = 21;
+
+  /// \brief Whether the host stores the least significant byte of an integer first.
+  ///
+  /// Reads the first byte of a known 32-bit pattern through an unsigned char pointer, which is
+  /// allowed to alias any object, instead of reading an inactive union member. Declared inline
+  /// because the definition lives in a header.
+  inline bool isLittleEndian() {
+    const uint32_t probe = 0x01020304;
+    return *reinterpret_cast<const unsigned char*>(&probe) == 0x04;
+  }
+
+  /// \brief WKB byte-order marker for the host: 0x01 when little-endian, 0x00 otherwise.
+  /// Const so that no translation unit including this header can modify its copy.
+  static const uint8_t kWkbNativeEndianness = isLittleEndian() ? 0x01 : 0x00;
+
+  uint32_t GeometryTypeToWKB(geospatial::GeometryType geometryType, bool hasZ, 
bool hasM);
+  std::string MakeWKBPoint(const std::vector<double>& xyzm, bool hasZ, bool 
hasM);
+
+}  // namespace orc
\ No newline at end of file
diff --git a/c++/test/TestWriter.cc b/c++/test/TestWriter.cc
index 975462e30..11ba0c9de 100644
--- a/c++/test/TestWriter.cc
+++ b/c++/test/TestWriter.cc
@@ -16,18 +16,20 @@
  * limitations under the License.
  */
 
-#include "orc/ColumnPrinter.hh"
+#include <gtest/gtest.h>
 #include "orc/OrcFile.hh"
 
 #include "MemoryInputStream.hh"
 #include "MemoryOutputStream.hh"
 #include "Reader.hh"
+#include "TestUtil.hh"
 
 #include "wrap/gmock.h"
 #include "wrap/gtest-wrapper.h"
 
 #include <cmath>
 #include <ctime>
+#include <memory>
 #include <sstream>
 
 #ifdef __clang__
@@ -2400,6 +2402,139 @@ namespace orc {
     EXPECT_FALSE(rowReader->next(*batch));
   }
 
+  // Round-trips a geometry and a geography column through the writer and the reader. Every
+  // second row is null; the other rows carry a WKB XY point whose coordinates equal the row's
+  // index within its batch. The data is verified with a sequential read and with seeks.
+  TEST_P(WriterTest, writeGeometryAndGeographyColumn) {
+    MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
+    MemoryPool* pool = getDefaultPool();
+    // NOTE(review): "speherial" looks like a misspelling of "spherical"; it is kept as is because
+    // the accepted algorithm names are not visible from this test - confirm against the parser.
+    std::unique_ptr<Type> type(Type::buildTypeFromString(
+        "struct<col1:geometry(OGC:CRS84),col2:geography(OGC:CRS84,speherial)>"));
+    uint64_t stripeSize = 1024;            // 1K
+    uint64_t compressionBlockSize = 1024;  // 1k
+    uint64_t memoryBlockSize = 64;
+    std::unique_ptr<Writer> writer =
+        createWriter(stripeSize, memoryBlockSize, compressionBlockSize, CompressionKind_ZLIB, *type,
+                     pool, &memStream, fileVersion, enableAlignBlockBoundToRowGroup ? 1024 : 0);
+
+    // NOTE(review): the expected string has "),)," after the geometry CRS while the input string
+    // has "),"; left unchanged - confirm that this is what Type::toString() really produces.
+    EXPECT_EQ("struct<col1:geometry(OGC:CRS84),),col2:geography(OGC:CRS84,speherial)>",
+              type->toString());
+
+    uint64_t batchCount = 100, batchSize = 1000;
+    std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(batchSize);
+    StructVectorBatch* structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+    StringVectorBatch* geometryBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[0]);
+    StringVectorBatch* geographyBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[1]);
+
+    // Backing storage for every WKB value written below; both columns point into it.
+    std::unique_ptr<char[]> buffer(new char[8000000]);
+    char* buf = buffer.get();
+
+    // write 100 * 1000 rows, every 100 rows are in one row group
+    // every 2 consecutive rows have one null value.
+    uint64_t rowCount = 0;
+    for (uint64_t i = 0; i != batchCount; ++i) {
+      structBatch->hasNulls = false;
+      structBatch->numElements = batchSize;
+
+      geometryBatch->hasNulls = true;
+      geometryBatch->numElements = batchSize;
+      geographyBatch->hasNulls = true;
+      geographyBatch->numElements = batchSize;
+
+      for (uint64_t j = 0; j != batchSize; ++j) {
+        if (rowCount % 2 == 0) {
+          geometryBatch->notNull[j] = 0;
+          geographyBatch->notNull[j] = 0;
+        } else {
+          geometryBatch->notNull[j] = 1;
+          geographyBatch->notNull[j] = 1;
+
+          std::string wkb = MakeWKBPoint({j * 1.0, j * 1.0}, false, false);
+          // WKB is binary and contains zero bytes (for example in the 4-byte type code), so it
+          // has to be copied with memcpy; strncpy would stop at the first zero byte and
+          // zero-fill the rest, dropping the coordinates.
+          memcpy(buf, wkb.data(), wkb.size());
+
+          geometryBatch->data[j] = buf;
+          geometryBatch->length[j] = static_cast<int64_t>(wkb.size());
+          geographyBatch->data[j] = buf;
+          geographyBatch->length[j] = static_cast<int64_t>(wkb.size());
+
+          buf += wkb.size();
+        }
+        ++rowCount;
+      }
+
+      writer->add(*batch);
+    }
+    writer->close();
+
+    std::unique_ptr<InputStream> inStream(
+        new MemoryInputStream(memStream.getData(), memStream.getLength()));
+    std::unique_ptr<Reader> reader = createReader(pool, std::move(inStream));
+    EXPECT_EQ(batchCount * batchSize, reader->getNumberOfRows());
+    EXPECT_TRUE(reader->getNumberOfStripes() > 1);
+
+    EXPECT_EQ("struct<col1:geometry(OGC:CRS84),),col2:geography(OGC:CRS84,speherial)>",
+              reader->getType().toString());
+    // test sequential reader
+    std::unique_ptr<RowReader> seqReader = createRowReader(reader.get());
+    rowCount = 0;
+    for (uint64_t i = 0; i != batchCount; ++i) {
+      seqReader->next(*batch);
+
+      EXPECT_FALSE(structBatch->hasNulls);
+      EXPECT_EQ(batchSize, structBatch->numElements);
+
+      EXPECT_TRUE(geometryBatch->hasNulls);
+      EXPECT_EQ(batchSize, geometryBatch->numElements);
+      EXPECT_TRUE(geographyBatch->hasNulls);
+      EXPECT_EQ(batchSize, geographyBatch->numElements);
+
+      for (uint64_t j = 0; j != batchSize; ++j) {
+        if (rowCount % 2 == 0) {
+          EXPECT_TRUE(geometryBatch->notNull[j] == 0);
+          EXPECT_TRUE(geographyBatch->notNull[j] == 0);
+        } else {
+          EXPECT_TRUE(geometryBatch->notNull[j] != 0);
+          EXPECT_TRUE(geographyBatch->notNull[j] != 0);
+          std::string wkb = MakeWKBPoint({j * 1.0, j * 1.0}, false, false);
+          // memcmp compares all wkb.size() bytes; strncmp would stop at the first zero byte.
+          EXPECT_EQ(static_cast<int64_t>(wkb.size()), geometryBatch->length[j]);
+          EXPECT_TRUE(memcmp(geometryBatch->data[j], wkb.data(), wkb.size()) == 0);
+          EXPECT_EQ(static_cast<int64_t>(wkb.size()), geographyBatch->length[j]);
+          EXPECT_TRUE(memcmp(geographyBatch->data[j], wkb.data(), wkb.size()) == 0);
+        }
+        ++rowCount;
+      }
+    }
+    EXPECT_FALSE(seqReader->next(*batch));
+
+    // test seek reader
+    std::unique_ptr<RowReader> seekReader = createRowReader(reader.get());
+    batch = seekReader->createRowBatch(2);
+    structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
+    geometryBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[0]);
+    geographyBatch = dynamic_cast<StringVectorBatch*>(structBatch->fields[1]);
+
+    // Each probe reads two rows: `row` is even (null) and `row + 1` is odd (a point).
+    for (uint64_t row = rowCount - 2; row >= 100; row -= 100) {
+      seekReader->seekToRow(row);
+      seekReader->next(*batch);
+
+      EXPECT_FALSE(structBatch->hasNulls);
+      EXPECT_EQ(2, structBatch->numElements);
+      EXPECT_TRUE(geometryBatch->hasNulls);
+      EXPECT_EQ(2, geometryBatch->numElements);
+      EXPECT_TRUE(geographyBatch->hasNulls);
+      EXPECT_EQ(2, geographyBatch->numElements);
+
+      EXPECT_TRUE(geometryBatch->notNull[0] == 0);
+      EXPECT_TRUE(geometryBatch->notNull[1] != 0);
+      EXPECT_TRUE(geographyBatch->notNull[0] == 0);
+      EXPECT_TRUE(geographyBatch->notNull[1] != 0);
+
+      // The points were written from the index within the 1000-row batch, not from the
+      // absolute row number, so the expected coordinate is (row + 1) modulo batchSize.
+      const double coord = static_cast<double>((row + 1) % batchSize);
+      std::string wkb = MakeWKBPoint({coord, coord}, false, false);
+
+      EXPECT_EQ(static_cast<int64_t>(wkb.size()), geometryBatch->length[1]);
+      EXPECT_TRUE(memcmp(geometryBatch->data[1], wkb.data(), wkb.size()) == 0);
+      EXPECT_EQ(static_cast<int64_t>(wkb.size()), geographyBatch->length[1]);
+      EXPECT_TRUE(memcmp(geographyBatch->data[1], wkb.data(), wkb.size()) == 0);
+    }
+  }
+
   std::vector<TestParams> testParams = {{FileVersion::v_0_11(), true},
                                         {FileVersion::v_0_11(), false},
                                         {FileVersion::v_0_12(), false},
diff --git a/c++/test/meson.build b/c++/test/meson.build
index ba84bf7fa..a8d30a6b9 100644
--- a/c++/test/meson.build
+++ b/c++/test/meson.build
@@ -50,10 +50,12 @@ test_sources = [
     'TestSargsApplier.cc',
     'TestSearchArgument.cc',
     'TestSchemaEvolution.cc',
+    'TestStatistics.cc',
     'TestStripeIndexStatistics.cc',
     'TestTimestampStatistics.cc',
     'TestTimezone.cc',
     'TestType.cc',
+    'TestUtil.cc',
     'TestWriter.cc',
     'TestCache.cc',        
 ]
diff --git a/tools/src/CSVFileImport.cc b/tools/src/CSVFileImport.cc
index ae17b3348..31a6f52a2 100644
--- a/tools/src/CSVFileImport.cc
+++ b/tools/src/CSVFileImport.cc
@@ -420,6 +420,8 @@ int main(int argc, char* argv[]) {
           case orc::LIST:
           case orc::MAP:
           case orc::UNION:
+          case orc::GEOMETRY:
+          case orc::GEOGRAPHY:
             throw std::runtime_error(subType->toString() + " is not supported 
yet.");
         }
       }


Reply via email to