This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new b6bea12b68 Add Parquet geospatial statistics utility (#8414)
b6bea12b68 is described below
commit b6bea12b68835a7523c3e2bfea65eba05df1111d
Author: Dewey Dunnington <[email protected]>
AuthorDate: Sun Sep 28 07:58:05 2025 -0500
Add Parquet geospatial statistics utility (#8414)
# Which issue does this PR close?
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
- Closes #8411.
# Rationale for this change
The presence of relevant statistics when writing geometries and/or
geographies is one of the primary motivations behind the GEOMETRY and/or
GEOGRAPHY in Parquet. We'd like to make it easy for writers to provide
them!
# What changes are included in this PR?
This PR introduces `Interval` and `WraparoundInterval` structs that
handle interval math, and a `GeometryBounder` that iterates over input
using the fantastic `wkb` crate (via `geo-traits`).
# Are these changes tested?
Yes!
# Are there any user-facing changes?
All public structures and functions are documented (although I am not
sure what the final public API will be).
---------
Co-authored-by: Kyle Barron <[email protected]>
---
parquet-geospatial/Cargo.toml | 6 +
parquet-geospatial/src/bounding.rs | 602 ++++++++++++++++++++
parquet-geospatial/src/interval.rs | 1080 ++++++++++++++++++++++++++++++++++++
parquet-geospatial/src/lib.rs | 3 +
4 files changed, 1691 insertions(+)
diff --git a/parquet-geospatial/Cargo.toml b/parquet-geospatial/Cargo.toml
index a407b429c5..1ea083b1d8 100644
--- a/parquet-geospatial/Cargo.toml
+++ b/parquet-geospatial/Cargo.toml
@@ -31,6 +31,12 @@ edition = { workspace = true }
rust-version = { workspace = true }
[dependencies]
+arrow-schema = { workspace = true }
+geo-traits = { version = "0.3" }
+wkb = { version = "0.9" }
+
+[dev-dependencies]
+wkt = { version = "0.14" }
[lib]
name = "parquet_geospatial"
diff --git a/parquet-geospatial/src/bounding.rs
b/parquet-geospatial/src/bounding.rs
new file mode 100644
index 0000000000..17ecb8e6e7
--- /dev/null
+++ b/parquet-geospatial/src/bounding.rs
@@ -0,0 +1,602 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::collections::HashSet;
+
+use arrow_schema::ArrowError;
+use geo_traits::{
+ CoordTrait, Dimensions, GeometryCollectionTrait, GeometryTrait,
GeometryType, LineStringTrait,
+ MultiLineStringTrait, MultiPointTrait, MultiPolygonTrait, PointTrait,
PolygonTrait,
+};
+use wkb::reader::Wkb;
+
+use crate::interval::{Interval, IntervalTrait, WraparoundInterval};
+
+/// Geometry bounder
+///
+/// Utility to accumulate statistics for geometries as they are written.
+/// This bounder is designed to output statistics accumulated according
+/// to the Parquet specification such that the output can be written to
+/// Parquet statistics with minimal modification.
+///
+/// See the [IntervalTrait] for an in-depth discussion of wraparound bounding
+/// (which adds some complexity to this implementation).
+#[derive(Debug)]
+pub struct GeometryBounder {
+ /// Union of all contiguous x intervals to the left of the wraparound
midpoint
+ x_left: Interval,
+ /// Union of all contiguous x intervals that intersect the wraparound
midpoint
+ x_mid: Interval,
+ /// Union of all contiguous x intervals to the right of the wraparound
midpoint
+ x_right: Interval,
+ /// Union of all y intervals
+ y: Interval,
+ /// Union of all z intervals
+ z: Interval,
+ /// Union of all m intervals
+ m: Interval,
+ /// Unique geometry type codes encountered by the bounder
+ ///
+ /// The integer codes are identical to the ISO WKB geometry type codes and
+ /// are documented as part of the Parquet specification:
+ ///
<https://github.com/apache/parquet-format/blob/master/Geospatial.md#geospatial-types>
+ geometry_types: HashSet<i32>,
+ wraparound_hint: Interval,
+}
+
+impl GeometryBounder {
+ /// Create a new, empty bounder that represents empty input
+ pub fn empty() -> Self {
+ Self {
+ x_left: Interval::empty(),
+ x_mid: Interval::empty(),
+ x_right: Interval::empty(),
+ y: Interval::empty(),
+ z: Interval::empty(),
+ m: Interval::empty(),
+ geometry_types: HashSet::<i32>::default(),
+ wraparound_hint: Interval::empty(),
+ }
+ }
+
+ /// Set the hint to use for generation of potential wraparound xmin/xmax
output
+ ///
+ /// Usually this value should be set to (-180, 180), as wraparound is
primarily
+ /// targeted at lon/lat coordinate systems where collections of features
with
+ /// components at the very far left and very far right of the coordinate
system
+ /// are actually very close to each other.
+ ///
+ /// It is safe to set this value even when the actual coordinate system of
the
+ /// input is unknown: if the input has coordinate values that are outside
the
+ /// range of the wraparound hint, wraparound xmin/xmax values will not be
+ /// generated. If the input has coordinate values that are well inside of
the
+ /// range of the wraparound hint, the wraparound xmin/xmax value will be
+ /// substantially wider than the non-wraparound version and will not be
returned.
+ pub fn with_wraparound_hint(self, wraparound_hint: impl Into<Interval>) ->
Self {
+ Self {
+ wraparound_hint: wraparound_hint.into(),
+ ..self
+ }
+ }
+
+ /// Calculate the final xmin and xmax for geometries encountered by this
bounder
+ ///
+ /// The interval returned may wraparound if a hint was set and the input
+ /// encountered by this bounder were exclusively at the far left and far
right
+ /// of the input range. See [IntervalTrait] for an in-depth description of
+ /// wraparound intervals.
+ pub fn x(&self) -> WraparoundInterval {
+ let out_all = Interval::empty()
+ .merge_interval(&self.x_left)
+ .merge_interval(&self.x_mid)
+ .merge_interval(&self.x_right);
+
+ // Check if this even makes sense: if anything is covering the midpoint
+ // of the wraparound hint or the bounds don't make sense for the
provided
+ // wraparound hint, just return the Cartesian bounds.
+ if !self.x_mid.is_empty() ||
!self.wraparound_hint.contains_interval(&out_all) {
+ return out_all.into();
+ }
+
+ // Check if our wraparound bounds are any better than our Cartesian
bounds
+ // If the Cartesian bounds are tighter, return them.
+ let out_width = (self.x_left.hi() - self.wraparound_hint.lo())
+ + (self.wraparound_hint.hi() - self.x_right.hi());
+ if out_all.width() < out_width {
+ return out_all.into();
+ }
+
+ // Wraparound!
+ WraparoundInterval::new(self.x_right.lo(), self.x_left.hi())
+ }
+
+ /// Calculate the final ymin and ymax for geometries encountered by this
bounder
+ pub fn y(&self) -> Interval {
+ self.y
+ }
+
+ /// Calculate the final zmin and zmax for geometries encountered by this
bounder
+ pub fn z(&self) -> Interval {
+ self.z
+ }
+
+ /// Calculate the final mmin and mmax values for geometries encountered by
this bounder
+ pub fn m(&self) -> Interval {
+ self.m
+ }
+
+ /// Calculate the final geometry type set
+ ///
+ /// Returns a copy of the unique geometry type/dimension combinations
encountered
+ /// by this bounder. These identifiers are ISO WKB identifiers (e.g., 1001
+ /// for PointZ). The output is always returned sorted.
+ pub fn geometry_types(&self) -> Vec<i32> {
+ let mut out = self.geometry_types.iter().copied().collect::<Vec<_>>();
+ out.sort();
+ out
+ }
+
+ /// Update this bounder with one WKB-encoded geometry
+ ///
+ /// Parses and accumulates the bounds of one WKB-encoded geometry. This
function
+ /// will error for invalid WKB input; however, clients may wish to ignore
such
+ /// an error for the purposes of writing statistics.
+ pub fn update_wkb(&mut self, wkb: &[u8]) -> Result<(), ArrowError> {
+ let wkb = Wkb::try_new(wkb).map_err(|e|
ArrowError::ExternalError(Box::new(e)))?;
+ self.update_geometry(&wkb)?;
+ Ok(())
+ }
+
+ fn update_geometry(&mut self, geom: &impl GeometryTrait<T = f64>) ->
Result<(), ArrowError> {
+ let geometry_type = geometry_type(geom)?;
+ self.geometry_types.insert(geometry_type);
+
+ visit_intervals(geom, 'x', &mut |x| self.update_x(&x))?;
+ visit_intervals(geom, 'y', &mut |y| self.y.update_interval(&y))?;
+ visit_intervals(geom, 'z', &mut |z| self.z.update_interval(&z))?;
+ visit_intervals(geom, 'm', &mut |m| self.m.update_interval(&m))?;
+
+ Ok(())
+ }
+
+ fn update_x(&mut self, x: &Interval) {
+ if x.hi() < self.wraparound_hint.mid() {
+ // If the x interval is completely to the left of the midpoint,
merge it
+ // with x_left
+ self.x_left.update_interval(x);
+ } else if x.lo() > self.wraparound_hint.mid() {
+ // If the x interval is completely to the right of the midpoint,
merge it
+ // with x_right
+ self.x_right.update_interval(x);
+ } else {
+ // Otherwise, merge it with x_mid
+ self.x_mid.update_interval(x);
+ }
+ }
+}
+
+/// Visit contiguous intervals for a given dimension within a [GeometryTrait]
+///
+/// Here, contiguous intervals refers to intervals that must not be separated
+/// by wraparound bounding. Point components of a geometry are visited as
+/// degenerate intervals of a single value; linestring or polygon ring
components
+/// are visited as single intervals.
+fn visit_intervals(
+ geom: &impl GeometryTrait<T = f64>,
+ dimension: char,
+ func: &mut impl FnMut(Interval),
+) -> Result<(), ArrowError> {
+ let n = if let Some(n) = dimension_index(geom.dim(), dimension) {
+ n
+ } else {
+ return Ok(());
+ };
+
+ match geom.as_type() {
+ GeometryType::Point(pt) => {
+ if let Some(coord) = PointTrait::coord(pt) {
+ visit_point(coord, n, func);
+ }
+ }
+ GeometryType::LineString(ls) => {
+ visit_sequence(ls.coords(), n, func);
+ }
+ GeometryType::Polygon(pl) => {
+ if let Some(exterior) = pl.exterior() {
+ visit_sequence(exterior.coords(), n, func);
+ }
+
+ for interior in pl.interiors() {
+ visit_sequence(interior.coords(), n, func);
+ }
+ }
+ GeometryType::MultiPoint(multi_pt) => {
+ visit_collection(multi_pt.points(), dimension, func)?;
+ }
+ GeometryType::MultiLineString(multi_ls) => {
+ visit_collection(multi_ls.line_strings(), dimension, func)?;
+ }
+ GeometryType::MultiPolygon(multi_pl) => {
+ visit_collection(multi_pl.polygons(), dimension, func)?;
+ }
+ GeometryType::GeometryCollection(collection) => {
+ visit_collection(collection.geometries(), dimension, func)?;
+ }
+ _ => {
+ return Err(ArrowError::InvalidArgumentError(
+ "GeometryType not supported for dimension bounds".to_string(),
+ ))
+ }
+ }
+
+ Ok(())
+}
+
+/// Visit a point
+///
+/// Points can be separated by wraparound bounding even if they occur within
+/// the same feature, so we visit them as individual degenerate intervals.
+fn visit_point(coord: impl CoordTrait<T = f64>, n: usize, func: &mut impl
FnMut(Interval)) {
+ let val = unsafe { coord.nth_unchecked(n) };
+ func((val, val).into());
+}
+
+/// Visit contiguous sequences
+///
+/// Sequences (e.g., linestrings or polygon rings) must always be considered
+/// together (i.e., are never separated by wraparound bounding).
+fn visit_sequence(
+ coords: impl IntoIterator<Item = impl CoordTrait<T = f64>>,
+ n: usize,
+ func: &mut impl FnMut(Interval),
+) {
+ let mut interval = Interval::empty();
+ for coord in coords {
+ interval.update_value(unsafe { coord.nth_unchecked(n) });
+ }
+
+ func(interval);
+}
+
+/// Visit intervals in a collection of geometries
+fn visit_collection(
+ collection: impl IntoIterator<Item = impl GeometryTrait<T = f64>>,
+ target: char,
+ func: &mut impl FnMut(Interval),
+) -> Result<(), ArrowError> {
+ for geom in collection {
+ visit_intervals(&geom, target, func)?;
+ }
+
+ Ok(())
+}
+
+/// Extract the geometry type code encountered by the bounder
+///
+/// The integer code is a ISO WKB geometry type codes is documented as part
+/// of the Parquet specification:
+///
<https://github.com/apache/parquet-format/blob/master/Geospatial.md#geospatial-types>
+///
+/// This can also be derived from bytes 2-5 (possibly endian-swapped according
to byte 1)
+/// of the input WKB buffer but is slightly clearer recomputed.
+fn geometry_type(geom: &impl GeometryTrait<T = f64>) -> Result<i32,
ArrowError> {
+ let dimension_type = match geom.dim() {
+ Dimensions::Xy => 0,
+ Dimensions::Xyz => 1000,
+ Dimensions::Xym => 2000,
+ Dimensions::Xyzm => 3000,
+ Dimensions::Unknown(_) => {
+ return Err(ArrowError::InvalidArgumentError(
+ "Unsupported dimensions".to_string(),
+ ))
+ }
+ };
+
+ let geometry_type = match geom.as_type() {
+ GeometryType::Point(_) => 1,
+ GeometryType::LineString(_) => 2,
+ GeometryType::Polygon(_) => 3,
+ GeometryType::MultiPoint(_) => 4,
+ GeometryType::MultiLineString(_) => 5,
+ GeometryType::MultiPolygon(_) => 6,
+ GeometryType::GeometryCollection(_) => 7,
+ _ => {
+ return Err(ArrowError::InvalidArgumentError(
+ "GeometryType not supported for dimension bounds".to_string(),
+ ))
+ }
+ };
+
+ Ok(dimension_type + geometry_type)
+}
+
+fn dimension_index(dim: Dimensions, target: char) -> Option<usize> {
+ match target {
+ 'x' => return Some(0),
+ 'y' => return Some(1),
+ _ => {}
+ }
+
+ match (dim, target) {
+ (Dimensions::Xyz, 'z') => Some(2),
+ (Dimensions::Xym, 'm') => Some(2),
+ (Dimensions::Xyzm, 'z') => Some(2),
+ (Dimensions::Xyzm, 'm') => Some(3),
+ (_, _) => None,
+ }
+}
+
+#[cfg(test)]
+mod test {
+
+ use std::str::FromStr;
+
+ use wkt::Wkt;
+
+ use super::*;
+
+ fn wkt_bounds(
+ wkt_values: impl IntoIterator<Item = impl AsRef<str>>,
+ ) -> Result<GeometryBounder, ArrowError> {
+ wkt_bounds_with_wraparound(wkt_values, Interval::empty())
+ }
+
+ fn wkt_bounds_with_wraparound(
+ wkt_values: impl IntoIterator<Item = impl AsRef<str>>,
+ wraparound: impl Into<Interval>,
+ ) -> Result<GeometryBounder, ArrowError> {
+ let mut bounder =
GeometryBounder::empty().with_wraparound_hint(wraparound);
+ for wkt_value in wkt_values {
+ let wkt: Wkt = Wkt::from_str(wkt_value.as_ref())
+ .map_err(|e| ArrowError::InvalidArgumentError(e.to_string()))?;
+ bounder.update_geometry(&wkt)?;
+ }
+ Ok(bounder)
+ }
+
+ #[test]
+ fn test_wkb() {
+ let wkt: Wkt = Wkt::from_str("LINESTRING (0 1, 2 3)").unwrap();
+ let mut wkb = Vec::new();
+ wkb::writer::write_geometry(&mut wkb, &wkt,
&Default::default()).unwrap();
+
+ let mut bounds = GeometryBounder::empty();
+ bounds.update_wkb(&wkb).unwrap();
+
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+ }
+
+ #[test]
+ fn test_geometry_types() {
+ let empties = [
+ "POINT EMPTY",
+ "LINESTRING EMPTY",
+ "POLYGON EMPTY",
+ "MULTIPOINT EMPTY",
+ "MULTILINESTRING EMPTY",
+ "MULTIPOLYGON EMPTY",
+ "GEOMETRYCOLLECTION EMPTY",
+ ];
+
+ assert_eq!(
+ wkt_bounds(empties).unwrap().geometry_types(),
+ vec![1, 2, 3, 4, 5, 6, 7]
+ );
+
+ let empties_z = [
+ "POINT Z EMPTY",
+ "LINESTRING Z EMPTY",
+ "POLYGON Z EMPTY",
+ "MULTIPOINT Z EMPTY",
+ "MULTILINESTRING Z EMPTY",
+ "MULTIPOLYGON Z EMPTY",
+ "GEOMETRYCOLLECTION Z EMPTY",
+ ];
+
+ assert_eq!(
+ wkt_bounds(empties_z).unwrap().geometry_types(),
+ vec![1001, 1002, 1003, 1004, 1005, 1006, 1007]
+ );
+
+ let empties_m = [
+ "POINT M EMPTY",
+ "LINESTRING M EMPTY",
+ "POLYGON M EMPTY",
+ "MULTIPOINT M EMPTY",
+ "MULTILINESTRING M EMPTY",
+ "MULTIPOLYGON M EMPTY",
+ "GEOMETRYCOLLECTION M EMPTY",
+ ];
+
+ assert_eq!(
+ wkt_bounds(empties_m).unwrap().geometry_types(),
+ vec![2001, 2002, 2003, 2004, 2005, 2006, 2007]
+ );
+
+ let empties_zm = [
+ "POINT ZM EMPTY",
+ "LINESTRING ZM EMPTY",
+ "POLYGON ZM EMPTY",
+ "MULTIPOINT ZM EMPTY",
+ "MULTILINESTRING ZM EMPTY",
+ "MULTIPOLYGON ZM EMPTY",
+ "GEOMETRYCOLLECTION ZM EMPTY",
+ ];
+
+ assert_eq!(
+ wkt_bounds(empties_zm).unwrap().geometry_types(),
+ vec![3001, 3002, 3003, 3004, 3005, 3006, 3007]
+ );
+ }
+
+ #[test]
+ fn test_bounds_empty() {
+ let empties = [
+ "POINT EMPTY",
+ "LINESTRING EMPTY",
+ "POLYGON EMPTY",
+ "MULTIPOINT EMPTY",
+ "MULTILINESTRING EMPTY",
+ "MULTIPOLYGON EMPTY",
+ "GEOMETRYCOLLECTION EMPTY",
+ ];
+
+ let bounds = wkt_bounds(empties).unwrap();
+ assert!(bounds.x().is_empty());
+ assert!(bounds.y().is_empty());
+ assert!(bounds.z().is_empty());
+ assert!(bounds.m().is_empty());
+
+ // With wraparound, still empty
+ let bounds = wkt_bounds_with_wraparound(empties, (-180, 180)).unwrap();
+ assert!(bounds.x().is_empty());
+ assert!(bounds.y().is_empty());
+ assert!(bounds.z().is_empty());
+ assert!(bounds.m().is_empty());
+ }
+
+ #[test]
+ fn test_bounds_coord() {
+ let bounds = wkt_bounds(["POINT (0 1)", "POINT (2 3)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+ assert!(bounds.z().is_empty());
+ assert!(bounds.m().is_empty());
+
+ let bounds = wkt_bounds(["POINT Z (0 1 2)", "POINT Z (3 4
5)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 3).into());
+ assert_eq!(bounds.y(), (1, 4).into());
+ assert_eq!(bounds.z(), (2, 5).into());
+ assert!(bounds.m().is_empty());
+
+ let bounds = wkt_bounds(["POINT M (0 1 2)", "POINT M (3 4
5)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 3).into());
+ assert_eq!(bounds.y(), (1, 4).into());
+ assert!(bounds.z().is_empty());
+ assert_eq!(bounds.m(), (2, 5).into());
+
+ let bounds = wkt_bounds(["POINT ZM (0 1 2 3)", "POINT ZM (4 5 6
7)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 4).into());
+ assert_eq!(bounds.y(), (1, 5).into());
+ assert_eq!(bounds.z(), (2, 6).into());
+ assert_eq!(bounds.m(), (3, 7).into());
+ }
+
+ #[test]
+ fn test_bounds_sequence() {
+ let bounds = wkt_bounds(["LINESTRING (0 1, 2 3)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+ assert!(bounds.z().is_empty());
+ assert!(bounds.m().is_empty());
+
+ let bounds = wkt_bounds(["LINESTRING Z (0 1 2, 3 4 5)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 3).into());
+ assert_eq!(bounds.y(), (1, 4).into());
+ assert_eq!(bounds.z(), (2, 5).into());
+ assert!(bounds.m().is_empty());
+
+ let bounds = wkt_bounds(["LINESTRING M (0 1 2, 3 4 5)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 3).into());
+ assert_eq!(bounds.y(), (1, 4).into());
+ assert!(bounds.z().is_empty());
+ assert_eq!(bounds.m(), (2, 5).into());
+
+ let bounds = wkt_bounds(["LINESTRING ZM (0 1 2 3, 4 5 6 7)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 4).into());
+ assert_eq!(bounds.y(), (1, 5).into());
+ assert_eq!(bounds.z(), (2, 6).into());
+ assert_eq!(bounds.m(), (3, 7).into());
+ }
+
+ #[test]
+ fn test_bounds_geometry_type() {
+ let bounds = wkt_bounds(["POINT (0 1)", "POINT (2 3)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+
+ let bounds = wkt_bounds(["LINESTRING (0 1, 2 3)"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+
+ // Normally interiors are supposed to be inside the exterior; however,
we
+ // include a poorly formed polygon just to make sure they are
considered
+ let bounds =
+ wkt_bounds(["POLYGON ((0 0, 0 1, 1 0, 0 0), (10 10, 10 11, 11 10,
10 10))"]).unwrap();
+ assert_eq!(bounds.x(), (0, 11).into());
+ assert_eq!(bounds.y(), (0, 11).into());
+
+ let bounds = wkt_bounds(["MULTIPOINT ((0 1), (2 3))"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+
+ let bounds = wkt_bounds(["MULTILINESTRING ((0 1, 2 3))"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+
+ let bounds = wkt_bounds(["MULTIPOLYGON (((0 0, 0 1, 1 0, 0
0)))"]).unwrap();
+ assert_eq!(bounds.x(), (0, 1).into());
+ assert_eq!(bounds.y(), (0, 1).into());
+
+ let bounds = wkt_bounds(["GEOMETRYCOLLECTION (POINT (0 1), POINT (2
3))"]).unwrap();
+ assert_eq!(bounds.x(), (0, 2).into());
+ assert_eq!(bounds.y(), (1, 3).into());
+ }
+
+ #[test]
+ fn test_bounds_wrap_basic() {
+ let geoms = ["POINT (-170 0)", "POINT (170 0)"];
+
+ // No wraparound because it was disabled
+ let bounds = wkt_bounds_with_wraparound(geoms,
Interval::empty()).unwrap();
+ assert_eq!(bounds.x(), (-170, 170).into());
+
+ // Wraparound that can't happen because something is covering
+ // the midpoint.
+ let mut geoms_with_mid = geoms.to_vec();
+ geoms_with_mid.push("LINESTRING (-10 0, 10 0)");
+ let bounds = wkt_bounds_with_wraparound(geoms_with_mid, (-180,
180)).unwrap();
+ assert_eq!(bounds.x(), (-170, 170).into());
+
+ // Wraparound where the wrapped box is *not* better
+ let bounds = wkt_bounds_with_wraparound(geoms, (-1000, 1000)).unwrap();
+ assert_eq!(bounds.x(), (-170, 170).into());
+
+ // Wraparound where the wrapped box is inappropriate because it is
+ // outside the wrap hint
+ let bounds = wkt_bounds_with_wraparound(geoms, (-10, 10)).unwrap();
+ assert_eq!(bounds.x(), (-170, 170).into());
+
+ // Wraparound where the wrapped box *is* better
+ let bounds = wkt_bounds_with_wraparound(geoms, (-180, 180)).unwrap();
+ assert_eq!(bounds.x(), (170, -170).into());
+ }
+
+ #[test]
+ fn test_bounds_wrap_multipart() {
+ let fiji = "MULTIPOLYGON (
+ ((-180 -15.51, -180 -19.78, -178.61 -21.14, -178.02 -18.22, -178.57
-16.04, -180 -15.51)),
+ ((180 -15.51, 177.98 -16.25, 176.67 -17.14, 177.83 -19.31, 180 -19.78,
180 -15.51))
+ )";
+
+ let bounds = wkt_bounds_with_wraparound([fiji], (-180, 180)).unwrap();
+ assert!(bounds.x().is_wraparound());
+ assert_eq!(bounds.x(), (176.67, -178.02).into());
+ assert_eq!(bounds.y(), (-21.14, -15.51).into());
+ }
+}
diff --git a/parquet-geospatial/src/interval.rs
b/parquet-geospatial/src/interval.rs
new file mode 100644
index 0000000000..1929a12a96
--- /dev/null
+++ b/parquet-geospatial/src/interval.rs
@@ -0,0 +1,1080 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_schema::ArrowError;
+
+/// Generic 1D intervals with wraparound support
+///
+/// This trait specifies common behaviour implemented by the [Interval] and
+/// [WraparoundInterval].
+///
+/// Briefly, "wraparound" support was included in the Parquet specification
+/// to ensure that geometries or geographies with components on both sides of
+/// antimeridian (180 degrees longitude) can be reasonably summarized. This
+/// concept was borrowed from the widely used GeoJSON and matches identical
+/// bounding box specifications for GeoParquet and STAC, among others.
+///
+/// Because the Parquet specification also states that longitude values
+/// are always stored as `x` values (i.e., the first coordinate component),
+/// this contingency is only available for the `xmin`/`xmax` component of the
+/// GeoStatistics. Thus, the `xmin`/`xmax` pair may either represent a regular
+/// interval (specified by xmin = 10 and xmax = 20):
+///
+/// ```text
+/// 10 20
+/// |==========|
+/// ```
+///
+/// ...or a "wraparound" interval (specified by xmin = 20 and xmax = 10). This
+/// interval is the union of the two regular intervals (-Inf, 10] and (20,
Inf).
+/// Infinity was chosen rather than any particular value to ensure that Parquet
+/// implementations did not have to consider the value of the coordinate
+/// reference system when comparing intervals.
+///
+/// ```text
+/// 10 20
+/// <==========| |============>
+/// ```
+///
+/// In general, one should use [Interval] unless specifically working with
+/// wraparound, as the contingency of wraparound incurs overhead (particularly
+/// in a loop). This trait is mostly used to simplify testing and unify
+/// documentation for the two concrete implementations.
+pub trait IntervalTrait: std::fmt::Debug + PartialEq {
+ /// Create an interval from lo and hi values
+ fn new(lo: f64, hi: f64) -> Self;
+
+ /// Create an empty interval that intersects nothing (except the full
interval)
+ fn empty() -> Self;
+
+ /// Create the full interval (that intersects everything, including the
empty interval)
+ fn full() -> Self;
+
+ /// Lower bound
+ ///
+ /// If `is_wraparound()` returns false, this is also the minimum value.
When empty,
+ /// this value is Infinity; when full, this value is -Infinity.
+ fn lo(&self) -> f64;
+
+ /// Upper bound
+ ///
+ /// If `is_wraparound()` returns false, this is also the maximum value.
When empty,
+ /// this value is -Infinity; when full, this value is Infinity.
+ fn hi(&self) -> f64;
+
+ /// Check for wraparound
+ ///
+ /// If `is_wraparound()` returns false, this interval represents the
values that are
+ /// between lo and hi. If `is_wraparound()` returns true, this interval
represents
+ /// the values that are *not* between lo and hi.
+ ///
+ /// It is recommended to work directly with an [Interval] where this is
guaranteed to
+ /// return false unless wraparound support is specifically required.
+ fn is_wraparound(&self) -> bool;
+
+ /// Check for potential intersection with a value
+ ///
+ /// Note that intervals always contain their endpoints (for both the
wraparound and
+ /// non-wraparound case).
+ fn intersects_value(&self, value: f64) -> bool;
+
+ /// Check for potential intersection with an interval
+ ///
+ /// Note that intervals always contain their endpoints (for both the
wraparound and
+ /// non-wraparound case).
+ ///
+ /// This method accepts Self for performance reasons to prevent
unnecessary checking of
+ /// `is_wraparound()` when not required for an implementation.
+ fn intersects_interval(&self, other: &Self) -> bool;
+
+ /// Check for potential containment of an interval
+ ///
+ /// Note that intervals always contain their endpoints (for both the
wraparound and
+ /// non-wraparound case).
+ ///
+ /// This method accepts Self for performance reasons to prevent
unnecessary checking of
+ /// `is_wraparound()` when not required for an implementation.
+ fn contains_interval(&self, other: &Self) -> bool;
+
+ /// The width of the interval
+ ///
+ /// For the non-wraparound case, this is the distance between lo and hi.
For the wraparound
+ /// case, this is infinity.
+ fn width(&self) -> f64;
+
+ /// The midpoint of the interval
+ ///
+ /// For the non-wraparound case, this is the point exactly between lo and
hi. For the wraparound
+ /// case, this is arbitrarily chosen as infinity (to preserve the property
that intervals intersect
+ /// their midpoint).
+ fn mid(&self) -> f64;
+
+ /// True if this interval is empty (i.e. intersects no values)
+ fn is_empty(&self) -> bool;
+
+ /// Compute a new interval that is the union of both
+ ///
+ /// When accumulating intervals in a loop, use [Interval::update_interval].
+ fn merge_interval(&self, other: &Self) -> Self;
+
+ /// Compute a new interval that is the union of both
+ ///
+ /// When accumulating intervals in a loop, use [Interval::update_value].
+ fn merge_value(&self, other: f64) -> Self;
+
+ /// Expand this interval by a given distance
+ ///
+ /// Returns a new interval where both endpoints are moved outward by the
given distance.
+ /// For regular intervals, this expands both lo and hi by the distance.
+ /// For wraparound intervals, this may result in the full interval if
expansion is large enough.
+ fn expand_by(&self, distance: f64) -> Self;
+}
+
+/// 1D Interval that never wraps around
+///
+/// Represents a minimum and maximum value without wraparound logic (see
[WraparoundInterval]
+/// for a wraparound implementation).
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct Interval {
+ /// Lower bound
+ lo: f64,
+
+ /// Upper bound
+ hi: f64,
+}
+
+impl Interval {
+ /// Expand this interval to the union of self and other in place
+ ///
+ /// Note that NaN values are ignored when updating bounds.
+ pub fn update_interval(&mut self, other: &Self) {
+ self.lo = self.lo.min(other.lo);
+ self.hi = self.hi.max(other.hi);
+ }
+
+ /// Expand this interval to the union of self and other in place
+ ///
+ /// Note that NaN values are ignored when updating bounds.
+ pub fn update_value(&mut self, other: f64) {
+ self.lo = self.lo.min(other);
+ self.hi = self.hi.max(other);
+ }
+}
+
+impl From<(f64, f64)> for Interval {
+ fn from(value: (f64, f64)) -> Self {
+ Interval::new(value.0, value.1)
+ }
+}
+
+impl From<(i32, i32)> for Interval {
+ fn from(value: (i32, i32)) -> Self {
+ Interval::new(value.0 as f64, value.1 as f64)
+ }
+}
+
+impl TryFrom<WraparoundInterval> for Interval {
+ type Error = ArrowError;
+
+ fn try_from(value: WraparoundInterval) -> Result<Self, Self::Error> {
+ if value.is_wraparound() {
+ Err(ArrowError::InvalidArgumentError(format!(
+ "Can't convert wraparound interval {value:?} to Interval"
+ )))
+ } else {
+ Ok(Interval::new(value.lo(), value.hi()))
+ }
+ }
+}
+
+impl IntervalTrait for Interval {
+ fn new(lo: f64, hi: f64) -> Self {
+ Self { lo, hi }
+ }
+
+ fn empty() -> Self {
+ Self {
+ lo: f64::INFINITY,
+ hi: -f64::INFINITY,
+ }
+ }
+
+ fn full() -> Self {
+ Self {
+ lo: -f64::INFINITY,
+ hi: f64::INFINITY,
+ }
+ }
+
+ fn lo(&self) -> f64 {
+ self.lo
+ }
+
+ fn hi(&self) -> f64 {
+ self.hi
+ }
+
+ fn is_wraparound(&self) -> bool {
+ false
+ }
+
+ fn intersects_value(&self, value: f64) -> bool {
+ value >= self.lo && value <= self.hi
+ }
+
+ fn intersects_interval(&self, other: &Self) -> bool {
+ self.lo <= other.hi && other.lo <= self.hi
+ }
+
+ fn contains_interval(&self, other: &Self) -> bool {
+ self.lo <= other.lo && self.hi >= other.hi
+ }
+
+ fn width(&self) -> f64 {
+ self.hi - self.lo
+ }
+
+ fn mid(&self) -> f64 {
+ self.lo + self.width() / 2.0
+ }
+
+ fn is_empty(&self) -> bool {
+ self.width() == -f64::INFINITY
+ }
+
+ fn merge_interval(&self, other: &Self) -> Self {
+ let mut out = *self;
+ out.update_interval(other);
+ out
+ }
+
+ fn merge_value(&self, other: f64) -> Self {
+ let mut out = *self;
+ out.update_value(other);
+ out
+ }
+
+ fn expand_by(&self, distance: f64) -> Self {
+ if self.is_empty() || distance.is_nan() || distance < 0.0 {
+ return *self;
+ }
+
+ Self::new(self.lo - distance, self.hi + distance)
+ }
+}
+
+/// 1D Interval that may or may not wrap around
+///
+/// Concrete implementation that handles both the wraparound and regular
+/// interval case. This is separated from the [Interval] because the
+/// [Interval] is faster and most operations will use it directly (invoking
+/// this struct when it is specifically required).
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct WraparoundInterval {
+ inner: Interval,
+}
+
+impl WraparoundInterval {
+ /// Splits this interval into exactly two non-wraparound intervals
+ ///
+ /// If this interval does not wrap around, one of these intervals will
+ /// be empty.
+ fn split(&self) -> (Interval, Interval) {
+ if self.is_wraparound() {
+ (
+ Interval {
+ lo: -f64::INFINITY,
+ hi: self.inner.hi,
+ },
+ Interval {
+ lo: self.inner.lo,
+ hi: f64::INFINITY,
+ },
+ )
+ } else {
+ (self.inner, Interval::empty())
+ }
+ }
+}
+
+impl From<(f64, f64)> for WraparoundInterval {
+ fn from(value: (f64, f64)) -> Self {
+ WraparoundInterval::new(value.0, value.1)
+ }
+}
+
+impl From<(i32, i32)> for WraparoundInterval {
+ fn from(value: (i32, i32)) -> Self {
+ WraparoundInterval::new(value.0 as f64, value.1 as f64)
+ }
+}
+
+impl From<Interval> for WraparoundInterval {
+ fn from(value: Interval) -> Self {
+ WraparoundInterval::new(value.lo(), value.hi())
+ }
+}
+
+impl IntervalTrait for WraparoundInterval {
+ fn new(lo: f64, hi: f64) -> Self {
+ Self {
+ inner: Interval::new(lo, hi),
+ }
+ }
+
+ fn empty() -> Self {
+ Self {
+ inner: Interval::empty(),
+ }
+ }
+
+ fn full() -> Self {
+ Self {
+ inner: Interval::full(),
+ }
+ }
+
+ fn lo(&self) -> f64 {
+ self.inner.lo
+ }
+
+ fn hi(&self) -> f64 {
+ self.inner.hi
+ }
+
+ fn is_wraparound(&self) -> bool {
+ !self.is_empty() && self.inner.width() < 0.0
+ }
+
+ fn intersects_value(&self, value: f64) -> bool {
+ let (left, right) = self.split();
+ left.intersects_value(value) || right.intersects_value(value)
+ }
+
+ fn intersects_interval(&self, other: &Self) -> bool {
+ let (left, right) = self.split();
+ let (other_left, other_right) = other.split();
+ left.intersects_interval(&other_left)
+ || left.intersects_interval(&other_right)
+ || right.intersects_interval(&other_left)
+ || right.intersects_interval(&other_right)
+ }
+
+ fn contains_interval(&self, other: &Self) -> bool {
+ let (left, right) = self.split();
+ let (other_left, other_right) = other.split();
+ left.contains_interval(&other_left) &&
right.contains_interval(&other_right)
+ }
+
+ fn width(&self) -> f64 {
+ if self.is_wraparound() {
+ f64::INFINITY
+ } else {
+ self.inner.width()
+ }
+ }
+
+ fn mid(&self) -> f64 {
+ if self.is_wraparound() {
+ f64::INFINITY
+ } else {
+ self.inner.mid()
+ }
+ }
+
+ fn is_empty(&self) -> bool {
+ self.inner.is_empty()
+ }
+
+ fn merge_interval(&self, other: &Self) -> Self {
+ if self.is_empty() {
+ return *other;
+ }
+
+ if other.is_empty() {
+ return *self;
+ }
+
+ let (wraparound, not_wraparound) = match (self.is_wraparound(),
other.is_wraparound()) {
+ // Handle wraparound/not wraparound below
+ (true, false) => (self, other),
+ (false, true) => (other, self),
+ // Both are wraparound: Merge the two left intervals, then merge
the two right intervals
+ // and check if we need the full interval
+ (true, true) => {
+ let (left, right) = self.split();
+ let (other_left, other_right) = other.split();
+
+ let new_left = left.merge_interval(&other_left);
+ let new_right = right.merge_interval(&other_right);
+
+ // If the left and right intervals intersect each other, we
need the full interval
+ if new_left.intersects_interval(&new_right) {
+ return WraparoundInterval::full();
+ } else {
+ return WraparoundInterval::new(new_right.lo(),
new_left.hi());
+ }
+ }
+ // Neither are wraparound: just merge the inner intervals
+ (false, false) => {
+ return Self {
+ inner: self.inner.merge_interval(&other.inner),
+ };
+ }
+ };
+
+ let (left, right) = wraparound.split();
+ let distance_not_wraparound_left = (not_wraparound.mid() -
left.hi()).abs();
+ let distance_not_wraparound_right = (not_wraparound.mid() -
right.lo()).abs();
+ let (new_left, new_right) = if distance_not_wraparound_left <
distance_not_wraparound_right
+ {
+ (left.merge_interval(¬_wraparound.inner), right)
+ } else {
+ (left, right.merge_interval(¬_wraparound.inner))
+ };
+
+ // If the left and right intervals intersect each other, we need the
full interval
+ if new_left.intersects_interval(&new_right) {
+ WraparoundInterval::full()
+ } else {
+ WraparoundInterval::new(new_right.lo(), new_left.hi())
+ }
+ }
+
+ fn merge_value(&self, value: f64) -> Self {
+ if self.intersects_value(value) || value.is_nan() {
+ return *self;
+ }
+
+ if !self.is_wraparound() {
+ return Self {
+ inner: self.inner.merge_value(value),
+ };
+ }
+
+ // Move only one of the endpoints
+ let distance_left = value - self.inner.hi;
+ let distance_right = self.inner.lo - value;
+ debug_assert!(distance_left > 0.0);
+ debug_assert!(distance_right > 0.0);
+ if distance_left < distance_right {
+ Self {
+ inner: Interval {
+ lo: self.inner.lo,
+ hi: value,
+ },
+ }
+ } else {
+ Self {
+ inner: Interval {
+ lo: value,
+ hi: self.inner.hi,
+ },
+ }
+ }
+ }
+
+ fn expand_by(&self, distance: f64) -> Self {
+ if self.is_empty() || distance.is_nan() || distance < 0.0 {
+ return *self;
+ }
+
+ if !self.is_wraparound() {
+ // For non-wraparound, just expand the inner interval
+ return Self {
+ inner: self.inner.expand_by(distance),
+ };
+ }
+
+ // For wraparound intervals, expanding means including more values
+ // Wraparound interval (a, b) where a > b excludes the region (b, a)
+ // To expand by distance d, we shrink the excluded region from (b, a)
to (b+d, a-d)
+ // This means the new wraparound interval becomes (a-d, b+d)
+ let excluded_lo = self.inner.hi + distance; // b + d
+ let excluded_hi = self.inner.lo - distance; // a - d
+
+ // If the excluded region disappears (excluded_lo >= excluded_hi), we
get the full interval
+ if excluded_lo >= excluded_hi {
+ return Self::full();
+ }
+
+ // The new wraparound interval excludes (excluded_lo, excluded_hi)
+ // So the interval itself is (excluded_hi, excluded_lo)
+ Self::new(excluded_hi, excluded_lo)
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use core::f64;
+
+ use super::*;
+
+ fn test_empty<T: IntervalTrait>(empty: T) {
+ // Equals itself
+ #[allow(clippy::eq_op)]
+ {
+ assert_eq!(empty, empty);
+ }
+
+ // Empty intersects no values
+ assert!(!empty.intersects_value(0.0));
+ assert!(!empty.intersects_value(f64::INFINITY));
+ assert!(!empty.intersects_value(-f64::INFINITY));
+ assert!(!empty.intersects_value(f64::NAN));
+
+ // Empty intersects no intervals
+ assert!(!empty.intersects_interval(&T::new(-10.0, 10.0)));
+ assert!(!empty.intersects_interval(&T::empty()));
+
+ // ...except the full interval
+ assert!(empty.intersects_interval(&T::full()));
+
+ // Empty contains no intervals
+ assert!(!empty.contains_interval(&T::new(-10.0, 10.0)));
+ assert!(!empty.contains_interval(&T::full()));
+
+ // ...except empty itself (empty set is subset of itself)
+ assert!(empty.contains_interval(&T::empty()));
+
+ // Merging NaN is still empty
+ assert_eq!(empty.merge_value(f64::NAN), empty);
+
+ // Merging an empty interval results in an empty interval
+ assert_eq!(empty.merge_interval(&empty), empty);
+
+ // Merging a value results in a interval with equal lo/hi
+ assert_eq!(empty.merge_value(12.0), T::new(12.0, 12.0));
+
+ // Merging a non-empty interval results in the other interval
+ assert_eq!(
+ empty.merge_interval(&T::new(10.0, 20.0)),
+ T::new(10.0, 20.0)
+ );
+
+ // Expanding empty interval keeps it empty
+ assert_eq!(empty.expand_by(5.0), empty);
+ assert_eq!(empty.expand_by(0.0), empty);
+ assert_eq!(empty.expand_by(-1.0), empty);
+ assert_eq!(empty.expand_by(f64::NAN), empty);
+ }
+
+ #[test]
+ fn interval_empty() {
+ let empty = Interval::empty();
+ test_empty(empty);
+ }
+
+ #[test]
+ fn wraparound_interval_empty() {
+ let empty = WraparoundInterval::empty();
+
+ // Should pass all the regular interval tests
+ test_empty(empty);
+
+ // Empty shouldn't be treated as wraparound
+ assert!(!empty.is_wraparound());
+
+ // When merging an interval where the other one is a
+ // wraparound, we should get the other interval
+ assert_eq!(
+ empty.merge_interval(&WraparoundInterval::new(20.0, 10.0)),
+ WraparoundInterval::new(20.0, 10.0)
+ );
+ }
+
+ fn test_finite<T: IntervalTrait>(finite: T) {
+ // Check accessors
+ assert_eq!(finite.lo(), 10.0);
+ assert_eq!(finite.hi(), 20.0);
+ assert_eq!(finite.mid(), 15.0);
+ assert_eq!(finite.width(), 10.0);
+ assert!(!finite.is_wraparound());
+ assert!(!finite.is_empty());
+
+ // Intersects endpoints and midpoint
+ assert!(finite.intersects_value(10.0));
+ assert!(finite.intersects_value(15.0));
+ assert!(finite.intersects_value(20.0));
+
+ // Doesn't intersect infinite values, NaN, or finite values outside
+ // the range
+ assert!(!finite.intersects_value(0.0));
+ assert!(!finite.intersects_value(f64::INFINITY));
+ assert!(!finite.intersects_value(-f64::INFINITY));
+ assert!(!finite.intersects_value(f64::NAN));
+
+ // Intervals that intersect
+ assert!(finite.intersects_interval(&T::new(14.0, 16.0)));
+ assert!(finite.intersects_interval(&T::new(5.0, 15.0)));
+ assert!(finite.intersects_interval(&T::new(15.0, 25.0)));
+ assert!(finite.intersects_interval(&T::new(5.0, 25.0)));
+ assert!(finite.intersects_interval(&T::full()));
+
+ // Barely touching ones count
+ assert!(finite.intersects_interval(&T::new(5.0, 10.0)));
+ assert!(finite.intersects_interval(&T::new(20.0, 25.0)));
+
+ // Intervals that don't intersect
+ assert!(!finite.intersects_interval(&T::new(0.0, 5.0)));
+ assert!(!finite.intersects_interval(&T::new(25.0, 30.0)));
+ assert!(!finite.intersects_interval(&T::empty()));
+
+ // Intervals that are contained
+ assert!(finite.contains_interval(&T::new(14.0, 16.0)));
+ assert!(finite.contains_interval(&T::new(10.0, 15.0)));
+ assert!(finite.contains_interval(&T::new(15.0, 20.0)));
+ assert!(finite.contains_interval(&T::new(10.0, 20.0))); // itself
+ assert!(finite.contains_interval(&T::empty()));
+
+ // Intervals that are not contained
+ assert!(!finite.contains_interval(&T::new(5.0, 15.0))); // extends
below
+ assert!(!finite.contains_interval(&T::new(15.0, 25.0))); // extends
above
+ assert!(!finite.contains_interval(&T::new(5.0, 25.0))); // extends
both ways
+ assert!(!finite.contains_interval(&T::new(0.0, 5.0))); // completely
below
+ assert!(!finite.contains_interval(&T::new(25.0, 30.0))); // completely
above
+ assert!(!finite.contains_interval(&T::full())); // full interval is
larger
+
+ // Merging NaN
+ assert_eq!(finite.merge_value(f64::NAN), finite);
+
+ // Merging Infinities
+ assert_eq!(
+ finite.merge_value(f64::INFINITY),
+ T::new(finite.lo(), f64::INFINITY)
+ );
+ assert_eq!(
+ finite.merge_value(-f64::INFINITY),
+ T::new(-f64::INFINITY, finite.hi())
+ );
+
+ // Merging a value within the interval
+ assert_eq!(finite.merge_value(15.0), finite);
+
+ // Merging a value above
+ assert_eq!(finite.merge_value(25.0), T::new(10.0, 25.0));
+
+ // Merging a value below
+ assert_eq!(finite.merge_value(5.0), T::new(5.0, 20.0));
+
+ // Merging an empty interval
+ assert_eq!(finite.merge_interval(&T::empty()), finite);
+
+ // Merging an interval with itself
+ assert_eq!(finite.merge_interval(&finite), finite);
+
+ // Merging an interval with the full interval
+ assert_eq!(finite.merge_interval(&T::full()), T::full());
+
+ // Merging an interval within the interval
+ assert_eq!(finite.merge_interval(&T::new(14.0, 16.0)), finite);
+
+ // Merging a partially overlapping interval below
+ assert_eq!(finite.merge_interval(&T::new(5.0, 15.0)), T::new(5.0,
20.0));
+
+ // Merging a partially overlapping interval above
+ assert_eq!(
+ finite.merge_interval(&T::new(15.0, 25.0)),
+ T::new(10.0, 25.0)
+ );
+
+ // Merging a disjoint interval below
+ assert_eq!(finite.merge_interval(&T::new(0.0, 5.0)), T::new(0.0,
20.0));
+
+ // Merging a disjoint interval above
+ assert_eq!(
+ finite.merge_interval(&T::new(25.0, 30.0)),
+ T::new(10.0, 30.0)
+ );
+
+ // Expanding by positive distance
+ assert_eq!(finite.expand_by(2.0), T::new(8.0, 22.0));
+ assert_eq!(finite.expand_by(5.0), T::new(5.0, 25.0));
+
+ // Expanding by zero does nothing
+ assert_eq!(finite.expand_by(0.0), finite);
+
+ // Expanding by negative distance does nothing
+ assert_eq!(finite.expand_by(-1.0), finite);
+
+ // Expanding by NaN does nothing
+ assert_eq!(finite.expand_by(f64::NAN), finite);
+ }
+
+ #[test]
+ fn interval_finite() {
+ let finite = Interval::new(10.0, 20.0);
+ test_finite(finite);
+ }
+
+ #[test]
+ fn wraparound_interval_finite() {
+ let finite = WraparoundInterval::new(10.0, 20.0);
+ test_finite(finite);
+
+ // Convert to an Interval
+ let interval: Interval = finite.try_into().unwrap();
+ assert_eq!(interval, Interval::new(10.0, 20.0));
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_accessors() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+ assert!(wraparound.is_wraparound());
+ assert!(!wraparound.is_empty());
+ assert_eq!(wraparound.mid(), f64::INFINITY);
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_intersects_value() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Intersects endpoints but not a point between them
+ assert!(wraparound.intersects_value(10.0));
+ assert!(wraparound.intersects_value(20.0));
+ assert!(!wraparound.intersects_value(15.0));
+
+ // Intersects positive and negative infinity
+ assert!(wraparound.intersects_value(f64::INFINITY));
+ assert!(wraparound.intersects_value(-f64::INFINITY));
+
+ // ...but not NaN
+ assert!(!wraparound.intersects_value(f64::NAN));
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_intersects_interval() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Intersects itself
+ assert!(wraparound.intersects_interval(&wraparound));
+
+ // Intersects the full interval
+ assert!(wraparound.intersects_interval(&WraparoundInterval::full()));
+
+ // Interval completely between endpoints doesn't intersect
+ assert!(!wraparound.intersects_interval(&WraparoundInterval::new(14.0,
16.0)));
+ // ...unless it's also wraparound
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(16.0,
14.0)));
+
+ // Intervals overlapping endpoints intersect whether the are or aren't
wraparound
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0,
15.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(15.0,
5.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(15.0,
25.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0,
15.0)));
+
+ // Barely touching ones still intersect whether the are or aren't
wraparound
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0,
10.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(10.0,
5.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(20.0,
25.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0,
20.0)));
+
+ // Intervals completely above and below endpoints do intersect whether
they
+ // are or aren't wraparound
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(0.0,
5.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(5.0,
0.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(25.0,
30.0)));
+ assert!(wraparound.intersects_interval(&WraparoundInterval::new(30.0,
25.0)));
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_contains_interval() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Contains itself
+ assert!(wraparound.contains_interval(&wraparound));
+
+ // Empty is contained by everything
+ assert!(wraparound.contains_interval(&WraparoundInterval::empty()));
+
+ // Does not contain the full interval
+ assert!(!wraparound.contains_interval(&WraparoundInterval::full()));
+
+ // Regular interval completely between endpoints is not contained
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(14.0,
16.0)));
+
+ // Wraparound intervals that exclude more (narrower included regions)
are contained
+ assert!(wraparound.contains_interval(&WraparoundInterval::new(22.0,
8.0))); // excludes (8,22) which is larger than (10,20)
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(18.0,
12.0))); // excludes (12,18) which is smaller than (10,20)
+
+ // Regular intervals don't work the same way due to the split logic
+ // For a regular interval (a, b), split gives (left=(a,b), right=empty)
+ // For wraparound to contain it, we need both parts to be contained
+ // This means (-inf, 10] must contain (a,b) AND [20, inf) must contain
empty
+ // The second is always true, but the first requires b <= 10
+ assert!(wraparound.contains_interval(&WraparoundInterval::new(0.0,
5.0))); // completely within left part
+ assert!(wraparound.contains_interval(&WraparoundInterval::new(-5.0,
10.0))); // fits in left part
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(25.0,
30.0))); // doesn't fit in left part
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(20.0,
25.0))); // doesn't fit in left part
+
+ // Regular intervals that overlap the excluded zone are not contained
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(5.0,
15.0))); // overlaps excluded zone
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(15.0,
25.0))); // overlaps excluded zone
+
+ // Wraparound intervals that exclude less (wider included regions) are
not contained
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(15.0,
5.0))); // excludes (5,15) which is smaller
+ assert!(!wraparound.contains_interval(&WraparoundInterval::new(25.0,
15.0)));
+ // excludes (15,25) which is smaller
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_merge_value() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Merging NaN
+ assert_eq!(wraparound.merge_value(f64::NAN), wraparound);
+
+ // Merging a value closer to the left endpoint should move
+ // that endpoint
+ assert_eq!(
+ wraparound.merge_value(12.0),
+ WraparoundInterval::new(20.0, 12.0)
+ );
+
+ // Merging a value closer to the right endpoint should move
+ // that endpoint
+ assert_eq!(
+ wraparound.merge_value(18.0),
+ WraparoundInterval::new(18.0, 10.0)
+ );
+
+ // Merging a value that is already intersecting shouldn't change the
interval
+ assert_eq!(wraparound.merge_value(5.0), wraparound);
+ assert_eq!(wraparound.merge_value(10.0), wraparound);
+ assert_eq!(wraparound.merge_value(20.0), wraparound);
+ assert_eq!(wraparound.merge_value(25.0), wraparound);
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_merge_interval() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Merging an empty interval
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::empty()),
+ wraparound
+ );
+
+ // Merging an interval with itself
+ assert_eq!(wraparound.merge_interval(&wraparound), wraparound);
+
+ // Merging a wraparound interval with a "larger" wraparound interval
+ // 10 20
+ // <==========| |============>
+ // <==============| |================>
+ // 14 16
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(16.0, 14.0)),
+ WraparoundInterval::new(16.0, 14.0)
+ );
+
+ // Merging a wraparound interval with a "smaller" wraparound interval
+ // 10 20
+ // <==========| |============>
+ // <=====| |=======>
+ // 5 25
+ // <==========| |============>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(25.0, 5.0)),
+ wraparound
+ );
+
+ // Merge with partially intersecting wraparounds
+ // 10 20
+ // <==========| |============>
+ // <=====| |=================>
+ // 5 15
+ // <==========| |=================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(15.0, 5.0)),
+ WraparoundInterval::new(15.0, 10.0)
+ );
+
+ // 10 20
+ // <==========| |============>
+ // <================| |======>
+ // 15 25
+ // <================| |============>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(25.0, 15.0)),
+ WraparoundInterval::new(20.0, 15.0)
+ );
+
+ // Merge wraparound with wraparound whose union is the full interval
+ // 10 20
+ // <==========| |=========================>
+ // <=============================| |======>
+ // 25 30
+ // <===============================================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(30.0, 25.0)),
+ WraparoundInterval::full()
+ );
+
+ // 10 20
+ // <===================| |================>
+ // <==| |=================================>
+ // 0 5
+ // <===============================================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(5.0, 0.0)),
+ WraparoundInterval::full()
+ );
+
+ // Merge wraparound with a regular interval completely contained by
the original
+ // 10 20
+ // <=================| |==================>
+ // |=========|
+ // 25 30
+ // <=================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(25.0, 30.0)),
+ wraparound
+ );
+
+ // 10 20
+ // <=================| |==================>
+ // |=========|
+ // 0 5
+ // <=================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(0.0, 5.0)),
+ wraparound
+ );
+
+ // Merge wraparound with a partially intersecting regular interval that
+ // should extend the left side
+ // 10 20
+ // <=================| |==================>
+ // |=========|
+ // 5 15
+ // <======================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(5.0, 15.0)),
+ WraparoundInterval::new(20.0, 15.0)
+ );
+
+ // Merge wraparound with a partially intersecting regular interval that
+ // should extend the right side
+ // 10 20
+ // <=================| |==================>
+ // |=========|
+ // 15 25
+ // <=================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(15.0, 25.0)),
+ WraparoundInterval::new(15.0, 10.0)
+ );
+
+ // Merge wraparound with a disjoint regular interval that should
extend the left side
+ // 10 20
+ // <=================| |==================>
+ // |==|
+ // 12 15
+ // <======================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(12.0, 15.0)),
+ WraparoundInterval::new(20.0, 15.0)
+ );
+
+ // Merge wraparound with a disjoint regular interval that should
extend the right side
+ // 10 20
+ // <=================| |==================>
+ // |==|
+ // 15 18
+ // <=================| |==================>
+ assert_eq!(
+ wraparound.merge_interval(&WraparoundInterval::new(15.0, 18.0)),
+ WraparoundInterval::new(15.0, 10.0)
+ );
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_expand_by() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Expanding by a small amount shrinks the excluded region
+ // Original excludes (10, 20), expanding by 2 should exclude (12, 18)
+ // So the new interval should be (18, 12) = everything except (12, 18)
+ assert_eq!(
+ wraparound.expand_by(2.0),
+ WraparoundInterval::new(18.0, 12.0)
+ ); // now excludes (12, 18)
+
+ // Expanding by 4 should exclude (14, 16)
+ assert_eq!(
+ wraparound.expand_by(4.0),
+ WraparoundInterval::new(16.0, 14.0)
+ ); // now excludes (14, 16)
+
+ // Expanding by 5.0 should exactly eliminate the excluded region
+ // excluded region (10, 20) shrinks to (15, 15) which is empty
+ assert_eq!(wraparound.expand_by(5.0), WraparoundInterval::full()); //
excluded region disappears
+
+ // Any expansion greater than 5.0 should also give full interval
+ assert_eq!(wraparound.expand_by(6.0), WraparoundInterval::full());
+
+ assert_eq!(wraparound.expand_by(100.0), WraparoundInterval::full());
+
+ // Expanding by zero does nothing
+ assert_eq!(wraparound.expand_by(0.0), wraparound);
+
+ // Expanding by negative distance does nothing
+ assert_eq!(wraparound.expand_by(-1.0), wraparound);
+
+ // Expanding by NaN does nothing
+ assert_eq!(wraparound.expand_by(f64::NAN), wraparound);
+
+ // Test a finite (non-wraparound) wraparound interval
+ let non_wraparound = WraparoundInterval::new(10.0, 20.0);
+ assert!(!non_wraparound.is_wraparound());
+ assert_eq!(
+ non_wraparound.expand_by(2.0),
+ WraparoundInterval::new(8.0, 22.0)
+ );
+
+ // Test another wraparound case - excludes (5, 15) with width 10
+ let wraparound2 = WraparoundInterval::new(15.0, 5.0);
+ // Expanding by 3 should shrink excluded region from (5, 15) to (8, 12)
+ assert_eq!(
+ wraparound2.expand_by(3.0),
+ WraparoundInterval::new(12.0, 8.0)
+ );
+
+ // Expanding by 5 should make excluded region disappear: (5+5, 15-5) =
(10, 10)
+ assert_eq!(wraparound2.expand_by(5.0), WraparoundInterval::full());
+ }
+
+ #[test]
+ fn wraparound_interval_actually_wraparound_convert() {
+ // Everything *except* the interval (10, 20)
+ let wraparound = WraparoundInterval::new(20.0, 10.0);
+
+ // Can't convert a wraparound interval that actually wraps around to
an Interval
+ let err = Interval::try_from(wraparound).unwrap_err();
+ assert!(err
+ .to_string()
+ .contains("Can't convert wraparound interval"));
+ }
+}
diff --git a/parquet-geospatial/src/lib.rs b/parquet-geospatial/src/lib.rs
index f37b9b866c..1929c00fd8 100644
--- a/parquet-geospatial/src/lib.rs
+++ b/parquet-geospatial/src/lib.rs
@@ -26,3 +26,6 @@
//! If you are interested in helping, you can find more information on the
GitHub [Geometry issue]
//!
//! [Geometry issue]: https://github.com/apache/arrow-rs/issues/8373
+
+pub mod bounding;
+pub mod interval;