This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new a41ebdd1954 branch-3.0: [fix](func) Fix precision loss in
ST_GeometryFromWKB coordinate parsing #46661 (#47264)
a41ebdd1954 is described below
commit a41ebdd195452513f36b4d2bb0ce30e85eff4dae
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 20 14:38:05 2025 +0800
branch-3.0: [fix](func) Fix precision loss in ST_GeometryFromWKB coordinate
parsing #46661 (#47264)
Cherry-picked from #46661
Co-authored-by: lw112 <[email protected]>
---
be/src/geo/geo_types.cpp | 2 +-
be/src/geo/wkb_parse.cpp | 153 ++++++++++++++-------
be/src/geo/wkb_parse.h | 14 +-
.../data/correctness_p0/test_select_constant.out | Bin 211 -> 214 bytes
.../data/nereids_function_p0/scalar_function/S.out | Bin 53552 -> 53624 bytes
.../data/nereids_p0/test_select_constant.out | Bin 186 -> 189 bytes
.../spatial_functions/test_gis_function.out | Bin 1438 -> 1604 bytes
.../data/query_p0/test_select_constant.out | Bin 186 -> 189 bytes
.../spatial_functions/test_gis_function.groovy | 6 +
9 files changed, 126 insertions(+), 49 deletions(-)
diff --git a/be/src/geo/geo_types.cpp b/be/src/geo/geo_types.cpp
index bee6f69ba8e..dc27595da3b 100644
--- a/be/src/geo/geo_types.cpp
+++ b/be/src/geo/geo_types.cpp
@@ -57,7 +57,7 @@ GeoCircle::~GeoCircle() = default;
void print_s2point(std::ostream& os, const S2Point& point) {
S2LatLng coord(point);
- os << std::setprecision(12) << coord.lng().degrees() << " " <<
coord.lat().degrees();
+ os << std::setprecision(15) << coord.lng().degrees() << " " <<
coord.lat().degrees();
}
static inline bool is_valid_lng_lat(double lng, double lat) {
diff --git a/be/src/geo/wkb_parse.cpp b/be/src/geo/wkb_parse.cpp
index e24328d7564..7b345929fc0 100644
--- a/be/src/geo/wkb_parse.cpp
+++ b/be/src/geo/wkb_parse.cpp
@@ -122,108 +122,169 @@ WkbParseContext* WkbParse::read(std::istream& is,
WkbParseContext* ctx) {
auto size = is.tellg();
is.seekg(0, std::ios::beg);
- std::vector<unsigned char> buf(static_cast<size_t>(size));
- is.read(reinterpret_cast<char*>(buf.data()),
static_cast<std::streamsize>(size));
-
- ctx->dis = ByteOrderDataInStream(buf.data(), buf.size()); // will default
to machine endian
+ // Check if size is valid
+ if (size <= 0) {
+ ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+ return ctx;
+ }
- ctx->shape = readGeometry(ctx).release();
+ std::vector<unsigned char> buf(static_cast<size_t>(size));
+ if (!is.read(reinterpret_cast<char*>(buf.data()),
static_cast<std::streamsize>(size))) {
+ ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+ return ctx;
+ }
- if (!ctx->shape) {
+ // Ensure we have at least one byte for byte order
+ if (buf.empty()) {
ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+ return ctx;
}
- return ctx;
-}
-std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
- // determine byte order
- unsigned char byteOrder = ctx->dis.readByte();
+ // First read the byte order using machine endian
+ auto byteOrder = buf[0];
- // default is machine endian
+ // Create ByteOrderDataInStream with the correct byte order
if (byteOrder == byteOrder::wkbNDR) {
+ ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_LITTLE);
} else if (byteOrder == byteOrder::wkbXDR) {
+ ctx->dis = ByteOrderDataInStream(buf.data(), buf.size());
ctx->dis.setOrder(ByteOrderValues::ENDIAN_BIG);
+ } else {
+ ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+ return ctx;
+ }
+
+ std::unique_ptr<GeoShape> shape = readGeometry(ctx);
+ if (!shape) {
+ ctx->parse_status = GEO_PARSE_WKB_SYNTAX_ERROR;
+ return ctx;
}
- uint32_t typeInt = ctx->dis.readUnsigned();
+ ctx->shape = shape.release();
+ return ctx;
+}
- uint32_t geometryType = (typeInt & 0xffff) % 1000;
+std::unique_ptr<GeoShape> WkbParse::readGeometry(WkbParseContext* ctx) {
+ try {
+ // Ensure we have enough data to read
+ if (ctx->dis.size() < 5) { // At least 1 byte for order and 4 bytes
for type
+ return nullptr;
+ }
- std::unique_ptr<GeoShape> shape;
+ // Skip the byte order as we've already handled it
+ ctx->dis.readByte();
- switch (geometryType) {
- case wkbType::wkbPoint:
- shape.reset(readPoint(ctx).release());
- break;
- case wkbType::wkbLine:
- shape.reset(readLine(ctx).release());
- break;
- case wkbType::wkbPolygon:
- shape.reset(readPolygon(ctx).release());
- break;
- default:
+ uint32_t typeInt = ctx->dis.readUnsigned();
+
+ // Check if geometry has SRID
+ bool has_srid = (typeInt & WKB_SRID_FLAG) != 0;
+
+ // Read SRID if present
+ if (has_srid) {
+ ctx->dis.readUnsigned(); // Read and store SRID if needed
+ }
+
+ // Get the base geometry type
+ uint32_t geometryType = typeInt & WKB_TYPE_MASK;
+
+ std::unique_ptr<GeoShape> shape;
+
+ switch (geometryType) {
+ case wkbType::wkbPoint:
+ shape = readPoint(ctx);
+ break;
+ case wkbType::wkbLine:
+ shape = readLine(ctx);
+ break;
+ case wkbType::wkbPolygon:
+ shape = readPolygon(ctx);
+ break;
+ default:
+ return nullptr;
+ }
+
+ return shape;
+ } catch (...) {
+ // Handle any exceptions from reading operations
return nullptr;
}
- return shape;
}
std::unique_ptr<GeoPoint> WkbParse::readPoint(WkbParseContext* ctx) {
GeoCoordinateList coords = WkbParse::readCoordinateList(1, ctx);
- std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
+ if (coords.list.empty()) {
+ return nullptr;
+ }
- if (point->from_coord(coords.list[0]) == GEO_PARSE_OK) {
- return point;
- } else {
+ std::unique_ptr<GeoPoint> point = GeoPoint::create_unique();
+ if (!point || point->from_coord(coords.list[0]) != GEO_PARSE_OK) {
return nullptr;
}
+
+ return point;
}
std::unique_ptr<GeoLine> WkbParse::readLine(WkbParseContext* ctx) {
uint32_t size = ctx->dis.readUnsigned();
- minMemSize(wkbLine, size, ctx);
+ if (minMemSize(wkbLine, size, ctx) != GEO_PARSE_OK) {
+ return nullptr;
+ }
GeoCoordinateList coords = WkbParse::readCoordinateList(size, ctx);
- std::unique_ptr<GeoLine> line = GeoLine::create_unique();
+ if (coords.list.empty()) {
+ return nullptr;
+ }
- if (line->from_coords(coords) == GEO_PARSE_OK) {
- return line;
- } else {
+ std::unique_ptr<GeoLine> line = GeoLine::create_unique();
+ if (!line || line->from_coords(coords) != GEO_PARSE_OK) {
return nullptr;
}
+
+ return line;
}
std::unique_ptr<GeoPolygon> WkbParse::readPolygon(WkbParseContext* ctx) {
uint32_t num_loops = ctx->dis.readUnsigned();
- minMemSize(wkbPolygon, num_loops, ctx);
+ if (minMemSize(wkbPolygon, num_loops, ctx) != GEO_PARSE_OK) {
+ return nullptr;
+ }
+
GeoCoordinateListList coordss;
- for (int i = 0; i < num_loops; ++i) {
+ for (uint32_t i = 0; i < num_loops; ++i) {
uint32_t size = ctx->dis.readUnsigned();
- GeoCoordinateList* coords = new GeoCoordinateList();
+ if (size < 3) { // A polygon loop must have at least 3 points
+ return nullptr;
+ }
+
+ auto coords = std::make_unique<GeoCoordinateList>();
*coords = WkbParse::readCoordinateList(size, ctx);
- coordss.add(coords);
+ if (coords->list.empty()) {
+ return nullptr;
+ }
+ coordss.add(coords.release());
}
std::unique_ptr<GeoPolygon> polygon = GeoPolygon::create_unique();
-
- if (polygon->from_coords(coordss) == GEO_PARSE_OK) {
- return polygon;
- } else {
+ if (!polygon || polygon->from_coords(coordss) != GEO_PARSE_OK) {
return nullptr;
}
+
+ return polygon;
}
GeoCoordinateList WkbParse::readCoordinateList(unsigned size, WkbParseContext*
ctx) {
GeoCoordinateList coords;
for (uint32_t i = 0; i < size; i++) {
- readCoordinate(ctx);
+ if (!readCoordinate(ctx)) {
+ return GeoCoordinateList();
+ }
unsigned int j = 0;
GeoCoordinate coord;
coord.x = ctx->ordValues[j++];
coord.y = ctx->ordValues[j++];
coords.add(coord);
}
-
return coords;
}
diff --git a/be/src/geo/wkb_parse.h b/be/src/geo/wkb_parse.h
index e03dddf97a3..de6d0e9b2d9 100644
--- a/be/src/geo/wkb_parse.h
+++ b/be/src/geo/wkb_parse.h
@@ -17,8 +17,7 @@
#pragma once
-#include <stdint.h>
-
+#include <cstdint>
#include <iosfwd>
#include <memory>
@@ -34,6 +33,17 @@ class GeoLine;
class GeoPoint;
class GeoPolygon;
+// WKB format constants
+// According to OpenGIS Implementation Specification:
+// The high bit of the type value is set to 1 if the WKB contains a SRID.
+// Reference: OpenGIS Implementation Specification for Geographic information
- Simple feature access - Part 1: Common architecture
+// Bit mask to check if WKB contains SRID
+constexpr uint32_t WKB_SRID_FLAG = 0x20000000;
+
+// The geometry type is stored in the least significant byte of the type value
+// Bit mask to extract the base geometry type
+constexpr uint32_t WKB_TYPE_MASK = 0xFF;
+
class WkbParse {
public:
static GeoParseStatus parse_wkb(std::istream& is, GeoShape** shape);
diff --git a/regression-test/data/correctness_p0/test_select_constant.out
b/regression-test/data/correctness_p0/test_select_constant.out
index 33c56d3a22c..6737e373d16 100644
Binary files a/regression-test/data/correctness_p0/test_select_constant.out and
b/regression-test/data/correctness_p0/test_select_constant.out differ
diff --git a/regression-test/data/nereids_function_p0/scalar_function/S.out
b/regression-test/data/nereids_function_p0/scalar_function/S.out
index 53a5a1639a7..b346ae022a0 100644
Binary files a/regression-test/data/nereids_function_p0/scalar_function/S.out
and b/regression-test/data/nereids_function_p0/scalar_function/S.out differ
diff --git a/regression-test/data/nereids_p0/test_select_constant.out
b/regression-test/data/nereids_p0/test_select_constant.out
index cb391ffb121..6faa26295a1 100644
Binary files a/regression-test/data/nereids_p0/test_select_constant.out and
b/regression-test/data/nereids_p0/test_select_constant.out differ
diff --git
a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
index db1b1ffcae5..59bc628249f 100644
Binary files
a/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
and
b/regression-test/data/query_p0/sql_functions/spatial_functions/test_gis_function.out
differ
diff --git a/regression-test/data/query_p0/test_select_constant.out
b/regression-test/data/query_p0/test_select_constant.out
index cb391ffb121..6faa26295a1 100644
Binary files a/regression-test/data/query_p0/test_select_constant.out and
b/regression-test/data/query_p0/test_select_constant.out differ
diff --git
a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
index f76cb44cb4a..81eadfb0cc0 100644
---
a/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
+++
b/regression-test/suites/query_p0/sql_functions/spatial_functions/test_gis_function.groovy
@@ -70,4 +70,10 @@ suite("test_gis_function", "arrow_flight_sql") {
qt_sql "SELECT
ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_GeometryFromText(\"LINESTRING (1 1, 2
2)\"))));"
qt_sql "SELECT ST_AsText(ST_GeomFromWKB(ST_AsBinary(ST_Polygon(\"POLYGON
((114.104486 22.547119,114.093758 22.547753,114.096504 22.532057,114.104229
22.539826,114.106203 22.542680,114.104486 22.547119))\"))));"
+ qt_sql "SELECT
ST_AsText(ST_GeometryFromWKB('01010000208A11000068270210774C5D40B8DECA334C3B4240'));"
+ qt_sql "SELECT ST_X(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1, 1))));"
+ qt_sql "SELECT
ST_AsText(ST_GeometryFromWKB(ST_AsBinary(ST_Point(1.23456789, 2.34567891))));"
+ qt_sql "SELECT ST_X(ST_Point(0.9999999999999, 1));"
+ qt_sql "SELECT ST_Y(ST_Point(1, 1.0000000000001));"
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]