jiayuasu commented on code in PR #2658:
URL: https://github.com/apache/sedona/pull/2658#discussion_r2824633096


##########
spark/common/src/test/scala/org/apache/sedona/sql/CRSTransformProj4Test.scala:
##########
@@ -855,4 +858,121 @@ class CRSTransformProj4Test extends TestBaseScala {
       assertEquals("All 40 points should transform successfully", 40, 
successCount)
     }
   }
+
+  describe("URL CRS Provider config integration") {
+
+    it("should still transform correctly when URL provider is not configured") 
{
+      // Verify default behavior (no URL provider) still works
+      sparkSession.conf.set("spark.sedona.crs.url.base", "")
+      val result = sparkSession
+        .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT (-122.4194 
37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+        .first()
+        .getAs[Geometry](0)
+
+      assertNotNull(result)
+      assertEquals(3857, result.getSRID)
+      assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+      assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+    }
+
+    it("should fall back to built-in when URL provider returns nothing") {
+      // Point to a non-existent server — provider will fail, should fall back 
to built-in
+      sparkSession.conf.set("spark.sedona.crs.url.base", "http://127.0.0.1:1")
+      sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", 
"/epsg/{code}.json")
+      sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+      try {
+        val result = sparkSession
+          .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT 
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+          .first()
+          .getAs[Geometry](0)
+
+        // Should succeed via built-in fallback
+        assertNotNull(result)
+        assertEquals(3857, result.getSRID)
+        assertEquals(-13627665.27, result.getCoordinate.x, COORD_TOLERANCE)
+        assertEquals(4547675.35, result.getCoordinate.y, COORD_TOLERANCE)
+      } finally {
+        sparkSession.conf.set("spark.sedona.crs.url.base", "")
+        org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs")
+      }
+    }
+
+    it("should register URL CRS provider when config is set") {
+      sparkSession.conf.set("spark.sedona.crs.url.base", 
"https://test.example.com")
+      sparkSession.conf.set("spark.sedona.crs.url.pathTemplate", 
"/epsg/{code}.json")
+      sparkSession.conf.set("spark.sedona.crs.url.format", "projjson")
+      try {
+        // Force a transform to trigger provider registration
+        val result = sparkSession
+          .sql("SELECT ST_Transform(ST_SetSRID(ST_GeomFromWKT('POINT 
(-122.4194 37.7749)'), 4326), 'EPSG:4326', 'EPSG:3857')")
+          .first()
+          .getAs[Geometry](0)
+
+        assertNotNull(result)
+
+        // Verify provider was registered
+        val providers = org.datasyslab.proj4sedona.defs.Defs.getProviders
+        val found = providers.stream().anyMatch(p => p.getName == 
"sedona-url-crs")
+        assertTrue("sedona-url-crs provider should be registered", found)
+      } finally {
+        sparkSession.conf.set("spark.sedona.crs.url.base", "")
+        org.datasyslab.proj4sedona.defs.Defs.removeProvider("sedona-url-crs")
+      }
+    }
+
+    it("should transform using local HTTP URL CRS provider with custom CRS") {
+      // Serve a deliberately wrong CRS definition for fake EPSG:990001 that no
+      // built-in provider knows. Uses Mercator with absurd false 
easting/northing.
+      // If the transform succeeds with shifted coordinates, the URL provider 
was used.
+      // If the URL provider didn't work, the transform would fail entirely.
+      val requestCount = new AtomicInteger(0)
+      val server = HttpServer.create(new InetSocketAddress(0), 0)
+      val port = server.getAddress.getPort
+
+      // Web Mercator with intentional 10M/20M false easting/northing
+      val weirdMercator =
+        "+proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0" +
+          " +x_0=10000000 +y_0=20000000 +k=1 +units=m +no_defs"
+
+      server.createContext(
+        "/epsg/",
+        exchange => {
+          val path = exchange.getRequestURI.getPath
+          if (path.contains("990001")) {
+            requestCount.incrementAndGet()
+            val body = weirdMercator.getBytes("UTF-8")
+            exchange.sendResponseHeaders(200, body.length)
+            exchange.getResponseBody.write(body)

Review Comment:
   fixed



##########
docs/api/sql/CRS-Transformation.md:
##########
@@ -200,6 +200,172 @@ SELECT ST_Transform(
 ) AS transformed_point
 ```
 
+## URL CRS Provider
+
+Since v1.9.0, Sedona supports resolving CRS definitions from a remote HTTP 
server. This is useful when you need custom or internal CRS definitions that 
are not included in the built-in database, or when you want to use your own CRS 
definition service.
+
+When configured, the URL provider is consulted **before** the built-in CRS 
database. If the URL provider returns a valid CRS definition, it is used 
directly. If the URL returns a 404 or an error, Sedona falls back to the 
built-in definitions.
+
+### Hosting CRS definitions
+
+You can host your custom CRS definitions on any HTTP-accessible location. Two 
common approaches:
+
+- **GitHub repository**: Store CRS definition files in a public GitHub repo 
and use the raw content URL. This is the easiest way to get started — no server 
infrastructure required.
+- **Public S3 bucket**: Upload CRS definition files to an Amazon S3 bucket 
with public read access and use the S3 static website URL or CloudFront 
distribution.
+
+Each file should contain a single CRS definition in the format you specify via 
`spark.sedona.crs.url.format` (PROJJSON, PROJ string, WKT1, or WKT2).
+
+### Configuration
+
+Set the following Spark configuration properties when creating your Sedona 
session:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.example.com")
+    .config("spark.sedona.crs.url.pathTemplate", "/{authority}/{code}.json")
+    .config("spark.sedona.crs.url.format", "projjson")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+```
+
+With the default path template, resolving `EPSG:4326` will fetch:
+
+```
+https://crs.example.com/epsg/4326.json
+```
+
+Only `spark.sedona.crs.url.base` is required. The other two properties have 
sensible defaults (`/{authority}/{code}.json` and `projjson`).
+
+### Supported response formats
+
+| Format value | Description | Content example |
+|-------------|-------------|----------------|
+| `projjson` | PROJJSON (default) | `{"type": "GeographicCRS", ...}` |
+| `proj` | PROJ string | `+proj=longlat +datum=WGS84 +no_defs` |
+| `wkt1` | OGC WKT1 | `GEOGCS["WGS 84", ...]` |
+| `wkt2` | ISO 19162 WKT2 | `GEOGCRS["WGS 84", ...]` |
+
+### Example: GitHub repository
+
+Suppose you have a GitHub repo `myorg/crs-definitions` with the following 
structure:
+
+```
+crs-definitions/
+  epsg/
+    990001.proj
+    990002.proj
+```
+
+where `epsg/990001.proj` contains a PROJ string like:
+
+```
++proj=merc +a=6378137 +b=6378137 +lat_ts=0 +lon_0=0 +x_0=0 +y_0=0 +k=1 
+units=m +no_defs
+```
+
+Point Sedona to the raw GitHub content URL:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config(
+        "spark.sedona.crs.url.base",
+        "https://raw.githubusercontent.com/myorg/crs-definitions/main",
+    )
+    .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves EPSG:990001 from:
+# https://raw.githubusercontent.com/myorg/crs-definitions/main/epsg/990001.proj
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'EPSG:990001'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: self-hosted CRS server
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.pathTemplate", "/epsg/{code}.proj")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Now ST_Transform will try https://crs.mycompany.com/epsg/3857.proj
+# before falling back to built-in definitions
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'EPSG:3857'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: custom authority codes
+
+The URL provider is especially useful for custom or internal authority codes 
that are not in any public database. With the default path template 
`/{authority}/{code}.json`, the `{authority}` placeholder is replaced by the 
authority name from the CRS string (lowercased):
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# Resolves MYORG:1001 from:
+# https://crs.mycompany.com/myorg/1001.json
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_GeomFromText('POINT(-122.4194 37.7749)'),
+        'EPSG:4326',
+        'MYORG:1001'
+    ) AS transformed_point
+""").show()
+```
+
+### Example: using geometry SRID with URL provider
+
+If the geometry already has an SRID set (e.g., via `ST_SetSRID`), you can omit 
the source CRS parameter. The source CRS is derived from the geometry's SRID as 
an EPSG code:
+
+```python
+config = (
+    SedonaContext.builder()
+    .config("spark.sedona.crs.url.base", "https://crs.mycompany.com")
+    .config("spark.sedona.crs.url.format", "proj")
+    .getOrCreate()
+)
+sedona = SedonaContext.create(config)
+
+# The source CRS is taken from the geometry's SRID (4326 → EPSG:4326).
+# Only the target CRS string is needed.
+sedona.sql("""
+    SELECT ST_Transform(
+        ST_SetSRID(ST_GeomFromText('POINT(-122.4194 37.7749)'), 4326),
+        'EPSG:3857'
+    ) AS transformed_point
+""").show()
+```
+
+### Disabling the URL provider
+
+To disable, omit `spark.sedona.crs.url.base` or set it to an empty string (the 
default).

Review Comment:
   fixed



##########
common/src/main/java/org/apache/sedona/common/FunctionsProj4.java:
##########
@@ -62,6 +67,93 @@ public class FunctionsProj4 {
   private static final Pattern EPSG_PATTERN =
       Pattern.compile("^EPSG:(\\d+)$", Pattern.CASE_INSENSITIVE);
 
+  /** Name used for the registered URL CRS provider. */
+  private static final String URL_CRS_PROVIDER_NAME = "sedona-url-crs";
+
+  /**
+   * Tracks the currently registered URL CRS provider config (baseUrl + "|" + 
pathTemplate + "|" +
+   * format). Null means no provider registered yet. Uses AtomicReference for 
thread-safe lazy
+   * initialization on executors.
+   */
+  private static final AtomicReference<String> registeredUrlCrsConfig = new 
AtomicReference<>(null);
+
+  /**
+   * Register a URL-based CRS provider with proj4sedona's Defs registry. This 
provider will be
+   * consulted before the built-in provider when resolving EPSG codes.
+   *
+   * <p>This method is safe to call concurrently from multiple threads — it 
uses double-checked
+   * locking so the fast path (already registered with the same config) is 
lock-free, and the
+   * synchronized slow path executes at most once per JVM (or once per config 
change).
+   *
+   * @param baseUrl The base URL of the CRS definition server
+   * @param pathTemplate The URL path template (e.g., 
"/{authority}/{code}.json")
+   * @param format The expected response format: "projjson", "proj", "wkt1", 
or "wkt2"
+   */
+  public static void registerUrlCrsProvider(String baseUrl, String 
pathTemplate, String format) {
+    if (baseUrl == null || baseUrl.isEmpty()) {
+      return;
+    }
+
+    String configKey = baseUrl + "|" + pathTemplate + "|" + format;
+
+    // Fast path (lock-free): already registered with the same config.
+    // This handles 99.999%+ of calls with just a volatile read + 
String.equals().
+    if (configKey.equals(registeredUrlCrsConfig.get())) {
+      return;
+    }

Review Comment:
   fixed



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to