On 12/7/2010 5:15 PM, pcr...@pcreso.com wrote:
You might look at the newish (v8.4+ ?) RECURSIVE/WITH capabilities of Postgres.

Given Postgis allows you to create arbitrary buffer zones around your points, the RECURSIVE query capability allows you to write an SQL that will recursively track points which can interact with each other

So, just playing around a bit, I came up with this RECURSIVE query statement that will find all points in a particular "cluster". Every time I run the UPDATE statement, another cluster of points is identified and updated to have the same group id. I have no idea how this would perform over a table of a million points ...


CREATE TABLE pts (id serial, grp int, geom geometry);
CREATE SEQUENCE pts_grp_seq; -- to label cluster groups

-- Sample data similiar to Sébastien's original post
INSERT INTO pts (geom)
SELECT (ST_Dump(
'MULTIPOINT (
    6 129,
    21 200,
    30 66,
    33 109,
    41 137,
    48 191,
    76 119,
    90 197,
    175 212,
    178 230,
    182 53,
    196 209,
    259 124,
    259 148
)')).geom;

-- Simple wrapper around an UPDATE statement that identifies a
-- point cluster everytime its run
CREATE OR REPLACE FUNCTION cluster_pts() RETURNS text AS
$body$
DECLARE
   results int;
BEGIN
   LOOP
      -- Assign all records in the same cluster, the same group id
      UPDATE pts
      SET grp = f.grp
      FROM (
         WITH RECURSIVE t(id, geom, grp, path, cycle) AS (
             -- Grab the first record that doesn't have a group id
            (SELECT id, geom, nextval('pts_grp_seq'), ARRAY[id], false
             FROM pts
             WHERE grp IS NULL
             LIMIT 1
             )
           UNION ALL
             -- Find all the other records that are iteratively within some
             -- distance to the starting point.
-- This includes A to B to C paths, but not A to B to A cyclic paths SELECT a.id, a.geom, b.grp, path || ARRAY[a.id], a.id = ANY(path)
             FROM pts a, t b
             WHERE a.id != b.id
             AND ST_DWithin(a.geom, b.geom, 80)
             AND NOT cycle
         )
         SELECT DISTINCT id, grp FROM t
      ) AS f
      WHERE pts.id = f.id;

      -- repeat until no more updates are performed.
      GET DIAGNOSTICS results = ROW_COUNT;
      EXIT WHEN results = 0;
   END LOOP;

   RETURN 'done.';
END;
$body$ LANGUAGE plpgsql;

SELECT cluster_pts();


<<attachment: pts.png>>

_______________________________________________
postgis-users mailing list
postgis-users@postgis.refractions.net
http://postgis.refractions.net/mailman/listinfo/postgis-users

Reply via email to