On Tue, 25 Jan 2005, Neil Conway wrote:

> On Tue, 2005-01-25 at 01:13 -0500, Kris Jurka wrote:
> > The attached patch implements the soundex difference function which 
> > compares two strings' soundex values for similarity.
> 
> *** 19,24 ****
> --- 19,28 ----
>   AS 'MODULE_PATHNAME', 'soundex'
>   LANGUAGE 'C';
>   
> + CREATE FUNCTION difference(text,text) RETURNS int
> + AS 'MODULE_PATHNAME', 'difference'
> + LANGUAGE 'C';
> + 
> 
> This should be immutable, right?
> 

Yes, it should, and even more importantly strict because it crashes when
called with null inputs.  I copied this off the adjacent entry without
thinking about it.  So currently SELECT text_soundex(NULL); crashes the
server.  I've attached two new patches.  One revising my original patch to
make the function creations consistent and the other to just fix the
problem in the existing code (which should be backported as far as people
would like to).

Kris Jurka
Index: contrib/fuzzystrmatch/fuzzystrmatch.sql.in
===================================================================
RCS file: /cvsroot/pgsql/contrib/fuzzystrmatch/fuzzystrmatch.sql.in,v
retrieving revision 1.6
diff -c -r1.6 fuzzystrmatch.sql.in
*** contrib/fuzzystrmatch/fuzzystrmatch.sql.in  1 Jul 2004 03:25:48 -0000       
1.6
--- contrib/fuzzystrmatch/fuzzystrmatch.sql.in  25 Jan 2005 07:21:08 -0000
***************
*** 17,23 ****
  
  CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE 'C';
  
  CREATE FUNCTION dmetaphone (text) RETURNS text 
  LANGUAGE C IMMUTABLE STRICT
--- 17,23 ----
  
  CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE 'C' WITH (iscachable, isstrict);
  
  CREATE FUNCTION dmetaphone (text) RETURNS text 
  LANGUAGE C IMMUTABLE STRICT
? contrib/fuzzystrmatch/.deps
? contrib/fuzzystrmatch/fuzzystrmatch.sql
? contrib/fuzzystrmatch/libfuzzystrmatch.so.0.0
Index: contrib/fuzzystrmatch/README.fuzzystrmatch
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/fuzzystrmatch/README.fuzzystrmatch,v
retrieving revision 1.7
diff -c -r1.7 README.fuzzystrmatch
*** contrib/fuzzystrmatch/README.fuzzystrmatch  1 Jan 2005 20:44:11 -0000       
1.7
--- contrib/fuzzystrmatch/README.fuzzystrmatch  25 Jan 2005 07:16:41 -0000
***************
*** 33,38 ****
--- 33,42 ----
   * Folded existing soundex contrib into this one. Renamed text_soundex() (C 
function)
   * to soundex() for consistency.
   *
+  * difference()
+  * ------------
+  * Return the difference between two strings' soundex values.  Kris Jurka
+  *
   * Permission to use, copy, modify, and distribute this software and its
   * documentation for any purpose, without fee, and without a written agreement
   * is hereby granted, provided that the above copyright notice and this
Index: contrib/fuzzystrmatch/README.soundex
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/fuzzystrmatch/README.soundex,v
retrieving revision 1.2
diff -c -r1.2 README.soundex
*** contrib/fuzzystrmatch/README.soundex        1 Jul 2004 03:25:48 -0000       
1.2
--- contrib/fuzzystrmatch/README.soundex        25 Jan 2005 07:16:41 -0000
***************
*** 7,21 ****
--- 7,31 ----
  beyond English names (or the English pronunciation of names), and
  it is not a linguistic tool.
  
+ When comparing two soundex values to determine similarity, the
+ difference function reports how close the match is on a scale
+ from zero to four, with zero being no match and four being an
+ exact match.
+ 
  The following are some usage examples:
  
  SELECT soundex('hello world!');
  
+ SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
+ SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
+ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+ 
  CREATE TABLE s (nm text)\g
  
  insert into s values ('john')\g
  insert into s values ('joan')\g
  insert into s values ('wobbly')\g
+ insert into s values ('jack')\g
  
  select * from s
  where soundex(nm) = soundex('john')\g
***************
*** 58,62 ****
  WHERE text_sx_eq(nm,'john')\g
  
  SELECT *
! from s
! where s.nm #= 'john';
--- 68,77 ----
  WHERE text_sx_eq(nm,'john')\g
  
  SELECT *
! FROM s
! WHERE s.nm #= 'john';
! 
! SELECT *
! FROM s
! WHERE difference(s.nm, 'john') > 2;
! 
Index: contrib/fuzzystrmatch/fuzzystrmatch.c
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/fuzzystrmatch/fuzzystrmatch.c,v
retrieving revision 1.14
diff -c -r1.14 fuzzystrmatch.c
*** contrib/fuzzystrmatch/fuzzystrmatch.c       1 Jan 2005 05:43:06 -0000       
1.14
--- contrib/fuzzystrmatch/fuzzystrmatch.c       25 Jan 2005 07:16:41 -0000
***************
*** 755,757 ****
--- 755,777 ----
                ++count;
        }
  }
+ 
+ PG_FUNCTION_INFO_V1(difference);
+ 
+ Datum
+ difference(PG_FUNCTION_ARGS)
+ {
+       char sndx1[SOUNDEX_LEN+1], sndx2[SOUNDEX_LEN+1];
+       int i, result;
+ 
+       _soundex(_textout(PG_GETARG_TEXT_P(0)), sndx1);
+       _soundex(_textout(PG_GETARG_TEXT_P(1)), sndx2);
+ 
+       result = 0;
+       for (i=0; i<SOUNDEX_LEN; i++) {
+               if (sndx1[i] == sndx2[i])
+                       result++;
+       }
+ 
+       PG_RETURN_INT32(result);
+ }
Index: contrib/fuzzystrmatch/fuzzystrmatch.h
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/fuzzystrmatch/fuzzystrmatch.h,v
retrieving revision 1.10
diff -c -r1.10 fuzzystrmatch.h
*** contrib/fuzzystrmatch/fuzzystrmatch.h       1 Jan 2005 05:43:06 -0000       
1.10
--- contrib/fuzzystrmatch/fuzzystrmatch.h       25 Jan 2005 07:16:41 -0000
***************
*** 60,65 ****
--- 60,66 ----
  extern Datum levenshtein(PG_FUNCTION_ARGS);
  extern Datum metaphone(PG_FUNCTION_ARGS);
  extern Datum soundex(PG_FUNCTION_ARGS);
+ extern Datum difference(PG_FUNCTION_ARGS);
  
  /*
   * Soundex
Index: contrib/fuzzystrmatch/fuzzystrmatch.sql.in
===================================================================
RCS file: /projects/cvsroot/pgsql/contrib/fuzzystrmatch/fuzzystrmatch.sql.in,v
retrieving revision 1.6
diff -c -r1.6 fuzzystrmatch.sql.in
*** contrib/fuzzystrmatch/fuzzystrmatch.sql.in  1 Jul 2004 03:25:48 -0000       
1.6
--- contrib/fuzzystrmatch/fuzzystrmatch.sql.in  25 Jan 2005 07:16:41 -0000
***************
*** 1,28 ****
  -- Adjust this setting to control where the objects get created.
  SET search_path = public;
  
! CREATE FUNCTION levenshtein (text,text)
! RETURNS int
  AS 'MODULE_PATHNAME','levenshtein'
! LANGUAGE 'C' WITH (iscachable, isstrict);
  
! CREATE FUNCTION metaphone (text,int)
! RETURNS text
  AS 'MODULE_PATHNAME','metaphone'
! LANGUAGE 'C' WITH (iscachable, isstrict);
  
  CREATE FUNCTION soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE 'C' WITH (iscachable, isstrict);
  
  CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE 'C';
  
  CREATE FUNCTION dmetaphone (text) RETURNS text 
! LANGUAGE C IMMUTABLE STRICT
! AS 'MODULE_PATHNAME', 'dmetaphone';
  
  CREATE FUNCTION dmetaphone_alt (text) RETURNS text 
! LANGUAGE C IMMUTABLE STRICT
! AS 'MODULE_PATHNAME', 'dmetaphone_alt';
--- 1,30 ----
  -- Adjust this setting to control where the objects get created.
  SET search_path = public;
  
! CREATE FUNCTION levenshtein (text,text) RETURNS int
  AS 'MODULE_PATHNAME','levenshtein'
! LANGUAGE C IMMUTABLE STRICT;
  
! CREATE FUNCTION metaphone (text,int) RETURNS text
  AS 'MODULE_PATHNAME','metaphone'
! LANGUAGE C IMMUTABLE STRICT;
  
  CREATE FUNCTION soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE C IMMUTABLE STRICT;
  
  CREATE FUNCTION text_soundex(text) RETURNS text
  AS 'MODULE_PATHNAME', 'soundex'
! LANGUAGE C IMMUTABLE STRICT;
! 
! CREATE FUNCTION difference(text,text) RETURNS int
! AS 'MODULE_PATHNAME', 'difference'
! LANGUAGE C IMMUTABLE STRICT;
  
  CREATE FUNCTION dmetaphone (text) RETURNS text 
! AS 'MODULE_PATHNAME', 'dmetaphone'
! LANGUAGE C IMMUTABLE STRICT;
  
  CREATE FUNCTION dmetaphone_alt (text) RETURNS text 
! AS 'MODULE_PATHNAME', 'dmetaphone_alt'
! LANGUAGE C IMMUTABLE STRICT;
---------------------------(end of broadcast)---------------------------
TIP 4: Don't 'kill -9' the postmaster

Reply via email to