Hi all,

I've been working on an extension that allows moving the ispell
dictionaries to a shared memory segment. It's almost complete; the last
FIXME is about copying the regex_t structure (stored in AFFIX).

According to regex.h the structure is fairly complex and not exactly easy
to understand, so I'd like to know if anyone here has already implemented
this, or something that could serve the same purpose. Any ideas?

kind regards
Tomas

This message is a reply to http://www.postgresql.org/message-id/dd02a31fdeffbf5cb24771e34213b40f.squir...@sq.gransy.com
Sorry, I can't reply to it directly; I couldn't retrieve the original from the archive.

Thank you for your shared_ispell extension. It is very useful. I got it from https://github.com/tvondra/shared_ispell. With this message I am sending a patch for your repository with a draft of the code that allows shared_ispell to copy regex_t.

The main idea of the patch:
- we do not need to copy the whole regex_t structure
- most of its fields and substructures are used only at compile time
- we need to copy only these structures: guts, colormap, subre, cnfa
- from the subre structure we need only its cnfa

The colormap represents a directed acyclic graph; the cnfa represents a nondeterministic finite automaton. The sketch below shows how the copy is wired into copyAffix().
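
In copyAffix() the regex_t header is then copied byte-for-byte, and only re_guts is repointed at a deep copy placed in the shared segment. A condensed sketch of that branch (copyAffixRegex is a name used here only for illustration; in the patch this code sits inline in copyAffix() and relies on the patch's copyGuts(), which allocates from the shared segment via shalloc()):

/* needs regex/regguts.h for struct guts, as in the patch */
static void
copyAffixRegex(AFFIX *copy, AFFIX *affix)
{
	/* deep-copy the run-time parts of the guts into shared memory */
	struct guts *g = copyGuts(&affix->reg.regex);

	/* the regex_t header itself can be copied as-is ... */
	memcpy(&copy->reg.regex, &affix->reg.regex, sizeof(regex_t));
	/* ... with re_guts now pointing at the shared copy */
	copy->reg.regex.re_guts = (char *) g;
}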

The patch also does the following:
- added regression tests
- removed spell.h and spell.c, since they duplicated code
- added shared_ispell.h, which declares some structures
- fixed an issue where stopFile can be NULL
- fixed countCMPDAffixes, since there could theoretically be zero affixes (see the short illustration below)
- added copyCMPDAffix
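
For context, CompoundAffix is an array terminated by an entry whose affix pointer is NULL; the old counting code started at 1 and dereferenced the first element unconditionally. A tiny illustration of the layout (demo_compound is a made-up example, only the shape matters):

#include "tsearch/dicts/spell.h"

/*
 * One real compound affix plus the NULL terminator. A dictionary without
 * compound affixes may (at least theoretically) hand over a NULL pointer
 * instead, which the fixed countCMPDAffixes() now answers with 0.
 */
static const CMPDAffix demo_compound[] = {
	{"z", 1, false},	/* a real compound affix */
	{NULL, 0, false}	/* terminator (affix == NULL), counted and copied too */
};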

A question to the hackers: could such a patch be useful as a PostgreSQL patch for full-text search? Is it needed?

shared_ispell loads dictionaries into shared memory. The main benefits are:
- saving memory: every dictionary is loaded only once, not once per backend. In the current version of PostgreSQL, dictionaries are loaded in every backend that requests them.
- saving time: the first load of a dictionary takes a long time. With this patch, dictionaries are loaded only once.

--
Artur Zakirov
Postgres Professional: http://www.postgrespro.com
Russian Postgres Company
*** a/Makefile
--- b/Makefile
***************
*** 1,18 ****
  MODULE_big = shared_ispell
! OBJS = src/shared_ispell.o src/spell.o
  
  EXTENSION = shared_ispell
! DATA = sql/shared_ispell--1.0.0.sql
! MODULES = shared_ispell
  
! CFLAGS=`pg_config --includedir-server`
  
  PG_CONFIG = pg_config
  PGXS := $(shell $(PG_CONFIG) --pgxs)
  include $(PGXS)
! 
! all: shared_ispell.so
! 
! shared_ispell.so: $(OBJS)
! 
! %.o : src/%.c
--- 1,20 ----
+ # contrib/shared_ispell/Makefile
+ 
  MODULE_big = shared_ispell
! OBJS = src/shared_ispell.o
  
  EXTENSION = shared_ispell
! DATA = sql/shared_ispell--1.1.0.sql
  
! REGRESS = shared_ispell
  
+ ifdef USE_PGXS
  PG_CONFIG = pg_config
  PGXS := $(shell $(PG_CONFIG) --pgxs)
  include $(PGXS)
! else
! subdir = contrib/shared_ispell
! top_builddir = ../..
! include $(top_builddir)/src/Makefile.global
! include $(top_srcdir)/contrib/contrib-global.mk
! endif
*** a/README.md
--- b/README.md
***************
*** 13,28 **** If you need just snowball-type dictionaries, this extension is not
  really interesting for you. But if you really need an ispell
  dictionary, this may save you a lot of resources.
  
- Warning
- -------
- The extension does not yet handle affixes that require full regular
- expressions (regex_t, implemented in regex.h). This is indicated by
- an error when initializing the dictionary.
- 
- Simple affixes and affixes that can be handled by fast regex subset
- (as implemented in regis.h) are handled just fine.
- 
- 
  Install
  -------
  Installing the extension is quite simple, especially if you're on 9.1.
--- 13,18 ----
*** /dev/null
--- b/expected/shared_ispell.out
***************
*** 0 ****
--- 1,193 ----
+ CREATE EXTENSION shared_ispell;
+ -- Test ISpell dictionary with ispell affix file
+ CREATE TEXT SEARCH DICTIONARY shared_ispell (
+                         Template=shared_ispell,
+                         DictFile=ispell_sample,
+                         AffFile=ispell_sample
+ );
+ SELECT ts_lexize('shared_ispell', 'skies');
+  ts_lexize 
+ -----------
+  {sky}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'bookings');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'booking');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'foot');
+  ts_lexize 
+ -----------
+  {foot}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'foots');
+  ts_lexize 
+ -----------
+  {foot}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'rebookings');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'rebooking');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'rebook');
+  ts_lexize 
+ -----------
+  
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'unbookings');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'unbooking');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'unbook');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'footklubber');
+    ts_lexize    
+ ----------------
+  {foot,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'footballklubber');
+                       ts_lexize                       
+ ------------------------------------------------------
+  {footballklubber,foot,ball,klubber,football,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'ballyklubber');
+    ts_lexize    
+ ----------------
+  {ball,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_ispell', 'footballyklubber');
+       ts_lexize      
+ ---------------------
+  {foot,ball,klubber}
+ (1 row)
+ 
+ -- Test ISpell dictionary with hunspell affix file
+ CREATE TEXT SEARCH DICTIONARY shared_hunspell (
+                         Template=shared_ispell,
+                         DictFile=ispell_sample,
+                         AffFile=hunspell_sample
+ );
+ SELECT ts_lexize('shared_hunspell', 'skies');
+  ts_lexize 
+ -----------
+  {sky}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'bookings');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'booking');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'foot');
+  ts_lexize 
+ -----------
+  {foot}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'foots');
+  ts_lexize 
+ -----------
+  {foot}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'rebookings');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'rebooking');
+    ts_lexize    
+ ----------------
+  {booking,book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'rebook');
+  ts_lexize 
+ -----------
+  
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'unbookings');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'unbooking');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'unbook');
+  ts_lexize 
+ -----------
+  {book}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'footklubber');
+    ts_lexize    
+ ----------------
+  {foot,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'footballklubber');
+                       ts_lexize                       
+ ------------------------------------------------------
+  {footballklubber,foot,ball,klubber,football,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'ballyklubber');
+    ts_lexize    
+ ----------------
+  {ball,klubber}
+ (1 row)
+ 
+ SELECT ts_lexize('shared_hunspell', 'footballyklubber');
+       ts_lexize      
+ ---------------------
+  {foot,ball,klubber}
+ (1 row)
+ 
*** a/shared_ispell.control
--- b/shared_ispell.control
***************
*** 1,6 ****
  # shared ispell dictionary
  comment = 'Provides shared ispell dictionaries.'
! default_version = '1.0.0'
  relocatable = true
  
  module_pathname = '$libdir/shared_ispell'
--- 1,6 ----
  # shared ispell dictionary
  comment = 'Provides shared ispell dictionaries.'
! default_version = '1.1.0'
  relocatable = true
  
  module_pathname = '$libdir/shared_ispell'
*** a/sql/shared_ispell--1.0.0.sql
--- /dev/null
***************
*** 1,55 ****
- CREATE OR REPLACE FUNCTION shared_ispell_init(internal)
- 	RETURNS internal
- 	AS 'MODULE_PATHNAME', 'dispell_init'
- 	LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_lexize(internal,internal,internal,internal)
- 	RETURNS internal
- 	AS 'MODULE_PATHNAME', 'dispell_lexize'
- 	LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_reset()
- 	RETURNS void
- 	AS 'MODULE_PATHNAME', 'dispell_reset'
- 	LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_mem_used()
- 	RETURNS integer
- 	AS 'MODULE_PATHNAME', 'dispell_mem_used'
- 	LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_mem_available()
- 	RETURNS integer
- 	AS 'MODULE_PATHNAME', 'dispell_mem_available'
- 	LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT)
-     RETURNS SETOF record
-     AS 'MODULE_PATHNAME', 'dispell_list_dicts'
-     LANGUAGE C IMMUTABLE;
- 
- CREATE OR REPLACE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT)
-     RETURNS SETOF record
-     AS 'MODULE_PATHNAME', 'dispell_list_stoplists'
-     LANGUAGE C IMMUTABLE;
- 
- CREATE TEXT SEARCH TEMPLATE shared_ispell (
-     INIT = shared_ispell_init,
-     LEXIZE = shared_ispell_lexize
- );
- 
- /*
- CREATE TEXT SEARCH DICTIONARY czech_shared (
- 	TEMPLATE = shared_ispell,
- 	DictFile = czech,
- 	AffFile = czech,
- 	StopWords = czech
- );
- 
- CREATE TEXT SEARCH CONFIGURATION public.czech_shared ( COPY = pg_catalog.simple );
- 
- ALTER TEXT SEARCH CONFIGURATION czech_shared
-     ALTER MAPPING FOR asciiword, asciihword, hword_asciipart,
-                       word, hword, hword_part
-     WITH czech_shared;
- */
*** /dev/null
--- b/sql/shared_ispell--1.1.0.sql
***************
*** 0 ****
--- 1,55 ----
+ CREATE OR REPLACE FUNCTION shared_ispell_init(internal)
+ 	RETURNS internal
+ 	AS 'MODULE_PATHNAME', 'dispell_init'
+ 	LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_lexize(internal,internal,internal,internal)
+ 	RETURNS internal
+ 	AS 'MODULE_PATHNAME', 'dispell_lexize'
+ 	LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_reset()
+ 	RETURNS void
+ 	AS 'MODULE_PATHNAME', 'dispell_reset'
+ 	LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_mem_used()
+ 	RETURNS integer
+ 	AS 'MODULE_PATHNAME', 'dispell_mem_used'
+ 	LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_mem_available()
+ 	RETURNS integer
+ 	AS 'MODULE_PATHNAME', 'dispell_mem_available'
+ 	LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_dicts( OUT dict_name VARCHAR, OUT affix_name VARCHAR, OUT words INT, OUT affixes INT, OUT bytes INT)
+     RETURNS SETOF record
+     AS 'MODULE_PATHNAME', 'dispell_list_dicts'
+     LANGUAGE C IMMUTABLE;
+ 
+ CREATE OR REPLACE FUNCTION shared_ispell_stoplists( OUT stop_name VARCHAR, OUT words INT, OUT bytes INT)
+     RETURNS SETOF record
+     AS 'MODULE_PATHNAME', 'dispell_list_stoplists'
+     LANGUAGE C IMMUTABLE;
+ 
+ CREATE TEXT SEARCH TEMPLATE shared_ispell (
+     INIT = shared_ispell_init,
+     LEXIZE = shared_ispell_lexize
+ );
+ 
+ /*
+ CREATE TEXT SEARCH DICTIONARY czech_shared (
+ 	TEMPLATE = shared_ispell,
+ 	DictFile = czech,
+ 	AffFile = czech,
+ 	StopWords = czech
+ );
+ 
+ CREATE TEXT SEARCH CONFIGURATION public.czech_shared ( COPY = pg_catalog.simple );
+ 
+ ALTER TEXT SEARCH CONFIGURATION czech_shared
+     ALTER MAPPING FOR asciiword, asciihword, hword_asciipart,
+                       word, hword, hword_part
+     WITH czech_shared;
+ */
*** /dev/null
--- b/sql/shared_ispell.sql
***************
*** 0 ****
--- 1,49 ----
+ CREATE EXTENSION shared_ispell;
+ 
+ -- Test ISpell dictionary with ispell affix file
+ CREATE TEXT SEARCH DICTIONARY shared_ispell (
+                         Template=shared_ispell,
+                         DictFile=ispell_sample,
+                         AffFile=ispell_sample
+ );
+ 
+ SELECT ts_lexize('shared_ispell', 'skies');
+ SELECT ts_lexize('shared_ispell', 'bookings');
+ SELECT ts_lexize('shared_ispell', 'booking');
+ SELECT ts_lexize('shared_ispell', 'foot');
+ SELECT ts_lexize('shared_ispell', 'foots');
+ SELECT ts_lexize('shared_ispell', 'rebookings');
+ SELECT ts_lexize('shared_ispell', 'rebooking');
+ SELECT ts_lexize('shared_ispell', 'rebook');
+ SELECT ts_lexize('shared_ispell', 'unbookings');
+ SELECT ts_lexize('shared_ispell', 'unbooking');
+ SELECT ts_lexize('shared_ispell', 'unbook');
+ 
+ SELECT ts_lexize('shared_ispell', 'footklubber');
+ SELECT ts_lexize('shared_ispell', 'footballklubber');
+ SELECT ts_lexize('shared_ispell', 'ballyklubber');
+ SELECT ts_lexize('shared_ispell', 'footballyklubber');
+ 
+ -- Test ISpell dictionary with hunspell affix file
+ CREATE TEXT SEARCH DICTIONARY shared_hunspell (
+                         Template=shared_ispell,
+                         DictFile=ispell_sample,
+                         AffFile=hunspell_sample
+ );
+ 
+ SELECT ts_lexize('shared_hunspell', 'skies');
+ SELECT ts_lexize('shared_hunspell', 'bookings');
+ SELECT ts_lexize('shared_hunspell', 'booking');
+ SELECT ts_lexize('shared_hunspell', 'foot');
+ SELECT ts_lexize('shared_hunspell', 'foots');
+ SELECT ts_lexize('shared_hunspell', 'rebookings');
+ SELECT ts_lexize('shared_hunspell', 'rebooking');
+ SELECT ts_lexize('shared_hunspell', 'rebook');
+ SELECT ts_lexize('shared_hunspell', 'unbookings');
+ SELECT ts_lexize('shared_hunspell', 'unbooking');
+ SELECT ts_lexize('shared_hunspell', 'unbook');
+ 
+ SELECT ts_lexize('shared_hunspell', 'footklubber');
+ SELECT ts_lexize('shared_hunspell', 'footballklubber');
+ SELECT ts_lexize('shared_hunspell', 'ballyklubber');
+ SELECT ts_lexize('shared_hunspell', 'footballyklubber');
*** a/src/shared_ispell.c
--- b/src/shared_ispell.c
***************
*** 39,45 ****
   *                  -> copyAffixNode (suffixes)
   *                  -> copySPNode
   *                  -> copy affix data
!  *                  -> copy compound affixes
   *          -> get_shared_stop_list
   *              -> readstoplist
   *              -> copyStopList
--- 39,45 ----
   *                  -> copyAffixNode (suffixes)
   *                  -> copySPNode
   *                  -> copy affix data
!  *                  -> copyCMPDAffix
   *          -> get_shared_stop_list
   *              -> readstoplist
   *              -> copyStopList
***************
*** 53,105 ****
   *      -> SharedNINormalizeWord
  */
  
- #include <stdio.h>
- #include <stdlib.h>
- #include <sys/shm.h>
- #include <sys/stat.h>
- 
- #include <sys/types.h>
- #include <sys/ipc.h>
- 
  #include "postgres.h"
  #include "miscadmin.h"
  #include "storage/ipc.h"
- #include "storage/fd.h"
  
- #include "commands/explain.h"
- #include "executor/executor.h"
- #include "executor/instrument.h"
- #include "utils/guc.h"
  #include "commands/defrem.h"
  #include "tsearch/ts_locale.h"
- #include "storage/lwlock.h"
- #include "utils/timestamp.h"
  #include "access/htup_details.h"
- 
  #include "funcapi.h"
  
! #include "libpq/md5.h"
! 
! #include "spell.h"
  #include "tsearch/dicts/spell.h"
  
- #ifdef PG_MODULE_MAGIC
  PG_MODULE_MAGIC;
- #endif
- 
- #if (PG_VERSION_NUM < 90100)
- #define    NIStartBuild(dict)
- #define    NIFinishBuild(dict)
- #endif
  
  /* private functions */
  static void ispell_shmem_startup(void);
  
- /* This segment is initialized in the first process that accesses it (see
-  * ispell_shmem_startup function).
-  */
- #define SEGMENT_NAME    "shared_ispell"
- 
  static int  max_ispell_mem_size  = (30*1024*1024); /* 50MB by default */
  
  /* Saved hook values in case of unload */
--- 53,76 ----
   *      -> SharedNINormalizeWord
  */
  
  #include "postgres.h"
  #include "miscadmin.h"
  #include "storage/ipc.h"
  
  #include "commands/defrem.h"
  #include "tsearch/ts_locale.h"
  #include "access/htup_details.h"
  #include "funcapi.h"
  
! #include "shared_ispell.h"
  #include "tsearch/dicts/spell.h"
+ #include "regex/regguts.h"
  
  PG_MODULE_MAGIC;
  
  /* private functions */
  static void ispell_shmem_startup(void);
  
  static int  max_ispell_mem_size  = (30*1024*1024); /* 50MB by default */
  
  /* Saved hook values in case of unload */
***************
*** 108,143 **** static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
  void        _PG_init(void);
  void        _PG_fini(void);
  
- /* used to allocate memory in the shared segment */
- typedef struct SegmentInfo {
- 
-     LWLockId    lock;
-     char        *firstfree;        /* first free address (always maxaligned) */
-     size_t        available;        /* free space remaining at firstfree */
-     Timestamp    lastReset;        /* last reset of the dictionary */
- 
-     /* the shared segment (info and data) */
-     SharedIspellDict * dict;
-     SharedStopList   * stop;
- 
- } SegmentInfo;
- 
- #define MAXLEN 255
- 
- /* used to keep track of dictionary in each backend */
- typedef struct DictInfo {
- 
-     Timestamp    lookup;
- 
-     char dictFile[MAXLEN];
-     char affixFile[MAXLEN];
-     char stopFile[MAXLEN];
- 
-     SharedIspellDict * dict;
-     SharedStopList   * stop;
- 
- } DictInfo;
- 
  /* These are used to allocate data within shared segment */
  static SegmentInfo * segment_info = NULL;
  
--- 79,84 ----
***************
*** 155,161 **** static int sizeStopList(StopList * list, char * stopFile);
  void
  _PG_init(void)
  {
- 
      /* */
      if (! process_shared_preload_libraries_in_progress) {
          elog(ERROR, "shared_ispell has to be loaded using shared_preload_libraries");
--- 96,101 ----
***************
*** 266,272 **** void ispell_shmem_startup() {
  static
  SharedIspellDict * get_shared_dict(char * words, char * affixes) {
  
!     SharedIspellDict * dict = segment_info->dict;
  
      while (dict != NULL) {
          if ((strcmp(dict->dictFile, words) == 0) &&
--- 206,212 ----
  static
  SharedIspellDict * get_shared_dict(char * words, char * affixes) {
  
!     SharedIspellDict * dict = segment_info->shdict;
  
      while (dict != NULL) {
          if ((strcmp(dict->dictFile, words) == 0) &&
***************
*** 289,295 **** SharedIspellDict * get_shared_dict(char * words, char * affixes) {
  static
  SharedStopList * get_shared_stop_list(char * stop) {
  
!     SharedStopList * list = segment_info->stop;
  
      while (list != NULL) {
          if (strcmp(list->stopFile, stop) == 0) {
--- 229,235 ----
  static
  SharedStopList * get_shared_stop_list(char * stop) {
  
!     SharedStopList * list = segment_info->shstop;
  
      while (list != NULL) {
          if (strcmp(list->stopFile, stop) == 0) {
***************
*** 355,366 **** void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
          /* fine, there's enough space - copy the dictionary */
          shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell);
  
-         elog(INFO, "shared dictionary %s.dict / %s.affix loaded, used %d B, %ld B remaining",
-              dictFile, affFile, size, segment_info->available);
- 
          /* add the new dictionary to the linked list (of SharedIspellDict structures) */
!         shdict->next = segment_info->dict;
!         segment_info->dict = shdict;
  
      }
      
--- 295,303 ----
          /* fine, there's enough space - copy the dictionary */
          shdict = copyIspellDict(dict, dictFile, affFile, size, dict->nspell);
  
          /* add the new dictionary to the linked list (of SharedIspellDict structures) */
!         shdict->next = segment_info->shdict;
!         segment_info->shdict = shdict;
  
      }
      
***************
*** 368,374 **** void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
  
      /* lookup if the stop words are already loaded in the shared segment, but only if there
       * actually is a list */
!     if (stopFile != NULL) {
  
          shstop = get_shared_stop_list(stopFile);
  
--- 305,311 ----
  
      /* lookup if the stop words are already loaded in the shared segment, but only if there
       * actually is a list */
!     if (stopFile && *stopFile) {
  
          shstop = get_shared_stop_list(stopFile);
  
***************
*** 386,397 **** void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
              /* fine, there's enough space - copy the stoplist */
              shstop = copyStopList(&stoplist, stopFile, size);
  
-             elog(INFO, "shared stoplist %s.stop loaded, used %d B, %ld B remaining",
-                  affFile, size, segment_info->available);
- 
              /* add the new stopword list to the linked list (of SharedStopList structures) */
!             shstop->next = segment_info->stop;
!             segment_info->stop = shstop;
  
          }
      }
--- 323,331 ----
              /* fine, there's enough space - copy the stoplist */
              shstop = copyStopList(&stoplist, stopFile, size);
  
              /* add the new stopword list to the linked list (of SharedStopList structures) */
!             shstop->next = segment_info->shstop;
!             segment_info->shstop = shstop;
  
          }
      }
***************
*** 399,412 **** void init_shared_dict(DictInfo * info, char * dictFile, char * affFile, char * s
      /* Now, fill the DictInfo structure for the backend (references to dictionary,
       * stopwords and the filenames). */
  
!     info->dict = shdict;
!     info->stop = shstop;
      info->lookup = GetCurrentTimestamp();
  
      memcpy(info->dictFile, dictFile, strlen(dictFile) + 1);
      memcpy(info->affixFile, dictFile, strlen(affFile)+ 1);
!     memcpy(info->stopFile, dictFile, strlen(stopFile) + 1);
! 
  }
  
  Datum dispell_init(PG_FUNCTION_ARGS);
--- 333,348 ----
      /* Now, fill the DictInfo structure for the backend (references to dictionary,
       * stopwords and the filenames). */
  
!     info->shdict = shdict;
!     info->shstop = shstop;
      info->lookup = GetCurrentTimestamp();
  
      memcpy(info->dictFile, dictFile, strlen(dictFile) + 1);
      memcpy(info->affixFile, dictFile, strlen(affFile)+ 1);
!     if (stopFile != NULL)
!         memcpy(info->stopFile, dictFile, strlen(stopFile) + 1);
!     else
!         memset(info->stopFile, 0, sizeof(info->stopFile));
  }
  
  Datum dispell_init(PG_FUNCTION_ARGS);
***************
*** 436,443 **** dispell_reset(PG_FUNCTION_ARGS)
  {
      LWLockAcquire(segment_info->lock, LW_EXCLUSIVE);
  
!     segment_info->dict = NULL;
!     segment_info->stop = NULL;
      segment_info->lastReset = GetCurrentTimestamp();
      segment_info->firstfree = ((char*)segment_info) + MAXALIGN(sizeof(SegmentInfo));
      segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - (char*)segment_info);
--- 372,379 ----
  {
      LWLockAcquire(segment_info->lock, LW_EXCLUSIVE);
  
!     segment_info->shdict = NULL;
!     segment_info->shstop = NULL;
      segment_info->lastReset = GetCurrentTimestamp();
      segment_info->firstfree = ((char*)segment_info) + MAXALIGN(sizeof(SegmentInfo));
      segment_info->available = max_ispell_mem_size - (int)(segment_info->firstfree - (char*)segment_info);
***************
*** 596,607 **** dispell_lexize(PG_FUNCTION_ARGS)
          LWLockRelease(segment_info->lock);
          LWLockAcquire(segment_info->lock, LW_EXCLUSIVE);
  
-         elog(INFO, "reinitializing shared dict (segment reset)");
- 
          init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile);
      }
  
!     res = SharedNINormalizeWord(info->dict, txt);
  
      /* nothing found :-( */
      if (res == NULL) {
--- 532,541 ----
          LWLockRelease(segment_info->lock);
          LWLockAcquire(segment_info->lock, LW_EXCLUSIVE);
  
          init_shared_dict(info, info->dictFile, info->affixFile, info->stopFile);
      }
  
!     res = NINormalizeWord(&(info->shdict->dict), txt);
  
      /* nothing found :-( */
      if (res == NULL) {
***************
*** 612,618 **** dispell_lexize(PG_FUNCTION_ARGS)
      ptr = cptr = res;
      while (ptr->lexeme)
      {
!         if (searchstoplist(&info->stop->list, ptr->lexeme))
          {
              pfree(ptr->lexeme);
              ptr->lexeme = NULL;
--- 546,552 ----
      ptr = cptr = res;
      while (ptr->lexeme)
      {
!         if (info->shstop && searchstoplist(&(info->shstop->stop), ptr->lexeme))
          {
              pfree(ptr->lexeme);
              ptr->lexeme = NULL;
***************
*** 751,756 **** int sizeRegisNode(RegisNode * node) {
--- 685,937 ----
      return size;
  }
  
+ /* struct cnfa */
+ 
+ static int
+ getNArcs(struct cnfa *cnfa)
+ {
+     int             i;
+     int             narcs = 0;
+     struct carc    *ca;
+ 
+     for (i = 0; i < cnfa->nstates; i++)
+     {
+         ca = cnfa->states[i];
+         narcs++;
+         while (ca && ca->co != COLORLESS)
+         {
+             ca++;
+             narcs++;
+         }
+     }
+ 
+     return narcs;
+ }
+ 
+ static void
+ copyCnfa(struct cnfa *cnfa, struct cnfa *newc)
+ {
+     int             narcs;
+     int             i;
+     struct carc    *ca;
+     struct carc    *newca;
+ 
+     memcpy(newc, cnfa, sizeof(struct cnfa));
+ 
+     /* stflags */
+     newc->stflags = (char *) shalloc(cnfa->nstates * sizeof(char));
+     memcpy(newc->stflags, cnfa->stflags, cnfa->nstates * sizeof(char));
+     /* states */
+     /* arcs */
+     narcs = getNArcs(cnfa);
+     newc->states = (struct carc **) shalloc(cnfa->nstates * sizeof(struct carc *));
+     newc->arcs = (struct carc *) shalloc(narcs * sizeof(struct carc));
+ 
+     newca = newc->arcs;
+     for (i = 0; i < cnfa->nstates; i++)
+     {
+         newc->states[i] = newca;
+ 
+         ca = cnfa->states[i];
+         while(ca && ca->co != COLORLESS)
+         {
+             newca->co = ca->co;
+             newca->to = ca->to;
+             newca++;
+             ca++;
+         }
+ 
+         newca->co = COLORLESS;
+         newca->to = 0;
+         newca++;
+     }
+ }
+ 
+ static int
+ sizeCnfa(struct cnfa *cnfa)
+ {
+     int narcs;
+     /* stflags */
+     int size = MAXALIGN(sizeof(char) * cnfa->nstates);
+     /* states */
+     size += MAXALIGN(sizeof(struct carc *) * cnfa->nstates);
+     /* arcs */
+     narcs = getNArcs(cnfa);
+     size += MAXALIGN(sizeof(struct carc) * narcs);
+ 
+     return size;
+ }
+ 
+ /* struct subre */
+ 
+ static struct subre *
+ copySubre(struct subre *tree)
+ {
+     struct subre *newt;
+     if (tree == NULL)
+         return NULL;
+ 
+     newt = (struct subre *) shalloc(sizeof(struct subre));
+     memcpy(newt, tree, sizeof(struct subre));
+     newt->left = NULL;
+     newt->right = NULL;
+     newt->begin = NULL;
+     newt->end = NULL;
+     newt->chain = NULL;
+     copyCnfa(&tree->cnfa, &newt->cnfa);
+ 
+     return newt;
+ }
+ 
+ static int
+ sizeSubre(struct subre *tree)
+ {
+     int size;
+     if (tree == NULL)
+         return 0;
+ 
+     size = MAXALIGN(sizeof(struct subre));
+     size += sizeCnfa(&tree->cnfa);
+ 
+     return size;
+ }
+ 
+ /* struct colormap */
+ 
+ static void
+ copyColormap(struct colormap *cm, struct colormap *newcm,
+              union tree *t, union tree *newt,
+              int level)
+ {
+     int         i;
+     int         j;
+     union tree *nextt;
+     union tree *newt2;
+ 
+     /* tree */
+     if (level == NBYTS - 1)
+         memcpy(newt->tcolor, t->tcolor, BYTTAB * sizeof(color));
+     else
+     {
+         nextt = &cm->tree[level + 1];
+         for (i = 0; i < BYTTAB; i++)
+             if (t->tptr[i] != nextt)
+             {
+                 newt2 = (union tree *) shalloc(sizeof(struct ptrs));
+                 for (j = BYTTAB - 1; j >= 0; j--)
+                     newt2->tptr[j] = &newcm->tree[level + 1];
+ 
+                 newt->tptr[i] = newt2;
+                 copyColormap(cm, newcm, t->tptr[i], newt2, level + 1);
+             }
+ 
+         copyColormap(cm, newcm, nextt, &newcm->tree[level + 1], level + 1);
+     }
+ }
+ 
+ static int
+ sizeColormap(struct colormap *cm, int level, union tree *t)
+ {
+     int         size = 0;
+     int         i;
+     union tree *nextt;
+ 
+     /* tree */
+     if (level == NBYTS - 1)
+         size += MAXALIGN(sizeof(struct colors));
+     else
+     {
+         nextt = &cm->tree[level + 1];
+         for (i = 0; i < BYTTAB; i++)
+             if (t->tptr[i] != nextt)
+             {
+                 size += MAXALIGN(sizeof(struct ptrs));
+                 size += sizeColormap(cm, level + 1, t->tptr[i]);
+             }
+ 
+         size += sizeColormap(cm, level + 1, nextt);
+     }
+ 
+     return size;
+ }
+ 
+ /* struct guts */
+ 
+ /*
+  * Copied function from regc_locale.c
+  * cmp - chr-substring compare
+  *
+  * Backrefs need this.  It should preferably be efficient.
+  * Note that it does not need to report anything except equal/unequal.
+  * Note also that the length is exact, and the comparison should not
+  * stop at embedded NULs!
+  */
+ static int                      /* 0 for equal, nonzero for unequal */
+ cmp(const chr *x, const chr *y, /* strings to compare */
+     size_t len)                 /* exact length of comparison */
+ {
+     return memcmp(VS(x), VS(y), len * sizeof(chr));
+ }
+ 
+ static struct guts *
+ copyGuts(regex_t *re)
+ {
+     int             i;
+     int             j;
+     union tree     *t;
+     union tree     *nextt;
+     struct guts    *newg;
+     struct guts    *g = (struct guts *) re->re_guts;
+ 
+     newg = (struct guts *) shalloc(sizeof(struct guts));
+     memcpy(newg, g, sizeof(struct guts));
+ 
+     /* tree */
+     newg->tree = copySubre(g->tree);
+     /* search */
+     copyCnfa(&g->search, &newg->search);
+ 
+     /* cmap */
+     /* upper levels of tree */
+     for (t = &newg->cmap.tree[0], j = NBYTS - 1; j > 0; t = nextt, j--)
+     {
+         nextt = t + 1;
+         for (i = BYTTAB - 1; i >= 0; i--)
+             t->tptr[i] = nextt;
+     }
+     /* bottom level is solid white */
+     t = &newg->cmap.tree[NBYTS - 1];
+     for (i = BYTTAB - 1; i >= 0; i--)
+         t->tcolor[i] = WHITE;
+ 
+     copyColormap(&g->cmap, &newg->cmap, g->cmap.tree, newg->cmap.tree, 0);
+     /* compare */
+     newg->compare = cmp;
+     /* lacons */
+     newg->lacons = copySubre(g->lacons);
+ 
+     return newg;
+ }
+ 
+ static int
+ sizeGuts(regex_t *re)
+ {
+     struct guts    *g;
+     int             size = MAXALIGN(sizeof(struct guts));
+ 
+     g = (struct guts *) re->re_guts;
+     /* tree */
+     size += sizeSubre(g->tree);
+     /* search */
+     size += sizeCnfa(&g->search);
+     /* cmap */
+     size += sizeColormap(&g->cmap, 0, g->cmap.tree);
+     /* lacons */
+     size += sizeSubre(g->lacons);
+ 
+     return size;
+ }
+ 
  /* AFFIX - affix rules (simple, regis or full regular expressions). */
  
  static
***************
*** 766,777 **** AFFIX * copyAffix(AFFIX * affix) {
      if (affix->isregis) {
          copy->reg.regis.node = copyRegisNode(affix->reg.regis.node);
      } else if (! affix->issimple) {
  
!         /*FIXME Need to copy the regex_t properly. But a plain copy would not be
!          *      safe tu use by multiple processes at the same time, so each backend
!          *      needs to create it's own copy. */
!         elog(ERROR, "This extension can't handle regex_t affixes yet.");
! 
      }
  
      return copy;
--- 947,956 ----
      if (affix->isregis) {
          copy->reg.regis.node = copyRegisNode(affix->reg.regis.node);
      } else if (! affix->issimple) {
+         struct guts *g = copyGuts(&affix->reg.regex);
  
!         memcpy(&copy->reg.regex, &affix->reg.regex, sizeof(regex_t));
!         copy->reg.regex.re_guts = (char *) g;
      }
  
      return copy;
***************
*** 789,799 **** int sizeAffix(AFFIX * affix) {
      if (affix->isregis) {
          size += sizeRegisNode(affix->reg.regis.node);
      } else if (! affix->issimple) {
! 
!         /*FIXME Need to copy the regex_t properly. But would a plain copy be
!          *      safe tu use by multiple processes at the same time? */
!         elog(ERROR, "This extension can't handle regex_t affixes yet.");
! 
      }
  
      return size;
--- 968,974 ----
      if (affix->isregis) {
          size += sizeRegisNode(affix->reg.regis.node);
      } else if (! affix->issimple) {
!         size += sizeGuts(&affix->reg.regex);
      }
  
      return size;
***************
*** 864,876 **** SharedStopList * copyStopList(StopList * list, char * stopFile, int size) {
      int i;
      SharedStopList * copy = (SharedStopList *)shalloc(sizeof(SharedStopList));
  
!     copy->list.len = list->len;
!     copy->list.stop = (char**)shalloc(sizeof(char*) * list->len);
      copy->stopFile = shstrcpy(stopFile);
      copy->nbytes = size;
  
      for (i = 0; i < list->len; i++) {
!         copy->list.stop[i] = shstrcpy(list->stop[i]);
      }
  
      return copy;
--- 1039,1051 ----
      int i;
      SharedStopList * copy = (SharedStopList *)shalloc(sizeof(SharedStopList));
  
!     copy->stop.len = list->len;
!     copy->stop.stop = (char**)shalloc(sizeof(char*) * list->len);
      copy->stopFile = shstrcpy(stopFile);
      copy->nbytes = size;
  
      for (i = 0; i < list->len; i++) {
!         copy->stop.stop[i] = shstrcpy(list->stop[i]);
      }
  
      return copy;
***************
*** 896,905 **** int sizeStopList(StopList * list, char * stopFile) {
  
  static
  int countCMPDAffixes(CMPDAffix * affixes) {
  
!     /* there's at least one affix */
!     int count = 1;
!     CMPDAffix * ptr = affixes;
  
      /* the last one is marked with (affix == NULL) */
      while (ptr->affix)
--- 1071,1081 ----
  
  static
  int countCMPDAffixes(CMPDAffix * affixes) {
+     int         count = 0;
+     CMPDAffix  *ptr = affixes;
  
!     if (affixes == NULL)
!         return 0;
  
      /* the last one is marked with (affix == NULL) */
      while (ptr->affix)
***************
*** 907,915 **** int countCMPDAffixes(CMPDAffix * affixes) {
--- 1083,1123 ----
          ptr++;
          count++;
      }
+     count++;
  
      return count;
+ }
  
+ static CMPDAffix *
+ copyCMPDAffix(CMPDAffix *affixes)
+ {
+     CMPDAffix  *res,
+                *ptr1,
+                *ptr2 = affixes;
+     int         cnt = countCMPDAffixes(ptr2);
+ 
+     if (cnt > 0)
+     {
+         ptr1 = res = (CMPDAffix *) shalloc(sizeof(CMPDAffix) * cnt);
+         memcpy(ptr1, ptr2, sizeof(CMPDAffix) * cnt);
+ 
+         /*
+          * Copy affixes.
+          * The last one is marked with (affix == NULL)
+          */
+         while (ptr2->affix)
+         {
+             ptr1->affix = shalloc(ptr2->len);
+             strcpy(ptr1->affix, ptr2->affix);
+             ptr1++;
+             ptr2++;
+         }
+         ptr1->affix = NULL;
+     }
+     else
+         res = NULL;
+ 
+     return res;
  }
  
  /*
***************
*** 922,960 **** int countCMPDAffixes(CMPDAffix * affixes) {
  static
  SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size, int words) {
  
!     int i, cnt;
  
!     SharedIspellDict * copy = (SharedIspellDict*)shalloc(sizeof(SharedIspellDict));
  
!     copy->dictFile = shalloc(strlen(dictFile)+1);
!     copy->affixFile = shalloc(strlen(affixFile)+1);
  
      strcpy(copy->dictFile, dictFile);
      strcpy(copy->affixFile, affixFile);
  
!     copy->naffixes = dict->naffixes;
  
!     copy->Affix = (AFFIX*)shalloc(sizeof(AFFIX) * dict->naffixes);
  
!     copy->Suffix = copyAffixNode(dict->Suffix);
!     copy->Prefix = copyAffixNode(dict->Prefix);
  
!     copy->Dictionary = copySPNode(dict->Dictionary);
  
      /* copy affix data */
!     copy->nAffixData = dict->nAffixData;
!     copy->AffixData = (char**)shalloc(sizeof(char*) * dict->nAffixData);
!     for (i = 0; i < copy->nAffixData; i++) {
!         copy->AffixData[i] = shstrcpy(dict->AffixData[i]);
!     }
  
!     /* copy compound affixes (there's at least one) */
!     cnt = countCMPDAffixes(dict->CompoundAffix);
!     copy->CompoundAffix = (CMPDAffix*)shalloc(sizeof(CMPDAffix) * cnt);
!     memcpy(copy->CompoundAffix, dict->CompoundAffix, sizeof(CMPDAffix) * cnt);
  
!     memcpy(copy->flagval, dict->flagval, 255);
!     copy->usecompound = dict->usecompound;
  
      copy->nbytes = size;
      copy->nwords = words;
--- 1130,1164 ----
  static
  SharedIspellDict * copyIspellDict(IspellDict * dict, char * dictFile, char * affixFile, int size, int words) {
  
!     int i;
  
!     SharedIspellDict *copy = (SharedIspellDict *) shalloc(sizeof(SharedIspellDict));
  
!     copy->dictFile = shalloc(strlen(dictFile) + 1);
!     copy->affixFile = shalloc(strlen(affixFile) + 1);
  
      strcpy(copy->dictFile, dictFile);
      strcpy(copy->affixFile, affixFile);
  
!     copy->dict.naffixes = dict->naffixes;
  
!     copy->dict.Affix = (AFFIX *) shalloc(sizeof(AFFIX) * dict->naffixes);
  
!     copy->dict.Suffix = copyAffixNode(dict->Suffix);
!     copy->dict.Prefix = copyAffixNode(dict->Prefix);
  
!     copy->dict.Dictionary = copySPNode(dict->Dictionary);
  
      /* copy affix data */
!     copy->dict.nAffixData = dict->nAffixData;
!     copy->dict.AffixData = (char **) shalloc(sizeof(char *) * dict->nAffixData);
!     for (i = 0; i < copy->dict.nAffixData; i++)
!         copy->dict.AffixData[i] = shstrcpy(dict->AffixData[i]);
  
!     copy->dict.CompoundAffix = copyCMPDAffix(dict->CompoundAffix);
  
!     memcpy(copy->dict.flagval, dict->flagval, 256);
!     copy->dict.usecompound = dict->usecompound;
  
      copy->nbytes = size;
      copy->nwords = words;
***************
*** 988,994 **** int sizeIspellDict(IspellDict * dict, char * dictFile, char * affixFile) {
          size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1);
      }
  
!     /* copy compound affixes (there's at least one) */
      size += MAXALIGN(sizeof(CMPDAffix) * countCMPDAffixes(dict->CompoundAffix));
  
      return size;
--- 1192,1198 ----
          size += MAXALIGN(sizeof(char) * strlen(dict->AffixData[i]) + 1);
      }
  
!     /* copy compound affixes */
      size += MAXALIGN(sizeof(CMPDAffix) * countCMPDAffixes(dict->CompoundAffix));
  
      return size;
***************
*** 1014,1020 **** dispell_list_dicts(PG_FUNCTION_ARGS)
  
          /* get a shared lock and then the first dictionary */
          LWLockAcquire(segment_info->lock, LW_SHARED);
!         funcctx->user_fctx = segment_info->dict;
  
          /* Build a tuple descriptor for our result type */
          if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
--- 1218,1224 ----
  
          /* get a shared lock and then the first dictionary */
          LWLockAcquire(segment_info->lock, LW_SHARED);
!         funcctx->user_fctx = segment_info->shdict;
  
          /* Build a tuple descriptor for our result type */
          if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
***************
*** 1066,1072 **** dispell_list_dicts(PG_FUNCTION_ARGS)
          values[0] = PointerGetDatum(dictname);
          values[1] = PointerGetDatum(affname);
          values[2] = UInt32GetDatum(dict->nwords);
!         values[3] = UInt32GetDatum(dict->naffixes);
          values[4] = UInt32GetDatum(dict->nbytes);
  
          /* Build and return the tuple. */
--- 1270,1276 ----
          values[0] = PointerGetDatum(dictname);
          values[1] = PointerGetDatum(affname);
          values[2] = UInt32GetDatum(dict->nwords);
!         values[3] = UInt32GetDatum(dict->dict.naffixes);
          values[4] = UInt32GetDatum(dict->nbytes);
  
          /* Build and return the tuple. */
***************
*** 1109,1115 **** dispell_list_stoplists(PG_FUNCTION_ARGS)
  
          /* get a shared lock and then the first stop list */
          LWLockAcquire(segment_info->lock, LW_SHARED);
!         funcctx->user_fctx = segment_info->stop;
  
          /* Build a tuple descriptor for our result type */
          if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
--- 1313,1319 ----
  
          /* get a shared lock and then the first stop list */
          LWLockAcquire(segment_info->lock, LW_SHARED);
!         funcctx->user_fctx = segment_info->shstop;
  
          /* Build a tuple descriptor for our result type */
          if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
***************
*** 1156,1162 **** dispell_list_stoplists(PG_FUNCTION_ARGS)
          strcpy(VARDATA(stopname), stoplist->stopFile);
  
          values[0] = PointerGetDatum(stopname);
!         values[1] = UInt32GetDatum(stoplist->list.len);
          values[2] = UInt32GetDatum(stoplist->nbytes);
  
          /* Build and return the tuple. */
--- 1360,1366 ----
          strcpy(VARDATA(stopname), stoplist->stopFile);
  
          values[0] = PointerGetDatum(stopname);
!         values[1] = UInt32GetDatum(stoplist->stop.len);
          values[2] = UInt32GetDatum(stoplist->nbytes);
  
          /* Build and return the tuple. */
*** /dev/null
--- b/src/shared_ispell.h
***************
*** 0 ****
--- 1,66 ----
+ #ifndef __SHARED_ISPELL_H__
+ #define __SHARED_ISPELL_H__
+ 
+ #include "storage/lwlock.h"
+ #include "utils/timestamp.h"
+ #include "tsearch/dicts/spell.h"
+ #include "tsearch/ts_public.h"
+ 
+ /* This segment is initialized in the first process that accesses it (see
+  * ispell_shmem_startup function).
+  */
+ #define SEGMENT_NAME    "shared_ispell"
+ 
+ #define MAXLEN 255
+ 
+ typedef struct SharedIspellDict
+ {
+ 	/* this is used for selecting the dictionary */
+ 	char   *dictFile;
+ 	char   *affixFile;
+ 	int		nbytes;
+ 	int		nwords;
+ 
+ 	/* next dictionary in the chain (essentially a linked list) */
+ 	struct SharedIspellDict *next;
+ 
+ 	IspellDict dict;
+ } SharedIspellDict;
+ 
+ typedef struct SharedStopList
+ {
+ 	char   *stopFile;
+ 	int		nbytes;
+ 
+ 	struct SharedStopList *next;
+ 
+ 	StopList stop;
+ } SharedStopList;
+ 
+ /* used to allocate memory in the shared segment */
+ typedef struct SegmentInfo
+ {
+ 	LWLockId	lock;
+ 	char	   *firstfree;        /* first free address (always maxaligned) */
+ 	size_t		available;        /* free space remaining at firstfree */
+ 	Timestamp	lastReset;        /* last reset of the dictionary */
+ 
+ 	/* the shared segment (info and data) */
+ 	SharedIspellDict *shdict;
+ 	SharedStopList	 *shstop;
+ } SegmentInfo;
+ 
+ /* used to keep track of dictionary in each backend */
+ typedef struct DictInfo
+ {
+ 	Timestamp	lookup;
+ 
+ 	char dictFile[MAXLEN];
+ 	char affixFile[MAXLEN];
+ 	char stopFile[MAXLEN];
+ 
+ 	SharedIspellDict	*shdict;
+ 	SharedStopList		*shstop;
+ } DictInfo;
+ 
+ #endif
*** a/src/spell.c
--- /dev/null
***************
*** 1,647 ****
- /*-------------------------------------------------------------------------
-  *
-  * spell.c
-  * 
-  * Normalizing word with ISpell (in shared segment). Mostly a slightly
-  * copy of the spell.c code, modified so that it works with SharedIspellDict
-  * instead of plain IspellDict.
-  *
-  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
-  * Copyright (c) 2011, Tomas Vondra
-  *
-  * IDENTIFICATION
-  *  src/spell.c (a slightly modified copy of src/backend/tsearch/spell.c)
-  *
-  *-------------------------------------------------------------------------
-  */
- 
- #include "postgres.h"
- 
- #include "spell.h"
- 
- #define MAX_NORM 1024
- #define MAXNORMLEN 256
- 
- #define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
- 
- static int
- FindWord(SharedIspellDict *Conf, const char *word, int affixflag, int flag)
- {
- 	SPNode	   *node = Conf->Dictionary;
- 	SPNodeData *StopLow,
- 			   *StopHigh,
- 			   *StopMiddle;
- 	const uint8 *ptr = (const uint8 *) word;
- 
- 	flag &= FF_DICTFLAGMASK;
- 
- 	while (node && *ptr)
- 	{
- 		StopLow = node->data;
- 		StopHigh = node->data + node->length;
- 		while (StopLow < StopHigh)
- 		{
- 			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
- 			if (StopMiddle->val == *ptr)
- 			{
- 				if (*(ptr + 1) == '\0' && StopMiddle->isword)
- 				{
- 					if (flag == 0)
- 					{
- 						if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
- 							return 0;
- 					}
- 					else if ((flag & StopMiddle->compoundflag) == 0)
- 						return 0;
- 
- 					if ((affixflag == 0) || (strchr(Conf->AffixData[StopMiddle->affix], affixflag) != NULL))
- 						return 1;
- 				}
- 				node = StopMiddle->node;
- 				ptr++;
- 				break;
- 			}
- 			else if (StopMiddle->val < *ptr)
- 				StopLow = StopMiddle + 1;
- 			else
- 				StopHigh = StopMiddle;
- 		}
- 		if (StopLow >= StopHigh)
- 			break;
- 	}
- 	return 0;
- }
- 
- static AffixNodeData *
- FindAffixes(AffixNode *node, const char *word, int wrdlen, int *level, int type)
- {
- 	AffixNodeData *StopLow,
- 			   *StopHigh,
- 			   *StopMiddle;
- 	uint8 symbol;
- 
- 	if (node->isvoid)
- 	{							/* search void affixes */
- 		if (node->data->naff)
- 			return node->data;
- 		node = node->data->node;
- 	}
- 
- 	while (node && *level < wrdlen)
- 	{
- 		StopLow = node->data;
- 		StopHigh = node->data + node->length;
- 		while (StopLow < StopHigh)
- 		{
- 			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
- 			symbol = GETWCHAR(word, wrdlen, *level, type);
- 
- 			if (StopMiddle->val == symbol)
- 			{
- 				(*level)++;
- 				if (StopMiddle->naff)
- 					return StopMiddle;
- 				node = StopMiddle->node;
- 				break;
- 			}
- 			else if (StopMiddle->val < symbol)
- 				StopLow = StopMiddle + 1;
- 			else
- 				StopHigh = StopMiddle;
- 		}
- 		if (StopLow >= StopHigh)
- 			break;
- 	}
- 	return NULL;
- }
- 
- static char *
- CheckAffix(const char *word, size_t len, AFFIX *Affix, int flagflags, char *newword, int *baselen)
- {
- 	/*
- 	 * Check compound allow flags
- 	 */
- 
- 	if (flagflags == 0)
- 	{
- 		if (Affix->flagflags & FF_COMPOUNDONLY)
- 			return NULL;
- 	}
- 	else if (flagflags & FF_COMPOUNDBEGIN)
- 	{
- 		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
- 			return NULL;
- 		if ((Affix->flagflags & FF_COMPOUNDBEGIN) == 0)
- 			if (Affix->type == FF_SUFFIX)
- 				return NULL;
- 	}
- 	else if (flagflags & FF_COMPOUNDMIDDLE)
- 	{
- 		if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == 0 ||
- 			(Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
- 			return NULL;
- 	}
- 	else if (flagflags & FF_COMPOUNDLAST)
- 	{
- 		if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
- 			return NULL;
- 		if ((Affix->flagflags & FF_COMPOUNDLAST) == 0)
- 			if (Affix->type == FF_PREFIX)
- 				return NULL;
- 	}
- 
- 	/*
- 	 * make replace pattern of affix
- 	 */
- 	if (Affix->type == FF_SUFFIX)
- 	{
- 		strcpy(newword, word);
- 		strcpy(newword + len - Affix->replen, Affix->find);
- 		if (baselen)			/* store length of non-changed part of word */
- 			*baselen = len - Affix->replen;
- 	}
- 	else
- 	{
- 		/*
- 		 * if prefix is a all non-chaged part's length then all word contains
- 		 * only prefix and suffix, so out
- 		 */
- 		if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
- 			return NULL;
- 		strcpy(newword, Affix->find);
- 		strcat(newword, word + Affix->replen);
- 	}
- 
- 	/*
- 	 * check resulting word
- 	 */
- 	if (Affix->issimple)
- 		return newword;
- 	else if (Affix->isregis)
- 	{
- 		if (RS_execute(&(Affix->reg.regis), newword))
- 			return newword;
- 	}
- 	else
- 	{
- 		int			err;
- 		pg_wchar   *data;
- 		size_t		data_len;
- 		int			newword_len;
- 
- 		/* Convert data string to wide characters */
- 		newword_len = strlen(newword);
- 		data = (pg_wchar *) palloc((newword_len + 1) * sizeof(pg_wchar));
- 		data_len = pg_mb2wchar_with_len(newword, data, newword_len);
- 
- 		if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, 0, NULL, 0, NULL, 0)))
- 		{
- 			pfree(data);
- 			return newword;
- 		}
- 		pfree(data);
- 	}
- 
- 	return NULL;
- }
- 
- static int
- addToResult(char **forms, char **cur, char *word)
- {
- 	if (cur - forms >= MAX_NORM - 1)
- 		return 0;
- 	if (forms == cur || strcmp(word, *(cur - 1)) != 0)
- 	{
- 		*cur = pstrdup(word);
- 		*(cur + 1) = NULL;
- 		return 1;
- 	}
- 
- 	return 0;
- }
- 
- static char **
- NormalizeSubWord(SharedIspellDict *Conf, char *word, int flag)
- {
- 	AffixNodeData *suffix = NULL,
- 			   *prefix = NULL;
- 	int			slevel = 0,
- 				plevel = 0;
- 	int			wrdlen = strlen(word),
- 				swrdlen;
- 	char	  **forms;
- 	char	  **cur;
- 	char		newword[2 * MAXNORMLEN] = "";
- 	char		pnewword[2 * MAXNORMLEN] = "";
- 	AffixNode  *snode = Conf->Suffix,
- 			   *pnode;
- 	int			i,
- 				j;
- 
- 	if (wrdlen > MAXNORMLEN)
- 		return NULL;
- 	cur = forms = (char **) palloc(MAX_NORM * sizeof(char *));
- 	*cur = NULL;
- 
- 
- 	/* Check that the word itself is normal form */
- 	if (FindWord(Conf, word, 0, flag))
- 	{
- 		*cur = pstrdup(word);
- 		cur++;
- 		*cur = NULL;
- 	}
- 
- 	/* Find all other NORMAL forms of the 'word' (check only prefix) */
- 	pnode = Conf->Prefix;
- 	plevel = 0;
- 	while (pnode)
- 	{
- 		prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
- 		if (!prefix)
- 			break;
- 		for (j = 0; j < prefix->naff; j++)
- 		{
- 			if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
- 			{
- 				/* prefix success */
- 				if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
- 					cur += addToResult(forms, cur, newword);
- 			}
- 		}
- 		pnode = prefix->node;
- 	}
- 
- 	/*
- 	 * Find all other NORMAL forms of the 'word' (check suffix and then
- 	 * prefix)
- 	 */
- 	while (snode)
- 	{
- 		int			baselen = 0;
- 
- 		/* find possible suffix */
- 		suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
- 		if (!suffix)
- 			break;
- 		/* foreach suffix check affix */
- 		for (i = 0; i < suffix->naff; i++)
- 		{
- 			if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
- 			{
- 				/* suffix success */
- 				if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
- 					cur += addToResult(forms, cur, newword);
- 
- 				/* now we will look changed word with prefixes */
- 				pnode = Conf->Prefix;
- 				plevel = 0;
- 				swrdlen = strlen(newword);
- 				while (pnode)
- 				{
- 					prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
- 					if (!prefix)
- 						break;
- 					for (j = 0; j < prefix->naff; j++)
- 					{
- 						if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
- 						{
- 							/* prefix success */
- 							int			ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
- 							0 : prefix->aff[j]->flag;
- 
- 							if (FindWord(Conf, pnewword, ff, flag))
- 								cur += addToResult(forms, cur, pnewword);
- 						}
- 					}
- 					pnode = prefix->node;
- 				}
- 			}
- 		}
- 
- 		snode = suffix->node;
- 	}
- 
- 	if (cur == forms)
- 	{
- 		pfree(forms);
- 		return (NULL);
- 	}
- 	return (forms);
- }
- 
- typedef struct SplitVar
- {
- 	int			nstem;
- 	int			lenstem;
- 	char	  **stem;
- 	struct SplitVar *next;
- } SplitVar;
- 
- static int
- CheckCompoundAffixes(CMPDAffix **ptr, char *word, int len, bool CheckInPlace)
- {
- 	bool		issuffix;
- 
- 	if (CheckInPlace)
- 	{
- 		while ((*ptr)->affix)
- 		{
- 			if (len > (*ptr)->len && strncmp((*ptr)->affix, word, (*ptr)->len) == 0)
- 			{
- 				len = (*ptr)->len;
- 				issuffix = (*ptr)->issuffix;
- 				(*ptr)++;
- 				return (issuffix) ? len : 0;
- 			}
- 			(*ptr)++;
- 		}
- 	}
- 	else
- 	{
- 		char	   *affbegin;
- 
- 		while ((*ptr)->affix)
- 		{
- 			if (len > (*ptr)->len && (affbegin = strstr(word, (*ptr)->affix)) != NULL)
- 			{
- 				len = (*ptr)->len + (affbegin - word);
- 				issuffix = (*ptr)->issuffix;
- 				(*ptr)++;
- 				return (issuffix) ? len : 0;
- 			}
- 			(*ptr)++;
- 		}
- 	}
- 	return -1;
- }
- 
- static SplitVar *
- CopyVar(SplitVar *s, int makedup)
- {
- 	SplitVar   *v = (SplitVar *) palloc(sizeof(SplitVar));
- 
- 	v->next = NULL;
- 	if (s)
- 	{
- 		int			i;
- 
- 		v->lenstem = s->lenstem;
- 		v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
- 		v->nstem = s->nstem;
- 		for (i = 0; i < s->nstem; i++)
- 			v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
- 	}
- 	else
- 	{
- 		v->lenstem = 16;
- 		v->stem = (char **) palloc(sizeof(char *) * v->lenstem);
- 		v->nstem = 0;
- 	}
- 	return v;
- }
- 
- static void
- AddStem(SplitVar *v, char *word)
- {
- 	if (v->nstem >= v->lenstem)
- 	{
- 		v->lenstem *= 2;
- 		v->stem = (char **) repalloc(v->stem, sizeof(char *) * v->lenstem);
- 	}
- 
- 	v->stem[v->nstem] = word;
- 	v->nstem++;
- }
- 
- static SplitVar *
- SplitToVariants(SharedIspellDict *Conf, SPNode *snode, SplitVar *orig, char *word, int wordlen, int startpos, int minpos)
- {
- 	SplitVar   *var = NULL;
- 	SPNodeData *StopLow,
- 			   *StopHigh,
- 			   *StopMiddle = NULL;
- 	SPNode	   *node = (snode) ? snode : Conf->Dictionary;
- 	int			level = (snode) ? minpos : startpos;	/* recursive
- 														 * minpos==level */
- 	int			lenaff;
- 	CMPDAffix  *caff;
- 	char	   *notprobed;
- 	int			compoundflag = 0;
- 
- 	notprobed = (char *) palloc(wordlen);
- 	memset(notprobed, 1, wordlen);
- 	var = CopyVar(orig, 1);
- 
- 	while (level < wordlen)
- 	{
- 		/* find word with epenthetic or/and compound affix */
- 		caff = Conf->CompoundAffix;
- 		while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= 0)
- 		{
- 			/*
- 			 * there is one of compound affixes, so check word for existings
- 			 */
- 			char		buf[MAXNORMLEN];
- 			char	  **subres;
- 
- 			lenaff = level - startpos + lenaff;
- 
- 			if (!notprobed[startpos + lenaff - 1])
- 				continue;
- 
- 			if (level + lenaff - 1 <= minpos)
- 				continue;
- 
- 			if (lenaff >= MAXNORMLEN)
- 				continue;		/* skip too big value */
- 			if (lenaff > 0)
- 				memcpy(buf, word + startpos, lenaff);
- 			buf[lenaff] = '\0';
- 
- 			if (level == 0)
- 				compoundflag = FF_COMPOUNDBEGIN;
- 			else if (level == wordlen - 1)
- 				compoundflag = FF_COMPOUNDLAST;
- 			else
- 				compoundflag = FF_COMPOUNDMIDDLE;
- 			subres = NormalizeSubWord(Conf, buf, compoundflag);
- 			if (subres)
- 			{
- 				/* Yes, it was a word from dictionary */
- 				SplitVar   *new = CopyVar(var, 0);
- 				SplitVar   *ptr = var;
- 				char	  **sptr = subres;
- 
- 				notprobed[startpos + lenaff - 1] = 0;
- 
- 				while (*sptr)
- 				{
- 					AddStem(new, *sptr);
- 					sptr++;
- 				}
- 				pfree(subres);
- 
- 				while (ptr->next)
- 					ptr = ptr->next;
- 				ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
- 
- 				pfree(new->stem);
- 				pfree(new);
- 			}
- 		}
- 
- 		if (!node)
- 			break;
- 
- 		StopLow = node->data;
- 		StopHigh = node->data + node->length;
- 		while (StopLow < StopHigh)
- 		{
- 			StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
- 			if (StopMiddle->val == ((uint8 *) (word))[level])
- 				break;
- 			else if (StopMiddle->val < ((uint8 *) (word))[level])
- 				StopLow = StopMiddle + 1;
- 			else
- 				StopHigh = StopMiddle;
- 		}
- 
- 		if (StopLow < StopHigh)
- 		{
- 			if (level == FF_COMPOUNDBEGIN)
- 				compoundflag = FF_COMPOUNDBEGIN;
- 			else if (level == wordlen - 1)
- 				compoundflag = FF_COMPOUNDLAST;
- 			else
- 				compoundflag = FF_COMPOUNDMIDDLE;
- 
- 			/* find infinitive */
- 			if (StopMiddle->isword &&
- 				(StopMiddle->compoundflag & compoundflag) &&
- 				notprobed[level])
- 			{
- 				/* ok, we found full compoundallowed word */
- 				if (level > minpos)
- 				{
- 					/* and its length more than minimal */
- 					if (wordlen == level + 1)
- 					{
- 						/* well, it was last word */
- 						AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
- 						pfree(notprobed);
- 						return var;
- 					}
- 					else
- 					{
- 						/* then we will search more big word at the same point */
- 						SplitVar   *ptr = var;
- 
- 						while (ptr->next)
- 							ptr = ptr->next;
- 						ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
- 						/* we can find next word */
- 						level++;
- 						AddStem(var, pnstrdup(word + startpos, level - startpos));
- 						node = Conf->Dictionary;
- 						startpos = level;
- 						continue;
- 					}
- 				}
- 			}
- 			node = StopMiddle->node;
- 		}
- 		else
- 			node = NULL;
- 		level++;
- 	}
- 
- 	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
- 	pfree(notprobed);
- 	return var;
- }
- 
- static void
- addNorm(TSLexeme **lres, TSLexeme **lcur, char *word, int flags, uint16 NVariant)
- {
- 	if (*lres == NULL)
- 		*lcur = *lres = (TSLexeme *) palloc(MAX_NORM * sizeof(TSLexeme));
- 
- 	if (*lcur - *lres < MAX_NORM - 1)
- 	{
- 		(*lcur)->lexeme = word;
- 		(*lcur)->flags = flags;
- 		(*lcur)->nvariant = NVariant;
- 		(*lcur)++;
- 		(*lcur)->lexeme = NULL;
- 	}
- }
- 
- TSLexeme *
- SharedNINormalizeWord(SharedIspellDict *Conf, char *word)
- {
- 	char	  **res;
- 	TSLexeme   *lcur = NULL,
- 			   *lres = NULL;
- 	uint16		NVariant = 1;
- 
- 	res = NormalizeSubWord(Conf, word, 0);
- 
- 	if (res)
- 	{
- 		char	  **ptr = res;
- 
- 		while (*ptr && (lcur - lres) < MAX_NORM)
- 		{
- 			addNorm(&lres, &lcur, *ptr, 0, NVariant++);
- 			ptr++;
- 		}
- 		pfree(res);
- 	}
- 
- 	if (Conf->usecompound)
- 	{
- 		int			wordlen = strlen(word);
- 		SplitVar   *ptr,
- 				   *var = SplitToVariants(Conf, NULL, NULL, word, wordlen, 0, -1);
- 		int			i;
- 
- 		while (var)
- 		{
- 			if (var->nstem > 1)
- 			{
- 				char	  **subres = NormalizeSubWord(Conf, var->stem[var->nstem - 1], FF_COMPOUNDLAST);
- 
- 				if (subres)
- 				{
- 					char	  **subptr = subres;
- 
- 					while (*subptr)
- 					{
- 						for (i = 0; i < var->nstem - 1; i++)
- 						{
- 							addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), 0, NVariant);
- 						}
- 
- 						addNorm(&lres, &lcur, *subptr, 0, NVariant);
- 						subptr++;
- 						NVariant++;
- 					}
- 
- 					pfree(subres);
- 					var->stem[0] = NULL;
- 					pfree(var->stem[var->nstem - 1]);
- 				}
- 			}
- 
- 			for (i = 0; i < var->nstem && var->stem[i]; i++)
- 				pfree(var->stem[i]);
- 			ptr = var->next;
- 			pfree(var->stem);
- 			pfree(var);
- 			var = ptr;
- 		}
- 	}
- 
- 	return lres;
- }
--- 0 ----
*** a/src/spell.h
--- /dev/null
***************
*** 1,71 ****
- /*-------------------------------------------------------------------------
-  *
-  * spell.h
-  *
-  * Declarations for ISpell dictionary
-  *
-  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
-  *
-  * src/include/tsearch/dicts/spell.h
-  *
-  *-------------------------------------------------------------------------
-  */
- 
- #ifndef __SHARED_SPELL_H__
- #define __SHARED_SPELL_H__
- 
- #include "regex/regex.h"
- #include "tsearch/dicts/regis.h"
- #include "tsearch/ts_public.h"
- #include "storage/lwlock.h"
- #include "tsearch/dicts/spell.h"
- 
- typedef struct SharedIspellDict
- {
- 	
- 	/* this is used for selecting the dictionary */
- 	char *	dictFile;
- 	char *  affixFile;
- 	
- 	int		nbytes;
- 	int		nwords;
- 	
- 	/* next dictionary in the chain (essentially a linked list) */
- 	struct SharedIspellDict * next;
- 	
- 	/* the copied fields */
- 	int			naffixes;
- 	AFFIX	   *Affix;
- 
- 	AffixNode  *Suffix;
- 	AffixNode  *Prefix;
- 
- 	SPNode	   *Dictionary;
- 	char	  **AffixData;		/* list of flags (characters) used in the dictionary */
- 	
- 	/* FIXME lenAffixData and nAffixData seems to be the same thing */
- 	int			lenAffixData;	/* length of the affix array */
- 	int			nAffixData;		/* number of affix data items */
- 
- 	CMPDAffix  * CompoundAffix;
- 
- 	unsigned char flagval[256];
- 	bool		usecompound;
- 	
- } SharedIspellDict;
- 
- typedef struct SharedStopList
- {
- 	
- 	char *  stopFile;
- 	
- 	int		nbytes;
- 	
- 	StopList list;
- 	struct SharedStopList * next;
- 	
- } SharedStopList;
- 
- TSLexeme *SharedNINormalizeWord(SharedIspellDict *Conf, char *word);
- 
- #endif
--- 0 ----