On 01/15/2011 07:41 PM, Andrew Dunstan wrote:


On 01/15/2011 12:29 PM, Andrew Dunstan wrote:

I've been waiting for the latest FDW patches as patiently as I can, and I've been reviewing them this morning, in particular the file_fdw patch and how it interacts with the newly exposed COPY API. Overall it seems to be a whole lot cleaner, and the wholesale duplication of the copy code is gone, so it's much nicer and cleaner. So now I'd like to add a new option to it: "textarray". This option would require that the foreign table have exactly one field, of type text[], and would compose all the field strings read from the file for each record into the array (however many there are). This would require a few changes to contrib/file_fdw/file_fdw.c and a few changes to src/backend/commands/copy.c, which I can probably have done in fairly short order, Deo Volente. This will allow something like:

   CREATE FOREIGN TABLE arr_text (
        t text[]
   )  SERVER file_server
   OPTIONS (format 'csv', filename '/path/to/ragged.csv', textarray
   'true');
   SELECT t[3]::int as a, t[1]::timestamptz as b, t[99] as not_there
   FROM arr_text;



A WIP patch is attached. It's against Shigeru Hanada's latest FDW patches. It's surprisingly tiny. Right now it probably leaks memory like a sieve, and that's the next thing I'm going to chase down.



Updated patch attached, that should use memory better.

cheers

andrew


*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 58,67 **** static struct FileFdwOption valid_options[] = {
        { "quote",                      ForeignTableRelationId },
        { "escape",                     ForeignTableRelationId },
        { "null",                       ForeignTableRelationId },
  
        /* FIXME: implement force_not_null option */
  
!       /* Centinel */
        { NULL,                 InvalidOid }
  };
  
--- 58,68 ----
        { "quote",                      ForeignTableRelationId },
        { "escape",                     ForeignTableRelationId },
        { "null",                       ForeignTableRelationId },
+       { "textarray",          ForeignTableRelationId },
  
        /* FIXME: implement force_not_null option */
  
!       /* Sentinel */
        { NULL,                 InvalidOid }
  };
  
***************
*** 134,139 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 135,141 ----
        char       *escape = NULL;
        char       *null = NULL;
        bool            header;
+       bool        textarray;
  
        /* Only superuser can change generic options of the foreign table */
        if (catalog == ForeignTableRelationId && !superuser())
***************
*** 220,225 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 222,231 ----
                                                 errmsg("null representation 
cannot use newline or carriage return")));
                        null = strVal(def->arg);
                }
+               else if (strcmp(def->defname, "textarray") == 0)
+               {
+                       textarray = defGetBoolean(def);
+               }
        }
  
        /* Check options which depend on the file format. */
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 44,49 ****
--- 44,51 ----
  #include "utils/memutils.h"
  #include "utils/snapmgr.h"
  
+ /* initial size for arrays in textarray mode */
+ #define TEXTARRAY_SIZE 64
  
  #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
  #define OCTVALUE(c) ((c) - '0')
***************
*** 117,122 **** typedef struct CopyStateData
--- 119,127 ----
        bool       *force_quote_flags;          /* per-column CSV FQ flags */
        bool       *force_notnull_flags;        /* per-column CSV FNN flags */
  
+       /* param from FDW */
+       bool       text_array;      /* scan to a single text array field */
+ 
        /* these are just for error messages, see CopyFromErrorCallback */
        const char *cur_relname;        /* table name for error messages */
        int                     cur_lineno;             /* line number for 
error messages */
***************
*** 970,975 **** BeginCopy(bool is_from,
--- 975,988 ----
                                                 errmsg("argument to option 
\"%s\" must be a list of column names",
                                                                
defel->defname)));
                }
+               else if (strcmp(defel->defname, "textarray") == 0)
+               {
+                       if (cstate->text_array)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or 
redundant options")));
+                       cstate->text_array = defGetBoolean(defel);
+               }
                else
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
***************
*** 1109,1114 **** BeginCopy(bool is_from,
--- 1122,1157 ----
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("CSV quote character must not appear in 
the NULL specification")));
  
+       /*
+        * file_fdw can use textarray mode, and as of the time of writing 
should not
+        * be able to cause any of these next errors. The checks are here more 
for
+        * the sake of future-proofing, and as protection in case a third party
+        * module uses the COPY API.
+        */
+ 
+       /* check textarray  mode */
+       if (cstate->text_array && !is_from)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("textarray only available in read mode")));
+ 
+       /* textarray is only allowed in text and CSV modes */
+       if (cstate->text_array && cstate->binary)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("textarray not available in binary mode")));
+ 
+       /* 
+        * textarray can't operate with force_not_null - it will not return
+        * a null in any case, although the array it returns might contain 
nulls,
+        * which it will be up to the user to deal with.
+        */
+ 
+       if (cstate->text_array && force_notnull != NIL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("force not null not available with 
textarray")));
+ 
        if (rel)
        {
                Assert(!raw_query);
***************
*** 1201,1206 **** BeginCopy(bool is_from,
--- 1244,1262 ----
  
        num_phys_attrs = tupDesc->natts;
  
+       /* make sure rel has the right shape for textarray */
+       if (cstate->text_array)
+       {
+               if (num_phys_attrs != 1)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("too many  columns for 
textarray")));
+               if (tupDesc->attrs[0]->atttypid != TEXTARRAYOID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("target column must be of type 
text[] for textarray")));                                
+       }
+ 
        /* Convert FORCE QUOTE name list to per-column flags, check validity */
        cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * 
sizeof(bool));
        if (force_quote_all)
***************
*** 2061,2068 **** BeginCopyFrom(Relation rel,
        num_defaults = 0;
  
        /* allocate arrays to avoid per-tuple overhead. */
!       cstate->values = palloc(sizeof(Datum) * tupDesc->natts);
!       cstate->nulls = palloc(sizeof(bool) * tupDesc->natts);
  
        /*
         * Pick up the required catalog information for each attribute in the
--- 2117,2132 ----
        num_defaults = 0;
  
        /* allocate arrays to avoid per-tuple overhead. */
!       if (!cstate->text_array)
!       {
!               cstate->values = (Datum *) palloc(num_phys_attrs * 
sizeof(Datum));
!               cstate->nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
!       }
!       else
!       {
!               cstate->values = (Datum *) palloc(TEXTARRAY_SIZE * 
sizeof(Datum));
!               cstate->nulls = (bool *) palloc(TEXTARRAY_SIZE * sizeof(bool));
!       }
  
        /*
         * Pick up the required catalog information for each attribute in the
***************
*** 2194,2200 **** BeginCopyFrom(Relation rel,
        }
  
        /* create workspace for CopyReadAttributes results */
!       if (!cstate->binary)
        {
                AttrNumber      attr_count = list_length(cstate->attnumlist);
                int     nfields = cstate->file_has_oids ? (attr_count + 1) : 
attr_count;
--- 2258,2269 ----
        }
  
        /* create workspace for CopyReadAttributes results */
!       if (cstate->text_array)
!       {
!               cstate->max_fields = TEXTARRAY_SIZE;
!               cstate->raw_fields = (char **) palloc(TEXTARRAY_SIZE * 
sizeof(char *));
!       }
!       else if (!cstate->binary)
        {
                AttrNumber      attr_count = list_length(cstate->attnumlist);
                int     nfields = cstate->file_has_oids ? (attr_count + 1) : 
attr_count;
***************
*** 2283,2289 **** NextCopyFrom(CopyState cstate)
                                fldct = CopyReadAttributesText(cstate);
  
                        /* check for overflowing fields */
!                       if (nfields > 0 && fldct > nfields)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("extra data after last 
expected column")));
--- 2352,2358 ----
                                fldct = CopyReadAttributesText(cstate);
  
                        /* check for overflowing fields */
!                       if (nfields > 0 && fldct > nfields && 
!cstate->text_array)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("extra data after last 
expected column")));
***************
*** 2319,2324 **** NextCopyFrom(CopyState cstate)
--- 2388,2447 ----
                                }
                        }
  
+                       if (cstate->text_array)
+                       {
+                               int        dims[1];
+                               int        lbs[1];
+                               int        fld;
+                               
+                               /* Treat an empty line as having no fields */
+                               if (fldct == 1 && 
+                                       field_strings[0] == NULL && 
+                                       cstate->null_print_len == 0)
+                               {
+                                       fldct = 0;
+                               }
+ 
+                               dims[0] = fldct;
+                               lbs[0] = 1; /* sql arrays typically start at 1 
*/
+ 
+                               /*
+                                * Use the values and nulls in cstate as 
workspace to create the array,
+                                * before storing the result back in the samne 
arrays.
+                                */
+ 
+                               
+                               for (fld=0; fld < fldct; fld++)
+                               {
+                                       if (field_strings[fld] == NULL)
+                                       {
+                                               cstate->nulls[fld] = true;
+                                       }
+                                       else
+                                       {
+                                               cstate->nulls[fld] = false;
+                                               cstate->values[fld] = 
+                                                       
DirectFunctionCall1(textin, 
+                                                                               
                PointerGetDatum(field_strings[fld]));
+                                       }
+                               }
+ 
+                               cstate->values[0] =  
PointerGetDatum(construct_md_array(
+                                                                               
                                 cstate->values,
+                                                                               
                                 cstate->nulls,
+                                                                               
                                 1,
+                                                                               
                                 dims,
+                                                                               
                                 lbs,
+                                                                               
                                 TEXTOID,
+                                                                               
                                 -1,
+                                                                               
                                 false,
+                                                                               
                                 'i'));
+                               cstate->nulls[0] = false;
+                               cstate->cur_attname = NULL;
+                               cstate->cur_attval = NULL;
+                       }
+                       else
+                       {
                        /* Loop to read the user attributes on the line. */
                        foreach(cur, cstate->attnumlist)
                        {
***************
*** 2350,2357 **** NextCopyFrom(CopyState cstate)
                                cstate->cur_attname = NULL;
                                cstate->cur_attval = NULL;
                        }
  
!                       Assert(fieldno == nfields);
                }
                else
                {
--- 2473,2481 ----
                                cstate->cur_attname = NULL;
                                cstate->cur_attval = NULL;
                        }
+                       }
  
!                       Assert(cstate->text_array || fieldno == nfields);
                }
                else
                {
***************
*** 3011,3016 **** CopyReadAttributesText(CopyState cstate)
--- 3135,3147 ----
                        cstate->max_fields *= 2;
                        cstate->raw_fields = 
                                repalloc(cstate->raw_fields, 
cstate->max_fields*sizeof(char *));
+                       if (cstate->text_array)
+                       {
+                               cstate->values = repalloc(cstate->values, 
+                                                                               
  cstate->max_fields*sizeof(Datum));
+                               cstate->nulls = repalloc(cstate->nulls, 
+                                                                               
  cstate->max_fields*sizeof(bool));
+                       }
                }
  
                /* Remember start of field on both input and output sides */
***************
*** 3228,3233 **** CopyReadAttributesCSV(CopyState cstate)
--- 3359,3371 ----
                        cstate->max_fields *= 2;
                        cstate->raw_fields = 
                                repalloc(cstate->raw_fields, 
cstate->max_fields*sizeof(char *));
+                       if (cstate->text_array)
+                       {
+                               cstate->values = repalloc(cstate->values, 
+                                                                               
  cstate->max_fields*sizeof(Datum));
+                               cstate->nulls = repalloc(cstate->nulls, 
+                                                                               
  cstate->max_fields*sizeof(bool));
+                       }
                }
  
                /* Remember start of field on both input and output sides */
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to