On 01/15/2011 12:29 PM, Andrew Dunstan wrote:

I've been waiting for the latest FDW patches as patiently as I can, and I've been reviewing them this morning, in particular the file_fdw patch and how it interacts with the newly exposed COPY API. Overall it seems to be a whole lot cleaner: the wholesale duplication of the copy code is gone, which is much nicer. So now I'd like to add a new option to it: "textarray". This option would require that the foreign table have exactly one field, of type text[], and would compose all the field strings read from the file for each record into that array (however many there are). This would require a few changes to contrib/file_fdw/file_fdw.c and a few changes to src/backend/commands/copy.c, which I can probably have done in fairly short order, Deo Volente. This will allow something like:

   CREATE FOREIGN TABLE arr_text (
        t text[]
   )  SERVER file_server
   OPTIONS (format 'csv', filename '/path/to/ragged.csv', textarray
   'true');
   SELECT t[3]::int as a, t[1]::timestamptz as b, t[99] as not_there
   FROM arr_text;



A WIP patch is attached. It's against Shigeru Hanada's latest FDW patches. It's surprisingly tiny. Right now it probably leaks memory like a sieve, and that's the next thing I'm going to chase down.

cheers

andrew
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 58,67 **** static struct FileFdwOption valid_options[] = {
        { "quote",                      ForeignTableRelationId },
        { "escape",                     ForeignTableRelationId },
        { "null",                       ForeignTableRelationId },
  
        /* FIXME: implement force_not_null option */
  
!       /* Centinel */
        { NULL,                 InvalidOid }
  };
  
--- 58,68 ----
        { "quote",                      ForeignTableRelationId },
        { "escape",                     ForeignTableRelationId },
        { "null",                       ForeignTableRelationId },
+       { "textarray",          ForeignTableRelationId },
  
        /* FIXME: implement force_not_null option */
  
!       /* Sentinel */
        { NULL,                 InvalidOid }
  };
  
***************
*** 134,139 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 135,141 ----
        char       *escape = NULL;
        char       *null = NULL;
        bool            header;
+       bool        textarray;
  
        /* Only superuser can change generic options of the foreign table */
        if (catalog == ForeignTableRelationId && !superuser())
***************
*** 220,225 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 222,231 ----
                                                 errmsg("null representation 
cannot use newline or carriage return")));
                        null = strVal(def->arg);
                }
+               else if (strcmp(def->defname, "textarray") == 0)
+               {
+                       textarray = defGetBoolean(def);
+               }
        }
  
        /* Check options which depend on the file format. */
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 117,122 **** typedef struct CopyStateData
--- 117,125 ----
        bool       *force_quote_flags;          /* per-column CSV FQ flags */
        bool       *force_notnull_flags;        /* per-column CSV FNN flags */
  
+       /* param from FDW */
+       bool       text_array;      /* scan to a single text array field */
+ 
        /* these are just for error messages, see CopyFromErrorCallback */
        const char *cur_relname;        /* table name for error messages */
        int                     cur_lineno;             /* line number for 
error messages */
***************
*** 970,975 **** BeginCopy(bool is_from,
--- 973,986 ----
                                                 errmsg("argument to option 
\"%s\" must be a list of column names",
                                                                
defel->defname)));
                }
+               else if (strcmp(defel->defname, "textarray") == 0)
+               {
+                       if (cstate->text_array)
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("conflicting or 
redundant options")));
+                       cstate->text_array = defGetBoolean(defel);
+               }
                else
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
***************
*** 1109,1114 **** BeginCopy(bool is_from,
--- 1120,1131 ----
                                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                 errmsg("CSV quote character must not appear in 
the NULL specification")));
  
+       /* check textarray */
+       if (cstate->text_array && !is_from)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                         errmsg("textarray only available in read mode")));
+ 
        if (rel)
        {
                Assert(!raw_query);
***************
*** 1201,1206 **** BeginCopy(bool is_from,
--- 1218,1236 ----
  
        num_phys_attrs = tupDesc->natts;
  
+       /* make sure rel has the right shape for textarray */
+       if (cstate->text_array)
+       {
+               if (num_phys_attrs != 1)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("too many  columns for 
textarray")));
+               if (tupDesc->attrs[0]->atttypid != TEXTARRAYOID)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                        errmsg("target column must be of type 
text[] for textarray")));                                
+       }
+ 
        /* Convert FORCE QUOTE name list to per-column flags, check validity */
        cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs * 
sizeof(bool));
        if (force_quote_all)
***************
*** 2283,2289 **** NextCopyFrom(CopyState cstate)
                                fldct = CopyReadAttributesText(cstate);
  
                        /* check for overflowing fields */
!                       if (nfields > 0 && fldct > nfields)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("extra data after last 
expected column")));
--- 2313,2319 ----
                                fldct = CopyReadAttributesText(cstate);
  
                        /* check for overflowing fields */
!                       if (nfields > 0 && fldct > nfields && 
!cstate->text_array)
                                ereport(ERROR,
                                                
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
                                                 errmsg("extra data after last 
expected column")));
***************
*** 2319,2324 **** NextCopyFrom(CopyState cstate)
--- 2349,2401 ----
                                }
                        }
  
+                       if (cstate->text_array)
+                       {
+                               Datum      *textvals;
+                               bool       *nulls;
+                               int        dims[1];
+                               int        lbs[1];
+                               int        fld;
+                               
+                               textvals = palloc(fldct * sizeof(Datum));
+                               nulls = palloc(fldct * sizeof(bool));
+ 
+                               /* Treat an empty line as having no fields */
+                               if (fldct == 1 && field_strings[fld] == NULL && 
strlen(cstate->nulltext) == 0)
+                                       fldct = 0;
+ 
+                               dims[0] = fldct;
+                               lbs[0] = 1; /* sql arrays typically start at 1 
*/
+ 
+                               
+                               for (fld=0; fld < fldct; fld++)
+                               {
+                                       if (field_strings[fld] == NULL)
+                                       {
+                                               nulls[fld] = true;
+                                       }
+                                       else
+                                       {
+                                               nulls[fld] = false;
+                                               textvals[fld] = 
DirectFunctionCall1(textin, PointerGetDatum(field_strings[fld]));
+                                       }
+                               }
+                               
+                               cstate->values[0] = construct_md_array(textvals,
+                                                                               
        nulls,
+                                                                               
        1,
+                                                                               
        dims,
+                                                                               
        lbs,
+                                                                               
        TEXTOID,-
+                                                                               
        1,
+                                                                               
        false,
+                                                                               
        'i');
+                               cstate->nulls[0] = false;
+                               cstate->cur_attname = NULL;
+                               cstate->cur_attval = NULL;
+                       }
+                       else
+                       {
                        /* Loop to read the user attributes on the line. */
                        foreach(cur, cstate->attnumlist)
                        {
***************
*** 2350,2357 **** NextCopyFrom(CopyState cstate)
                                cstate->cur_attname = NULL;
                                cstate->cur_attval = NULL;
                        }
  
!                       Assert(fieldno == nfields);
                }
                else
                {
--- 2427,2435 ----
                                cstate->cur_attname = NULL;
                                cstate->cur_attval = NULL;
                        }
+                       }
  
!                       Assert(cstate->text_array || fieldno == nfields);
                }
                else
                {
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to