On 01/15/2011 07:41 PM, Andrew Dunstan wrote:
On 01/15/2011 12:29 PM, Andrew Dunstan wrote:
I've been waiting for the latest FDW patches as patiently as I can,
and I've been reviewing them this morning, in particular the file_fdw
patch and how it interacts with the newly exposed COPY API. Overall
it seems to be a whole lot cleaner, and the wholesale duplication of
the copy code is gone, so it's much nicer and cleaner. So now I'd
like to add a new option to it: "textarray". This option would
require that the foreign table have exactly one field, of type
text[], and would compose all the field strings read from the file
for each record into the array (however many there are). This would
require a few changes to contrib/file_fdw/file_fdw.c and a few
changes to src/backend/commands/copy.c, which I can probably have
done in fairly short order, Deo Volente. This will allow something like:
CREATE FOREIGN TABLE arr_text (
t text[]
) SERVER file_server
OPTIONS (format 'csv', filename '/path/to/ragged.csv', textarray
'true');
SELECT t[3]::int as a, t[1]::timestamptz as b, t[99] as not_there
FROM arr_text;
A WIP patch is attached. It's against Shigeru Hanada's latest FDW
patches. It's surprisingly tiny. Right now it probably leaks memory
like a sieve, and that's the next thing I'm going to chase down.
Updated patch attached, that should use memory better.
cheers
andrew
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 58,67 **** static struct FileFdwOption valid_options[] = {
{ "quote", ForeignTableRelationId },
{ "escape", ForeignTableRelationId },
{ "null", ForeignTableRelationId },
/* FIXME: implement force_not_null option */
! /* Centinel */
{ NULL, InvalidOid }
};
--- 58,68 ----
{ "quote", ForeignTableRelationId },
{ "escape", ForeignTableRelationId },
{ "null", ForeignTableRelationId },
+ { "textarray", ForeignTableRelationId },
/* FIXME: implement force_not_null option */
! /* Sentinel */
{ NULL, InvalidOid }
};
***************
*** 134,139 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 135,141 ----
char *escape = NULL;
char *null = NULL;
bool header;
+ bool textarray;
/* Only superuser can change generic options of the foreign table */
if (catalog == ForeignTableRelationId && !superuser())
***************
*** 220,225 **** file_fdw_validator(PG_FUNCTION_ARGS)
--- 222,231 ----
errmsg("null representation
cannot use newline or carriage return")));
null = strVal(def->arg);
}
+ else if (strcmp(def->defname, "textarray") == 0)
+ {
+ textarray = defGetBoolean(def);
+ }
}
/* Check options which depend on the file format. */
*** a/src/backend/commands/copy.c
--- b/src/backend/commands/copy.c
***************
*** 44,49 ****
--- 44,51 ----
#include "utils/memutils.h"
#include "utils/snapmgr.h"
+ /* initial size for arrays in textarray mode */
+ #define TEXTARRAY_SIZE 64
#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
#define OCTVALUE(c) ((c) - '0')
***************
*** 117,122 **** typedef struct CopyStateData
--- 119,127 ----
bool *force_quote_flags; /* per-column CSV FQ flags */
bool *force_notnull_flags; /* per-column CSV FNN flags */
+ /* param from FDW */
+ bool text_array; /* scan to a single text array field */
+
/* these are just for error messages, see CopyFromErrorCallback */
const char *cur_relname; /* table name for error messages */
int cur_lineno; /* line number for
error messages */
***************
*** 970,975 **** BeginCopy(bool is_from,
--- 975,988 ----
errmsg("argument to option
\"%s\" must be a list of column names",
defel->defname)));
}
+ else if (strcmp(defel->defname, "textarray") == 0)
+ {
+ if (cstate->text_array)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or
redundant options")));
+ cstate->text_array = defGetBoolean(defel);
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
***************
*** 1109,1114 **** BeginCopy(bool is_from,
--- 1122,1157 ----
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in
the NULL specification")));
+ /*
+ * file_fdw can use textarray mode, and as of the time of writing
should not
+ * be able to cause any of these next errors. The checks are here more
for
+ * the sake of future-proofing, and as protection in case a third party
+ * module uses the COPY API.
+ */
+
+ /* check textarray mode */
+ if (cstate->text_array && !is_from)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("textarray only available in read mode")));
+
+ /* textarray is only allowed in text and CSV modes */
+ if (cstate->text_array && cstate->binary)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("textarray not available in binary mode")));
+
+ /*
+ * textarray can't operate with force_not_null - it will not return
+ * a null in any case, although the array it returns might contain
nulls,
+ * which it will be up to the user to deal with.
+ */
+
+ if (cstate->text_array && force_notnull != NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("force not null not available with
textarray")));
+
if (rel)
{
Assert(!raw_query);
***************
*** 1201,1206 **** BeginCopy(bool is_from,
--- 1244,1262 ----
num_phys_attrs = tupDesc->natts;
+ /* make sure rel has the right shape for textarray */
+ if (cstate->text_array)
+ {
+ if (num_phys_attrs != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("too many columns for
textarray")));
+ if (tupDesc->attrs[0]->atttypid != TEXTARRAYOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("target column must be of type
text[] for textarray")));
+ }
+
/* Convert FORCE QUOTE name list to per-column flags, check validity */
cstate->force_quote_flags = (bool *) palloc0(num_phys_attrs *
sizeof(bool));
if (force_quote_all)
***************
*** 2061,2068 **** BeginCopyFrom(Relation rel,
num_defaults = 0;
/* allocate arrays to avoid per-tuple overhead. */
! cstate->values = palloc(sizeof(Datum) * tupDesc->natts);
! cstate->nulls = palloc(sizeof(bool) * tupDesc->natts);
/*
* Pick up the required catalog information for each attribute in the
--- 2117,2132 ----
num_defaults = 0;
/* allocate arrays to avoid per-tuple overhead. */
! if (!cstate->text_array)
! {
! cstate->values = (Datum *) palloc(num_phys_attrs *
sizeof(Datum));
! cstate->nulls = (bool *) palloc(num_phys_attrs * sizeof(bool));
! }
! else
! {
! cstate->values = (Datum *) palloc(TEXTARRAY_SIZE *
sizeof(Datum));
! cstate->nulls = (bool *) palloc(TEXTARRAY_SIZE * sizeof(bool));
! }
/*
* Pick up the required catalog information for each attribute in the
***************
*** 2194,2200 **** BeginCopyFrom(Relation rel,
}
/* create workspace for CopyReadAttributes results */
! if (!cstate->binary)
{
AttrNumber attr_count = list_length(cstate->attnumlist);
int nfields = cstate->file_has_oids ? (attr_count + 1) :
attr_count;
--- 2258,2269 ----
}
/* create workspace for CopyReadAttributes results */
! if (cstate->text_array)
! {
! cstate->max_fields = TEXTARRAY_SIZE;
! cstate->raw_fields = (char **) palloc(TEXTARRAY_SIZE *
sizeof(char *));
! }
! else if (!cstate->binary)
{
AttrNumber attr_count = list_length(cstate->attnumlist);
int nfields = cstate->file_has_oids ? (attr_count + 1) :
attr_count;
***************
*** 2283,2289 **** NextCopyFrom(CopyState cstate)
fldct = CopyReadAttributesText(cstate);
/* check for overflowing fields */
! if (nfields > 0 && fldct > nfields)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("extra data after last
expected column")));
--- 2352,2358 ----
fldct = CopyReadAttributesText(cstate);
/* check for overflowing fields */
! if (nfields > 0 && fldct > nfields &&
!cstate->text_array)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("extra data after last
expected column")));
***************
*** 2319,2324 **** NextCopyFrom(CopyState cstate)
--- 2388,2447 ----
}
}
+ if (cstate->text_array)
+ {
+ int dims[1];
+ int lbs[1];
+ int fld;
+
+ /* Treat an empty line as having no fields */
+ if (fldct == 1 &&
+ field_strings[0] == NULL &&
+ cstate->null_print_len == 0)
+ {
+ fldct = 0;
+ }
+
+ dims[0] = fldct;
+ lbs[0] = 1; /* sql arrays typically start at 1
*/
+
+ /*
+ * Use the values and nulls in cstate as
workspace to create the array,
+ * before storing the result back in the samne
arrays.
+ */
+
+
+ for (fld=0; fld < fldct; fld++)
+ {
+ if (field_strings[fld] == NULL)
+ {
+ cstate->nulls[fld] = true;
+ }
+ else
+ {
+ cstate->nulls[fld] = false;
+ cstate->values[fld] =
+
DirectFunctionCall1(textin,
+
PointerGetDatum(field_strings[fld]));
+ }
+ }
+
+ cstate->values[0] =
PointerGetDatum(construct_md_array(
+
cstate->values,
+
cstate->nulls,
+
1,
+
dims,
+
lbs,
+
TEXTOID,
+
-1,
+
false,
+
'i'));
+ cstate->nulls[0] = false;
+ cstate->cur_attname = NULL;
+ cstate->cur_attval = NULL;
+ }
+ else
+ {
/* Loop to read the user attributes on the line. */
foreach(cur, cstate->attnumlist)
{
***************
*** 2350,2357 **** NextCopyFrom(CopyState cstate)
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
}
! Assert(fieldno == nfields);
}
else
{
--- 2473,2481 ----
cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
}
+ }
! Assert(cstate->text_array || fieldno == nfields);
}
else
{
***************
*** 3011,3016 **** CopyReadAttributesText(CopyState cstate)
--- 3135,3147 ----
cstate->max_fields *= 2;
cstate->raw_fields =
repalloc(cstate->raw_fields,
cstate->max_fields*sizeof(char *));
+ if (cstate->text_array)
+ {
+ cstate->values = repalloc(cstate->values,
+
cstate->max_fields*sizeof(Datum));
+ cstate->nulls = repalloc(cstate->nulls,
+
cstate->max_fields*sizeof(bool));
+ }
}
/* Remember start of field on both input and output sides */
***************
*** 3228,3233 **** CopyReadAttributesCSV(CopyState cstate)
--- 3359,3371 ----
cstate->max_fields *= 2;
cstate->raw_fields =
repalloc(cstate->raw_fields,
cstate->max_fields*sizeof(char *));
+ if (cstate->text_array)
+ {
+ cstate->values = repalloc(cstate->values,
+
cstate->max_fields*sizeof(Datum));
+ cstate->nulls = repalloc(cstate->nulls,
+
cstate->max_fields*sizeof(bool));
+ }
}
/* Remember start of field on both input and output sides */
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers