On 02/07/2011 11:34 AM, Andrew Dunstan wrote:
On 02/04/2011 05:49 AM, Itagaki Takahiro wrote:
Here is a demonstration to support jagged input files. It's a patch
on the latest patch. The new added API is:
bool NextLineCopyFrom(
[IN] CopyState cstate,
[OUT] char ***fields, [OUT] int *nfields, [OUT] Oid *tupleOid)
It just returns separated fields in the next line. Fortunately, I need
no extra code for it because it is just extracted from NextCopyFrom().
I'm willing to include the change into copy APIs,
but we still have a few issues. See below.
I've looked at this, and I think it will do what I want. I haven't had
time to play with it, although I hope to soon. AIUI, it basically
hands back the raw parsed strings to the user, who then has the
responsibility of constructing Datum and Nulls arrays to form the
tuple. That should be all I need. So +1 from me for including it. In
fact, +10. And many thanks.
I think we need a better name though. NextCopyFromRawFields maybe.
Here is a patch against the latest revision of file_fdw to exercise this
API. It includes some regression tests, and I think apart from one or
two small details plus a requirement for documentation, is complete.
This work is also published at
<https://github.com/adunstan/postgresql-dev/tree/sqlmed3>
Here's an excerpt from the regression tests:
CREATE FOREIGN TABLE jagged_text (
t text[]
) SERVER file_server
OPTIONS (format 'csv', filename
'/home/andrew/pgl/pg_head/contrib/file_fdw/data/jagged.csv', header
'true', textarray 'true');
SELECT * FROM jagged_text;
t
--------------------------------------------
{"one field"}
{two,fields}
{three,NULL,"fields of which one is null"}
{"next line has no fields"}
{}
(5 rows)
SELECT t[3] AS a, t[1] AS b, t[99] AS c FROM jagged_text;
a | b | c
-----------------------------+-------------------------+---
| one field |
| two |
fields of which one is null | three |
| next line has no fields |
| |
(5 rows)
Overall, this API works quite nicely, and does exactly what I want, so
again many thanks.
cheers
andrew
*** /dev/null
--- b/contrib/file_fdw/data/jagged.csv
***************
*** 0 ****
--- 1,6 ----
+ header for a file with varying numbers of fields
+ one field
+ two,fields
+ three,,fields of which one is null
+ next line has no fields
+
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 13,18 ****
--- 13,22 ----
#include "postgres.h"
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <unistd.h>
+
#include "access/reloptions.h"
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_foreign_server.h"
***************
*** 26,31 ****
--- 30,36 ----
#include "optimizer/cost.h"
#include "parser/parsetree.h"
#include "storage/fd.h"
+ #include "utils/array.h"
#include "utils/builtins.h"
PG_MODULE_MAGIC;
***************
*** 61,78 **** static struct FileFdwOption valid_options[] = {
/* FIXME: implement force_not_null option */
! /* Centinel */
{ NULL, InvalidOid }
};
/*
* FDW-specific information for FdwExecutionState.
*/
typedef struct FileFdwPrivate {
char *filename;
Relation rel; /* scan target relation */
! CopyState cstate; /* state of reaind file */
List *options; /* merged generic options, excluding filename */
} FileFdwPrivate;
/*
--- 66,92 ----
/* FIXME: implement force_not_null option */
! /* Local option */
! { "textarray", ForeignTableRelationId },
!
! /* Sentinel */
{ NULL, InvalidOid }
};
+ #define FILE_FDW_TEXTARRAY_STASH_INIT 64
/*
* FDW-specific information for FdwExecutionState.
*/
typedef struct FileFdwPrivate {
char *filename;
+ bool textarray; /* make a text array rather than a tuple */
Relation rel; /* scan target relation */
! CopyState cstate; /* state of read in file */
List *options; /* merged generic options, excluding filename */
+ /* stash for processing text arrays - not used otherwise */
+ int text_array_stash_size;
+ Datum *text_array_values;
+ bool *text_array_nulls;
} FileFdwPrivate;
/*
***************
*** 91,96 **** static void fileIterate(FdwExecutionState *festate, TupleTableSlot *slot);
--- 105,113 ----
static void fileEndScan(FdwExecutionState *festate);
static void fileReScan(FdwExecutionState *festate);
+ /* text array support */
+ static void makeTextArray(FileFdwPrivate *fdw_private,
+ TupleTableSlot *slot, char **raw_fields, int nfields);
/*
* Helper functions
*/
***************
*** 142,148 **** file_fdw_validator(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("only superuser can change foreign table options")));
! /* Vaidate each options */
foreach(cell, options_list)
{
DefElem *def = lfirst(cell);
--- 159,165 ----
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("only superuser can change foreign table options")));
! /* Validate each options */
foreach(cell, options_list)
{
DefElem *def = lfirst(cell);
***************
*** 298,303 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 315,322 ----
{
Const *relid;
Value *filename = NULL;
+ bool textarray = false;
+ Const *textarray_param;
ulong size;
FdwPlan *fplan;
ForeignTable *table;
***************
*** 346,351 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 365,394 ----
(errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY),
errmsg("filename is required for file_fdw scan")));
+ /*
+ * Split textarray option off from the list because it's handled
+ * here instead of being passed as another parameter to BeginCopyFrom().
+ */
+ prev = NULL;
+ foreach (lc, options)
+ {
+ DefElem *def = lfirst(lc);
+ if (strcmp(def->defname, "textarray") == 0)
+ {
+ textarray = defGetBoolean(def);
+ options = list_delete_cell(options, lc, prev);
+ break;
+ }
+ prev = lc;
+ }
+ textarray_param = (Const *) makeBoolConst(textarray,false);
+
+ if (textarray)
+ {
+ /* make sure the table has one column and it's oy type text[] */
+ /* XXX fill in this piece */
+ }
+
/* Construct FdwPlan and store relid and options in private area */
fplan = makeNode(FdwPlan);
size = estimate_costs(strVal(filename), rel,
***************
*** 354,359 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 397,403 ----
fplan->fdw_private = NIL;
fplan->fdw_private = lappend(fplan->fdw_private, relid);
fplan->fdw_private = lappend(fplan->fdw_private, filename);
+ fplan->fdw_private = lappend(fplan->fdw_private, textarray_param);
fplan->fdw_private = lappend(fplan->fdw_private, options);
return fplan;
***************
*** 374,379 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
--- 418,424 ----
Const *relid_const;
Oid relid;
Value *filename;
+ Const *textarray;
List *options;
Relation rel;
CopyState cstate;
***************
*** 385,391 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
/* Get oid of the relation and option list from private area of FdwPlan. */
relid_const = list_nth(fplan->fdw_private, 0);
filename = list_nth(fplan->fdw_private, 1);
! options = list_nth(fplan->fdw_private, 2);
relid = DatumGetObjectId(relid_const->constvalue);
--- 430,437 ----
/* Get oid of the relation and option list from private area of FdwPlan. */
relid_const = list_nth(fplan->fdw_private, 0);
filename = list_nth(fplan->fdw_private, 1);
! textarray = list_nth(fplan->fdw_private, 2);
! options = list_nth(fplan->fdw_private, 3);
relid = DatumGetObjectId(relid_const->constvalue);
***************
*** 405,415 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
--- 451,471 ----
festate = palloc0(sizeof(FdwExecutionState));
fdw_private = palloc0(sizeof(FileFdwPrivate));
fdw_private->filename = strVal(filename);
+ fdw_private->textarray = textarray->constvalue;
fdw_private->rel = rel;
fdw_private->cstate = cstate;
fdw_private->options = options;
festate->fdw_private = (void *) fdw_private;
+ if (fdw_private->textarray)
+ {
+ fdw_private->text_array_stash_size = FILE_FDW_TEXTARRAY_STASH_INIT;
+ fdw_private->text_array_values =
+ palloc(FILE_FDW_TEXTARRAY_STASH_INIT * sizeof(Datum));
+ fdw_private->text_array_nulls =
+ palloc(FILE_FDW_TEXTARRAY_STASH_INIT * sizeof(bool));
+ }
+
return festate;
}
***************
*** 438,445 **** fileIterate(FdwExecutionState *festate, TupleTableSlot *slot)
* EOF.
*/
ExecClearTuple(slot);
! found = NextCopyFrom(fdw_private->cstate, slot->tts_values, slot->tts_isnull,
! NULL);
if (found)
ExecStoreVirtualTuple(slot);
--- 494,515 ----
* EOF.
*/
ExecClearTuple(slot);
! if (fdw_private->textarray)
! {
! char **raw_fields;
! int nfields;
!
! found = NextLineCopyFrom(fdw_private->cstate, &raw_fields, &nfields,
! NULL);
! if (found)
! makeTextArray(fdw_private, slot, raw_fields, nfields);
! }
! else
! {
! /* let the COPY code do the work */
! found = NextCopyFrom(fdw_private->cstate, slot->tts_values,
! slot->tts_isnull, NULL);
! }
if (found)
ExecStoreVirtualTuple(slot);
***************
*** 546,548 **** estimate_costs(const char *filename, RelOptInfo *baserel,
--- 616,691 ----
return stat_buf.st_size;
}
+ static void
+ makeTextArray(FileFdwPrivate *fdw_private, TupleTableSlot *slot, char **raw_fields, int nfields)
+ {
+ Datum *values;
+ bool *nulls;
+ int dims[1];
+ int lbs[1];
+ int fld;
+ Datum result;
+ int fldct = nfields;
+ char *string;
+
+ if (nfields == 1 &&
+ raw_fields[0] == NULL
+ /* fixme - probably need to get this from fdw_private */
+ /* && cstate->null_print_len == 0 */
+ )
+ {
+ /* Treat an empty line as having no fields */
+ fldct = 0;
+ }
+ else if (nfields > fdw_private->text_array_stash_size)
+ {
+ while (fdw_private->text_array_stash_size < nfields)
+ fdw_private->text_array_stash_size *= 2;
+
+ fdw_private->text_array_values =repalloc(
+ fdw_private->text_array_values,
+ fdw_private->text_array_stash_size * sizeof(Datum));
+ fdw_private->text_array_nulls =repalloc(
+ fdw_private->text_array_nulls,
+ fdw_private->text_array_stash_size * sizeof(bool));
+ }
+
+ values = fdw_private->text_array_values;
+ nulls = fdw_private->text_array_nulls;
+
+ dims[0] = fldct;
+ lbs[0] = 1; /* sql arrays typically start at 1 */
+
+ for (fld=0; fld < fldct; fld++)
+ {
+ string = raw_fields[fld];
+
+ if (string == NULL)
+ {
+ values[fld] = PointerGetDatum(NULL);
+ nulls[fld] = true;
+ }
+ else
+ {
+ nulls[fld] = false;
+ values[fld] = PointerGetDatum(
+ DirectFunctionCall1(textin,
+ PointerGetDatum(string)));
+ }
+ }
+
+ result = PointerGetDatum(construct_md_array(
+ values,
+ nulls,
+ 1,
+ dims,
+ lbs,
+ TEXTOID,
+ -1,
+ false,
+ 'i'));
+
+ slot->tts_values[0] = result;
+ slot->tts_isnull[0] = false;
+
+ }
*** a/contrib/file_fdw/input/file_fdw.source
--- b/contrib/file_fdw/input/file_fdw.source
***************
*** 67,77 **** CREATE FOREIGN TABLE agg_bad (
--- 67,86 ----
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
+ CREATE FOREIGN TABLE jagged_text (
+ t text[]
+ ) SERVER file_server
+ OPTIONS (format 'csv', filename '@abs_srcdir@/data/jagged.csv', header 'true', textarray 'true');
+
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
SELECT * FROM agg_csv ORDER BY a;
SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
+ -- textarray tests
+ SELECT * FROM jagged_text;
+ SELECT t[3] AS a, t[1] AS b, t[99] AS c FROM jagged_text;
+
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
*** a/contrib/file_fdw/output/file_fdw.source
--- b/contrib/file_fdw/output/file_fdw.source
***************
*** 62,67 **** CREATE FOREIGN TABLE agg_bad (
--- 62,71 ----
b float4
) SERVER file_server
OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
+ CREATE FOREIGN TABLE jagged_text (
+ t text[]
+ ) SERVER file_server
+ OPTIONS (format 'csv', filename '@abs_srcdir@/data/jagged.csv', header 'true', textarray 'true');
-- basic query tests
SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
a | b
***************
*** 86,91 **** SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
--- 90,116 ----
100 | 99.097 | 100 | 99.097
(3 rows)
+ -- textarray tests
+ SELECT * FROM jagged_text;
+ t
+ --------------------------------------------
+ {"one field"}
+ {two,fields}
+ {three,NULL,"fields of which one is null"}
+ {"next line has no fields"}
+ {}
+ (5 rows)
+
+ SELECT t[3] AS a, t[1] AS b, t[99] AS c FROM jagged_text;
+ a | b | c
+ -----------------------------+-------------------------+---
+ | one field |
+ | two |
+ fields of which one is null | three |
+ | next line has no fields |
+ | |
+ (5 rows)
+
-- error context report tests
SELECT * FROM agg_bad; -- ERROR
ERROR: invalid input syntax for type real: "aaa"
***************
*** 155,161 **** SET ROLE file_fdw_superuser;
-- cleanup
RESET ROLE;
DROP FOREIGN DATA WRAPPER file_fdw CASCADE;
! NOTICE: drop cascades to 7 other objects
DETAIL: drop cascades to server file_server
drop cascades to user mapping for file_fdw_user
drop cascades to user mapping for file_fdw_superuser
--- 180,186 ----
-- cleanup
RESET ROLE;
DROP FOREIGN DATA WRAPPER file_fdw CASCADE;
! NOTICE: drop cascades to 8 other objects
DETAIL: drop cascades to server file_server
drop cascades to user mapping for file_fdw_user
drop cascades to user mapping for file_fdw_superuser
***************
*** 163,166 **** drop cascades to user mapping for no_priv_user
--- 188,192 ----
drop cascades to foreign table agg_text
drop cascades to foreign table agg_csv
drop cascades to foreign table agg_bad
+ drop cascades to foreign table jagged_text
DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user;
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers