On 02/07/2011 11:34 AM, Andrew Dunstan wrote:


On 02/04/2011 05:49 AM, Itagaki Takahiro wrote:
Here is a demonstration to support jagged input files. It's a patch
on the latest patch. The new added API is:

   bool NextLineCopyFrom(
         [IN] CopyState cstate,
         [OUT] char ***fields, [OUT] int *nfields, [OUT] Oid *tupleOid)

It just returns separated fields in the next line. Fortunately, I need
no extra code for it because it is just extracted from NextCopyFrom().

I'm willing to include the change into copy APIs,
but we still have a few issues. See below.


I've looked at this, and I think it will do what I want. I haven't had time to play with it, although I hope to soon. AIUI, it basically hands back the raw parsed strings to the user, who then has the responsibility of constructing Datum and Nulls arrays to form the tuple. That should be all I need. So +1 from me for including it. In fact, +10. And many thanks.


I think we need a better name though. NextCopyFromRawFields maybe.


Here is a patch against the latest revision of file_fdw to exercise this API. It includes some regression tests, and I think apart from one or two small details plus a requirement for documentation, is complete.

This work is also published at <https://github.com/adunstan/postgresql-dev/tree/sqlmed3>

Here's an excerpt from the regression tests:

   CREATE FOREIGN TABLE jagged_text (
        t   text[]
   ) SERVER file_server
   OPTIONS (format 'csv', filename
   '/home/andrew/pgl/pg_head/contrib/file_fdw/data/jagged.csv', header
   'true', textarray 'true');
   SELECT * FROM jagged_text;
                         t
   --------------------------------------------
     {"one field"}
     {two,fields}
     {three,NULL,"fields of which one is null"}
     {"next line has no fields"}
     {}
   (5 rows)

   SELECT t[3] AS a, t[1] AS b, t[99] AS c  FROM jagged_text;
                  a              |            b            | c
   -----------------------------+-------------------------+---
                                 | one field               |
                                 | two                     |
     fields of which one is null | three                   |
                                 | next line has no fields |
                                 |                         |
   (5 rows)

Overall, this API works quite nicely, and does exactly what I want, so again many thanks.

cheers

andrew


*** /dev/null
--- b/contrib/file_fdw/data/jagged.csv
***************
*** 0 ****
--- 1,6 ----
+ header for a file with varying numbers of fields
+ one field
+ two,fields
+ three,,fields of which one is null
+ next line has no fields
+ 
*** a/contrib/file_fdw/file_fdw.c
--- b/contrib/file_fdw/file_fdw.c
***************
*** 13,18 ****
--- 13,22 ----
  
  #include "postgres.h"
  
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ #include <unistd.h>
+ 
  #include "access/reloptions.h"
  #include "catalog/pg_foreign_table.h"
  #include "catalog/pg_foreign_server.h"
***************
*** 26,31 ****
--- 30,36 ----
  #include "optimizer/cost.h"
  #include "parser/parsetree.h"
  #include "storage/fd.h"
+ #include "utils/array.h"
  #include "utils/builtins.h"
  
  PG_MODULE_MAGIC;
***************
*** 61,78 **** static struct FileFdwOption valid_options[] = {
  
  	/* FIXME: implement force_not_null option */
  
! 	/* Centinel */
  	{ NULL,			InvalidOid }
  };
  
  /*
   * FDW-specific information for FdwExecutionState.
   */
  typedef struct FileFdwPrivate {
  	char		   *filename;
  	Relation		rel;		/* scan target relation */
! 	CopyState		cstate;		/* state of reaind file */
  	List		   *options;	/* merged generic options, excluding filename */
  } FileFdwPrivate;
  
  /*
--- 66,92 ----
  
  	/* FIXME: implement force_not_null option */
  
! 	/* Local option */
! 	{ "textarray",      ForeignTableRelationId },
! 
! 	/* Sentinel */
  	{ NULL,			InvalidOid }
  };
  
+ #define FILE_FDW_TEXTARRAY_STASH_INIT 64
  /*
   * FDW-specific information for FdwExecutionState.
   */
  typedef struct FileFdwPrivate {
  	char		   *filename;
+ 	bool            textarray;  /* make a text array rather than a tuple */
  	Relation		rel;		/* scan target relation */
! 	CopyState		cstate;		/* state of read in file */
  	List		   *options;	/* merged generic options, excluding filename */
+     /* stash for processing text arrays - not used otherwise */
+ 	int             text_array_stash_size;
+ 	Datum          *text_array_values;
+ 	bool           *text_array_nulls;
  } FileFdwPrivate;
  
  /*
***************
*** 91,96 **** static void fileIterate(FdwExecutionState *festate, TupleTableSlot *slot);
--- 105,113 ----
  static void fileEndScan(FdwExecutionState *festate);
  static void fileReScan(FdwExecutionState *festate);
  
+ /* text array support */
+ static void makeTextArray(FileFdwPrivate *fdw_private,
+ 						   TupleTableSlot *slot, char **raw_fields, int nfields);
  /*
   * Helper functions
   */
***************
*** 142,148 **** file_fdw_validator(PG_FUNCTION_ARGS)
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  				 errmsg("only superuser can change foreign table options")));
  
! 	/* Vaidate each options */
  	foreach(cell, options_list)
  	{
  		DefElem    *def = lfirst(cell);
--- 159,165 ----
  				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
  				 errmsg("only superuser can change foreign table options")));
  
! 	/* Validate each options */
  	foreach(cell, options_list)
  	{
  		DefElem    *def = lfirst(cell);
***************
*** 298,303 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 315,322 ----
  {
  	Const		   *relid;
  	Value		   *filename = NULL;
+ 	bool            textarray = false;
+ 	Const          *textarray_param;
  	ulong			size;
  	FdwPlan		   *fplan;
  	ForeignTable   *table;
***************
*** 346,351 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 365,394 ----
  				(errcode(ERRCODE_FDW_UNABLE_TO_CREATE_REPLY),
  				 errmsg("filename is required for file_fdw scan")));
  
+ 	/*
+ 	 * Split textarray option off from the list because it's handled
+ 	 * here instead of being passed as another parameter to BeginCopyFrom().
+ 	 */
+ 	prev = NULL;
+ 	foreach (lc, options)
+ 	{
+ 		DefElem	   *def = lfirst(lc);
+ 		if (strcmp(def->defname, "textarray") == 0)
+ 		{
+ 			textarray = defGetBoolean(def);
+ 			options = list_delete_cell(options, lc, prev);
+ 			break;
+ 		}
+ 		prev = lc;
+ 	}
+ 	textarray_param = (Const *) makeBoolConst(textarray,false);
+ 
+ 	if (textarray)
+ 	{
+ 		/* make sure the table has one column and it's oy type text[] */
+ 		/* XXX fill in this piece */
+ 	}
+ 	
  	/* Construct FdwPlan and store relid and options in private area */
  	fplan = makeNode(FdwPlan);
  	size = estimate_costs(strVal(filename), rel,
***************
*** 354,359 **** filePlanRelScan(Oid foreigntableid, PlannerInfo *root, RelOptInfo *rel)
--- 397,403 ----
  	fplan->fdw_private = NIL;
  	fplan->fdw_private = lappend(fplan->fdw_private, relid);
  	fplan->fdw_private = lappend(fplan->fdw_private, filename);
+ 	fplan->fdw_private = lappend(fplan->fdw_private, textarray_param);
  	fplan->fdw_private = lappend(fplan->fdw_private, options);
  
  	return fplan;
***************
*** 374,379 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
--- 418,424 ----
  	Const		   *relid_const;
  	Oid				relid;
  	Value		   *filename;
+     Const          *textarray;
  	List		   *options;
  	Relation		rel;
  	CopyState		cstate;
***************
*** 385,391 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
  	/* Get oid of the relation and option list from private area of FdwPlan. */
  	relid_const = list_nth(fplan->fdw_private, 0);
  	filename = list_nth(fplan->fdw_private, 1);
! 	options = list_nth(fplan->fdw_private, 2);
  
  	relid = DatumGetObjectId(relid_const->constvalue);
  
--- 430,437 ----
  	/* Get oid of the relation and option list from private area of FdwPlan. */
  	relid_const = list_nth(fplan->fdw_private, 0);
  	filename = list_nth(fplan->fdw_private, 1);
! 	textarray = list_nth(fplan->fdw_private, 2);
! 	options = list_nth(fplan->fdw_private, 3);
  
  	relid = DatumGetObjectId(relid_const->constvalue);
  
***************
*** 405,415 **** fileBeginScan(FdwPlan *fplan, ParamListInfo params)
--- 451,471 ----
  	festate = palloc0(sizeof(FdwExecutionState));
  	fdw_private = palloc0(sizeof(FileFdwPrivate));
  	fdw_private->filename = strVal(filename);
+ 	fdw_private->textarray = textarray->constvalue;
  	fdw_private->rel = rel;
  	fdw_private->cstate = cstate;
  	fdw_private->options = options;
  	festate->fdw_private = (void *) fdw_private;
  
+ 	if (fdw_private->textarray)
+ 	{
+ 		fdw_private->text_array_stash_size = FILE_FDW_TEXTARRAY_STASH_INIT;
+ 		fdw_private->text_array_values =
+ 			palloc(FILE_FDW_TEXTARRAY_STASH_INIT * sizeof(Datum));
+ 		fdw_private->text_array_nulls =
+ 			palloc(FILE_FDW_TEXTARRAY_STASH_INIT * sizeof(bool));
+ 	}
+ 
  	return festate;
  }
  
***************
*** 438,445 **** fileIterate(FdwExecutionState *festate, TupleTableSlot *slot)
  	 * EOF.
  	 */
  	ExecClearTuple(slot);
! 	found = NextCopyFrom(fdw_private->cstate, slot->tts_values, slot->tts_isnull,
! 						 NULL);
  	if (found)
  		ExecStoreVirtualTuple(slot);
  
--- 494,515 ----
  	 * EOF.
  	 */
  	ExecClearTuple(slot);
! 	if (fdw_private->textarray)
! 	{
! 		char **raw_fields;
! 		int nfields;
! 		
! 		found = NextLineCopyFrom(fdw_private->cstate, &raw_fields, &nfields,
! 							 NULL);
! 		if (found)
! 			makeTextArray(fdw_private, slot, raw_fields, nfields);
! 	}
! 	else
! 	{
! 		/* let the COPY code do the work */
! 		found = NextCopyFrom(fdw_private->cstate, slot->tts_values, 
! 							 slot->tts_isnull, NULL);
! 	}
  	if (found)
  		ExecStoreVirtualTuple(slot);
  
***************
*** 546,548 **** estimate_costs(const char *filename, RelOptInfo *baserel,
--- 616,691 ----
  	return stat_buf.st_size;
  }
  
+ static void 
+ makeTextArray(FileFdwPrivate *fdw_private, TupleTableSlot *slot, char **raw_fields, int nfields)
+ {
+ 	Datum     *values;
+ 	bool      *nulls;
+ 	int        dims[1];
+ 	int        lbs[1];
+ 	int        fld;
+ 	Datum      result;
+ 	int        fldct = nfields;
+ 	char      *string;
+ 
+ 	if (nfields == 1 && 
+ 		raw_fields[0] == NULL  
+ 		/* fixme - probably need to get this from fdw_private */
+ 		/* && cstate->null_print_len == 0 */
+ 		   )
+ 	{
+ 		/* Treat an empty line as having no fields */
+ 		fldct = 0;
+ 	}	
+ 	else if (nfields > fdw_private->text_array_stash_size)
+ 	{
+ 		while (fdw_private->text_array_stash_size < nfields)
+ 			fdw_private->text_array_stash_size *= 2;
+ 
+ 		fdw_private->text_array_values =repalloc(
+ 			fdw_private->text_array_values,
+ 			fdw_private->text_array_stash_size * sizeof(Datum));
+ 		fdw_private->text_array_nulls =repalloc(
+ 			fdw_private->text_array_nulls,
+ 			fdw_private->text_array_stash_size * sizeof(bool));		
+ 	}
+ 
+ 	values = fdw_private->text_array_values;
+ 	nulls = fdw_private->text_array_nulls;
+ 
+ 	dims[0] = fldct;
+ 	lbs[0] = 1; /* sql arrays typically start at 1 */
+ 
+ 	for (fld=0; fld < fldct; fld++)
+ 	{
+ 		string = raw_fields[fld];
+ 
+ 		if (string == NULL)
+ 		{
+ 			values[fld] = PointerGetDatum(NULL);
+ 			nulls[fld] = true;
+ 		}
+ 		else
+ 		{
+ 			nulls[fld] = false;
+ 			values[fld] = PointerGetDatum(
+ 				DirectFunctionCall1(textin, 
+ 									PointerGetDatum(string)));
+ 		}
+ 	}
+ 
+ 	result = PointerGetDatum(construct_md_array(
+ 								 values, 
+ 								 nulls,
+ 								 1,
+ 								 dims,
+ 								 lbs,
+ 								 TEXTOID,
+ 								 -1,
+ 								 false,
+ 								 'i'));
+ 
+ 	slot->tts_values[0] = result;
+ 	slot->tts_isnull[0] = false;
+ 
+ }
*** a/contrib/file_fdw/input/file_fdw.source
--- b/contrib/file_fdw/input/file_fdw.source
***************
*** 67,77 **** CREATE FOREIGN TABLE agg_bad (
--- 67,86 ----
  ) SERVER file_server
  OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
  
+ CREATE FOREIGN TABLE jagged_text (
+ 	t	text[]
+ ) SERVER file_server
+ OPTIONS (format 'csv', filename '@abs_srcdir@/data/jagged.csv', header 'true', textarray 'true');
+ 
  -- basic query tests
  SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
  SELECT * FROM agg_csv ORDER BY a;
  SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
  
+ -- textarray tests
+ SELECT * FROM jagged_text;
+ SELECT t[3] AS a, t[1] AS b, t[99] AS c  FROM jagged_text;
+ 
  -- error context report tests
  SELECT * FROM agg_bad;               -- ERROR
  
*** a/contrib/file_fdw/output/file_fdw.source
--- b/contrib/file_fdw/output/file_fdw.source
***************
*** 62,67 **** CREATE FOREIGN TABLE agg_bad (
--- 62,71 ----
  	b	float4
  ) SERVER file_server
  OPTIONS (format 'csv', filename '@abs_srcdir@/data/agg.bad', header 'true', delimiter ';', quote '@', escape '"', null '');
+ CREATE FOREIGN TABLE jagged_text (
+ 	t	text[]
+ ) SERVER file_server
+ OPTIONS (format 'csv', filename '@abs_srcdir@/data/jagged.csv', header 'true', textarray 'true');
  -- basic query tests
  SELECT * FROM agg_text WHERE b > 10.0 ORDER BY a;
    a  |   b    
***************
*** 86,91 **** SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
--- 90,116 ----
   100 |  99.097 | 100 |  99.097
  (3 rows)
  
+ -- textarray tests
+ SELECT * FROM jagged_text;
+                      t                      
+ --------------------------------------------
+  {"one field"}
+  {two,fields}
+  {three,NULL,"fields of which one is null"}
+  {"next line has no fields"}
+  {}
+ (5 rows)
+ 
+ SELECT t[3] AS a, t[1] AS b, t[99] AS c  FROM jagged_text;
+               a              |            b            | c 
+ -----------------------------+-------------------------+---
+                              | one field               | 
+                              | two                     | 
+  fields of which one is null | three                   | 
+                              | next line has no fields | 
+                              |                         | 
+ (5 rows)
+ 
  -- error context report tests
  SELECT * FROM agg_bad;               -- ERROR
  ERROR:  invalid input syntax for type real: "aaa"
***************
*** 155,161 **** SET ROLE file_fdw_superuser;
  -- cleanup
  RESET ROLE;
  DROP FOREIGN DATA WRAPPER file_fdw CASCADE;
! NOTICE:  drop cascades to 7 other objects
  DETAIL:  drop cascades to server file_server
  drop cascades to user mapping for file_fdw_user
  drop cascades to user mapping for file_fdw_superuser
--- 180,186 ----
  -- cleanup
  RESET ROLE;
  DROP FOREIGN DATA WRAPPER file_fdw CASCADE;
! NOTICE:  drop cascades to 8 other objects
  DETAIL:  drop cascades to server file_server
  drop cascades to user mapping for file_fdw_user
  drop cascades to user mapping for file_fdw_superuser
***************
*** 163,166 **** drop cascades to user mapping for no_priv_user
--- 188,192 ----
  drop cascades to foreign table agg_text
  drop cascades to foreign table agg_csv
  drop cascades to foreign table agg_bad
+ drop cascades to foreign table jagged_text
  DROP ROLE IF EXISTS file_fdw_superuser, file_fdw_user, no_priv_user;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to