On Wed, Nov 19, 2014 at 7:01 AM, Peter Geoghegan <p...@heroku.com> wrote:
> On Tue, Nov 4, 2014 at 7:26 AM, Amit Kapila <amit.kapil...@gmail.com> wrote:
>> I think these functions will be quite useful for debugging purposes
>> and we already have similar functions for another index type (btree).
>
> This patch has bitrotted. I attach rebased revision, for the
> convenience of others - V1.3 of pageinspect will now incorporate both
> GIN stuff, and BRIN stuff. Seems like this patch was affected by the
> recent problems with header includes - that's fixed.

Thanks for the updated version! That's good timing, as I wanted to move
this patch forward these days... I also think that it would be a
useful addition to pageinspect (I was in need of these functions
yesterday, and will be again soon today, btw).

> Amit wrote:
> 1. Documentation seems to be missing; other APIs exposed
> via pageinspect are documented at:
> http://www.postgresql.org/docs/devel/static/pageinspect.html
Done.

> 2.
> +CREATE FUNCTION gin_metapage(IN page bytea,
> +    OUT pending_head bigint,
> +    OUT pending_tail bigint,
> +    OUT version int4)
> +AS 'MODULE_PATHNAME', 'gin_metapage'
> +LANGUAGE C STRICT;
> a. Isn't it better to name the function as gin_metap(..) similar to
> existing function bt_metap(..)?
I actually prefer gin_metapage_info, a name similar to those of the
functions newly introduced for BRIN indexes.

> b. Can this function have a similar signature as bt_metap() which means
> it should take input as relname?
That's mostly a matter of taste but I think we should definitely pass
a raw page to it as it is now. This has the advantage of adding an extra
check on whether the page passed is really a meta page or not, something
useful for development.

> 3. Can gin_dataleafpage() API have similar name and signature as
> API bt_page_items() exposed for btree?
What about gin_leafpage_items then?

> 4. Can we have any better name for gin_pageopaq (other API names
> in this module are self-explanatory)?
gin_page_opaque_info? Because we get back information about the opaque
portion of the page. Feel free to suggest a better idea if you have one.

Updated patch, with some more things improved and cleaned up (addition
of header of ginfuncs.c, addition of array of decoded item pointers
for compressed data leaf pages), is attached.

One last thing, not only of interest for this patch: it may be good to
expose DatumGetItemPointer and ItemPointerGetDatum for extensions
analyzing the content of pages. I am not sure where though; a place like
utils/*.h may be useful. Thoughts?
Regards,
-- 
Michael
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index a59de8a..aec5258 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -1,12 +1,13 @@
 # contrib/pageinspect/Makefile
 
 MODULE_big	= pageinspect
-OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o brinfuncs.o $(WIN32RES)
+OBJS		= rawpage.o heapfuncs.o btreefuncs.o fsmfuncs.o \
+		  brinfuncs.o ginfuncs.o $(WIN32RES)
 
 EXTENSION = pageinspect
-DATA = pageinspect--1.3.sql pageinspect--1.0--1.1.sql \
-	pageinspect--1.2--1.3.sql \
-	pageinspect--1.1--1.2.sql pageinspect--unpackaged--1.0.sql
+DATA = pageinspect--1.3.sql pageinspect--1.2--1.3.sql \
+	pageinspect--1.1--1.2.sql pageinspect--1.0--1.1.sql \
+	pageinspect--unpackaged--1.0.sql
 PGFILEDESC = "pageinspect - functions to inspect contents of database pages"
 
 ifdef USE_PGXS
diff --git a/contrib/pageinspect/ginfuncs.c b/contrib/pageinspect/ginfuncs.c
new file mode 100644
index 0000000..8927951
--- /dev/null
+++ b/contrib/pageinspect/ginfuncs.c
@@ -0,0 +1,340 @@
+/*
+ * ginfuncs.c
+ *		Functions to investigate the content of GIN indexes
+ *
+ * Copyright (c) 2014, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		contrib/pageinspect/ginfuncs.c
+ */
+#include "postgres.h"
+
+#include "access/gin.h"
+#include "access/gin_private.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+
+/* Datum conversions for item pointers, which travel as pointer datums */
+#define DatumGetItemPointer(X)	 ((ItemPointer) DatumGetPointer(X))
+#define ItemPointerGetDatum(X)	 PointerGetDatum(X)
+
+
+PG_FUNCTION_INFO_V1(gin_metapage_info);
+PG_FUNCTION_INFO_V1(gin_page_opaque_info);
+PG_FUNCTION_INFO_V1(gin_leafpage_items);
+
+/* State kept between calls of gin_leafpage_items() */
+typedef struct gin_leafpage_items_state
+{
+	TupleDesc	tupd;			/* descriptor of the rows to return */
+	GinPostingList *seg;		/* next segment to return */
+	GinPostingList *lastseg;	/* end of the page's posting list area */
+} gin_leafpage_items_state;
+
+
+/*
+ * gin_get_page_copy
+ *		Check a raw page argument and return a palloc'd copy of the page.
+ *
+ * Errors out when the caller is not a superuser or when the input holds
+ * fewer than BLCKSZ bytes.  The first BLCKSZ bytes are copied into the
+ * current memory context, so the result lives as long as that context does
+ * and starts at an address returned by palloc() rather than right behind the
+ * bytea's varlena header.
+ */
+static Page
+gin_get_page_copy(bytea *raw_page)
+{
+	int			raw_page_size;
+	Page		page;
+
+	if (!superuser())
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+				 (errmsg("must be superuser to use raw page functions"))));
+
+	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+	if (raw_page_size < BLCKSZ)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("input page too small (%d bytes)", raw_page_size)));
+
+	page = (Page) palloc(BLCKSZ);
+	memcpy(page, VARDATA(raw_page), BLCKSZ);
+
+	return page;
+}
+
+
+/*
+ * gin_metapage_info
+ *		Return the contents of a GIN metapage as a single row.
+ *
+ * The argument is a raw page image.  An error is raised unless the special
+ * area has the size of GinPageOpaqueData and the page flags are exactly
+ * GIN_META.
+ */
+Datum
+gin_metapage_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+	TupleDesc	tupdesc;
+	Page		page;
+	GinPageOpaque opaq;
+	GinMetaPageData *metadata;
+	HeapTuple	resultTuple;
+	Datum		values[10];
+	bool		nulls[10];
+
+	page = gin_get_page_copy(raw_page);
+
+	/* do not look at the opaque data unless the special area can hold it */
+	if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("input page is not a valid GIN metapage"),
+				 errdetail("Special size %d, expected %d",
+						   (int) PageGetSpecialSize(page),
+						   (int) MAXALIGN(sizeof(GinPageOpaqueData)))));
+
+	opaq = (GinPageOpaque) PageGetSpecialPointer(page);
+	if (opaq->flags != GIN_META)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("input page is not a GIN metapage"),
+				 errdetail("Flags %04X, expected %04X",
+						   opaq->flags, GIN_META)));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	metadata = GinPageGetMeta(page);
+
+	memset(nulls, 0, sizeof(nulls));
+
+	/* pending list fields */
+	values[0] = Int64GetDatum(metadata->head);
+	values[1] = Int64GetDatum(metadata->tail);
+	values[2] = Int32GetDatum(metadata->tailFreeSize);
+	values[3] = Int64GetDatum(metadata->nPendingPages);
+	values[4] = Int64GetDatum(metadata->nPendingHeapTuples);
+
+	/* statistics, updated by VACUUM */
+	values[5] = Int64GetDatum(metadata->nTotalPages);
+	values[6] = Int64GetDatum(metadata->nEntryPages);
+	values[7] = Int64GetDatum(metadata->nDataPages);
+	values[8] = Int64GetDatum(metadata->nEntries);
+
+	values[9] = Int32GetDatum(metadata->ginVersion);
+
+	/* Build and return the result tuple. */
+	resultTuple = heap_form_tuple(tupdesc, values, nulls);
+
+	return HeapTupleGetDatum(resultTuple);
+}
+
+
+/*
+ * gin_page_opaque_info
+ *		Return the opaque data of a GIN page as a single row.
+ *
+ * The argument is a raw page image whose special area must have the size of
+ * GinPageOpaqueData.  The page flags are returned as an array of names; bits
+ * without a known name are returned together as one hexadecimal element.
+ */
+Datum
+gin_page_opaque_info(PG_FUNCTION_ARGS)
+{
+	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+	TupleDesc	tupdesc;
+	Page		page;
+	GinPageOpaque opaq;
+	HeapTuple	resultTuple;
+	Datum		values[3];
+	bool		nulls[3];
+	Datum		flags[16];
+	int			nflags = 0;
+	uint16		flagbits;
+
+	page = gin_get_page_copy(raw_page);
+
+	/* do not look at the opaque data unless the special area can hold it */
+	if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("input page is not a valid GIN page"),
+				 errdetail("Special size %d, expected %d",
+						   (int) PageGetSpecialSize(page),
+						   (int) MAXALIGN(sizeof(GinPageOpaqueData)))));
+
+	opaq = (GinPageOpaque) PageGetSpecialPointer(page);
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	/* Convert the flags bitmask to an array of human-readable names */
+	flagbits = opaq->flags;
+	if (flagbits & GIN_DATA)
+		flags[nflags++] = CStringGetTextDatum("data");
+	if (flagbits & GIN_LEAF)
+		flags[nflags++] = CStringGetTextDatum("leaf");
+	if (flagbits & GIN_DELETED)
+		flags[nflags++] = CStringGetTextDatum("deleted");
+	if (flagbits & GIN_META)
+		flags[nflags++] = CStringGetTextDatum("meta");
+	if (flagbits & GIN_LIST)
+		flags[nflags++] = CStringGetTextDatum("list");
+	if (flagbits & GIN_LIST_FULLROW)
+		flags[nflags++] = CStringGetTextDatum("list_fullrow");
+	if (flagbits & GIN_INCOMPLETE_SPLIT)
+		flags[nflags++] = CStringGetTextDatum("incomplete_split");
+	if (flagbits & GIN_COMPRESSED)
+		flags[nflags++] = CStringGetTextDatum("compressed");
+	flagbits &= ~(GIN_DATA | GIN_LEAF | GIN_DELETED | GIN_META | GIN_LIST |
+				  GIN_LIST_FULLROW | GIN_INCOMPLETE_SPLIT | GIN_COMPRESSED);
+	if (flagbits)
+	{
+		/* any flags we don't recognize are printed in hex */
+		flags[nflags++] = DirectFunctionCall1(to_hex32, Int32GetDatum(flagbits));
+	}
+
+	memset(nulls, 0, sizeof(nulls));
+
+	values[0] = Int64GetDatum(opaq->rightlink);
+	/* maxoff is declared int4 in the SQL definition, so build an int4 datum */
+	values[1] = Int32GetDatum(opaq->maxoff);
+	values[2] = PointerGetDatum(
+		construct_array(flags, nflags, TEXTOID, -1, false, 'i'));
+
+	/* Build and return the result tuple. */
+	resultTuple = heap_form_tuple(tupdesc, values, nulls);
+
+	return HeapTupleGetDatum(resultTuple);
+}
+
+
+/*
+ * gin_leafpage_items
+ *		Return one row per posting list segment of a compressed GIN data
+ *		leaf page.
+ *
+ * Each row holds the segment's first item pointer, its size in bytes and
+ * the array of item pointers decoded from it.  The argument is a raw page
+ * image; it is validated and copied on the first call only, as later calls
+ * work from the state saved in the multi-call memory context.
+ */
+Datum
+gin_leafpage_items(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *fctx;
+	gin_leafpage_items_state *inter_call_data;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
+		TupleDesc	tupdesc;
+		MemoryContext mctx;
+		Page		page;
+		GinPageOpaque opaq;
+
+		fctx = SRF_FIRSTCALL_INIT();
+		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+		/*
+		 * The segment pointers saved below point into the page and are
+		 * followed on later calls, so the page must be a copy owned by the
+		 * multi-call context: the argument itself may be a detoasted copy
+		 * that does not outlive the current call.
+		 */
+		page = gin_get_page_copy(raw_page);
+
+		if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("input page is not a valid GIN data leaf page"),
+					 errdetail("Special size %d, expected %d",
+							   (int) PageGetSpecialSize(page),
+							   (int) MAXALIGN(sizeof(GinPageOpaqueData)))));
+
+		opaq = (GinPageOpaque) PageGetSpecialPointer(page);
+		if (opaq->flags != (GIN_DATA | GIN_LEAF | GIN_COMPRESSED))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("input page is not a compressed GIN data leaf page"),
+					 errdetail("Flags %04X, expected %04X",
+							   opaq->flags,
+							   (GIN_DATA | GIN_LEAF | GIN_COMPRESSED))));
+
+		inter_call_data = palloc(sizeof(gin_leafpage_items_state));
+
+		/* Build a tuple descriptor for our result type */
+		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+			elog(ERROR, "return type must be a row type");
+
+		inter_call_data->tupd = tupdesc;
+
+		inter_call_data->seg = GinDataLeafPageGetPostingList(page);
+		inter_call_data->lastseg = (GinPostingList *)
+			(((char *) inter_call_data->seg) +
+			 GinDataLeafPageGetPostingListSize(page));
+
+		fctx->user_fctx = inter_call_data;
+
+		MemoryContextSwitchTo(mctx);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	inter_call_data = fctx->user_fctx;
+
+	/* "<" rather than "!=" so that a damaged segment cannot skip past the end */
+	if (inter_call_data->seg < inter_call_data->lastseg)
+	{
+		HeapTuple	resultTuple;
+		Datum		result;
+		Datum		values[3];
+		bool		nulls[3];
+		int			ndecoded, i;
+		GinPostingList	*cur = inter_call_data->seg;
+		ItemPointer		tids;
+		Datum			*tids_datum;
+
+		memset(nulls, 0, sizeof(nulls));
+
+		values[0] = ItemPointerGetDatum(&cur->first);
+		values[1] = UInt16GetDatum(cur->nbytes);
+
+		/*
+		 * Build an array of decoded item pointers.  The alignment passed to
+		 * construct_array() has to be that of the tid type, which is 's'
+		 * (short) in pg_type; with any other value the elements would be
+		 * stored at offsets where array readers do not look for them.
+		 */
+		tids = ginPostingListDecode(cur, &ndecoded);
+		tids_datum = (Datum *) palloc(ndecoded * sizeof(Datum));
+		for (i = 0; i < ndecoded; i++)
+			tids_datum[i] = ItemPointerGetDatum(&tids[i]);
+		values[2] = PointerGetDatum(
+			construct_array(tids_datum, ndecoded, TIDOID,
+							sizeof(ItemPointerData), false, 's'));
+		pfree(tids_datum);
+		pfree(tids);
+
+		/* Build and return the result tuple. */
+		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
+		result = HeapTupleGetDatum(resultTuple);
+
+		inter_call_data->seg = GinNextPostingListSegment(cur);
+
+		SRF_RETURN_NEXT(fctx, result);
+	}
+	else
+		SRF_RETURN_DONE(fctx);
+}
diff --git a/contrib/pageinspect/pageinspect--1.2--1.3.sql b/contrib/pageinspect/pageinspect--1.2--1.3.sql
index 9bc4dde..dd37469 100644
--- a/contrib/pageinspect/pageinspect--1.2--1.3.sql
+++ b/contrib/pageinspect/pageinspect--1.2--1.3.sql
@@ -40,4 +40,42 @@ CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass,
 	OUT value text)
 RETURNS SETOF record
 AS 'MODULE_PATHNAME', 'brin_page_items'
 LANGUAGE C STRICT;
+
+--
+-- gin_metapage_info(): one row with the fields of a GIN metapage
+--
+CREATE FUNCTION gin_metapage_info(IN page bytea,
+    OUT pending_head bigint,
+    OUT pending_tail bigint,
+    OUT tail_free_size int4,
+    OUT n_pending_pages bigint,
+    OUT n_pending_tuples bigint,
+    OUT n_total_pages bigint,
+    OUT n_entry_pages bigint,
+    OUT n_data_pages bigint,
+    OUT n_entries bigint,
+    OUT version int4)
+AS 'MODULE_PATHNAME', 'gin_metapage_info'
+LANGUAGE C STRICT;
+
+--
+-- gin_page_opaque_info(): one row with the opaque data of a GIN page
+--
+CREATE FUNCTION gin_page_opaque_info(IN page bytea,
+    OUT rightlink bigint,
+    OUT maxoff int4,
+    OUT flags text[])
+AS 'MODULE_PATHNAME', 'gin_page_opaque_info'
+LANGUAGE C STRICT;
+
+--
+-- gin_leafpage_items(): one row per posting list segment of a GIN leaf page
+--
+CREATE FUNCTION gin_leafpage_items(IN page bytea,
+    OUT first_tid tid,
+    OUT nbytes int2,
+    OUT tids tid[])
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gin_leafpage_items'
+LANGUAGE C STRICT;
diff --git a/contrib/pageinspect/pageinspect--1.3.sql b/contrib/pageinspect/pageinspect--1.3.sql
index 856dcdf..9697486 100644
--- a/contrib/pageinspect/pageinspect--1.3.sql
+++ b/contrib/pageinspect/pageinspect--1.3.sql
@@ -144,3 +144,45 @@ CREATE FUNCTION fsm_page_contents(IN page bytea)
 RETURNS text
 AS 'MODULE_PATHNAME', 'fsm_page_contents'
 LANGUAGE C STRICT;
+
+--
+-- GIN functions
+--
+
+--
+-- gin_metapage_info(): one row with the fields of a GIN metapage
+--
+CREATE FUNCTION gin_metapage_info(IN page bytea,
+    OUT pending_head bigint,
+    OUT pending_tail bigint,
+    OUT tail_free_size int4,
+    OUT n_pending_pages bigint,
+    OUT n_pending_tuples bigint,
+    OUT n_total_pages bigint,
+    OUT n_entry_pages bigint,
+    OUT n_data_pages bigint,
+    OUT n_entries bigint,
+    OUT version int4)
+AS 'MODULE_PATHNAME', 'gin_metapage_info'
+LANGUAGE C STRICT;
+
+--
+-- gin_page_opaque_info(): one row with the opaque data of a GIN page
+--
+CREATE FUNCTION gin_page_opaque_info(IN page bytea,
+    OUT rightlink bigint,
+    OUT maxoff int4,
+    OUT flags text[])
+AS 'MODULE_PATHNAME', 'gin_page_opaque_info'
+LANGUAGE C STRICT;
+
+--
+-- gin_leafpage_items(): one row per posting list segment of a GIN leaf page
+--
+CREATE FUNCTION gin_leafpage_items(IN page bytea,
+    OUT first_tid tid,
+    OUT nbytes int2,
+    OUT tids tid[])
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gin_leafpage_items'
+LANGUAGE C STRICT;
diff --git a/doc/src/sgml/pageinspect.sgml b/doc/src/sgml/pageinspect.sgml
index 70517ac..d9e2903 100644
--- a/doc/src/sgml/pageinspect.sgml
+++ b/doc/src/sgml/pageinspect.sgml
@@ -302,6 +302,90 @@ brintest-# order by blknum, attnum limit 6;
 
    <varlistentry>
     <term>
+     <function>gin_metapage_info(page bytea) returns record</function>
+     <indexterm>
+      <primary>gin_metapage_info</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>gin_metapage_info</function> returns information about
+      a <acronym>GIN</acronym> index metapage.  For example:
+<screen>
+test=# SELECT * FROM gin_metapage_info(get_raw_page('gin_index', 0));
+-[ RECORD 1 ]----+-----------
+pending_head     | 4294967295
+pending_tail     | 4294967295
+tail_free_size   | 0
+n_pending_pages  | 0
+n_pending_tuples | 0
+n_total_pages    | 7
+n_entry_pages    | 6
+n_data_pages     | 0
+n_entries        | 693
+version          | 2
+</screen>
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <function>gin_page_opaque_info(page bytea) returns record</function>
+     <indexterm>
+      <primary>gin_page_opaque_info</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>gin_page_opaque_info</function> returns information about
+      the opaque area of a <acronym>GIN</acronym> index page, such as the
+      page type.  For example:
+<screen>
+test=# SELECT * FROM gin_page_opaque_info(get_raw_page('gin_index', 2));
+ rightlink | maxoff |         flags
+-----------+--------+------------------------
+         5 |      0 | {data,leaf,compressed}
+(1 row)
+</screen>
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <function>gin_leafpage_items(page bytea) returns setof record</function>
+     <indexterm>
+      <primary>gin_leafpage_items</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>gin_leafpage_items</function> returns information about
+      the data stored in a <acronym>GIN</acronym> leaf page.  For example:
+<screen>
+ test=# SELECT first_tid, nbytes,
+               array_length(tids,1) AS num_tids,
+               tids[2] AS second_tid
+        FROM gin_leafpage_items(get_raw_page('gin_index', 3)) LIMIT 5;
+ first_tid | nbytes | num_tids | second_tid
+-----------+--------+----------+------------
+ (149,94)  |    248 |      244 | (0,149)
+ (154,11)  |    248 |      245 | (0,154)
+ (158,62)  |    248 |      244 | (0,158)
+ (163,14)  |    248 |      244 | (0,163)
+ (168,18)  |    248 |      245 | (0,168)
+ (5 rows)
+</screen>
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
      <function>fsm_page_contents(page bytea) returns text</function>
      <indexterm>
       <primary>fsm_page_contents</primary>
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to