/*-------------------------------------------------------------------------
 * txid.c
 *
 *	Our own datatype for safe storage of transaction ID's.
 *
 *	Copyright (c) 2003-2004, PostgreSQL Global Development Group
 *	Author: Jan Wieck, Afilias USA INC.
 *
 *  Support for 64-bit transaction ID's by Marko Kreen.
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <limits.h>

#include "access/xact.h"
#include "access/transam.h"
#include "executor/spi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
#include "funcapi.h"

/*
 * Fallback definitions, used only when the PostgreSQL headers included
 * above do not already provide the int64 argument/return macros.
 *
 * NOTE(review): the fallback PG_GETARG_INT64 casts the pointer value itself
 * to int64 rather than dereferencing it, and alloc_txid() is not defined in
 * the visible part of this file -- presumably these fallbacks are never
 * actually compiled; confirm before relying on them.
 */
#ifndef PG_GETARG_INT64
#define PG_GETARG_INT64(n)	(int64 )DatumGetPointer(PG_GETARG_DATUM(n))
#endif
#ifndef PG_RETURN_INT64
#define PG_RETURN_INT64(x)	return PointerGetDatum(alloc_txid(x))
#endif

/* Refuse to build when INT64_IS_BUSTED is defined: txid is a 64-bit type. */
#ifdef INT64_IS_BUSTED
#error txid needs working int64
#endif

/*
 * 64-bit transaction ID.  For normal xids convert_xid() builds it as
 * (epoch << 32) | xid.
 */
typedef uint64 txid;

/*
 * Snapshot expressed in txid values.
 *
 * Variable-length: instances are allocated with
 * offsetof(txid_snapshot, xip) + nxip * sizeof(txid) bytes, so xip[] really
 * holds nxip entries even though it is declared with one element.
 */
typedef struct
{
	/* total size of the allocation in bytes; filled in by every constructor */
	int32		varsz;
	/* number of entries stored in xip[] */
	uint32      nxip;
	/* txid_in_snapshot() reports every value below xmin as "in" */
	txid xmin;
	/* txid_in_snapshot() reports every value at or above xmax as "not in" */
	txid xmax;
	/* values between xmin and xmax that are reported as "not in" */
	txid xip[1];
}	txid_snapshot;

/*
 * In-memory copy of the single row of the txid_epoch table.
 */
struct txid_epoch_state {
	/* xid value last loaded from or saved to the table; check_epoch() treats 0 as "not loaded yet" */
	uint64			last_value;
	/* epoch counter; convert_xid() shifts it into the upper 32 bits of a txid */
	uint64			epoch;
};

/*
 * this caches the txid_epoch table.
 * The struct should be updated only together with the table.
 *
 * It starts out zeroed; check_epoch() takes last_value == 0 to mean that
 * nothing has been loaded yet and calls load_epoch().
 */
static struct txid_epoch_state epoch_state = { 0, 0 };

/*
 * public functions
 *
 * Each SQL-callable function is registered with PG_FUNCTION_INFO_V1 and
 * declared with the PG_FUNCTION_ARGS calling convention.
 */

/* accessors for the current transaction and the current snapshot bounds */
PG_FUNCTION_INFO_V1(get_current_txid);
PG_FUNCTION_INFO_V1(get_min_txid);
PG_FUNCTION_INFO_V1(get_max_txid);
Datum		get_current_txid(PG_FUNCTION_ARGS);
Datum		get_min_txid(PG_FUNCTION_ARGS);
Datum		get_max_txid(PG_FUNCTION_ARGS);

/* txid_snapshot type: text/binary I/O, membership tests and helpers */
PG_FUNCTION_INFO_V1(txid_snapshot_in);
PG_FUNCTION_INFO_V1(txid_snapshot_out);
PG_FUNCTION_INFO_V1(txid_snapshot_recv);
PG_FUNCTION_INFO_V1(txid_snapshot_send);
PG_FUNCTION_INFO_V1(txid_in_snapshot);
PG_FUNCTION_INFO_V1(txid_not_in_snapshot);
PG_FUNCTION_INFO_V1(txid_current_snapshot);
PG_FUNCTION_INFO_V1(txid_snapshot_xmin);
PG_FUNCTION_INFO_V1(txid_snapshot_xmax);
PG_FUNCTION_INFO_V1(txid_snapshot_old_txids);
PG_FUNCTION_INFO_V1(txid_array_old_txids);
Datum		txid_snapshot_in(PG_FUNCTION_ARGS);
Datum		txid_snapshot_out(PG_FUNCTION_ARGS);
Datum		txid_snapshot_recv(PG_FUNCTION_ARGS);
Datum		txid_snapshot_send(PG_FUNCTION_ARGS);
Datum		txid_in_snapshot(PG_FUNCTION_ARGS);
Datum		txid_not_in_snapshot(PG_FUNCTION_ARGS);
Datum		txid_current_snapshot(PG_FUNCTION_ARGS);
Datum		txid_snapshot_xmin(PG_FUNCTION_ARGS);
Datum		txid_snapshot_xmax(PG_FUNCTION_ARGS);
Datum       txid_snapshot_old_txids(PG_FUNCTION_ARGS);
Datum		txid_array_old_txids(PG_FUNCTION_ARGS);

/*
 * do a TransactionId -> txid conversion
 *
 * For a normal xid the result is (epoch << 32) | xid.  The epoch is taken
 * from the cached epoch_state and adjusted by one when xid lies on the
 * other side of a 32-bit wrap-around relative to epoch_state.last_value.
 *
 * InvalidTransactionId is mapped to the largest positive 64-bit signed
 * value; the remaining special xids (below FirstNormalTransactionId) are
 * returned unchanged.
 */
static txid convert_xid(TransactionId xid)
{
	uint64 epoch;

	/* avoid issues with the special meaning of 0 */
	if (xid == InvalidTransactionId)
	{
		/*
		 * Shift in unsigned arithmetic: "1LL << 63" overflows a signed
		 * 64-bit integer, which is undefined behaviour.
		 */
		return (((uint64) 1) << 63) - 1;
	}

	/* return special xid's as-is */
	if (xid < FirstNormalTransactionId)
		return xid;

	/* xid can be on either side of a wrap-around */
	epoch = epoch_state.epoch;
	if (TransactionIdPrecedes(xid, epoch_state.last_value)) {
		/* logically older but numerically larger: belongs to previous epoch */
		if (xid > epoch_state.last_value)
			epoch--;
	} else if (TransactionIdFollows(xid, epoch_state.last_value)) {
		/* logically newer but numerically smaller: belongs to next epoch */
		if (xid < epoch_state.last_value)
			epoch++;
	}
	return (epoch << 32) | xid;
}

/*
 * Refresh the cached epoch_state from the txid_epoch table.
 *
 * Runs a read-only SPI query which has to produce exactly one row with
 * non-NULL epoch and last_value columns; every failure is raised through
 * elog(ERROR).
 *
 * Always returns 1.
 */
static int load_epoch(void)
{
	int			rc;
	bool		is_null = false;
	HeapTuple	tuple;
	TupleDesc	desc;
	Datum		col;
	uint64		tbl_epoch;
	uint64		tbl_last;

	rc = SPI_connect();
	if (rc < 0)
		elog(ERROR, "cannot connect to SPI");

	rc = SPI_execute("select epoch, last_value from txid_epoch", true, 0);
	if (rc != SPI_OK_SELECT)
		elog(ERROR, "load_epoch: select failed?");
	if (SPI_processed != 1)
		elog(ERROR, "load_epoch: there must be exactly 1 row");

	tuple = SPI_tuptable->vals[0];
	desc = SPI_tuptable->tupdesc;

	/* first column: epoch */
	col = SPI_getbinval(tuple, desc, 1, &is_null);
	if (is_null)
		elog(ERROR, "load_epoch: epoch is NULL");
	tbl_epoch = DatumGetInt64(col);

	/* second column: last_value */
	col = SPI_getbinval(tuple, desc, 2, &is_null);
	if (is_null)
		elog(ERROR, "load_epoch: last_value is NULL");
	tbl_last = DatumGetInt64(col);

	SPI_finish();

	/*
	 * The table contents win unconditionally.  Should the table hold
	 * smaller values than the cache, some updates were lost; that case is
	 * not treated specially at the moment, the table values are simply
	 * taken over.
	 */
	epoch_state.last_value = tbl_last;
	epoch_state.epoch = tbl_epoch;

	return 1;
}

/*
 * updates last_value and epoch, if needed
 *
 * Writes the current top-level xid as the new last_value into the
 * txid_epoch table, bumping the epoch when the xid is numerically smaller
 * than the cached last_value.  The in-memory epoch_state is updated only
 * after the UPDATE has succeeded; failures are raised through elog(ERROR).
 */
static void save_epoch(void)
{
	int res;
	char qbuf[200];
	TransactionId xid = GetTopTransactionId();
	uint64 new_epoch, new_value;

	/*
	 * keep intermediate state.
	 */
	new_value = xid;
	new_epoch = epoch_state.epoch;
	if (xid < epoch_state.last_value)
		new_epoch++;

	/*
	 * Bounded formatting; the casts make the arguments match %llu no
	 * matter how uint64 is defined on this platform.
	 */
	snprintf(qbuf, sizeof(qbuf),
			 "update txid_epoch set epoch = %llu, last_value = %llu",
			 (unsigned long long) new_epoch,
			 (unsigned long long) new_value);

	res = SPI_connect();
	if (res < 0)
		elog(ERROR, "cannot connect to SPI");
	res = SPI_execute(qbuf, false, 0);
	if (res != SPI_OK_UPDATE)
		elog(ERROR, "save_epoch: update failed?");
	if (SPI_processed != 1)
		elog(ERROR, "save_epoch: there must be exactly 1 row");

	SPI_finish();

	/*
	 * Seems the update was successful, update internal state too.
	 *
	 * There is a chance that the TX will be rolled back, but then
	 * another backend will do the update, or this one at next
	 * checkpoint.
	 */
	epoch_state.epoch = new_epoch;
	epoch_state.last_value = new_value;
}

/*
 * Number of transactions.  check_epoch() uses it as the base distance from
 * the cached last_value after which the epoch state is saved again; the
 * table is re-read once 1/16 of the (per-process adjusted) distance has
 * passed.
 */
#define RECHECK_INTERVAL  100000

/*
 * Keep the cached epoch_state reasonably fresh relative to the current
 * top-level transaction: load it when nothing has been loaded yet, reload
 * it when the current xid has moved far enough from the cached last_value,
 * and save a new value when it has moved further still.
 */
static void check_epoch(void)
{
	TransactionId cur_xid = GetTopTransactionId();
	TransactionId save_limit;
	TransactionId elapsed;
	int loaded = 1;

	/* should not happen, but just in case */
	if (cur_xid == InvalidTransactionId)
		return;

	/* nothing cached yet (new backend) */
	if (epoch_state.last_value == 0)
		load_epoch();

	/* per-process offset, to keep processes from saving at the same time */
	save_limit = RECHECK_INTERVAL + 100 * (MyProcPid & 0xFF);

	/* the table is read more often than it is written */
	elapsed = cur_xid - (TransactionId) epoch_state.last_value;
	if (elapsed > save_limit / 16)
		loaded = load_epoch();

	/* measure again: a reload may have changed last_value */
	elapsed = cur_xid - (TransactionId) epoch_state.last_value;
	if (loaded && elapsed <= save_limit)
		return;

	/* cached value is too old (or the load failed): write a new one */
	save_epoch();
}

/*
 * qsort() comparator ordering two 64-bit unsigned values ascending.
 * Returns -1, 0 or 1.
 */
static int _cmp_txid(const void *aa, const void *bb)
{
	uint64		lhs = *(const uint64 *) aa;
	uint64		rhs = *(const uint64 *) bb;

	if (lhs == rhs)
		return 0;
	return (lhs < rhs) ? -1 : 1;
}

/* Sort the xip[] entries of a snapshot into ascending order, in place. */
static void sort_snapshot(txid_snapshot *snap)
{
	txid	   *first = &snap->xip[0];
	size_t		count = snap->nxip;

	qsort(first, count, sizeof(*first), _cmp_txid);
}

/*
 *		getCurrentXid	- Return the current transaction ID as xxid
 */
Datum
get_current_txid(PG_FUNCTION_ARGS)
{
	txid val;

	check_epoch();

	val = convert_xid(GetTopTransactionId());
	PG_RETURN_INT64(val);
}


/*
 *		get_min_txid	- return xmin of SerializableSnapshot as a txid
 *
 * Raises an error when SerializableSnapshot is not set.
 */
Datum
get_min_txid(PG_FUNCTION_ARGS)
{
	if (!SerializableSnapshot)
		elog(ERROR, "Slony-I: SerializableSnapshot is NULL in getMinXid()");

	/* bring the cached epoch up to date before converting */
	check_epoch();

	PG_RETURN_INT64(convert_xid(SerializableSnapshot->xmin));
}


/*
 *		get_max_txid	- return xmax of SerializableSnapshot as a txid
 *
 * Raises an error when SerializableSnapshot is not set.
 */
Datum
get_max_txid(PG_FUNCTION_ARGS)
{
	if (!SerializableSnapshot)
		elog(ERROR, "Slony-I: SerializableSnapshot is NULL in getMaxXid()");

	/* bring the cached epoch up to date before converting */
	check_epoch();

	PG_RETURN_INT64(convert_xid(SerializableSnapshot->xmax));
}


/*
 *		xxid_snapshot_in	- input function for type xxid_snapshot
 */
Datum
txid_snapshot_in(PG_FUNCTION_ARGS)
{
	static int	a_size = 0;
	static txid *xip = NULL;

	int			a_used = 0;
	txid xmin;
	txid xmax;
	txid_snapshot *snap;
	int			size;

	char	   *str = PG_GETARG_CSTRING(0);
	char	   *endp;

	if (a_size == 0)
	{
		a_size = 4096;
		xip = (txid *) malloc(sizeof(txid) * a_size);
		if (xip == NULL)
			elog(ERROR, "Out of memory in xxid_snapshot_in");
	}

	xmin = (txid) strtoull(str, &endp, 0);
	if (*endp != ':')
		elog(ERROR, "illegal xxid_snapshot input format");
	str = endp + 1;

	xmax = (txid) strtoull(str, &endp, 0);
	if (*endp != ':')
		elog(ERROR, "illegal xxid_snapshot input format");
	str = endp + 1;

	while (*str != '\0')
	{
		if (a_used >= a_size)
		{
			a_size *= 2;
			xip = (txid *) realloc(xip, sizeof(txid) * a_size);
			if (xip == NULL)
				elog(ERROR, "Out of memory in xxid_snapshot_in");
		}

		if (*str == '\'')
		{
			str++;
			xip[a_used++] = (txid) strtoull(str, &endp, 0);
			if (*endp != '\'')
				elog(ERROR, "illegal xxid_snapshot input format");
			str = endp + 1;
		}
		else
		{
			xip[a_used++] = (txid) strtoull(str, &endp, 0);
			str = endp;
		}
		if (*str == ',')
			str++;
		else
		{
			if (*str != '\0')
				elog(ERROR, "illegal xxid_snapshot input format");
		}
	}

	size = offsetof(txid_snapshot, xip) + sizeof(txid) * a_used;
	snap = (txid_snapshot *) palloc(size);
	snap->varsz = size;
	snap->xmin = xmin;
	snap->xmax = xmax;
	snap->nxip = a_used;
	if (a_used > 0)
		memcpy(&(snap->xip[0]), xip, sizeof(txid) * a_used);

	sort_snapshot(snap);
	
	PG_RETURN_POINTER(snap);
}

/*
 *		xxid_snapshot_out	- output function for type xxid_snapshot
 */
Datum
txid_snapshot_out(PG_FUNCTION_ARGS)
{
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(0);

	char	   *str = palloc(60 + snap->nxip * 30);
	char	   *cp = str;
	int			i;

	snprintf(str, 26, "%llu:%llu:", snap->xmin, snap->xmax);
	cp = str + strlen(str);

	for (i = 0; i < snap->nxip; i++)
	{
		snprintf(cp, 26, "%llu%s", snap->xip[i],
				 (i < snap->nxip - 1) ? "," : "");
		cp += strlen(cp);
	}

	PG_RETURN_CSTRING(str);
}

Datum
txid_snapshot_recv(PG_FUNCTION_ARGS)
{
	StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
	txid_snapshot *snap;
	unsigned i, count, size;

	count = pq_getmsgint(buf, 4);
	size = offsetof(txid_snapshot, xip) + sizeof(txid) * count;
	snap = palloc(size);

	snap->varsz = size;
	snap->nxip = count;
	snap->xmin = pq_getmsgint64(buf);
	snap->xmax = pq_getmsgint64(buf);
	for (i = 0; i < count; i++)
		snap->xip[i] = pq_getmsgint64(buf);
	PG_RETURN_POINTER(snap);
}

Datum
txid_snapshot_send(PG_FUNCTION_ARGS)
{
	int i;
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(0);
	StringInfoData buf;

	pq_begintypsend(&buf);
	pq_sendint(&buf, snap->nxip, 4);
	pq_sendint64(&buf, snap->xmin);
	pq_sendint64(&buf, snap->xmin);
	for (i = 0; i < snap->nxip; i++)
		pq_sendint64(&buf, snap->xip[i]);
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}


/*
 *		txid_in_snapshot	- is the txid "in" the snapshot?
 *
 * True for values below xmin; false for values at or above xmax; for the
 * values in between, false exactly when the value is listed in xip[].
 */
Datum
txid_in_snapshot(PG_FUNCTION_ARGS)
{
	txid		target = PG_GETARG_INT64(0);
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(1);
	const txid *entry;
	const txid *stop;

	if (target < snap->xmin)
		PG_RETURN_BOOL(true);
	if (target >= snap->xmax)
		PG_RETURN_BOOL(false);

	/* between the bounds: a listed value counts as not in the snapshot */
	stop = snap->xip + snap->nxip;
	for (entry = snap->xip; entry < stop; entry++)
	{
		if (*entry == target)
			PG_RETURN_BOOL(false);
	}

	PG_RETURN_BOOL(true);
}


/*
 *		txid_not_in_snapshot	- is the txid "not in" the snapshot?
 *
 * False for values below xmin; true for values at or above xmax; for the
 * values in between, true exactly when the value is listed in xip[].
 */
Datum
txid_not_in_snapshot(PG_FUNCTION_ARGS)
{
	txid		target = PG_GETARG_INT64(0);
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(1);
	const txid *entry;
	const txid *stop;

	if (target < snap->xmin)
		PG_RETURN_BOOL(false);
	if (target >= snap->xmax)
		PG_RETURN_BOOL(true);

	/* between the bounds: a listed value counts as not in the snapshot */
	stop = snap->xip + snap->nxip;
	for (entry = snap->xip; entry < stop; entry++)
	{
		if (*entry == target)
			PG_RETURN_BOOL(true);
	}

	PG_RETURN_BOOL(false);
}


/*
 *		txid_current_snapshot	- build a txid_snapshot from SerializableSnapshot
 *
 * xmin, xmax and every xip entry are converted from TransactionId to txid,
 * and the xip[] array is sorted before the snapshot is returned.
 * Raises an error when SerializableSnapshot is not set.
 */
Datum
txid_current_snapshot(PG_FUNCTION_ARGS)
{
	txid_snapshot *snap;
	unsigned num, i, size;

	if (SerializableSnapshot == NULL)
		elog(ERROR, "get_current_snapshot: SerializableSnapshot == NULL");

	check_epoch();

	num = SerializableSnapshot->xcnt;
	size = offsetof(txid_snapshot, xip) + sizeof(txid) * num;
	snap = palloc(size);
	snap->varsz = size;
	snap->xmin = convert_xid(SerializableSnapshot->xmin);
	snap->xmax = convert_xid(SerializableSnapshot->xmax);
	snap->nxip = num;

	/*
	 * The entries need the same epoch conversion as xmin/xmax; otherwise
	 * they would not be comparable with the 64-bit values tested against
	 * the snapshot.
	 */
	for (i = 0; i < num; i++)
		snap->xip[i] = convert_xid(SerializableSnapshot->xip[i]);

	sort_snapshot(snap);

	PG_RETURN_POINTER(snap);
}

Datum
txid_snapshot_xmin(PG_FUNCTION_ARGS)
{
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(0);

	PG_RETURN_INT64(snap->xmin);
}

Datum
txid_snapshot_xmax(PG_FUNCTION_ARGS)
{
	txid_snapshot *snap = (txid_snapshot *) PG_GETARG_VARLENA_P(0);

	PG_RETURN_INT64(snap->xmax);
}

/*
 * Collect the xip entries of "prev" that no longer appear in the xip list
 * of "cur".
 *
 * The result array is palloc'ed with room for prev->nxip entries and
 * handed back through *res.  Returns the number of entries stored in it.
 */
static int calc_old_txids(txid_snapshot *prev, txid_snapshot *cur,
		txid **res)
{
	txid	   *finished = palloc(prev->nxip * sizeof(txid));
	int			nfinished = 0;
	uint32		p;

	for (p = 0; p < prev->nxip; p++)
	{
		txid		candidate = prev->xip[p];
		uint32		c = 0;

		/* scan the newer snapshot for the same txid */
		while (c < cur->nxip && cur->xip[c] != candidate)
			c++;

		/* not listed any more: it finished in between the snapshots */
		if (c == cur->nxip)
			finished[nfinished++] = candidate;
	}

	*res = finished;
	return nfinished;
}

/*
 *		txid_array_old_txids	- array variant of the "old txids" lookup
 *
 * Takes two snapshots (previous, current) and returns an int8 array with
 * the txids that calc_old_txids() reports for them.
 */
Datum
txid_array_old_txids(PG_FUNCTION_ARGS)
{
	txid_snapshot *prev = (txid_snapshot *) PG_GETARG_VARLENA_P(0);
	txid_snapshot *cur = (txid_snapshot *) PG_GETARG_VARLENA_P(1);
	txid	   *old_ids;
	Datum	   *elems;
	ArrayType  *result;
	int			count;
	int			k;
	int16		elem_len;
	bool		elem_byval;
	char		elem_align;

	count = calc_old_txids(prev, cur, &old_ids);

	/* wrap every txid into a Datum for construct_array() */
	elems = palloc(count * sizeof(Datum));
	for (k = 0; k < count; k++)
		elems[k] = Int64GetDatum(old_ids[k]);
	pfree(old_ids);

	/* look up the storage properties of int8 and build the array */
	get_typlenbyvalalign(INT8OID, &elem_len, &elem_byval, &elem_align);
	result = construct_array(elems, count, INT8OID,
							 elem_len, elem_byval, elem_align);

	PG_RETURN_POINTER(result);
}


/*
 * Iteration state of the set-returning function txid_snapshot_old_txids(),
 * kept in fctx->user_fctx between calls.
 */
struct old_state {
	/* number of entries in list */
	int len;
	/* index of the next entry to return */
	int pos;
	/* txids to return; set up to point into the same allocation as this struct */
	txid *list;
};

Datum
txid_snapshot_old_txids(PG_FUNCTION_ARGS)
{
	FuncCallContext *fctx;
	struct old_state *state;

	if (SRF_IS_FIRSTCALL()) {
		txid *list;
		txid_snapshot *prev = (txid_snapshot *) PG_GETARG_VARLENA_P(0);
		txid_snapshot *cur = (txid_snapshot *) PG_GETARG_VARLENA_P(1);
		int len = calc_old_txids(prev, cur, &list);
		int statelen = sizeof(*state) + len*sizeof(txid);
		
		fctx = SRF_FIRSTCALL_INIT();
		state = MemoryContextAlloc(fctx->multi_call_memory_ctx, statelen);
		state->len = len;
		state->pos = 0;
		state->list = (txid *)((char *)state + sizeof(*state));
		memcpy(state->list, list, sizeof(txid) * len);
		fctx->user_fctx = state;
	}
	fctx = SRF_PERCALL_SETUP();
	state = fctx->user_fctx;
	if (state->pos < state->len) {
		Datum res = Int64GetDatum(state->list[state->pos]);
		state->pos++;
		SRF_RETURN_NEXT(fctx, res);
	} else {
		SRF_RETURN_DONE(fctx);
	}
}

/*
 * txid_array_old_txids() and txid_snapshot_old_txids() above are two
 * variants of the same operation (array result vs. set-returning), as I
 * have not yet determined which one is more useful.
 */

