From 8444117409b1d4326f3f8014aa29069a4493fd1f Mon Sep 17 00:00:00 2001
From: Hari Babu <kommi.haribabu@gmail.com>
Date: Fri, 22 Feb 2019 00:22:43 +1100
Subject: [PATCH 4/4] New prefer-read target_session_attrs type

With the new prefer-read value, an application can prefer
connecting to a read-only server when one is available in the
list of hosts, and otherwise fall back to a read-write server.
---
 doc/src/sgml/libpq.sgml               |  15 ++-
 src/interfaces/libpq/fe-connect.c     | 126 ++++++++++++++++++++++----
 src/interfaces/libpq/libpq-fe.h       |   3 +-
 src/interfaces/libpq/libpq-int.h      |  10 +-
 src/test/recovery/t/001_stream_rep.pl |  14 ++-
 5 files changed, 143 insertions(+), 25 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index 5c29beef51..d4bddef6cf 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -1585,8 +1585,19 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
         returns <literal>on</literal>, the connection will be closed.
         If multiple hosts were specified in the connection string, any 
         remaining servers will be tried just as if the connection
-        attempt had failed.  The default value of this parameter,
-        <literal>any</literal>, regards all connections as acceptable.
+        attempt had failed.
+      </para>
+      
+      <para>
+        If this parameter is set to <literal>prefer-read</literal>, connections
+        where <literal>SHOW transaction_read_only</literal> returns <literal>on</literal>
+        are preferred. If no such connections can be found, then a connection
+        that allows read-write transactions will be accepted.
+      </para>
+
+      <para>
+        The default value of this parameter, <literal>any</literal>,
+        regards all connections as acceptable.
       </para>
       </listitem>
     </varlistentry>
diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c
index 25a153f48c..6943674067 100644
--- a/src/interfaces/libpq/fe-connect.c
+++ b/src/interfaces/libpq/fe-connect.c
@@ -322,7 +322,7 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
 
 	{"target_session_attrs", "PGTARGETSESSIONATTRS",
 		DefaultTargetSessionAttrs, NULL,
-		"Target-Session-Attrs", "", 11, /* sizeof("read-write") = 11 */
+		"Target-Session-Attrs", "", 12, /* sizeof("prefer-read") = 12 */
 	offsetof(struct pg_conn, target_session_attrs)},
 
 	/* Terminating entry --- MUST BE LAST */
@@ -1243,6 +1243,8 @@ connectOptions2(PGconn *conn)
 			conn->requested_session_type = SESSION_TYPE_ANY;
 		else if (strcmp(conn->target_session_attrs, "read-write") == 0)
 			conn->requested_session_type = SESSION_TYPE_READ_WRITE;
+		else if (strcmp(conn->target_session_attrs, "prefer-read") == 0)
+			conn->requested_session_type = SESSION_TYPE_PREFER_READ;
 		else
 		{
 			conn->status = CONNECTION_BAD;
@@ -2137,13 +2139,31 @@ keep_going:						/* We will come back to here until there is
 
 		if (conn->whichhost + 1 >= conn->nconnhost)
 		{
-			/*
-			 * Oops, no more hosts.  An appropriate error message is already
-			 * set up, so just set the right status.
-			 */
-			goto error_return;
+			if (conn->read_write_host_index >= 0)
+			{
+				/*
+				 * Getting here means we failed to find a read-only server,
+				 * so now retry the read-write server recorded earlier.
+				 */
+				conn->whichhost = conn->read_write_host_index;
+
+				/*
+				 * Mark the index as consumed (-2) so the second attempt accepts
+				 * the read-write server instead of deferring it again.
+				 */
+				conn->read_write_host_index = -2;
+			}
+			else
+			{
+				/*
+				 * Oops, no more hosts.  An appropriate error message is already
+				 * set up, so just set the right status.
+				 */
+				goto error_return;
+			}
 		}
-		conn->whichhost++;
+		else
+			conn->whichhost++;
 
 		/* Drop any address info for previous host */
 		release_conn_addrinfo(conn);
@@ -2347,6 +2367,7 @@ keep_going:						/* We will come back to here until there is
 							conn->try_next_addr = true;
 							goto keep_going;
 						}
+
 						appendPQExpBuffer(&conn->errorMessage,
 										  libpq_gettext("could not create socket: %s\n"),
 										  SOCK_STRERROR(SOCK_ERRNO, sebuf, sizeof(sebuf)));
@@ -3225,14 +3246,16 @@ keep_going:						/* We will come back to here until there is
 		case CONNECTION_CHECK_TARGET:
 			{
 				/*
-				 * If a read-write connection is required, see if we have one.
+				 * If a read-write or prefer-read connection is required,
+				 * see if we have one.
 				 *
 				 * Servers before 7.4 lack the transaction_read_only GUC, but
 				 * by the same token they don't have any read-only mode, so we
 				 * may just skip the test in that case.
 				 */
 				if (conn->sversion >= 70400 &&
-					conn->requested_session_type == SESSION_TYPE_READ_WRITE)
+					(conn->requested_session_type == SESSION_TYPE_READ_WRITE ||
+					conn->requested_session_type == SESSION_TYPE_PREFER_READ))
 				{
 					if (conn->sversion < 120000)
 					{
@@ -3253,15 +3276,23 @@ keep_going:						/* We will come back to here until there is
 							restoreErrorMessage(conn, &savedMessage);
 							goto error_return;
 						}
+
 						conn->status = CONNECTION_CHECK_WRITABLE;
+
 						restoreErrorMessage(conn, &savedMessage);
 						return PGRES_POLLING_READING;
 					}
-					else if (conn->transaction_read_only)
+					else if ((conn->transaction_read_only
+								&& (conn->requested_session_type == SESSION_TYPE_READ_WRITE)) ||
+							(!conn->transaction_read_only
+								&& (conn->requested_session_type == SESSION_TYPE_PREFER_READ)
+								&& (conn->read_write_host_index != -2)))
 					{
-						/* Not writable; fail this connection. */
+						/* Not a requested type; fail this connection. */
 						const char *displayed_host;
 						const char *displayed_port;
+						const char *type = (conn->requested_session_type == SESSION_TYPE_PREFER_READ) ?
+											"read-only" : "writable";
 
 						/* Append error report to conn->errorMessage. */
 						if (conn->connhost[conn->whichhost].type == CHT_HOST_ADDRESS)
@@ -3273,15 +3304,19 @@ keep_going:						/* We will come back to here until there is
 							displayed_port = DEF_PGPORT_STR;
 
 						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
+										  libpq_gettext("could not make a %s "
 														"connection to server "
 														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+										  type, displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3289,6 +3324,39 @@ keep_going:						/* We will come back to here until there is
 						conn->try_next_host = true;
 						goto keep_going;
 					}
+					else /* obtained the requested type, consume it */
+					{
+						/* We can release the address list now. */
+						release_conn_addrinfo(conn);
+
+						/* We are open for business! */
+						conn->status = CONNECTION_OK;
+						return PGRES_POLLING_OK;
+					}
+				}
+
+				/*
+				 * If the requested type is prefer-read, record this host index
+				 * and try the other hosts first; we may return to it later.
+				 */
+				if ((conn->target_session_attrs != NULL) &&
+					   (conn->requested_session_type == SESSION_TYPE_PREFER_READ) &&
+					   (conn->read_write_host_index != -2))
+				{
+					/* Close connection politely. */
+					conn->status = CONNECTION_OK;
+					sendTerminateConn(conn);
+
+					/* Record read-write host index */
+					if (conn->read_write_host_index == -1)
+						conn->read_write_host_index = conn->whichhost;
+
+					/*
+					 * Try next host if any, but we don't want to consider
+					 * additional addresses for this host.
+					 */
+					conn->try_next_host = true;
+					goto keep_going;
 				}
 
 				/* We can release the address list now. */
@@ -3358,11 +3426,22 @@ keep_going:						/* We will come back to here until there is
 					char	   *val;
 
 					val = PQgetvalue(res, 0, 0);
-					if (strncmp(val, "on", 2) == 0)
+
+					/*
+					 * Reject a read-only server when read-write is requested.  Also
+					 * reject a read-write server when prefer-read is requested, but
+					 * remember the first such host so that it can be used in a second
+					 * pass if no read-only server is found in the first pass.
+					 */
+					if (((strncmp(val, "on", 2) == 0) &&
+							(conn->requested_session_type == SESSION_TYPE_READ_WRITE)) ||
+						((strncmp(val, "off", 3) == 0) &&
+							(conn->requested_session_type == SESSION_TYPE_PREFER_READ) &&
+							(conn->read_write_host_index != -2)))
 					{
-						/* Not writable; fail this connection. */
-						const char *displayed_host;
-						const char *displayed_port;
+						/* Not a requested type; fail this connection. */
+						const char *type = (conn->requested_session_type == SESSION_TYPE_PREFER_READ) ?
+											"read-only" : "writable";
 
 						PQclear(res);
 						restoreErrorMessage(conn, &savedMessage);
@@ -3377,15 +3456,19 @@ keep_going:						/* We will come back to here until there is
 							displayed_port = DEF_PGPORT_STR;
 
 						appendPQExpBuffer(&conn->errorMessage,
-										  libpq_gettext("could not make a writable "
+										  libpq_gettext("could not make a %s "
 														"connection to server "
 														"\"%s:%s\"\n"),
-										  displayed_host, displayed_port);
+										  type, displayed_host, displayed_port);
 
 						/* Close connection politely. */
 						conn->status = CONNECTION_OK;
 						sendTerminateConn(conn);
 
+						/* Record read-write host index */
+						if (conn->read_write_host_index == -1)
+							conn->read_write_host_index = conn->whichhost;
+
 						/*
 						 * Try next host if any, but we don't want to consider
 						 * additional addresses for this host.
@@ -3394,7 +3477,7 @@ keep_going:						/* We will come back to here until there is
 						goto keep_going;
 					}
 
-					/* Session is read-write, so we're good. */
+					/* Session is requested type, so we're good. */
 					PQclear(res);
 					termPQExpBuffer(&savedMessage);
 
@@ -3608,6 +3691,9 @@ makeEmptyPGconn(void)
 		conn = NULL;
 	}
 
+	/* No read-write host seen yet; conn may have been set to NULL just above */
+	if (conn != NULL)
+		conn->read_write_host_index = -1;
 	return conn;
 }
 
diff --git a/src/interfaces/libpq/libpq-fe.h b/src/interfaces/libpq/libpq-fe.h
index 15bb82a885..a7de100c3b 100644
--- a/src/interfaces/libpq/libpq-fe.h
+++ b/src/interfaces/libpq/libpq-fe.h
@@ -74,7 +74,8 @@ typedef enum
 typedef enum
 {
 	SESSION_TYPE_ANY = 0,		/* Any session (default) */
-	SESSION_TYPE_READ_WRITE		/* Read-write session */
+	SESSION_TYPE_READ_WRITE,	/* Read-write session */
+	SESSION_TYPE_PREFER_READ	/* Prefer read only session */
 } TargetSessionAttrsType;
 
 typedef enum
diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h
index b0ac98b90d..1878598633 100644
--- a/src/interfaces/libpq/libpq-int.h
+++ b/src/interfaces/libpq/libpq-int.h
@@ -363,7 +363,7 @@ struct pg_conn
 	char	   *krbsrvname;		/* Kerberos service name */
 #endif
 
-	/* Type of connection to make.  Possible values: any, read-write. */
+	/* Type of connection to make.  Possible values: any, read-write, prefer-read. */
 	char	   *target_session_attrs;
 	TargetSessionAttrsType requested_session_type;
 
@@ -400,6 +400,14 @@ struct pg_conn
 	pg_conn_host *connhost;		/* details about each named host */
 	char	   *connip;			/* IP address for current network connection */
 
+	/*
+	 * Index of the first read-write host found in the connection string.
+	 *
+	 * It is -1 initially, then the index of the first read-write host seen,
+	 * and -2 during the second connection attempt so it is not deferred again.
+	 */
+	int 		read_write_host_index;
+
 	/* Connection data */
 	pgsocket	sock;			/* FD for socket, PGINVALID_SOCKET if
 								 * unconnected */
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index beb45551a2..0e398136a5 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 26;
+use Test::More tests => 29;
 
 # Initialize master node
 my $node_master = get_new_node('master');
@@ -117,6 +117,18 @@ test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
 test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
 	"any", 0);
 
+# Connect to standby1 in "prefer-read" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_standby_1, "prefer-read",
+	0);
+
+# Connect to standby1 in "prefer-read" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+	"prefer-read", 0);
+
+# Connect to master in "prefer-read" mode when the list contains only master.
+test_target_session_attrs($node_master, $node_master, $node_master,
+	"prefer-read", 0);
+
 note "switching to physical replication slot";
 
 # Switch to using a physical replication slot. We can do this without a new
-- 
2.20.1.windows.1

