From 0bbae7f241b36a81432cb438019d697ad6d46d70 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.b.smith@fujitsu.com>
Date: Thu, 23 Jan 2025 14:36:26 +1100
Subject: [PATCH v56] DOCS - Generated Column Replication.

This patch adds a new section "Generated Column Replication" to the
"Logical Replication" documentation chapter.

Author: Peter Smith
Reviewed By: Vignesh C, Peter Eisentraut
Discussion: https://www.postgresql.org/message-id/flat/B80D17B2-2C8E-4C7D-87F2-E5B4BE3C069E%40gmail.com
---
 doc/src/sgml/ddl.sgml                    |   1 +
 doc/src/sgml/logical-replication.sgml    | 346 +++++++++++++++++++++++++++++++
 doc/src/sgml/ref/create_publication.sgml |   5 +
 3 files changed, 352 insertions(+)

diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index dea04d6..7ff39ae 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -519,6 +519,7 @@ CREATE TABLE people (
       <link linkend="sql-createpublication-params-with-publish-generated-columns">
       <literal>publish_generated_columns</literal></link> or by including them
       in the column list of the <command>CREATE PUBLICATION</command> command.
+      See <xref linkend="logical-replication-gencols"/> for details.
      </para>
     </listitem>
    </itemizedlist>
diff --git a/doc/src/sgml/logical-replication.sgml b/doc/src/sgml/logical-replication.sgml
index ab683cf..d5283f5 100644
--- a/doc/src/sgml/logical-replication.sgml
+++ b/doc/src/sgml/logical-replication.sgml
@@ -1430,6 +1430,14 @@ test_sub=# SELECT * FROM child ORDER BY a;
   </para>
 
   <para>
+   Generated columns can also be specified in a column list. This allows
+   generated columns to be published, regardless of the publication parameter
+   <link linkend="sql-createpublication-params-with-publish-generated-columns">
+   <literal>publish_generated_columns</literal></link>. See
+   <xref linkend="logical-replication-gencols"/> for details.
+  </para>
+
+  <para>
    Specifying a column list when the publication also publishes
    <link linkend="sql-createpublication-params-for-tables-in-schema"><literal>FOR TABLES IN SCHEMA</literal></link>
    is not supported.
@@ -1594,6 +1602,344 @@ test_sub=# SELECT * FROM t1 ORDER BY id;
 
  </sect1>
 
+ <sect1 id="logical-replication-gencols">
+  <title>Generated Column Replication</title>
+
+  <para>
+   Typically, a table at the subscriber will be defined the same as the
+   publisher table, so if the publisher table has a <link linkend="ddl-generated-columns">
+   <literal>GENERATED column</literal></link> then the subscriber table will
+   have a matching generated column. In this case, it is always the subscriber
+   table generated column value that is used.
+  </para>
+
+  <para>
+   For example, note below that subscriber table generated column value comes from the
+   subscriber column's calculation.
+<programlisting>
+test_pub=# CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a + 1) STORED);
+CREATE TABLE
+test_pub=# INSERT INTO tab_gen_to_gen VALUES (1),(2),(3);
+INSERT 0 3
+test_pub=# CREATE PUBLICATION pub1 FOR TABLE tab_gen_to_gen;
+CREATE PUBLICATION
+test_pub=# SELECT * FROM tab_gen_to_gen;
+ a | b
+---+---
+ 1 | 2
+ 2 | 3
+ 3 | 4
+(3 rows)
+
+test_sub=# CREATE TABLE tab_gen_to_gen (a int, b int GENERATED ALWAYS AS (a * 100) STORED);
+CREATE TABLE
+test_sub=# CREATE SUBSCRIPTION sub1 CONNECTION 'dbname=test_pub' PUBLICATION pub1;
+CREATE SUBSCRIPTION
+test_sub=# SELECT * from tab_gen_to_gen;
+ a | b
+---+----
+ 1 | 100
+ 2 | 200
+ 3 | 300
+(3 rows)
+</programlisting>
+  </para>
+
+  <para>
+   In fact, prior to version 18.0, logical replication does not publish
+   <literal>GENERATED</literal> columns at all.
+  </para>
+
+  <para>
+   But, replicating a generated column to a regular column can sometimes be
+   desirable.
+   <tip>
+    <para>
+     This feature may be useful when replicating data to a
+     non-PostgreSQL database via plugin output, especially if the target database
+     does not support generated columns.
+    </para>
+  </tip>
+  </para>
+
+ <sect2 id="logical-replication-gencols-howto">
+  <title>How to Publish Generated Columns</title>
+
+  <para>
+   Generated columns are not published by default, but users can opt to
+   publish stored generated columns just like regular ones.
+  </para>
+
+  <para>
+   There are two ways to do this:
+   <itemizedlist>
+     <listitem>
+      <para>
+       Set the <command>PUBLICATION</command> parameter
+       <link linkend="sql-createpublication-params-with-publish-generated-columns">
+       <literal>publish_generated_columns</literal></link> to <literal>stored</literal>.
+       This instructs PostgreSQL logical replication to publish current and
+       future stored generated columns of the publication's tables.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       Specify a table <link linkend="logical-replication-col-lists">column list</link>
+       to explicitly nominate which stored generated columns will be published.
+      </para>
+
+      <note>
+       <para>
+        When determining which table columns will be published, a column list
+        takes precedence, overriding the effect of the
+        <literal>publish_generated_columns</literal> parameter.
+       </para>
+      </note>
+     </listitem>
+   </itemizedlist>
+  </para>
+ </sect2>
+
+ <sect2 id="logical-replication-gencols-behavior-summary">
+   <title>Behavior Summary</title>
+
+   <para>
+    The following table summarizes behavior when there are generated columns
+    involved in the logical replication. Results are shown for when
+    publishing generated columns is not enabled, and for when it is
+    enabled.
+   </para>
+
+   <table id="logical-replication-gencols-table-summary">
+    <title>Replication Result Summary</title>
+    <tgroup cols="4">
+
+    <thead>
+     <row>
+      <entry>Publish generated columns?</entry>
+      <entry>Publisher table column</entry>
+      <entry>Subscriber table column</entry>
+      <entry>Result</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry>No</entry>
+      <entry>GENERATED</entry>
+      <entry>GENERATED</entry>
+      <entry>Publisher table column is not replicated. Use the subscriber table generated column value.</entry>
+     </row>
+
+     <row>
+      <entry>No</entry>
+      <entry>GENERATED</entry>
+      <entry>regular</entry>
+      <entry>Publisher table column is not replicated. Use the subscriber table regular column default value.</entry>
+     </row>
+
+     <row>
+      <entry>No</entry>
+      <entry>GENERATED</entry>
+      <entry>--missing--</entry>
+      <entry>Publisher table column is not replicated. Nothing happens.</entry>
+     </row>
+
+     <row>
+      <entry>Yes</entry>
+      <entry>GENERATED</entry>
+      <entry>GENERATED</entry>
+      <entry>ERROR. Not supported.</entry>
+     </row>
+
+     <row>
+      <entry>Yes</entry>
+      <entry>GENERATED</entry>
+      <entry>regular</entry>
+      <entry>Publisher table column value is replicated to the subscriber table column.</entry>
+     </row>
+
+     <row>
+      <entry>Yes</entry>
+      <entry>GENERATED</entry>
+      <entry>--missing--</entry>
+      <entry>ERROR. The column is reported as missing from the subscriber table.</entry>
+     </row>
+    </tbody>
+   </tgroup>
+   </table>
+
+   <warning>
+    <para>
+     There's currently no support for subscriptions comprising several
+     publications where the same table has been published with different column
+     lists. See <xref linkend="logical-replication-col-lists"/>.
+    </para>
+
+    <para>
+     This same situation can occur if one publication is publishing generated
+     columns, while another publication in the same subscription is not
+     publishing generated columns for the same table.
+    </para>
+   </warning>
+
+   <note>
+    <para>
+     If the subscriber is from a release prior to 18, then initial table
+     synchronization won't copy generated columns even if they are defined in
+     the publisher.
+    </para>
+   </note>
+ </sect2>
+
+ <sect2 id="logical-replication-gencols-examples">
+   <title>Examples</title>
+
+  <para>
+   Setup the publisher and subscriber tables. Note that the subscriber
+   table columns have same names, but are not defined the same as the
+   publisher columns.
+<programlisting>
+test_pub=# CREATE TABLE t1 (
+               a int PRIMARY KEY,
+               b int,
+               c int GENERATED ALWAYS AS (a + 1) STORED,
+               d int GENERATED ALWAYS AS (b + 1) STORED);
+
+test_pub=# CREATE TABLE t2 (
+               a int PRIMARY KEY,
+               b int,
+               c int GENERATED ALWAYS AS (a + 1) STORED,
+               d int GENERATED ALWAYS AS (b + 1) STORED);
+</programlisting>
+
+<programlisting>
+test_sub=# CREATE TABLE t1 (
+               a int PRIMARY KEY,
+               b int,
+               c int,
+               d int GENERATED ALWAYS AS (b * 100) STORED);
+
+test_sub=# CREATE TABLE t2 (
+               a int PRIMARY KEY,
+               b int,
+               c int,
+               d int);
+</programlisting>
+  </para>
+
+  <para>
+   Create the <literal>PUBLICATION</literal> and the <literal>SUBSCRIPTION</literal>.
+   Note that the publication specifies a column list for table <literal>t2</literal>.
+   The publication also sets parameter <literal>publish_generated_columns=none</literal>,
+   but that is just for demonstration because <literal>none</literal> is the
+   default anyway.
+<programlisting>
+test_pub=# CREATE PUBLICATION pub1 FOR TABLE t1, t2(a,c)
+               WITH (publish_generated_columns=none);
+</programlisting>
+
+<programlisting>
+test_sub=# CREATE SUBSCRIPTION sub1
+               CONNECTION 'dbname=test_pub'
+               PUBLICATION pub1;
+</programlisting>
+  </para>
+
+  <para>
+   Insert some data to the publisher tables:
+<programlisting>
+test_pub=# INSERT INTO t1 VALUES (1,2);
+INSERT 0 1
+test_pub=# INSERT INTO t2 VALUES (1,2);
+INSERT 0 1
+
+test_pub=# SELECT * FROM t1;
+ a | b | c | d
+---+---+---+---
+ 1 | 2 | 2 | 3
+(1 row)
+
+test_pub=# SELECT * FROM t2;
+ a | b | c | d
+---+---+---+---
+ 1 | 2 | 2 | 3
+(1 row)
+</programlisting>
+  </para>
+
+  <para>
+   Observe how columns for table <literal>t1</literal> were replicated:
+<programlisting>
+test_sub=# SELECT * FROM t1;
+ a | b | c |  d
+---+---+---+-----
+ 1 | 2 |   | 200
+(1 row)
+</programlisting>
+  <itemizedlist>
+   <listitem><para>
+    <literal>t1.a</literal> is a regular column. It gets replicated normally.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t1.b</literal> is a regular column. It gets replicated normally.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t1.c</literal> is a generated column. It is not replicated because
+    <literal>publish_generated_columns=none</literal>. The subscriber
+    <literal>t2.c</literal> default column value is used.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t1.d</literal> is a generated column. It is not replicated because
+    <literal>publish_generated_columns=none</literal>. The subscriber
+    <literal>t2.d</literal> generated column value is used.
+   </para></listitem>
+  </itemizedlist>
+  </para>
+
+  <para>
+   Observe how columns for table <literal>t2</literal> were replicated.
+<programlisting>
+test_sub=# SELECT * FROM t2;
+ a | b | c | d
+---+---+---+---
+ 1 |   | 2 |
+(1 row)
+</programlisting>
+  <itemizedlist>
+   <listitem><para>
+    <literal>t2.a</literal> is a regular column. It was specified in the column
+    list, so is replicated normally.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t2.b</literal> is a regular column. It was not specified in column
+    list so is not replicated. The subscriber <literal>t2.b</literal> default
+    value is used.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t2.c</literal> is a generated column. It was specified in the
+    column list, so is replicated to the subscriber <literal>t2.c</literal>
+    regular column.
+   </para></listitem>
+
+   <listitem><para>
+    <literal>t2.d</literal> is a generated column. It was not specified in the
+    column list, so is not replicated. The subscriber <literal>t2.d</literal>
+    default value is used.
+   </para></listitem>
+  </itemizedlist>
+  </para>
+
+ </sect2>
+
+ </sect1>
+
  <sect1 id="logical-replication-conflicts">
   <title>Conflicts</title>
 
diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml
index e822ea2..73f0c8d 100644
--- a/doc/src/sgml/ref/create_publication.sgml
+++ b/doc/src/sgml/ref/create_publication.sgml
@@ -217,6 +217,11 @@ CREATE PUBLICATION <replaceable class="parameter">name</replaceable>
            in the publisher.
           </para>
          </note>
+
+         <para>
+          See <xref linkend="logical-replication-gencols"/> for more details about
+          logical replication of generated columns.
+         </para>
         </listitem>
        </varlistentry>
 
-- 
1.8.3.1

