From 8f6e912979de0b6158c5122aea8169a33c35ba99 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 26 May 2020 13:48:17 +0900
Subject: [PATCH v13 3/3] Documentation update.

---
 doc/src/sgml/config.sgml              |  47 +++++
 doc/src/sgml/database-encryption.sgml | 285 ++++++++++++++++++++++++++
 doc/src/sgml/filelist.sgml            |   1 +
 doc/src/sgml/func.sgml                |  99 +++++++++
 doc/src/sgml/installation.sgml        |   5 +-
 doc/src/sgml/postgres.sgml            |   1 +
 doc/src/sgml/ref/initdb.sgml          |  19 ++
 doc/src/sgml/ref/pgupgrade.sgml       |   7 +
 doc/src/sgml/storage.sgml             |   5 +
 9 files changed, 467 insertions(+), 2 deletions(-)
 create mode 100644 doc/src/sgml/database-encryption.sgml

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 427947cf49..3513971ad2 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7674,6 +7674,39 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
     </variablelist>
    </sect1>
 
+   <sect1 id="runtime-config-encryption">
+    <title>Encryption Key Management</title>
+
+    <variablelist>
+     <varlistentry id="guc-cluster-passphrase-command" xreflabel="cluster_passphrase_command">
+      <term><varname>cluster_passphrase_command</varname> (<type>string</type>)
+      <indexterm>
+       <primary><varname>cluster_passphrase_command</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        This option specifies an external command to be invoked when a passphrase
+        for the key management system needs to be obtained.
+       </para>
+       <para>
+        The command must print the passphrase to the standard output and exit
+        with code 0.  In the parameter value, <literal>%p</literal> is
+        replaced by a prompt string.  (Write <literal>%%</literal> for a
+        literal <literal>%</literal>.)  Note that the prompt string will
+        probably contain whitespace, so be sure to quote adequately.  A single
+        newline is stripped from the end of the output if present.  The passphrase
+        must be at least 64 bytes.
+       </para>
+       <para>
+        This parameter can only be set in the <filename>postgresql.conf</filename>
+        file or on the server command line.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </sect1>
+
    <sect1 id="runtime-config-client">
     <title>Client Connection Defaults</title>
 
@@ -9518,6 +9551,20 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir'
       </listitem>
      </varlistentry>
 
+      <varlistentry id="guc-key-management-enabled" xreflabel="key_management_enabled">
+      <term><varname>key_management_enabled</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>key_management_enabled</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Reports whether encryption key management is enabled for this cluster.
+        See <xref linkend="app-initdb-cluster-passphrase-command"/> for more information.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-data-directory-mode" xreflabel="data_directory_mode">
       <term><varname>data_directory_mode</varname> (<type>integer</type>)
       <indexterm>
diff --git a/doc/src/sgml/database-encryption.sgml b/doc/src/sgml/database-encryption.sgml
new file mode 100644
index 0000000000..608d70abf4
--- /dev/null
+++ b/doc/src/sgml/database-encryption.sgml
@@ -0,0 +1,285 @@
+<!-- doc/src/sgml/database-encryption.sgml -->
+
+<chapter id="database-encryption">
+ <title>Database Encryption</title>
+
+ <indexterm zone="database-encryption">
+  <primary>Server Side Encryption</primary>
+ </indexterm>
+
+ <para>
+  The purpose of database encryption is to protect the confidential data
+  stored in a database from being revealed.
+ </para>
+
+ <sect1 id="encryption-key-management">
+  <title>Encryption Key Management</title>
+
+  <para>
+   <productname>PostgreSQL</productname> supports an internal
+   <firstterm>Encryption Key Management System</firstterm>, which is designed
+   to manage the life cycles of cryptographic keys within
+   <productname>PostgreSQL</productname>.  This includes dealing with their
+   generation, storage, usage and rotation.
+  </para>
+
+  <para>
+   The encryption key management system is enabled when
+   <productname>PostgreSQL</productname> is built with
+   <literal>--with-openssl</literal> and
+   <xref linkend="app-initdb-cluster-passphrase-command"/> is specified during
+   <command>initdb</command>.  The cluster passphrase provided by the
+   <option>--cluster-passphrase-command</option> option during
+   <command>initdb</command> and the one generated by
+   <xref linkend="guc-cluster-passphrase-command"/> in
+   <filename>postgresql.conf</filename> must match; otherwise, the database
+   cluster will not start up.  Please note that the cluster passphrase command
+   passed to <command>initdb</command> must return a passphrase of at least
+   64 bytes and less than 1024 bytes.  For example:
+<programlisting>
+initdb -D dbname --cluster-passphrase-command="cat /path/to/passphrase-file"
+</programlisting>
+  </para>
+
+  <para>
+   Encryption keys managed by <productname>PostgreSQL</productname>'s
+   key management system are used only internally, for example for
+   transparent data encryption in a future release.  These encryption keys are
+   never taken out of the database in plaintext form.  Also, the encryption key
+   management system controls only pre-defined encryption keys; there is no
+   interface to add or remove managed encryption keys.
+  </para>
+
+  <sect2 id="key-encryption-key">
+   <title>Key Encryption Key (<acronym>KEK</acronym>)</title>
+
+   <para>
+    During the <command>initdb</command> process, the cluster passphrase provided
+    by <option>--cluster-passphrase-command</option> is used to derive the
+    <firstterm>Key Encryption Key (<acronym>KEK</acronym>)</firstterm>.
+    The <acronym>KEK</acronym> encapsulates the cryptographic keys managed inside
+    <productname>PostgreSQL</productname>, described in
+    <xref linkend="key-derivations"/>, by way of the authenticated
+    encryption described in <xref linkend="key-wrapping"/> before storing
+    the keys to persistent storage.  <acronym>KEK</acronym> must be stored
+    in a trusted key store, such as key vault software or services, or a
+    hardware security module.
+   </para>
+
+   <para>
+    When a <productname>PostgreSQL</productname> server with encryption key
+    management enabled is started, the <varname>cluster_passphrase_command</varname>
+    parameter in <filename>postgresql.conf</filename> is evaluated and the
+    <acronym>KEK</acronym> is derived from the cluster passphrase in the same
+    way as during <command>initdb</command>.
+   </para>
+
+   <para>
+    After that, the cryptographic keys will be retrieved from
+    <filename>pg_cryptokeys</filename> directory to be restored and
+    integrity-checked by the key management system using <acronym>KEK</acronym>.
+    If this process fails, it is likely that the cluster passphrase supplied
+    to the cluster is not the same as that supplied to the
+    <command>initdb</command> process.  The cluster will refuse to start in this
+    case and the user has to manually correct the cluster passphrase.
+   </para>
+
+   <para>
+    The <acronym>KEK</acronym> is not stored physically within the
+    <productname>PostgreSQL</productname> server, as it is designed
+    to be derived from the correctly configured cluster passphrase.
+   </para>
+  </sect2>
+
+  <sect2 id="key-derivations">
+   <title>Key Derivations</title>
+
+   <para>
+    The encryption key management system can manage multiple cryptographic keys
+    that have different purposes and usages within <productname>PostgreSQL</productname>.
+    Currently, the encryption key management system manages no cryptographic keys.
+   </para>
+  </sect2>
+
+  <sect2 id="key-wrapping">
+   <title>Key Protection</title>
+
+   <para>
+    The key management system persists cryptographic keys to disk after wrapping
+    them with the <acronym>KEK</acronym>.  This section describes how the key
+    management system wraps and unwraps keys.
+   </para>
+
+   <para>
+    The key management system uses Authenticated Encryption with Associated Data
+    (<acronym>AEAD</acronym>) to wrap cryptographic keys, which is a form of
+    encryption.  In addition to providing a way to protect confidential data from
+    being revealed, it provides a way to check its integrity and the authenticity
+    of some associated data.  It follows the Encrypt-then-MAC approach, based on
+    the composition of the Advanced Encryption Standard (<acronym>AES</acronym>)
+    in the Cipher Block Chaining (<acronym>CBC</acronym>) mode of operation
+    for encryption with a random initialization vector (<acronym>IV</acronym>) and
+    the <literal>HMAC-SHA</literal> message authentication code
+    (<acronym>MAC</acronym>).
+   </para>
+
+   <para>
+    Key management system uses two kinds of cryptographic keys for key wrapping:
+   </para>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term><literal>Encryption Key</literal></term>
+      <listitem>
+       <para>
+        The encryption key is a 256-bit randomly generated key.  It is primarily
+        used as the key to encapsulate or restore data with the
+        <acronym>AES256</acronym> algorithm.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><literal>MAC Key</literal></term>
+      <listitem>
+       <para>
+        The <acronym>MAC</acronym> key is a 512-bit randomly generated key.
+        <acronym>SHA512</acronym> is the algorithm used along with the
+        <acronym>MAC</acronym> key to compute a cryptographic hash for integrity
+        check purposes.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
+
+   <para>
+    The key management system's key wrapping algorithm is as follows:
+
+    <orderedlist>
+     <listitem>
+      <simpara>Generate random <acronym>IV</acronym>.</simpara>
+     </listitem>
+     <listitem>
+      <simpara>Add padding to the plaintext following PKCS#7 described in
+      <ulink url="https://tools.ietf.org/html/rfc2315">RFC2315</ulink>.</simpara>
+     </listitem>
+     <listitem>
+      <simpara>Encrypt the padded plaintext with the <acronym>IV</acronym>
+       using <acronym>AES256</acronym> in <acronym>CBC</acronym>
+       mode.</simpara>
+     </listitem>
+     <listitem>
+      <simpara>Compute <acronym>HMAC</acronym> over the encrypted data.</simpara>
+     </listitem>
+     <listitem>
+      <simpara>Concatenate the <acronym>HMAC</acronym>, the <acronym>IV</acronym>
+       and the encrypted data to form the resulting ciphertext.</simpara>
+     </listitem>
+    </orderedlist>
+   </para>
+
+   <para>
+    The length of the resulting ciphertext can be computed from that of the
+    plaintext by the following formula:
+<programlisting>
+ Ciphertext Length = 64 + 16 + 16 * (floor(input_size / 16) + 1)
+</programlisting>
+   </para>
+  </sect2>
+
+  <sect2 id="key-management-rotation">
+   <title>Key Rotation Process</title>
+
+   <para>
+    Encryption keys in general should not be used indefinitely; the longer the
+    same key is in use, the greater the chance of it being breached.  Performing
+    key rotation on a regular basis helps meet standardized security practices
+    such as <ulink url="https://www.pcisecuritystandards.org/">PCI-DSS</ulink>,
+    and it is good security practice to limit the number of encrypted
+    bytes available for a specific key version.  Key lifetimes are based
+    on key length, key strength, algorithm and total number of bytes
+    enciphered.  The key management system provides an efficient method to
+    perform key rotation.
+   </para>
+
+   <para>
+    Please be aware that the phrase <literal>"key rotation"</literal> here
+    only refers to the rotation of <acronym>KEK</acronym>. The cryptographic
+    keys managed by encryption key management system are not rotated; they
+    will in fact be the same before and after a <literal>"key rotation"</literal>.
+    This can be justified because the actual keys are never stored anywhere
+    physically, presented to the user or captured in logs.  What is being
+    rotated here is the <acronym>KEK</acronym>, which is responsible for
+    encapsulating and restoring cryptographic keys.
+   </para>
+
+   <para>
+    Since <acronym>KEK</acronym> is derived from a cluster passphrase, the
+    <literal>"key rotation"</literal> ultimately refers to the rotation of
+    cluster passphrase and deriving a new <acronym>KEK</acronym> from the
+    new cluster passphrase. The new <acronym>KEK</acronym> can then be used
+    to encapsulate all encryption keys and store the new results in the
+    <filename>pg_cryptokeys</filename> directory.
+   </para>
+
+   <para>
+    To complete the cluster passphrase rotation, user needs to follow the
+    steps below:
+   </para>
+   <itemizedlist>
+    <listitem>
+     <para>
+      Ensure the <productname>PostgreSQL</productname> server is running
+      correctly with KMS enabled.  Passphrase rotation cannot be completed
+      with the server shut down.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Update the <xref linkend="guc-cluster-passphrase-command"/> parameter and
+      reload the configuration so that the command returns the new cluster passphrase.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      In a session, execute the <function>pg_rotate_cluster_passphrase()</function>
+      SQL function to initiate the rotation.  The function returns true upon
+      successful key rotation and false otherwise.
+<programlisting>
+=# SELECT pg_rotate_cluster_passphrase();
+pg_rotate_cluster_passphrase
+------------------------------
+ t
+(1 row)
+</programlisting>
+     </para>
+    </listitem>
+   </itemizedlist>
+
+   <para>
+    Upon successful cluster passphrase rotation, all managed cryptographic keys
+    will be re-encapsulated by the new <acronym>KEK</acronym> derived from the
+    new cluster passphrase.  The newly encapsulated keys will be stored in the
+    <filename>pg_cryptokeys</filename> directory.  Please note that the cryptographic
+    keys are the same as before; the rotation process only changes the
+    <acronym>KEK</acronym> that is used to encapsulate and verify the actual
+    cryptographic keys. This way, there is no need to decrypt all the encrypted
+    data with the old keys and re-encrypt them with the new.
+   </para>
+
+   <para>
+    In case of a crash during the cluster passphrase rotation process, the key
+    management system is able to recover the previous set of cryptographic
+    keys the next time the server starts up.  This is possible because key rotation
+    and encapsulation are performed in a separate temporary key directory called
+    <filename>pg_cryptokeys_tmp</filename>, which replaces
+    <filename>pg_cryptokeys</filename> and is deleted only when everything has
+    finished successfully.  If the server starts with a <filename>pg_cryptokeys_tmp</filename>
+    directory present, the previous attempt at cluster passphrase rotation was not
+    completed.  In this case, the server will discard the <filename>pg_cryptokeys_tmp</filename>
+    directory and load the keys in <filename>pg_cryptokeys</filename> as usual.
+   </para>
+  </sect2>
+ </sect1>
+</chapter>
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index 64b5da0070..8cf16ec1c9 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -48,6 +48,7 @@
 <!ENTITY wal           SYSTEM "wal.sgml">
 <!ENTITY logical-replication    SYSTEM "logical-replication.sgml">
 <!ENTITY jit    SYSTEM "jit.sgml">
+<!ENTITY database-encryption SYSTEM "database-encryption.sgml">
 
 <!-- programmer's guide -->
 <!ENTITY bgworker   SYSTEM "bgworker.sgml">
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index f766c1bc67..1790db49cb 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -26729,4 +26729,103 @@ SELECT m.* FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid),
 
   </sect1>
 
+  <sect1 id="functions-encryption">
+   <title>Encryption Functions</title>
+
+   <sect2 id="functions-data-encryption">
+    <title>Data Encryption Functions</title>
+    <para>
+     The functions shown in
+     <xref linkend="functions-encryption-table"/> are for encrypting
+     and decrypting data with the <literal>SQL key</literal> described in
+     <xref linkend="encryption-key-management"/>.
+    </para>
+
+    <table id="functions-encryption-table">
+     <title>Encryption <acronym>SQL</acronym> Functions</title>
+     <tgroup cols="3">
+      <thead>
+       <row>
+        <entry>Function</entry>
+        <entry>Return Type</entry>
+        <entry>Description</entry>
+       </row>
+      </thead>
+      <tbody>
+
+       <row>
+        <entry>
+         <indexterm>
+          <primary>pg_encrypt</primary>
+         </indexterm>
+         <literal><function>pg_encrypt(<parameter>data</parameter> <type>text</type>)</function></literal>
+        </entry>
+        <entry>
+         <type>bytea</type>
+        </entry>
+        <entry>
+         Encrypt the given data with the internal SQL key
+        </entry>
+       </row>
+
+       <row>
+        <entry>
+         <indexterm>
+          <primary>pg_decrypt</primary>
+         </indexterm>
+         <literal><function>pg_decrypt(<parameter>data</parameter> <type>bytea</type>)</function></literal>
+        </entry>
+        <entry>
+         <type>text</type>
+        </entry>
+        <entry>
+         Decrypt the given data with the internal SQL key
+        </entry>
+       </row>
+      </tbody>
+     </tgroup>
+    </table>
+   </sect2>
+
+   <sect2 id="functions-key-management">
+    <title>Key Management Functions</title>
+    <para>
+     The function shown in
+     <xref linkend="functions-key-management-table"/> is for encryption
+     key management described in <xref linkend="encryption-key-management"/>.
+    </para>
+
+    <table id="functions-key-management-table">
+     <title>Encryption Key Management <acronym>SQL</acronym> Functions</title>
+     <tgroup cols="3">
+      <thead>
+       <row>
+        <entry>Function</entry>
+        <entry>Return Type</entry>
+        <entry>Description</entry>
+       </row>
+      </thead>
+      <tbody>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>pg_rotate_cluster_passphrase</primary>
+         </indexterm>
+         <literal><function>pg_rotate_cluster_passphrase()</function></literal>
+        </entry>
+        <entry>
+         <type>boolean</type>
+        </entry>
+        <entry>
+         Rotate the cluster passphrase. See
+         <xref linkend="key-management-rotation"/> for details.
+        </entry>
+       </row>
+
+      </tbody>
+     </tgroup>
+    </table>
+   </sect2>
+  </sect1>
+
 </chapter>
diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml
index 552303e211..9e6823570c 100644
--- a/doc/src/sgml/installation.sgml
+++ b/doc/src/sgml/installation.sgml
@@ -979,8 +979,9 @@ build-postgresql:
        <listitem>
         <para>
          Build with support for <acronym>SSL</acronym> (encrypted)
-         connections. This requires the <productname>OpenSSL</productname>
-         package to be installed.  <filename>configure</filename> will check
+         connections and key management. This requires the
+         <productname>OpenSSL</productname> package to be installed.
+         <filename>configure</filename> will check
          for the required header files and libraries to make sure that
          your <productname>OpenSSL</productname> installation is sufficient
          before proceeding.
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index c41ce9499b..a96196c85d 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -170,6 +170,7 @@ break is not needed in a wider output rendering.
   &wal;
   &logical-replication;
   &jit;
+  &database-encryption;
   &regress;
 
  </part>
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index b6a55ce105..e131989d59 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -163,6 +163,25 @@ PostgreSQL documentation
       </listitem>
      </varlistentry>
 
+     <varlistentry id="app-initdb-cluster-passphrase-command" xreflabel="cluster passphrase command">
+      <term><option>--cluster-passphrase-command=<replaceable class="parameter">command</replaceable></option></term>
+      <listitem>
+       <para>
+        This option specifies an external command to be invoked when a passphrase
+        for the key management system needs to be obtained.
+       </para>
+       <para>
+        The command must print the passphrase to the standard output and exit
+        with code 0.  In the parameter value, <literal>%p</literal> is
+        replaced by a prompt string.  (Write <literal>%%</literal> for a
+        literal <literal>%</literal>.)  Note that the prompt string will
+        probably contain whitespace, so be sure to quote adequately.  A single
+        newline is stripped from the end of the output if present.  The passphrase
+        must be at least 64 bytes.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-D <replaceable class="parameter">directory</replaceable></option></term>
       <term><option>--pgdata=<replaceable class="parameter">directory</replaceable></option></term>
diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml
index 6779a5bddc..9140f5dadb 100644
--- a/doc/src/sgml/ref/pgupgrade.sgml
+++ b/doc/src/sgml/ref/pgupgrade.sgml
@@ -821,6 +821,13 @@ psql --username=postgres --file=script.sql postgres
    is down.
   </para>
 
+  <para>
+   During the upgrade, <command>pg_upgrade</command> copies all internal keys
+   to the new cluster.  If you upgrade from an old cluster that has key
+   management enabled to a new cluster that also has it enabled, you must use the
+   same <varname>cluster_passphrase_command</varname> for both clusters.  Otherwise
+   <command>pg_upgrade</command> fails due to a cluster passphrase mismatch.
+  </para>
  </refsect1>
 
  <refsect1>
diff --git a/doc/src/sgml/storage.sgml b/doc/src/sgml/storage.sgml
index 3234adb639..05e9b262a4 100644
--- a/doc/src/sgml/storage.sgml
+++ b/doc/src/sgml/storage.sgml
@@ -77,6 +77,11 @@ Item
  <entry>Subdirectory containing transaction commit timestamp data</entry>
 </row>
 
+<row>
+ <entry><filename>pg_cryptokeys</filename></entry>
+ <entry>Subdirectory containing cryptographic keys</entry>
+</row>
+
 <row>
  <entry><filename>pg_dynshmem</filename></entry>
  <entry>Subdirectory containing files used by the dynamic shared memory
-- 
2.23.0

