 config/c-compiler.m4                     |  34 +++
 configure                                | 147 ++++++++++-
 configure.ac                             |  62 ++++-
 src/Makefile.global.in                   |   3 +
 src/common/wchar.c                       | 116 +++++++--
 src/include/pg_config.h.in               |   9 +
 src/include/port/pg_utf8.h               |  72 ++++++
 src/port/Makefile                        |   6 +
 src/port/pg_utf8_fallback.c              | 132 ++++++++++
 src/port/pg_utf8_sse42.c                 | 424 +++++++++++++++++++++++++++++++
 src/port/pg_utf8_sse42_choose.c          |  69 +++++
 src/test/regress/expected/conversion.out |  52 ++++
 src/test/regress/sql/conversion.sql      |  28 ++
 src/tools/msvc/Solution.pm               |   3 +
 14 files changed, 1131 insertions(+), 26 deletions(-)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 780e906ecc..a346d8429a 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -591,6 +591,40 @@ if test x"$pgac_cv_gcc_atomic_int64_cas" = x"yes"; then
   AC_DEFINE(HAVE_GCC__ATOMIC_INT64_CAS, 1, [Define to 1 if you have __atomic_compare_exchange_n(int64 *, int64 *, int64).])
 fi])# PGAC_HAVE_GCC__ATOMIC_INT64_CAS
 
+# PGAC_SSE42_UTF8_INTRINSICS
+# ---------------------------
+# XXX this was copy-pasted from the equivalent CRC checks -- there may be bugs.
+#
+# Check if the compiler supports x86 instructions added in SSSE3 and SSE 4.1,
+# in particular _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128.
+# We don't test for SSE2 intrinsics; they are assumed to be present on
+# x86-64 platforms.
+#
+# An optional compiler flag can be passed as argument (e.g. -msse4.2). If the
+# intrinsics are supported, sets pgac_sse42_utf8_intrinsics and CFLAGS_SSE42.
+#
+# Note: We could create a new CFLAGS macro for SSE4.1, but it doesn't seem worth it.
+AC_DEFUN([PGAC_SSE42_UTF8_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_sse42_utf8_intrinsics_$1])])dnl
+AC_CACHE_CHECK([for _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128 with CFLAGS=$1], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS $1"
+AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <nmmintrin.h>],
+  [ __m128i zero = _mm_setzero_si128();
+    return _mm_testz_si128(zero,
+      _mm_shuffle_epi8(zero,
+      _mm_alignr_epi8(zero, zero, 1)));])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+  CFLAGS_SSE42="$1"
+  pgac_sse42_utf8_intrinsics=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_SSE42_UTF8_INTRINSICS
+
+
 # PGAC_SSE42_CRC32_INTRINSICS
 # ---------------------------
 # Check if the compiler supports the x86 CRC instructions added in SSE 4.2,
diff --git a/configure b/configure
index ce9ea36999..fd7e1c5e0f 100755
--- a/configure
+++ b/configure
@@ -647,6 +647,7 @@ MSGFMT_FLAGS
 MSGFMT
 PG_CRC32C_OBJS
 CFLAGS_ARMV8_CRC32C
+PG_UTF8_OBJS
 CFLAGS_SSE42
 have_win32_dbghelp
 LIBOBJS
@@ -17670,6 +17671,93 @@ $as_echo "#define HAVE__CPUID 1" >>confdefs.h
 
 fi
 
+# Check for Intel SSSE3 and SSE 4.1 intrinsics for UTF-8 validation.
+# Note: we reuse the flag, runtime check, and naming scheme used for SSE4.2.
+#
+# First check if the _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128
+# intrinsics can be used
+# with the default compiler flags. If not, check if adding the -msse4.2
+# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128 with CFLAGS=" >&5
+$as_echo_n "checking for _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128 with CFLAGS=... " >&6; }
+if ${pgac_cv_sse42_utf8_intrinsics_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS "
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <nmmintrin.h>
+int
+main ()
+{
+ __m128i zero = _mm_setzero_si128();
+    return _mm_testz_si128(zero,
+      _mm_shuffle_epi8(zero,
+      _mm_alignr_epi8(zero, zero, 1)));
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_sse42_utf8_intrinsics_=yes
+else
+  pgac_cv_sse42_utf8_intrinsics_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_utf8_intrinsics_" >&5
+$as_echo "$pgac_cv_sse42_utf8_intrinsics_" >&6; }
+if test x"$pgac_cv_sse42_utf8_intrinsics_" = x"yes"; then
+  CFLAGS_SSE42=""
+  pgac_sse42_utf8_intrinsics=yes
+fi
+
+if test x"$pgac_sse42_utf8_intrinsics" != x"yes"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128 with CFLAGS=-msse4.2" >&5
+$as_echo_n "checking for _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128 with CFLAGS=-msse4.2... " >&6; }
+if ${pgac_cv_sse42_utf8_intrinsics__msse4_2+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -msse4.2"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <nmmintrin.h>
+int
+main ()
+{
+ __m128i zero = _mm_setzero_si128();
+    return _mm_testz_si128(zero,
+      _mm_shuffle_epi8(zero,
+      _mm_alignr_epi8(zero, zero, 1)));
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_sse42_utf8_intrinsics__msse4_2=yes
+else
+  pgac_cv_sse42_utf8_intrinsics__msse4_2=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_sse42_utf8_intrinsics__msse4_2" >&5
+$as_echo "$pgac_cv_sse42_utf8_intrinsics__msse4_2" >&6; }
+if test x"$pgac_cv_sse42_utf8_intrinsics__msse4_2" = x"yes"; then
+  CFLAGS_SSE42="-msse4.2"
+  pgac_sse42_utf8_intrinsics=yes
+fi
+
+fi
+
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 # First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
@@ -17777,6 +17865,63 @@ if ac_fn_c_try_compile "$LINENO"; then :
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 
+# Select UTF-8 validator implementation.
+# XXX this was copy-pasted from the equivalent CRC checks -- there may be bugs.
+#
+# If we are targeting a processor that has SSE 4.2 instructions, we can use
+# those to validate UTF-8 characters. If we're not targeting such
+# a processor, but we can nevertheless produce code that uses the SSE
+# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
+# select which one to use at runtime, depending on whether SSE 4.2 is supported
+# by the processor we're running on.
+#
+# You can override this logic by setting the appropriate USE_*_UTF8 flag to 1
+# in the template or configure command line.
+if test x"$USE_SSE42_UTF8" = x"" && test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then
+  if test x"$pgac_sse42_utf8_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
+    USE_SSE42_UTF8=1
+  else
+    # the CPUID instruction is needed for the runtime check.
+    if test x"$pgac_sse42_utf8_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
+      USE_SSE42_UTF8_WITH_RUNTIME_CHECK=1
+    else
+      # fall back to algorithm which doesn't require any special
+      # CPU support.
+      USE_FALLBACK_UTF8=1
+    fi
+  fi
+fi
+
+# Set PG_UTF8_OBJS appropriately depending on the selected implementation.
+# XXX this was copy-pasted from the equivalent CRC checks -- there may be bugs.
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking which UTF-8 validator to use" >&5
+$as_echo_n "checking which UTF-8 validator to use... " >&6; }
+if test x"$USE_SSE42_UTF8" = x"1"; then
+
+$as_echo "#define USE_SSE42_UTF8 1" >>confdefs.h
+
+  PG_UTF8_OBJS="pg_utf8_sse42.o"
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2" >&5
+$as_echo "SSE 4.2" >&6; }
+else
+  if test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"1"; then
+
+$as_echo "#define USE_SSE42_UTF8_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+    PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o pg_utf8_sse42_choose.o"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: SSE 4.2 with runtime check" >&5
+$as_echo "SSE 4.2 with runtime check" >&6; }
+  else
+
+$as_echo "#define USE_FALLBACK_UTF8 1" >>confdefs.h
+
+    PG_UTF8_OBJS="pg_utf8_fallback.o"
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: fallback" >&5
+$as_echo "fallback" >&6; }
+  fi
+fi
+
+
 # Check for ARMv8 CRC Extension intrinsics to do CRC calculations.
 #
 # First check if __crc32c* intrinsics can be used with the default compiler
@@ -17903,7 +18048,7 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" &&
           # fall back to slicing-by-8 algorithm, which doesn't require any
           # special CPU support.
           USE_SLICING_BY_8_CRC32C=1
-	fi
+        fi
       fi
     fi
   fi
diff --git a/configure.ac b/configure.ac
index 07da84d401..d18965cde5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2017,6 +2017,19 @@ if test x"$pgac_cv__cpuid" = x"yes"; then
   AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.])
 fi
 
+# Check for Intel SSSE3 and SSE 4.1 intrinsics for UTF-8 validation.
+# Note: we reuse the flag, runtime check, and naming scheme used for SSE4.2.
+#
+# First check if the _mm_alignr_epi8, _mm_shuffle_epi8, and _mm_testz_si128
+# intrinsics can be used
+# with the default compiler flags. If not, check if adding the -msse4.2
+# flag helps. CFLAGS_SSE42 is set to -msse4.2 if that's required.
+PGAC_SSE42_UTF8_INTRINSICS([])
+if test x"$pgac_sse42_utf8_intrinsics" != x"yes"; then
+  PGAC_SSE42_UTF8_INTRINSICS([-msse4.2])
+fi
+AC_SUBST(CFLAGS_SSE42)
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 # First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
@@ -2036,6 +2049,53 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [
 #endif
 ])], [SSE4_2_TARGETED=1])
 
+# Select UTF-8 validator implementation.
+# XXX this was copy-pasted from the equivalent CRC checks -- there may be bugs.
+#
+# If we are targeting a processor that has SSE 4.2 instructions, we can use
+# those to validate UTF-8 characters. If we're not targeting such
+# a processor, but we can nevertheless produce code that uses the SSE
+# intrinsics, perhaps with some extra CFLAGS, compile both implementations and
+# select which one to use at runtime, depending on whether SSE 4.2 is supported
+# by the processor we're running on.
+#
+# You can override this logic by setting the appropriate USE_*_UTF8 flag to 1
+# in the template or configure command line.
+if test x"$USE_SSE42_UTF8" = x"" && test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"" && test x"$USE_FALLBACK_UTF8" = x""; then
+  if test x"$pgac_sse42_utf8_intrinsics" = x"yes" && test x"$SSE4_2_TARGETED" = x"1" ; then
+    USE_SSE42_UTF8=1
+  else
+    # the CPUID instruction is needed for the runtime check.
+    if test x"$pgac_sse42_utf8_intrinsics" = x"yes" && (test x"$pgac_cv__get_cpuid" = x"yes" || test x"$pgac_cv__cpuid" = x"yes"); then
+      USE_SSE42_UTF8_WITH_RUNTIME_CHECK=1
+    else
+      # fall back to algorithm which doesn't require any special
+      # CPU support.
+      USE_FALLBACK_UTF8=1
+    fi
+  fi
+fi
+
+# Set PG_UTF8_OBJS appropriately depending on the selected implementation.
+# XXX this was copy-pasted from the equivalent CRC checks -- there may be bugs.
+AC_MSG_CHECKING([which UTF-8 validator to use])
+if test x"$USE_SSE42_UTF8" = x"1"; then
+  AC_DEFINE(USE_SSE42_UTF8, 1, [Define to 1 to use the UTF-8 validator written with Intel SSE instructions.])
+  PG_UTF8_OBJS="pg_utf8_sse42.o"
+  AC_MSG_RESULT(SSE 4.2)
+else
+  if test x"$USE_SSE42_UTF8_WITH_RUNTIME_CHECK" = x"1"; then
+    AC_DEFINE(USE_SSE42_UTF8_WITH_RUNTIME_CHECK, 1, [Define to 1 to use the UTF-8 validator written with Intel SSE instructions with runtime check.])
+    PG_UTF8_OBJS="pg_utf8_sse42.o pg_utf8_fallback.o pg_utf8_sse42_choose.o"
+    AC_MSG_RESULT(SSE 4.2 with runtime check)
+  else
+    AC_DEFINE(USE_FALLBACK_UTF8, 1, [Define to 1 to use the fallback UTF-8 validator written in C.])
+    PG_UTF8_OBJS="pg_utf8_fallback.o"
+    AC_MSG_RESULT(fallback)
+  fi
+fi
+AC_SUBST(PG_UTF8_OBJS)
+
 # Check for ARMv8 CRC Extension intrinsics to do CRC calculations.
 #
 # First check if __crc32c* intrinsics can be used with the default compiler
@@ -2084,7 +2144,7 @@ if test x"$USE_SLICING_BY_8_CRC32C" = x"" && test x"$USE_SSE42_CRC32C" = x"" &&
           # fall back to slicing-by-8 algorithm, which doesn't require any
           # special CPU support.
           USE_SLICING_BY_8_CRC32C=1
-	fi
+        fi
       fi
     fi
   fi
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 74b3a6acd2..1d51ebe9c6 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -721,6 +721,9 @@ LIBOBJS = @LIBOBJS@
 # files needed for the chosen CRC-32C implementation
 PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
 
+# files needed for the chosen UTF-8 validation implementation
+PG_UTF8_OBJS = @PG_UTF8_OBJS@
+
 LIBS := -lpgcommon -lpgport $(LIBS)
 
 # to make ws2_32.lib the last library
diff --git a/src/common/wchar.c b/src/common/wchar.c
index 6e7d731e02..742957e67e 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -13,6 +13,7 @@
 #include "c.h"
 
 #include "mb/pg_wchar.h"
+#include "port/pg_utf8.h"
 
 
 /*
@@ -1189,6 +1190,15 @@ pg_eucjp_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1247,6 +1257,15 @@ pg_euckr_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1330,6 +1349,15 @@ pg_euctw_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1383,6 +1411,15 @@ pg_johab_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1433,6 +1470,15 @@ pg_mule_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1502,6 +1548,15 @@ pg_sjis_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1551,6 +1606,15 @@ pg_big5_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1600,6 +1664,15 @@ pg_gbk_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1649,6 +1722,15 @@ pg_uhc_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1709,6 +1791,15 @@ pg_gb18030_verifystr(const unsigned char *s, int len)
 		int			l;
 
 		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
 		if (!IS_HIGHBIT_SET(*s))
 		{
 			if (*s == '\0')
@@ -1760,30 +1851,7 @@ pg_utf8_verifychar(const unsigned char *s, int len)
 static int
 pg_utf8_verifystr(const unsigned char *s, int len)
 {
-	const unsigned char *start = s;
-
-	while (len > 0)
-	{
-		int			l;
-
-		/* fast path for ASCII-subset characters */
-		if (!IS_HIGHBIT_SET(*s))
-		{
-			if (*s == '\0')
-				break;
-			l = 1;
-		}
-		else
-		{
-			l = pg_utf8_verifychar(s, len);
-			if (l == -1)
-				break;
-		}
-		s += l;
-		len -= l;
-	}
-
-	return s - start;
+	return pg_validate_utf8(s, len);
 }
 
 /*
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 55cab4d2bf..303dae4441 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -905,6 +905,15 @@
 /* Define to 1 to build with PAM support. (--with-pam) */
 #undef USE_PAM
 
+/* Define to 1 to use the fallback UTF-8 validator written in C. */
+#undef USE_FALLBACK_UTF8
+
+/* Define to 1 to use the UTF-8 validator written with Intel SSE instructions. */
+#undef USE_SSE42_UTF8
+
+/* Define to 1 to use the UTF-8 validator written with Intel SSE instructions with runtime check. */
+#undef USE_SSE42_UTF8_WITH_RUNTIME_CHECK
+
 /* Define to 1 to use software CRC-32C implementation (slicing-by-8). */
 #undef USE_SLICING_BY_8_CRC32C
 
diff --git a/src/include/port/pg_utf8.h b/src/include/port/pg_utf8.h
new file mode 100644
index 0000000000..a259c59cf5
--- /dev/null
+++ b/src/include/port/pg_utf8.h
@@ -0,0 +1,72 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_utf8.h
+ *	  Routines for fast validation of UTF-8 text.
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/pg_utf8.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_UTF8_H
+#define PG_UTF8_H
+
+
+#if defined(USE_SSE42_UTF8)
+/* Use Intel SSE 4.2 instructions unconditionally. */
+#define pg_validate_utf8(s, len) pg_validate_utf8_sse42((s), (len))
+extern int pg_validate_utf8_sse42(const unsigned char *s, int len);
+#elif defined(USE_SSE42_UTF8_WITH_RUNTIME_CHECK)
+/* Use SSE 4.2 only after a runtime check shows that it is available. */
+extern int (*pg_validate_utf8) (const unsigned char *s, int len);
+extern int pg_validate_utf8_sse42(const unsigned char *s, int len);
+#else
+/* No SSE 4.2 validator selected: always use the portable C validator. */
+#define pg_validate_utf8(s, len) pg_validate_utf8_fallback((s), (len))
+#endif							/* USE_SSE42_UTF8 */
+
+/* The portable C validator is declared in every configuration. */
+extern int pg_validate_utf8_fallback(const unsigned char *s, int len);
+
+
+/* from https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */
+#define HAS_ZERO(chunk) ( \
+	((chunk) - UINT64CONST(0x0101010101010101)) & \
+	 ~(chunk) & \
+	 UINT64CONST(0x8080808080808080))
+
+/*
+ * Fast path for runs of plain ASCII.  Returns the chunk length
+ * (2 * sizeof(uint64)) if that many bytes at s are all non-zero with the
+ * high bit clear; returns 0 otherwise, including when len is too short.
+ */
+static inline int
+check_ascii(const unsigned char *s, int len)
+{
+	const int	chunk_len = (int) (2 * sizeof(uint64));
+	uint64		half1,
+				half2;
+
+	/* Compare as signed ints so that a negative len can never pass. */
+	if (len < chunk_len)
+		return 0;
+
+	/* Load via memcpy, since s has no particular alignment. */
+	memcpy(&half1, s, sizeof(uint64));
+	memcpy(&half2, s + sizeof(uint64), sizeof(uint64));
+
+	/* Any zero byte means the slow path must look at this chunk. */
+	if (HAS_ZERO(half1) || HAS_ZERO(half2))
+		return 0;
+
+	/* Likewise if any byte in the chunk has its high bit set. */
+	if ((half1 | half2) & UINT64CONST(0x8080808080808080))
+		return 0;
+
+	return chunk_len;
+}
+
+#endif							/* PG_UTF8_H */
diff --git a/src/port/Makefile b/src/port/Makefile
index e41b005c4f..7a7e000b9d 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -40,6 +40,7 @@ LIBS += $(PTHREAD_LIBS)
 OBJS = \
 	$(LIBOBJS) \
 	$(PG_CRC32C_OBJS) \
+	$(PG_UTF8_OBJS) \
 	chklocale.o \
 	erand48.o \
 	inet_net_ntop.o \
@@ -88,6 +89,11 @@ libpgport.a: $(OBJS)
 thread.o: CFLAGS+=$(PTHREAD_CFLAGS)
 thread_shlib.o: CFLAGS+=$(PTHREAD_CFLAGS)
 
+# all versions of pg_utf8_sse42.o need CFLAGS_SSE42
+pg_utf8_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
+pg_utf8_sse42_shlib.o: CFLAGS+=$(CFLAGS_SSE42)
+pg_utf8_sse42_srv.o: CFLAGS+=$(CFLAGS_SSE42)
+
 # all versions of pg_crc32c_sse42.o need CFLAGS_SSE42
 pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_SSE42)
 pg_crc32c_sse42_shlib.o: CFLAGS+=$(CFLAGS_SSE42)
diff --git a/src/port/pg_utf8_fallback.c b/src/port/pg_utf8_fallback.c
new file mode 100644
index 0000000000..1615c48233
--- /dev/null
+++ b/src/port/pg_utf8_fallback.c
@@ -0,0 +1,132 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_utf8_fallback.c
+ *	  Validate UTF-8 with a fast path for the ASCII subset.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_utf8_fallback.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include "port/pg_utf8.h"
+
+
+#define IS_CONTINUATION_BYTE(c) (((c) & 0xC0) == 0x80)	/* 10xx.xxxx */
+
+/*
+ * Return the length of the longest prefix of s (at most len bytes) that is
+ * valid UTF-8 and contains no zero byte.
+ *
+ * See the comment in common/wchar.c under "multibyte sequence validators".
+ */
+int
+pg_validate_utf8_fallback(const unsigned char *s, int len)
+{
+	const unsigned char *start = s;
+	unsigned char b1, b2, b3, b4;
+
+	while (len > 0)
+	{
+		int			l;
+
+		/* fast path for ASCII-subset characters */
+		l = check_ascii(s, len);
+		if (l)
+		{
+			s += l;
+			len -= l;
+			continue;
+		}
+
+		/* Found non-ASCII or zero above, so verify a single character. */
+		if (!IS_HIGHBIT_SET(*s))
+		{
+			if (*s == '\0')
+				break;
+			l = 1;
+		}
+		/* code points U+0080 through U+07FF */
+		else if ((*s & 0xE0) == 0xC0)
+		{
+			l = 2;
+			if (len < l)
+				break;
+
+			b1 = *s;
+			b2 = *(s + 1);
+
+			if (!IS_CONTINUATION_BYTE(b2))
+				break;
+
+			/* check 2-byte overlong: 1100.000x.10xx.xxxx */
+			if (b1 < 0xC2)
+				break;
+		}
+		/* code points U+0800 through U+D7FF and U+E000 through U+FFFF */
+		else if ((*s & 0xF0) == 0xE0)
+		{
+			l = 3;
+			if (len < l)
+				break;
+
+			b1 = *s;
+			b2 = *(s + 1);
+			b3 = *(s + 2);
+
+			if (!IS_CONTINUATION_BYTE(b2) ||
+				!IS_CONTINUATION_BYTE(b3))
+				break;
+
+			/* check 3-byte overlong: 1110.0000 100x.xxxx 10xx.xxxx */
+			if (b1 == 0xE0 && b2 < 0xA0)
+				break;
+
+			/* check surrogate: 1110.1101 101x.xxxx 10xx.xxxx */
+			if (b1 == 0xED && b2 > 0x9F)
+				break;
+		}
+		/* code points U+010000 through U+10FFFF */
+		else if ((*s & 0xF8) == 0xF0)
+		{
+			l = 4;
+			if (len < l)
+				break;
+
+			b1 = *s;
+			b2 = *(s + 1);
+			b3 = *(s + 2);
+			b4 = *(s + 3);
+
+			if (!IS_CONTINUATION_BYTE(b2) ||
+				!IS_CONTINUATION_BYTE(b3) ||
+				!IS_CONTINUATION_BYTE(b4))
+				break;
+
+			/* check 4-byte overlong: 1111.0000 1000.xxxx 10xx.xxxx 10xx.xxxx */
+			if (b1 == 0xF0 && b2 < 0x90)
+				break;
+
+			/*
+			 * check too large: above U+10FFFF, i.e. lead byte 0xF4 with a
+			 * second byte above 0x8F, or any lead byte above 0xF4
+			 */
+			if ((b1 == 0xF4 && b2 > 0x8F) || b1 > 0xF4)
+				break;
+		}
+		else
+			/* invalid byte */
+			break;
+
+		s += l;
+		len -= l;
+	}
+
+	return s - start;
+}
diff --git a/src/port/pg_utf8_sse42.c b/src/port/pg_utf8_sse42.c
new file mode 100644
index 0000000000..417f2142c7
--- /dev/null
+++ b/src/port/pg_utf8_sse42.c
@@ -0,0 +1,424 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_utf8_sse42.c
+ *	  Validate UTF-8 with Intel SSE 4.2 instructions.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_utf8_sse42.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#include <nmmintrin.h>
+
+#include "port/pg_utf8.h"
+
+/* TODO: cite paper */
+
+/*
+ * Lookup tables for classifying two-byte sequences
+ *
+ * These constants were taken nearly verbatim from simdjson (Apache 2.0 license)
+ *
+ * XXX had to add a bunch of casts to prevent warnings -- needs more work
+ *
+ * IMHO a better symbol name for TOO_LONG is ASC_CONT
+ *
+ * simdjson also didn't seem to put the numerical values in a logical order,
+ * but the only one that MUST be as below is TWO_CONTS, since that indicates
+ * we can't say there's an error until we look at previous bytes.
+ */
+#define TOO_SHORT   (uint8) (1 << 0)	/* 11______ 0_______ */
+										/* 11______ 11______ */
+#define TOO_LONG	(uint8) (1 << 1)	/* 0_______ 10______ */
+#define OVERLONG_3	(uint8) (1 << 2)	/* 11100000 100_____ */
+#define SURROGATE	(uint8) (1 << 4)	/* 11101101 101_____ */
+#define OVERLONG_2	(uint8) (1 << 5)	/* 1100000_ 10______ */
+#define TWO_CONTS	(uint8) (1 << 7)	/* 10______ 10______ */
+#define TOO_LARGE	(uint8) (1 << 3)	/* 11110100 1001____ */
+										/* 11110100 101_____ */
+										/* 11110101 1001____ */
+										/* 11110101 101_____ */
+										/* 1111011_ 1001____ */
+										/* 1111011_ 101_____ */
+										/* 11111___ 1001____ */
+										/* 11111___ 101_____ */
+#define TOO_LARGE_1000 (uint8) (1 << 6)	/* 11110101 1000____ */
+										/* 1111011_ 1000____ */
+										/* 11111___ 1000____ */
+#define OVERLONG_4	(uint8) (1 << 6)	/* 11110000 1000____ */
+
+/* These all have ____ in byte 1 */
+#define CARRY (uint8) (TOO_SHORT | TOO_LONG | TWO_CONTS)
+
+/* XXX the following tables could just be static variables */
+
+/*
+ * Returns a 16-entry table, indexed by the high nibble of the first byte of
+ * a 2-byte sequence, of the error bits that nibble alone cannot rule out.
+ */
+static inline __m128i
+byte_1_high_table(void)
+{
+	return _mm_setr_epi8(
+		/* 0_______ ________ <ASCII in byte 1> */
+		TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+		TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+		/* 10______ ________ <continuation in byte 1> */
+		TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+		/* 1100____ ________ <two byte lead in byte 1> */
+		TOO_SHORT | OVERLONG_2,
+		/* 1101____ ________ <two byte lead in byte 1> */
+		TOO_SHORT,
+		/* 1110____ ________ <three byte lead in byte 1> */
+		TOO_SHORT | OVERLONG_3 | SURROGATE,
+		/* 1111____ ________ <four+ byte lead in byte 1> */
+		TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+	);
+}
+
+/*
+ * Returns a 16-entry table, indexed by the low nibble of the first byte of
+ * a 2-byte sequence, of the error bits that nibble alone cannot rule out.
+ */
+static inline __m128i
+byte_1_low_table(void)
+{
+	return _mm_setr_epi8(
+		/* ____0000 ________ */
+		(uint8) (CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4),
+		/* ____0001 ________ */
+		(uint8) (CARRY | OVERLONG_2),
+		/* ____001_ ________ */
+		CARRY,
+		CARRY,
+
+		/* ____0100 ________ */
+		(uint8) (CARRY | TOO_LARGE),
+		/* ____0101 ________ */
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		/* ____011_ ________ */
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+
+		/* ____1___ ________ */
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		/* ____1101 ________ */
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000),
+		(uint8) (CARRY | TOO_LARGE | TOO_LARGE_1000)
+	);
+}
+
+/*
+ * Returns a 16-entry table, indexed by the high nibble of the second byte of
+ * a 2-byte sequence, of the error bits that nibble alone cannot rule out.
+ */
+static inline __m128i
+byte_2_high_table(void)
+{
+	return _mm_setr_epi8(
+		/* ________ 0_______ <ASCII in byte 2> */
+		TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+		TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+
+		/* ________ 1000____ */
+		(uint8) (TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4),
+		/* ________ 1001____ */
+		(uint8) (TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE),
+		/* ________ 101_____ */
+		(uint8) (TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE),
+		(uint8) (TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE),
+
+		/* ________ 11______ */
+		TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+	);
+}
+
+/* helper functions to wrap intrinsics */
+
+/* return a vector with all 128 bits cleared */
+static inline __m128i
+vzero(void)
+{
+	return _mm_setzero_si128();
+}
+
+/* load 16 bytes from raw_input, which need not be aligned, into a vector */
+static inline __m128i
+vload(const unsigned char *raw_input)
+{
+	return _mm_loadu_si128((const __m128i *) raw_input);
+}
+
+/* return a vector whose 8-bit lanes are all-ones where v has a zero byte */
+static inline __m128i
+has_zero(const __m128i v)
+{
+	return _mm_cmpeq_epi8(v, vzero());
+}
+
+/* return a vector with each 8-bit lane populated with the input scalar */
+static inline __m128i
+splat(uint8 byte)
+{
+	return _mm_set1_epi8(byte);
+}
+
+/* perform signed greater-than on all 8-bit lanes */
+static inline __m128i
+greater_than(const __m128i v1, const __m128i v2)
+{
+	return _mm_cmpgt_epi8(v1, v2);
+}
+
+/*
+ * Shift right each 8-bit lane
+ *
+ * There is no intrinsic to do this on 8-bit lanes, so shift right in each
+ * 16-bit lane then apply a mask of 1-bytes shifted the same amount.
+ */
+static inline __m128i
+shift_right(const __m128i v, const int n)
+{
+	const __m128i shift16 = _mm_srli_epi16(v, n);
+	const __m128i mask = splat(0xFF >> n);
+	return _mm_and_si128(shift16, mask);
+}
+
+/* Bitwise vector operations */
+static inline __m128i
+bitwise_and(const __m128i v1, const __m128i v2)
+{
+	return _mm_and_si128(v1, v2);
+}
+
+static inline __m128i
+bitwise_or(const __m128i v1, const __m128i v2)
+{
+	return _mm_or_si128(v1, v2);
+}
+
+static inline __m128i
+bitwise_xor(const __m128i v1, const __m128i v2)
+{
+	return _mm_xor_si128(v1, v2);
+}
+
+/*
+ * Do unsigned subtraction, but instead of wrapping around
+ * on overflow, stop at zero. Useful for emulating unsigned
+ * comparison.
+ */
+static inline __m128i
+saturating_sub(const __m128i v1, const __m128i v2)
+{
+	return _mm_subs_epu8(v1, v2);
+}
+
+/* return false if a register is zero, true otherwise */
+static inline bool
+to_bool(const __m128i v)
+{
+	/* _mm_testz_si128 returns 1 if the bitwise AND of the two arguments is zero. */
+	return !_mm_testz_si128(v, v);
+}
+
+/*
+ * Shift entire "input" register right by N 8-bit lanes, and
+ * replace the first N lanes with the last N lanes from the
+ * "prev" register. Can be stated in C thusly:
+ *
+ * ((prev << 128) | input) >> (N * 8)
+ *
+ * The third argument to the intrinsic must be a numeric constant, so
+ * we must have separate functions for different shift amounts.
+ */
+static inline __m128i
+prev1(__m128i prev, __m128i input)
+{
+	return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 1);
+}
+
+static inline __m128i
+prev2(__m128i prev, __m128i input)
+{
+	return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 2);
+}
+
+static inline __m128i
+prev3(__m128i prev, __m128i input)
+{
+	return _mm_alignr_epi8(input, prev, sizeof(__m128i) - 3);
+}
+
+/*
+ * Table lookup: each 8-bit lane of "input" selects one byte out of the
+ * 16-byte "table" register, as if indexing a 16-element byte array.
+ */
+static inline __m128i
+lookup(const __m128i input, __m128i table)
+{
+	return _mm_shuffle_epi8(table, input);
+}
+
+/* The actual algorithm */
+
+/*
+ * classify each 2-byte sequence in the input register
+ *
+ * Technically, it leaves off the last byte, but we'll get it
+ * from the "prev" register on the next loop iteration.
+ */
+static inline __m128i
+classify(const __m128i prev, const __m128i input)
+{
+	/* each lane holds the byte that precedes the same lane of "input" */
+	const __m128i first = prev1(prev, input);
+
+	/* split out the nibbles that drive the three table lookups */
+	const __m128i first_hi = shift_right(first, 4);
+	const __m128i first_lo = bitwise_and(first, splat(0x0F));
+	const __m128i second_hi = shift_right(input, 4);
+
+	/* candidate error bits contributed by each nibble */
+	const __m128i errs_first_hi = lookup(first_hi, byte_1_high_table());
+	const __m128i errs_first_lo = lookup(first_lo, byte_1_low_table());
+	const __m128i errs_second_hi = lookup(second_hi, byte_2_high_table());
+
+	/*
+	 * Only bits present in all three lookups survive. Non-zero lanes mark
+	 * 1) an invalid 2-byte sequence, or
+	 * 2) the second continuation byte of a possible 3- or 4-byte character, or
+	 * 3) the third continuation byte of a possible 4-byte character
+	 */
+	const __m128i errs_first = bitwise_and(errs_first_hi, errs_first_lo);
+	return bitwise_and(errs_first, errs_second_hi);
+}
+
+/*
+ * Return a mask of locations of lead bytes for 3- and 4-byte characters.
+ * Such lead bytes are found 2 and 3 bytes earlier in the sequence, respectively.
+ * The "special_cases" argument is currently unused.
+ */
+static inline __m128i
+get_lead_byte_mask(const __m128i prev, const __m128i input, const __m128i special_cases)
+{
+	/* create registers that are shifted up by 2 and 3 bytes */
+	const __m128i input_shift2 = prev2(prev, input);
+	const __m128i input_shift3 = prev3(prev, input);
+
+	/*
+	 * Look in the shifted registers for 3- or 4-byte leads, i.e. bytes
+	 * >= 0xE0 and >= 0xF0. There is no unsigned comparison, so we use
+	 * saturating subtraction followed by signed comparison with zero.
+	 */
+	const __m128i is_third_byte  = saturating_sub(input_shift2, splat(0xE0u - 1));
+	const __m128i is_fourth_byte = saturating_sub(input_shift3, splat(0xF0u - 1));
+
+	/* OR them together for easier comparison */
+	const __m128i temp = bitwise_or(is_third_byte, is_fourth_byte);
+
+	/*
+	 * If we find valid leads 2 or 3 bytes previous, set all bits for the current byte.
+	 * Signed arithmetic is okay because the values are small.
+	 */
+	const __m128i must23 = greater_than(temp, vzero());
+
+	/*
+	 * greater_than() sets all bits in the result when true. We want to compare
+	 * with the result of the classifier so apply a mask to allow only the high bit
+	 * to be set. This matches the TWO_CONTS symbol above.
+	 */
+	return bitwise_and(must23, splat(0x80));
+}
+
+/* XOR the expected-continuation mask with the classifier output; any bit left set is an error. */
+static inline __m128i
+check_utf8_bytes(const __m128i prev, const __m128i input)
+{
+	const __m128i special_cases = classify(prev, input);
+	return bitwise_xor(get_lead_byte_mask(prev, input, special_cases), special_cases);
+}
+
+/*
+ * Validate "len" bytes at "s" using SSE registers.  Returns "len" if no error
+ * was found, else the result of pg_validate_utf8_fallback() on the whole input.
+ */
+int
+pg_validate_utf8_sse42(const unsigned char *s, int len)
+{
+	const unsigned char *start = s;
+	const int	orig_len = len;
+	const int	vec_size = (int) sizeof(__m128i);	/* signed, to match "len" */
+	unsigned char inbuf[sizeof(__m128i)];	/* zero-padded final block */
+	unsigned char maskbuf[sizeof(__m128i)]; /* 0xFF over the padding only */
+
+	/* No previous input or error yet, so start from zeroed registers. */
+	__m128i		prev = vzero();
+	__m128i		error = vzero();
+	__m128i		input;
+
+	while (len >= vec_size)
+	{
+		input = vload(s);
+
+		/* check for zeros */
+		error = bitwise_or(error, has_zero(input));
+
+		/* TODO: fast path for ascii bytes? */
+
+		/* do the UTF-8 validation */
+		error = bitwise_or(error, check_utf8_bytes(prev, input));
+
+		prev = input;
+		s += vec_size;
+		len -= vec_size;
+	}
+
+	/*
+	 * Always validate one more block, back-filled with zero bytes, even when
+	 * no input bytes remain.  A truncated multibyte character is only
+	 * flagged once the bytes after its lead byte are examined, so without
+	 * this an input whose length is a multiple of the register size and
+	 * that ends in an incomplete character would be accepted.
+	 */
+	memset(inbuf, 0, sizeof(inbuf));
+	if (len > 0)
+		memcpy(inbuf, s, len);
+	input = vload(inbuf);
+
+	/*
+	 * The padding must not trip the zero-byte check, so build a mask that is
+	 * 0x00 over the real input bytes and 0xFF over the padding, and OR it
+	 * into the input: real bytes are unchanged, padding becomes non-zero.
+	 * (ANDing with this mask would clear every byte.)
+	 */
+	memset(maskbuf, 0xFF, sizeof(maskbuf));
+	if (len > 0)
+		memset(maskbuf, 0, len);
+	error = bitwise_or(error, has_zero(bitwise_or(input, vload(maskbuf))));
+
+	/* do the UTF-8 validation on the zero-padded block */
+	error = bitwise_or(error, check_utf8_bytes(prev, input));
+
+	/*
+	 * FIXME: in the new noError conversions, we could have incomplete bytes
+	 * at the end, and will need to find the end of the last verified
+	 * character.  For now, it's correct to give up on any error.
+	 */
+	if (to_bool(error))
+		return pg_validate_utf8_fallback(start, orig_len);
+	else
+		return orig_len;
+}
diff --git a/src/port/pg_utf8_sse42_choose.c b/src/port/pg_utf8_sse42_choose.c
new file mode 100644
index 0000000000..263b840150
--- /dev/null
+++ b/src/port/pg_utf8_sse42_choose.c
@@ -0,0 +1,69 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_utf8_sse42_choose.c
+ *	  Choose between Intel SSE 4.2 and fallback implementation.
+ *
+ * On first call, checks if the CPU we're running on supports Intel SSE
+ * 4.2. If it does, use SSE instructions for UTF-8 validation. Otherwise,
+ * fall back to the pure C implementation which has a fast path for ASCII
+ * text.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_utf8_sse42_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "c.h"
+
+#ifdef HAVE__GET_CPUID
+#include <cpuid.h>
+#endif
+
+#ifdef HAVE__CPUID
+#include <intrin.h>
+#endif
+
+#include "port/pg_utf8.h"
+
+static bool
+pg_utf8_sse42_available(void)
+{
+#ifndef __x86_64__
+	/* Only 64-bit x86 is considered, so SSE2 need not be probed separately. */
+	return false;
+#else
+	/* eax, ebx, ecx, edx as reported by CPUID leaf 1 */
+	unsigned int regs[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(regs, 1);
+#else
+#error cpuid instruction not available
+#endif							/* HAVE__GET_CPUID */
+	return ((regs[2] >> 20) & 1) != 0;	/* ECX bit 20: SSE 4.2 */
+#endif							/* __x86_64__ */
+}
+
+/*
+ * Initial target of the pg_validate_utf8 function pointer.  Picks the
+ * implementation once, stores it in the pointer so that later calls go
+ * straight to it, and then services the current call through it.
+ */
+static int
+pg_validate_utf8_choose(const unsigned char *s, int len)
+{
+	const bool	use_sse42 = pg_utf8_sse42_available();
+
+	pg_validate_utf8 = use_sse42 ? pg_validate_utf8_sse42 : pg_validate_utf8_fallback;
+
+	return pg_validate_utf8(s, len);
+}
+
+int	(*pg_validate_utf8) (const unsigned char *s, int len) = pg_validate_utf8_choose;
diff --git a/src/test/regress/expected/conversion.out b/src/test/regress/expected/conversion.out
index e34ab20974..e37bda8057 100644
--- a/src/test/regress/expected/conversion.out
+++ b/src/test/regress/expected/conversion.out
@@ -72,6 +72,58 @@ $$;
 --
 -- UTF-8
 --
+CREATE TABLE utf8_verification_inputs (inbytes bytea, description text);
+insert into utf8_verification_inputs  values
+  ('\xaf',		'bare continuation'),
+  ('\xc5',		'missing second byte in 2-byte char'),
+  ('\xc080',	'smallest 2-byte overlong'),
+  ('\xc1bf',	'largest 2-byte overlong'),
+  ('\xc280',	'next 2-byte after overlongs'),
+  ('\xdfbf',	'largest 2-byte'),
+  ('\xe9af',	'missing third byte in 3-byte char'),
+  ('\xe08080',	'smallest 3-byte overlong'),
+  ('\xe09fbf',	'largest 3-byte overlong'),
+  ('\xe0a080',	'next 3-byte after overlong'),
+  ('\xed9fbf',	'last before surrogates'),
+  ('\xeda080',	'smallest surrogate'),
+  ('\xedbfbf',	'largest surrogate'),
+  ('\xee8080',	'next after surrogates'),
+  ('\xefbfbf',	'largest 3-byte'),
+  ('\xf1afbf',	'missing fourth byte in 4-byte char'),
+  ('\xf0808080',	'smallest 4-byte overlong'),
+  ('\xf08fbfbf',	'largest 4-byte overlong'),
+  ('\xf0908080',	'next 4-byte after overlong'),
+  ('\xf48fbfbf',	'largest 4-byte'),
+  ('\xf4908080',	'smallest too large'),
+  ('\xfa9a9a8a8a',	'5 byte');
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs;
+            description             |   result   |   errorat    |                             error                              
+------------------------------------+------------+--------------+----------------------------------------------------------------
+ bare continuation                  | \x         | \xaf         | invalid byte sequence for encoding "UTF8": 0xaf
+ missing second byte in 2-byte char | \x         | \xc5         | invalid byte sequence for encoding "UTF8": 0xc5
+ smallest 2-byte overlong           | \x         | \xc080       | invalid byte sequence for encoding "UTF8": 0xc0 0x80
+ largest 2-byte overlong            | \x         | \xc1bf       | invalid byte sequence for encoding "UTF8": 0xc1 0xbf
+ next 2-byte after overlongs        | \xc280     |              | 
+ largest 2-byte                     | \xdfbf     |              | 
+ missing third byte in 3-byte char  | \x         | \xe9af       | invalid byte sequence for encoding "UTF8": 0xe9 0xaf
+ smallest 3-byte overlong           | \x         | \xe08080     | invalid byte sequence for encoding "UTF8": 0xe0 0x80 0x80
+ largest 3-byte overlong            | \x         | \xe09fbf     | invalid byte sequence for encoding "UTF8": 0xe0 0x9f 0xbf
+ next 3-byte after overlong         | \xe0a080   |              | 
+ last before surrogates             | \xed9fbf   |              | 
+ smallest surrogate                 | \x         | \xeda080     | invalid byte sequence for encoding "UTF8": 0xed 0xa0 0x80
+ largest surrogate                  | \x         | \xedbfbf     | invalid byte sequence for encoding "UTF8": 0xed 0xbf 0xbf
+ next after surrogates              | \xee8080   |              | 
+ largest 3-byte                     | \xefbfbf   |              | 
+ missing fourth byte in 4-byte char | \x         | \xf1afbf     | invalid byte sequence for encoding "UTF8": 0xf1 0xaf 0xbf
+ smallest 4-byte overlong           | \x         | \xf0808080   | invalid byte sequence for encoding "UTF8": 0xf0 0x80 0x80 0x80
+ largest 4-byte overlong            | \x         | \xf08fbfbf   | invalid byte sequence for encoding "UTF8": 0xf0 0x8f 0xbf 0xbf
+ next 4-byte after overlong         | \xf0908080 |              | 
+ largest 4-byte                     | \xf48fbfbf |              | 
+ smallest too large                 | \x         | \xf4908080   | invalid byte sequence for encoding "UTF8": 0xf4 0x90 0x80 0x80
+ 5 byte                             | \x         | \xfa9a9a8a8a | invalid byte sequence for encoding "UTF8": 0xfa
+(22 rows)
+
 CREATE TABLE utf8_inputs (inbytes bytea, description text);
 insert into utf8_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index ea85f20ed8..7f761cd630 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -74,6 +74,34 @@ $$;
 --
 -- UTF-8
 --
+CREATE TABLE utf8_verification_inputs (inbytes bytea, description text);
+insert into utf8_verification_inputs  values
+  ('\xaf',		'bare continuation'),
+  ('\xc5',		'missing second byte in 2-byte char'),
+  ('\xc080',	'smallest 2-byte overlong'),
+  ('\xc1bf',	'largest 2-byte overlong'),
+  ('\xc280',	'next 2-byte after overlongs'),
+  ('\xdfbf',	'largest 2-byte'),
+  ('\xe9af',	'missing third byte in 3-byte char'),
+  ('\xe08080',	'smallest 3-byte overlong'),
+  ('\xe09fbf',	'largest 3-byte overlong'),
+  ('\xe0a080',	'next 3-byte after overlong'),
+  ('\xed9fbf',	'last before surrogates'),
+  ('\xeda080',	'smallest surrogate'),
+  ('\xedbfbf',	'largest surrogate'),
+  ('\xee8080',	'next after surrogates'),
+  ('\xefbfbf',	'largest 3-byte'),
+  ('\xf1afbf',	'missing fourth byte in 4-byte char'),
+  ('\xf0808080',	'smallest 4-byte overlong'),
+  ('\xf08fbfbf',	'largest 4-byte overlong'),
+  ('\xf0908080',	'next 4-byte after overlong'),
+  ('\xf48fbfbf',	'largest 4-byte'),
+  ('\xf4908080',	'smallest too large'),
+  ('\xfa9a9a8a8a',	'5 byte');
+
+-- Test UTF-8 verification
+select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs;
+
 CREATE TABLE utf8_inputs (inbytes bytea, description text);
 insert into utf8_inputs  values
   ('\x666f6f',		'valid, pure ASCII'),
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index 2aa062b2c9..5f778570ee 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -489,6 +489,9 @@ sub GenerateFiles
 		USE_NAMED_POSIX_SEMAPHORES => undef,
 		USE_OPENSSL                => undef,
 		USE_PAM                    => undef,
+		USE_FALLBACK_UTF8 => undef,
+		USE_SSE42_UTF8 => undef,
+		USE_SSE42_UTF8_WITH_RUNTIME_CHECK => undef,
 		USE_SLICING_BY_8_CRC32C    => undef,
 		USE_SSE42_CRC32C           => undef,
 		USE_SSE42_CRC32C_WITH_RUNTIME_CHECK => 1,
