From 8aea5020dc2bb472876cab6c468908b6c45e854d Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Fri, 15 May 2020 11:13:15 +0800
Subject: [PATCH v1 2/3] Use perfect hashing for NFC Unicode normalization
 quick check

This makes the normalization check nearly 40% faster, while only adding 5kB
to the binary size. The hash lookup reuses the existing array of bitfields,
previously searched with bsearch(), to get the quick check property. The
hash function itself is generated as part of make update-unicode. Since the
generated function is committed to the repository, format it like pgindent
would.

The NFKC quick check still uses binary search; to save space, no hash
function is generated for its larger table.
---
 .../generate-unicode_normprops_table.pl       |  38 +-
 src/common/unicode_norm.c                     |  36 +-
 src/include/common/unicode_normprops_table.h  | 344 ++++++++++++++++++
 src/tools/PerfectHash.pm                      |   2 +-
 4 files changed, 412 insertions(+), 8 deletions(-)

diff --git a/src/common/unicode/generate-unicode_normprops_table.pl b/src/common/unicode/generate-unicode_normprops_table.pl
index e8e5097c09..7f00cb0ae4 100644
--- a/src/common/unicode/generate-unicode_normprops_table.pl
+++ b/src/common/unicode/generate-unicode_normprops_table.pl
@@ -9,6 +9,10 @@
 use strict;
 use warnings;
 
+use FindBin;
+use lib "$FindBin::RealBin/../../tools/";
+use PerfectHash;
+
 my %data;
 
 print
@@ -17,6 +21,8 @@ print
 print <<EOS;
 #include "common/unicode_norm.h"
 
+typedef int (*qc_hash_func) (const void *key);
+
 /*
  * We use a bit field here to save space.
  */
@@ -24,7 +30,14 @@ typedef struct
 {
 	unsigned int codepoint:21;
 	signed int	quickcheck:4;	/* really UnicodeNormalizationQC */
-}			pg_unicode_normprops;
+} pg_unicode_normprops;
+
+typedef struct
+{
+	const pg_unicode_normprops *normprops;
+	qc_hash_func hash;
+	int			num_normprops;
+} unicode_norm_info;
 EOS
 
 foreach my $line (<ARGV>)
@@ -66,6 +79,7 @@ foreach my $prop (sort keys %data)
 	  "static const pg_unicode_normprops UnicodeNormProps_${prop}[] = {\n";
 
 	my %subdata = %{ $data{$prop} };
+	my @cp_packed;
 	foreach my $cp (sort { $a <=> $b } keys %subdata)
 	{
 		my $qc;
@@ -82,7 +96,29 @@ foreach my $prop (sort keys %data)
 			die;
 		}
 		printf "\t{0x%04X, %s},\n", $cp, $qc;
+
+		# Save the bytes as a string in network order.
+		push @cp_packed, pack('N', $cp);
 	}
 
 	print "};\n";
+
+	# Emit the definition of the hash function.
+
+	# To save space, skip building the function for the larger NFKC table.
+	next if $prop eq "NFKC_QC";
+
+	my $funcname = $prop . '_hash_func';
+
+	my $f = PerfectHash::generate_hash_function(\@cp_packed, $funcname,
+		fixed_key_length => 4);
+	print "\nstatic $f\n";
+
+	# Emit the struct that wraps the hash lookup info into one variable.
+	printf "static const unicode_norm_info ";
+	printf "UnicodeNormInfo_%s = {\n", $prop;
+	printf "\tUnicodeNormProps_%s,\n", $prop;
+	printf "\t%s,\n",                  $funcname;
+	printf "\t%d\n",                   scalar @cp_packed;
+	printf "};\n";
 }
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index ab5ce59345..1714837e64 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -476,6 +476,34 @@ qc_compare(const void *p1, const void *p2)
 	return (v1 - v2);
 }
 
+/*
+ * Look up ch in norminfo's table via its perfect hash; NULL if absent.
+ */
+static const pg_unicode_normprops *
+qc_hash_lookup(pg_wchar ch, const unicode_norm_info * norminfo)
+{
+	int			h;
+	uint32		hashkey;
+
+	/* The key is the codepoint in network order, as the generator packs it. */
+	hashkey = htonl(ch);
+	h = norminfo->hash(&hashkey);
+
+	/* An out-of-range result implies no match */
+	if (h < 0 || h >= norminfo->num_normprops)
+		return NULL;
+
+	/*
+	 * Since it's a perfect hash, we need only match to the specific codepoint
+	 * it identifies.
+	 */
+	if (ch != norminfo->normprops[h].codepoint)
+		return NULL;
+
+	/* Success! */
+	return &norminfo->normprops[h];
+}
+
 /*
  * Look up the normalization quick check character property
  */
@@ -483,18 +511,14 @@ static UnicodeNormalizationQC
 qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
 {
 	pg_unicode_normprops key;
-	pg_unicode_normprops *found = NULL;
+	const pg_unicode_normprops *found = NULL;
 
 	key.codepoint = ch;
 
 	switch (form)
 	{
 		case UNICODE_NFC:
-			found = bsearch(&key,
-							UnicodeNormProps_NFC_QC,
-							lengthof(UnicodeNormProps_NFC_QC),
-							sizeof(pg_unicode_normprops),
-							qc_compare);
+			found = qc_hash_lookup(ch, &UnicodeNormInfo_NFC_QC);
 			break;
 		case UNICODE_NFKC:
 			found = bsearch(&key,
diff --git a/src/include/common/unicode_normprops_table.h b/src/include/common/unicode_normprops_table.h
index 93a2e55b75..4e8d2c46d6 100644
--- a/src/include/common/unicode_normprops_table.h
+++ b/src/include/common/unicode_normprops_table.h
@@ -2,6 +2,8 @@
 
 #include "common/unicode_norm.h"
 
+typedef int (*qc_hash_func) (const void *key);
+
 /*
  * We use a bit field here to save space.
  */
@@ -11,6 +13,13 @@ typedef struct
 	signed int	quickcheck:4;	/* really UnicodeNormalizationQC */
 } pg_unicode_normprops;
 
+typedef struct
+{
+	const pg_unicode_normprops *normprops;
+	qc_hash_func hash;
+	int			num_normprops;
+} unicode_norm_info;
+
 static const pg_unicode_normprops UnicodeNormProps_NFC_QC[] = {
 	{0x0300, UNICODE_NORM_QC_MAYBE},
 	{0x0301, UNICODE_NORM_QC_MAYBE},
@@ -1245,6 +1254,341 @@ static const pg_unicode_normprops UnicodeNormProps_NFC_QC[] = {
 	{0x2FA1D, UNICODE_NORM_QC_NO},
 };
 
+static int
+NFC_QC_hash_func(const void *key)
+{
+	static const int16 h[2463] = {
+		0, -2717, 0, 221, 1293, 223, 1295, 225,
+		226, 241, 0, 229, 230, 231, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		-386, 0, 0, 0, 0, 0, 0, 0,
+		-163, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		-246, -175, 1260, 0, 0, 0, -174, -173,
+		0, -172, 0, 0, 0, 0, 0, 0,
+		1049, 0, 300, 301, 1071, 0, 1071, 0,
+		1071, 1071, 1057, 0, 0, 0, 0, 1061,
+		0, -1053, 1664, 0, 2956, 0, 0, -13,
+		0, 0, 0, 0, 2156, 0, 0, 0,
+		0, 0, 0, 0, 71, 0, 1082, 0,
+		1083, 1083, 0, 1084, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 359, 360, 361,
+		-1091, 363, -762, -130, -129, -128, -127, -126,
+		137, -124, -708, -707, -706, -120, -185, -705,
+		-117, -184, -1307, -114, -113, -112, -111, 0,
+		386, 387, 388, 389, -90, 391, 171, 172,
+		394, -94, -183, 397, 398, 399, -98, -225,
+		402, -1019, -636, -1019, -225, 407, 408, 409,
+		410, 411, 674, 413, -171, -170, -169, 417,
+		352, -168, 420, 353, -770, 423, 424, 425,
+		426, 427, 428, 32767, 239, 239, 239, 239,
+		239, 239, 239, 239, 239, 239, 239, 239,
+		239, 239, 32767, 32767, 237, 32767, 236, 32767,
+		32767, 234, 234, 234, 234, 617, 234, 234,
+		234, -2483, 234, -1430, 1526, -1430, 1527, 47,
+		48, 471, 230, 32767, 32767, 32767, 227, 227,
+		227, 227, 227, 227, 227, 227, 227, 227,
+		227, 227, 227, 227, 227, 227, 227, 227,
+		-159, 227, 227, 227, 227, 227, 227, 227,
+		64, 227, 227, 227, 227, 227, 227, 227,
+		227, 227, 227, 227, 227, 227, 227, 227,
+		227, 227, 227, 227, 227, 227, 227, 227,
+		-19, 52, 1487, 227, 227, 227, 53, 54,
+		227, 55, 227, 227, 227, 227, 227, 227,
+		1276, 227, -989, 32767, 1296, 225, 1296, 225,
+		1296, 1296, 1282, 225, 225, 225, 225, 1286,
+		225, -828, 1889, 225, 3181, 225, 225, 212,
+		225, 225, 225, 225, 2381, 225, 225, 225,
+		225, 225, 225, 225, 296, 225, 1307, 225,
+		1308, 1308, 225, 1309, 225, 225, 225, 225,
+		225, 225, 225, 225, 225, 225, 225, 225,
+		225, 225, 225, 225, 225, 584, 585, 586,
+		-866, 588, -537, 95, 96, 97, 98, 99,
+		362, 101, -483, -482, -481, 105, 40, -480,
+		108, 41, -1082, 111, 112, 113, 114, 225,
+		611, 612, 613, 614, 135, 616, 396, 397,
+		619, 131, 42, 622, 623, 624, 127, 0,
+		627, -794, -411, -794, 0, 632, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		-272, 32767, 32767, 32767, 0, 32767, 32767, 32767,
+		32767, 32767, -166, -165, 32767, 32767, 32767, 32767,
+		-164, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 397, 32767, 396, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 386,
+		0, 386, 386, 386, 386, 386, 386, 386,
+		223, 386, 386, 386, 32767, 385, 385, 385,
+		385, 385, 32767, 384, 32767, 383, 383, 32767,
+		382, 382, 32767, 381, 381, 381, 381, 381,
+		135, 206, 1641, 381, 32767, 32767, 32767, 32767,
+		32767, 32767, -160, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 1148, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 0,
+		32767, 32767, 32767, 0, 0, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, -257, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, -910, -910, 32767, 32767,
+		0, 32767, 0, 32767, 0, 32767, 0, 32767,
+		147, 32767, 0, 32767, 0, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 0, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 143, 32767, 144, 32767, 145,
+		32767, 146, 32767, 0, 32767, 148, 32767, 149,
+		32767, 32767, 32767, -160, 32767, 32767, 32767, 32767,
+		32767, 32767, 15, 32767, 32767, 0, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		145, 32767, 144, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 0, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 0, -148, 32767, 32767, 32767, 32767,
+		32767, 32767, 2009, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 0, 32767, 32767, 135, -918, 32767,
+		151, 32767, 32767, 0, 1, 2, 3, 4,
+		133, 5, 6, 7, 8, 9, 10, 11,
+		32767, 32767, -1248, 32767, 13, 154, 188, 188,
+		32767, 32767, 32767, 32767, 32767, 155, 16, 32767,
+		32767, 32767, 32767, 32767, 32767, -1853, -1054, 18,
+		-1052, -1051, -1036, 22, 32767, 157, 32767, 28,
+		23, 1077, 673, 25, -2930, 0, 32767, 32767,
+		32767, 32767, 32767, 27, 32767, 155, 32767, 154,
+		32767, 32767, -62, 28, -42, 30, -1051, 32,
+		-1050, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 34,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 129, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 672, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 0, 32767,
+		32767, 32767, 32767, 32767, -156, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, -155, 32767, 32767,
+		32767, 0, 0, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		73, 32767, 32767, 32767, 32767, 74, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 675,
+		32767, 32767, 32767, 32767, 32767, 75, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 165, 32767, 32767, 32767, 166, 167,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 170, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 689, 690, 691, 692, 693, 694, 695,
+		696, 697, 698, 699, 700, 701, 702, 703,
+		704, 705, 706, 707, 708, 709, 710, 711,
+		712, 713, 714, 715, 716, 717, 718, 719,
+		720, 721, 722, -304, -303, -302, -301, -300,
+		-299, -298, -297, 930, -295, -294, -293, -292,
+		-291, -290, -289, -288, -287, -286, -285, -284,
+		-283, -282, -281, -280, -279, -278, -277, -276,
+		-275, 753, 754, 755, 646, 757, -712, -1765,
+		952, -712, 2244, -712, 2245, 765, 766, 767,
+		768, 125, 770, 771, 772, 773, 774, 775,
+		603, 777, 778, 779, 780, 781, 782, 783,
+		784, 2011, 786, 787, 788, 789, 790, 791,
+		792, 793, 794, 795, 796, 797, 798, 799,
+		800, 801, 802, 803, 804, 805, 806, 603,
+		603, 809, 603, 811, 603, 603, 814, 815,
+		816, 817, 435, 819, 820, 821, 3539, 823,
+		603, -468, 603, -468, 603, 603, 589, 831,
+		603, 603, 603, 835, 836, 837, 838, 839,
+		840, 841, 842, 843, 844, 845, 846, 847,
+		848, 849, 850, 851, 852, 1239, 854, 855,
+		856, 857, 858, 859, 860, 1024, 862, 863,
+		864, 865, 866, 867, 868, 869, 870, 871,
+		872, 873, 874, 875, 876, 877, 878, 879,
+		880, 881, 882, 883, 884, 1131, 1061, -373,
+		888, 889, 890, 1065, 1065, 893, 1066, 895,
+		896, 897, 898, 899, 900, -148, 902, 603,
+		603, -166, 906, -164, 908, -162, -161, -146,
+		912, 913, 914, 915, -145, 917, 1971, -745,
+		920, -2035, 922, 923, 937, 925, 926, 927,
+		928, -1227, 930, 931, 932, 933, 934, 935,
+		936, 866, 938, -143, 940, -142, -141, 943,
+		-140, 32767, 945, 946, 947, 948, 949, 950,
+		951, 952, 953, 954, 955, 956, 957, 958,
+		959, 960, 961, -65, -64, -63, -62, -61,
+		-60, -59, -58, 1169, -56, -55, -54, -53,
+		-52, -51, -50, -49, -48, -47, -46, -45,
+		-44, -43, -42, -41, -40, -39, -38, -37,
+		-36, 992, 993, 994, 885, 996, -473, -1526,
+		1191, -473, 2483, -473, 2484, 1004, 1005, 1006,
+		1007, 364, 1009, 1010, 1011, 1012, 1013, 1014,
+		842, 1016, 1017, 1018, 1019, 1020, 1021, 1022,
+		1023, 2250, 1025, 1026, 1027, 1028, 1029, 1030,
+		1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038,
+		1039, 1040, 1041, 1042, 1043, 1044, 1045, 842,
+		842, 1048, 842, 1050, 842, 842, 1053, 1054,
+		1055, 1056, 674, 1058, 1059, 1060, 3778, 1062,
+		842, -229, 842, -229, 842, 842, 828, 1070,
+		842, 842, 842, 1074, 1075, 1076, 1077, 1078,
+		1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086,
+		1087, 1088, 1089, 1090, 1091, 1478, 1093, 1094,
+		1095, 1096, 1097, 1098, 1099, 1263, 1101, 1102,
+		1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110,
+		1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118,
+		1119, 1120, 1121, 1122, 1123, 1370, 1300, -134,
+		1127, 1128, 1129, 1304, 1304, 1132, 1305, 1134,
+		1135, 1136, 1137, 1138, 1139, 91, 1141, 842,
+		842, 73, 1145, 75, 1147, 77, 78, 93,
+		1151, 1152, 1153, 1154, 94, 1156, 2210, -506,
+		1159, -1796, 1161, 1162, 1176, 1164, 1165, 1166,
+		1167, -988, 1169, 1170, 1171, 1172, 1173, 1174,
+		1175, 1105, 1177, 96, 1179, 97, 98, 1182,
+		99, 1184, 1185, 1186, 1187, 1188, 1189, 1190,
+		1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198,
+		1199, 1200, 0, 174, 175, 176, 177, 178,
+		179, 180, 181, 1408, 183, 184, 185, 186,
+		187, 188, 189, 190, 191, 192, 193, 194,
+		195, 196, 197, 198, 199, 200, 201, 202,
+		203, 0, 0, 206, 0, 208, 0, 0,
+		211, 212, 213, 214, -168, 216, 217, 218,
+		2936, 220, 0, -1071, 0, -1071, 0, 0,
+		-14, 228, 0, 0, 0, 232, 233, 234,
+		235, 236, 237, 238, 239, 240, 241, 242,
+		243, 244, 245, 246, 247, 248, 249, 636,
+		251, 252, 253, 254, 255, 256, 257, 421,
+		259, 260, 261, 262, 263, 264, 265, 266,
+		267, 268, 269, 270, 271, 272, 273, 274,
+		275, 276, 277, 278, 279, 280, 281, 528,
+		458, -976, 285, 286, 287, 462, 462, 290,
+		463, 292, 293, 294, 295, 296, 297, -751,
+		299, 0, 0, -769, 303, -767, 305, -765,
+		-764, -749, 309, 310, 311, 312, -748, 314,
+		1368, -1348, 317, -2638, 319, 320, 334, 322,
+		323, 324, 325, -1830, 327, 328, 329, 330,
+		331, 332, 333, 263, 335, -746, 337, -745,
+		-744, 340, -743, 342, 343, 344, 345, 346,
+		347, 348, 349, 350, 351, 352, 353, 354,
+		355, 356, 357, 358, 0, 0, 0, 1453,
+		0, 1126, 495, 495, 495, 495, 495, 233,
+		495, 1080, 1080, 1080, 495, 561, 1082, 495,
+		563, 1687, 495, 495, 495, 495, 385, 0,
+		0, 0, 0, 480, 0, 221, 221, 0,
+		489, 579, 0, 0, 0, 498, 626, 0,
+		1422, 1040, 1424, 631, 0, 0, 0, 0,
+		0, -262, 0, 585, 585, 585, 0, 66,
+		587, 0, 68, 1192, 0, 0, 0, 0,
+		0, 0, 32767, 32767, 32767, 32767, 669, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 670,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 142, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 115, 116, 117, 118, 119, 120,
+		121, 122, 123, 124, 125, 126, 127, 128,
+		129, 130, 131, 132, 133, 134, 135, 136,
+		137, 138, 139, 140, 141, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767,
+		32767, 32767, 32767, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 1027, 1027, 1027,
+		1027, 1027, 1027, 1027, 1027, -199, 1027, 1027,
+		1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027,
+		1027, 1027, 1027, 1027, 1027, 1027, 1027, 1027,
+		1027, 1027, 1027, 0, 0, 0, 110, 0,
+		1470, 2524, -192, 1473, -1482, 1475, -1481, 0,
+		0, 0, 0, 644, 0, 0, 0, 0,
+		0, 0, 173, 0, 0, 0, 0, 0,
+		0, 0, 0, -1226, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0,
+		0, 204, 205, 0, 207, 0, 209, 210,
+		0, 0, 0, 0, 383, 0, 0,
+	};
+
+	const unsigned char *k = (const unsigned char *) key;
+	size_t		keylen = 4;
+	uint32		a = 0;
+	uint32		b = 0;
+
+	while (keylen--)
+	{
+		unsigned char c = *k++;
+
+		a = a * 257 + c;
+		b = b * 17 + c;
+	}
+	return h[a % 2463] + h[b % 2463];
+}
+
+static const unicode_norm_info UnicodeNormInfo_NFC_QC = {
+	UnicodeNormProps_NFC_QC,
+	NFC_QC_hash_func,
+	1231
+};
+
 static const pg_unicode_normprops UnicodeNormProps_NFKC_QC[] = {
 	{0x00A0, UNICODE_NORM_QC_NO},
 	{0x00A8, UNICODE_NORM_QC_NO},
diff --git a/src/tools/PerfectHash.pm b/src/tools/PerfectHash.pm
index d6841589a3..26efa7ad70 100644
--- a/src/tools/PerfectHash.pm
+++ b/src/tools/PerfectHash.pm
@@ -124,7 +124,7 @@ sub generate_hash_function
 	$f .= sprintf "\tstatic const %s h[%d] = {\n", $elemtype, $nhash;
 	for (my $i = 0; $i < $nhash; $i++)
 	{
-		$f .= sprintf "%s%6d,%s",
+		$f .= sprintf "%s%d,%s",
 		  ($i % 8 == 0 ? "\t\t" : " "),
 		  $hashtab[$i],
 		  ($i % 8 == 7 ? "\n" : "");
-- 
2.22.0

