On Thu, Jan 4, 2024 at 10:01 AM jian he <jian.universal...@gmail.com> wrote: > > I still cannot git apply your patch cleanly. in
I don't know why you're using that -- the git apply man page even says "Use git-am(1) to create commits from patches generated by git-format-patch(1) and/or received by email." Or, if that fails, use "patch". > http://cfbot.cputube.org/ i cannot find your patch. > ( so, it might be that I test based on incomplete information). > but only hashfn_unstable.h influences bench_hash/bench_hash.c. > > so I attached the whole patch that I had git applied, that is the > changes i applied for the following tests. Well, aside from the added text-editor detritus, it looks like this has everything except v11-0008, without which I still get improvement for the pgstat hash. > Model name: Intel(R) Core(TM) i5-14600K > The following is tested with another machine, also listed machine spec below. > I tested 3 times, the results is very similar as following: > select * from bench_cstring_hash_aligned(100000); 4705.686 ms > select * from bench_cstring_hash_unaligned(100000); 6835.753 ms > select * from bench_pgstat_hash(100000); 2678.978 ms > select * from bench_pgstat_hash_fh(100000); 6199.017 ms > select * from bench_string_hash(100000); 847.699 ms I was fully prepared to believe something like 32-bit Arm would have difficulty with 64-bit shifts/multiplies etc., but this makes no sense at all. In this test, on my machine, HEAD's pgstat_hash is 3x faster than HEAD's "strlen + hash_bytes", but for you it's 3x slower. To improve reproducibility, I've added the .sql files and a bench script to v13. I invite you to run runbench.sh and see if that changes anything. v13 also - adds an assert that aligned and unaligned C string calculations give the same result - properly mixes roleid in the namespace hash, since it's now convenient to do so (0005 is an alternate method) - removes the broken makefile from the benchmark (not for commit anyway)
From cf64f9a0603837dd89efdf1aa455395906e75ded Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Fri, 5 Jan 2024 17:21:53 +0700 Subject: [PATCH v13 5/6] WIP: a safer way to accumulate a single struct member into the hash state --- src/backend/catalog/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index d1eae2a2d4..83fd57906c 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -258,7 +258,9 @@ spcachekey_hash(SearchPathCacheKey key) fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); - fasthash_accum(&hs, (const char*) &key.roleid, sizeof(Oid)); + hs.accum = key.roleid; + fasthash_combine(&hs); + sp_len = fasthash_accum_cstring(&hs, key.searchPath); /* pass the length to tweak the final mix */ -- 2.43.0
From 3ff66ebbe9f27639984c726dbd4005002b2615b9 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Sun, 24 Dec 2023 09:46:44 +0700 Subject: [PATCH v13 6/6] Add benchmarks for hashing --- bench_cstr_aligned.sql | 1 + bench_cstr_unaligned.sql | 1 + bench_pgstat_fh.sql | 2 + bench_pgstat_orig.sql | 1 + bench_string_hash.sql | 2 + contrib/bench_hash/aligned_keywords.h | 991 +++++++++++++++++++++++++ contrib/bench_hash/bench_hash--1.0.sql | 30 + contrib/bench_hash/bench_hash.c | 169 +++++ contrib/bench_hash/bench_hash.control | 5 + contrib/bench_hash/meson.build | 19 + contrib/meson.build | 1 + runbench.sh | 16 + 12 files changed, 1238 insertions(+) create mode 100644 bench_cstr_aligned.sql create mode 100644 bench_cstr_unaligned.sql create mode 100644 bench_pgstat_fh.sql create mode 100644 bench_pgstat_orig.sql create mode 100644 bench_string_hash.sql create mode 100644 contrib/bench_hash/aligned_keywords.h create mode 100644 contrib/bench_hash/bench_hash--1.0.sql create mode 100644 contrib/bench_hash/bench_hash.c create mode 100644 contrib/bench_hash/bench_hash.control create mode 100644 contrib/bench_hash/meson.build create mode 100755 runbench.sh diff --git a/bench_cstr_aligned.sql b/bench_cstr_aligned.sql new file mode 100644 index 0000000000..9ce6074fe2 --- /dev/null +++ b/bench_cstr_aligned.sql @@ -0,0 +1 @@ +select * from bench_cstring_hash_aligned(100000); diff --git a/bench_cstr_unaligned.sql b/bench_cstr_unaligned.sql new file mode 100644 index 0000000000..d654be3c07 --- /dev/null +++ b/bench_cstr_unaligned.sql @@ -0,0 +1 @@ +select * from bench_cstring_hash_unaligned(100000); diff --git a/bench_pgstat_fh.sql b/bench_pgstat_fh.sql new file mode 100644 index 0000000000..1130361c43 --- /dev/null +++ b/bench_pgstat_fh.sql @@ -0,0 +1,2 @@ +select * from bench_pgstat_hash_fh(100000); + diff --git a/bench_pgstat_orig.sql b/bench_pgstat_orig.sql new file mode 100644 index 0000000000..bd6d084fc2 --- /dev/null +++ b/bench_pgstat_orig.sql @@ 
-0,0 +1 @@ +select * from bench_pgstat_hash(100000); diff --git a/bench_string_hash.sql b/bench_string_hash.sql new file mode 100644 index 0000000000..fad5a41811 --- /dev/null +++ b/bench_string_hash.sql @@ -0,0 +1,2 @@ +select * from bench_string_hash(100000); + diff --git a/contrib/bench_hash/aligned_keywords.h b/contrib/bench_hash/aligned_keywords.h new file mode 100644 index 0000000000..c2bd67c856 --- /dev/null +++ b/contrib/bench_hash/aligned_keywords.h @@ -0,0 +1,991 @@ +/* created by copying from kwlist_d.h with this patch: + +--- a/src/tools/gen_keywordlist.pl ++++ b/src/tools/gen_keywordlist.pl +@@ -97,7 +97,9 @@ while (<$kif>) + { + if (/^PG_KEYWORD\("(\w+)"/) + { +- push @keywords, $1; ++ my $len = length($1) + 1; ++ my $aligned = $1 . "\\0" . "_" x ( ($len % 8) == 0 ? 0 : (8-($len % 8)) ); ++ push @keywords, $aligned; + } + } + +@@ -127,7 +129,7 @@ for my $i (0 .. $#keywords - 1) + # Emit the string containing all the keywords. + + printf $kwdef qq|static const char %s_kw_string[] =\n\t"|, $varname; +-print $kwdef join qq|\\0"\n\t"|, @keywords; ++print $kwdef join qq|"\n\t"|, @keywords; + print $kwdef qq|";\n\n|; + + # Emit an array of numerical offsets which will be used to index into the +@@ -145,7 +147,7 @@ foreach my $name (@keywords) + + # Calculate the cumulative offset of the next keyword, + # taking into account the null terminator. +- $offset += $this_length + 1; ++ $offset += $this_length -1; + + # Update max keyword length. 
+ $max_len = $this_length if $max_len < $this_length; + +*/ + + +static const char aligned_words[] = + "abort\0__" + "absent\0_" + "absolute\0_______" + "access\0_" + "action\0_" + "add\0____" + "admin\0__" + "after\0__" + "aggregate\0______" + "all\0____" + "also\0___" + "alter\0__" + "always\0_" + "analyse\0" + "analyze\0" + "and\0____" + "any\0____" + "array\0__" + "as\0_____" + "asc\0____" + "asensitive\0_____" + "assertion\0______" + "assignment\0_____" + "asymmetric\0_____" + "at\0_____" + "atomic\0_" + "attach\0_" + "attribute\0______" + "authorization\0__" + "backward\0_______" + "before\0_" + "begin\0__" + "between\0" + "bigint\0_" + "binary\0_" + "bit\0____" + "boolean\0" + "both\0___" + "breadth\0" + "by\0_____" + "cache\0__" + "call\0___" + "called\0_" + "cascade\0" + "cascaded\0_______" + "case\0___" + "cast\0___" + "catalog\0" + "chain\0__" + "char\0___" + "character\0______" + "characteristics\0" + "check\0__" + "checkpoint\0_____" + "class\0__" + "close\0__" + "cluster\0" + "coalesce\0_______" + "collate\0" + "collation\0______" + "column\0_" + "columns\0" + "comment\0" + "comments\0_______" + "commit\0_" + "committed\0______" + "compression\0____" + "concurrently\0___" + "configuration\0__" + "conflict\0_______" + "connection\0_____" + "constraint\0_____" + "constraints\0____" + "content\0" + "continue\0_______" + "conversion\0_____" + "copy\0___" + "cost\0___" + "create\0_" + "cross\0__" + "csv\0____" + "cube\0___" + "current\0" + "current_catalog\0" + "current_date\0___" + "current_role\0___" + "current_schema\0_" + "current_time\0___" + "current_timestamp\0______" + "current_user\0___" + "cursor\0_" + "cycle\0__" + "data\0___" + "database\0_______" + "day\0____" + "deallocate\0_____" + "dec\0____" + "decimal\0" + "declare\0" + "default\0" + "defaults\0_______" + "deferrable\0_____" + "deferred\0_______" + "definer\0" + "delete\0_" + "delimiter\0______" + "delimiters\0_____" + "depends\0" + "depth\0__" + "desc\0___" + "detach\0_" + 
"dictionary\0_____" + "disable\0" + "discard\0" + "distinct\0_______" + "do\0_____" + "document\0_______" + "domain\0_" + "double\0_" + "drop\0___" + "each\0___" + "else\0___" + "enable\0_" + "encoding\0_______" + "encrypted\0______" + "end\0____" + "enum\0___" + "escape\0_" + "event\0__" + "except\0_" + "exclude\0" + "excluding\0______" + "exclusive\0______" + "execute\0" + "exists\0_" + "explain\0" + "expression\0_____" + "extension\0______" + "external\0_______" + "extract\0" + "false\0__" + "family\0_" + "fetch\0__" + "filter\0_" + "finalize\0_______" + "first\0__" + "float\0__" + "following\0______" + "for\0____" + "force\0__" + "foreign\0" + "format\0_" + "forward\0" + "freeze\0_" + "from\0___" + "full\0___" + "function\0_______" + "functions\0______" + "generated\0______" + "global\0_" + "grant\0__" + "granted\0" + "greatest\0_______" + "group\0__" + "grouping\0_______" + "groups\0_" + "handler\0" + "having\0_" + "header\0_" + "hold\0___" + "hour\0___" + "identity\0_______" + "if\0_____" + "ilike\0__" + "immediate\0______" + "immutable\0______" + "implicit\0_______" + "import\0_" + "in\0_____" + "include\0" + "including\0______" + "increment\0______" + "indent\0_" + "index\0__" + "indexes\0" + "inherit\0" + "inherits\0_______" + "initially\0______" + "inline\0_" + "inner\0__" + "inout\0__" + "input\0__" + "insensitive\0____" + "insert\0_" + "instead\0" + "int\0____" + "integer\0" + "intersect\0______" + "interval\0_______" + "into\0___" + "invoker\0" + "is\0_____" + "isnull\0_" + "isolation\0______" + "join\0___" + "json\0___" + "json_array\0_____" + "json_arrayagg\0__" + "json_object\0____" + "json_objectagg\0_" + "json_scalar\0____" + "json_serialize\0_" + "key\0____" + "keys\0___" + "label\0__" + "language\0_______" + "large\0__" + "last\0___" + "lateral\0" + "leading\0" + "leakproof\0______" + "least\0__" + "left\0___" + "level\0__" + "like\0___" + "limit\0__" + "listen\0_" + "load\0___" + "local\0__" + "localtime\0______" + "localtimestamp\0_" + 
"location\0_______" + "lock\0___" + "locked\0_" + "logged\0_" + "mapping\0" + "match\0__" + "matched\0" + "materialized\0___" + "maxvalue\0_______" + "merge\0__" + "method\0_" + "minute\0_" + "minvalue\0_______" + "mode\0___" + "month\0__" + "move\0___" + "name\0___" + "names\0__" + "national\0_______" + "natural\0" + "nchar\0__" + "new\0____" + "next\0___" + "nfc\0____" + "nfd\0____" + "nfkc\0___" + "nfkd\0___" + "no\0_____" + "none\0___" + "normalize\0______" + "normalized\0_____" + "not\0____" + "nothing\0" + "notify\0_" + "notnull\0" + "nowait\0_" + "null\0___" + "nullif\0_" + "nulls\0__" + "numeric\0" + "object\0_" + "of\0_____" + "off\0____" + "offset\0_" + "oids\0___" + "old\0____" + "on\0_____" + "only\0___" + "operator\0_______" + "option\0_" + "options\0" + "or\0_____" + "order\0__" + "ordinality\0_____" + "others\0_" + "out\0____" + "outer\0__" + "over\0___" + "overlaps\0_______" + "overlay\0" + "overriding\0_____" + "owned\0__" + "owner\0__" + "parallel\0_______" + "parameter\0______" + "parser\0_" + "partial\0" + "partition\0______" + "passing\0" + "password\0_______" + "placing\0" + "plans\0__" + "policy\0_" + "position\0_______" + "preceding\0______" + "precision\0______" + "prepare\0" + "prepared\0_______" + "preserve\0_______" + "primary\0" + "prior\0__" + "privileges\0_____" + "procedural\0_____" + "procedure\0______" + "procedures\0_____" + "program\0" + "publication\0____" + "quote\0__" + "range\0__" + "read\0___" + "real\0___" + "reassign\0_______" + "recheck\0" + "recursive\0______" + "ref\0____" + "references\0_____" + "referencing\0____" + "refresh\0" + "reindex\0" + "relative\0_______" + "release\0" + "rename\0_" + "repeatable\0_____" + "replace\0" + "replica\0" + "reset\0__" + "restart\0" + "restrict\0_______" + "return\0_" + "returning\0______" + "returns\0" + "revoke\0_" + "right\0__" + "role\0___" + "rollback\0_______" + "rollup\0_" + "routine\0" + "routines\0_______" + "row\0____" + "rows\0___" + "rule\0___" + "savepoint\0______" + 
"scalar\0_" + "schema\0_" + "schemas\0" + "scroll\0_" + "search\0_" + "second\0_" + "security\0_______" + "select\0_" + "sequence\0_______" + "sequences\0______" + "serializable\0___" + "server\0_" + "session\0" + "session_user\0___" + "set\0____" + "setof\0__" + "sets\0___" + "share\0__" + "show\0___" + "similar\0" + "simple\0_" + "skip\0___" + "smallint\0_______" + "snapshot\0_______" + "some\0___" + "sql\0____" + "stable\0_" + "standalone\0_____" + "start\0__" + "statement\0______" + "statistics\0_____" + "stdin\0__" + "stdout\0_" + "storage\0" + "stored\0_" + "strict\0_" + "strip\0__" + "subscription\0___" + "substring\0______" + "support\0" + "symmetric\0______" + "sysid\0__" + "system\0_" + "system_user\0____" + "table\0__" + "tables\0_" + "tablesample\0____" + "tablespace\0_____" + "temp\0___" + "template\0_______" + "temporary\0______" + "text\0___" + "then\0___" + "ties\0___" + "time\0___" + "timestamp\0______" + "to\0_____" + "trailing\0_______" + "transaction\0____" + "transform\0______" + "treat\0__" + "trigger\0" + "trim\0___" + "true\0___" + "truncate\0_______" + "trusted\0" + "type\0___" + "types\0__" + "uescape\0" + "unbounded\0______" + "uncommitted\0____" + "unencrypted\0____" + "union\0__" + "unique\0_" + "unknown\0" + "unlisten\0_______" + "unlogged\0_______" + "until\0__" + "update\0_" + "user\0___" + "using\0__" + "vacuum\0_" + "valid\0__" + "validate\0_______" + "validator\0______" + "value\0__" + "values\0_" + "varchar\0" + "variadic\0_______" + "varying\0" + "verbose\0" + "version\0" + "view\0___" + "views\0__" + "volatile\0_______" + "when\0___" + "where\0__" + "whitespace\0_____" + "window\0_" + "with\0___" + "within\0_" + "without\0" + "work\0___" + "wrapper\0" + "write\0__" + "xml\0____" + "xmlattributes\0__" + "xmlconcat\0______" + "xmlelement\0_____" + "xmlexists\0______" + "xmlforest\0______" + "xmlnamespaces\0__" + "xmlparse\0_______" + "xmlpi\0__" + "xmlroot\0" + "xmlserialize\0___" + "xmltable\0_______" + "year\0___" + "yes\0____" 
+ "zone\0___"; + +static const uint16 word_offsets[] = { + 0, + 8, + 16, + 32, + 40, + 48, + 56, + 64, + 72, + 88, + 96, + 104, + 112, + 120, + 128, + 136, + 144, + 152, + 160, + 168, + 176, + 192, + 208, + 224, + 240, + 248, + 256, + 264, + 280, + 296, + 312, + 320, + 328, + 336, + 344, + 352, + 360, + 368, + 376, + 384, + 392, + 400, + 408, + 416, + 424, + 440, + 448, + 456, + 464, + 472, + 480, + 496, + 512, + 520, + 536, + 544, + 552, + 560, + 576, + 584, + 600, + 608, + 616, + 624, + 640, + 648, + 664, + 680, + 696, + 712, + 728, + 744, + 760, + 776, + 784, + 800, + 816, + 824, + 832, + 840, + 848, + 856, + 864, + 872, + 888, + 904, + 920, + 936, + 952, + 976, + 992, + 1000, + 1008, + 1016, + 1032, + 1040, + 1056, + 1064, + 1072, + 1080, + 1088, + 1104, + 1120, + 1136, + 1144, + 1152, + 1168, + 1184, + 1192, + 1200, + 1208, + 1216, + 1232, + 1240, + 1248, + 1264, + 1272, + 1288, + 1296, + 1304, + 1312, + 1320, + 1328, + 1336, + 1352, + 1368, + 1376, + 1384, + 1392, + 1400, + 1408, + 1416, + 1432, + 1448, + 1456, + 1464, + 1472, + 1488, + 1504, + 1520, + 1528, + 1536, + 1544, + 1552, + 1560, + 1576, + 1584, + 1592, + 1608, + 1616, + 1624, + 1632, + 1640, + 1648, + 1656, + 1664, + 1672, + 1688, + 1704, + 1720, + 1728, + 1736, + 1744, + 1760, + 1768, + 1784, + 1792, + 1800, + 1808, + 1816, + 1824, + 1832, + 1848, + 1856, + 1864, + 1880, + 1896, + 1912, + 1920, + 1928, + 1936, + 1952, + 1968, + 1976, + 1984, + 1992, + 2000, + 2016, + 2032, + 2040, + 2048, + 2056, + 2064, + 2080, + 2088, + 2096, + 2104, + 2112, + 2128, + 2144, + 2152, + 2160, + 2168, + 2176, + 2192, + 2200, + 2208, + 2224, + 2240, + 2256, + 2272, + 2288, + 2304, + 2312, + 2320, + 2328, + 2344, + 2352, + 2360, + 2368, + 2376, + 2392, + 2400, + 2408, + 2416, + 2424, + 2432, + 2440, + 2448, + 2456, + 2472, + 2488, + 2504, + 2512, + 2520, + 2528, + 2536, + 2544, + 2552, + 2568, + 2584, + 2592, + 2600, + 2608, + 2624, + 2632, + 2640, + 2648, + 2656, + 2664, + 2680, + 2688, + 2696, + 2704, + 2712, + 
2720, + 2728, + 2736, + 2744, + 2752, + 2760, + 2776, + 2792, + 2800, + 2808, + 2816, + 2824, + 2832, + 2840, + 2848, + 2856, + 2864, + 2872, + 2880, + 2888, + 2896, + 2904, + 2912, + 2920, + 2928, + 2944, + 2952, + 2960, + 2968, + 2976, + 2992, + 3000, + 3008, + 3016, + 3024, + 3040, + 3048, + 3064, + 3072, + 3080, + 3096, + 3112, + 3120, + 3128, + 3144, + 3152, + 3168, + 3176, + 3184, + 3192, + 3208, + 3224, + 3240, + 3248, + 3264, + 3280, + 3288, + 3296, + 3312, + 3328, + 3344, + 3360, + 3368, + 3384, + 3392, + 3400, + 3408, + 3416, + 3432, + 3440, + 3456, + 3464, + 3480, + 3496, + 3504, + 3512, + 3528, + 3536, + 3544, + 3560, + 3568, + 3576, + 3584, + 3592, + 3608, + 3616, + 3632, + 3640, + 3648, + 3656, + 3664, + 3680, + 3688, + 3696, + 3712, + 3720, + 3728, + 3736, + 3752, + 3760, + 3768, + 3776, + 3784, + 3792, + 3800, + 3816, + 3824, + 3840, + 3856, + 3872, + 3880, + 3888, + 3904, + 3912, + 3920, + 3928, + 3936, + 3944, + 3952, + 3960, + 3968, + 3984, + 4000, + 4008, + 4016, + 4024, + 4040, + 4048, + 4064, + 4080, + 4088, + 4096, + 4104, + 4112, + 4120, + 4128, + 4144, + 4160, + 4168, + 4184, + 4192, + 4200, + 4216, + 4224, + 4232, + 4248, + 4264, + 4272, + 4288, + 4304, + 4312, + 4320, + 4328, + 4336, + 4352, + 4360, + 4376, + 4392, + 4408, + 4416, + 4424, + 4432, + 4440, + 4456, + 4464, + 4472, + 4480, + 4488, + 4504, + 4520, + 4536, + 4544, + 4552, + 4560, + 4576, + 4592, + 4600, + 4608, + 4616, + 4624, + 4632, + 4640, + 4656, + 4672, + 4680, + 4688, + 4696, + 4712, + 4720, + 4728, + 4736, + 4744, + 4752, + 4768, + 4776, + 4784, + 4800, + 4808, + 4816, + 4824, + 4832, + 4840, + 4848, + 4856, + 4864, + 4880, + 4896, + 4912, + 4928, + 4944, + 4960, + 4976, + 4984, + 4992, + 5008, + 5024, + 5032, + 5040, +}; + + +#define SCANKEYWORDS_NUM_KEYWORDS 473 + diff --git a/contrib/bench_hash/bench_hash--1.0.sql b/contrib/bench_hash/bench_hash--1.0.sql new file mode 100644 index 0000000000..43ce946bf6 --- /dev/null +++ b/contrib/bench_hash/bench_hash--1.0.sql @@ 
-0,0 +1,30 @@ +/* src/test/modules/bench_hash/bench_hash--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION bench_hash" to load this file. \quit + +CREATE FUNCTION bench_string_hash(int4) +RETURNS int +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + + +CREATE FUNCTION bench_cstring_hash_unaligned(int4) +RETURNS int +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION bench_cstring_hash_aligned(int4) +RETURNS int +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION bench_pgstat_hash(int4) +RETURNS int +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION bench_pgstat_hash_FH(int4) +RETURNS int +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; diff --git a/contrib/bench_hash/bench_hash.c b/contrib/bench_hash/bench_hash.c new file mode 100644 index 0000000000..017cf333ce --- /dev/null +++ b/contrib/bench_hash/bench_hash.c @@ -0,0 +1,169 @@ +/*------------------------------------------------------------------------- + * + * bench_hash.c + * + * Copyright (c) 2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/bench_hash/bench_hash.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "fmgr.h" + +PG_MODULE_MAGIC; + +#include "aligned_keywords.h" + +#include "common/hashfn.h" +#include "common/hashfn_unstable.h" +#include "miscadmin.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" + + +PG_FUNCTION_INFO_V1(bench_string_hash); +Datum +bench_string_hash(PG_FUNCTION_ARGS) +{ + int32 count = PG_GETARG_INT32(0); + uint32 hash = 0; + + while (count-- > 0) + { + for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++) + { + int idx = word_offsets[i]; + int s_len = strlen(&aligned_words[idx]); + hash += hash_bytes((const unsigned char *) &aligned_words[idx], s_len); + } + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_INT32(hash); +} + + +PG_FUNCTION_INFO_V1(bench_cstring_hash_unaligned); +Datum 
+bench_cstring_hash_unaligned(PG_FUNCTION_ARGS) +{ + int32 count = PG_GETARG_INT32(0); + uint32 hash = 0; + + char* p = (char*) palloc(5048); + memcpy(p, aligned_words, 5048); + + while (count-- > 0) + { + for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++) + { + int idx = word_offsets[i]; + int s_len; + fasthash_state hs; + + fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); + s_len = fasthash_accum_cstring_unaligned(&hs, &p[idx]); + hash += fasthash_final32(&hs, s_len); + } + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_INT32(hash); +} + + +PG_FUNCTION_INFO_V1(bench_cstring_hash_aligned); +Datum +bench_cstring_hash_aligned(PG_FUNCTION_ARGS) +{ + int32 count = PG_GETARG_INT32(0); + uint32 hash = 0; + + char* p = (char*) palloc(5048); + memcpy(p, aligned_words, 5048); + + while (count-- > 0) + { + for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS; i++) + { + int idx = word_offsets[i]; + int s_len; + fasthash_state hs; + + fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); + s_len = fasthash_accum_cstring_aligned(&hs, &p[idx]); + hash += fasthash_final32(&hs, s_len); + } + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_INT32(hash); +} + +static inline uint32 +pgstat_hash_hash_key_orig(const void *d, size_t size, void *arg) +{ + const PgStat_HashKey *key = (PgStat_HashKey *) d; + uint32 hash; + + Assert(size == sizeof(PgStat_HashKey) && arg == NULL); + + hash = murmurhash32(key->kind); + hash = hash_combine(hash, murmurhash32(key->dboid)); + hash = hash_combine(hash, murmurhash32(key->objoid)); + + return hash; +} + +static inline uint32 +pgstat_hash_hash_key_FH(const void *d, size_t size, void *arg) +{ + const PgStat_HashKey *key = (PgStat_HashKey *) d; + + Assert(size == sizeof(PgStat_HashKey) && arg == NULL); + + return fasthash32((const char *) key, size, 0); +} + +PG_FUNCTION_INFO_V1(bench_pgstat_hash); +Datum +bench_pgstat_hash(PG_FUNCTION_ARGS) +{ + int32 count = PG_GETARG_INT32(0); + uint32 hash = 0; + + while (count-- > 0) + { + for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++) + { + int 
idx = word_offsets[i]; + hash += pgstat_hash_hash_key_orig((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL); + } + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_INT32(hash); +} + +PG_FUNCTION_INFO_V1(bench_pgstat_hash_fh); +Datum +bench_pgstat_hash_fh(PG_FUNCTION_ARGS) +{ + int32 count = PG_GETARG_INT32(0); + uint32 hash = 0; + + while (count-- > 0) + { + for (int i=0; i< SCANKEYWORDS_NUM_KEYWORDS - 3; i++) + { + int idx = word_offsets[i]; + hash += pgstat_hash_hash_key_FH((const void *) &aligned_words[idx], sizeof(PgStat_HashKey), NULL); + } + CHECK_FOR_INTERRUPTS(); + } + + PG_RETURN_INT32(hash); +} diff --git a/contrib/bench_hash/bench_hash.control b/contrib/bench_hash/bench_hash.control new file mode 100644 index 0000000000..ffc63858d2 --- /dev/null +++ b/contrib/bench_hash/bench_hash.control @@ -0,0 +1,5 @@ +# bench_hash extension +comment = 'benchmark some hash functions' +default_version = '1.0' +module_pathname = '$libdir/bench_hash' +relocatable = true diff --git a/contrib/bench_hash/meson.build b/contrib/bench_hash/meson.build new file mode 100644 index 0000000000..f8d88d8b5c --- /dev/null +++ b/contrib/bench_hash/meson.build @@ -0,0 +1,19 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +bench_hash_sources = files( + 'bench_hash.c', +) + +bench_hash = shared_module('bench_hash', + bench_hash_sources, +# link_with: pgport_srv, + kwargs: contrib_mod_args, +) +contrib_targets += bench_hash + +install_data( + 'bench_hash.control', + 'bench_hash--1.0.sql', + kwargs: contrib_data_args, +) + diff --git a/contrib/meson.build b/contrib/meson.build index c12dc906ca..1c16f0d0a8 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -12,6 +12,7 @@ contrib_doc_args = { 'install_dir': contrib_doc_dir, } +subdir('bench_hash') subdir('adminpack') subdir('amcheck') subdir('auth_delay') diff --git a/runbench.sh b/runbench.sh new file mode 100755 index 0000000000..dfad1afa7f --- /dev/null +++ b/runbench.sh @@ -0,0 +1,16 @@ +# create 
extension bench_hash; + +cat bench_string_hash.sql +pgbench -n -T 20 -f bench_string_hash.sql -M prepared | grep latency + +cat bench_cstr_unaligned.sql +pgbench -n -T 20 -f bench_cstr_unaligned.sql -M prepared | grep latency + +cat bench_cstr_aligned.sql +pgbench -n -T 20 -f bench_cstr_aligned.sql -M prepared | grep latency + +cat bench_pgstat_orig.sql +pgbench -n -T 20 -f bench_pgstat_orig.sql -M prepared | grep latency + +cat bench_pgstat_fh.sql +pgbench -n -T 20 -f bench_pgstat_fh.sql -M prepared | grep latency -- 2.43.0
From b9cbedf2ecda8d2929f3ec60f19532529259c7bb Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Mon, 18 Dec 2023 11:10:28 +0700 Subject: [PATCH v13 4/6] Use fasthash for the search path cache This serves to demonstrate the incremental API, allowing inlined hash calculation without a strlen call. This brings the general case performance closer to the optimization done in commit a86c61c9ee. Jeff Davis, with some adjustments by me Discussion: https://postgr.es/m/b40292c99e623defe5eadedab1d438cf51a4107c.camel%40j-davis.com --- src/backend/catalog/namespace.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c index eecc50a958..d1eae2a2d4 100644 --- a/src/backend/catalog/namespace.c +++ b/src/backend/catalog/namespace.c @@ -41,7 +41,7 @@ #include "catalog/pg_ts_template.h" #include "catalog/pg_type.h" #include "commands/dbcommands.h" -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -253,11 +253,16 @@ static bool MatchNamedCall(HeapTuple proctup, int nargs, List *argnames, static inline uint32 spcachekey_hash(SearchPathCacheKey key) { - const unsigned char *bytes = (const unsigned char *) key.searchPath; - int blen = strlen(key.searchPath); + fasthash_state hs; + int sp_len; - return hash_combine(hash_bytes(bytes, blen), - hash_uint32(key.roleid)); + fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); + + fasthash_accum(&hs, (const char*) &key.roleid, sizeof(Oid)); + sp_len = fasthash_accum_cstring(&hs, key.searchPath); + + /* pass the length to tweak the final mix */ + return fasthash_final32(&hs, sp_len); } static inline bool -- 2.43.0
From ada0dcec91474e2c89afd79e6c9c35eeae88d875 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Sat, 9 Dec 2023 16:24:56 +0700 Subject: [PATCH v13 2/6] Use fasthash for pgstat_hash_hash_key Previously this called the 32-bit Murmur finalizer on the three elements, then joined with hash_combine(). Fasthash is simpler, executes faster and takes up less binary space. While the collision and bias behavior were almost certainly fine with the previous coding, now we have measurements to prove it. Discussion: --- src/include/utils/pgstat_internal.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 9862589f36..bbbb35bcd8 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -14,7 +14,7 @@ #define PGSTAT_INTERNAL_H -#include "common/hashfn.h" +#include "common/hashfn_unstable.h" #include "lib/dshash.h" #include "lib/ilist.h" #include "pgstat.h" @@ -777,15 +777,10 @@ static inline uint32 pgstat_hash_hash_key(const void *d, size_t size, void *arg) { const PgStat_HashKey *key = (PgStat_HashKey *) d; - uint32 hash; Assert(size == sizeof(PgStat_HashKey) && arg == NULL); - hash = murmurhash32(key->kind); - hash = hash_combine(hash, murmurhash32(key->dboid)); - hash = hash_combine(hash, murmurhash32(key->objoid)); - - return hash; + return fasthash32((const char *) key, size, 0); } /* -- 2.43.0
From 8117b3123216d8a3b2f2bf79804745b29733ddd1 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Mon, 18 Dec 2023 11:10:28 +0700 Subject: [PATCH v13 3/6] Add optimized string hashing to hashfn_unstable.h Given an already-initialized hash state and a C-string, accumulate the hash of the string into the hash state and return the length for the caller to (optionally) save for the finalizer. This avoids a strlen call. If the string pointer is aligned, we can use a word-at-a-time algorithm both for NUL lookahead and for computing the remainder length up to the NUL. The latter was inspired by NetBSD's strlen(). The aligned case is only used on 64-bit platforms, since it's not worth the extra complexity for 32-bit. Jeff Davis and John Naylor Discussion: https://postgr.es/m/3820f030fd008ff14134b3e9ce5cc6dd623ed479.camel%40j-davis.com --- src/include/common/hashfn_unstable.h | 114 ++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h index 5e882532d2..8d8952beb3 100644 --- a/src/include/common/hashfn_unstable.h +++ b/src/include/common/hashfn_unstable.h @@ -16,6 +16,9 @@ and may differ by hardware platform. #ifndef HASHFN_UNSTABLE_H #define HASHFN_UNSTABLE_H +#include "port/pg_bitutils.h" +#include "port/pg_bswap.h" + /* * fasthash is a modification of code taken from * https://code.google.com/archive/p/fast-hash/source/default/source @@ -57,8 +60,8 @@ and may differ by hardware platform. 
fasthash_state hs; fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); -fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>); -return fasthash_final32(&hs, <final length>); +len = fasthash_accum_cstring(&hs, *str); +return fasthash_final32(&hs, len); */ @@ -66,11 +69,12 @@ return fasthash_final32(&hs, <final length>); typedef struct fasthash_state { uint64 accum; -#define FH_SIZEOF_ACCUM sizeof(uint64) uint64 hash; } fasthash_state; +#define FH_SIZEOF_ACCUM 8 +StaticAssertDecl(sizeof(((fasthash_state*) 0)->accum) == FH_SIZEOF_ACCUM, "wrong size for size macro"); #define FH_UNKNOWN_LENGTH 1 @@ -151,6 +155,110 @@ fasthash_accum(fasthash_state *hs, const char *k, int len) fasthash_combine(hs); } +/* From: https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */ +#define haszero64(v) \ + (((v) - 0x0101010101010101UL) & ~(v) & 0x8080808080808080UL) + +/* + * With an aligned pointer, we consume the string a word at a time. Loading + * the word containing the NUL terminator cannot segfault since page boundaries + * are MAXALIGN'd. For that last word, only use bytes up to the NUL for the hash. + * The algorithm was adopted from NetBSD's strlen. + */ +static inline int +fasthash_accum_cstring_aligned(fasthash_state *hs, const char *str) +{ + const char *const start = str; + const char *buf = start; + int remainder; + uint64 zero_bytes; + + Assert(PointerIsAligned(start, uint64)); + for (;;) + { + uint64 chunk = *(uint64 *)buf; + + /* + * With little-endian representation, we can use this calculation, + * which sets bits in the first byte in the result word + * that corresponds to a zero byte in the original word. + * The rest of the bytes are indeterminate, so cannot be used + * on big-endian machines without either swapping or a bytewise check. 
+ */ +#ifdef WORDS_BIGENDIAN + zero_bytes = haszero64(pg_bswap(chunk)); +#else + zero_bytes = haszero64(chunk); +#endif + if (zero_bytes) + break; + + hs->accum = chunk; + fasthash_combine(hs); + buf += FH_SIZEOF_ACCUM; + } + + /* + * Bytes with set bits will be 0x80, so + * calculate the first occurrence of a zero byte within the input word + * by counting the number of trailing (for LE) + * zeros and dividing the result by 8. + */ + remainder = pg_rightmost_one_pos64(zero_bytes) / BITS_PER_BYTE; + fasthash_accum(hs, buf, remainder); + buf += remainder; + + return buf - start; +} + +static inline int +fasthash_accum_cstring_unaligned(fasthash_state *hs, const char *str) +{ + const char *const start = str; + const char *buf = str; + + while (*buf) + { + int chunk_len = 0; + + while (chunk_len < FH_SIZEOF_ACCUM && buf[chunk_len] != '\0') + chunk_len++; + + fasthash_accum(hs, buf, chunk_len); + buf += chunk_len; + } + + return buf - start; +} + +/* + * Accumulate the input into the hash state + * and return the length of the string. + */ +static inline int +fasthash_accum_cstring(fasthash_state *hs, const char *str) +{ +#if SIZEOF_VOID_P >= 8 + + int len; +#ifdef USE_ASSERT_CHECKING + int len_check; + fasthash_state hs_check; + + memcpy(&hs_check, hs, sizeof(fasthash_state)); + len_check = fasthash_accum_cstring_unaligned(&hs_check, str); +#endif + if (PointerIsAligned(str, uint64)) + { + len = fasthash_accum_cstring_aligned(hs, str); + Assert(hs_check.hash == hs->hash && len_check == len); + return len; + } +#endif /* SIZEOF_VOID_P */ + + return fasthash_accum_cstring_unaligned(hs, str); +} + /* * The finalizer * -- 2.43.0
From a44af6d75684ff4d5d039041b2827be4716cd159 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Mon, 27 Nov 2023 17:03:38 +0700 Subject: [PATCH v13 1/6] Add inlineable, incremental hash functions for in-memory use A number of places hash NUL-terminated strings. Currently, we need to call strlen first because hash_bytes needs the length. For short strings the C library call has a large overhead, and strlen calls show up prominently in profiles. Per suggestion from Andres Freund, add hash functions with an incremental interface. Instead of trying to whack around hash_bytes while maintaining its current behavior on all platforms, we base this work on fasthash (MIT licensed) which is simple, faster than hash_bytes for inputs over 12 bytes long, and also passes the hash function testing suite SMHasher. The original functions have been reimplemented using our new incremental interface to validate that this method will still give the same answer, provided we have the input length ahead of time. Future work will use these for some existing uses of simplehash and dynahash. The new functionality lives in a new header hashfn_unstable.h. The name implies we have the freedom to change things across versions that would be unacceptable for our other hash functions that are used for e.g. hash indexes and hash partitioning. As such, these should only be used for in-memory data structures like hash tables. There is also no guarantee of being endian-independent. 
Reviewed by Jeff Davis and (in an earlier version) Heikki Linnakangas Discussion: https://postgr.es/m/20231122223432.lywt4yz2bn7tlp27%40awork3.anarazel.de --- src/include/common/hashfn_unstable.h | 219 +++++++++++++++++++++++++++ src/tools/pgindent/typedefs.list | 1 + 2 files changed, 220 insertions(+) create mode 100644 src/include/common/hashfn_unstable.h diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h new file mode 100644 index 0000000000..5e882532d2 --- /dev/null +++ b/src/include/common/hashfn_unstable.h @@ -0,0 +1,219 @@ +/* +Building blocks for creating fast inlineable hash functions. The +unstable designation is in contrast to hashfn.h, which cannot break +compatibility because hashes can be written to disk and so must produce +the same hashes between versions. + +The functions in this file are not guaranteed to be stable between versions, +and may differ by hardware platform. + + * + * Portions Copyright (c) 2018-2023, PostgreSQL Global Development Group + * + * src/include/common/hashfn_unstable.h + */ + +#ifndef HASHFN_UNSTABLE_H +#define HASHFN_UNSTABLE_H + +/* + * fasthash is a modification of code taken from + * https://code.google.com/archive/p/fast-hash/source/default/source + * under the terms of the MIT license. The original copyright + * notice follows: + */ + +/* The MIT License + + Copyright (C) 2012 Zilong Tan (eric.zl...@gmail.com) + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, copy, + modify, merge, publish, distribute, sublicense, and/or sell copies + of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* + * There are two interfaces available. Examples assume a 32-bit hash: + * + * 1) When the length is known ahead of time, use fasthash32(). + * 2) When the length is not known, use the incremental interface. To + * ensure good results, keep track of the length and pass it to the finalizer: + +fasthash_state hs; +fasthash_init(&hs, FH_UNKNOWN_LENGTH, 0); +fasthash_accum(&hs, <pointer to a chunk of the input>, <chunk length, up to 8>); +return fasthash_final32(&hs, <final length>); + +*/ + + +typedef struct fasthash_state +{ + uint64 accum; +#define FH_SIZEOF_ACCUM sizeof(uint64) + + uint64 hash; +} fasthash_state; + + +#define FH_UNKNOWN_LENGTH 1 + +/* + * Initialize the hash state. + * + * "len" is the length of the input, if known ahead of time. + * If that is not known, pass FH_UNKNOWN_LENGTH. + * "seed" can be zero. 
+ */ +static inline void +fasthash_init(fasthash_state *hs, int len, uint64 seed) +{ + memset(hs, 0, sizeof(fasthash_state)); + hs->hash = seed ^ (len * 0x880355f21e6d1965); +} + +/* Both the finalizer and part of the combining step */ +static inline uint64 +fasthash_mix(uint64 h, uint64 tweak) +{ + h ^= (h >> 23) + tweak; + h *= 0x2127599bf4325c37; + h ^= h >> 47; + return h; +} + +static inline void +fasthash_combine(fasthash_state *hs) +{ + hs->hash ^= fasthash_mix(hs->accum, 0); + hs->hash *= 0x880355f21e6d1965; + + /* reset hash state for next input */ + hs->accum = 0; +} + +/* Accumulate up to 8 bytes of input and combine it into the hash */ +static inline void +fasthash_accum(fasthash_state *hs, const char *k, int len) +{ + uint32 lower_four; + + Assert(hs->accum == 0); + Assert(len <= FH_SIZEOF_ACCUM); + + switch (len) + { + case 8: + memcpy(&hs->accum, k, 8); + break; + case 7: + hs->accum |= (uint64) k[6] << 48; + /* FALLTHROUGH */ + case 6: + hs->accum |= (uint64) k[5] << 40; + /* FALLTHROUGH */ + case 5: + hs->accum |= (uint64) k[4] << 32; + /* FALLTHROUGH */ + case 4: + memcpy(&lower_four, k, sizeof(lower_four)); + hs->accum |= lower_four; + break; + case 3: + hs->accum |= (uint64) k[2] << 16; + /* FALLTHROUGH */ + case 2: + hs->accum |= (uint64) k[1] << 8; + /* FALLTHROUGH */ + case 1: + hs->accum |= (uint64) k[0]; + break; + case 0: + return; + } + + fasthash_combine(hs); +} + +/* + * The finalizer + * + * "tweak" is the input length when the caller doesn't know + * the length ahead of time, such as for NUL-terminated + * strings, otherwise zero. + */ +static inline uint64 +fasthash_final64(fasthash_state *hs, uint64 tweak) +{ + return fasthash_mix(hs->hash, tweak); +} + +/* + * Reduce a 64-bit hash to a 32-bit hash. + * + * This provides a bit more additional mixing compared to + * just taking the lower 32-bits. 
+ */ +static inline uint32 +fasthash_reduce32(uint64 h) +{ + /* + * The following trick converts the 64-bit hashcode to Fermat residue, + * which shall retain information from both the higher and lower parts of + * hashcode. + */ + return h - (h >> 32); +} + +static inline uint32 +fasthash_final32(fasthash_state *hs, uint64 tweak) +{ + return fasthash_reduce32(fasthash_final64(hs, tweak)); +} + +/* + * The original fasthash64 function, re-implemented using + * the incremental interface. + */ +static inline uint64 +fasthash64(const char *k, int len, uint64 seed) +{ + fasthash_state hs; + + fasthash_init(&hs, len, seed); + + while (len >= FH_SIZEOF_ACCUM) + { + fasthash_accum(&hs, k, FH_SIZEOF_ACCUM); + k += FH_SIZEOF_ACCUM; + len -= FH_SIZEOF_ACCUM; + } + + fasthash_accum(&hs, k, len); + return fasthash_final64(&hs, 0); +} + +/* Like fasthash64, but returns a 32-bit hash */ +static inline uint64 +fasthash32(const char *k, int len, uint64 seed) +{ + return fasthash_reduce32(fasthash64(k, len, seed)); +} + +#endif /* HASHFN_UNSTABLE_H */ diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 5fd46b7bd1..eb2e6b6309 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3329,6 +3329,7 @@ exec_thread_arg execution_state explain_get_index_name_hook_type f_smgr +fasthash_state fd_set fe_scram_state fe_scram_state_enum -- 2.43.0