For the bootstrap data conversion, it was desirable for postgres.bki to remain unchanged, so some ugly quoting hacks were added to genbki.pl to match the quoting conventions in the DATA() lines. At this point, it's possible (and, I think, worthwhile) to remove those and, along the way, simplify the tokenizing rules in bootscanner.l. This will result in some largish changes to postgres.bki, but they're easy to reason about and have no functional consequence. "make check" passes.
Patch 0001 removes the special-case rule that dashes, negative numbers, and octals remain unquoted, so handling of these cases can now be removed from bootscanner.l as well. Change in postgres.bki: Dashes and negative numbers will now be quoted. Patch 0002 removes type- and attribute-specific ad-hoc quoting rules. Change in postgres.bki: Values of array-like types in pg_proc that have only one element will no longer be quoted. Currently, Catalog.pm, genbki.pl, and bootscanner.l all have different ideas about how to parse and format array types. Patch 0003 rips all that out and does it once and for all in Catalog.pm. Change in postgres.bki: Array types now look like '_foo' rather than 'foo[]'. -John Naylor
From 492f701535ccbb7a728a1c5b38bd4ec993908ae8 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Sun, 15 Apr 2018 22:59:44 +0700 Subject: [PATCH 1/3] Quote all non-word characters in postgres.bki This simplifies both genbki.pl and bootscanner.l. As a result, negative numbers and lone dashes are now double-quoted in postgres.bki. As of commit 4f85f664695, there are no longer octals in the data, but if they're added in the future, they will be double-quoted as well. --- src/backend/bootstrap/bootscanner.l | 3 +-- src/backend/catalog/genbki.pl | 7 +------ 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l index 2ce6e52..d9af1cb 100644 --- a/src/backend/bootstrap/bootscanner.l +++ b/src/backend/bootstrap/bootscanner.l @@ -66,8 +66,7 @@ static int yyline = 1; /* line number for error reporting */ D [0-9] -oct \\{D}{D}{D} -id ([A-Za-z0-9_]|{oct}|\-)+ +id [A-Za-z0-9_]+ sid \"([^\"])*\" arrayid [A-Za-z0-9_]+\[{D}*\] diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index a1894f8..c44f5f8 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -678,12 +678,7 @@ sub print_bki_insert or $atttype eq 'int2vector' or $atttype =~ /\[\]$/ - # Quote strings that have non-word characters. We make - # exceptions for values that are octals or negative numbers, - # for the same historical reason as above. - or ( $bki_value =~ /\W/ - and $bki_value !~ /^\\\d{3}$/ - and $bki_value !~ /^-\d*$/)); + or ($bki_value =~ /\W/)); # Contains non-word characters push @bki_values, $bki_value; } -- 2.7.4
From 6b06ff04f412319e3d86b090caf8f47da3430f00 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Tue, 17 Apr 2018 18:04:04 +0700 Subject: [PATCH] Remove historical hard-coded quoting rules This also allows us to skip checking for '_null_'. Since the previous commit caused all minus-signs to be quoted, removing oprname from the quoting rules is a no-op. In passing, remove check for whitespace, since it's redundant with the check for non-word characters. --- src/backend/catalog/genbki.pl | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index 25522fa..2d63907 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -667,19 +667,8 @@ sub print_bki_insert $bki_value = '' if $bki_value eq '\0'; $bki_value = sprintf(qq'"%s"', $bki_value) - if $bki_value ne '_null_' - and $bki_value !~ /^"[^"]+"$/ + if $bki_value !~ /^"[^"]+"$/ # Not already quoted and ( length($bki_value) == 0 # Empty string - or $bki_value =~ /\s/ # Contains whitespace - - # To preserve historical formatting, operator names are - # always quoted. Likewise for values of multi-element types, - # even if they only contain a single element. - or $attname eq 'oprname' - or $atttype eq 'oidvector' - or $atttype eq 'int2vector' - or $atttype =~ /\[\]$/ - or ($bki_value =~ /\W/)); # Contains non-word characters push @bki_values, $bki_value; -- 2.7.4
From 40da2f9b7f9781093b443a2ed02237509f224f83 Mon Sep 17 00:00:00 2001 From: John Naylor <jcnay...@gmail.com> Date: Mon, 16 Apr 2018 00:46:47 +0700 Subject: [PATCH] Remove special array handling from bootstrap.c Formerly, Catalog.pm turned a given C array type declaration in the catalog header files into a SQL type, i.e. 'foo[]'. Along the way, genbki.pl turned this into '_foo' for the purpose of type lookups, but wrote 'foo[]' to postgres.bki. During bootstrap, bootscanner.l had to have a special case rule to tokenize this, and then MapArrayTypeName() would turn 'foo[]' into '_foo' one more time. This seems pointless, so do the right thing in Catalog.pm and rip out all downstream array type munging logic. It also seems strange to single out array type names to check that the length is less than NAMEDATALEN, so this behavior is not kept. The SGML documentation is not updated, since it only mentions array types in the context of header files. --- src/backend/bootstrap/bootscanner.l | 6 ------ src/backend/bootstrap/bootstrap.c | 30 ------------------------------ src/backend/catalog/Catalog.pm | 8 ++++++-- src/backend/catalog/genbki.pl | 8 ++------ src/include/bootstrap/bootstrap.h | 2 -- 5 files changed, 8 insertions(+), 46 deletions(-) diff --git a/src/backend/bootstrap/bootscanner.l b/src/backend/bootstrap/bootscanner.l index d9af1cb..ab474e6 100644 --- a/src/backend/bootstrap/bootscanner.l +++ b/src/backend/bootstrap/bootscanner.l @@ -65,10 +65,8 @@ static int yyline = 1; /* line number for error reporting */ %option prefix="boot_yy" -D [0-9] id [A-Za-z0-9_]+ sid \"([^\"])*\" -arrayid [A-Za-z0-9_]+\[{D}*\] %% @@ -111,10 +109,6 @@ insert { return INSERT_TUPLE; } "NOT" { return XNOT; } "NULL" { return XNULL; } -{arrayid} { - yylval.str = MapArrayTypeName(yytext); - return ID; - } {id} { yylval.str = scanstr(yytext); return ID; diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 59cd4b1..a148bdc 100644 --- 
a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -1037,36 +1037,6 @@ AllocateAttribute(void) } /* - * MapArrayTypeName - * - * Given a type name, produce the corresponding array type name by prepending - * '_' and truncating as needed to fit in NAMEDATALEN-1 bytes. This is only - * used in bootstrap mode, so we can get away with assuming that the input is - * ASCII and we don't need multibyte-aware truncation. - * - * The given string normally ends with '[]' or '[digits]'; we discard that. - * - * The result is a palloc'd string. - */ -char * -MapArrayTypeName(const char *s) -{ - int i, - j; - char newStr[NAMEDATALEN]; - - newStr[0] = '_'; - j = 1; - for (i = 0; i < NAMEDATALEN - 2 && s[i] != '['; i++, j++) - newStr[j] = s[i]; - - newStr[j] = '\0'; - - return pstrdup(newStr); -} - - -/* * index_register() -- record an index that has been set up for building * later. * diff --git a/src/backend/catalog/Catalog.pm b/src/backend/catalog/Catalog.pm index c32ce2f..07057c1 100644 --- a/src/backend/catalog/Catalog.pm +++ b/src/backend/catalog/Catalog.pm @@ -161,10 +161,14 @@ sub ParseHeader { $atttype = $RENAME_ATTTYPE{$atttype}; } - if ($attname =~ /(.*)\[.*\]/) # array attribute + + # If the C name ends with '[]' or '[digits]', we have + # an array type, so we discard that from the name and + # prepend '_' to the type. + if ($attname =~ /(\w+)\[\d*\]/) { $attname = $1; - $atttype .= '[]'; + $atttype = '_' . $atttype; } $column{type} = $atttype; diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl index f937a37..f6adbe7 100644 --- a/src/backend/catalog/genbki.pl +++ b/src/backend/catalog/genbki.pl @@ -350,7 +350,7 @@ EOM # Replace OID synonyms with OIDs per the appropriate lookup rule. # - # If the column type is oidvector or oid[], we have to replace + # If the column type is oidvector or _oid, we have to replace # each element of the array as per the lookup rule. 
if ($column->{lookup}) { @@ -368,7 +368,7 @@ EOM \%bki_values, @lookupnames); $bki_values{$attname} = join(' ', @lookupoids); } - elsif ($atttype eq 'oid[]') + elsif ($atttype eq '_oid') { if ($bki_values{$attname} ne '_null_') { @@ -597,10 +597,6 @@ sub morph_row_for_pgattr $row->{attname} = $attname; - # Adjust type name for arrays: foo[] becomes _foo, so we can look it up in - # pg_type - $atttype = '_' . $1 if $atttype =~ /(.+)\[\]$/; - # Copy the type data from pg_type, and add some type-dependent items my $type = $types{$atttype}; diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h index 4f41294..7856669 100644 --- a/src/include/bootstrap/bootstrap.h +++ b/src/include/bootstrap/bootstrap.h @@ -44,8 +44,6 @@ extern void InsertOneTuple(Oid objectid); extern void InsertOneValue(char *value, int i); extern void InsertOneNull(int i); -extern char *MapArrayTypeName(const char *s); - extern void index_register(Oid heap, Oid ind, IndexInfo *indexInfo); extern void build_indices(void); -- 2.7.4