On Mon, Apr 29, 2024 at 1:17 PM Corey Huinker <[email protected]> wrote:
>>
>> I've split it into 7 patches.
>> Each patch splits one <sect1> out into a separate new file.
>
>
> Seems like a good start. Looking at the diffs of these, I wonder if we would
> be better off with a func/ directory, each function gets its own file in that
> dir, and either these files above include the individual files, or the
> original func.sgml just becomes the organizer of all the functions. That
> would allow us to do future reorganizations with minimal churn, make
> validation of this patch a bit more straightforward, and make it easier for
> future editors to find the function they need to edit.
Looking back:
The patch is big, and there is no convenient way to review/validate it,
so I created a Python script to automate the split.
We can review the Python script instead.
(I wrote it by googling around; I know little about Python.)
* create new files for holding the content.
func-string.sgml
func-matching.sgml
func-datetime.sgml
func-json.sgml
func-aggregate.sgml
func-info.sgml
func-admin.sgml
* locate the parts that need to be copied into each newly created file, based
on line number.
line number pattern is mentioned here:
http://postgr.es/m/CACJufxEcMjjn-m6fpC2wXHsQbE5nyd%3Dxt6k-jDizBVUKK6O4KQ%40mail.gmail.com
* insert placeholder string in func.sgml:
&func-string;
&func-matching;
&func-datetime;
&func-json;
&func-aggregate;
&func-info;
&func-admin;
* copy the parts to new files.
* validate each newly created file (it must have exactly 2 occurrences of "sect1").
* delete the parts from func.sgml, since they have already been copied to the new files.
sed --in-place "2408,4180d ; 5330,7760d ; 8942,11127d ; 15502,19436d ;
21567,22985d ; 24346,28017d ; 28020,30714d " func.sgml
* manually change doc/src/sgml/filelist.sgml
<!ENTITY func SYSTEM "func.sgml">
+<!ENTITY func-string SYSTEM "func-string.sgml">
+<!ENTITY func-matching SYSTEM "func-matching.sgml">
+<!ENTITY func-datetime SYSTEM "func-datetime.sgml">
+<!ENTITY func-json SYSTEM "func-json.sgml">
+<!ENTITY func-aggregate SYSTEM "func-aggregate.sgml">
+<!ENTITY func-info SYSTEM "func-info.sgml">
+<!ENTITY func-admin SYSTEM "func-admin.sgml">
There are 2 requirements.
1. manually change doc/src/sgml/filelist.sgml as mentioned before;
2. in the python script, at line 35, I use
"os.chdir("/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml")"
you need to change it to your "doc/src/sgml" directory accordingly.
import subprocess
import os
import re
# Entity references written into func.sgml where each section is taken out.
func_string_place_holder = "&func-string;\n"
func_matching_place_holder = "&func-matching;\n"
func_datetime_place_holder = "&func-datetime;\n"
func_json_place_holder = "&func-json;\n"
func_aggregate_place_holder = "&func-aggregate;\n"
func_info_place_holder = "&func-info;\n"
func_admin_place_holder = "&func-admin;\n"

# Names of the new files, one per extracted <sect1>.
func_string = "func-string.sgml"
func_matching = "func-matching.sgml"
func_datetime = "func-datetime.sgml"
func_json = "func-json.sgml"
func_aggregate = "func-aggregate.sgml"
func_info = "func-info.sgml"
func_admin = "func-admin.sgml"

# First/last line number of every section.  The negative start value is what
# the precheck functions treat as "not found".
func_string_line_begin_lineno = func_string_line_end_lineno = -1
func_matching_begin_lineno = func_matching_end_lineno = -1
func_datetime_begin_lineno = func_datetime_end_lineno = -1
func_json_begin_lineno = func_json_end_lineno = -1
func_aggregate_begin = func_aggregate_end = -1
func_info_begin_lineno = func_info_end_lineno = -1
func_admin_begin_lineno = func_admin_end_lineno = -1

# Every relative file name used by this script is resolved against this
# directory; adjust it to your own "doc/src/sgml" checkout.
os.chdir("/home/jian/Desktop/pg_src/src7/postgres/doc/src/sgml")
target_file = "func.sgml"

# Create the (empty) output files up front.
for _new_file in (
    func_string,
    func_matching,
    func_datetime,
    func_json,
    func_aggregate,
    func_info,
    func_admin,
):
    subprocess.call(["touch", _new_file])
def printall():
    """Print every section line-number variable as ``name:value``, one per line."""
    report = (
        ("func_string_line_begin_lineno", func_string_line_begin_lineno),
        ("func_string_line_end_lineno", func_string_line_end_lineno),
        ("func_matching_begin_lineno", func_matching_begin_lineno),
        ("func_matching_end_lineno", func_matching_end_lineno),
        ("func_datetime_begin_lineno", func_datetime_begin_lineno),
        ("func_datetime_end_lineno", func_datetime_end_lineno),
        ("func_json_begin_lineno", func_json_begin_lineno),
        ("func_json_end_lineno", func_json_end_lineno),
        ("func_aggregate_begin", func_aggregate_begin),
        ("func_aggregate_end", func_aggregate_end),
        ("func_info_begin_lineno", func_info_begin_lineno),
        ("func_info_end_lineno", func_info_end_lineno),
        ("func_admin_begin_lineno", func_admin_begin_lineno),
        ("func_admin_end_lineno", func_admin_end_lineno),
    )
    for label, value in report:
        print(f'{label}:{value}')
def get_line_number(file_name: str):
    """Record where each extracted <sect1> begins and ends in *file_name*.

    The file is scanned once.  Whenever a line contains one of the known
    ``<sect1 id="...">`` opening tags, the matching module-level line-number
    variable is updated (line numbers are 1-based).  A section's end is set
    to the line just before the opening tag of a designated later section.
    Tags that never occur leave their variables untouched, so values from an
    earlier call are kept.

    Args:
        file_name: path of the SGML file to scan.
    """
    global func_string_line_begin_lineno, func_string_line_end_lineno
    global func_matching_begin_lineno, func_matching_end_lineno
    global func_datetime_begin_lineno, func_datetime_end_lineno
    global func_json_begin_lineno, func_json_end_lineno
    global func_aggregate_begin, func_aggregate_end
    global func_info_begin_lineno, func_info_end_lineno
    global func_admin_begin_lineno, func_admin_end_lineno

    # The scan only reads, so open read-only; 'r+' would needlessly demand
    # write permission on the file.
    with open(file_name, 'r') as f:
        for i, line in enumerate(f, 1):
            if '<sect1 id="functions-string">' in line:
                func_string_line_begin_lineno = i
            elif '<sect1 id="functions-binarystring">' in line:
                func_string_line_end_lineno = i - 1
            elif '<sect1 id="functions-matching">' in line:
                func_matching_begin_lineno = i
            elif '<sect1 id="functions-formatting">' in line:
                func_matching_end_lineno = i - 1
            elif '<sect1 id="functions-datetime">' in line:
                func_datetime_begin_lineno = i
            elif '<sect1 id="functions-enum">' in line:
                func_datetime_end_lineno = i - 1
            elif '<sect1 id="functions-json">' in line:
                func_json_begin_lineno = i
            elif '<sect1 id="functions-sequence">' in line:
                func_json_end_lineno = i - 1
            elif '<sect1 id="functions-aggregate">' in line:
                func_aggregate_begin = i
            elif '<sect1 id="functions-window">' in line:
                func_aggregate_end = i - 1
            elif '<sect1 id="functions-info">' in line:
                func_info_begin_lineno = i
            elif '<sect1 id="functions-admin">' in line:
                # The admin tag both starts the admin section and closes the
                # info section that precedes it.
                func_admin_begin_lineno = i
                func_info_end_lineno = i - 1
            elif '<sect1 id="functions-trigger">' in line:
                func_admin_end_lineno = i - 1
# Line numbers drive every later step, so check both the begin and the end
# line of every section before going on.
def precheck_line_info():
    """Abort unless every section's begin and end line number is non-negative.

    Prints "quiting" and terminates the script when any of the fourteen
    module-level line-number variables is still negative.

    Raises:
        SystemExit: with a message naming the unresolved variables.  A string
            argument makes the interpreter exit with a non-zero status,
            whereas the bare ``quit()`` used before reported success.
    """
    line_numbers = {
        "func_string_line_begin_lineno": func_string_line_begin_lineno,
        "func_string_line_end_lineno": func_string_line_end_lineno,
        "func_matching_begin_lineno": func_matching_begin_lineno,
        "func_matching_end_lineno": func_matching_end_lineno,
        "func_datetime_begin_lineno": func_datetime_begin_lineno,
        "func_datetime_end_lineno": func_datetime_end_lineno,
        "func_json_begin_lineno": func_json_begin_lineno,
        "func_json_end_lineno": func_json_end_lineno,
        "func_aggregate_begin": func_aggregate_begin,
        "func_aggregate_end": func_aggregate_end,
        "func_info_begin_lineno": func_info_begin_lineno,
        "func_info_end_lineno": func_info_end_lineno,
        "func_admin_begin_lineno": func_admin_begin_lineno,
        "func_admin_end_lineno": func_admin_end_lineno,
    }
    unresolved = [name for name, lineno in line_numbers.items() if lineno < 0]
    if unresolved:
        print("quiting")
        # The original built a ValueError without raising it; carry its
        # message in the exit instead so the failure is actually reported.
        raise SystemExit("don't have related file: " + ", ".join(unresolved))
# Line numbers drive every later step; this variant checks only the begin
# line of every section.
def precheck_line_begining_info():
    """Abort unless every section's begin line number is non-negative.

    Prints "quiting" and terminates the script when any of the seven
    module-level ``*_begin*`` variables is still negative.

    Raises:
        SystemExit: with a message naming the unresolved variables.  A string
            argument makes the interpreter exit with a non-zero status,
            whereas the bare ``quit()`` used before reported success.
    """
    begin_line_numbers = {
        "func_string_line_begin_lineno": func_string_line_begin_lineno,
        "func_matching_begin_lineno": func_matching_begin_lineno,
        "func_datetime_begin_lineno": func_datetime_begin_lineno,
        "func_json_begin_lineno": func_json_begin_lineno,
        "func_aggregate_begin": func_aggregate_begin,
        "func_info_begin_lineno": func_info_begin_lineno,
        "func_admin_begin_lineno": func_admin_begin_lineno,
    }
    unresolved = [name for name, lineno in begin_line_numbers.items() if lineno < 0]
    if unresolved:
        print("quiting")
        # The original built a ValueError without raising it; carry its
        # message in the exit instead so the failure is actually reported.
        raise SystemExit("don't have related file: " + ", ".join(unresolved))
# Validate a newly written file.
def validate_new_file(file_name: str, pattern: str):
    """Exit unless *pattern* matches exactly twice in *file_name*.

    Matches are counted line by line with ``re.findall``, so a line may
    contribute more than one match.

    Args:
        file_name: path of the file to check.
        pattern: regular expression whose matches are counted.

    Raises:
        SystemExit: with status 1 (after printing a message) when the total
            number of matches is not 2.  The bare ``quit()`` used before
            exited with status 0, hiding the failure from callers.
    """
    # Counting only needs read access; 'r+' would demand write permission.
    with open(file_name, 'r') as f:
        occurrences = sum(len(re.findall(pattern, line)) for line in f)
    if occurrences != 2:
        print(f'{file_name} should only have 2 \"{pattern}\" attribute')
        raise SystemExit(1)
# The &func-info; and &func-admin; sections are adjacent, so the end line of
# the info section needs special care: it is derived from the position of the
# &func-admin; placeholder instead of the admin <sect1> tag.
def get_line_number_speical(file_name: str):
    """Set ``func_info_end_lineno`` from the ``&func-admin;`` placeholder line.

    Every line of *file_name* that contains ``func_admin_place_holder`` sets
    the module-level ``func_info_end_lineno`` to two lines above it (1-based
    numbering); if there are several such lines the last one wins.  Nothing
    changes when the placeholder is absent.

    Args:
        file_name: path of the SGML file to scan.
    """
    global func_info_end_lineno
    # Only reading is needed; 'r+' would demand write permission.
    with open(file_name, 'r') as f:
        for i, line in enumerate(f, 1):
            if func_admin_place_holder in line:
                # NOTE(review): presumably skips the placeholder line itself
                # plus one separator line above it -- confirm against func.sgml.
                func_info_end_lineno = i - 2
# Insert content directly after line X (X == 0 puts it at the top of the file).
def write_line(file_name:str, line:int, content:str):
    """Insert *content* into *file_name* immediately after 1-based line *line*.

    The whole file is read, modified in memory and written back.

    Args:
        file_name: path of the file to modify.
        line: number of the line after which *content* goes; 0 inserts at the
            very beginning of the file.
        content: text to insert; include the trailing newline if it should
            occupy its own line.

    Raises:
        IndexError: if *line* is negative or greater than the number of lines
            in the file.
    """
    with open(file_name, "r") as file:
        lines = file.readlines()
    # Without this check, line 0 (or a negative value) would wrap around via
    # negative indexing and silently modify the end of the file.
    if not 0 <= line <= len(lines):
        raise IndexError(
            f"line {line} is out of range for {file_name} ({len(lines)} lines)"
        )
    # Inserting at index `line` places the content after the first `line` lines.
    lines.insert(line, content)
    with open(file_name, "w") as file:
        file.write("".join(lines))
#--------------------------step1. get the line number info and validate it.
get_line_number(target_file)
precheck_line_info()
#--------------------------step2. write the placeholder strings into func.sgml.
# write_line() puts its content right after the given line.  Each call below
# adds one placeholder line to the file, so the offset grows by one per call
# (-1, 0, +1, ... +5) to compensate for the lines already inserted; this
# assumes the sections occur in func.sgml in the order listed here.
write_line(target_file,(func_string_line_begin_lineno -1), func_string_place_holder)
write_line(target_file,(func_matching_begin_lineno), func_matching_place_holder)
write_line(target_file,(func_datetime_begin_lineno +1), func_datetime_place_holder)
write_line(target_file,(func_json_begin_lineno +2), func_json_place_holder)
write_line(target_file,(func_aggregate_begin +3), func_aggregate_place_holder)
write_line(target_file,(func_info_begin_lineno +4), func_info_place_holder)
write_line(target_file,(func_admin_begin_lineno + 5), func_admin_place_holder)
#re-evaluate the line numbers, since the file changed in step2
get_line_number(target_file)
get_line_number_speical(target_file)
precheck_line_info()
#--------------------------step3 construct the sed commands (they are executed further below)
# Each command prints one section's line range from func.sgml and redirects
# it into that section's new file.
sed_command_string = f'sed -n {func_string_line_begin_lineno},{func_string_line_end_lineno}p {target_file} > {func_string}'
sed_command_matching = f'sed -n {func_matching_begin_lineno},{func_matching_end_lineno}p {target_file} > {func_matching}'
sed_command_datetime = f'sed -n {func_datetime_begin_lineno},{func_datetime_end_lineno}p {target_file} > {func_datetime}'
sed_command_json = f'sed -n {func_json_begin_lineno},{func_json_end_lineno}p {target_file} > {func_json}'
sed_command_aggregate = f'sed -n {func_aggregate_begin},{func_aggregate_end}p {target_file} > {func_aggregate}'
sed_command_func_info = f'sed -n {func_info_begin_lineno},{func_info_end_lineno}p {target_file} > {func_info}'
sed_command_func_admin = f'sed -n {func_admin_begin_lineno},{func_admin_end_lineno}p {target_file} > {func_admin}'
def print_sed_command():
    """Print the seven section-extraction sed commands, one per line."""
    extraction_commands = (
        sed_command_string,
        sed_command_matching,
        sed_command_datetime,
        sed_command_json,
        sed_command_aggregate,
        sed_command_func_info,
        sed_command_func_admin,
    )
    for command in extraction_commands:
        print(command)
print_sed_command()
# Run the extraction commands.  check_call raises CalledProcessError when sed
# exits non-zero, so a failed copy stops the script before anything is
# deleted from func.sgml (subprocess.call silently ignored such failures).
for _extract_command in (
    sed_command_string,
    sed_command_matching,
    sed_command_datetime,
    sed_command_json,
    sed_command_aggregate,
    sed_command_func_info,
    sed_command_func_admin,
):
    subprocess.check_call(_extract_command, shell=True)
#--------------------------step4 validate new file's content.
_new_files = (
    func_string,
    func_matching,
    func_datetime,
    func_json,
    func_aggregate,
    func_info,
    func_admin,
)
# Every new file must mention "sect1" exactly twice.
for _new_file in _new_files:
    validate_new_file(_new_file, "sect1")
for _new_file in _new_files:
    get_line_number(_new_file)
precheck_line_begining_info()
#--------------------------step5 delete the copied sections from func.sgml
# Recompute the line numbers against func.sgml, because step4 overwrote the
# begin values with positions inside the new files.
get_line_number(target_file)
get_line_number_speical(target_file)
precheck_line_info()
printall()
_delete_ranges = (
    (func_string_line_begin_lineno, func_string_line_end_lineno),
    (func_matching_begin_lineno, func_matching_end_lineno),
    (func_datetime_begin_lineno, func_datetime_end_lineno),
    (func_json_begin_lineno, func_json_end_lineno),
    (func_aggregate_begin, func_aggregate_end),
    (func_info_begin_lineno, func_info_end_lineno),
    (func_admin_begin_lineno, func_admin_end_lineno),
)
# All ranges go into a single sed invocation, so the one in-place edit
# handles every range at once.
_delete_expression = " ; ".join(f"{begin},{end}d" for begin, end in _delete_ranges)
sed_in_place_delete = f'sed --in-place "{_delete_expression} " {target_file}'
print(sed_in_place_delete)
subprocess.check_call(sed_in_place_delete, shell=True)