commit f4408163156a468667ae24d3b7eb33b5b8346cf0 Author: Laslo Hunhold <d...@frign.de> AuthorDate: Sat Aug 27 02:09:10 2022 +0200 Commit: Laslo Hunhold <d...@frign.de> CommitDate: Sat Aug 27 02:09:10 2022 +0200
Add UNICODE_VERSION variable to Makefile and add to manual-templates This simplifies updating to new Unicode versions a bit, but will not be added to config.mk as changing between Unicode versions is not as simple as downloading new files. Apart from that, it is necessary to check all the different implemented algorithms for changes. Signed-off-by: Laslo Hunhold <d...@frign.de> diff --git a/Makefile b/Makefile index 9cdb406..f61e50e 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,8 @@ include config.mk +UNICODE_VERSION = 14.0.0 + BENCHMARK =\ benchmark/case\ benchmark/character\ @@ -81,46 +83,46 @@ MAN7 =\ all: data/LICENSE $(MAN3:=.3) $(MAN7:=.7) libgrapheme.a libgrapheme.so data/DerivedCoreProperties.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/DerivedCoreProperties.txt data/EastAsianWidth.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt data/emoji-data.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt data/GraphemeBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakProperty.txt data/GraphemeBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt data/LICENSE: wget -O $@ https://www.unicode.org/license.txt data/LineBreak.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/LineBreak.txt data/LineBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/LineBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/LineBreakTest.txt data/SentenceBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakProperty.txt data/SentenceBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakTest.txt data/SpecialCasing.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/SpecialCasing.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/SpecialCasing.txt data/UnicodeData.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt data/WordBreakProperty.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakProperty.txt data/WordBreakTest.txt: - wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakTest.txt + wget -O $@ https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakTest.txt benchmark/case.o: benchmark/case.c config.mk gen/word-test.h grapheme.h benchmark/util.h benchmark/character.o: benchmark/character.c config.mk gen/character-test.h grapheme.h benchmark/util.h @@ -232,10 +234,10 @@ libgrapheme.so: $(SRC:=.o) $(CC) -o $@ $(SOFLAGS) $(LDFLAGS) $(SRC:=.o) $(MAN3:=.3): - SH=$(SH) $(SH) $(@:.3=.sh) > $@ + SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.3=.sh) > $@ $(MAN7:=.7): - SH=$(SH) $(SH) $(@:.7=.sh) > $@ + SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.7=.sh) > $@ benchmark: $(BENCHMARK) for m in $(BENCHMARK); do ./$$m; done diff --git a/man/libgrapheme.sh b/man/libgrapheme.sh index 040a659..37c2d7a 100644 --- a/man/libgrapheme.sh +++ b/man/libgrapheme.sh @@ -51,7 +51,7 @@ example illustrating the possible usage. .Xr grapheme_next_word_break_utf8 3 .Sh STANDARDS .Nm -is compliant with the Unicode 14.0.0 specification. +is compliant with the Unicode $UNICODE_VERSION specification. .Sh MOTIVATION The idea behind every character encoding scheme like ASCII or Unicode is to express abstract characters (which can be thought of as shapes diff --git a/man/template/to_case.sh b/man/template/to_case.sh index eb12764..30951bb 100644 --- a/man/template/to_case.sh +++ b/man/template/to_case.sh @@ -52,7 +52,7 @@ is .Xr libgrapheme 7 .Sh STANDARDS .Fn grapheme_to_$CASE -is compliant with the Unicode 14.0.0 specification. +is compliant with the Unicode $UNICODE_VERSION specification. .Sh AUTHORS .An Laslo Hunhold Aq Mt d...@frign.de EOF