This is an automated email from the git hooks/post-receive script. rene pushed a commit to branch master in repository hunspell.
commit cadb20abfd2aad13ab00cdc39188d8fe84c3ca55 Author: Rene Engelhard <[email protected]> Date: Thu Apr 21 14:45:17 2016 +0200 Imported Upstream version 1.2.2~b --- BUGS | 2 - ChangeLog | 40 ++++++ NEWS | 13 ++ configure | 79 +++++------ configure.ac | 7 +- hunspell.pc.in | 2 +- po/hu.gmo | Bin 7563 -> 7564 bytes po/hu.po | 27 ++-- src/hunspell/Makefile.am | 14 +- src/hunspell/Makefile.in | 69 ++++----- src/hunspell/affentry.cxx | 10 +- src/hunspell/affixmgr.cxx | 165 +++++++++++----------- src/hunspell/affixmgr.hxx | 26 ++-- src/hunspell/csutil.cxx | 91 +++++++----- src/hunspell/csutil.hxx | 9 +- src/hunspell/filemgr.cxx | 38 +++++ src/hunspell/filemgr.hxx | 19 +++ src/hunspell/hashmgr.cxx | 91 ++++++------ src/hunspell/hashmgr.hxx | 11 +- src/hunspell/htypes.hxx | 5 +- src/hunspell/hunspell.cxx | 77 ++++++---- src/hunspell/hunspell.h | 10 +- src/hunspell/hunspell.hxx | 14 +- src/hunspell/hunzip.cxx | 191 +++++++++++++++++++++++++ src/hunspell/hunzip.hxx | 41 ++++++ src/hunspell/suggestmgr.cxx | 13 +- src/hunspell/suggestmgr.hxx | 2 +- src/parsers/Makefile.am | 2 +- src/parsers/Makefile.in | 4 +- src/tools/Makefile.am | 16 ++- src/tools/Makefile.in | 45 ++++-- src/tools/affixcompress | 183 ++++++++++++++++++++++++ src/tools/chmorph.cxx | 46 +++++- src/tools/example.cxx | 59 +++----- src/tools/hunspell.cxx | 128 +++++++++++------ src/tools/hunzip.cxx | 22 +++ src/tools/hzip.c | 281 +++++++++++++++++++++++++++++++++++++ src/win_api/hunspelldll.c | 10 +- tests/IJ.good | 2 + tests/Makefile.am | 1 + tests/Makefile.in | 1 + tests/suggestiontest/Makefile.orig | 2 +- 42 files changed, 1419 insertions(+), 449 deletions(-) diff --git a/BUGS b/BUGS index b18d135..98558e1 100644 --- a/BUGS +++ b/BUGS @@ -1,5 +1,3 @@ -* Problems with UTF-8 flag support on ARM platform (fail reputf and maputf test) - * Interactive interface has some visualisation problem with UTF-8 characters * -U, -u options doesn't support Unicode. diff --git a/ChangeLog b/ChangeLog index dc3c224..46d1473 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,43 @@ +2007-11-16 Németh László <nemeth at OOo>: + - ZWSP, REP, tabulator problems: need for the final release? + + * Bug 1851246 IDB_Winshell Visual C, hunspell.cxx, csutil.cxx, hunspelldll.c + + * Bug 1856572 Mark de Does C prototype problem + + * hunspell.pc: 1857450 wrong prefix in hunspell.pc.in + + * Bug 1857512, reported by Rene Engelhard, Mark de Does + + * csutil.cxx: Bug 1863239, notrailingcomma patch and optimization of + get_currect_cs() by Caolan McNamara. + + * csutil.cxx: patches for ARM platform, signed_chars.dpatch + by Rene Engelhard and arm_structure_alignment.dpatch by + Steinar H. Gunderson <[email protected]> + + * hunzip.*, hzip.c: new hzip compression format + + * tools/affixcompressor: affix compressor utility (similar to + munch, but it generates affix table automatically), works + with million-words dictionaries of agglutinative languages. + + * README: fix problems reported by Pham Ngoc Khanh. + + * hunspell.pc: reset numbering scheme: libhunspell-1.2 + + * csutil.cxx, suggestmgr: Warning-free in OOo builds + + * hentry??, csutil.cxx: fix protected memory problems with + stored pointers on several not x86 platforms. + + * fix iconv support on Solaris platform + + * tests/IJ.good: add missing test file + + * csutil.cxx: fix const char* related errors. Compiling bug + with Visual C++ reported by Ryan VanderMeulen and Ingo H. De Boer. + 2007-11-01 Németh László <nemeth at OOo>: * hunspell/*: new feature: morphological generation, also fix experimental morphological analysis and stemming. diff --git a/NEWS b/NEWS index befdaa0..a68e581 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,16 @@ +2008-01-16: Hunspell 1.2.2 release: + - multiple dictionary (dic file) support to use extra (medical, + geographical etc.) dictionaries + + - optional compressed dictionary format "hzip" for aff and dic files + + - support encrypted dictionaries for closed OpenOffice.org extensions or + other commercial programs + + - new affix compression tool "affixcompress" + + - bug fixes + 2007-11-01: Hunspell 1.2.1 release: - new memory efficient condition checking algorithm for affix rules diff --git a/configure b/configure index d8f741d..f2d6276 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.59 for hunspell 1.2.1. +# Generated by GNU Autoconf 2.59 for hunspell 1.2.2b. # # Report bugs to <[email protected]>. # @@ -423,8 +423,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='hunspell' PACKAGE_TARNAME='hunspell' -PACKAGE_VERSION='1.2.1' -PACKAGE_STRING='hunspell 1.2.1' +PACKAGE_VERSION='1.2.2b' +PACKAGE_STRING='hunspell 1.2.2b' PACKAGE_BUGREPORT='[email protected]' ac_unique_file="config.h.in" @@ -954,7 +954,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures hunspell 1.2.1 to adapt to many kinds of systems. +\`configure' configures hunspell 1.2.2b to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1021,7 +1021,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of hunspell 1.2.1:";; + short | recursive ) echo "Configuration of hunspell 1.2.2b:";; esac cat <<\_ACEOF @@ -1171,7 +1171,7 @@ fi test -n "$ac_init_help" && exit 0 if $ac_init_version; then cat <<\_ACEOF -hunspell configure 1.2.1 +hunspell configure 1.2.2b generated by GNU Autoconf 2.59 Copyright (C) 2003 Free Software Foundation, Inc. @@ -1185,7 +1185,7 @@ cat >&5 <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by hunspell $as_me 1.2.1, which was +It was created by hunspell $as_me 1.2.2b, which was generated by GNU Autoconf 2.59. Invocation command line was $ $0 $@ @@ -1629,9 +1629,6 @@ test -n "$target_alias" && test "$program_prefix$program_suffix$program_transform_name" = \ NONENONEs,x,x, && program_prefix=${target_alias}- -case "$host" in -arm*) XFAILED="flagutf8.test maputf.test";; -esac am__api_version="1.9" @@ -1916,7 +1913,7 @@ fi # Define the identity of the package. PACKAGE=hunspell - VERSION=1.2.1 + VERSION=1.2.2b cat >>confdefs.h <<_ACEOF @@ -4197,7 +4194,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 4200 "configure"' > conftest.$ac_ext + echo '#line 4197 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -5332,7 +5329,7 @@ fi # Provide some information about the compiler. -echo "$as_me:5335:" \ +echo "$as_me:5332:" \ "checking for Fortran 77 compiler version" >&5 ac_compiler=`set X $ac_compile; echo $2` { (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 @@ -6395,11 +6392,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:6398: $lt_compile\"" >&5) + (eval echo "\"\$as_me:6395: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:6402: \$? = $ac_status" >&5 + echo "$as_me:6399: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -6663,11 +6660,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:6666: $lt_compile\"" >&5) + (eval echo "\"\$as_me:6663: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:6670: \$? = $ac_status" >&5 + echo "$as_me:6667: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -6767,11 +6764,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:6770: $lt_compile\"" >&5) + (eval echo "\"\$as_me:6767: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:6774: \$? = $ac_status" >&5 + echo "$as_me:6771: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -8236,7 +8233,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 8239 "configure"' > conftest.$ac_ext + echo '#line 8236 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -9133,7 +9130,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 9136 "configure" +#line 9133 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -9233,7 +9230,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<EOF -#line 9236 "configure" +#line 9233 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11576,11 +11573,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11579: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11576: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:11583: \$? = $ac_status" >&5 + echo "$as_me:11580: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -11680,11 +11677,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11683: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11680: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:11687: \$? = $ac_status" >&5 + echo "$as_me:11684: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -12216,7 +12213,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 12219 "configure"' > conftest.$ac_ext + echo '#line 12216 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -13274,11 +13271,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:13277: $lt_compile\"" >&5) + (eval echo "\"\$as_me:13274: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:13281: \$? = $ac_status" >&5 + echo "$as_me:13278: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -13378,11 +13375,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:13381: $lt_compile\"" >&5) + (eval echo "\"\$as_me:13378: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:13385: \$? = $ac_status" >&5 + echo "$as_me:13382: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -14827,7 +14824,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 14830 "configure"' > conftest.$ac_ext + echo '#line 14827 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -15605,11 +15602,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15608: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15605: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15612: \$? = $ac_status" >&5 + echo "$as_me:15609: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -15873,11 +15870,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15876: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15873: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15880: \$? = $ac_status" >&5 + echo "$as_me:15877: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -15977,11 +15974,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15980: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15977: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:15984: \$? = $ac_status" >&5 + echo "$as_me:15981: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -17446,7 +17443,7 @@ linux*) libsuff= case "$host_cpu" in x86_64*|s390x*|powerpc64*) - echo '#line 17449 "configure"' > conftest.$ac_ext + echo '#line 17446 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -23884,7 +23881,7 @@ _ASBOX } >&5 cat >&5 <<_CSEOF -This file was extended by hunspell $as_me 1.2.1, which was +This file was extended by hunspell $as_me 1.2.2b, which was generated by GNU Autoconf 2.59. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -23947,7 +23944,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -hunspell config.status 1.2.1 +hunspell config.status 1.2.2b configured by $0, generated by GNU Autoconf 2.59, with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" diff --git a/configure.ac b/configure.ac index 61083a4..19ec498 100644 --- a/configure.ac +++ b/configure.ac @@ -4,15 +4,12 @@ m4_pattern_allow AC_PREREQ(2.59) -AC_INIT([hunspell],[1.2.1],[[email protected]]) +AC_INIT([hunspell],[1.2.2b],[[email protected]]) AC_CANONICAL_SYSTEM -case "$host" in -arm*) XFAILED="flagutf8.test maputf.test";; -esac AC_SUBST(XFAILED) -AM_INIT_AUTOMAKE(hunspell, 1.2.1) +AM_INIT_AUTOMAKE(hunspell, 1.2.2b) HUNSPELL_VERSION_MAJOR=`echo $VERSION | cut -d"." -f1` HUNSPELL_VERSION_MINOR=`echo $VERSION | cut -d"." -f2` AC_SUBST(HUNSPELL_VERSION_MAJOR) diff --git a/hunspell.pc.in b/hunspell.pc.in index 2b85618..b9f51a2 100644 --- a/hunspell.pc.in +++ b/hunspell.pc.in @@ -1,4 +1,4 @@ -prefix=/usr +prefix=@prefix@ exec_prefix=${prefix} libdir=${prefix}/lib includedir=${prefix}/include diff --git a/po/hu.gmo b/po/hu.gmo index a004fec..10d4fdb 100644 Binary files a/po/hu.gmo and b/po/hu.gmo differ diff --git a/po/hu.po b/po/hu.po index 6271e70..037f6a2 100644 --- a/po/hu.po +++ b/po/hu.po @@ -306,12 +306,12 @@ msgstr " -w\t\tki #, fuzzy, c-format msgid "Example: hunspell -d english file.txt # interactive spelling\n" msgstr "" -"P�lda: hunspell -d en_US f�jl.txt # interakt�v helyes�r�s-ellen�rz�s\n" +"P�ld�k: hunspell -d en_US f�jl.txt # interakt�v helyes�r�s-ellen�rz�s\n" #: src/hunspell/hunspell.cxx:1008 #, c-format msgid " hunspell -l file.txt # print misspelled words\n" -msgstr " hunspell -l f�jl.txt # ki�rja a hib�s szavakat\n" +msgstr " hunspell -l f�jl.txt # ki�rja a hib�s szavakat\n" #: src/hunspell/hunspell.cxx:1009 #, c-format @@ -329,7 +329,7 @@ msgstr "" #: src/hunspell/hunspell.cxx:1019 #, fuzzy, c-format msgid "Copyright (C) 2002-2005 Nemeth Laszlo. License: GNU LGPL.\n" -msgstr "Copyright (C) 2002-2007 N�meth L�szl�. Licenc: GNU LGPL.\n" +msgstr "Copyright (C) 2002-2008 N�meth L�szl�. Licenc: MPL/GPL/LGPL.\n" #: src/hunspell/hunspell.cxx:1021 #, c-format @@ -373,16 +373,21 @@ msgstr "Nem lehet megnyitni a ragoz msgid "Hunspell has been compiled without Ncurses user interface.\n" msgstr "A Hunspell Ncurses felhaszn�l�i fel�let n�lk�l lett ford�tva.\n" +#~ msgid " --check-url\tCheck URLs, e-mail addresses and directory paths\n" +#~ msgstr " --check-url\tURL-ek, lev�lc�mek �s �tvonalak ellen�rz�se\n" + +#~ msgid " hunspell -i utf-8 file.txt # check UTF-8 encoded file\n" +#~ msgstr "" +#~ " hunspell -i utf-8 f�jl.txt # UTF-8-as �llom�ny ellen�rz�se\n" + +#~ msgid " -P password\tset password for encrypted dictionaries\n" +#~ msgstr " -P jelsz�\tjelsz� megad�sa a titkos�tott sz�t�rakhoz\n" + +#~ msgid "Bug reports: http://hunspell.sourceforge.net\n" +#~ msgstr "Hibajelz�s: http://hunspell.sourceforge.net\n" + #~ msgid " -D\t\tshow detected path of the dictionary\n" #~ msgstr " -D\t\tki�rja a bet�lt�tt sz�t�r �tvonal�t\n" #~ msgid " -i enc\tinput encoding\n" #~ msgstr " -i k�d\tbemeneti karakterk�dol�s\n" - -#~ msgid " hunspell -i utf8 file.txt # check UTF-8 encoded file\n" -#~ msgstr "" -#~ " hunspell -i utf8 f�jl.txt # UTF-8 k�dol�s� �llom�nyt " -#~ "ellen�riz\n" - -#~ msgid "Bug reports: http://hunspell.sourceforge.net\n" -#~ msgstr "Hibabejelent�s: http://hunspell.sourceforge.net\n" diff --git a/src/hunspell/Makefile.am b/src/hunspell/Makefile.am index cefd66b..f83b0a9 100644 --- a/src/hunspell/Makefile.am +++ b/src/hunspell/Makefile.am @@ -1,15 +1,15 @@ -libhunspell_la_LDFLAGS = -version-info 1:1:0 +#libhunspell_la_LDFLAGS = -version-info x:x:x -lib_LTLIBRARIES = libhunspell.la -libhunspell_includedir = $(includedir)/hunspell -libhunspell_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \ +lib_LTLIBRARIES = libhunspell-1.2.la +libhunspell_1_2_includedir = $(includedir)/hunspell +libhunspell_1_2_la_SOURCES=affentry.cxx affixmgr.cxx csutil.cxx \ dictmgr.cxx hashmgr.cxx hunspell.cxx utf_info.cxx \ suggestmgr.cxx license.myspell license.hunspell \ - phonet.cxx + phonet.cxx filemgr.cxx hunzip.cxx -libhunspell_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \ +libhunspell_1_2_include_HEADERS=affentry.hxx htypes.hxx affixmgr.hxx \ csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \ suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \ - phonet.hxx + phonet.hxx filemgr.hxx hunzip.hxx EXTRA_DIST=hunspell.dsp makefile.mk README diff --git a/src/hunspell/Makefile.in b/src/hunspell/Makefile.in index a700fd8..8e6f837 100644 --- a/src/hunspell/Makefile.in +++ b/src/hunspell/Makefile.in @@ -14,6 +14,8 @@ @SET_MAKE@ +#libhunspell_la_LDFLAGS = -version-info x:x:x + srcdir = @srcdir@ top_srcdir = @top_srcdir@ @@ -39,7 +41,7 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ subdir = src/hunspell -DIST_COMMON = README $(libhunspell_include_HEADERS) \ +DIST_COMMON = README $(libhunspell_1_2_include_HEADERS) \ $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/codeset.m4 \ @@ -60,14 +62,14 @@ am__vpath_adj = case $$p in \ esac; am__strip_dir = `echo $$p | sed -e 's|^.*/||'`; am__installdirs = "$(DESTDIR)$(libdir)" \ - "$(DESTDIR)$(libhunspell_includedir)" + "$(DESTDIR)$(libhunspell_1_2_includedir)" libLTLIBRARIES_INSTALL = $(INSTALL) LTLIBRARIES = $(lib_LTLIBRARIES) -libhunspell_la_LIBADD = -am_libhunspell_la_OBJECTS = affentry.lo affixmgr.lo csutil.lo \ +libhunspell_1_2_la_LIBADD = +am_libhunspell_1_2_la_OBJECTS = affentry.lo affixmgr.lo csutil.lo \ dictmgr.lo hashmgr.lo hunspell.lo utf_info.lo suggestmgr.lo \ - phonet.lo -libhunspell_la_OBJECTS = $(am_libhunspell_la_OBJECTS) + phonet.lo filemgr.lo hunzip.lo +libhunspell_1_2_la_OBJECTS = $(am_libhunspell_1_2_la_OBJECTS) DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @@ -79,10 +81,10 @@ LTCXXCOMPILE = $(LIBTOOL) --tag=CXX --mode=compile $(CXX) $(DEFS) \ CXXLD = $(CXX) CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -SOURCES = $(libhunspell_la_SOURCES) -DIST_SOURCES = $(libhunspell_la_SOURCES) -libhunspell_includeHEADERS_INSTALL = $(INSTALL_HEADER) -HEADERS = $(libhunspell_include_HEADERS) +SOURCES = $(libhunspell_1_2_la_SOURCES) +DIST_SOURCES = $(libhunspell_1_2_la_SOURCES) +libhunspell_1_2_includeHEADERS_INSTALL = $(INSTALL_HEADER) +HEADERS = $(libhunspell_1_2_include_HEADERS) ETAGS = etags CTAGS = ctags DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) @@ -215,18 +217,17 @@ target_alias = @target_alias@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ -libhunspell_la_LDFLAGS = -version-info 1:1:0 -lib_LTLIBRARIES = libhunspell.la -libhunspell_includedir = $(includedir)/hunspell -libhunspell_la_SOURCES = affentry.cxx affixmgr.cxx csutil.cxx \ +lib_LTLIBRARIES = libhunspell-1.2.la +libhunspell_1_2_includedir = $(includedir)/hunspell +libhunspell_1_2_la_SOURCES = affentry.cxx affixmgr.cxx csutil.cxx \ dictmgr.cxx hashmgr.cxx hunspell.cxx utf_info.cxx \ suggestmgr.cxx license.myspell license.hunspell \ - phonet.cxx + phonet.cxx filemgr.cxx hunzip.cxx -libhunspell_include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \ +libhunspell_1_2_include_HEADERS = affentry.hxx htypes.hxx affixmgr.hxx \ csutil.hxx hunspell.hxx atypes.hxx dictmgr.hxx hunspell.h \ suggestmgr.hxx baseaffix.hxx hashmgr.hxx langnum.hxx \ - phonet.hxx + phonet.hxx filemgr.hxx hunzip.hxx EXTRA_DIST = hunspell.dsp makefile.mk README all: all-am @@ -289,8 +290,8 @@ clean-libLTLIBRARIES: echo "rm -f \"$${dir}/so_locations\""; \ rm -f "$${dir}/so_locations"; \ done -libhunspell.la: $(libhunspell_la_OBJECTS) $(libhunspell_la_DEPENDENCIES) - $(CXXLINK) -rpath $(libdir) $(libhunspell_la_LDFLAGS) $(libhunspell_la_OBJECTS) $(libhunspell_la_LIBADD) $(LIBS) +libhunspell-1.2.la: $(libhunspell_1_2_la_OBJECTS) $(libhunspell_1_2_la_DEPENDENCIES) + $(CXXLINK) -rpath $(libdir) $(libhunspell_1_2_la_LDFLAGS) $(libhunspell_1_2_la_OBJECTS) $(libhunspell_1_2_la_LIBADD) $(LIBS) mostlyclean-compile: -rm -f *.$(OBJEXT) @@ -302,8 +303,10 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/affixmgr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/csutil.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dictmgr.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filemgr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hashmgr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/phonet.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/suggestmgr.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/utf_info.Plo@am__quote@ @@ -338,22 +341,22 @@ clean-libtool: distclean-libtool: -rm -f libtool uninstall-info-am: -install-libhunspell_includeHEADERS: $(libhunspell_include_HEADERS) +install-libhunspell_1_2_includeHEADERS: $(libhunspell_1_2_include_HEADERS) @$(NORMAL_INSTALL) - test -z "$(libhunspell_includedir)" || $(mkdir_p) "$(DESTDIR)$(libhunspell_includedir)" - @list='$(libhunspell_include_HEADERS)'; for p in $$list; do \ + test -z "$(libhunspell_1_2_includedir)" || $(mkdir_p) "$(DESTDIR)$(libhunspell_1_2_includedir)" + @list='$(libhunspell_1_2_include_HEADERS)'; for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ f=$(am__strip_dir) \ - echo " $(libhunspell_includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(libhunspell_includedir)/$$f'"; \ - $(libhunspell_includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(libhunspell_includedir)/$$f"; \ + echo " $(libhunspell_1_2_includeHEADERS_INSTALL) '$$d$$p' '$(DESTDIR)$(libhunspell_1_2_includedir)/$$f'"; \ + $(libhunspell_1_2_includeHEADERS_INSTALL) "$$d$$p" "$(DESTDIR)$(libhunspell_1_2_includedir)/$$f"; \ done -uninstall-libhunspell_includeHEADERS: +uninstall-libhunspell_1_2_includeHEADERS: @$(NORMAL_UNINSTALL) - @list='$(libhunspell_include_HEADERS)'; for p in $$list; do \ + @list='$(libhunspell_1_2_include_HEADERS)'; for p in $$list; do \ f=$(am__strip_dir) \ - echo " rm -f '$(DESTDIR)$(libhunspell_includedir)/$$f'"; \ - rm -f "$(DESTDIR)$(libhunspell_includedir)/$$f"; \ + echo " rm -f '$(DESTDIR)$(libhunspell_1_2_includedir)/$$f'"; \ + rm -f "$(DESTDIR)$(libhunspell_1_2_includedir)/$$f"; \ done ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) @@ -435,7 +438,7 @@ check-am: all-am check: check-am all-am: Makefile $(LTLIBRARIES) $(HEADERS) installdirs: - for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libhunspell_includedir)"; do \ + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(libhunspell_1_2_includedir)"; do \ test -z "$$dir" || $(mkdir_p) "$$dir"; \ done install: install-am @@ -483,7 +486,7 @@ info: info-am info-am: -install-data-am: install-libhunspell_includeHEADERS +install-data-am: install-libhunspell_1_2_includeHEADERS install-exec-am: install-libLTLIBRARIES @@ -512,7 +515,7 @@ ps: ps-am ps-am: uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES \ - uninstall-libhunspell_includeHEADERS + uninstall-libhunspell_1_2_includeHEADERS .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ clean-libLTLIBRARIES clean-libtool ctags distclean \ @@ -520,13 +523,13 @@ uninstall-am: uninstall-info-am uninstall-libLTLIBRARIES \ distclean-tags distdir dvi dvi-am html html-am info info-am \ install install-am install-data install-data-am install-exec \ install-exec-am install-info install-info-am \ - install-libLTLIBRARIES install-libhunspell_includeHEADERS \ + install-libLTLIBRARIES install-libhunspell_1_2_includeHEADERS \ install-man install-strip installcheck installcheck-am \ installdirs maintainer-clean maintainer-clean-generic \ mostlyclean mostlyclean-compile mostlyclean-generic \ mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \ uninstall-am uninstall-info-am uninstall-libLTLIBRARIES \ - uninstall-libhunspell_includeHEADERS + uninstall-libhunspell_1_2_includeHEADERS # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/src/hunspell/affentry.cxx b/src/hunspell/affentry.cxx index 0ffe557..fd737e0 100644 --- a/src/hunspell/affentry.cxx +++ b/src/hunspell/affentry.cxx @@ -470,14 +470,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) if (numconds == 0) return 1; char * p = c.conds; st--; - int c = 1; + int i = 1; while (1) { switch (*p) { case '\0': return 1; case '[': { p = nextchar(p); pos = st; break; } case '^': { p = nextchar(p); neg = true; break; } case ']': { if (!neg && !ingroup) return 0; - c++; + i++; pos = NULL; neg = false; ingroup = false; @@ -515,17 +515,17 @@ inline int SfxEntry::test_condition(const char * st, const char * beg) } if (pos && st != pos) { if (neg) return 0; - else if (c == numconds) return 1; + else if (i == numconds) return 1; ingroup = true; } if (p && *p != '\0') p = nextchar(p); } else if (pos) { if (neg) return 0; - else if (c == numconds) return 1; + else if (i == numconds) return 1; ingroup = true; } if (!pos) { - c++; + i++; st--; if (st < beg && p && *p != '\0') return 0; // word <= condition } diff --git a/src/hunspell/affixmgr.cxx b/src/hunspell/affixmgr.cxx index d3e36be..251d7ae 100644 --- a/src/hunspell/affixmgr.cxx +++ b/src/hunspell/affixmgr.cxx @@ -25,10 +25,12 @@ using namespace std; #endif #endif -AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) +AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key) { // register hash manager and load affix data from aff file - pHMgr = ptr; + pHMgr = ptr[0]; + alldic = ptr; + maxdic = md; keystring = NULL; trystring = NULL; encoding=NULL; @@ -107,7 +109,7 @@ AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr) contclasses[j] = 0; } - if (parse_file(affpath)) { + if (parse_file(affpath, key)) { HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath); } @@ -244,14 +246,10 @@ AffixMgr::~AffixMgr() // read in aff file and build up prefix and suffix entry objects -int AffixMgr::parse_file(const char * affpath) +int AffixMgr::parse_file(const char * affpath, const char * key) { - - // io buffers - char line[MAXLNLEN+1]; - - // affix type - char ft; + char * line; // io buffers + char ft; // affix type // checking flag duplication char dupflags[CONTSIZE]; @@ -261,8 +259,7 @@ int AffixMgr::parse_file(const char * affpath) int firstline = 1; // open the affix file - FILE * afflst; - afflst = fopen(affpath,"r"); + FileMgr * afflst = new FileMgr(affpath, key); if (!afflst) { HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath); return 1; @@ -271,10 +268,9 @@ int AffixMgr::parse_file(const char * affpath) // step one is to parse the affix file building up the internal // affix data structures - // read in each line ignoring any that do not // start with a known line type indicator - while (fgets(line,MAXLNLEN,afflst)) { + while (line = afflst->getline()) { mychomp(line); /* remove byte order mark */ @@ -289,7 +285,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the keyboard string */ if (strncmp(line,"KEY",3) == 0) { if (parse_string(line, &keystring, "KEY")) { - fclose(afflst); + delete afflst; return 1; } } @@ -297,7 +293,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the try string */ if (strncmp(line,"TRY",3) == 0) { if (parse_string(line, &trystring, "TRY")) { - fclose(afflst); + delete afflst; return 1; } } @@ -305,7 +301,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the name of the character set used by the .dict and .aff */ if (strncmp(line,"SET",3) == 0) { if (parse_string(line, &encoding, "SET")) { - fclose(afflst); + delete afflst; return 1; } if (strcmp(encoding, "UTF-8") == 0) { @@ -325,7 +321,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by the controlled compound words */ if (strncmp(line,"COMPOUNDFLAG",12) == 0) { if (parse_flag(line, &compoundflag, "COMPOUNDFLAG")) { - fclose(afflst); + delete afflst; return 1; } } @@ -334,12 +330,12 @@ int AffixMgr::parse_file(const char * affpath) if (strncmp(line,"COMPOUNDBEGIN",13) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundend, "COMPOUNDBEGIN")) { - fclose(afflst); + delete afflst; return 1; } } else { if (parse_flag(line, &compoundbegin, "COMPOUNDBEGIN")) { - fclose(afflst); + delete afflst; return 1; } } @@ -348,7 +344,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by compound words */ if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) { if (parse_flag(line, &compoundmiddle, "COMPOUNDMIDDLE")) { - fclose(afflst); + delete afflst; return 1; } } @@ -356,12 +352,12 @@ int AffixMgr::parse_file(const char * affpath) if (strncmp(line,"COMPOUNDEND",11) == 0) { if (complexprefixes) { if (parse_flag(line, &compoundbegin, "COMPOUNDEND")) { - fclose(afflst); + delete afflst; return 1; } } else { if (parse_flag(line, &compoundend, "COMPOUNDEND")) { - fclose(afflst); + delete afflst; return 1; } } @@ -370,7 +366,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the data used by compound_check() method */ if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) { if (parse_num(line, &cpdwordmax, "COMPOUNDWORDMAX")) { - fclose(afflst); + delete afflst; return 1; } } @@ -378,7 +374,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag sign compounds in dictionary */ if (strncmp(line,"COMPOUNDROOT",12) == 0) { if (parse_flag(line, &compoundroot, "COMPOUNDROOT")) { - fclose(afflst); + delete afflst; return 1; } } @@ -386,7 +382,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) { if (parse_flag(line, &compoundpermitflag, "COMPOUNDPERMITFLAG")) { - fclose(afflst); + delete afflst; return 1; } } @@ -394,7 +390,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by compound_check() method */ if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) { if (parse_flag(line, &compoundforbidflag, "COMPOUNDFORBIDFLAG")) { - fclose(afflst); + delete afflst; return 1; } } @@ -417,7 +413,7 @@ int AffixMgr::parse_file(const char * affpath) if (strncmp(line,"NOSUGGEST",9) == 0) { if (parse_flag(line, &nosuggest, "NOSUGGEST")) { - fclose(afflst); + delete afflst; return 1; } } @@ -425,7 +421,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by forbidden words */ if (strncmp(line,"FORBIDDENWORD",13) == 0) { if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) { - fclose(afflst); + delete afflst; return 1; } } @@ -433,7 +429,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by forbidden words */ if (strncmp(line,"LEMMA_PRESENT",13) == 0) { if (parse_flag(line, &lemma_present, "LEMMA_PRESENT")) { - fclose(afflst); + delete afflst; return 1; } } @@ -441,7 +437,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by circumfixes */ if (strncmp(line,"CIRCUMFIX",9) == 0) { if (parse_flag(line, &circumfix, "CIRCUMFIX")) { - fclose(afflst); + delete afflst; return 1; } } @@ -449,7 +445,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by fogemorphemes */ if (strncmp(line,"ONLYINCOMPOUND",14) == 0) { if (parse_flag(line, &onlyincompound, "ONLYINCOMPOUND")) { - fclose(afflst); + delete afflst; return 1; } } @@ -457,7 +453,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"PSEUDOROOT",10) == 0) { if (parse_flag(line, &needaffix, "PSEUDOROOT")) { - fclose(afflst); + delete afflst; return 1; } } @@ -465,7 +461,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by `needaffixs' */ if (strncmp(line,"NEEDAFFIX",9) == 0) { if (parse_flag(line, &needaffix, "NEEDAFFIX")) { - fclose(afflst); + delete afflst; return 1; } } @@ -473,7 +469,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the minimal length for words in compounds */ if (strncmp(line,"COMPOUNDMIN",11) == 0) { if (parse_num(line, &cpdmin, "COMPOUNDMIN")) { - fclose(afflst); + delete afflst; return 1; } if (cpdmin < 1) cpdmin = 1; @@ -482,7 +478,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the max. words and syllables in compounds */ if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) { if (parse_cpdsyllable(line)) { - fclose(afflst); + delete afflst; return 1; } } @@ -490,7 +486,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by compound_check() method */ if (strncmp(line,"SYLLABLENUM",11) == 0) { if (parse_string(line, &cpdsyllablenum, "SYLLABLENUM")) { - fclose(afflst); + delete afflst; return 1; } } @@ -503,7 +499,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the extra word characters */ if (strncmp(line,"WORDCHARS",9) == 0) { if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, "WORDCHARS", utf8)) { - fclose(afflst); + delete afflst; return 1; } } @@ -511,7 +507,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the ignored characters (for example, Arabic optional diacretics charachters */ if (strncmp(line,"IGNORE",6) == 0) { if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) { - fclose(afflst); + delete afflst; return 1; } } @@ -519,7 +515,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the typical fault correcting table */ if (strncmp(line,"REP",3) == 0) { if (parse_reptable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -527,7 +523,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the phonetic translation table */ if (strncmp(line,"PHONE",5) == 0) { if (parse_phonetable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -535,7 +531,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the checkcompoundpattern table */ if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) { if (parse_checkcpdtable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -543,7 +539,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the defcompound table */ if (strncmp(line,"COMPOUNDRULE",12) == 0) { if (parse_defcpdtable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -551,7 +547,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the related character map table */ if (strncmp(line,"MAP",3) == 0) { if (parse_maptable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -559,7 +555,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the word breakpoints table */ if (strncmp(line,"BREAK",5) == 0) { if (parse_breaktable(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -567,7 +563,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the language for language specific codes */ if (strncmp(line,"LANG",4) == 0) { if (parse_string(line, &lang, "LANG")) { - fclose(afflst); + delete afflst; return 1; } langnum = get_lang_num(lang); @@ -575,14 +571,14 @@ int AffixMgr::parse_file(const char * affpath) if (strncmp(line,"VERSION",7) == 0) { if (parse_string(line, &version, "VERSION")) { - fclose(afflst); + delete afflst; return 1; } } if (strncmp(line,"MAXNGRAMSUGS",12) == 0) { if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) { - fclose(afflst); + delete afflst; return 1; } } @@ -598,7 +594,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by forbidden words */ if (strncmp(line,"KEEPCASE",8) == 0) { if (parse_flag(line, &keepcase, "KEEPCASE")) { - fclose(afflst); + delete afflst; return 1; } } @@ -606,7 +602,7 @@ int AffixMgr::parse_file(const char * affpath) /* parse in the flag used by the affix generator */ if (strncmp(line,"SUBSTANDARD",11) == 0) { if (parse_flag(line, &substandard, "SUBSTANDARD")) { - fclose(afflst); + delete afflst; return 1; } } @@ -625,7 +621,7 @@ int AffixMgr::parse_file(const char * affpath) dupflags_ini = 0; } if (parse_affix(line, ft, afflst, dupflags)) { - fclose(afflst); + delete afflst; process_pfx_tree_to_list(); process_sfx_tree_to_list(); return 1; @@ -633,7 +629,7 @@ int AffixMgr::parse_file(const char * affpath) } } - fclose(afflst); + delete afflst; // convert affix trees to sorted list process_pfx_tree_to_list(); @@ -2731,7 +2727,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap, if (cmp == 0) { char * newword = sptr->add(ts, wl); if (newword) { - hentry * check = pHMgr->lookup(newword); + hentry * check = pHMgr->lookup(newword); // XXX extra dic if (!check || !check->astr || !TESTAFF(check->astr, forbiddenword, check->alen)) { return newword; @@ -2767,7 +2763,7 @@ char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap, int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts, int wl, const unsigned short * ap, unsigned short al, char * bad, int badl, - char * phone) + char * phon) { int nh=0; // first add root word to list @@ -2778,8 +2774,8 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts wlst[nh].orig = NULL; nh++; // add special phonetic version - if (phone && (nh < maxn)) { - wlst[nh].word = mystrdup(phone); + if (phon && (nh < maxn)) { + wlst[nh].word = mystrdup(phon); wlst[nh].allow = (1 == 0); wlst[nh].orig = mystrdup(ts); nh++; @@ -2809,11 +2805,11 @@ int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts wlst[nh].orig = NULL; nh++; // add special phonetic version - if (phone && (nh < maxn)) { + if (phon && (nh < maxn)) { char st[MAXWORDUTF8LEN]; - strcpy(st, phone); + strcpy(st, phon); strcat(st, sptr->getKey()); - reverseword(st + strlen(phone)); + reverseword(st + strlen(phon)); wlst[nh].word = mystrdup(st); wlst[nh].allow = (1 == 0); wlst[nh].orig = mystrdup(newword); @@ -3097,8 +3093,12 @@ FLAG AffixMgr::get_lemma_present() // utility method to look up root words in hash table struct hentry * AffixMgr::lookup(const char * word) { - if (! pHMgr) return NULL; - return pHMgr->lookup(word); + int i; + struct hentry * he = NULL; + for (i = 0; i < *maxdic && !he; i++) { + he = (alldic[i])->lookup(word); + } + return he; } // return the value of suffix @@ -3203,7 +3203,7 @@ int AffixMgr::parse_cpdsyllable(char * line) } /* parse in the typical fault correcting table */ -int AffixMgr::parse_reptable(char * line, FILE * af) +int AffixMgr::parse_reptable(char * line, FileMgr * af) { if (numrep != 0) { HUNSPELL_WARNING(stderr, "error: duplicate REP tables used\n"); @@ -3243,9 +3243,9 @@ int AffixMgr::parse_reptable(char * line, FILE * af) } /* now parse the numrep lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < numrep; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3283,7 +3283,7 @@ int AffixMgr::parse_reptable(char * line, FILE * af) } /* parse in the typical fault correcting table */ -int AffixMgr::parse_phonetable(char * line, FILE * af) +int AffixMgr::parse_phonetable(char * line, FileMgr * af) { if (phone) { HUNSPELL_WARNING(stderr, "error: duplicate PHONE tables used\n"); @@ -3327,9 +3327,9 @@ int AffixMgr::parse_phonetable(char * line, FILE * af) } /* now parse the phone->num lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < phone->num; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3370,7 +3370,7 @@ int AffixMgr::parse_phonetable(char * line, FILE * af) } /* parse in the checkcompoundpattern table */ -int AffixMgr::parse_checkcpdtable(char * line, FILE * af) +int AffixMgr::parse_checkcpdtable(char * line, FileMgr * af) { if (numcheckcpd != 0) { HUNSPELL_WARNING(stderr, "error: duplicate compound pattern tables used\n"); @@ -3410,9 +3410,9 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af) } /* now parse the numcheckcpd lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < numcheckcpd; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3450,7 +3450,7 @@ int AffixMgr::parse_checkcpdtable(char * line, FILE * af) } /* parse in the compound rule table */ -int AffixMgr::parse_defcpdtable(char * line, FILE * af) +int AffixMgr::parse_defcpdtable(char * line, FileMgr * af) { if (numdefcpd != 0) { HUNSPELL_WARNING(stderr, "error: duplicate compound rule tables used\n"); @@ -3490,9 +3490,9 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af) } /* now parse the numdefcpd lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < numdefcpd; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3533,7 +3533,7 @@ int AffixMgr::parse_defcpdtable(char * line, FILE * af) /* parse in the character map table */ -int AffixMgr::parse_maptable(char * line, FILE * af) +int AffixMgr::parse_maptable(char * line, FileMgr * af) { if (nummap != 0) { HUNSPELL_WARNING(stderr, "error: duplicate MAP tables used\n"); @@ -3573,9 +3573,9 @@ int AffixMgr::parse_maptable(char * line, FILE * af) } /* now parse the nummap lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < nummap; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3630,7 +3630,7 @@ int AffixMgr::parse_maptable(char * line, FILE * af) } /* parse in the word breakpoint table */ -int AffixMgr::parse_breaktable(char * line, FILE * af) +int AffixMgr::parse_breaktable(char * line, FileMgr * af) { if (numbreak != 0) { HUNSPELL_WARNING(stderr, "error: duplicate word breakpoint tables used\n"); @@ -3670,9 +3670,9 @@ int AffixMgr::parse_breaktable(char * line, FILE * af) } /* now parse the numbreak lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < numbreak; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3734,7 +3734,7 @@ void AffixMgr::reverse_condition(char * piece) { } } -int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags) +int AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags) { int numents = 0; // number of affentry structures to parse @@ -3745,7 +3745,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag struct affentry * nptr= NULL; char * tp = line; - char * nl = line; + char * nl; char * piece; int i = 0; @@ -3820,7 +3820,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag // now parse numents affentries for this affix for (int j=0; j < numents; j++) { - if (!fgets(nl,MAXLNLEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -3898,6 +3898,7 @@ int AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflag if (pHMgr->is_aliasf()) { int index = atoi(dash + 1); nptr->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(nptr->contclass)); + if (!nptr->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1); } else { nptr->contclasslen = (unsigned short) pHMgr->decode_flags(&(nptr->contclass), dash + 1); flag_qsort(nptr->contclass, 0, nptr->contclasslen); diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx index 644d2c9..969780d 100644 --- a/src/hunspell/affixmgr.hxx +++ b/src/hunspell/affixmgr.hxx @@ -27,6 +27,8 @@ class AffixMgr AffEntry * pFlag[CONTSIZE]; AffEntry * sFlag[CONTSIZE]; HashMgr * pHMgr; + HashMgr ** alldic; + int * maxdic; char * keystring; char * trystring; char * encoding; @@ -96,8 +98,9 @@ class AffixMgr flag flag_mode; public: - - AffixMgr(const char * affpath, HashMgr * ptr); + + AffixMgr(const char * affpath, HashMgr** ptr, int * md, + const char * key = NULL); ~AffixMgr(); struct hentry * affix_check(const char * word, int len, const unsigned short needflag = (unsigned short) 0, @@ -150,7 +153,7 @@ public: short numsyllable, short maxwordnum, short wnum, hentry ** words, char hu_mov_rule, char ** result, char * partresult); - struct hentry * lookup(const char * word); + struct hentry * lookup(const char * word); int get_numrep(); struct replentry * get_reptable(); struct phonetable * get_phonetable(); @@ -171,7 +174,6 @@ public: FLAG get_compoundbegin(); FLAG get_forbiddenword(); FLAG get_nosuggest(); -// FLAG get_circumfix(); FLAG get_needaffix(); FLAG get_onlyincompound(); FLAG get_compoundroot(); @@ -193,17 +195,17 @@ public: int get_checksharps(void); private: - int parse_file(const char * affpath); + int parse_file(const char * affpath, const char * key); int parse_flag(char * line, unsigned short * out, const char * name); int parse_num(char * line, int * out, const char * name); int parse_cpdsyllable(char * line); - int parse_reptable(char * line, FILE * af); - int parse_phonetable(char * line, FILE * af); - int parse_maptable(char * line, FILE * af); - int parse_breaktable(char * line, FILE * af); - int parse_checkcpdtable(char * line, FILE * af); - int parse_defcpdtable(char * line, FILE * af); - int parse_affix(char * line, const char at, FILE * af, char * dupflags); + int parse_reptable(char * line, FileMgr * af); + int parse_phonetable(char * line, FileMgr * af); + int parse_maptable(char * line, FileMgr * af); + int parse_breaktable(char * line, FileMgr * af); + int parse_checkcpdtable(char * line, FileMgr * af); + int parse_defcpdtable(char * line, FileMgr * af); + int parse_affix(char * line, const char at, FileMgr * af, char * dupflags); void reverse_condition(char *); int condlen(char *); diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx index 6914957..784ffa9 100644 --- a/src/hunspell/csutil.cxx +++ b/src/hunspell/csutil.cxx @@ -55,8 +55,8 @@ static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instan /* only UTF-16 (BMP) implementation */ char * u16_u8(char * dest, int size, const w_char * src, int srclen) { - char * u8 = dest; - char * u8_max = u8 + size; + signed char * u8 = (signed char *)dest; + signed char * u8_max = (signed char *)(u8 + size); const w_char * u2 = src; const w_char * u2_max = src + srclen; while ((u2 < u2_max) && (u8 < u8_max)) { @@ -103,7 +103,7 @@ char * u16_u8(char * dest, int size, const w_char * src, int srclen) { /* only UTF-16 (BMP) implementation */ int u8_u16(w_char * dest, int size, const char * src) { - const char * u8 = src; + const signed char * u8 = (const signed char *)src; w_char * u2 = dest; w_char * u2_max = u2 + size; @@ -125,7 +125,7 @@ int u8_u16(w_char * dest, int size, const char * src) { case 0x90: case 0xa0: case 0xb0: { - HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - src), src); + HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Unexpected continuation bytes in %ld. character position\n%s\n", static_cast<long>(u8 - (signed char *)src), src); u2->h = 0xff; u2->l = 0xfd; break; @@ -137,7 +137,7 @@ int u8_u16(w_char * dest, int size, const char * src) { u2->l = (*u8 << 6) + (*(u8+1) & 0x3f); u8++; } else { - HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src); + HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src); u2->h = 0xff; u2->l = 0xfd; } @@ -151,12 +151,12 @@ int u8_u16(w_char * dest, int size, const char * src) { u2->l = (*u8 << 6) + (*(u8+1) & 0x3f); u8++; } else { - HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src); + HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src); u2->h = 0xff; u2->l = 0xfd; } } else { - HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - src), src); + HUNSPELL_WARNING(stderr, "UTF-8 encoding error. Missing continuation byte in %ld. character position:\n%s\n", static_cast<long>(u8 - (signed char *)src), src); u2->h = 0xff; u2->l = 0xfd; } @@ -415,10 +415,10 @@ char * tr(char * text, char oldc, char newc) { // otherwise return -1 int morphcmp(const char * s, const char * t) { - int se; - int te; - char * sl; - char * tl; + int se = 0; + int te = 0; + const char * sl; + const char * tl; const char * olds; const char * oldt; if (!s || !t) return 1; @@ -515,7 +515,7 @@ int fieldlen(const char * r) char * copy_field(char * dest, const char * morph, const char * var) { if (!morph) return NULL; - char * beg = strstr(morph, var); + const char * beg = strstr(morph, var); if (beg) { char * d = dest; for (beg += MORPH_TAG_LEN; *beg != ' ' && *beg != '\t' && @@ -681,6 +681,20 @@ void mkallcap_utf(w_char * u, int nc, int langnum) { if (*p != '\0') *d= csconv[((unsigned char)*p)].cupper; } + // conversion function for protected memory + void store_pointer(char * dest, char * source) + { + memcpy(dest, &source, sizeof(char *)); + } + + // conversion function for protected memory + char * get_stored_pointer(char * s) + { + char * p; + memcpy(&p, s, sizeof(char *)); + return p; + } + // these are simple character mappings for the // encodings supported // supplying isupper, tolower, and toupper @@ -941,7 +955,7 @@ struct cs_info iso1_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; @@ -1201,7 +1215,7 @@ struct cs_info iso2_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; @@ -1461,7 +1475,7 @@ struct cs_info iso3_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso4_tbl[] = { @@ -1720,7 +1734,7 @@ struct cs_info iso4_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso5_tbl[] = { @@ -1979,7 +1993,7 @@ struct cs_info iso5_tbl[] = { { 0x00, 0xfc, 0xac }, { 0x00, 0xfd, 0xfd }, { 0x00, 0xfe, 0xae }, -{ 0x00, 0xff, 0xaf }, +{ 0x00, 0xff, 0xaf } }; struct cs_info iso6_tbl[] = { @@ -2238,7 +2252,7 @@ struct cs_info iso6_tbl[] = { { 0x00, 0xfc, 0xfc }, { 0x00, 0xfd, 0xfd }, { 0x00, 0xfe, 0xfe }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso7_tbl[] = { @@ -2497,7 +2511,7 @@ struct cs_info iso7_tbl[] = { { 0x00, 0xfc, 0xbc }, { 0x00, 0xfd, 0xbe }, { 0x00, 0xfe, 0xbf }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso8_tbl[] = { @@ -2756,7 +2770,7 @@ struct cs_info iso8_tbl[] = { { 0x00, 0xfc, 0xfc }, { 0x00, 0xfd, 0xfd }, { 0x00, 0xfe, 0xfe }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso9_tbl[] = { @@ -3015,7 +3029,7 @@ struct cs_info iso9_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0x49 }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso10_tbl[] = { @@ -3274,7 +3288,7 @@ struct cs_info iso10_tbl[] = { { 0x00, 0xfc, 0xfc }, { 0x00, 0xfd, 0xfd }, { 0x00, 0xfe, 0xfe }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info koi8r_tbl[] = { @@ -3533,7 +3547,7 @@ struct cs_info koi8r_tbl[] = { { 0x01, 0xdc, 0xfc }, { 0x01, 0xdd, 0xfd }, { 0x01, 0xde, 0xfe }, -{ 0x01, 0xdf, 0xff }, +{ 0x01, 0xdf, 0xff } }; struct cs_info koi8u_tbl[] = { @@ -3792,7 +3806,7 @@ struct cs_info koi8u_tbl[] = { { 0x01, 0xdc, 0xfc }, { 0x01, 0xdd, 0xfd }, { 0x01, 0xde, 0xfe }, -{ 0x01, 0xdf, 0xff }, +{ 0x01, 0xdf, 0xff } }; struct cs_info cp1251_tbl[] = { @@ -4051,7 +4065,7 @@ struct cs_info cp1251_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xdf }, +{ 0x00, 0xff, 0xdf } }; struct cs_info iso13_tbl[] = { @@ -4310,7 +4324,7 @@ struct cs_info iso13_tbl[] = { { 0x00, 0xFC, 0xDC }, { 0x00, 0xFD, 0xDD }, { 0x00, 0xFE, 0xDE }, -{ 0x00, 0xFF, 0xFF }, +{ 0x00, 0xFF, 0xFF } }; @@ -4570,7 +4584,7 @@ struct cs_info iso14_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct cs_info iso15_tbl[] = { @@ -4829,7 +4843,7 @@ struct cs_info iso15_tbl[] = { { 0x00, 0xfc, 0xdc }, { 0x00, 0xfd, 0xdd }, { 0x00, 0xfe, 0xde }, -{ 0x00, 0xff, 0xbe }, +{ 0x00, 0xff, 0xbe } }; struct cs_info iscii_devanagari_tbl[] = { @@ -5088,7 +5102,7 @@ struct cs_info iscii_devanagari_tbl[] = { { 0x00, 0xfc, 0xfc }, { 0x00, 0xfd, 0xfd }, { 0x00, 0xfe, 0xfe }, -{ 0x00, 0xff, 0xff }, +{ 0x00, 0xff, 0xff } }; struct enc_entry encds[] = { @@ -5108,7 +5122,7 @@ struct enc_entry encds[] = { {"ISO8859-13", iso13_tbl}, {"ISO8859-14", iso14_tbl}, {"ISO8859-15", iso15_tbl}, -{"ISCII-DEVANAGARI", iscii_devanagari_tbl}, +{"ISCII-DEVANAGARI", iscii_devanagari_tbl} }; struct cs_info * get_current_cs(const char * es) { @@ -5117,6 +5131,7 @@ struct cs_info * get_current_cs(const char * es) { for (int i = 0; i < n; i++) { if (strcmp(es,encds[i].enc_name) == 0) { ccs = encds[i].cs_table; + break; } } return ccs; @@ -5362,14 +5377,14 @@ int get_captype(char * word, int nl, cs_info * csconv) { int ncap = 0; int nneutral = 0; int firstcap = 0; - - for (char * q = word; *q != '\0'; q++) { - if (csconv[*((unsigned char *)q)].ccase) ncap++; - if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++; - } - if (ncap) { - firstcap = csconv[*((unsigned char *) word)].ccase; - } + if (csconv == NULL) return NOCAP; + for (char * q = word; *q != '\0'; q++) { + if (csconv[*((unsigned char *)q)].ccase) ncap++; + if (csconv[*((unsigned char *)q)].cupper == csconv[*((unsigned char *)q)].clower) nneutral++; + } + if (ncap) { + firstcap = csconv[*((unsigned char *) word)].ccase; + } // now finally set the captype if (ncap == 0) { diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx index df7979b..442cac2 100644 --- a/src/hunspell/csutil.hxx +++ b/src/hunspell/csutil.hxx @@ -30,13 +30,12 @@ #define MSEP_REC '\n' #define MSEP_ALT '\v' - // default flags #define DEFAULTFLAGS 65510 #define FORBIDDENWORD 65510 #define ONLYUPCASEFLAG 65511 -typedef struct { +typedef struct __attribute__ ((packed)) { unsigned char l; unsigned char h; } w_char; @@ -200,4 +199,10 @@ int morphcmp(const char * s, const char * t); int get_sfxcount(const char * morph); +// conversion function for protected memory +void store_pointer(char * dest, char * source); + +// conversion function for protected memory +char * get_stored_pointer(char * s); + #endif diff --git a/src/hunspell/filemgr.cxx b/src/hunspell/filemgr.cxx new file mode 100644 index 0000000..165fc77 --- /dev/null +++ b/src/hunspell/filemgr.cxx @@ -0,0 +1,38 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "filemgr.hxx" + +int FileMgr::fail(const char * err, const char * par) { + fprintf(stderr, err, par); + return -1; +} + +FileMgr::FileMgr(const char * file, const char * key) { + hin = NULL; + fin = fopen(file, "r"); + if (!fin) { + // check hzipped file + char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION)); + if (st) { + strcpy(st, file); + strcat(st, HZIP_EXTENSION); + hin = new Hunzip(st, key); + } + } + if (!fin && !hin) fail(MSG_OPEN, file); +} + +FileMgr::~FileMgr() +{ + if (fin) fclose(fin); + if (hin) delete hin; +} + +char * FileMgr::getline() { + const char * l; + if (fin) return fgets(in, BUFSIZE - 1, fin); + if (hin && (l = hin->getline())) return strcpy(in, l); + return NULL; +} diff --git a/src/hunspell/filemgr.hxx b/src/hunspell/filemgr.hxx new file mode 100644 index 0000000..593228d --- /dev/null +++ b/src/hunspell/filemgr.hxx @@ -0,0 +1,19 @@ +/* file manager class - read lines of files [filename] OR [filename.hz] */ +#ifndef _FILEMGR_HXX_ +#define _FILEMGR_HXX_ +#include "hunzip.hxx" + +class FileMgr +{ +protected: + FILE * fin; + Hunzip * hin; + char in[BUFSIZE + 50]; // input buffer + int fail(const char * err, const char * par); + +public: + FileMgr(const char * filename, const char * key = NULL); + ~FileMgr(); + char * getline(); +}; +#endif diff --git a/src/hunspell/hashmgr.cxx b/src/hunspell/hashmgr.cxx index 08e061c..5f0b169 100644 --- a/src/hunspell/hashmgr.cxx +++ b/src/hunspell/hashmgr.cxx @@ -29,7 +29,7 @@ using namespace std; // build a hash table from a munched word list -HashMgr::HashMgr(const char * tpath, const char * apath) +HashMgr::HashMgr(const char * tpath, const char * apath, const char * key) { tablesize = 0; tableptr = NULL; @@ -48,8 +48,8 @@ HashMgr::HashMgr(const char * tpath, const char * apath) numaliasm = 0; aliasm = NULL; forbiddenword = FORBIDDENWORD; // forbidden word signing flag - load_config(apath); - int ec = load_tables(tpath); + load_config(apath, key); + int ec = load_tables(tpath, key); if (ec) { /* error condition - what should we do here */ HUNSPELL_WARNING(stderr, "Hash Manager Error : %d\n",ec); @@ -129,7 +129,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, int al, const char * desc, bool onlyupcase) { bool upcasehomonym = false; - int descl = desc ? (aliasm ? sizeof(char *) : strlen(desc) + 1) : 0; + int descl = desc ? (aliasm ? sizeof(short) : strlen(desc) + 1) : 0; // variable-length hash record with word and optional fields struct hentry* hp = (struct hentry *) malloc (sizeof(struct hentry) + wbl + descl); @@ -161,7 +161,8 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff, hp->var = H_OPT; if (aliasm) { hp->var += H_OPT_ALIASM; - *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc)); +// *((char **) (hpw + wbl + 1)) = get_aliasm(atoi(desc)); + store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc))); } else { strcpy(hpw + wbl + 1, desc); if (complexprefixes) { @@ -236,12 +237,12 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, if (al) memcpy(flags2, flags, al * sizeof(unsigned short)); flags2[al] = ONLYUPCASEFLAG; if (utf8) { - char st[MAXDELEN]; - w_char w[MAXDELEN]; - int wlen = u8_u16(w, MAXDELEN, word); + char st[BUFSIZE]; + w_char w[BUFSIZE]; + int wlen = u8_u16(w, BUFSIZE, word); mkallsmall_utf(w, wlen, langnum); mkallcap_utf(w, 1, langnum); - u16_u8(st, MAXDELEN, w, wlen); + u16_u8(st, BUFSIZE, w, wlen); return add_word(st,wbl,wcl,flags2,al+1,dp, true); } else { mkallsmall(word, csconv); @@ -256,8 +257,8 @@ int HashMgr::add_hidden_capitalized_word(char * word, int wbl, int wcl, int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { int len; if (utf8) { - w_char dest_utf[MAXDELEN]; - len = u8_u16(dest_utf, MAXDELEN, word); + w_char dest_utf[BUFSIZE]; + len = u8_u16(dest_utf, BUFSIZE, word); *captype = get_captype_utf8(dest_utf, len, langnum); } else { len = wbl; @@ -269,8 +270,8 @@ int HashMgr::get_clen_and_captype(const char * word, int wbl, int * captype) { // remove word with FORBIDDENWORD flag (not implemented) int HashMgr::remove(const char * word) { - struct hentry * dp = lookup(word); -/* +/* struct hentry * dp = lookup(word); + if (!word || (!dp->astr || !TESTAFF(dp->astr, forbiddenword, pt->alen))) { int wbl = strlen(word); int wcl = get_clen_and_captype(word, wbl, &captype); @@ -344,22 +345,22 @@ struct hentry * HashMgr::walk_hashtable(int &col, struct hentry * hp) const } // load a munched word list and build a hash table on the fly -int HashMgr::load_tables(const char * tpath) +int HashMgr::load_tables(const char * tpath, const char * key) { int al; char * ap; char * dp; unsigned short * flags; + char * ts; - // raw dictionary - munched file - FILE * rawdict = fopen(tpath, "r"); - if (rawdict == NULL) return 1; + // open dictionary file + FileMgr * dict = new FileMgr(tpath, key); + if (dict == NULL) return 1; // first read the first line of file to get hash table size */ - char ts[MAXDELEN]; - if (! fgets(ts, MAXDELEN-1,rawdict)) { + if (!(ts = dict->getline())) { HUNSPELL_WARNING(stderr, "error: empty dic file\n"); - fclose(rawdict); + delete dict; return 2; } mychomp(ts); @@ -373,7 +374,7 @@ int HashMgr::load_tables(const char * tpath) if ((*ts < '1') || (*ts > '9')) HUNSPELL_WARNING(stderr, "error - missing word count in dictionary file\n"); tablesize = atoi(ts); if (!tablesize) { - fclose(rawdict); + delete dict; return 4; } tablesize = tablesize + 5 + USERWORD; @@ -382,7 +383,7 @@ int HashMgr::load_tables(const char * tpath) // allocate the hash table tableptr = (struct hentry **) malloc(tablesize * sizeof(struct hentry *)); if (! tableptr) { - fclose(rawdict); + delete dict; return 3; } for (int i=0; i<tablesize; i++) tableptr[i] = NULL; @@ -390,7 +391,7 @@ int HashMgr::load_tables(const char * tpath) // loop through all words on much list and add to hash // table and create word and affix strings - while (fgets(ts,MAXDELEN-1,rawdict)) { + while ((ts = dict->getline())) { mychomp(ts); // split each line into word and morphological description dp = strchr(ts,'\t'); @@ -443,16 +444,15 @@ int HashMgr::load_tables(const char * tpath) // add the word and its index plus its capitalized form optionally if (add_word(ts,wbl,wcl,flags,al,dp, false) || add_hidden_capitalized_word(ts, wbl, wcl, flags, al, dp, captype)) { - fclose(rawdict); + delete dict; return 5; } } - fclose(rawdict); + delete dict; return 0; } - // the hash function is a simple load and rotate // algorithm borrowed @@ -506,8 +506,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags) { break; } case FLAG_UNI: { // UTF-8 characters - w_char w[MAXDELEN/2]; - len = u8_u16(w, MAXDELEN/2, flags); + w_char w[BUFSIZE/2]; + len = u8_u16(w, BUFSIZE/2, flags); *result = (unsigned short *) malloc(len * sizeof(short)); if (!*result) return -1; memcpy(*result, w, len * sizeof(short)); @@ -566,16 +566,13 @@ char * HashMgr::encode_flag(unsigned short f) { } // read in aff file and set flag mode -int HashMgr::load_config(const char * affpath) +int HashMgr::load_config(const char * affpath, const char * key) { + char * line; // io buffers int firstline = 1; - - // io buffers - char line[MAXDELEN+1]; // open the affix file - FILE * afflst; - afflst = fopen(affpath,"r"); + FileMgr * afflst = new FileMgr(affpath, key); if (!afflst) { HUNSPELL_WARNING(stderr, "Error - could not open affix description file %s\n",affpath); return 1; @@ -584,7 +581,7 @@ int HashMgr::load_config(const char * affpath) // read in each line ignoring any that do not // start with a known line type indicator - while (fgets(line,MAXDELEN,afflst)) { + while ((line = afflst->getline())) { mychomp(line); /* remove byte order mark */ @@ -608,7 +605,7 @@ int HashMgr::load_config(const char * affpath) if (strncmp(line,"FORBIDDENWORD",13) == 0) { char * st = NULL; if (parse_string(line, &st, "FORBIDDENWORD")) { - fclose(afflst); + delete afflst; return 1; } forbiddenword = decode_flag(st); @@ -616,7 +613,7 @@ int HashMgr::load_config(const char * affpath) } if (strncmp(line, "SET", 3) == 0) { if (parse_string(line, &enc, "SET")) { - fclose(afflst); + delete afflst; return 1; } if (strcmp(enc, "UTF-8") == 0) { @@ -630,7 +627,7 @@ int HashMgr::load_config(const char * affpath) } if (strncmp(line, "LANG", 4) == 0) { if (parse_string(line, &lang, "LANG")) { - fclose(afflst); + delete afflst; return 1; } langnum = get_lang_num(lang); @@ -639,21 +636,21 @@ int HashMgr::load_config(const char * affpath) /* parse in the ignored characters (for example, Arabic optional diacritics characters */ if (strncmp(line,"IGNORE",6) == 0) { if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) { - fclose(afflst); + delete afflst; return 1; } } if ((strncmp(line,"AF",2) == 0) && isspace(line[2])) { if (parse_aliasf(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } if ((strncmp(line,"AM",2) == 0) && isspace(line[2])) { if (parse_aliasm(line, afflst)) { - fclose(afflst); + delete afflst; return 1; } } @@ -662,12 +659,12 @@ int HashMgr::load_config(const char * affpath) if (((strncmp(line,"SFX",3) == 0) || (strncmp(line,"PFX",3) == 0)) && isspace(line[3])) break; } if (csconv == NULL) csconv = get_current_cs("ISO8859-1"); - fclose(afflst); + delete afflst; return 0; } /* parse in the ALIAS table */ -int HashMgr::parse_aliasf(char * line, FILE * af) +int HashMgr::parse_aliasf(char * line, FileMgr * af) { if (numaliasf != 0) { HUNSPELL_WARNING(stderr, "error: duplicate AF (alias for flag vector) tables used\n"); @@ -723,9 +720,9 @@ int HashMgr::parse_aliasf(char * line, FILE * af) } /* now parse the numaliasf lines to read in the remainder of the table */ - char * nl = line; + char * nl; for (int j=0; j < numaliasf; j++) { - if (!fgets(nl,MAXDELEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; @@ -788,7 +785,7 @@ int HashMgr::get_aliasf(int index, unsigned short ** fvec) { } /* parse morph alias definitions */ -int HashMgr::parse_aliasm(char * line, FILE * af) +int HashMgr::parse_aliasm(char * line, FileMgr * af) { if (numaliasm != 0) { HUNSPELL_WARNING(stderr, "error: duplicate AM (aliases for morphological descriptions) tables used\n"); @@ -836,7 +833,7 @@ int HashMgr::parse_aliasm(char * line, FILE * af) /* now parse the numaliasm lines to read in the remainder of the table */ char * nl = line; for (int j=0; j < numaliasm; j++) { - if (!fgets(nl,MAXDELEN,af)) return 1; + if (!(nl = af->getline())) return 1; mychomp(nl); tp = nl; i = 0; diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx index d88de48..9664e5f 100644 --- a/src/hunspell/hashmgr.hxx +++ b/src/hunspell/hashmgr.hxx @@ -8,6 +8,7 @@ #endif #include "htypes.hxx" +#include "filemgr.hxx" enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI }; @@ -35,7 +36,7 @@ class HashMgr public: - HashMgr(const char * tpath, const char * apath); + HashMgr(const char * tpath, const char * apath, const char * key = NULL); ~HashMgr(); struct hentry * lookup(const char *) const; @@ -55,14 +56,14 @@ public: private: int get_clen_and_captype(const char * word, int wbl, int * captype); - int load_tables(const char * tpath); + int load_tables(const char * tpath, const char * key); int add_word(const char * word, int wbl, int wcl, unsigned short * ap, int al, const char * desc, bool onlyupcase); - int load_config(const char * affpath); - int parse_aliasf(char * line, FILE * af); + int load_config(const char * affpath, const char * key); + int parse_aliasf(char * line, FileMgr * af); int add_hidden_capitalized_word(char * word, int wbl, int wcl, unsigned short * flags, int al, char * dp, int captype); - int parse_aliasm(char * line, FILE * af); + int parse_aliasm(char * line, FileMgr * af); }; diff --git a/src/hunspell/htypes.hxx b/src/hunspell/htypes.hxx index bc078c3..718a0f8 100644 --- a/src/hunspell/htypes.hxx +++ b/src/hunspell/htypes.hxx @@ -1,8 +1,6 @@ #ifndef _HTYPES_HXX_ #define _HTYPES_HXX_ -#define MAXDELEN 8192 - #define ROTATE_LEN 5 #define ROTATE(v,q) \ @@ -15,7 +13,8 @@ #define HENTRY_WORD(h) &(h->word) #define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \ - *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL) + get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL) +// *((char **) (&(h->word) + h->blen + 1)) : &(h->word) + h->blen + 1) : NULL) #define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL) // approx. number of user defined words diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx index 502b997..f2f7536 100644 --- a/src/hunspell/hunspell.cxx +++ b/src/hunspell/hunspell.cxx @@ -21,19 +21,22 @@ using namespace std; #endif #endif -Hunspell::Hunspell(const char * affpath, const char * dpath) +Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key) { encoding = NULL; csconv = NULL; utf8 = 0; complexprefixes = 0; + affixpath = mystrdup(affpath); + maxdic = 0; /* first set up the hash manager */ - pHMgr = new HashMgr(dpath, affpath); + pHMgr[0] = new HashMgr(dpath, affpath, key); + if (pHMgr[0]) maxdic = 1; /* next set up the affix manager */ /* it needs access to the hash manager lookup methods */ - pAMgr = new AffixMgr(affpath,pHMgr); + pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key); /* get the preferred try string and the dictionary */ /* encoding from the Affix Manager for that dictionary */ @@ -55,18 +58,28 @@ Hunspell::~Hunspell() { if (pSMgr) delete pSMgr; if (pAMgr) delete pAMgr; - if (pHMgr) delete pHMgr; + for (int i = 0; i < maxdic; i++) delete pHMgr[i]; + maxdic = 0; pSMgr = NULL; pAMgr = NULL; - pHMgr = NULL; + pHMgr[0] = NULL; #ifdef MOZILLA_CLIENT free(csconv); #endif csconv= NULL; if (encoding) free(encoding); encoding = NULL; + if (affixpath) free(affixpath); + affixpath = NULL; } +// load extra dictionaries +int Hunspell::add_dic(const char * dpath, const char * key) { + if (maxdic == MAXDIC) return 1; + pHMgr[maxdic] = new HashMgr(dpath, affixpath, key); + if (pHMgr[maxdic]) maxdic++; else return 1; + return 0; +} // make a copy of src at destination while removing all leading // blanks and removing any trailing periods after recording @@ -334,7 +347,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) int abbv = 0; int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv); int info2 = 0; - if (wl == 0) return 1; + if (wl == 0 || maxdic == 0) return 1; if (root) *root = NULL; // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.) @@ -559,7 +572,7 @@ int Hunspell::spell(const char * word, int * info, char ** root) struct hentry * Hunspell::checkword(const char * w, int * info, char ** root) { struct hentry * he = NULL; - int len; + int len, i; char w2[MAXWORDUTF8LEN]; const char * word; @@ -586,7 +599,8 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root) } // look word in hash table - if (pHMgr) he = pHMgr->lookup(word); + for (i = 0; (i < maxdic) && !he; i ++) { + he = (pHMgr[i])->lookup(word); // check forbidden and onlyincompound words if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) { @@ -607,6 +621,7 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root) (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) || (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)) )) he = he->next_homonym; + } // check with affixes if (!he && pAMgr) { @@ -668,7 +683,7 @@ int Hunspell::suggest(char*** slst, const char * word) int onlycmpdsug = 0; char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; - if (! pSMgr) return 0; + if (!pSMgr || maxdic == 0) return 0; w_char unicw[MAXWORDLEN]; int nc = strlen(word); if (utf8) { @@ -820,27 +835,27 @@ int Hunspell::suggest(char*** slst, const char * word) if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) { switch(captype) { case NOCAP: { - ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr); + ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic); break; } case HUHCAP: { memcpy(wspace,cw,(wl+1)); mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr); + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); break; } case INITCAP: { capwords = 1; memcpy(wspace,cw,(wl+1)); mkallsmall2(wspace, unicw, nc); - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr); + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); break; } case ALLCAP: { memcpy(wspace,cw,(wl+1)); mkallsmall2(wspace, unicw, nc); int oldns = ns; - ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr); + ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic); for (int j = oldns; j < ns; j++) mkallcap((*slst)[j]); break; @@ -933,7 +948,7 @@ int Hunspell::suggest_auto(char*** slst, const char * word) { char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; - if (! pSMgr) return 0; + if (!pSMgr || maxdic == 0) return 0; int wl = strlen(word); if (utf8) { if (wl >= MAXWORDUTF8LEN) return 0; @@ -1056,13 +1071,13 @@ int Hunspell::stem(char*** slst, char ** desc, int n) alt = strstr(alt, " | "); } int pln = line_tok(tok, &pl, MSEP_ALT); - for (int i = 0; i < pln; i++) { + for (int k = 0; k < pln; k++) { // add derivational suffixes - if (strstr(pl[i], MORPH_DERI_SFX)) { + if (strstr(pl[k], MORPH_DERI_SFX)) { // remove inflectional suffixes - char * is = strstr(pl[i], MORPH_INFL_SFX); + char * is = strstr(pl[k], MORPH_INFL_SFX); if (is) *is = '\0'; - char * sg = pSMgr->suggest_gen(&(pl[i]), 1, pl[i]); + char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]); if (sg) { char ** gen; int genl = line_tok(sg, &gen, MSEP_REC); @@ -1075,10 +1090,10 @@ int Hunspell::stem(char*** slst, char ** desc, int n) } } else { sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result); - if (strstr(pl[i], MORPH_SURF_PFX)) { - copy_field(result2 + strlen(result2), pl[i], MORPH_SURF_PFX); + if (strstr(pl[k], MORPH_SURF_PFX)) { + copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX); } - copy_field(result2 + strlen(result2), pl[i], MORPH_STEM); + copy_field(result2 + strlen(result2), pl[k], MORPH_STEM); } } freelist(&pl, pln); @@ -1100,7 +1115,7 @@ int Hunspell::suggest_pos_stems(char*** slst, const char * word) { char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; - if (! pSMgr) return 0; + if (! pSMgr || maxdic == 0) return 0; int wl = strlen(word); if (utf8) { if (wl >= MAXWORDUTF8LEN) return 0; @@ -1220,13 +1235,13 @@ int Hunspell::mkinitsmall2(char * p, w_char * u, int nc) int Hunspell::add(const char * word) { - if (pHMgr) return pHMgr->add(word, NULL); + if (pHMgr[0]) return (pHMgr[0])->add(word, NULL); return 0; } int Hunspell::add_with_affix(const char * word, const char * example) { - if (pHMgr) return pHMgr->add_with_affix(word, example); + if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example); return 0; } @@ -1234,7 +1249,7 @@ int Hunspell::add_with_affix(const char * word, const char * example) int Hunspell::remove(const char * word) { - if (pHMgr) return pHMgr->remove(word); + if (pHMgr[0]) return (pHMgr[0])->remove(word); return 0; } @@ -1248,7 +1263,7 @@ struct cs_info * Hunspell::get_csconv() return csconv; } -char * Hunspell::cat_result(char * result, char * st) +void Hunspell::cat_result(char * result, char * st) { if (st) { if (*result) strcat(result, "\n"); @@ -1261,7 +1276,7 @@ int Hunspell::analyze(char*** slst, const char * word) { char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; - if (! pSMgr) return 0; + if (! pSMgr || maxdic == 0) return 0; int wl = strlen(word); if (utf8) { if (wl >= MAXWORDUTF8LEN) return 0; @@ -1530,7 +1545,7 @@ char * Hunspell::morph_with_correction(const char * word) { char cw[MAXWORDUTF8LEN]; char wspace[MAXWORDUTF8LEN]; - if (! pSMgr) return NULL; + if (! pSMgr || maxdic == 0) return NULL; int wl = strlen(word); if (utf8) { if (wl >= MAXWORDUTF8LEN) return NULL; @@ -1680,6 +1695,12 @@ Hunhandle *Hunspell_create(const char * affpath, const char * dpath) return (Hunhandle*)(new Hunspell(affpath, dpath)); } +Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, + const char * key) +{ + return (Hunhandle*)(new Hunspell(affpath, dpath, key)); +} + void Hunspell_destroy(Hunhandle *pHunspell) { delete (Hunspell*)(pHunspell); diff --git a/src/hunspell/hunspell.h b/src/hunspell/hunspell.h index 452599c..dc8d501 100644 --- a/src/hunspell/hunspell.h +++ b/src/hunspell/hunspell.h @@ -8,6 +8,10 @@ extern "C" { typedef struct Hunhandle Hunhandle; Hunhandle *Hunspell_create(const char * affpath, const char * dpath); + +Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath, + const char * key); + void Hunspell_destroy(Hunhandle *pHunspell); /* spell(word) - spellcheck word @@ -65,19 +69,19 @@ int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word, /* add word to the run-time dictionary */ -int Hunspell_add(const char * word); +int Hunspell_add(Hunhandle *pHunspell, const char * word); /* add word to the run-time dictionary with affix flags of * the example (a dictionary word): Hunspell will recognize * affixed forms of the new word, too. */ -int Hunspell_add_with_affix(const char * word, const char * example); +int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example); /* remove word from the run-time dictionary */ /* NOTE: not implemented yet */ -int Hunspell_remove(const char * word); +int Hunspell_remove(Hunhandle *pHunspell, const char * word); #ifdef __cplusplus diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx index 38c141e..8461b54 100644 --- a/src/hunspell/hunspell.hxx +++ b/src/hunspell/hunspell.hxx @@ -10,6 +10,7 @@ #define SPELL_NOCAP (1 << 3) #define SPELL_INITCAP (1 << 4) +#define MAXDIC 20 #define MAXSUGGESTION 15 #define MAXSHARPS 5 @@ -33,14 +34,17 @@ class Hunspell #endif { AffixMgr* pAMgr; - HashMgr* pHMgr; + HashMgr* pHMgr[MAXDIC]; + int maxdic; SuggestMgr* pSMgr; + char * affixpath; char * encoding; struct cs_info * csconv; int langnum; int utf8; int complexprefixes; char** wordbreak; + char * key; public: @@ -48,10 +52,12 @@ public: * input: path of affix file and dictionary file */ - Hunspell(const char * affpath, const char * dpath); - + Hunspell(const char * affpath, const char * dpath, const char * key = NULL); ~Hunspell(); + /* load extra dictionaries (only dic files) */ + int add_dic(const char * dpath, const char * key = NULL); + /* spell(word) - spellcheck word * output: 0 = bad word, not 0 = good word * @@ -164,7 +170,7 @@ private: hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root); int is_keepcase(const hentry * rv); int insert_sug(char ***slst, char * word, int ns); - char * cat_result(char * result, char * st); + void cat_result(char * result, char * st); char * stem_description(const char * desc); }; diff --git a/src/hunspell/hunzip.cxx b/src/hunspell/hunzip.cxx new file mode 100644 index 0000000..93912df --- /dev/null +++ b/src/hunspell/hunzip.cxx @@ -0,0 +1,191 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "hunzip.hxx" + +#define CODELEN 65536 +#define BASEBITREC 5000 + +#define UNCOMPRESSED '\002' +#define MAGIC "hz0" +#define MAGIC_ENCRYPT "hz1" +#define MAGICLEN (sizeof(MAGIC) - 1) + +int Hunzip::fail(const char * err, const char * par) { + fprintf(stderr, err, par); + return -1; +} + +Hunzip::Hunzip(const char * file, const char * key) { + bufsiz = 0; + lastbit = 0; + inc = 0; + outc = 0; + dec = NULL; + filename = (char *) malloc(strlen(file) + 1); + if (filename) strcpy(filename, file); + if (getcode(key) == -1) bufsiz = -1; + else bufsiz = getbuf(); +} + +int Hunzip::getcode(const char * key) { + unsigned char c[2]; + int i, j, n, o, p; + int allocatedbit = BASEBITREC; + const char * enc = key; + + fin = fopen(filename, "r"); + if (!fin) return -1; + + // read magic number + if ((fread(in, 1, 3, fin) < MAGICLEN) + || !(strncmp(MAGIC, in, MAGICLEN) == 0 || + strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) { + return fail(MSG_FORMAT, filename); + } + + // check encryption + if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) { + unsigned char cs; + if (!key) return fail(MSG_KEY, filename); + if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); + for (cs = 0; *enc; enc++) cs ^= *enc; + if (cs != c[0]) return fail(MSG_KEY, filename); + enc = key; + } else key = NULL; + + // read record count + if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); + + if (key) { + c[0] ^= *enc; + if (*(++enc) == '\0') enc = key; + c[1] ^= *enc; + } + + n = ((int) c[0] << 8) + c[1]; + dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit)); + if (!dec) return fail(MSG_MEMORY, filename); + dec[0].v[0] = 0; + dec[0].v[1] = 0; + + // read codes + for (i = 0; i < n; i++) { + unsigned char l; + if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename); + if (key) { + if (*(++enc) == '\0') enc = key; + c[0] ^= *enc; + if (*(++enc) == '\0') enc = key; + c[1] ^= *enc; + } + if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename); + if (key) { + if (*(++enc) == '\0') enc = key; + l ^= *enc; + } + if (fread(in, 1, l/8+1, fin) < l/8+1) return fail(MSG_FORMAT, filename); + if (key) for (j = 0; j <= l/8; j++) { + if (*(++enc) == '\0') enc = key; + in[j] ^= *enc; + } + p = 0; + for (j = 0; j < l; j++) { + int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0; + int oldp = p; + p = dec[p].v[b]; + if (p == 0) { + lastbit++; + if (lastbit == allocatedbit) { + allocatedbit += BASEBITREC; + dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit)); + } + dec[lastbit].v[0] = 0; + dec[lastbit].v[1] = 0; + dec[oldp].v[b] = lastbit; + p = lastbit; + } + } + dec[p].c[0] = c[0]; + dec[p].c[1] = c[1]; + } + return 0; +} + +Hunzip::~Hunzip() +{ + if (dec) free(dec); + if (fin) fclose(fin); + if (filename) free(filename); +} + +int Hunzip::getbuf() { + int p = 0; + int o = 0; + do { + if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8; + for (; inc < inbits; inc++) { + int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0; + int oldp = p; + p = dec[p].v[b]; + if (p == 0) { + if (oldp == lastbit) { + fclose(fin); + fin = NULL; + // add last odd byte + if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1]; + return o; + } + out[o++] = dec[oldp].c[0]; + out[o++] = dec[oldp].c[1]; + if (o == BUFSIZE) return o; + p = dec[p].v[b]; + } + } + inc = 0; + } while (inbits == BUFSIZE * 8); + return fail(MSG_FORMAT, filename); +} + +const char * Hunzip::getline() { + char linebuf[BUFSIZE]; + int l = 0, eol = 0, left = 0, right = 0; + char end; + if (bufsiz == -1) return NULL; + while (l < bufsiz && !eol) { + linebuf[l++] = out[outc]; + switch (out[outc]) { + case '\t': break; + case 31: { // escape + if (++outc == bufsiz) { + bufsiz = getbuf(); + outc = 0; + } + linebuf[l - 1] = out[outc]; + break; + } + case ' ': break; + default: if (((unsigned char) out[outc]) < 47) { + if (out[outc] > 32) { + right = out[outc] - 31; + if (++outc == bufsiz) { + bufsiz = getbuf(); + outc = 0; + } + } + if (out[outc] == 30) left = 9; else left = out[outc]; + linebuf[l-1] = '\n'; + eol = 1; + } + } + if (++outc == bufsiz) { + outc = 0; + bufsiz = fin ? getbuf(): -1; + } + } + if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1); + else linebuf[l] = '\0'; + strcpy(line + left, linebuf); + return line; +} diff --git a/src/hunspell/hunzip.hxx b/src/hunspell/hunzip.hxx new file mode 100644 index 0000000..52109d1 --- /dev/null +++ b/src/hunspell/hunzip.hxx @@ -0,0 +1,41 @@ +/* hunzip: file decompression for sorted dictionaries with optional encryption, + * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */ + +#ifndef _HUNZIP_HXX_ +#define _HUNZIP_HXX_ + +#define BUFSIZE 65536 +#define HZIP_EXTENSION ".hz" + +#define MSG_OPEN "error: %s: cannot open\n" +#define MSG_FORMAT "error: %s: not in hzip format\n" +#define MSG_MEMORY "error: %s: missing memory\n" +#define MSG_KEY "error: %s: missing or bad password\n" + +struct bit { + unsigned char c[2]; + int v[2]; +}; + +class Hunzip +{ + +protected: + char * filename; + FILE * fin; + int bufsiz, lastbit, inc, inbits, outc; + struct bit * dec; // code table + char in[BUFSIZE]; // input buffer + char out[BUFSIZE + 1]; // Huffman-decoded buffer + char line[BUFSIZE + 50]; // decoded line + int getcode(const char * key); + int getbuf(); + int fail(const char * err, const char * par); + +public: + Hunzip(const char * filename, const char * key = NULL); + ~Hunzip(); + const char * getline(); +}; + +#endif diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx index b1a58f3..ce54f57 100644 --- a/src/hunspell/suggestmgr.cxx +++ b/src/hunspell/suggestmgr.cxx @@ -1028,7 +1028,7 @@ int SuggestMgr::movechar_utf(char ** wlst, const w_char * word, int wl, int ns, } // generate a set of suggestions for very poorly spelled words -int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr) +int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr** pHMgr, int md) { int i, j; @@ -1037,8 +1037,6 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr) int lp, lpphon; int nonbmp = 0; - if (!pHMgr) return ns; - // exhaustively search through all root words // keeping track of the MAX_ROOTS most similar root words struct hentry * roots[MAX_ROOTS]; @@ -1088,8 +1086,9 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr) mkallcap(candidate, csconv); phonet(candidate, target, n, *ph); } - - while ((hp = pHMgr->walk_hashtable(col, hp))) { + + for (i = 0; i < md; i++) { + while ((hp = (pHMgr[i])->walk_hashtable(col, hp))) { if ((hp->astr) && (pAMgr) && (TESTAFF(hp->astr, pAMgr->get_forbiddenword(), hp->alen) || TESTAFF(hp->astr, ONLYUPCASEFLAG, hp->alen) || @@ -1135,7 +1134,7 @@ int SuggestMgr::ngsuggest(char** wlst, char * w, int ns, HashMgr* pHMgr) lval = scoresphon[j]; } } - } + }} // find minimum threshhold for a passable suggestion // mangle original word three differnt ways @@ -1557,7 +1556,7 @@ char * SuggestMgr::suggest_hentry_gen(hentry * rv, char * pattern) *result = '\0'; int sfxcount = get_sfxcount(pattern); -// if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL; + if (get_sfxcount(HENTRY_DATA(rv)) > sfxcount) return NULL; if (HENTRY_DATA(rv)) { char * aff = pAMgr->morphgen(HENTRY_WORD(rv), rv->blen, rv->astr, rv->alen, diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx index d22884f..0e61572 100644 --- a/src/hunspell/suggestmgr.hxx +++ b/src/hunspell/suggestmgr.hxx @@ -51,7 +51,7 @@ public: ~SuggestMgr(); int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug); - int ngsuggest(char ** wlst, char * word, int ns, HashMgr* pHMgr); + int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md); int suggest_auto(char*** slst, const char * word, int nsug); int suggest_stems(char*** slst, const char * word, int nsug); int suggest_pos_stems(char*** slst, const char * word, int nsug); diff --git a/src/parsers/Makefile.am b/src/parsers/Makefile.am index ffd13f3..c8e34be 100644 --- a/src/parsers/Makefile.am +++ b/src/parsers/Makefile.am @@ -7,4 +7,4 @@ noinst_PROGRAMS=testparser testparser_SOURCES=firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx # need mystrdup() -LDADD = ../hunspell/libhunspell.la +LDADD = ../hunspell/libhunspell-1.2.la diff --git a/src/parsers/Makefile.in b/src/parsers/Makefile.in index 888958b..08c1d03 100644 --- a/src/parsers/Makefile.in +++ b/src/parsers/Makefile.in @@ -74,7 +74,7 @@ am_testparser_OBJECTS = firstparser.$(OBJEXT) htmlparser.$(OBJEXT) \ textparser.$(OBJEXT) testparser_OBJECTS = $(am_testparser_OBJECTS) testparser_LDADD = $(LDADD) -testparser_DEPENDENCIES = ../hunspell/libhunspell.la +testparser_DEPENDENCIES = ../hunspell/libhunspell-1.2.la DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir) depcomp = $(SHELL) $(top_srcdir)/depcomp am__depfiles_maybe = depfiles @@ -236,7 +236,7 @@ libparsers_a_SOURCES = firstparser.cxx htmlparser.cxx \ testparser_SOURCES = firstparser.cxx firstparser.hxx htmlparser.cxx htmlparser.hxx latexparser.cxx latexparser.hxx manparser.cxx manparser.hxx testparser.cxx textparser.cxx textparser.hxx # need mystrdup() -LDADD = ../hunspell/libhunspell.la +LDADD = ../hunspell/libhunspell-1.2.la all: all-am .SUFFIXES: diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am index 4ce8357..3e92633 100644 --- a/src/tools/Makefile.am +++ b/src/tools/Makefile.am @@ -1,22 +1,26 @@ -bin_PROGRAMS=analyze chmorph example hunspell munch unmunch +bin_PROGRAMS=analyze chmorph example hunspell munch unmunch hzip hunzip INCLUDES=-I${top_srcdir}/src/hunspell -I${top_srcdir}/src/parsers +hzip_SOURCES=hzip.c +hunzip_SOURCES=hunzip.cxx +hunzip_LDADD = ../hunspell/libhunspell-1.2.la + munch_SOURCES=munch.c unmunch_SOURCES=unmunch.c include_HEADERS=munch.h unmunch.h example_SOURCES=example.cxx -example_LDADD = ../hunspell/libhunspell.la +example_LDADD = ../hunspell/libhunspell-1.2.la hunspell_SOURCES=hunspell.cxx -hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell.la \ +hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell-1.2.la \ ../parsers/libparsers.a @CURSESLIB@ @READLINELIB@ analyze_SOURCES=analyze.cxx -analyze_LDADD = ../hunspell/libhunspell.la +analyze_LDADD = ../hunspell/libhunspell-1.2.la chmorph_SOURCES=chmorph.cxx -chmorph_LDADD = ../hunspell/libhunspell.la ../parsers/libparsers.a +chmorph_LDADD = ../hunspell/libhunspell-1.2.la ../parsers/libparsers.a -EXTRA_DIST=makealias +EXTRA_DIST=makealias affixcompress diff --git a/src/tools/Makefile.in b/src/tools/Makefile.in index 4155684..bd5b851 100644 --- a/src/tools/Makefile.in +++ b/src/tools/Makefile.in @@ -39,7 +39,8 @@ build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ bin_PROGRAMS = analyze$(EXEEXT) chmorph$(EXEEXT) example$(EXEEXT) \ - hunspell$(EXEEXT) munch$(EXEEXT) unmunch$(EXEEXT) + hunspell$(EXEEXT) munch$(EXEEXT) unmunch$(EXEEXT) \ + hzip$(EXEEXT) hunzip$(EXEEXT) subdir = src/tools DIST_COMMON = $(include_HEADERS) $(srcdir)/Makefile.am \ $(srcdir)/Makefile.in @@ -60,18 +61,24 @@ binPROGRAMS_INSTALL = $(INSTALL_PROGRAM) PROGRAMS = $(bin_PROGRAMS) am_analyze_OBJECTS = analyze.$(OBJEXT) analyze_OBJECTS = $(am_analyze_OBJECTS) -analyze_DEPENDENCIES = ../hunspell/libhunspell.la +analyze_DEPENDENCIES = ../hunspell/libhunspell-1.2.la am_chmorph_OBJECTS = chmorph.$(OBJEXT) chmorph_OBJECTS = $(am_chmorph_OBJECTS) -chmorph_DEPENDENCIES = ../hunspell/libhunspell.la \ +chmorph_DEPENDENCIES = ../hunspell/libhunspell-1.2.la \ ../parsers/libparsers.a am_example_OBJECTS = example.$(OBJEXT) example_OBJECTS = $(am_example_OBJECTS) -example_DEPENDENCIES = ../hunspell/libhunspell.la +example_DEPENDENCIES = ../hunspell/libhunspell-1.2.la am_hunspell_OBJECTS = hunspell.$(OBJEXT) hunspell_OBJECTS = $(am_hunspell_OBJECTS) -hunspell_DEPENDENCIES = ../hunspell/libhunspell.la \ +hunspell_DEPENDENCIES = ../hunspell/libhunspell-1.2.la \ ../parsers/libparsers.a +am_hunzip_OBJECTS = hunzip.$(OBJEXT) +hunzip_OBJECTS = $(am_hunzip_OBJECTS) +hunzip_DEPENDENCIES = ../hunspell/libhunspell-1.2.la +am_hzip_OBJECTS = hzip.$(OBJEXT) +hzip_OBJECTS = $(am_hzip_OBJECTS) +hzip_LDADD = $(LDADD) am_munch_OBJECTS = munch.$(OBJEXT) munch_OBJECTS = $(am_munch_OBJECTS) munch_LDADD = $(LDADD) @@ -98,10 +105,11 @@ CXXLD = $(CXX) CXXLINK = $(LIBTOOL) --tag=CXX --mode=link $(CXXLD) $(AM_CXXFLAGS) \ $(CXXFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) $(example_SOURCES) \ - $(hunspell_SOURCES) $(munch_SOURCES) $(unmunch_SOURCES) + $(hunspell_SOURCES) $(hunzip_SOURCES) $(hzip_SOURCES) \ + $(munch_SOURCES) $(unmunch_SOURCES) DIST_SOURCES = $(analyze_SOURCES) $(chmorph_SOURCES) \ - $(example_SOURCES) $(hunspell_SOURCES) $(munch_SOURCES) \ - $(unmunch_SOURCES) + $(example_SOURCES) $(hunspell_SOURCES) $(hunzip_SOURCES) \ + $(hzip_SOURCES) $(munch_SOURCES) $(unmunch_SOURCES) am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ @@ -243,20 +251,23 @@ target_cpu = @target_cpu@ target_os = @target_os@ target_vendor = @target_vendor@ INCLUDES = -I${top_srcdir}/src/hunspell -I${top_srcdir}/src/parsers +hzip_SOURCES = hzip.c +hunzip_SOURCES = hunzip.cxx +hunzip_LDADD = ../hunspell/libhunspell-1.2.la munch_SOURCES = munch.c unmunch_SOURCES = unmunch.c include_HEADERS = munch.h unmunch.h example_SOURCES = example.cxx -example_LDADD = ../hunspell/libhunspell.la +example_LDADD = ../hunspell/libhunspell-1.2.la hunspell_SOURCES = hunspell.cxx -hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell.la \ +hunspell_LDADD = @LIBINTL@ ../hunspell/libhunspell-1.2.la \ ../parsers/libparsers.a @CURSESLIB@ @READLINELIB@ analyze_SOURCES = analyze.cxx -analyze_LDADD = ../hunspell/libhunspell.la +analyze_LDADD = ../hunspell/libhunspell-1.2.la chmorph_SOURCES = chmorph.cxx -chmorph_LDADD = ../hunspell/libhunspell.la ../parsers/libparsers.a -EXTRA_DIST = makealias +chmorph_LDADD = ../hunspell/libhunspell-1.2.la ../parsers/libparsers.a +EXTRA_DIST = makealias affixcompress all: all-am .SUFFIXES: @@ -330,6 +341,12 @@ example$(EXEEXT): $(example_OBJECTS) $(example_DEPENDENCIES) hunspell$(EXEEXT): $(hunspell_OBJECTS) $(hunspell_DEPENDENCIES) @rm -f hunspell$(EXEEXT) $(CXXLINK) $(hunspell_LDFLAGS) $(hunspell_OBJECTS) $(hunspell_LDADD) $(LIBS) +hunzip$(EXEEXT): $(hunzip_OBJECTS) $(hunzip_DEPENDENCIES) + @rm -f hunzip$(EXEEXT) + $(CXXLINK) $(hunzip_LDFLAGS) $(hunzip_OBJECTS) $(hunzip_LDADD) $(LIBS) +hzip$(EXEEXT): $(hzip_OBJECTS) $(hzip_DEPENDENCIES) + @rm -f hzip$(EXEEXT) + $(LINK) $(hzip_LDFLAGS) $(hzip_OBJECTS) $(hzip_LDADD) $(LIBS) munch$(EXEEXT): $(munch_OBJECTS) $(munch_DEPENDENCIES) @rm -f munch$(EXEEXT) $(LINK) $(munch_LDFLAGS) $(munch_OBJECTS) $(munch_LDADD) $(LIBS) @@ -347,6 +364,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chmorph.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/example.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunspell.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hunzip.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hzip.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/munch.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/unmunch.Po@am__quote@ diff --git a/src/tools/affixcompress b/src/tools/affixcompress new file mode 100755 index 0000000..a2b9508 --- /dev/null +++ b/src/tools/affixcompress @@ -0,0 +1,183 @@ +#!/bin/sh +# affix compressor utility for Hunspell +# 2007 (c) László Németh, version 0.2 +# usage: affixcompress sorted_word_list_file +case $# in +0) echo \ +"affixcompress - compress a huge sorted word list to Hunspell aff and dic file +Usage: affixcompress sorted_word_list_file +Note: output may need manually added affix parameters (SET character_encoding, +TRY suggestion_characters etc., see man(4) hunspell)" + exit 0;; +esac + +# profiling +#AWK="pgawk --profile" +AWK="gawk" + +export LC_ALL=C +rm -f $1.aff $1.dic +cat $1 | $AWK ' +{ + # calculate frequent suffixes + A[$1] = 1 + len = length($1) + if (len > 2) { +# print $1, substr($1, 1, len - 1), substr($1, len, 1) >"/dev/stderr" + B[substr($1, 1, len - 1)] = substr($1, len, 1); + } + for(i = 2; i < len; i++) { + r = substr($1, 1, i) + if (i == 2) { + if (prev != r) { + delete A + delete B + print "Deleted roots: ", prev > "/dev/stderr" + } + prev = r + } + if (A[r]) { +# print $1 ": " r " és "substr($1, i + 1, len - i + 1) >"/dev/stderr" + sfx[substr($1, i + 1, len - i + 1)]++ + } else if (B[r] && B[r] != substr($1, i + 1, 1)) { + r2 = substr($1, i + 1, len - i + 1) + sfy[r2,B[r]]++ + } + } +} +END { + for (i in sfx) print i, 0, sfx[i] + for (i in sfy) print i, sfy[i] +} +' | tr '\034' ' ' >affixcompress0.tmp +sort -rnk 3 affixcompress0.tmp | $AWK '$3 > 20{print $0}' | head -20000 >affixcompress1.tmp +cat affixcompress1.tmp | +$AWK ' +function potential_roots() { + # potential roots with most frequent suffixes + for(word in W) if (W[word]==1) { + print word >"word" + len = length(word); + for(i = 2; i < len; i++) { + root = substr(word, 1, i) + suff = substr(word, i + 1, len - i + 1) + if ((W[root]!="") && (sfxfr[suff] > 100)) C[root]++ + if (sfz[suff]) { + l = split(sfz[suff], a) + for (k=1; k <= l; k++) if ((W[root a[k]]!="") && (sfyfr[root a[k]] > 100)) { + C[root a[k]]++ + } + } + } + } + + # calculate roots + for(word in W) if (W[word]==1) { + print word >"word2" + len = length(word); + z = 0 + # choose most frequent root (maybe the original word) + max = C[word] + maxword = word + maxsuff = 0 + for(i = 2; i < len; i++) { + root = substr(word, 1, i) + suff = substr(word, i + 1, len - i + 1) + if ((sfx[suff] != "") && (C[root] > max)) { + max = C[root] + maxword = root + maxsuff = sfx[suff] + } + if (sfz[suff] != "") { + l = split(sfz[suff], a) + for (k=1; k <= l; k++) if (C[root a[k]] > max) { + max = C[root a[k]] + maxword = root a[k] + maxsuff = sfy[suff,a[k]] + } + } + } + if (max > 0) { + if (maxsuff > 0) print maxword, maxsuff; else print maxword + A[maxword]++ + z=1 + } else { + for(i = 2; i < len; i++) { + root = substr(word, 1, i) + suff = substr(word, i + 1, len - i + 1) + if ((A[root] > 0) && sfx[suff]!="") { + print root, sfx[suff] + z = 1 + break + } + if (sfz[suff]) { + l = split(sfz[suff], a) + for (k=1; k <= l; k++) if (A[root a[k]]!="") { + print root a[k], sfy[suff,a[k]] + z = 1 + break + } + } + } + } + if (z == 0) { + print word + A[word]++ + } + } + delete A + delete C +} +FILENAME == "-" { + if ($2 == 0) { + sfx[$1] = NR + sfxfr[$1] = $3 + } else { + sfy[$1,$2] = NR + sfyfr[$1,$2] = $3 + sfz[$1] = sfz[$1] " " $2 + } + maxsuf = NR + next +} +{ + cap = substr($1, 1, 3) + if (cap != prev) { + potential_roots() + delete W + print "Deleted class:", prev > "/dev/stderr" + } + prev = cap + W[$1] = 1 +} +END { + potential_roots() + # write out frequent suffixes + out=FILENAME ".aff" + print "FLAG num" >out + for (i in sfx) if (sfx[i] > 0) { + print "SFX", sfx[i], "Y 1" >out + print "SFX", sfx[i], "0", i, "." >out + } + for (i in sfy) if (sfy[i] > 0) { + print "SFX", sfy[i], "Y 1" >out + split(i, c, "\034"); + print "SFX", sfy[i], c[2], c[1], c[2] >out + } +} +' - $1 >affixcompress2.tmp +sort -nk 2 affixcompress2.tmp >affixcompress3.tmp +cat affixcompress3.tmp | $AWK -v out="$1.dic" ' +{ + if (A[$1]=="") A[$1]=$2; + else if ($2!="") A[$1] = A[$1] "," $2 +} +END { + for (i in A) n++ + print n >out + for (i in A) { + if (A[i]=="") print i + else print i "/" A[i] + } +} +' | sort >>$1.dic diff --git a/src/tools/chmorph.cxx b/src/tools/chmorph.cxx index c6c246a..c2f372e 100644 --- a/src/tools/chmorph.cxx +++ b/src/tools/chmorph.cxx @@ -37,10 +37,11 @@ main(int argc, char** argv) } Hunspell *pMS = new Hunspell(argv[1], argv[2]); - TextParser * p = new TextParser("qwertzuiopasdfghjklyxcvbnm���������QWERTZUIOPASDFGHJKLYXCVBNM���������"); + TextParser * p = new TextParser("|${}:/_+qwertzuiopasdfghjklyxcvbnm���������QWERTZUIOPASDFGHJKLYXCVBNM���������"); char buf[MAXLNLEN]; char * next; + int num = 0; while(fgets(buf,MAXLNLEN,f)) { p->put_line(buf); @@ -60,18 +61,59 @@ main(int argc, char** argv) free(pl[i]); pl[i] = r; gen = 1; + } else { +// free(pl[i]); +// pl[i] = NULL; } } if (gen) { char **pl2; +// pln = uniqlist(pl, pln); int pl2n = pMS->generate(&pl2, next, pl, pln); if (pl2n) { - p->change_token(pl2[0]); +// pl2n = uniqlist(pl2, pl2n); + char x[MAXLNLEN]; + char * x2 = pl2[0]; + num++; + if (pl2n>1) { + strcpy(x, "$"); + for (int j = 0; (j < pl2n) && (j < 5); j++) { + strcat(x, pl2[j]); + if (j < pln && pl[j]) { + strcat(x, "{"); + char * p2 = x + strlen(x); + for (char * p3 = pl[j]; *p3; p3++, p2++) { + if (*p3 == ' ' || *p3 == '\t') *p2 = '+'; + else *p2 = *p3; + } + strcpy(p2, "}"); + } + + strcat(x, "|"); + } + x[strlen(x) - 1] = '$'; + x2 = x; + } +// p->change_token(pl2[0]); + p->change_token(x2); freelist(&pl2, pl2n); // jump over the (possibly un)modified word free(next); next=p->next_token(); } + } else { + char x[MAXLNLEN]; + strcpy(x, next); + strcat(x, "{"); + char * p2 = x + strlen(x); + for (char * p3 = pl[0]; *p3; p3++, p2++) { + if (*p3 == ' ' || *p3 == '\t') *p2 = '+'; + else *p2 = *p3; + } + strcpy(p2, "}"); + p->change_token(x); + free(next); + next=p->next_token(); } freelist(&pl, pln); } diff --git a/src/tools/example.cxx b/src/tools/example.cxx index 0f53927..029c4e9 100644 --- a/src/tools/example.cxx +++ b/src/tools/example.cxx @@ -12,52 +12,33 @@ int main(int argc, char** argv) { - char * af; - char * df; - char * wtc; FILE* wtclst; - /* first parse the command line options */ - /* arg1 - affix file, arg2 dictionary file, arg3 - file of words to check */ - - if (argv[1]) { - af = mystrdup(argv[1]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n"); - exit(1); - } - if (argv[2]) { - df = mystrdup(argv[2]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n"); - exit(1); - } - if (argv[3]) { - wtc = mystrdup(argv[3]); - } else { - fprintf(stderr,"correct syntax is:\n"); - fprintf(stderr,"example affix_file dictionary_file file_of_words_to_check\n"); - exit(1); - } + /* first parse the command line options */ + if (argc < 4) { + fprintf(stderr,"example (multiple dictionary version.:\n"); + fprintf(stderr,"example affix_file dictionary_file(s) file_of_words_to_check\n"); + exit(1); + } - /* open the words to check list */ - wtclst = fopen(wtc,"r"); - if (!wtclst) { - fprintf(stderr,"Error - could not open file of words to check\n"); - exit(1); - } - + /* open the words to check list */ + wtclst = fopen(argv[argc - 1],"r"); + if (!wtclst) { + fprintf(stderr,"Error - could not open file of words to check\n"); + exit(1); + } - Hunspell * pMS= new Hunspell(af,df); - int k; int dp; char buf[101]; - while(fgets(buf,100,wtclst)) { + Hunspell * pMS= new Hunspell(argv[1], argv[2]); + + // load extra dictionaries + if (argc > 4) for (k = 3; k < argc - 1; k++) pMS->add_dic(argv[k]); + + while(fgets(buf, 100, wtclst)) { k = strlen(buf); *(buf + k - 1) = '\0'; dp = pMS->spell(buf); @@ -80,10 +61,6 @@ main(int argc, char** argv) delete pMS; fclose(wtclst); - free(wtc); - free(df); - free(af); - return 0; } diff --git a/src/tools/hunspell.cxx b/src/tools/hunspell.cxx index 80d5135..96d17fd 100644 --- a/src/tools/hunspell.cxx +++ b/src/tools/hunspell.cxx @@ -169,9 +169,13 @@ char * chenc(char * st, const char * enc1, const char * enc2) { char * source = st; char * dest = text_conv; iconv_t conv = iconv_open(enc2, enc1); - size_t res = iconv(conv, (ICONV_CONST char **) &source, &c1, &dest, &c2); - iconv_close(conv); - if (res != (size_t) -1) out = text_conv; + if (conv == (iconv_t) -1) { + fprintf(stderr, gettext("error - iconv_open: %s -> %s"), enc2, enc1); + } else { + size_t res = iconv(conv, (ICONV_CONST char **) &source, &c1, &dest, &c2); + iconv_close(conv); + if (res != (size_t) -1) out = text_conv; + } } #endif return out; @@ -208,12 +212,18 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) { size_t c2 = MAXLNLEN; char * dest = text_conv; iconv_t conv = iconv_open("UTF-8", dic_enc); - iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2); - iconv_close(conv); - wordchars_utf16 = (unsigned short *) malloc(sizeof(unsigned short) * wlen); - int n = u8_u16((w_char *) wordchars_utf16, wlen, text_conv); - if (n > 0) flag_qsort(wordchars_utf16, 0, n); - wordchars_utf16_len = n; + if (conv == (iconv_t) -1) { + fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s"), dic_enc); + wordchars_utf16 = NULL; + wordchars_utf16_len = 0; + } else { + iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2); + iconv_close(conv); + wordchars_utf16 = (unsigned short *) malloc(sizeof(unsigned short) * wlen); + int n = u8_u16((w_char *) wordchars_utf16, wlen, text_conv); + if (n > 0) flag_qsort(wordchars_utf16, 0, n); + wordchars_utf16_len = n; + } } } else { // 8-bit input encoding @@ -223,30 +233,34 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) { char ch[2]; char u8[10]; iconv_t conv = iconv_open("UTF-8", io_enc); - for (int i = 32; i < 256; i++) { - size_t c1 = 1; - size_t c2 = 10; - char * dest = u8; - u8[0] = '\0'; - char * ch8bit = ch; - ch[0] = (char) i; - ch[1] = '\0'; - size_t res = iconv(conv, (ICONV_CONST char **) &ch8bit, &c1, &dest, &c2); - if (res != (size_t) -1) { - unsigned short idx; - w_char w; - w.l = 0; - w.h = 0; - u8_u16(&w, 1, u8); - idx = (w.h << 8) + w.l; - if (unicodeisalpha(idx)) { - *pletters = (char) i; - pletters++; - } + if (conv == (iconv_t) -1) { + fprintf(stderr, gettext("error - iconv_open: UTF-8 -> %s"), io_enc); + } else { + for (int i = 32; i < 256; i++) { + size_t c1 = 1; + size_t c2 = 10; + char * dest = u8; + u8[0] = '\0'; + char * ch8bit = ch; + ch[0] = (char) i; + ch[1] = '\0'; + size_t res = iconv(conv, (ICONV_CONST char **) &ch8bit, &c1, &dest, &c2); + if (res != (size_t) -1) { + unsigned short idx; + w_char w; + w.l = 0; + w.h = 0; + u8_u16(&w, 1, u8); + idx = (w.h << 8) + w.l; + if (unicodeisalpha(idx)) { + *pletters = (char) i; + pletters++; + } + } } + iconv_close(conv); } - *pletters = '\0'; - iconv_close(conv); + *pletters = '\0'; // UTF-8 wordchars -> 8 bit wordchars int len = 0; @@ -261,10 +275,13 @@ TextParser * get_parser(int format, char * extension, Hunspell * pMS) { size_t c1 = len + 1; size_t c2 = len + 1; iconv_t conv = iconv_open(io_enc, dic_enc); - iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2); - iconv_close(conv); + if (conv == (iconv_t) -1) { + fprintf(stderr, gettext("error - iconv_open: %s -> %s"), io_enc, dic_enc); + } else { + iconv(conv, (ICONV_CONST char **) &wchars, &c1, &dest, &c2); + iconv_close(conv); + } } - if (*letters) wordchars = mystrdup(letters); } #else @@ -758,6 +775,7 @@ void dialogscreen(TextParser * parser, char * token, int beginrow = rowindex - pos_tab(parser->get_prevline(0), parser->get_tokenpos()) / x; if (beginrow >= MAXPREVLINE) beginrow = MAXPREVLINE - 1; +/* for (int i = 0; i < MAXPREVLINE; i++) { char * line = (char *) calloc(1, x + 1); strncpy(line, lines[prevline] + x * rowindex, x); @@ -769,24 +787,35 @@ void dialogscreen(TextParser * parser, char * token, } free(line); } - +*/ char * line = (char *) calloc(1, x + 1); + int poslen; strncpy(line, lines[0] + x * rowindex, parser->get_tokenpos() % x); - mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc)); - free(line); +// fprintf(stderr, "%s\n", line); +// exit(1); + chenc(line, io_enc, ui_enc); + if (strcmp(ui_enc, "UTF-8")==0) { + char * p; + w_char dest_utf[BUFSIZ]; + poslen = u8_u16(dest_utf, BUFSIZ, line); +// for (p = line; *p; p++) if (*p == '\t') exit(1); + } else poslen = strlen(line); +// mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc)); attron(A_REVERSE); - printw("%s", chenc(token, io_enc, ui_enc)); +// mvprintw(MAXPREVLINE + 1 - beginrow, 0, "%s", chenc(line, io_enc, ui_enc)); + mvprintw(MAXPREVLINE + 1 - beginrow, poslen, "%s", chenc(token, io_enc, ui_enc)); attroff(A_REVERSE); + free(line); - mvprintw(MAXPREVLINE + 2, 0, "\n"); - for (int i = 0; i < ns; i++) { +// mvprintw(MAXPREVLINE + 2, 0, "\n"); +/* for (int i = 0; i < ns; i++) { if ((ns > 10) && (i < 10)) { printw(" 0%d: %s\n", i, chenc(wlst[i], dic_enc, ui_enc)); } else { printw(" %d: %s\n", i, chenc(wlst[i], dic_enc, ui_enc)); } } - +*/ mvprintw(y-3, 0, "%s\n", gettext("\n[SP] <number> R)epl A)ccept I)nsert L)ookup U)ncap Q)uit e(X)it or ? for help\n")); } @@ -1196,6 +1225,11 @@ char * exist2(char * dir, int len, char * name, char * ext) { strcat(buf, name); strcat(buf, ext); if (exist(buf)) return mystrdup(buf); + strcat(buf, HZIP_EXTENSION); + if (exist(buf)) { + buf[strlen(buf) - strlen(HZIP_EXTENSION)] = '\0'; + return mystrdup(buf); + } return NULL; } @@ -1214,6 +1248,7 @@ char * search(char * begin, char * name, char * ext) { int main(int argc, char** argv) { Hunspell * pMS = NULL; + char * key = NULL; int arg_files = -1; // first filename argumentum position in argv int format = FMT_TEXT; @@ -1256,9 +1291,13 @@ int main(int argc, char** argv) } else if (argstate == 3) { io_enc = argv[i]; argstate = 0; + } else if (argstate == 4) { + key = argv[i]; + argstate = 0; } else if (strcmp(argv[i],"-d")==0) argstate=1; else if (strcmp(argv[i],"-p")==0) argstate=2; else if (strcmp(argv[i],"-i")==0) argstate=3; + else if (strcmp(argv[i],"-P")==0) argstate=4; else if ((strcmp(argv[i],"-h") == 0) || (strcmp(argv[i],"--help") == 0)) { fprintf(stderr,gettext("Usage: hunspell [OPTION]... [FILE]...\n")); fprintf(stderr,gettext("Check spelling of each FILE. Without FILE, check standard input.\n")); @@ -1276,6 +1315,7 @@ int main(int argc, char** argv) fprintf(stderr,gettext(" -L\t\tprint lines with mispelled words\n")); fprintf(stderr,gettext(" -n\t\tnroff/troff input file format\n")); fprintf(stderr,gettext(" -p dict\tset dict custom dictionary\n")); + fprintf(stderr,gettext(" -P password\tset password for encrypted dictionaries\n")); fprintf(stderr,gettext(" -t\t\tTeX/LaTeX input file format\n")); // experimental functions: missing Unicode support // fprintf(stderr,gettext(" -u\t\tshow typical misspellings\n")); @@ -1287,7 +1327,7 @@ int main(int argc, char** argv) fprintf(stderr,"\n"); fprintf(stderr,gettext("Example: hunspell -d en_US file.txt # interactive spelling\n")); fprintf(stderr,gettext(" hunspell -l file.txt # print misspelled words\n")); - fprintf(stderr,gettext(" hunspell -i utf8 file.txt # check UTF-8 encoded file\n")); + fprintf(stderr,gettext(" hunspell -i utf-8 file.txt # check UTF-8 encoded file\n")); fprintf(stderr,"\n"); fprintf(stderr,gettext("Bug reports: http://hunspell.sourceforge.net\n")); exit(0); @@ -1296,7 +1336,7 @@ int main(int argc, char** argv) fprintf(stdout,"\n"); if (strcmp(argv[i],"-vv")!=0) { fprintf(stdout,"\n"); - fprintf(stdout,gettext("Copyright (C) 2002-2007 Nemeth Laszlo. License: GNU LGPL.\n")); + fprintf(stdout,gettext("Copyright (C) 2002-2008 L\303\241szl\303\263 N\303\251meth. License: MPL/GPL/LGPL.\n")); fprintf(stdout,"\n"); fprintf(stdout,gettext("Based on OpenOffice.org's Myspell library.\n")); fprintf(stdout,gettext("Myspell's copyright (C) Kevin Hendricks, 2001-2002, License: BSD.\n")); @@ -1376,7 +1416,7 @@ int main(int argc, char** argv) char * dic = search(path, dicname, ".dic"); if (aff && dic) { if (showpath) fprintf(stderr, "%s\n%s\n", aff, dic); - pMS = new Hunspell(aff, dic); + pMS = new Hunspell(aff, dic, key); } else { fprintf(stderr,gettext("Can't open affix or dictionary files.\n")); exit(1); diff --git a/src/tools/hunzip.cxx b/src/tools/hunzip.cxx new file mode 100644 index 0000000..5d1581d --- /dev/null +++ b/src/tools/hunzip.cxx @@ -0,0 +1,22 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "hunzip.hxx" + +#define DESC "hunzip - decompress a hzip file to the standard output\n" \ +"Usage: hunzip file.hz [password]\n" + +int fail(const char * err, const char * par) { + fprintf(stderr, err, par); + return 1; +} + +int main(int argc, char** argv) { + Hunzip * h; + const char * s; + if (argc == 1 || strcmp(argv[1], "-h") == 0) return fail(DESC, NULL); + h = new Hunzip(argv[1], (argc > 2) ? argv[2] : NULL); + while (h && (s = h->getline())) printf("%s", s); + return 0; +} diff --git a/src/tools/hzip.c b/src/tools/hzip.c new file mode 100644 index 0000000..7c63297 --- /dev/null +++ b/src/tools/hzip.c @@ -0,0 +1,281 @@ +/* hzip: file compression for sorted dictionaries with optional encryption, + * algorithm: prefix-suffix encoding and 16-bit Huffman encoding */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define CODELEN 65536 +#define BUFSIZE 65536 +#define EXTENSION ".hz" + +#define ESCAPE 31 +#define MAGIC "hz0" +#define MAGIC_ENCRYPTED "hz1" + +#define DESC "hzip - dictionary compression utility\n" \ +"Usage: hzip [-h | -P password ] [file1 file2 ..]\n" \ +" -P password encrypted compression\n" \ +" -h display this help and exit\n" + +enum { code_LEAF, code_TERM, code_NODE}; + +struct item { + unsigned short word; + int count; + char type; + struct item * left; + struct item * right; +}; + +int fail(const char * err, const char * par) { + fprintf(stderr, err, par); + return 1; +} + +void code2table(struct item * tree, char **table, char * code, int deep) { + int first = 0; + if (!code) { + first = 1; + code = malloc(sizeof(char) * CODELEN); + } + code[deep] = '1'; + if (tree->left) code2table(tree->left, table, code, deep + 1); + if (tree->type != code_NODE) { + int i = tree->word; + code[deep] = '\0'; + if (tree->type == code_TERM) i = CODELEN; // terminal code + table[i] = malloc((deep + 1) * sizeof(char)); + strcpy(table[i], code); + } + code[deep] = '0'; + if (tree->right) code2table(tree->right, table, code, deep + 1); + if (first) free(code); +} + +struct item * newitem(int c, struct item * l, struct item * r, int t) { + struct item * ni = (struct item *) malloc(sizeof(struct item)); + ni->type = t; + ni->word = 0; + ni->count = c; + ni->left = l; + ni->right = r; + return ni; +} + +// return length of the freq array +int get_freqdata(struct item *** dest, FILE * f, unsigned short * termword) { + int freq[CODELEN]; + int i, j, k, n; + char c[2]; + for (i = 0; i < CODELEN; i++) freq[i] = 0; + while((j = getc(f)) != -1 && (k = getc(f)) != -1) { + c[0] = j; + c[1] = k; + freq[*((unsigned short *) c)]++; + } + if (j != -1) { + c[0] = 1; + c[1] = j; + } else { + c[0] = 0; + c[1] = 0; + } + *dest = (struct item **) malloc((CODELEN + 1) * sizeof(struct item *)); + if (!*dest) return -1; + for (i = 0, n = 0; i < CODELEN; i++) if (freq[i]) { + (*dest)[n] = newitem(freq[i], NULL, NULL, code_LEAF); + (*dest)[n]->word = i; + n++; + } + // terminal sequence (also contains the last odd byte of the file) + (*dest)[n] = newitem(1, NULL, NULL, code_TERM); + *termword = *((unsigned short *) c); + return n + 1; +} + +void get_codetable(struct item **l, int n, char ** table) { + int i; + while (n > 1) { + int min = 0; + int mi2 = 1; + for (i = 1; i < n; i++) { + if (l[i]->count < l[min]->count) { + mi2 = min; + min = i; + } else if (l[i]->count < l[mi2]->count) mi2 = i; + } + l[min] = newitem(l[min]->count + l[mi2]->count, l[min], l[mi2], code_NODE); + for (i = mi2 + 1; i < n; i++) l[i - 1] = l[i]; + n--; + } + code2table(l[0], table, NULL, 0); +} + +void write_bits(FILE *f, char * bitbuf, int *bits, char * code) { + while (*code) { + int b = (*bits) % 8; + if (!b) bitbuf[(*bits) / 8] = ((*code) - '0') << 7; + else bitbuf[(*bits) / 8] |= (((*code) - '0') << (7 - b)); + (*bits)++; + code++; + if (*bits == BUFSIZE * 8) { + fwrite(bitbuf, sizeof(char), BUFSIZE, f); + *bits = 0; + } + } +} + +void encode_file(char ** table, int n, FILE *f, FILE *f2, unsigned short tw, char * key) { + char bitbuf[BUFSIZE]; + int i, bits = 0; + unsigned char cl, ch; + int cx[2]; + char c[2]; + char * enc = key; + + // header and codes + fprintf(f2, "%s", (key ? MAGIC_ENCRYPTED : MAGIC)); // 3-byte HEADER + cl = (unsigned char) (n & 0x00ff); + ch = (unsigned char) (n >> 8); + if (key) { + unsigned char cs; + for (cs = 0; *enc; enc++) cs ^= *enc; + fprintf(f2, "%c", cs); // 1-byte check sum + enc = key; + ch ^= *enc; + if ((*(++enc)) == '\0') enc = key; + cl ^= *enc; + } + fprintf(f2, "%c%c", ch, cl); // upper and lower byte of record count + for (i = 0; i < BUFSIZE; i++) bitbuf[i] = '\0'; + for (i = 0; i < CODELEN + 1; i++) if (table[i]) { + unsigned short * d = (unsigned short *) &c; + *d = (unsigned short) i; + if (i == CODELEN) *d = tw; + if (key) { + if (*(++enc) == '\0') enc = key; + c[0] ^= *enc; + if (*(++enc) == '\0') enc = key; + c[1] ^= *enc; + } + fprintf(f2, "%c%c", c[0], c[1]); // 2-character code id + bits = 0; + write_bits(f2, bitbuf, &bits, table[i]); + if (key) { + if (*(++enc) == '\0') enc = key; + fprintf(f2, "%c", ((unsigned char) bits) ^ *enc); + for (cl = 0; cl <= bits/8; cl++) { + if (*(++enc) == '\0') enc = key; + bitbuf[cl] ^= *enc; + } + } else fprintf(f2, "%c", (unsigned char) bits); // 1-byte code length + fwrite(bitbuf, sizeof(char), bits/8 + 1, f2); // x-byte code + } + + // file encoding + bits = 0; + while((cx[0] = getc(f)) != -1 && (cx[1] = getc(f)) != -1) { + c[0] = cx[0]; + c[1] = cx[1]; + write_bits(f2, bitbuf, &bits, table[*((unsigned short *) c)]); + } + // terminal suffixes + write_bits(f2, bitbuf, &bits, table[CODELEN]); + if (bits > 0) fwrite(bitbuf, sizeof(char), bits/8 + 1, f2); +} + +void prefixcompress(FILE *f, FILE *tempfile) { + char buf[BUFSIZE]; + char buf2[BUFSIZE * 2]; + char prev[BUFSIZE]; + int prevlen = 0; + while(fgets(buf,BUFSIZE,f)) { + int i, j, k, m, c; + int pfx = prevlen; + char * p = buf2; + m = j = 0; + for (i = 0; buf[i]; i++) { + if ((pfx > 0) && (buf[i] == prev[i])) { + j++; + } else pfx = 0; + } + if (i > 0 && buf[i - 1] == '\n') { + if (j == i) j--; // line duplicate + if (j > 29) j = 29; + c = j; + if (c == '\t') c = 30; + // common suffix + for (; buf[i - m - 2] == prev[prevlen - m - 2] && + m < i - j - 1 && m < 15; m++); + if (m == 1) m = 0; + } else { + j = 0; + m = -1; + } + for (k = j; k < i - m - 1; k++, p++) { + if (((unsigned char) buf[k]) < 47 && buf[k] != '\t' && buf[k] != ' ') { + *p = ESCAPE; + p++; + } + *p = buf[k]; + } + if (m > 0) { + *p = m + 31; // 33-46 + p++; + } + if (i > 0 && buf[i - 1] == '\n') { + *p = c; + fwrite(buf2, 1, p - buf2 + 1, tempfile); + } else fwrite(buf2, 1, p - buf2, tempfile); + memcpy(prev, buf, i); + prevlen = i; + } +} + +int hzip(const char * filename, char * key) { + struct item ** list; + char * table[CODELEN + 1]; + int n; + char out[BUFSIZE]; + FILE *f, *f2, *tempfile; + unsigned short termword; + strcpy(out, filename); + strcat(out, EXTENSION); + f = fopen(filename, "r"); + if (!f) return fail("hzip: %s: Permission denied\n", filename); + tempfile = tmpfile(); + if (!tempfile) return fail("hzip: cannot create temporary file\n", NULL); + f2 = fopen(out, "w"); + if (!f2) return fail("hzip: %s: Permission denied\n", out); + for (n = 0; n < CODELEN; n++) table[n] = NULL; + prefixcompress(f, tempfile); + rewind(tempfile); + n = get_freqdata(&list, tempfile, &termword); + get_codetable(list, n, table); + rewind(tempfile); + encode_file(table, n, tempfile, f2, termword, key); + fclose(f2); + return 0; +} + +int main(int argc, char** argv) { + int i, j = 0; + char * key = NULL; + for (i = 1; i < argc; i++) { + if (*(argv[i]) == '-') { + if (*(argv[i] + 1) == 'h') + return fail(DESC, NULL); + if (*(argv[i] + 1) == 'P') { + if (i + 1 == argc) + return fail("hzip: missing password\n", NULL); + key = argv[i + 1]; + i++; + continue; + } + return fail("hzip: no such option: %s\n", argv[i]); + } else if (hzip(argv[i], key) != 0) return 1; else j = 1; + } + if (j == 0) return fail("hzip: need a filename parameter\n", NULL); + return 0; +} diff --git a/src/win_api/hunspelldll.c b/src/win_api/hunspelldll.c index 583d96e..797359b 100644 --- a/src/win_api/hunspelldll.c +++ b/src/win_api/hunspelldll.c @@ -48,6 +48,12 @@ DLLEXPORT void * hunspell_initialize(char *aff_file, char *dict_file) return pMS; } +DLLEXPORT void * hunspell_initialize_key(char *aff_file, char *dict_file, char * key) +{ + Hunspell * pMS = new Hunspell(aff_file, dict_file, key); + return pMS; +} + DLLEXPORT void hunspell_uninitialize(Hunspell *pMS) { delete pMS; @@ -82,9 +88,9 @@ DLLEXPORT char * hunspell_get_dic_encoding(Hunspell *pMS) return pMS->get_dic_encoding(); } -DLLEXPORT int hunspell_put_word(Hunspell *pMS, char *word) +DLLEXPORT int hunspell_add(Hunspell *pMS, char *word) { - return pMS->put_word(word); + return pMS->add(word); } diff --git a/tests/IJ.good b/tests/IJ.good new file mode 100644 index 0000000..5f888f0 --- /dev/null +++ b/tests/IJ.good @@ -0,0 +1,2 @@ +ijs +IJs diff --git a/tests/Makefile.am b/tests/Makefile.am index 8e0d947..5fa971e 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -476,6 +476,7 @@ ignoreutf.test \ 1463589_utf.wrong \ IJ.aff \ IJ.dic \ +IJ.good \ IJ.sug \ IJ.test \ IJ.wrong \ diff --git a/tests/Makefile.in b/tests/Makefile.in index 836ddd5..76180ff 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -661,6 +661,7 @@ ignoreutf.test \ 1463589_utf.wrong \ IJ.aff \ IJ.dic \ +IJ.good \ IJ.sug \ IJ.test \ IJ.wrong \ diff --git a/tests/suggestiontest/Makefile.orig b/tests/suggestiontest/Makefile.orig index 65e24c8..a983776 100644 --- a/tests/suggestiontest/Makefile.orig +++ b/tests/suggestiontest/Makefile.orig @@ -1,4 +1,4 @@ -all: aspell.txt hunspell.txt +all: ./prepare List*txt ./test List*txt -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/hunspell.git

