http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as new file mode 100644 index 0000000..6ad78c6 --- /dev/null +++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as @@ -0,0 +1,1118 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//////////////////////////////////////////////////////////////////////////////// + + +package com.adobe.linguistics.spelling.core +{ +// import com.adobe.linguistics.spelling.core.PhoneticTable; + import com.adobe.linguistics.spelling.core.env.*; + import com.adobe.linguistics.spelling.core.error.*; + import com.adobe.linguistics.spelling.core.rule.*; + import com.adobe.linguistics.spelling.core.utils.SimpleNumberParser; + + import flash.utils.Dictionary; + + + public class LinguisticRule + { + + + private var _encoding:String // ToDo, not sure how to handle this encoding stuff... + + private var snp:SimpleNumberParser = new SimpleNumberParser(); + + private var _prefixFlagTable:Array; + private var _prefixKeyTable:Array; + private var _suffixFlagTable:Array; + private var _suffixKeyTable:Array; + private var _optSuffixKeyTable:Dictionary; + private var _optPrefixKeyTable:Dictionary; +//these are attributes + private var _keyString:String; + private var _tryString:String; + private var _noSuggest:Number;// don't suggest words signed with NOSUGGEST flag + private var _forbiddenWord:Number; // forbidden word signing flag + private var _circumfix:Number=0; //Circumfix Flag + private var _ignoredChars:String; // letters + spec. word characters + private var _wordChars:String; //extends tokenizer of Hunspell command line interface with additional word character. For example, dot, dash, n-dash, numbers, percent sign are word character in Hungarian. 
+ private var _languageCode:String; + private var _version:String; + private var _maxngramsugs:int = -1; // undefined + private var _nosplitsugs:int = 0; + private var _sugswithdots:int = 0; + private var _fullStrip:int; + private var _keepCase:Number; + private var _haveContClass:Boolean;//added to support (double) prefixes + + private var _flagMode:int; + private var _needAffix:Number; + private var _contClasses:Dictionary;//this is list of all possible contclasses + /* ToDo */ + + private var _onlyInCompound:Number = 0; +// private var _phoneTable:PhoneticTable; //phone table +/* + + +ToDO: should be removed after we have complex-affix support and compound-word support.. + + + pHMgr = ptr[0]; + alldic = ptr; + maxdic = md; + keystring = NULL; + trystring = NULL; + encoding=NULL; + utf8 = 0; + complexprefixes = 0; + maptable = NULL; + nummap = 0; + breaktable = NULL; + numbreak = 0; + reptable = NULL; + numrep = 0; + iconvtable = NULL; + oconvtable = NULL; + checkcpdtable = NULL; + // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN) + simplifiedcpd = 0; + numcheckcpd = 0; + defcpdtable = NULL; + numdefcpd = 0; + phone = NULL; + compoundflag = FLAG_NULL; // permits word in compound forms + compoundbegin = FLAG_NULL; // may be first word in compound forms + compoundmiddle = FLAG_NULL; // may be middle word in compound forms + compoundend = FLAG_NULL; // may be last word in compound forms + compoundroot = FLAG_NULL; // compound word signing flag + compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word + compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word + checkcompounddup = 0; // forbid double words in compounds + checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution) + checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds + checkcompoundtriple = 0; // forbid compounds with triple letters + simplifiedtriple = 0; // allow 
simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt) + forbiddenword = FORBIDDENWORD; // forbidden word signing flag + nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag + lang = NULL; // language + langnum = 0; // language code (see http://l10n.openoffice.org/languages.html) + needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes + cpdwordmax = -1; // default: unlimited wordcount in compound words + cpdmin = -1; // undefined + cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words + cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX) + cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search) + cpdvowels_utf16_len=0; // vowels + pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG + sfxappnd=NULL; // previous suffix for counting a special syllables BUG + cpdsyllablenum=NULL; // syllable count incrementing flag + checknum=0; // checking numbers, and word with numbers + wordchars=NULL; // letters + spec. word characters + wordchars_utf16=NULL; // letters + spec. word characters + wordchars_utf16_len=0; // letters + spec. word characters + ignorechars=NULL; // letters + spec. word characters + ignorechars_utf16=NULL; // letters + spec. word characters + ignorechars_utf16_len=0; // letters + spec. word characters + version=NULL; // affix and dictionary file version string + havecontclass=0; // flags of possible continuing classes (double affix) + // LEMMA_PRESENT: not put root into the morphological output. Lemma presents + // in morhological description in dictionary file. It's often combined with PSEUDOROOT. 
+ lemma_present = FLAG_NULL; + circumfix = FLAG_NULL; + onlyincompound = FLAG_NULL; + maxngramsugs = -1; // undefined + nosplitsugs = 0; + sugswithdots = 0; + keepcase = 0; + checksharps = 0; + substandard = FLAG_NULL; + fullstrip = 0; + */ + + private var _simpleFilterTable:Array; + private var _mapFilterTable:Array; + private var _iconvFilterTable:Array; //Contains conversion table for ICONV conversion + private var _oconvFilterTable:Array;//Contains conversion table for OCONV conversion + private var _breakTable:Array;//Contains list of characters in BREAK rule + private var _aliasfTable:Array;//Contains conversion table for AF rule + /* internal use only properties. */ + private var _pfxEntry:PrefixEntry; + private var _sfxEntry:SuffixEntry; + private var _optSfxEntry:OptimizedSuffixEntry; + private var _optPfxEntry:OptimizedPrefixEntry; + private var _dictMgr:DictionaryManager; + + public function LinguisticRule() + { + + this._prefixFlagTable = new Array() + this._prefixKeyTable = new Array(); + this._suffixFlagTable = new Array(); + this._suffixKeyTable = new Array(); + this._optSuffixKeyTable = new Dictionary(true); + this._optPrefixKeyTable = new Dictionary(true); + + this._simpleFilterTable = new Array(); + this._mapFilterTable = new Array(); + this._iconvFilterTable=new Array(); + this._oconvFilterTable=new Array(); + this._breakTable=new Array();//We are not adding any break points by default. 
Hunspell C does this for -, ^-, -$ + this._aliasfTable=new Array(); +// this._phoneTable=new PhoneticTable(); + + + /* init the attributes */ + this.noSuggest = InternalConstants.FLAG_NULL; + this.tryString= null; + this.keyString= null; + this.ignoredChars = null; + this.wordChars = null; + this.version = null; + this.languageCode = null; + this.forbiddenWord = InternalConstants.FORBIDDENWORD; + this.needAffix=InternalConstants.FLAG_NULL; + this.circumfix=InternalConstants.FLAG_NULL; + this.maxNgramSuggestions = -1; // undefined + this.nosplitSuggestions = 0; + this.suggestionsWithDots = 0; + this.fullStrip = 0; + this.keepCase = 0; + this.onlyInCompound = 0; + this.flagMode = InternalConstants.FLAG_CHAR; + this._contClasses= new Dictionary; + /* */ + + + this._dictMgr = null; + + + } + + /* + * Deprecated function for now... + * History: + * A pre-version of implementation for error detection. After I optimized the code for performance, + * I drop this function by that time, but you know performance meassuring is a tricky problem... + * ToDo: Need a revisit when we implementing complex-affix support and compound-word support. + */ + // check if word with affixes is correctly spelled + public function affixCheck( word:String, needFlag:int, inCompound:int ):HashEntry { + var rv:HashEntry = null; + // check all prefixes (also crossed with suffixes if allowed) + rv = optSuffixCheck(word, needFlag, inCompound); + if( rv ) return rv; + // if still not found check all suffixes + rv = optPrefixCheck(word, 0, null, inCompound, needFlag); + return rv; + } + + // This function checks if word with affixes is correctly spelled. 
+ public function affixCheck2( word:String, needFlag:int, inCompound:int ):HashEntry { + var rv:HashEntry = null; + if ( word.length <2 ) return rv; + // check onelevel prefix case or one level prefix+one level suffix: un->run or under->taker (note: hypothetical words) also will check milli->litre->s and d'->autre->s + rv = optPrefixCheck2(word, inCompound, needFlag); + if( rv ) return rv; + // check all one level suffix drink->able or drink->s + rv = optSuffixCheck2(word,0,null, needFlag, inCompound); + + + //double affix checking + if(this.haveContClass) + { + if(rv) return rv; + //check all 2 level suffixes case: drink->able->s + rv= optTwoSuffixCheck(word,0, null, needFlag,0); + + if(rv) return rv; + //check prefix and then 2 level suffix case un->drink->able->s + rv= optTwoPrefixCheck(word, 0, needFlag); + + } + + return rv; + } + + + /* + * Deprecated function for now... + * History: + * A pre-version of implementation for error detection. After I optimized the code for performance, + * I drop this function by that time, but you know performance meassuring is a tricky problem... + * ToDo: Need a revisit when we implementing complex-affix support and compound-word support. 
+ */ + public function optPrefixCheck(word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int) :HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + // first handle the special case of 0 length prefixes + if ( _optPrefixKeyTable[''] != undefined ) { + _optPfxEntry = _optPrefixKeyTable['']; + while ( _optPfxEntry ) { + // fogemorpheme + // permit prefixes in compounds + // check prefix + rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag); + if ( rv ) { + return rv; + } + _optPfxEntry = _optPfxEntry.nextElementWithKey; + } + } + + // now handle the general case + for ( var i:int =1; i < word.length ; ++i ) { + tmpWord = word.substr(0,i); + if ( _optPrefixKeyTable[tmpWord] != undefined ) { + _optPfxEntry = _optPrefixKeyTable[tmpWord]; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( _optPfxEntry ) { + rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag); + if ( rv) { + return rv; + } + _optPfxEntry = _optPfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + +// This function checks one level prefix OR one level prefix+ one level suffix + public function optPrefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + var i:int; + var locOptPfxEntry:OptimizedPrefixEntry=null;//local optimised prefix entry added because we are adding optTwoPrefixCheck + // first handle the special case of 0 length prefixes + if ( _optPrefixKeyTable[''] != undefined ) { + for ( i=0; i<_optPrefixKeyTable[''].length; ++i ) { + locOptPfxEntry=_optPrefixKeyTable[''][i]; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( locOptPfxEntry ) { + rv = locOptPfxEntry.checkWord2(word, inCompound, needFlag); + if ( rv) { + return rv; + } + locOptPfxEntry = locOptPfxEntry.nextElementWithKey; + } + } + } + + // now handle the general case + var firstKeyStr:String = word.charAt(0); + var secondKeyNum:Number = 
word.charCodeAt(1); + var breakFlag:Boolean = false; + if ( _optPrefixKeyTable[firstKeyStr] != undefined ) { + for ( i=0; i< _optPrefixKeyTable[firstKeyStr].length; ++i ) { + locOptPfxEntry=_optPrefixKeyTable[firstKeyStr][i]; + if ( (locOptPfxEntry.affixKey.length!=1) ) { + if ( locOptPfxEntry.affixKey.charCodeAt(1)> secondKeyNum ) + break; + if ( locOptPfxEntry.affixKey.charCodeAt(1)< secondKeyNum) { + if (breakFlag) break; + else continue; + } + breakFlag = true; + } + if (word.indexOf(locOptPfxEntry.affixKey) != 0) + continue; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( locOptPfxEntry ) { + rv = locOptPfxEntry.checkWord2(word, inCompound, needFlag); + if ( rv) { + return rv; + } + locOptPfxEntry = locOptPfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + + // This is a new function added to include one level prefix checking followed by two level suffix checking + public function optTwoPrefixCheck(word:String, inCompound:int, needFlag:int) :HashEntry { + var rv:HashEntry = null; + //pfx=null;//TODO:Need to figure these out, seems they will be needed for compound rules. keeping for some time + //sfxrevkey=null;//TODO:Need to figure these out, seems they will be needed for compound rules. 
keeping for some time + var tmpWord:String; + var i:int; + var locOptPfxEntry:OptimizedPrefixEntry=null;//local optimised prefix entry + // first handle the special case of 0 length prefixes + if ( _optPrefixKeyTable[''] != undefined ) { + for ( i=0; i<_optPrefixKeyTable[''].length; ++i ) { + locOptPfxEntry=_optPrefixKeyTable[''][i]; + + while ( locOptPfxEntry ) { + rv = locOptPfxEntry.checkTwoWord(word, inCompound, needFlag); + if ( rv) { + return rv; + } + locOptPfxEntry = locOptPfxEntry.nextElementWithKey; + } + } + } + + // now handle the general case + var firstKeyStr:String = word.charAt(0); + var secondKeyNum:Number = word.charCodeAt(1); + var breakFlag:Boolean = false; + if ( _optPrefixKeyTable[firstKeyStr] != undefined ) { + for ( i=0; i< _optPrefixKeyTable[firstKeyStr].length; ++i ) { + locOptPfxEntry=_optPrefixKeyTable[firstKeyStr][i]; + if ( (locOptPfxEntry.affixKey.length!=1) ) { + if ( locOptPfxEntry.affixKey.charCodeAt(1)> secondKeyNum ) + break; + if ( locOptPfxEntry.affixKey.charCodeAt(1)< secondKeyNum) { + if (breakFlag) break; + else continue; + } + breakFlag = true; + } + if (word.indexOf(locOptPfxEntry.affixKey) != 0) + continue; + while ( locOptPfxEntry ) { + rv = locOptPfxEntry.checkTwoWord(word, inCompound, needFlag); + if ( rv) { + return rv; + } + locOptPfxEntry = locOptPfxEntry.nextElementWithKey; + } + } + } + return rv;//this most certainly will return NULL + } + + + /* + * Deprecated function for now... + * History: + * A pre-version of implementation for error detection. After I optimized the code for performance, + * I drop this function by that time, but you know performance meassuring is a tricky problem... + * ToDo: Need a revisit when we implementing complex-affix support and compound-word support. 
+ */ + public function optSuffixCheck( word:String, needFlag:int, inCompound:int):HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + // first handle the special case of 0 length suffixes + if ( this._optSuffixKeyTable[''] != undefined ) { + _optSfxEntry = this._optSuffixKeyTable['']; + while ( _optSfxEntry ) { + // fogemorpheme + // permit prefixes in compounds + // check prefix + rv = _optSfxEntry.checkWord(word, inCompound, needFlag); + if ( rv ) { + return rv; + } + _optSfxEntry = _optSfxEntry.nextElementWithKey; + } + + } + // now handle the general case + for ( var i:int =word.length-1; i > 0 ; --i ) { + tmpWord = word.substr(i); + if ( _optSuffixKeyTable[tmpWord] != undefined ) { + _optSfxEntry = _optSuffixKeyTable[tmpWord]; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( _optSfxEntry ) { + rv = _optSfxEntry.checkWord(word, inCompound, needFlag); + if ( rv) { + return rv; + } + _optSfxEntry = _optSfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + +//This function takes care of all one level suffix stripping. 
This is called from other affix stripping functions also + public function optSuffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int, cclass:int=0, pfxcclass:int=0):HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + var locOptSfxEntry:OptimizedSuffixEntry=null;//local optimised suffic entry + // first handle the special case of 0 length suffixes + if ( this._optSuffixKeyTable[''] != undefined ) { + locOptSfxEntry=this._optSuffixKeyTable['']; + while ( locOptSfxEntry ) { + //if(!cclass|| locOptSfxEntry.contclass) + //{ + + + + // fogemorpheme + // permit prefixes in compounds + // check prefix + + //if((_optSfxEntry &&!(_optSfxEntry.contclass && HashEntry.TESTAFF(_optSfxEntry.contclass, this._needAffix)))||(ppfx&& !(ppfx.contclass && HashEntry.TESTAFF(ppfx.contclass,this._needAffix))))// needaffix on prefix or first suffix + //{ + + rv = locOptSfxEntry.checkWord2(word, sfxopts, ppfx, inCompound, needFlag, cclass, pfxcclass); + if ( rv ) { + _optSfxEntry = locOptSfxEntry;//WIll possibily needed in compound check + return rv; + } + // } + //} + locOptSfxEntry = locOptSfxEntry.nextElementWithKey; + } + + } + // now handle the general case + for ( var i:int =word.length-1; i >= 0 ; --i ) { + tmpWord = word.substr(i); + if ( _optSuffixKeyTable[tmpWord] != undefined ) { + locOptSfxEntry = (_optSuffixKeyTable[tmpWord] is OptimizedSuffixEntry)? 
_optSuffixKeyTable[tmpWord] : null; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( locOptSfxEntry ) { + //if(_optSfxEntry &&HashEntry.TESTAFF(_optSfxEntry.contclass, this._needAffix)||(ppfx&& HashEntry.TESTAFF(ppfx.contclass,this._needAffix)))// needaffix on prefix or first suffix + //{ + + rv = locOptSfxEntry.checkWord2(word, sfxopts, ppfx, inCompound, needFlag, cclass, pfxcclass); + if ( rv) { + _optSfxEntry = locOptSfxEntry;//WIll possibily needed in compound check + return rv; + } + //} + locOptSfxEntry = locOptSfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + + // This is a new function added to include two level suffix checking + public function optTwoSuffixCheck(word:String, sfxopts:int, ppfx:AffixEntry,needFlag:int,pfxcclass:int=0) :HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + var locOptSfxEntry:OptimizedSuffixEntry;//local optimised suffic entry + // first handle the special case of 0 length suffixes + if ( this._optSuffixKeyTable[''] != undefined ) + { + locOptSfxEntry=this._optSuffixKeyTable['']; + while ( locOptSfxEntry ) + { + for(var j:int=0; locOptSfxEntry.flags && j<locOptSfxEntry.flags.length; j++) + { + if(this.contClasses[locOptSfxEntry.flags[j]]==true) + { //if this can be a possible contclass check furthur + rv = locOptSfxEntry.checkTwoWord(word, sfxopts, ppfx, needFlag, locOptSfxEntry.flags[j], pfxcclass ); + if (rv) + { + _optSfxEntry = locOptSfxEntry;//WIll possibily needed in compound check + return rv; + } + + } + } + // get next suffix entry from table + locOptSfxEntry = locOptSfxEntry.nextElementWithKey; + } + } + + //now handle the general case + for ( var i:int =word.length-1; i >= 0 ; --i ) + { + tmpWord = word.substr(i); + if ( _optSuffixKeyTable[tmpWord] != undefined ) + { + locOptSfxEntry = (_optSuffixKeyTable[tmpWord] is OptimizedSuffixEntry)? 
_optSuffixKeyTable[tmpWord] : null; + + while ( locOptSfxEntry ) + { + for(j=0;locOptSfxEntry.flags && j<locOptSfxEntry.flags.length; j++) + { + if(this.contClasses[locOptSfxEntry.flags[j]]==true) + { + //if this can be a possible contclass check furthur + rv = locOptSfxEntry.checkTwoWord(word, sfxopts, ppfx, needFlag,locOptSfxEntry.flags[j], pfxcclass ); + if ( rv) + { + _optSfxEntry = locOptSfxEntry;//WIll possibily needed in compound check + return rv; + } + } + } + locOptSfxEntry = locOptSfxEntry.nextElementWithKey; + } + } + } + + return rv;//will be null in most cases + + } + + /* + * Deprecated function for now... + * History: + * A pre-version of implementation for error detection. After I optimized the code for performance, + * I drop this function by that time, but you know performance meassuring is a tricky problem... + * ToDo: Need a revisit when we implementing complex-affix support and compound-word support. + */ + public function suffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int):HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + // first handle the special case of 0 length suffixes + if ( this._suffixKeyTable[''] != undefined ) { + _sfxEntry = this._suffixKeyTable['']; + while ( _sfxEntry ) { + // fogemorpheme + // permit prefixes in compounds + // check prefix + rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag); + if ( rv ) { + return rv; + } + _sfxEntry = _sfxEntry.nextElementWithKey; + } + + } + // now handle the general case + for ( var i:int =word.length-1; i > 0 ; --i ) { + tmpWord = word.substr(i); + if ( _suffixKeyTable[tmpWord] != undefined ) { + _sfxEntry = _suffixKeyTable[tmpWord]; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( _sfxEntry ) { + rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag); + if ( rv) { + return rv; + } + _sfxEntry = _sfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + + + /* + * Deprecated 
function for now... + * History: + * A pre-version of implementation for error detection. After I optimized the code for performance, + * I drop this function by that time, but you know performance meassuring is a tricky problem... + * ToDo: Need a revisit when we implementing complex-affix support and compound-word support. + */ + private function prefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry { + var rv:HashEntry = null; + var tmpWord:String; + // first handle the special case of 0 length prefixes + if ( _prefixKeyTable[''] != undefined ) { + _pfxEntry = _prefixKeyTable['']; + while ( _pfxEntry ) { + // fogemorpheme + // permit prefixes in compounds + // check prefix + rv = _pfxEntry.checkWord(word, inCompound, needFlag); + if ( rv ) { + return rv; + } + _pfxEntry = _pfxEntry.nextElementWithKey; + } + } + + // now handle the general case + for ( var i:int =1; i < word.length ; ++i ) { + tmpWord = word.substr(0,i); + if ( _prefixKeyTable[tmpWord] != undefined ) { + _pfxEntry = _prefixKeyTable[tmpWord]; + // fogemorpheme + // permit prefixes in compounds + // check prefix + while ( _pfxEntry ) { + rv = _pfxEntry.checkWord(word, inCompound, needFlag); + if ( rv) { + return rv; + } + _pfxEntry = _pfxEntry.nextElementWithKey; + } + } + } + + return rv; + } + + public function lookup(word:String ):HashEntry { + var he:HashEntry = null; + var i:int; + // look word in hash table + for ( i=0; i < this._dictMgr.dictonaryList.length && !he; ++i ) { + he = this._dictMgr.dictonaryList[i].getElement(word); + } + return he; + } + + public function set flagMode(value:int) :void { + this._flagMode = value; + } + + public function get flagMode():int { + return this._flagMode; + } + + public function set encoding(value:String) :void { + this._encoding = value; + } + + public function get encoding():String { + return this._encoding; + } + + public function set keepCase(value:Number):void { + this._keepCase = value; + } + + public function get keepCase():Number { 
+ return this._keepCase; + } + + public function set haveContClass(value:Boolean):void { + this._haveContClass = value; + } + + public function get haveContClass():Boolean { + return this._haveContClass; + } + + public function set needAffix(value:Number):void { + this._needAffix = value; + } + public function get needAffix():Number { + return this._needAffix; + } + + + public function set circumfix(value:Number):void { + this._circumfix = value; + } + public function get circumfix():Number { + return this._circumfix; + } + + public function set onlyInCompound(value:Number):void { + this._onlyInCompound = value; + } + public function get onlyInCompound():Number { + return this._onlyInCompound; + } + + public function set dictionaryManager(value:DictionaryManager) :void { + this._dictMgr = value; + } + + public function get dictionaryManager():DictionaryManager { + return this._dictMgr; + } + + + public function set fullStrip(value:int):void { + this._fullStrip = value; + } + + public function get fullStrip():int { + return this._fullStrip; + } + + public function set suggestionsWithDots(value:int):void { + this._sugswithdots = value; + } + + public function get suggestionsWithDots():int { + return this._sugswithdots; + } + + public function set nosplitSuggestions(value:int ) :void { + this._nosplitsugs = value; + } + + public function get nosplitSuggestions():int { + return this._nosplitsugs; + } + + public function set maxNgramSuggestions(value:int ) :void { + this._maxngramsugs = value; + } + + public function get maxNgramSuggestions():int { + return this._maxngramsugs; + } + + public function set version(value:String) :void { + this._version = value; + } + + public function get version():String { + return this._version; + } + + public function set languageCode(value:String) :void { + this._languageCode = value; + } + + public function get languageCode():String { + return this._languageCode; + } + + public function set wordChars(value:String):void { + 
this._wordChars= value; + } + + public function get wordChars():String { + return this._wordChars; + } + + public function addMapFilter(mapString:String ):Boolean { + var mf:MapFilter = new MapFilter(mapString); + for ( var i:int; i< this._mapFilterTable.length; ++i ) { + if ( this._mapFilterTable[i].mapCharSet == mapString ) { + return false; + } + } + this._mapFilterTable.push(mf); + return true; + } + + public function addSimpleFilter(matchString:String, replacement:String):Boolean { + var sf:SimpleFilter = new SimpleFilter( matchString, replacement); + for ( var i:int; i< this._simpleFilterTable.length; ++i ) { + if ( (this._simpleFilterTable[i].matchString==matchString) && (this._simpleFilterTable[i].replacement==replacement ) ) { + return false; + } + } + this._simpleFilterTable.push(sf); + return true; + } + + + //--adding to iconv/oconv table + + public function addConvFilter(matchString:String, replacement:String, ioflag:Boolean):Boolean { + var convTable:Array; + convTable=(ioflag==true)?this._iconvFilterTable:this._oconvFilterTable; + for ( var i:int; convTable && i< convTable.length; ++i ) { + if ( (convTable[i].matchString==matchString) && (convTable[i].replacement==replacement ) ) { + return false; + } + } + var sf:SimpleFilter = new SimpleFilter( matchString, replacement); + convTable.push(sf); + return true; + } + + public function addAffixEntry(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0, contclass:String=null):Boolean{ + if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false; + if ( affixType == 0 ) { + if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false; + var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission,contclass); + pfxEntry.attributeManager = this; + addPrefixEntry(pfxEntry); + 
addOptPrefixEntry(pfxEntry); + }else { + if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false; + var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission,contclass); + sfxEntry.attributeManager = this; + addSuffixEntry(sfxEntry); + addOptSuffixEntry(sfxEntry); + } + return true; + } + + private function addOptPrefixEntry(pfxEntry:PrefixEntry):Boolean { + var optPfxEntry:OptimizedPrefixEntry + var hashKey:String = pfxEntry.affixKey.charAt(0); + optPfxEntry = new OptimizedPrefixEntry(pfxEntry); + optPfxEntry.attributeManager = this; + //insert prefix key table.... + if ( _optPrefixKeyTable[hashKey] == undefined ) { + _optPrefixKeyTable[hashKey] = new Array(); + _optPrefixKeyTable[hashKey].push(optPfxEntry); + } + else { + for each( var optPfxKeyEntry:OptimizedPrefixEntry in _optPrefixKeyTable[hashKey] ){ + if ( optPfxKeyEntry.affixKey == pfxEntry.affixKey ) { + while( optPfxKeyEntry.nextElementWithKey != null ) { + if ( optPfxKeyEntry.isSimilarObject(pfxEntry) ) { + optPfxKeyEntry.extendObject(pfxEntry); + return true; + } + optPfxKeyEntry = optPfxKeyEntry.nextElementWithKey; + } + if ( optPfxKeyEntry.isSimilarObject(pfxEntry) ) { + optPfxKeyEntry.extendObject(pfxEntry); + return true; + } + optPfxKeyEntry.nextElementWithKey = optPfxEntry; + return true; + } + } + _optPrefixKeyTable[hashKey].push(optPfxEntry); + _optPrefixKeyTable[hashKey].sortOn("affixKey"); + } + return true; + + } + + + private function addOptSuffixEntry(sfxEntry:SuffixEntry):Boolean { + var optSfxEntry:OptimizedSuffixEntry + + //insert suffix key table.... 
+ if ( _optSuffixKeyTable[sfxEntry.affixKey] == undefined ) { + optSfxEntry = new OptimizedSuffixEntry(sfxEntry); + optSfxEntry.attributeManager = this; + _optSuffixKeyTable[sfxEntry.affixKey] = optSfxEntry; + } + else { + var optSfxKeyEntry:OptimizedSuffixEntry = _optSuffixKeyTable[sfxEntry.affixKey]; + while( optSfxKeyEntry.nextElementWithKey != null ) { + if ( optSfxKeyEntry.isSimilarObject(sfxEntry) ) { + optSfxKeyEntry.extendObject(sfxEntry); + return true; + } + optSfxKeyEntry = optSfxKeyEntry.nextElementWithKey; + } + if ( optSfxKeyEntry.isSimilarObject(sfxEntry) ) { + optSfxKeyEntry.extendObject(sfxEntry); + return true; + } + optSfxEntry = new OptimizedSuffixEntry(sfxEntry); + optSfxEntry.attributeManager = this; + optSfxKeyEntry.nextElementWithKey = optSfxEntry; + } + return true; + + } + + + + private function addPrefixEntry(pfxEntry:PrefixEntry):Boolean { + // We may combine prefix/suffix insertion into one function in the future, it could be good for reduce the code size. + // Since may there is some difference between prefix and suffix, so leave it with different class and different table.... + // need better consideration for performance and code style in next step... + var flagChar:String; + flagChar = String.fromCharCode(pfxEntry.flag); + // insert prefix flag table... + if ( _prefixFlagTable[flagChar] == undefined ) + _prefixFlagTable[flagChar] = pfxEntry; + else { + var pfxFlagEntry:PrefixEntry = _prefixFlagTable[flagChar]; + while( pfxFlagEntry.nextElementWithFlag != null ) { + pfxFlagEntry = pfxFlagEntry.nextElementWithFlag; + } + pfxFlagEntry.nextElementWithFlag = pfxEntry; + } + + //insert prefix key table.... 
// ---- addPrefixEntry (continued): append the entry to the chain stored under its affix key ----
			if ( _prefixKeyTable[pfxEntry.affixKey] == undefined )
				_prefixKeyTable[pfxEntry.affixKey] = pfxEntry;
			else {
				var pfxKeyEntry:PrefixEntry = _prefixKeyTable[pfxEntry.affixKey];
				while( pfxKeyEntry.nextElementWithKey != null ) {
					pfxKeyEntry = pfxKeyEntry.nextElementWithKey;
				}
				pfxKeyEntry.nextElementWithKey = pfxEntry;
			}
			return true;
		}

		/**
		 * Registers a suffix entry in both suffix lookup tables.
		 *
		 * The entry is appended to the end of the chain stored under its flag
		 * character in _suffixFlagTable (linked via nextElementWithFlag) and to
		 * the end of the chain stored under its affix key in _suffixKeyTable
		 * (linked via nextElementWithKey). A missing slot is started with this entry.
		 *
		 * @param sfxEntry the suffix entry to register.
		 * @return always true.
		 */
		private function addSuffixEntry(sfxEntry:SuffixEntry ):Boolean {
			// We may combine prefix/suffix insertion into one function in the future; it would reduce the code size.
			// Prefix and suffix may still differ, so they stay in different classes and different tables for now.
			var flagChar:String = String.fromCharCode(sfxEntry.flag);

			// insert into the suffix flag table
			if ( _suffixFlagTable[flagChar] == undefined ) {
				_suffixFlagTable[flagChar] = sfxEntry;
			} else {
				var sfxFlagEntry:SuffixEntry = _suffixFlagTable[flagChar];
				while ( sfxFlagEntry.nextElementWithFlag != null ) {
					sfxFlagEntry = sfxFlagEntry.nextElementWithFlag;
				}
				sfxFlagEntry.nextElementWithFlag = sfxEntry;
			}

			// insert into the suffix key table
			if ( _suffixKeyTable[sfxEntry.affixKey] == undefined ) {
				_suffixKeyTable[sfxEntry.affixKey] = sfxEntry;
			} else {
				var sfxKeyEntry:SuffixEntry = _suffixKeyTable[sfxEntry.affixKey];
				while ( sfxKeyEntry.nextElementWithKey != null ) {
					sfxKeyEntry = sfxKeyEntry.nextElementWithKey;
				}
				sfxKeyEntry.nextElementWithKey = sfxEntry;
			}

			return true;
		}

		/**
		 * Deprecated for now.
		 *
		 * History: an earlier implementation used for error detection. It was dropped
		 * after the code was optimized for performance, but performance measuring is a
		 * tricky problem...
		 * ToDo: revisit when implementing complex-affix support and compound-word support.
		 *
		 * Builds a PrefixEntry (affixType == 0) or a SuffixEntry (any other affixType),
		 * attaches this rule object as its attributeManager and registers it in the
		 * non-optimized flag/key tables.
		 *
		 * @param affixFlag     flag code of the affix rule.
		 * @param stripString   characters to strip; must not be null.
		 * @param affixValue    the affix text; must not be null.
		 * @param conditionsStr condition string; must be neither null nor empty.
		 * @param morph         passed through to the entry constructor.
		 * @param permission    passed through to the entry constructor.
		 * @param affixType     0 for a prefix, anything else for a suffix.
		 * @return false when a required argument is missing, otherwise true.
		 */
		public function addAffixEntry2(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0):Boolean{
			// One guard is enough: the arguments are not modified below, so the
			// per-branch repetitions of this test could never fire.
			if ( stripString == null || affixValue == null || conditionsStr==null || conditionsStr=="" ) return false;
			if ( affixType == 0 ) {
				var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission);
				pfxEntry.attributeManager = this;
				addPrefixEntry(pfxEntry);
			}else {
				var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag,stripString,affixValue,conditionsStr,morph,permission);
				sfxEntry.attributeManager = this;
				addSuffixEntry(sfxEntry);
			}
			return true;
		}

		/** Table of prefix entry chains indexed by flag character. */
		public function get prefixFlagTable():Array {
			return this._prefixFlagTable;
		}

		/** Table of prefix entry chains indexed by affix key. */
		public function get prefixKeyTable():Array {
			return this._prefixKeyTable;
		}

		/** Table of suffix entry chains indexed by flag character. */
		public function get suffixFlagTable():Array {
			return this._suffixFlagTable;
		}

		/** Table of suffix entry chains indexed by affix key. */
		public function get suffixKeyTable():Array {
			return this._suffixKeyTable;
		}

		public function set forbiddenWord(value:Number) :void {
			this._forbiddenWord = value;
		}

		/** Flag that marks forbidden words. */
		public function get forbiddenWord():Number {
			return this._forbiddenWord;
		}

		public function set ignoredChars(value:String ) :void {
			this._ignoredChars = value;
		}

		/** Characters that are removed from words before lookup. */
		public function get ignoredChars():String {
			return this._ignoredChars;
		}

		public function set keyString(value:String):void {
			this._keyString = value;
		}

		/** Returns the key string, lazily defaulting it to InternalConstants.SPELL_KEYSTRING when unset. */
		public function get keyString():String {
			if ( this._keyString == null ) this._keyString=InternalConstants.SPELL_KEYSTRING;
			return this._keyString;
		}

		public function set tryString(value:String):void {
			this._tryString = value;
		}

		public function get tryString():String {
// (body of the tryString getter, continued)
			return this._tryString;
		}

		public function get contClasses():Dictionary {
			return _contClasses;
		}


		public function set noSuggest(value:Number ):void {
			this._noSuggest = value;
		}
		/** Flag of words that must not be suggested (NOSUGGEST). */
		public function get noSuggest():Number {
			return this._noSuggest;
		}

		public function get simpleFilterTable():Array {
			return this._simpleFilterTable;
		}

		/** Input conversion table, used by conv() when ioflag is true. */
		public function get iconvFilterTable():Array {
			return this._iconvFilterTable;
		}

		/** Output conversion table, used by conv() when ioflag is false. */
		public function get oconvFilterTable():Array {
			return this._oconvFilterTable;
		}

/*		public function get phoneTable():PhoneticTable {
			return this._phoneTable;
		}
*/
		public function get breakTable():Array {
			return this._breakTable;
		}
		public function get aliasfTable():Array{
			return this._aliasfTable;
		}


		public function get mapFilterTable():Array {
			return this._mapFilterTable;
		}

		/**
		 * Supports the ICONV/OCONV rules: called whenever an input or output
		 * conversion is needed.
		 *
		 * Every entry of the selected table is applied in table order; each
		 * occurrence of entry.matchString in the word is replaced by
		 * entry.replacement, scanning left to right. Text inserted by a replacement
		 * is not rescanned for the same entry, so a replacement that contains its
		 * own pattern (for example "a" -> "aa") terminates, and no character after a
		 * shortened replacement is skipped.
		 *
		 * @param word     the word to convert.
		 * @param convWord output array; exactly one element is pushed per call: the
		 *                 converted word, or null when nothing was replaced.
		 * @param ioflag   true selects the ICONV table, false the OCONV table.
		 * @return true when at least one replacement was made. When the selected table
		 *         is null or empty the method returns false and pushes nothing.
		 */
		public function conv(word:String,convWord:Array,ioflag:Boolean):Boolean{
			var convTable:Array = ioflag ? this._iconvFilterTable : this._oconvFilterTable;
			if ( (convTable==null) || (convTable.length == 0) ) return false;

			var change:Boolean = false;
			var wspace:String;		// stays null until the first replacement happens
			for ( var i:int = 0; i < convTable.length; ++i ) {
				var matchString:String = convTable[i].matchString;
				var replacement:String = convTable[i].replacement;
				// An empty pattern matches at every position without consuming input
				// and would loop forever, so such entries are skipped.
				if ( matchString == null || matchString.length == 0 ) continue;
				// Concatenating a null String would insert the literal text "null".
				if ( replacement == null ) replacement = "";

				var searchIndex:int = 0;
				while ( (searchIndex = word.indexOf( matchString, searchIndex )) != -1 ) {
					word = word.substr(0, searchIndex)
							+ replacement
							+ word.substr(searchIndex + matchString.length);
					// Resume right after the inserted text: the cursor must follow the
					// length of the replacement, not the length of the pattern.
					searchIndex += replacement.length;
					wspace = word;
					change = true;
				}
			}
			convWord.push(wspace);
			return change;
		}

	}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as new file mode 100644 index 0000000..2b874dd --- /dev/null +++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as @@ -0,0 +1,32 @@ +//////////////////////////////////////////////////////////////////////////////// +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
////////////////////////////////////////////////////////////////////////////////


package com.adobe.linguistics.spelling.core
{
	/**
	 * Mutable holder for a single numeric spelling-information value.
	 * The value lives in the public field Info so callers can update it in place.
	 */
	public class SpellingInfo
	{
		/** The stored information value. */
		public var Info:Number;

		/**
		 * @param information initial value stored in Info.
		 */
		public function SpellingInfo(information:Number)
		{
			this.Info = information;
		}

	}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as new file mode 100644 index 0000000..4b80d7c --- /dev/null +++ b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as @@ -0,0 +1,207 @@
////////////////////////////////////////////////////////////////////////////////
//
//  Licensed to the Apache Software Foundation (ASF) under one or more
//  contributor license agreements.  See the NOTICE file distributed with
//  this work for additional information regarding copyright ownership.
//  The ASF licenses this file to You under the Apache License, Version 2.0
//  (the "License"); you may not use this file except in compliance with
//  the License.  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////



package com.adobe.linguistics.spelling.core
{

	import com.adobe.linguistics.spelling.core.container.HashTable;
	import com.adobe.linguistics.spelling.core.env.InternalConstants;
	import com.adobe.linguistics.spelling.core.error.*;
	import com.adobe.linguistics.spelling.core.utils.StringUtils;

	import flash.utils.Dictionary;


	/**
	 * Word list of the spelling engine: maps each word to a chain of HashEntry
	 * homonyms (linked through HashEntry.next), each carrying an affix flag string
	 * and a description.
	 */
	public class SquigglyDictionary
	{
		private var _hashtbl:HashTable;
		private var _forbiddenword:int;
		private var _ignoredCharSet:String;
		private var _flag_mode:int;
		private var _languageCode:String;



		/**
		 * @param attrMgr the linguistic rule set. When it is null the dictionary falls
		 *                back to the InternalConstants defaults.
		 */
		public function SquigglyDictionary(attrMgr:LinguisticRule)
		{
			_hashtbl = new HashTable( true ); // useWeakReferences:Boolean = true

			// NOTE(review): when attrMgr is supplied none of these fields is initialised
			// here; they keep their language defaults (0 / null). Confirm that is intended.
			if ( !attrMgr) {
				_forbiddenword = InternalConstants.FORBIDDENWORD;
				_ignoredCharSet = null;
				_flag_mode = InternalConstants.FLAG_CHAR;
				_languageCode = null;
			}
		}

		private function set forbiddenword(value:int ) :void {
			this._forbiddenword = value;
		}

		public function get forbiddenword():int {
			return this._forbiddenword;
		}

		private function set ignoredCharSet(value:String):void {
			this._ignoredCharSet = value;
		}

		public function get ignoredCharSet():String {
			return this._ignoredCharSet;
		}

		private function set flag_mode(value:int):void {
			this._flag_mode = value;
		}

		public function get flag_mode():int {
			return this._flag_mode;
		}

		private function set languageCode(value:String ) :void {
			this._languageCode = value;
		}

		public function get languageCode():String {
			return this._languageCode;
		}

		/** @return true when the underlying hash table contains the key. */
		public function containsKey(key:String ):Boolean
		{
			return _hashtbl.containsKey(key );
		}

		/**
		 * @return the entry stored under key, or null when nothing is stored there or
		 *         the stored value is not a HashEntry.
		 */
		public function getElement( key:String ):HashEntry {
			var res:* = _hashtbl.getElement(key );
			return (res is HashEntry) ? res : null;
		}

		/** Alias of addWord(). */
		public function put(key:String, affixString:String=null, description:String = null):Boolean {
			return addWord( key, affixString, description );
		}

		/** The hash map backing this dictionary. */
		public function get dictionary():Dictionary {
			return this._hashtbl.hashMap;
		}

		/**
		 * Collects every key for which the callback returns a true value.
		 *
		 * @param callback   invoked as callback(key, index, result), where index counts
		 *                   the keys visited so far and result is the array being built.
		 * @param thisObject object used as "this" for the callback; null keeps the
		 *                   behaviour of a plain function call.
		 * @return the accepted keys, or null when no key was accepted.
		 */
		public function filter( callback:Function, thisObject:* = null):Array {
			var res:Array = new Array();
			var index:int = 0;
			var dict:Dictionary = this._hashtbl.hashMap;
			for ( var key:* in dict ) {
				if ( callback.call( thisObject, key, index, res ) ) {
					res.push( key );
				}
				++index;	// previously never advanced, so the callback always saw 0
			}

			return (res.length == 0) ? null: res;
		}


		/**
		 * Adds a word, and where needed its hidden capitalized form.
		 *
		 * @param word  the word; null is rejected.
		 * @param affix affix flag string, may be null.
		 * @param desc  description, may be null.
		 * @return true when the word itself was stored.
		 */
		public function addWord( word:String, affix:String = null, desc:String = null ) :Boolean {
			if ( word == null ) return false;
			var captype:int = StringUtils.getCapType(word);
			var added:Boolean = addWordWithAffix(word,affix,desc,false );
			addHiddenCapitalizedWord(word,captype, affix,desc);
			return added;
		}

		/**
		 * Adds inner capitalized forms to handle the following allcap forms:
		 *   Mixed caps: OpenOffice.org -> OPENOFFICE.ORG
		 *   Allcaps with suffixes: CIA's -> CIA'S
		 *
		 * The hidden form is the word lower-cased with its first character
		 * upper-cased, stored with ONLYUPCASEFLAG appended to the affix flags.
		 * Nothing is added when the affix string carries the forbidden-word flag.
		 *
		 * @return always true.
		 */
		private function addHiddenCapitalizedWord( word:String, captype:int, affix:String=null, desc:String=null ) :Boolean {
			if (((captype == InternalConstants.HUHCAP) || (captype == InternalConstants.HUHINITCAP) ||((captype == InternalConstants.ALLCAP) && (affix != null))) &&
				!((affix != null) && HashEntry.TESTAFF(affix, _forbiddenword))) {
				// A null affix must contribute no flags. "affix += flag" on a null String
				// would produce the text "null" followed by the flag.
				var hiddenAffix:String = ((affix != null) ? affix : "") + String.fromCharCode(InternalConstants.ONLYUPCASEFLAG);
				var hiddenWord:String = word.toLocaleLowerCase();
				hiddenWord = hiddenWord.charAt(0).toLocaleUpperCase() + hiddenWord.substr(1);
				addWordWithAffix(hiddenWord,hiddenAffix,desc,true);
			}
			return true;
		}

		/**
		 * Stores one word/affix/description triple.
		 *
		 * - Unknown word: a new HashEntry is put into the table.
		 * - Known word and onlyupcase == true: nothing is stored (the hidden form is
		 *   not needed) and false is returned.
		 * - Known word and onlyupcase == false: the first homonym carrying
		 *   ONLYUPCASEFLAG is overwritten with the new data; when there is none, the
		 *   new data is added through addEntry() on the last homonym of the chain.
		 *
		 * @return true when the data was stored.
		 */
		private function addWordWithAffix( word:String, affix:String, desc:String, onlyupcase:Boolean ):Boolean {
			if (_ignoredCharSet != null) {
				word = StringUtils.removeIgnoredChars(word, _ignoredCharSet);
			}
//ToDo: the following comment should be removed after we have complex-affix support.
//
//    if (complexprefixes) {
//        reverseword(word);
//    }
//    hp->var = H_OPT;
//    if (aliasm) {
//        hp->var += H_OPT_ALIASM;
//        store_pointer(hpw + wbl + 1, get_aliasm(atoi(desc)));
//    } else {
//        strcpy(hpw + wbl + 1, desc);
//        if (complexprefixes) {
//            if (utf8) reverseword_utf(HENTRY_DATA(hp));
//            else reverseword(HENTRY_DATA(hp));
//        }
//    }
//    if (strstr(HENTRY_DATA(hp), MORPH_PHON)) hp->var += H_OPT_PHON;

			if ( !_hashtbl.containsKey(word) ) {
				_hashtbl.put(word, new HashEntry(affix,desc) );
				return true;
			}

			// The word already exists, so a hidden onlyupcase homonym is never added.
			if ( onlyupcase ) return false;

			var hentry:HashEntry = _hashtbl.getElement(word);
			while ( true ) {
				// replace a hidden onlyupcase homonym with the real entry
				if ( (hentry.affixFlagVector != null) && hentry.testAffix(InternalConstants.ONLYUPCASEFLAG) ) {
					hentry.affixFlagVector = affix;
					hentry.variableFields = desc; /* need a better implementation, refer to the ToDo at the beginning of this function */
					return true;
				}
				if ( hentry.next == null ) break;
				hentry = hentry.next;
			}

			// hentry is now the last homonym of the chain
			hentry.addEntry(affix,desc);
			return true;
		}


	}
}
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as new file mode 100644 index 0000000..d7fdc5c --- /dev/null +++ 
b/Squiggly/main/SpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as @@ -0,0 +1,426 @@
////////////////////////////////////////////////////////////////////////////////
//
//  Licensed to the Apache Software Foundation (ASF) under one or more
//  contributor license agreements.  See the NOTICE file distributed with
//  this work for additional information regarding copyright ownership.
//  The ASF licenses this file to You under the Apache License, Version 2.0
//  (the "License"); you may not use this file except in compliance with
//  the License.  You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////


package com.adobe.linguistics.spelling.core
{

	import com.adobe.linguistics.spelling.core.env.ExternalConstants;
	import com.adobe.linguistics.spelling.core.env.InternalConstants;
	import com.adobe.linguistics.spelling.core.utils.*;

	/**
	 * Front end of the spelling engine: checks single words against the registered
	 * dictionaries and affix rules (spell) and produces replacement candidates
	 * (suggest).
	 */
	public class SquigglyEngine
	{

		private var _ignoreCappedWord:Boolean;		// Hello is always correct
		private var _ignoreAllUpperCase:Boolean;	// HELLO is always correct
		private var _ignoreWordWithNumber:Boolean;	// win2003 is always correct
		private const SPELL_COMPOUND:int = (1 << 0);
		private const SPELL_FORBIDDEN:int = (1 << 1);
		private const SPELL_ALLCAP:int = (1 << 2);
		private const SPELL_NOCAP:int = (1 << 3);
		private const SPELL_INITCAP:int = (1 << 4);

		private const MAXDIC:int = 20;
		private const MAXSHARPS:int = 5;

		private var attributeMgr:LinguisticRule;
		private var dictMgr:DictionaryManager;
		private var sugestionMgr:SuggestionManager;
		private var encoding:String;
		private var wordbreak:Array;	// holds the BREAK strings of the BREAK rule
		private var langCode:int;
		private var complexPrefixes:int;
		private var maxWordLength:int;

		/**
		 * @param rule the linguistic rules (affix data, conversion and break tables).
		 * @param dict the first dictionary to check against.
		 * @throws Error (id 200901) when either argument is null.
		 */
		public function SquigglyEngine( rule:LinguisticRule, dict:SquigglyDictionary )
		{
			if ( rule == null ) throw new Error("illegal argument for constructor", 200901);
			if ( dict == null ) throw new Error("illegal argument for constructor", 200901);

			maxWordLength = InternalConstants.MAXWORDLEN;

			dictMgr = new DictionaryManager();
			dictMgr.addDictionary(dict);
			attributeMgr = rule;
			attributeMgr.dictionaryManager = dictMgr;
			sugestionMgr = new SuggestionManager( rule, false);
			this.wordbreak=attributeMgr.breakTable;
			this.ignoreWordWithNumber = false;
			this.ignoreCappedWord = false;
			this.ignoreAllUpperCase = false;

		}

		public function set ignoreWordWithNumber( value:Boolean):void {
			this._ignoreWordWithNumber =value;
		}
		public function get ignoreWordWithNumber():Boolean {
			return this._ignoreWordWithNumber;
		}

		public function set ignoreCappedWord(value:Boolean):void {
			this._ignoreCappedWord = value;
		}
		public function get ignoreCappedWord():Boolean {
			return this._ignoreCappedWord;
		}

		public function set ignoreAllUpperCase(value:Boolean ):void {
			this._ignoreAllUpperCase = value;
		}
		public function get ignoreAllUpperCase():Boolean {
			return this._ignoreAllUpperCase;
		}

		public function set fastMode(value:Boolean ) :void {
			this.sugestionMgr.fastMode = value;
		}

		public function get fastMode():Boolean {
			return this.sugestionMgr.fastMode;
		}

		public function addDictionary( dict:SquigglyDictionary ) : Boolean {
			return dictMgr.addDictionary(dict);
		}

		/**
		 * Applies the ICONV table to a word.
		 *
		 * @return the converted word, or the word unchanged when there is no table
		 *         or the conversion yields nothing.
		 */
		private function applyInputConversion( word:String ):String {
			if ( !attributeMgr || !attributeMgr.iconvFilterTable || attributeMgr.iconvFilterTable.length == 0 ) return word;
			var convWord:Array = new Array();
			attributeMgr.conv(word,convWord,InternalConstants.CONV_ICONV);
			var converted:String = convWord.pop();
			return converted ? converted : word;
		}

		/** Counts the '.' characters (char code 46) at the end of the word. */
		private function countTrailingPeriods( word:String ):int {
			var count:int = 0;
			for ( var i:int = word.length-1; (i>=0) && (word.charCodeAt(i) == 46) ; --i ) {
				count++;
			}
			return count;
		}

		/**
		 * For ALLCAP input, discards an entry that carries the rule set's keepCase flag.
		 *
		 * @return null when the entry is rejected, otherwise the entry unchanged.
		 */
		private function rejectKeepCase( he:HashEntry, captype:int ):HashEntry {
			if ( he && (captype == InternalConstants.ALLCAP ) ) {
				if ( attributeMgr && he.affixFlagVector && attributeMgr.keepCase && he.testAffix(attributeMgr.keepCase) ) return null;
			}
			return he;
		}

		/**
		 * Checks the spelling of one word.
		 *
		 * @param word the word to check; null is treated as misspelled.
		 * @return true when the word is accepted: it is found (directly, via affix
		 *         rules, or in another capitalization), it is a number with single
		 *         separators, it is covered by one of the ignore* options, or all of
		 *         its parts around a BREAK string are accepted.
		 */
		public function spell( word:String ) :Boolean {
			if ( word == null ) return false;
			if ( word.length > maxWordLength ) return false;

			word = StringUtils.normalize(word);

			var captype:int = InternalConstants.NOCAP;
			var hasNumber:Boolean =false; //assuming that there are no numbers;
			var abbv:int = 0;
			var i:int;
			var rv:HashEntry = null;
			var info:SpellingInfo = new SpellingInfo(0);
			var wspace:String;

			// input conversion USING ICONV TABLE
			word = applyInputConversion(word);

			// first skip over any leading or trailing blanks
			word = StringUtils.trim( word );

			// now strip off any trailing periods (recording their presence)
			abbv = countTrailingPeriods(word);
			word = word.substr(0, word.length- abbv );
			captype = StringUtils.getCapType(word);
			hasNumber=StringUtils.getHasNumber(word);
			if ( (dictMgr.isEmpty()) || (word.length == 0) ) return false;

			// allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
			const NBEGIN:int = 0, NNUM:int=1, NSEP:int=2;
			var nstate:int = NBEGIN;
			var charCode:int;
			for ( i=0 ; i < word.length ; ++i ) {
				charCode = word.charCodeAt(i);
				if ( (charCode <= 57 ) && ( charCode >= 48) ) { // '0' to '9'
					nstate = NNUM;
				}else if ( (charCode==44) || (charCode==45) || (charCode==46) ) { //',' or '.' or '-'
					if ( (nstate == NSEP) || ( i==0 ) ) return false;
					nstate = NSEP;
				}else break;
			}
			if ( (i==word.length) && ( nstate == NNUM ) ) return true; // the whole word is a number
			// ignore word with Number.
			if ( ignoreWordWithNumber && hasNumber) return true;

			// ignore capitalized word or ignore all upper case word (only when it has no number).
			if ( (ignoreCappedWord &&( (captype&InternalConstants.HUHINITCAP) || (captype&InternalConstants.INITCAP))&&(hasNumber==false) ) || (ignoreAllUpperCase&&(captype & InternalConstants.ALLCAP)&&(hasNumber==false)) ) return true;


			switch(captype) {
				case InternalConstants.HUHCAP:
				case InternalConstants.HUHINITCAP:
				case InternalConstants.NOCAP:
					rv = checkWord(word,info);
					if ( (abbv!=0) && (rv == null ) ) {
						word += ".";
						rv = checkWord(word,info);
					}
					break;
				case InternalConstants.ALLCAP:
					rv = checkWord(word,info);
					if( rv ) break;
					if ( (abbv!=0 ) ) {
						word +=".";
						rv = checkWord(word,info);
						if ( rv ) break;
					}
					// ToDo: Spec. prefix handling for Catalan, French, Italian:
					// prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
					// need better understand...

					//sharps handle....

					word = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
					// deliberate fall-through: retry the ALLCAP word in its initial-capital form

				case InternalConstants.INITCAP:
					if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
					wspace = word.toLocaleLowerCase();
					rv = checkWord(word,info);
					if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;

					// forbid bad capitalization
					// (for example, ijs -> Ijs instead of IJs in Dutch)
					// use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
					if (info.Info & ExternalConstants.SPELL_FORBIDDEN) {
						rv = null;
					}

					rv = rejectKeepCase(rv, captype);
					if ( rv) break;

					rv = checkWord(wspace,info);
					if ( !rv && abbv ) {
						wspace += ".";
						rv = checkWord(wspace,info);
						if ( !rv) {
							word += ".";
							if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
							rv = checkWord(word,info);
							if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;
							rv = rejectKeepCase(rv, captype);
						}
					}
					rv = rejectKeepCase(rv, captype);
					break;
				default:
			}

			if ( rv ) return true;

			// break-table: recursive breaking at break points

			if(wordbreak){
				var nbr:int=0;
				var parseArr:Array;
				var searchIndex:int=0;
				for(i=0; i<wordbreak.length;i++){
					// count the break points in this word
					searchIndex=0;
					wspace=word;
					while (wspace && ((searchIndex=wspace.indexOf(wordbreak[i])) != -1 )) {
						nbr++;
						if(nbr>InternalConstants.MAX_WORD_BREAKS) return false; // limit the number of word breaks
						if(searchIndex<word.length)wspace=wspace.substr(searchIndex+1);
					}
				}


				for(var j:int=0; j<wordbreak.length;j++){

					if(word.search(wordbreak[j])!=-1 && (parseArr=word.split(wordbreak[j]))!=null)
					{
						// every part must be spelled correctly; the first break string
						// that occurs in the word decides the result
						for(i=0;i<parseArr.length;i++)
							if(! spell(parseArr[i]) ) return false;

						return true;
					}

				}

			}

			return false;
		}

		/**
		 * Produces replacement candidates for a word.
		 *
		 * @param word the (presumably misspelled) word.
		 * @return the list of suggestions, or null when the word is null, longer than
		 *         the maximum word length, empty after trimming, when no dictionary
		 *         is loaded, or when nothing was found.
		 */
		public function suggest( word:String ) : Array {
			if ( word == null ) return null;
			if ( word.length > maxWordLength ) return null;
			var captype:int = InternalConstants.NOCAP;
			var capwords:int = 0;

			var abbv:int = 0;
			var i:int,ns:int;
			var wspace:String;
			var slst:Array = new Array();
			var convWord:Array=new Array;

			// input conversion USING ICONV TABLE (null-safe, same rule as in spell())
			word = applyInputConversion(word);

			// first skip over any leading or trailing blanks
			word = StringUtils.trim( word );
			// now strip off any trailing periods (recording their presence)
			abbv = countTrailingPeriods(word);
			word = word.substr(0, word.length- abbv );
			captype = StringUtils.getCapType(word);
			if ( (dictMgr.isEmpty()) || (word.length == 0) ) return null;
			// In each case a result of -1 from the suggestion manager skips the
			// remaining attempts of that case.
			switch(captype) {
				case InternalConstants.NOCAP: {
					ns = sugestionMgr.suggest( slst, word, InternalConstants.NOCAP );
					break;
				}
				case InternalConstants.INITCAP:{
					capwords = 1;
					ns = sugestionMgr.suggest( slst, word, InternalConstants.INITCAP );
					if ( ns == -1) break;
					wspace = word.toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
					break;
				}
				case InternalConstants.HUHINITCAP:{
					capwords = 1;
					// deliberate fall-through to HUHCAP
				}
				case InternalConstants.HUHCAP: { // ToDo: still a lot of work...
					ns = sugestionMgr.suggest( slst, word, InternalConstants.HUHCAP );
					break;
				}
				case InternalConstants.ALLCAP: {
					wspace = word.toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
					if ( ns == -1) break;
					if ( this.attributeMgr.keepCase && spell(word ) ) {
						//ns = insert_sug(slst, wspace, ns); ToDo
					}
					wspace = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.INITCAP );
					break;
				}
			}

			// try ngram approach
			// NOTE(review): this step is described as "since found nothing" but runs even
			// when suggestions already exist; confirm whether it should be limited to an empty list.
			if ( this.attributeMgr && (this.attributeMgr.maxNgramSuggestions != 0)) {
				ns = sugestionMgr.nsuggest(slst,word);
			}

			// try dash suggestion (Afo-American -> Afro-American)

			// capitalize
			if (capwords) {
				for ( i=0;i<slst.length; ++i ) {
					slst[i] = slst[i].charAt(0).toUpperCase()+slst[i].slice(1);
				}
			}

			// expand suggestions with dot(s)
			if ( abbv && this.attributeMgr.suggestionsWithDots ) {
				for ( i=0;i<slst.length; ++i ) {
					slst[i] += ".";
				}

			}

			// remove bad capitalized and forbidden forms

			// remove original one; walk backwards so that splice() does not shift
			// an unvisited element into the slot that was just examined
			for ( i=slst.length-1; i>=0; --i ) {
				if ( slst[i] == word )
					slst.splice(i,1);
			}

			// remove duplications

			// output conversion

			if(this.attributeMgr && this.attributeMgr.oconvFilterTable && this.attributeMgr.oconvFilterTable.length!=0){
				for(i=0;i<slst.length;++i){
					var converted:Boolean = this.attributeMgr.conv(slst[i],convWord,InternalConstants.CONV_OCONV);
					// conv() pushes one element per call with a non-empty table; always
					// take it off so stale values do not pile up in convWord
					wspace=convWord.pop();
					if ( converted ) slst[i]=wspace;
				}
			}

			// if suggestions removed by nosuggest, onlyincompound parameters


			return (slst.length!=0) ? slst :null;
		}

		/**
		 * Looks a word up in the dictionaries and, failing that, through affix rules.
		 *
		 * @param word the word in the exact form to look up.
		 * @param info spelling information holder; not modified by this method.
		 * @return the matching entry, or null when the word is unknown or when the
		 *         match carries the forbidden-word flag.
		 */
		private function checkWord( word:String, info:SpellingInfo ):HashEntry {
			var i:int;
			var he:HashEntry = null;
			if ( attributeMgr.ignoredChars ) {
				word = StringUtils.removeIgnoredChars(word, attributeMgr.ignoredChars);
			}
			// word reversing wrapper for complex prefixes
			/*
			if(complexprefixes) {
				word=reverseword(word);
			}
			*/

			// look word in hash table
			for ( i=0; i < dictMgr.dictonaryList.length && !he; ++i ) {
				he = dictMgr.dictonaryList[i].getElement(word);
				// check forbidden and onlyincompound words
				if ( he && (he.affixFlagVector != null) &&
					((attributeMgr) && ( he.testAffix(attributeMgr.forbiddenWord)))

				) {
					// ToDo: LANG_hu section: set dash information for suggestions
					return null;
				}
				// ToDo: he = next not needaffix, onlyincompound homonym or onlyupcase word
/*				while (he && (he.affixFlagVector) &&
					((attributeMgr.needAffix && testAffix(he.affixFlagVector, attributeMgr.needAffix)) ||
					(pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
					(info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
					)) //he = he.next; should maintain a next homonym which is not being maintained as of now next_homonym;
*/			}

			// check with affixes
			if ( !he && attributeMgr ) {
				he = attributeMgr.affixCheck2(word,0,0);
				// do not allow affixed forms of forbidden words
				if ( he && (he.affixFlagVector != null) && (attributeMgr) && he.testAffix(attributeMgr.forbiddenWord) ) {
					// ToDo: LANG_hu section: set dash information for suggestions
					return null;
				}
			}

			return he;
		}


	}
}
\ No newline at end of file
