http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as b/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as deleted file mode 100644 index 6ad78c6..0000000 --- a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/LinguisticRule.as +++ /dev/null @@ -1,1118 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
//
////////////////////////////////////////////////////////////////////////////////


package com.adobe.linguistics.spelling.core
{
//	import com.adobe.linguistics.spelling.core.PhoneticTable;
	import com.adobe.linguistics.spelling.core.env.*;
	import com.adobe.linguistics.spelling.core.error.*;
	import com.adobe.linguistics.spelling.core.rule.*;
	import com.adobe.linguistics.spelling.core.utils.SimpleNumberParser;

	import flash.utils.Dictionary;

	/**
	 * Holds the affix rules and global attributes of one spelling dictionary
	 * and offers the affix-stripping checks built on top of them.
	 *
	 * <p>Affix entries are registered through <code>addAffixEntry()</code> and
	 * stored twice: in the plain flag/key tables and in the "optimized" tables
	 * that <code>affixCheck2()</code> and its helpers scan. Conversion, map and
	 * replacement filters are kept in simple arrays.</p>
	 *
	 * <p>Members marked "Deprecated" are the pre-optimization implementation;
	 * they are kept unchanged until complex-affix and compound-word support
	 * are revisited.</p>
	 */
	public class LinguisticRule
	{

		private var _encoding:String; // ToDo: not sure how to handle the encoding yet.

		private var snp:SimpleNumberParser = new SimpleNumberParser();

		private var _prefixFlagTable:Array;
		private var _prefixKeyTable:Array;
		private var _suffixFlagTable:Array;
		private var _suffixKeyTable:Array;
		private var _optSuffixKeyTable:Dictionary;
		private var _optPrefixKeyTable:Dictionary;

		// ---- attributes ----
		private var _keyString:String;
		private var _tryString:String;
		private var _noSuggest:Number;       // don't suggest words signed with the NOSUGGEST flag
		private var _forbiddenWord:Number;   // forbidden word signing flag
		private var _circumfix:Number = 0;   // circumfix flag
		private var _ignoredChars:String;    // letters + special word characters
		private var _wordChars:String;       // extra word characters for the tokenizer (e.g. dot, dash, n-dash, digits, percent sign in Hungarian)
		private var _languageCode:String;
		private var _version:String;
		private var _maxngramsugs:int = -1;  // undefined
		private var _nosplitsugs:int = 0;
		private var _sugswithdots:int = 0;
		private var _fullStrip:int;
		private var _keepCase:Number;
		private var _haveContClass:Boolean;  // added to support (double) prefixes

		private var _flagMode:int;
		private var _needAffix:Number;
		private var _contClasses:Dictionary; // list of all possible continuation classes
		/* ToDo */

		private var _onlyInCompound:Number = 0;
//		private var _phoneTable:PhoneticTable; // phone table
/*

ToDo: should be removed after we have complex-affix support and compound-word support.
Reference list of the defaults used by the Hunspell C implementation:

  pHMgr = ptr[0];
  alldic = ptr;
  maxdic = md;
  keystring = NULL;
  trystring = NULL;
  encoding=NULL;
  utf8 = 0;
  complexprefixes = 0;
  maptable = NULL;
  nummap = 0;
  breaktable = NULL;
  numbreak = 0;
  reptable = NULL;
  numrep = 0;
  iconvtable = NULL;
  oconvtable = NULL;
  checkcpdtable = NULL;
  // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
  simplifiedcpd = 0;
  numcheckcpd = 0;
  defcpdtable = NULL;
  numdefcpd = 0;
  phone = NULL;
  compoundflag = FLAG_NULL; // permits word in compound forms
  compoundbegin = FLAG_NULL; // may be first word in compound forms
  compoundmiddle = FLAG_NULL; // may be middle word in compound forms
  compoundend = FLAG_NULL; // may be last word in compound forms
  compoundroot = FLAG_NULL; // compound word signing flag
  compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
  compoundforbidflag = FLAG_NULL; // compound forbidden flag for suffixed word
  checkcompounddup = 0; // forbid double words in compounds
  checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
  checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
  checkcompoundtriple = 0; // forbid compounds with triple letters
  simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
  forbiddenword = FORBIDDENWORD; // forbidden word signing flag
  nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
  lang = NULL; // language
  langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
  needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
  cpdwordmax = -1; // default: unlimited wordcount in compound words
  cpdmin = -1; // undefined
  cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
  cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
  cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
  cpdvowels_utf16_len=0; // vowels
  pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
  sfxappnd=NULL; // previous suffix for counting a special syllables BUG
  cpdsyllablenum=NULL; // syllable count incrementing flag
  checknum=0; // checking numbers, and word with numbers
  wordchars=NULL; // letters + spec. word characters
  wordchars_utf16=NULL; // letters + spec. word characters
  wordchars_utf16_len=0; // letters + spec. word characters
  ignorechars=NULL; // letters + spec. word characters
  ignorechars_utf16=NULL; // letters + spec. word characters
  ignorechars_utf16_len=0; // letters + spec. word characters
  version=NULL; // affix and dictionary file version string
  havecontclass=0; // flags of possible continuing classes (double affix)
  // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
  // in morphological description in dictionary file. It's often combined with PSEUDOROOT.
  lemma_present = FLAG_NULL;
  circumfix = FLAG_NULL;
  onlyincompound = FLAG_NULL;
  maxngramsugs = -1; // undefined
  nosplitsugs = 0;
  sugswithdots = 0;
  keepcase = 0;
  checksharps = 0;
  substandard = FLAG_NULL;
  fullstrip = 0;
 */

		private var _simpleFilterTable:Array;
		private var _mapFilterTable:Array;
		private var _iconvFilterTable:Array; // conversion table for the ICONV rule
		private var _oconvFilterTable:Array; // conversion table for the OCONV rule
		private var _breakTable:Array;       // list of characters in the BREAK rule
		private var _aliasfTable:Array;      // conversion table for the AF rule

		/* internal use only properties. */
		private var _pfxEntry:PrefixEntry;
		private var _sfxEntry:SuffixEntry;
		private var _optSfxEntry:OptimizedSuffixEntry;
		private var _optPfxEntry:OptimizedPrefixEntry;
		private var _dictMgr:DictionaryManager;

		/**
		 * Creates an empty rule set: all tables are allocated and every
		 * attribute is reset to its default. No dictionary manager is
		 * attached; assign <code>dictionaryManager</code> before calling
		 * <code>lookup()</code>.
		 */
		public function LinguisticRule()
		{
			this._prefixFlagTable = new Array();
			this._prefixKeyTable = new Array();
			this._suffixFlagTable = new Array();
			this._suffixKeyTable = new Array();
			this._optSuffixKeyTable = new Dictionary(true);
			this._optPrefixKeyTable = new Dictionary(true);

			this._simpleFilterTable = new Array();
			this._mapFilterTable = new Array();
			this._iconvFilterTable = new Array();
			this._oconvFilterTable = new Array();
			// No break points are added by default (Hunspell C adds -, ^-, -$).
			this._breakTable = new Array();
			this._aliasfTable = new Array();
//			this._phoneTable=new PhoneticTable();

			/* init the attributes */
			this.noSuggest = InternalConstants.FLAG_NULL;
			this.tryString = null;
			this.keyString = null;
			this.ignoredChars = null;
			this.wordChars = null;
			this.version = null;
			this.languageCode = null;
			this.forbiddenWord = InternalConstants.FORBIDDENWORD;
			this.needAffix = InternalConstants.FLAG_NULL;
			this.circumfix = InternalConstants.FLAG_NULL;
			this.maxNgramSuggestions = -1; // undefined
			this.nosplitSuggestions = 0;
			this.suggestionsWithDots = 0;
			this.fullStrip = 0;
			this.keepCase = 0;
			this.onlyInCompound = 0;
			this.flagMode = InternalConstants.FLAG_CHAR;
			this._contClasses = new Dictionary();

			this._dictMgr = null;
		}

		//----------------------------------------------------------------------
		//  Affix checking
		//----------------------------------------------------------------------

		/**
		 * Deprecated for now. Pre-optimization implementation of the affix
		 * check, kept for the planned complex-affix / compound-word work.
		 *
		 * <p>Tries the suffix pass first and, if that finds nothing, the
		 * prefix pass.</p>
		 *
		 * NOTE(review): the last two arguments handed to
		 * <code>optPrefixCheck()</code> are in (inCompound, needFlag) order
		 * while that method declares (needFlag, inCompound). The call is left
		 * exactly as it was; confirm the intended order before reviving this
		 * code path.
		 */
		public function affixCheck( word:String, needFlag:int, inCompound:int ):HashEntry {
			// suffix pass
			var rv:HashEntry = optSuffixCheck(word, needFlag, inCompound);
			if ( rv ) return rv;
			// nothing found: prefix pass
			return optPrefixCheck(word, 0, null, inCompound, needFlag);
		}

		/**
		 * Checks whether <code>word</code> can be derived from a dictionary
		 * entry by stripping affixes.
		 *
		 * <p>Passes, in order: one-level prefix (optionally crossed with a
		 * one-level suffix), one-level suffix and, only when
		 * <code>haveContClass</code> is set, two-level suffix followed by
		 * prefix + two-level suffix.</p>
		 *
		 * @param word       word to check; words shorter than two characters are rejected.
		 * @param needFlag   forwarded unchanged to the entry checks.
		 * @param inCompound forwarded unchanged to the one-level checks.
		 * @return the entry reported by the first successful pass, or null.
		 */
		public function affixCheck2( word:String, needFlag:int, inCompound:int ):HashEntry {
			if ( word.length < 2 ) return null;

			// one-level prefix, or one-level prefix + one-level suffix:
			// un->run, under->taker (hypothetical), milli->litre->s, d'->autre->s
			var rv:HashEntry = optPrefixCheck2(word, inCompound, needFlag);
			if ( rv ) return rv;

			// one-level suffix: drink->able, drink->s
			rv = optSuffixCheck2(word, 0, null, needFlag, inCompound);
			if ( rv || !this.haveContClass ) return rv;

			// double affix checking
			// two-level suffix: drink->able->s
			rv = optTwoSuffixCheck(word, 0, null, needFlag, 0);
			if ( rv ) return rv;

			// prefix followed by two-level suffix: un->drink->able->s
			return optTwoPrefixCheck(word, 0, needFlag);
		}

		/**
		 * Deprecated for now. Pre-optimization prefix check, kept for the
		 * planned complex-affix / compound-word work.
		 *
		 * NOTE(review): this method reads <code>_optPrefixKeyTable</code> as if
		 * each value were a single entry keyed by the full prefix, whereas
		 * <code>addOptPrefixEntry()</code> stores Arrays keyed by the first
		 * character. Verify before using it again.
		 */
		public function optPrefixCheck(word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int) :HashEntry {
			var rv:HashEntry = null;
			var tmpWord:String;
			// first handle the special case of 0 length prefixes
			if ( _optPrefixKeyTable[''] != undefined ) {
				_optPfxEntry = _optPrefixKeyTable[''];
				while ( _optPfxEntry ) {
					rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
					if ( rv ) {
						return rv;
					}
					_optPfxEntry = _optPfxEntry.nextElementWithKey;
				}
			}

			// now handle the general case
			for ( var i:int = 1; i < word.length; ++i ) {
				tmpWord = word.substr(0, i);
				if ( _optPrefixKeyTable[tmpWord] != undefined ) {
					_optPfxEntry = _optPrefixKeyTable[tmpWord];
					while ( _optPfxEntry ) {
						rv = _optPfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
						if ( rv ) {
							return rv;
						}
						_optPfxEntry = _optPfxEntry.nextElementWithKey;
					}
				}
			}

			return rv;
		}

		/**
		 * Checks one-level prefixes, optionally crossed with a one-level
		 * suffix (the crossing itself is done by the entry's
		 * <code>checkWord2()</code>).
		 *
		 * @return the first non-null result of <code>checkWord2()</code>, or null.
		 */
		public function optPrefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry {
			// zero-length prefixes first, then the bucket of the word's first character
			var rv:HashEntry = scanOptPrefixBucket('', word, inCompound, needFlag, false, false);
			if ( rv ) return rv;
			return scanOptPrefixBucket(word.charAt(0), word, inCompound, needFlag, false, true);
		}

		/**
		 * Checks a one-level prefix followed by a two-level suffix (the suffix
		 * part is done by the entry's <code>checkTwoWord()</code>).
		 *
		 * @return the first non-null result of <code>checkTwoWord()</code>, or null
		 *         (null in most cases).
		 */
		public function optTwoPrefixCheck(word:String, inCompound:int, needFlag:int) :HashEntry {
			// TODO: pfx / sfxrevkey bookkeeping will probably be needed for compound rules.
			var rv:HashEntry = scanOptPrefixBucket('', word, inCompound, needFlag, true, false);
			if ( rv ) return rv;
			return scanOptPrefixBucket(word.charAt(0), word, inCompound, needFlag, true, true);
		}

		/**
		 * Walks one bucket of the optimized prefix table and runs the entry
		 * check on every chain whose key can match <code>word</code>.
		 *
		 * @param key        bucket key ('' or the first character of the word).
		 * @param twoLevel   true to call <code>checkTwoWord()</code>, false for <code>checkWord2()</code>.
		 * @param pruneByKey true to skip entries whose key is not a prefix of the
		 *                   word, using the second character to stop early (the
		 *                   bucket is sorted on <code>affixKey</code> when entries are added).
		 */
		private function scanOptPrefixBucket(key:String, word:String, inCompound:int, needFlag:int, twoLevel:Boolean, pruneByKey:Boolean):HashEntry {
			if ( _optPrefixKeyTable[key] == undefined ) return null;

			var bucket:Array = _optPrefixKeyTable[key];
			var secondKeyNum:Number = word.charCodeAt(1);
			var sawSecondCharMatch:Boolean = false;

			for ( var i:int = 0; i < bucket.length; ++i ) {
				var entry:OptimizedPrefixEntry = bucket[i];

				if ( pruneByKey ) {
					if ( entry.affixKey.length != 1 ) {
						var entrySecond:Number = entry.affixKey.charCodeAt(1);
						if ( entrySecond > secondKeyNum ) break;
						if ( entrySecond < secondKeyNum ) {
							if ( sawSecondCharMatch ) break;
							continue;
						}
						sawSecondCharMatch = true;
					}
					if ( word.indexOf(entry.affixKey) != 0 ) continue;
				}

				// all entries sharing this key are chained through nextElementWithKey
				while ( entry ) {
					var rv:HashEntry = twoLevel
							? entry.checkTwoWord(word, inCompound, needFlag)
							: entry.checkWord2(word, inCompound, needFlag);
					if ( rv ) return rv;
					entry = entry.nextElementWithKey;
				}
			}
			return null;
		}

		/**
		 * Deprecated for now. Pre-optimization suffix check, kept for the
		 * planned complex-affix / compound-word work.
		 */
		public function optSuffixCheck( word:String, needFlag:int, inCompound:int):HashEntry {
			var rv:HashEntry = null;
			var tmpWord:String;
			// first handle the special case of 0 length suffixes
			if ( this._optSuffixKeyTable[''] != undefined ) {
				_optSfxEntry = this._optSuffixKeyTable[''];
				while ( _optSfxEntry ) {
					rv = _optSfxEntry.checkWord(word, inCompound, needFlag);
					if ( rv ) {
						return rv;
					}
					_optSfxEntry = _optSfxEntry.nextElementWithKey;
				}
			}
			// now handle the general case
			for ( var i:int = word.length - 1; i > 0; --i ) {
				tmpWord = word.substr(i);
				if ( _optSuffixKeyTable[tmpWord] != undefined ) {
					_optSfxEntry = _optSuffixKeyTable[tmpWord];
					while ( _optSfxEntry ) {
						rv = _optSfxEntry.checkWord(word, inCompound, needFlag);
						if ( rv ) {
							return rv;
						}
						_optSfxEntry = _optSfxEntry.nextElementWithKey;
					}
				}
			}

			return rv;
		}

		/**
		 * Performs all one-level suffix stripping. Also called from the other
		 * affix-stripping code.
		 *
		 * <p>On success the matching entry is remembered in
		 * <code>_optSfxEntry</code> (expected to be needed by the compound check).</p>
		 *
		 * TODO: the needaffix test on the prefix / first suffix is not implemented yet.
		 *
		 * @return the first non-null result of <code>checkWord2()</code>, or null.
		 */
		public function optSuffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int, cclass:int=0, pfxcclass:int=0):HashEntry {
			// zero-length suffixes first
			var rv:HashEntry = walkSuffixChain(this._optSuffixKeyTable[''] as OptimizedSuffixEntry,
					word, sfxopts, ppfx, needFlag, inCompound, cclass, pfxcclass);
			if ( rv ) return rv;

			// then every tail of the word, shortest first, up to the whole word
			for ( var i:int = word.length - 1; i >= 0; --i ) {
				rv = walkSuffixChain(_optSuffixKeyTable[word.substr(i)] as OptimizedSuffixEntry,
						word, sfxopts, ppfx, needFlag, inCompound, cclass, pfxcclass);
				if ( rv ) return rv;
			}
			return null;
		}

		/** Runs <code>checkWord2()</code> on every entry of a same-key chain; null head means an empty chain. */
		private function walkSuffixChain(head:OptimizedSuffixEntry, word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int, cclass:int, pfxcclass:int):HashEntry {
			for ( var entry:OptimizedSuffixEntry = head; entry; entry = entry.nextElementWithKey ) {
				var rv:HashEntry = entry.checkWord2(word, sfxopts, ppfx, inCompound, needFlag, cclass, pfxcclass);
				if ( rv ) {
					_optSfxEntry = entry; // will possibly be needed in the compound check
					return rv;
				}
			}
			return null;
		}

		/**
		 * Performs two-level suffix checking: only suffix entries carrying a
		 * flag registered in <code>contClasses</code> are tried, once per such flag.
		 *
		 * <p>On success the matching entry is remembered in <code>_optSfxEntry</code>.</p>
		 *
		 * @return the first non-null result of <code>checkTwoWord()</code>, or null
		 *         (null in most cases).
		 */
		public function optTwoSuffixCheck(word:String, sfxopts:int, ppfx:AffixEntry,needFlag:int,pfxcclass:int=0) :HashEntry {
			// zero-length suffixes first
			var rv:HashEntry = walkTwoSuffixChain(this._optSuffixKeyTable[''] as OptimizedSuffixEntry,
					word, sfxopts, ppfx, needFlag, pfxcclass);
			if ( rv ) return rv;

			// then every tail of the word, shortest first, up to the whole word
			for ( var i:int = word.length - 1; i >= 0; --i ) {
				rv = walkTwoSuffixChain(_optSuffixKeyTable[word.substr(i)] as OptimizedSuffixEntry,
						word, sfxopts, ppfx, needFlag, pfxcclass);
				if ( rv ) return rv;
			}
			return null;
		}

		/** Runs <code>checkTwoWord()</code> for each continuation-class flag of every entry in a same-key chain. */
		private function walkTwoSuffixChain(head:OptimizedSuffixEntry, word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, pfxcclass:int):HashEntry {
			for ( var entry:OptimizedSuffixEntry = head; entry; entry = entry.nextElementWithKey ) {
				for ( var j:int = 0; entry.flags && j < entry.flags.length; j++ ) {
					// only flags that can be a continuation class are worth checking further
					if ( this.contClasses[entry.flags[j]] == true ) {
						var rv:HashEntry = entry.checkTwoWord(word, sfxopts, ppfx, needFlag, entry.flags[j], pfxcclass);
						if ( rv ) {
							_optSfxEntry = entry; // will possibly be needed in the compound check
							return rv;
						}
					}
				}
			}
			return null;
		}

		/**
		 * Deprecated for now. Pre-optimization suffix check on the plain
		 * suffix key table, kept for the planned complex-affix /
		 * compound-word work.
		 */
		public function suffixCheck2( word:String, sfxopts:int, ppfx:AffixEntry, needFlag:int, inCompound:int):HashEntry {
			var rv:HashEntry = null;
			var tmpWord:String;
			// first handle the special case of 0 length suffixes
			if ( this._suffixKeyTable[''] != undefined ) {
				_sfxEntry = this._suffixKeyTable[''];
				while ( _sfxEntry ) {
					rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
					if ( rv ) {
						return rv;
					}
					_sfxEntry = _sfxEntry.nextElementWithKey;
				}
			}
			// now handle the general case
			for ( var i:int = word.length - 1; i > 0; --i ) {
				tmpWord = word.substr(i);
				if ( _suffixKeyTable[tmpWord] != undefined ) {
					_sfxEntry = _suffixKeyTable[tmpWord];
					while ( _sfxEntry ) {
						rv = _sfxEntry.checkWord(word, sfxopts, ppfx, inCompound, needFlag);
						if ( rv ) {
							return rv;
						}
						_sfxEntry = _sfxEntry.nextElementWithKey;
					}
				}
			}

			return rv;
		}

		/**
		 * Deprecated for now. Pre-optimization prefix check on the plain
		 * prefix key table, kept for the planned complex-affix /
		 * compound-word work.
		 */
		private function prefixCheck2(word:String, inCompound:int, needFlag:int) :HashEntry {
			var rv:HashEntry = null;
			var tmpWord:String;
			// first handle the special case of 0 length prefixes
			if ( _prefixKeyTable[''] != undefined ) {
				_pfxEntry = _prefixKeyTable[''];
				while ( _pfxEntry ) {
					rv = _pfxEntry.checkWord(word, inCompound, needFlag);
					if ( rv ) {
						return rv;
					}
					_pfxEntry = _pfxEntry.nextElementWithKey;
				}
			}

			// now handle the general case
			for ( var i:int = 1; i < word.length; ++i ) {
				tmpWord = word.substr(0, i);
				if ( _prefixKeyTable[tmpWord] != undefined ) {
					_pfxEntry = _prefixKeyTable[tmpWord];
					while ( _pfxEntry ) {
						rv = _pfxEntry.checkWord(word, inCompound, needFlag);
						if ( rv ) {
							return rv;
						}
						_pfxEntry = _pfxEntry.nextElementWithKey;
					}
				}
			}

			return rv;
		}

		/**
		 * Looks <code>word</code> up in the dictionaries of the attached
		 * dictionary manager, in list order.
		 *
		 * @return the first entry found, or null when the word is in none of
		 *         the dictionaries or no dictionary manager has been attached.
		 */
		public function lookup(word:String ):HashEntry {
			// The constructor leaves _dictMgr null until a manager is assigned.
			if ( this._dictMgr == null || this._dictMgr.dictonaryList == null ) return null;

			var he:HashEntry = null;
			for ( var i:int = 0; i < this._dictMgr.dictonaryList.length && !he; ++i ) {
				he = this._dictMgr.dictonaryList[i].getElement(word);
			}
			return he;
		}

		//----------------------------------------------------------------------
		//  Attribute accessors
		//----------------------------------------------------------------------

		public function set flagMode(value:int) :void {
			this._flagMode = value;
		}

		public function get flagMode():int {
			return this._flagMode;
		}

		public function set encoding(value:String) :void {
			this._encoding = value;
		}

		public function get encoding():String {
			return this._encoding;
		}

		public function set keepCase(value:Number):void {
			this._keepCase = value;
		}

		public function get keepCase():Number {
			return this._keepCase;
		}

		public function set haveContClass(value:Boolean):void {
			this._haveContClass = value;
		}

		public function get haveContClass():Boolean {
			return this._haveContClass;
		}

		public function set needAffix(value:Number):void {
			this._needAffix = value;
		}

		public function get needAffix():Number {
			return this._needAffix;
		}

		public function set circumfix(value:Number):void {
			this._circumfix = value;
		}

		public function get circumfix():Number {
			return this._circumfix;
		}

		public function set onlyInCompound(value:Number):void {
			this._onlyInCompound = value;
		}

		public function get onlyInCompound():Number {
			return this._onlyInCompound;
		}

		public function set dictionaryManager(value:DictionaryManager) :void {
			this._dictMgr = value;
		}

		public function get dictionaryManager():DictionaryManager {
			return this._dictMgr;
		}

		public function set fullStrip(value:int):void {
			this._fullStrip = value;
		}

		public function get fullStrip():int {
			return this._fullStrip;
		}

		public function set suggestionsWithDots(value:int):void {
			this._sugswithdots = value;
		}

		public function get suggestionsWithDots():int {
			return this._sugswithdots;
		}

		public function set nosplitSuggestions(value:int ) :void {
			this._nosplitsugs = value;
		}

		public function get nosplitSuggestions():int {
			return this._nosplitsugs;
		}

		public function set maxNgramSuggestions(value:int ) :void {
			this._maxngramsugs = value;
		}

		public function get maxNgramSuggestions():int {
			return this._maxngramsugs;
		}

		public function set version(value:String) :void {
			this._version = value;
		}

		public function get version():String {
			return this._version;
		}

		public function set languageCode(value:String) :void {
			this._languageCode = value;
		}

		public function get languageCode():String {
			return this._languageCode;
		}

		public function set wordChars(value:String):void {
			this._wordChars = value;
		}

		public function get wordChars():String {
			return this._wordChars;
		}

		//----------------------------------------------------------------------
		//  Filter tables
		//----------------------------------------------------------------------

		/**
		 * Registers a map filter unless one with the same character set is
		 * already present.
		 *
		 * @return true when the filter was added, false when it was a duplicate.
		 */
		public function addMapFilter(mapString:String ):Boolean {
			var mf:MapFilter = new MapFilter(mapString);
			for ( var i:int = 0; i < this._mapFilterTable.length; ++i ) {
				if ( this._mapFilterTable[i].mapCharSet == mapString ) {
					return false;
				}
			}
			this._mapFilterTable.push(mf);
			return true;
		}

		/**
		 * Registers a simple replacement filter unless the same
		 * match/replacement pair is already present.
		 *
		 * @return true when the filter was added, false when it was a duplicate.
		 */
		public function addSimpleFilter(matchString:String, replacement:String):Boolean {
			var sf:SimpleFilter = new SimpleFilter( matchString, replacement);
			for ( var i:int = 0; i < this._simpleFilterTable.length; ++i ) {
				var known:* = this._simpleFilterTable[i];
				if ( (known.matchString == matchString) && (known.replacement == replacement) ) {
					return false;
				}
			}
			this._simpleFilterTable.push(sf);
			return true;
		}

		/**
		 * Adds a match/replacement pair to the ICONV or OCONV table unless the
		 * same pair is already present.
		 *
		 * @param ioflag true for the ICONV (input) table, false for the OCONV (output) table.
		 * @return true when the pair was added, false when it was a duplicate.
		 */
		public function addConvFilter(matchString:String, replacement:String, ioflag:Boolean):Boolean {
			var convTable:Array = (ioflag == true) ? this._iconvFilterTable : this._oconvFilterTable;
			for ( var i:int = 0; convTable && i < convTable.length; ++i ) {
				if ( (convTable[i].matchString == matchString) && (convTable[i].replacement == replacement) ) {
					return false;
				}
			}
			var sf:SimpleFilter = new SimpleFilter( matchString, replacement);
			convTable.push(sf);
			return true;
		}

		//----------------------------------------------------------------------
		//  Affix registration
		//----------------------------------------------------------------------

		/**
		 * Creates a prefix or suffix entry and stores it in both the plain and
		 * the optimized affix tables.
		 *
		 * @param affixType 0 registers a prefix, any other value a suffix.
		 * @return false when <code>stripString</code>, <code>affixValue</code> or
		 *         <code>conditionsStr</code> is null or <code>conditionsStr</code>
		 *         is empty (nothing is stored); true otherwise.
		 */
		public function addAffixEntry(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0, contclass:String=null):Boolean{
			if ( stripString == null || affixValue == null || conditionsStr == null || conditionsStr == "" ) return false;

			if ( affixType == 0 ) {
				var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag, stripString, affixValue, conditionsStr, morph, permission, contclass);
				pfxEntry.attributeManager = this;
				addPrefixEntry(pfxEntry);
				addOptPrefixEntry(pfxEntry);
			} else {
				var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag, stripString, affixValue, conditionsStr, morph, permission, contclass);
				sfxEntry.attributeManager = this;
				addSuffixEntry(sfxEntry);
				addOptSuffixEntry(sfxEntry);
			}
			return true;
		}

		/**
		 * Stores a prefix in the optimized prefix table. Buckets are keyed by
		 * the first character of the affix key and kept sorted on
		 * <code>affixKey</code>; entries with an identical key are chained
		 * through <code>nextElementWithKey</code>, and an entry reported as
		 * similar by <code>isSimilarObject()</code> is extended instead of
		 * chaining a new one.
		 */
		private function addOptPrefixEntry(pfxEntry:PrefixEntry):Boolean {
			var hashKey:String = pfxEntry.affixKey.charAt(0);
			var optPfxEntry:OptimizedPrefixEntry = new OptimizedPrefixEntry(pfxEntry);
			optPfxEntry.attributeManager = this;

			if ( _optPrefixKeyTable[hashKey] == undefined ) {
				_optPrefixKeyTable[hashKey] = new Array();
				_optPrefixKeyTable[hashKey].push(optPfxEntry);
				return true;
			}

			for each ( var optPfxKeyEntry:OptimizedPrefixEntry in _optPrefixKeyTable[hashKey] ) {
				if ( optPfxKeyEntry.affixKey != pfxEntry.affixKey ) continue;

				// same key: merge into a similar entry or append to the end of the chain
				while ( optPfxKeyEntry.nextElementWithKey != null ) {
					if ( optPfxKeyEntry.isSimilarObject(pfxEntry) ) {
						optPfxKeyEntry.extendObject(pfxEntry);
						return true;
					}
					optPfxKeyEntry = optPfxKeyEntry.nextElementWithKey;
				}
				if ( optPfxKeyEntry.isSimilarObject(pfxEntry) ) {
					optPfxKeyEntry.extendObject(pfxEntry);
					return true;
				}
				optPfxKeyEntry.nextElementWithKey = optPfxEntry;
				return true;
			}

			// new key inside an existing bucket: keep the bucket ordered for the pruned scan
			_optPrefixKeyTable[hashKey].push(optPfxEntry);
			_optPrefixKeyTable[hashKey].sortOn("affixKey");
			return true;
		}

		/**
		 * Stores a suffix in the optimized suffix table, keyed by the full
		 * affix key. Entries with the same key are chained through
		 * <code>nextElementWithKey</code>; an entry reported as similar by
		 * <code>isSimilarObject()</code> is extended instead of chaining a new one.
		 */
		private function addOptSuffixEntry(sfxEntry:SuffixEntry):Boolean {
			var optSfxEntry:OptimizedSuffixEntry;

			if ( _optSuffixKeyTable[sfxEntry.affixKey] == undefined ) {
				optSfxEntry = new OptimizedSuffixEntry(sfxEntry);
				optSfxEntry.attributeManager = this;
				_optSuffixKeyTable[sfxEntry.affixKey] = optSfxEntry;
				return true;
			}

			var optSfxKeyEntry:OptimizedSuffixEntry = _optSuffixKeyTable[sfxEntry.affixKey];
			while ( optSfxKeyEntry.nextElementWithKey != null ) {
				if ( optSfxKeyEntry.isSimilarObject(sfxEntry) ) {
					optSfxKeyEntry.extendObject(sfxEntry);
					return true;
				}
				optSfxKeyEntry = optSfxKeyEntry.nextElementWithKey;
			}
			if ( optSfxKeyEntry.isSimilarObject(sfxEntry) ) {
				optSfxKeyEntry.extendObject(sfxEntry);
				return true;
			}
			optSfxEntry = new OptimizedSuffixEntry(sfxEntry);
			optSfxEntry.attributeManager = this;
			optSfxKeyEntry.nextElementWithKey = optSfxEntry;
			return true;
		}

		/**
		 * Appends a prefix to the plain prefix tables: once to the chain of its
		 * flag (keyed by the flag's character) and once to the chain of its
		 * affix key.
		 *
		 * ToDo: prefix and suffix insertion could be merged to reduce code
		 * size; they are kept separate while the two entry classes may still
		 * diverge.
		 */
		private function addPrefixEntry(pfxEntry:PrefixEntry):Boolean {
			var flagChar:String = String.fromCharCode(pfxEntry.flag);

			// flag table
			if ( _prefixFlagTable[flagChar] == undefined ) {
				_prefixFlagTable[flagChar] = pfxEntry;
			} else {
				var pfxFlagEntry:PrefixEntry = _prefixFlagTable[flagChar];
				while ( pfxFlagEntry.nextElementWithFlag != null ) {
					pfxFlagEntry = pfxFlagEntry.nextElementWithFlag;
				}
				pfxFlagEntry.nextElementWithFlag = pfxEntry;
			}

			// key table
			if ( _prefixKeyTable[pfxEntry.affixKey] == undefined ) {
				_prefixKeyTable[pfxEntry.affixKey] = pfxEntry;
			} else {
				var pfxKeyEntry:PrefixEntry = _prefixKeyTable[pfxEntry.affixKey];
				while ( pfxKeyEntry.nextElementWithKey != null ) {
					pfxKeyEntry = pfxKeyEntry.nextElementWithKey;
				}
				pfxKeyEntry.nextElementWithKey = pfxEntry;
			}
			return true;
		}

		/**
		 * Appends a suffix to the plain suffix tables: once to the chain of its
		 * flag (keyed by the flag's character) and once to the chain of its
		 * affix key.
		 *
		 * ToDo: see <code>addPrefixEntry()</code> about merging the two insert paths.
		 */
		private function addSuffixEntry(sfxEntry:SuffixEntry ):Boolean {
			var flagChar:String = String.fromCharCode(sfxEntry.flag);

			// flag table
			if ( _suffixFlagTable[flagChar] == undefined ) {
				_suffixFlagTable[flagChar] = sfxEntry;
			} else {
				var sfxFlagEntry:SuffixEntry = _suffixFlagTable[flagChar];
				while ( sfxFlagEntry.nextElementWithFlag != null ) {
					sfxFlagEntry = sfxFlagEntry.nextElementWithFlag;
				}
				sfxFlagEntry.nextElementWithFlag = sfxEntry;
			}

			// key table
			if ( _suffixKeyTable[sfxEntry.affixKey] == undefined ) {
				_suffixKeyTable[sfxEntry.affixKey] = sfxEntry;
			} else {
				var sfxKeyEntry:SuffixEntry = _suffixKeyTable[sfxEntry.affixKey];
				while ( sfxKeyEntry.nextElementWithKey != null ) {
					sfxKeyEntry = sfxKeyEntry.nextElementWithKey;
				}
				sfxKeyEntry.nextElementWithKey = sfxEntry;
			}
			return true;
		}

		/**
		 * Deprecated for now. Pre-optimization registration that fills only the
		 * plain affix tables (no continuation class, no optimized tables); kept
		 * for the planned complex-affix / compound-word work.
		 *
		 * @return false when <code>stripString</code>, <code>affixValue</code> or
		 *         <code>conditionsStr</code> is null or <code>conditionsStr</code>
		 *         is empty; true otherwise.
		 */
		public function addAffixEntry2(affixFlag:int, stripString:String, affixValue:String, conditionsStr:String, morph:String = "", permission:Boolean = false, affixType:int = 0):Boolean{
			if ( stripString == null || affixValue == null || conditionsStr == null || conditionsStr == "" ) return false;

			if ( affixType == 0 ) {
				var pfxEntry:PrefixEntry = new PrefixEntry(affixFlag, stripString, affixValue, conditionsStr, morph, permission);
				pfxEntry.attributeManager = this;
				addPrefixEntry(pfxEntry);
			} else {
				var sfxEntry:SuffixEntry = new SuffixEntry(affixFlag, stripString, affixValue, conditionsStr, morph, permission);
				sfxEntry.attributeManager = this;
				addSuffixEntry(sfxEntry);
			}
			return true;
		}

		//----------------------------------------------------------------------
		//  Table and attribute accessors
		//----------------------------------------------------------------------

		public function get prefixFlagTable():Array {
			return this._prefixFlagTable;
		}

		public function get prefixKeyTable():Array {
			return this._prefixKeyTable;
		}

		public function get suffixFlagTable():Array {
			return this._suffixFlagTable;
		}

		public function get suffixKeyTable():Array {
			return this._suffixKeyTable;
		}

		public function set forbiddenWord(value:Number) :void {
			this._forbiddenWord = value;
		}

		public function get forbiddenWord():Number {
			return this._forbiddenWord;
		}

		public function set ignoredChars(value:String ) :void {
			this._ignoredChars = value;
		}

		public function get ignoredChars():String {
			return this._ignoredChars;
		}

		public function set keyString(value:String):void {
			this._keyString = value;
		}

		/** Returns the key string; when none was set it is first defaulted to <code>InternalConstants.SPELL_KEYSTRING</code>. */
		public function get keyString():String {
			if ( this._keyString == null ) this._keyString = InternalConstants.SPELL_KEYSTRING;
			return this._keyString;
		}

		public function set tryString(value:String):void {
			this._tryString = value;
		}

		public function get tryString():String {
			return this._tryString;
		}

		public function get contClasses():Dictionary {
			return _contClasses;
		}

		public function set noSuggest(value:Number ):void {
			this._noSuggest = value;
		}

		public function get noSuggest():Number {
			return this._noSuggest;
		}

		public function get simpleFilterTable():Array {
			return this._simpleFilterTable;
		}

		public function get iconvFilterTable():Array {
			return this._iconvFilterTable;
		}

		public function get oconvFilterTable():Array {
			return this._oconvFilterTable;
		}

/*		public function get phoneTable():PhoneticTable {
			return this._phoneTable;
		}
*/
		public function get breakTable():Array {
			return this._breakTable;
		}

		public function get aliasfTable():Array{
			return this._aliasfTable;
		}

		public function get mapFilterTable():Array {
			return this._mapFilterTable;
		}

		/**
		 * Applies the ICONV or OCONV conversion table to <code>word</code>.
		 * Called whenever an input or output conversion is needed.
		 *
		 * <p>Filters are applied in table order, each one to the result of the
		 * previous one. Within one filter every occurrence is replaced from
		 * left to right and the search resumes <em>after</em> the inserted
		 * replacement, so a replacement that contains its own pattern cannot
		 * loop forever and occurrences following a shorter or longer
		 * replacement are not skipped or re-scanned. Filters with an empty
		 * pattern are ignored.</p>
		 *
		 * @param word     word to convert.
		 * @param convWord output array; when the selected table is not empty,
		 *                 exactly one element is pushed: the converted word, or
		 *                 null when no filter matched.
		 * @param ioflag   true for the ICONV (input) table, false for the OCONV (output) table.
		 * @return true when at least one replacement was made.
		 */
		public function conv(word:String,convWord:Array,ioflag:Boolean):Boolean{
			var convTable:Array = (ioflag) ? this._iconvFilterTable : this._oconvFilterTable;
			if ( (convTable == null) || (convTable.length == 0) ) return false;

			var change:Boolean = false;
			for ( var i:int = 0; word != null && i < convTable.length; ++i ) {
				var pattern:String = convTable[i].matchString;
				// An empty pattern matches at every index and would never terminate.
				if ( pattern == null || pattern.length == 0 ) continue;

				// Every filter scans the current word from its beginning.
				var searchIndex:int = 0;
				var hit:int;
				while ( (hit = word.indexOf(pattern, searchIndex)) != -1 ) {
					var head:String = word.substr(0, hit) + convTable[i].replacement;
					word = head + word.substr(hit + pattern.length);
					// Resume right after the text that was just inserted.
					searchIndex = head.length;
					change = true;
				}
			}

			convWord.push(change ? word : null);
			return change;
		}

	}
}
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as b/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as deleted file mode 100644 index 2b874dd..0000000 --- a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SpellingInfo.as +++ /dev/null @@ -1,32 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -////////////////////////////////////////////////////////////////////////////////


package com.adobe.linguistics.spelling.core
{
	/**
	 * Mutable holder for a numeric spelling-information value, so that a
	 * function can report extra result bits through an argument.
	 */
	public class SpellingInfo
	{
		/** The current information value. */
		public var Info:Number;

		/**
		 * @param information initial value stored in Info.
		 */
		public function SpellingInfo(information:Number)
		{
			this.Info = information;
		}

	}
}
\ No newline at end of file http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as b/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as deleted file mode 100644 index 4b80d7c..0000000 --- a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyDictionary.as +++ /dev/null @@ -1,207 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -////////////////////////////////////////////////////////////////////////////////



package com.adobe.linguistics.spelling.core
{

	import com.adobe.linguistics.spelling.core.container.HashTable;
	import com.adobe.linguistics.spelling.core.env.InternalConstants;
	import com.adobe.linguistics.spelling.core.error.*;
	import com.adobe.linguistics.spelling.core.utils.StringUtils;

	import flash.utils.Dictionary;


	/**
	 * Word list backing the spelling engine: maps each word to a HashEntry
	 * (affix flag vector plus description), with homonyms chained through
	 * HashEntry.next.
	 */
	public class SquigglyDictionary
	{
		private var _hashtbl:HashTable;
		private var _forbiddenword:int;
		private var _ignoredCharSet:String;
		private var _flag_mode:int;
		private var _languageCode:String;



		/**
		 * Creates an empty dictionary.
		 *
		 * @param attrMgr linguistic rule set the dictionary belongs to. When it
		 *                is null the built-in defaults are installed.
		 */
		public function SquigglyDictionary(attrMgr:LinguisticRule)
		{
			_hashtbl = new HashTable( true ); // useWeakReferences:Boolean = true

			// NOTE(review): when attrMgr is supplied nothing is copied from it, so
			// _forbiddenword and _flag_mode stay 0 and _ignoredCharSet stays null.
			// Confirm whether they should be taken from attrMgr instead.
			if ( !attrMgr) {
				_forbiddenword = InternalConstants.FORBIDDENWORD;
				_ignoredCharSet = null;
				_flag_mode = InternalConstants.FLAG_CHAR;
				_languageCode = null;
			}
		}

		private function set forbiddenword(value:int ) :void {
			this._forbiddenword = value;
		}

		/** Flag that marks a word as forbidden. */
		public function get forbiddenword():int {
			return this._forbiddenword;
		}

		private function set ignoredCharSet(value:String):void {
			this._ignoredCharSet = value;
		}

		/** Characters stripped from every word before it is stored; may be null. */
		public function get ignoredCharSet():String {
			return this._ignoredCharSet;
		}

		private function set flag_mode(value:int):void {
			this._flag_mode = value;
		}

		public function get flag_mode():int {
			return this._flag_mode;
		}

		private function set languageCode(value:String ) :void {
			this._languageCode = value;
		}

		public function get languageCode():String {
			return this._languageCode;
		}

		/** @return true when the table holds an entry for key. */
		public function containsKey(key:String ):Boolean
		{
			return _hashtbl.containsKey(key );
		}

		/**
		 * @return the first HashEntry stored for key, or null when there is
		 *         none (or the stored value is not a HashEntry).
		 */
		public function getElement( key:String ):HashEntry {
			var res:* = _hashtbl.getElement(key );
			return (res is HashEntry) ? res:null;
		}

		/** Alias of addWord(). */
		public function put(key:String, affixString:String=null, description:String = null):Boolean {
			return addWord( key, affixString, description );
		}

		/** The underlying word map. */
		public function get dictionary():Dictionary {
			return this._hashtbl.hashMap;
		}

		/**
		 * Collects every key for which the callback returns true.
		 *
		 * @param callback   invoked as callback(key, index, result), where index
		 *                   is the zero-based position in iteration order and
		 *                   result is the array of keys accepted so far.
		 * @param thisObject object used as "this" inside the callback.
		 * @return the accepted keys, or null when no key was accepted.
		 */
		public function filter( callback:Function, thisObject:* = null):Array {
			var res:Array = new Array();
			var index:int = 0;
			var dict:Dictionary = this._hashtbl.hashMap;
			for ( var key:* in dict ) {
				if ( callback.call( thisObject, key, index, res ) ) {
					res.push( key );
				}
				index++;
			}

			return (res.length == 0) ? null: res;
		}


		/**
		 * Adds a word, plus its hidden capitalized form when the word's
		 * capitalization pattern calls for one.
		 *
		 * @param word  the word to add; null is rejected.
		 * @param affix affix flag vector of the word, or null.
		 * @param desc  description fields of the word, or null.
		 * @return true when the word itself was stored.
		 */
		public function addWord( word:String, affix:String = null, desc:String = null ) :Boolean {
			if ( word == null ) return false;
			var captype:int = StringUtils.getCapType(word);
			var res:Boolean = addWordWithAffix(word,affix,desc,false );
			addHiddenCapitalizedWord(word,captype, affix,desc);
			return res;
		}

		/**
		 * Adds the hidden init-capitalized form needed to accept all-caps input:
		 *   Mixed caps:            OpenOffice.org -> OPENOFFICE.ORG
		 *   Allcaps with suffixes: CIA's -> CIA'S
		 *
		 * The hidden entry carries the word's own flags plus ONLYUPCASEFLAG.
		 * Forbidden words get no hidden form.
		 *
		 * @return always true.
		 */
		private function addHiddenCapitalizedWord( word:String, captype:int, affix:String=null, desc:String=null ) :Boolean {
			var needsHiddenForm:Boolean =
				(captype == InternalConstants.HUHCAP) ||
				(captype == InternalConstants.HUHINITCAP) ||
				((captype == InternalConstants.ALLCAP) && (affix != null));
			var forbidden:Boolean = (affix != null) && HashEntry.TESTAFF(affix, _forbiddenword);

			if ( needsHiddenForm && !forbidden ) {
				// A null String concatenates as the text "null" in ActionScript,
				// which would add the bogus flags 'n', 'u' and 'l'; start from "".
				var hiddenAffix:String = ((affix != null) ? affix : "") + String.fromCharCode(InternalConstants.ONLYUPCASEFLAG);
				word = word.toLocaleLowerCase();
				word = word.charAt(0).toLocaleUpperCase() + word.substr(1);
				addWordWithAffix(word,hiddenAffix,desc,true);
			}
			return true;
		}

		/**
		 * Stores one (word, affix, desc) triple.
		 *
		 * - New word: a fresh HashEntry is inserted.
		 * - Existing word, regular entry: a hidden only-upcase homonym, if
		 *   present, is overwritten; otherwise the entry is appended to the
		 *   homonym chain.
		 * - Existing word, hidden entry (onlyupcase): nothing is stored, because
		 *   a real or hidden entry is already present.
		 *
		 * ToDo: complex-prefix word reversal, morphological aliases and
		 * phonetic options are not supported yet.
		 *
		 * @return true when the table was modified, false otherwise.
		 */
		private function addWordWithAffix( word:String, affix:String, desc:String, onlyupcase:Boolean ):Boolean {
			if (_ignoredCharSet != null) {
				word = StringUtils.removeIgnoredChars(word, _ignoredCharSet);
			}

			if ( !_hashtbl.containsKey(word) ) {
				_hashtbl.put(word, new HashEntry(affix,desc) );
				return true;
			}

			// A hidden form never displaces or duplicates an existing entry.
			if ( onlyupcase ) return false;

			var hentry:HashEntry = _hashtbl.getElement(word);
			while ( true ) {
				// replace a hidden onlyupcase homonym with the real word
				if ( (hentry.affixFlagVector != null) && hentry.testAffix(InternalConstants.ONLYUPCASEFLAG) ) {
					hentry.affixFlagVector = affix;
					hentry.variableFields = desc; /* needs a better implementation, see the ToDo above */
					return true;
				}
				if ( hentry.next == null ) break;
				hentry = hentry.next;
			}

			// no hidden homonym found: append to the tail of the chain
			hentry.addEntry(affix,desc);
			return true;
		}


	}
}
http://git-wip-us.apache.org/repos/asf/flex-utilities/blob/4e4f9830/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as ---------------------------------------------------------------------- diff --git a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as b/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as deleted file mode 100644 index d7fdc5c..0000000 --- 
a/Squiggly/main/AdobeSpellingEngine/src/com/adobe/linguistics/spelling/core/SquigglyEngine.as +++ /dev/null @@ -1,426 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// -// Licensed to the Apache Software Foundation (ASF) under one or more -// contributor license agreements. See the NOTICE file distributed with -// this work for additional information regarding copyright ownership. -// The ASF licenses this file to You under the Apache License, Version 2.0 -// (the "License"); you may not use this file except in compliance with -// the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -////////////////////////////////////////////////////////////////////////////////


package com.adobe.linguistics.spelling.core
{

	import com.adobe.linguistics.spelling.core.env.ExternalConstants;
	import com.adobe.linguistics.spelling.core.env.InternalConstants;
	import com.adobe.linguistics.spelling.core.utils.*;

	/**
	 * Spell checking and suggestion front end. Combines a LinguisticRule
	 * (affix data) with one or more SquigglyDictionary word lists.
	 */
	public class SquigglyEngine
	{

		private var _ignoreCappedWord:Boolean; // Hello is always correct
		private var _ignoreAllUpperCase:Boolean; // HELLO is always correct
		private var _ignoreWordWithNumber:Boolean; // win2003 is always correct
		private var wordBreak:Array; // Used to hold BREAK characters for BREAK rule
		private const SPELL_COMPOUND:int = (1 << 0);
		private const SPELL_FORBIDDEN:int = (1 << 1);
		private const SPELL_ALLCAP:int = (1 << 2);
		private const SPELL_NOCAP:int = (1 << 3);
		private const SPELL_INITCAP:int = (1 << 4);

		private const MAXDIC:int = 20;
		private const MAXSHARPS:int = 5;

		private var attributeMgr:LinguisticRule;
		private var dictMgr:DictionaryManager;
		private var sugestionMgr:SuggestionManager;
		private var encoding:String;
		private var wordbreak:Array;//an Array that holds the word breaks
		private var langCode:int;
		private var complexPrefixes:int;
		private var maxWordLength:int;

		/**
		 * @param rule affix/rule data; must not be null.
		 * @param dict initial dictionary; must not be null.
		 * @throws Error (id 200901) when either argument is null.
		 */
		public function SquigglyEngine( rule:LinguisticRule, dict:SquigglyDictionary )
		{
			if ( rule == null ) throw new Error("illegal argument for constructor", 200901);
			if ( dict == null ) throw new Error("illegal argument for constructor", 200901);

			maxWordLength = InternalConstants.MAXWORDLEN;

			dictMgr = new DictionaryManager();
			dictMgr.addDictionary(dict);
			attributeMgr = rule;
			attributeMgr.dictionaryManager = dictMgr;
			sugestionMgr = new SuggestionManager( rule, false);
			this.wordbreak=attributeMgr.breakTable;
			this.ignoreWordWithNumber = false;
			this.ignoreCappedWord = false;
			this.ignoreAllUpperCase = false;

		}

		/** When true, any word containing a digit is accepted. */
		public function set ignoreWordWithNumber( value:Boolean):void {
			this._ignoreWordWithNumber =value;
		}
		public function get ignoreWordWithNumber():Boolean {
			return this._ignoreWordWithNumber;
		}

		/** When true, capitalized words without digits are accepted. */
		public function set ignoreCappedWord(value:Boolean):void {
			this._ignoreCappedWord = value;
		}
		public function get ignoreCappedWord():Boolean {
			return this._ignoreCappedWord;
		}

		/** When true, all-upper-case words without digits are accepted. */
		public function set ignoreAllUpperCase(value:Boolean ):void {
			this._ignoreAllUpperCase = value;
		}
		public function get ignoreAllUpperCase():Boolean {
			return this._ignoreAllUpperCase;
		}

		/** Forwarded to the suggestion manager. */
		public function set fastMode(value:Boolean ) :void {
			this.sugestionMgr.fastMode = value;
		}

		public function get fastMode():Boolean {
			return this.sugestionMgr.fastMode;
		}

		/** Adds another word list to search. */
		public function addDictionary( dict:SquigglyDictionary ) : Boolean {
			return dictMgr.addDictionary(dict);
		}

		/**
		 * Checks the spelling of a single word.
		 *
		 * Steps: input conversion (ICONV), trimming, stripping of trailing
		 * periods, number detection, the ignore* options, capitalization-aware
		 * dictionary/affix lookup, and finally splitting at BREAK strings.
		 *
		 * @param word the word to check.
		 * @return true when the word is accepted; false otherwise, including for
		 *         null, empty or over-long input and when no dictionary is
		 *         loaded.
		 */
		public function spell( word:String ) :Boolean {
			if ( word == null || word.length > maxWordLength ) return false;

			word = StringUtils.normalize(word);

			var captype:int = InternalConstants.NOCAP;
			var hasNumber:Boolean =false; //assuming that there are no numbers;
			var abbv:int = 0;
			var i:int;
			var rv:HashEntry = null;
			var info:SpellingInfo = new SpellingInfo(0);
			var wspace:String;

			// input conversion USING ICONV TABLE
			var convWord:Array=new Array();
			if(this.attributeMgr && this.attributeMgr.iconvFilterTable && this.attributeMgr.iconvFilterTable.length!=0){
				this.attributeMgr.conv(word,convWord,InternalConstants.CONV_ICONV);
				wspace=convWord.pop();
				if(wspace) word=wspace;
			}

			// first skip over any leading or trailing blanks
			word = StringUtils.trim( word );

			// now strip off any trailing periods (recording their presence)
			for ( i = word.length-1; (i>=0) && (word.charCodeAt(i) == 46) ; --i ) { // '.'
				abbv++;
			}
			word = word.substr(0, word.length- abbv );
			captype = StringUtils.getCapType(word);
			hasNumber=StringUtils.getHasNumber(word);
			if ( (dictMgr.isEmpty()) || (word.length == 0) ) return false;

			// allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
			const NBEGIN:int = 0, NNUM:int=1, NSEP:int=2;
			var nstate:int = NBEGIN;
			var charCode:int;
			for ( i=0 ; i < word.length ; ++i ) {
				charCode = word.charCodeAt(i);
				if ( (charCode <= 57 ) && ( charCode >= 48) ) { // '0' to '9'
					nstate = NNUM;
				}else if ( (charCode==44) || (charCode==45) || (charCode==46) ) { //',' or '.' or '-'
					if ( (nstate == NSEP) || ( i==0 ) ) return false;
					nstate = NSEP;
				}else break;
			}
			if ( (i==word.length) && ( nstate == NNUM ) ) return true; // the whole word is a number

			// ignore word with Number.
			if ( ignoreWordWithNumber && hasNumber) return true;

			// ignore capitalized word or ignore all upper case word (only if it has no number).
			if ( (ignoreCappedWord &&( (captype&InternalConstants.HUHINITCAP) || (captype&InternalConstants.INITCAP))&&(hasNumber==false) ) || (ignoreAllUpperCase&&(captype & InternalConstants.ALLCAP)&&(hasNumber==false)) ) return true;

			// The dotted variants are checked through temporaries: "word" itself
			// must stay free of the stripped periods, otherwise the later
			// fallbacks would test "Hello." and "Hello.." and never "hello".
			switch(captype) {
				case InternalConstants.HUHCAP:
				case InternalConstants.HUHINITCAP:
				case InternalConstants.NOCAP:
					rv = checkWord(word,info);
					if ( (abbv!=0) && (rv == null ) ) {
						rv = checkWord(word + ".",info);
					}
					break;
				case InternalConstants.ALLCAP:
					rv = checkWord(word,info);
					if( rv ) break;
					if ( (abbv!=0 ) ) {
						rv = checkWord(word + ".",info);
						if ( rv ) break;
					}
					// ToDo: Spec. prefix handling for Catalan, French, Italian:
					// prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
					// ToDo: sharp s handling.

					word = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
					// intentional fall-through: retry as an init-capitalized word

				case InternalConstants.INITCAP:
					if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
					wspace = word.toLocaleLowerCase();
					rv = checkWord(word,info);
					if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;

					// forbid bad capitalization
					// (for example, ijs -> Ijs instead of IJs in Dutch)
					// use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
					if (info.Info & ExternalConstants.SPELL_FORBIDDEN) {
						rv = null;
					}

					rv = rejectKeepCaseHit(rv, captype);
					if ( rv) break;

					rv = checkWord(wspace,info);
					if ( !rv && abbv ) {
						rv = checkWord(wspace + ".",info);
						if ( !rv) {
							if (captype == InternalConstants.INITCAP) info.Info +=ExternalConstants.SPELL_INITCAP;
							rv = checkWord(word + ".",info);
							if (captype == InternalConstants.INITCAP) info.Info -=ExternalConstants.SPELL_INITCAP;
							rv = rejectKeepCaseHit(rv, captype);
						}
					}
					rv = rejectKeepCaseHit(rv, captype);
					break;
				default:
			}

			if ( rv ) return true;

			// break-table handling: recursive breaking at break points
			if(wordbreak){
				var nbr:int=0;
				var parseArr:Array;
				var searchIndex:int=0;
				for(i=0; i<wordbreak.length;i++){
					// count the break points in this word
					searchIndex=0;
					wspace=word;
					while (wspace && ((searchIndex=wspace.indexOf(wordbreak[i])) != -1 )) {
						nbr++;
						if(nbr>InternalConstants.MAX_WORD_BREAKS) return false; // limit the number of word breaks
						if(searchIndex<word.length)wspace=wspace.substr(searchIndex+1);
					}
				}

				for(var j:int=0; j<wordbreak.length;j++){
					// Split literally, like the indexOf() count above, and recurse
					// only when the word was really divided; recursing on the
					// unchanged word would never terminate.
					parseArr = word.split(wordbreak[j]);
					if ( parseArr != null && parseArr.length > 1 ) {
						for(i=0;i<parseArr.length;i++)
							if(! spell(parseArr[i]) ) return false; // any wrong part makes the whole word wrong

						return true; // every part is spelled correctly
					}
				}
			}

			return false;
		}

		/**
		 * Applies the KEEPCASE restriction: an entry found for all-caps input
		 * through a re-capitalized form is discarded when it carries the
		 * KEEPCASE flag.
		 *
		 * @return rv, or null when the hit has to be rejected.
		 */
		private function rejectKeepCaseHit( rv:HashEntry, captype:int ):HashEntry {
			if ( rv && (captype == InternalConstants.ALLCAP ) ) {
				if ( attributeMgr && rv.affixFlagVector && attributeMgr.keepCase && rv.testAffix(attributeMgr.keepCase) ) return null;
			}
			return rv;
		}

		/**
		 * Produces spelling suggestions for a word.
		 *
		 * @param word the (presumably misspelled) word.
		 * @return the suggestions, or null when there are none, when word is
		 *         null, empty or over-long, or when no dictionary is loaded.
		 */
		public function suggest( word:String ) : Array {
			if ( word == null || word.length > maxWordLength ) return null;
			var captype:int = InternalConstants.NOCAP;
			var capwords:int = 0;

			var abbv:int = 0;
			var i:int,ns:int;
			var wspace:String;
			var slst:Array = new Array();
			var convWord:Array=new Array();
			// input conversion USING ICONV TABLE (same null checks as spell())
			if(this.attributeMgr && this.attributeMgr.iconvFilterTable && this.attributeMgr.iconvFilterTable.length!=0){
				this.attributeMgr.conv(word,convWord,InternalConstants.CONV_ICONV);
				wspace=convWord.pop();
				if(wspace)word=wspace;
			}

			// first skip over any leading or trailing blanks
			word = StringUtils.trim( word );
			// now strip off any trailing periods (recording their presence)
			for ( i = word.length-1; (i>=0) && (word.charCodeAt(i) == 46) ; --i ) { // '.'
				abbv++;
			}
			word = word.substr(0, word.length- abbv );
			captype = StringUtils.getCapType(word);
			if ( (dictMgr.isEmpty()) || (word.length == 0) ) return null;
			switch(captype) {
				case InternalConstants.NOCAP: {
					ns = sugestionMgr.suggest( slst, word, InternalConstants.NOCAP );
					break;
				}
				case InternalConstants.INITCAP:{
					capwords = 1;
					ns = sugestionMgr.suggest( slst, word, InternalConstants.INITCAP );
					if ( ns == -1) break;
					wspace = word.toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
					break;
				}
				case InternalConstants.HUHINITCAP:{
					capwords = 1;
					// intentional fall-through
				}
				case InternalConstants.HUHCAP: { // ToDo: still a lot of work...
					ns = sugestionMgr.suggest( slst, word, InternalConstants.HUHCAP );
					break;
				}
				case InternalConstants.ALLCAP: {
					wspace = word.toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.NOCAP );
					if ( ns == -1) break;
					if ( this.attributeMgr.keepCase && spell(word ) ) {
						//ns = insert_sug(slst, wspace, ns); ToDo
					}
					wspace = word.charAt(0).toUpperCase()+word.slice(1).toLocaleLowerCase();
					ns = sugestionMgr.suggest( slst, wspace, InternalConstants.INITCAP );
					break;
				}
			}

			// try ngram approach
			// NOTE(review): this runs even when suggestions were already found;
			// confirm whether it was meant to run only on an empty list.
			if ( this.attributeMgr && (this.attributeMgr.maxNgramSuggestions != 0)) {
				ns = sugestionMgr.nsuggest(slst,word);
			}

			// ToDo: try dash suggestion (Afo-American -> Afro-American)

			// capitalize
			if (capwords) {
				for ( i=0;i<slst.length; ++i ) {
					slst[i] = slst[i].charAt(0).toUpperCase()+slst[i].slice(1);
				}
			}

			// expand suggestions with dot(s)
			if ( abbv && this.attributeMgr.suggestionsWithDots ) {
				for ( i=0;i<slst.length; ++i ) {
					slst[i] += ".";
				}

			}

			// ToDo: remove bad capitalized and forbidden forms

			// remove original one (walk backwards so that splice() cannot make
			// the loop skip the element following a removed one)
			for ( i=slst.length-1;i>=0;--i) {
				if ( slst[i] == word )
					slst.splice(i,1);
			}

			// ToDo: remove duplications

			// output conversion
			if(this.attributeMgr && this.attributeMgr.oconvFilterTable && this.attributeMgr.oconvFilterTable.length!=0){
				for(i=0;i<slst.length;++i){
					// fresh buffer per call: conv() pushes a result even when it
					// reports "no change", which must not pile up between calls
					var converted:Array = new Array();
					if(this.attributeMgr.conv(slst[i],converted,InternalConstants.CONV_OCONV)) {
						slst[i]=converted.pop();
					}
				}
			}

			// ToDo: handle suggestions removed by nosuggest, onlyincompound parameters

			return (slst.length!=0) ? slst :null;
		}

		/**
		 * Looks a word up in the dictionaries and, failing that, through affix
		 * analysis.
		 *
		 * @param word the word to look up; ignored characters are removed first.
		 * @param info spelling info holder; not read or written here.
		 * @return the matching entry, or null when the word is unknown or
		 *         carries the forbidden-word flag.
		 */
		private function checkWord( word:String, info:SpellingInfo ):HashEntry {
			var i:int;
			var he:HashEntry = null;
			if ( attributeMgr.ignoredChars ) {
				word = StringUtils.removeIgnoredChars(word, attributeMgr.ignoredChars);
			}
			// ToDo: word reversing wrapper for complex prefixes

			// look word in hash table
			for ( i=0; i < dictMgr.dictonaryList.length && !he; ++i ) {
				he = dictMgr.dictonaryList[i].getElement(word);
				// check forbidden and onlyincompound words
				if ( he && (he.affixFlagVector != null) &&
					((attributeMgr) && ( he.testAffix(attributeMgr.forbiddenWord))) ) {
					// ToDo: LANG_hu section: set dash information for suggestions
					return null;
				}
				// ToDo: he = next not needaffix, onlyincompound homonym or onlyupcase word
				// (needs a maintained "next homonym" link, which does not exist yet)
			}

			// check with affixes
			if ( !he && attributeMgr ) {
				he = attributeMgr.affixCheck2(word,0,0);
				// do not allow affixed forms of forbidden words
				if ( he && (he.affixFlagVector != null) && (attributeMgr) && he.testAffix(attributeMgr.forbiddenWord) ) {
					// ToDo: LANG_hu section: set dash information for suggestions
					return null;
				}
			}

			return he;
		}


	}
}
\ No newline at end of file