Author: chenpei
Date: Mon Nov  4 16:47:38 2013
New Revision: 1538670

URL: http://svn.apache.org/r1538670
Log:
CTAKES-253 - YTEX ctakes patches port. Thanks Vijay Garla.
* ctakes-context-tokenizer\src\main\java\org\apache\ctakes\contexttokenizer\ae\ContextDependentTokenizerAnnotator.java
  Add null check: changed to avoid an NPE in case the BaseToken is null. Also
  ignore newline tokens (they should be treated as whitespace).
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\adapters\NumberTokenAdapter.java
  Add null check: ignore empty number tokens.
* ctakes-core\src\main\java\org\apache\ctakes\core\fsm\machine\DateFSM.java
  Modified to include years in dates.

Modified:
    
ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
    
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
    
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java

Modified: 
ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
 (original)
+++ 
ctakes/trunk/ctakes-context-tokenizer/src/main/java/org/apache/ctakes/contexttokenizer/ae/ContextDependentTokenizerAnnotator.java
 Mon Nov  4 16:47:38 2013
@@ -18,21 +18,21 @@
  */
 package org.apache.ctakes.contexttokenizer.ae;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.log4j.Logger;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FSIterator;
-import org.apache.uima.cas.text.AnnotationIndex;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.JFSIndexRepository;
-import org.apache.uima.resource.ResourceInitializationException;
-
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JFSIndexRepository;
+import org.apache.uima.resource.ResourceInitializationException;
+
 
 import org.apache.ctakes.core.ae.TokenizerAnnotator;
 import org.apache.ctakes.core.fsm.adapters.ContractionTokenAdapter;
@@ -57,20 +57,21 @@ import org.apache.ctakes.core.fsm.output
 import org.apache.ctakes.core.fsm.output.RomanNumeralToken;
 import org.apache.ctakes.core.fsm.output.TimeToken;
 import org.apache.ctakes.core.fsm.token.BaseToken;
-import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
-import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
-import org.apache.ctakes.typesystem.type.syntax.NumToken;
-import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
-import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
-import org.apache.ctakes.typesystem.type.syntax.WordToken;
-import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
-import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
-import org.apache.ctakes.typesystem.type.textspan.Sentence;
+import org.apache.ctakes.core.fsm.token.EolToken;
+import org.apache.ctakes.typesystem.type.syntax.ContractionToken;
+import org.apache.ctakes.typesystem.type.syntax.NewlineToken;
+import org.apache.ctakes.typesystem.type.syntax.NumToken;
+import org.apache.ctakes.typesystem.type.syntax.PunctuationToken;
+import org.apache.ctakes.typesystem.type.syntax.SymbolToken;
+import org.apache.ctakes.typesystem.type.syntax.WordToken;
+import org.apache.ctakes.typesystem.type.textsem.DateAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.FractionAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.MeasurementAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.PersonTitleAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RangeAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.RomanNumeralAnnotation;
+import org.apache.ctakes.typesystem.type.textsem.TimeAnnotation;
+import org.apache.ctakes.typesystem.type.textspan.Sentence;
 
 /**
  * Finds tokens based on context.
@@ -123,7 +124,10 @@ public class ContextDependentTokenizerAn
                                while (btaItr.hasNext()) {
                                        
org.apache.ctakes.typesystem.type.syntax.BaseToken bta = 
(org.apache.ctakes.typesystem.type.syntax.BaseToken) btaItr
                                                        .next();
-                                       
baseTokenList.add(adaptToBaseToken(bta));
+                                       // ignore newlines, avoid null tokens
+                                       BaseToken bt = adaptToBaseToken(bta);
+                                       if(bt != null && !(bt instanceof 
EolToken))
+                                               baseTokenList.add(bt);
                                }
 
                                // execute FSM logic

Modified: 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
 (original)
+++ 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/adapters/NumberTokenAdapter.java
 Mon Nov  4 16:47:38 2013
@@ -21,6 +21,8 @@ package org.apache.ctakes.core.fsm.adapt
 import org.apache.ctakes.core.fsm.token.NumberToken;
 import org.apache.ctakes.typesystem.type.syntax.NumToken;
 
+import com.google.common.base.Strings;
+
 /**
  * Adapts JCas token annotation to interface expected by the Context Dependent
  * Tokenizer.
@@ -36,7 +38,7 @@ public class NumberTokenAdapter extends 
        {
                super(nta);
                
-               if (nta.getCoveredText().length() > 0 && 
nta.getCoveredText().charAt(0) == '-')
+               if (!Strings.isNullOrEmpty(nta.getCoveredText()) && 
nta.getCoveredText().length() > 0 && nta.getCoveredText().charAt(0) == '-')
                {
                        iv_isPositive = false;
                }               

Modified: 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
URL: 
http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java?rev=1538670&r1=1538669&r2=1538670&view=diff
==============================================================================
--- 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
 (original)
+++ 
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/fsm/machine/DateFSM.java
 Mon Nov  4 16:47:38 2013
@@ -220,16 +220,16 @@ public class DateFSM {
                startState.addTransition(new AnyCondition(), startState);
 
                monthFullTextState.addTransition(dayNumCondition, dayNumState);
-               monthFullTextState.addTransition(yearNotDayNumCondition, 
ntEndState);
+               monthFullTextState.addTransition(yearNotDayNumCondition, 
endState);
                monthFullTextState.addTransition(new AnyCondition(), 
startState);
 
                monthShortTextState.addTransition(dayNumCondition, dayNumState);
                monthShortTextState.addTransition(periodCondition, periodState);
-               monthShortTextState.addTransition(yearNotDayNumCondition, 
ntEndState);
+               monthShortTextState.addTransition(yearNotDayNumCondition, 
endState);
                monthShortTextState.addTransition(new AnyCondition(), 
startState);
 
                periodState.addTransition(dayNumCondition, dayNumState);
-               periodState.addTransition(yearNotDayNumCondition, ntEndState);
+               periodState.addTransition(yearNotDayNumCondition, endState);
                periodState.addTransition(new AnyCondition(), startState);
 
                dayNumState.addTransition(yearNumCondition, endState);


Reply via email to