Thanks, Sean, for the quick reply.
Here are the contents of the files you asked about:
*1. tinyDictSpec.xml*
============
<?xml version="1.0" encoding="UTF-8"?>
<!--
  tinyDictSpec.xml: lookup specification for the primary dictionary.
  Declares one JDBC-backed dictionary (table "rareword"), one JDBC-backed
  concept factory (table "tui"), the pair that binds them together, and the
  term consumer.

  Every "&" in the JDBC URL query string is written as "&amp;". A raw
  ampersand inside an attribute value makes the document ill-formed and a
  conformant parser rejects the whole file.
-->
<lookupSpecification>
  <dictionaries>
    <dictionary>
      <name>LabAnnotatorTestDict</name>
      <implementationName>org.apache.ctakes.dictionary.lookup2.dictionary.UmlsJdbcRareWordDictionary</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <!-- Kept on one line: a line break inside the value is normalized to a leading space. -->
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="rareWordTable" value="rareword"/>
      </properties>
    </dictionary>
  </dictionaries>
  <conceptFactories>
    <conceptFactory>
      <name>LabAnnotatorTestConcepts</name>
      <implementationName>org.apache.ctakes.dictionary.lookup2.concept.UmlsJdbcConceptFactory</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="tuiTable" value="tui"/>
      </properties>
    </conceptFactory>
  </conceptFactories>
  <!-- The names below must match the <name> of the dictionary and concept factory above. -->
  <dictionaryConceptPairs>
    <dictionaryConceptPair>
      <name>LabAnnotatorPair</name>
      <dictionaryName>LabAnnotatorTestDict</dictionaryName>
      <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
    </dictionaryConceptPair>
  </dictionaryConceptPairs>
  <rareWordConsumer>
    <name>Term Consumer</name>
    <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
    <properties>
      <property key="codingScheme" value="custom"/>
    </properties>
  </rareWordConsumer>
</lookupSpecification>
===========
*2. drugConcept.xml*
<?xml version="1.0" encoding="UTF-8"?>
<!--
  drugConcept.xml: lookup specification for the custom drug dictionary.
  Declares one JDBC-backed dictionary (table "drug"), one JDBC-backed concept
  factory (table "tui"), the pair that binds them together, and the term
  consumer.

  Every "&" in the JDBC URL query string is written as "&amp;". A raw
  ampersand inside an attribute value makes the document ill-formed and a
  conformant parser rejects the whole file.
-->
<lookupSpecification>
  <dictionaries>
    <dictionary>
      <name>LabAnnotatorTestDict</name>
      <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.dictionary.UmlsJdbcDrugTermsDictonary</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <!-- Kept on one line: a line break inside the value is normalized to a leading space. -->
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="rareWordTable" value="drug"/>
      </properties>
    </dictionary>
  </dictionaries>
  <conceptFactories>
    <conceptFactory>
      <name>LabAnnotatorTestConcepts</name>
      <!-- Class name and closing tag share a line so the element text has no trailing newline. -->
      <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.concept.UmlsJdbcDrugNameConceptFactory</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="tuiTable" value="tui"/>
      </properties>
    </conceptFactory>
  </conceptFactories>
  <!-- The names below must match the <name> of the dictionary and concept factory above. -->
  <dictionaryConceptPairs>
    <dictionaryConceptPair>
      <name>LabAnnotatorPair</name>
      <dictionaryName>LabAnnotatorTestDict</dictionaryName>
      <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
    </dictionaryConceptPair>
  </dictionaryConceptPairs>
  <rareWordConsumer>
    <name>Term Consumer</name>
    <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
    <properties>
      <property key="codingScheme" value="custom"/>
    </properties>
  </rareWordConsumer>
</lookupSpecification>
*=======*
*3. personName.xml*
<?xml version="1.0" encoding="UTF-8"?>
<!--
  personName.xml: lookup specification for the custom person-name dictionary.
  Declares one JDBC-backed dictionary (table "person_name"), one JDBC-backed
  concept factory (table "tui"), the pair that binds them together, and the
  term consumer.

  Every "&" in the JDBC URL query string is written as "&amp;". A raw
  ampersand inside an attribute value makes the document ill-formed and a
  conformant parser rejects the whole file.
-->
<lookupSpecification>
  <dictionaries>
    <dictionary>
      <name>LabAnnotatorTestDict</name>
      <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.dictionary.UmlsJdbcPersonDictionary</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <!-- Kept on one line: a line break inside the value is normalized to a leading space. -->
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="rareWordTable" value="person_name"/>
      </properties>
    </dictionary>
  </dictionaries>
  <conceptFactories>
    <conceptFactory>
      <name>LabAnnotatorTestConcepts</name>
      <implementationName>org.apache.ctakes.raxactakes.dictionary.lookup2.concept.UmlsJdbcPersonNameConceptFactory</implementationName>
      <properties>
        <property key="jdbcDriver" value="com.mysql.jdbc.Driver"/>
        <property key="jdbcUrl"
                  value="jdbc:mysql://localhost:3306/aiunstructured?useUnicode=true&amp;characterEncoding=utf8&amp;useSSL=false&amp;autoReconnect=true"/>
        <property key="jdbcUser" value="root"/>
        <property key="jdbcPass" value=""/>
        <property key="umlsUrl" value="https://uts-ws.nlm.nih.gov/restful/isValidUMLSUser"/>
        <property key="umlsVendor" value="NLM-6515182895"/>
        <property key="umlsUser" value=""/>
        <property key="umlsPass" value=""/>
        <property key="tuiTable" value="tui"/>
      </properties>
    </conceptFactory>
  </conceptFactories>
  <!-- The names below must match the <name> of the dictionary and concept factory above. -->
  <dictionaryConceptPairs>
    <dictionaryConceptPair>
      <name>LabAnnotatorPair</name>
      <dictionaryName>LabAnnotatorTestDict</dictionaryName>
      <conceptFactoryName>LabAnnotatorTestConcepts</conceptFactoryName>
    </dictionaryConceptPair>
  </dictionaryConceptPairs>
  <rareWordConsumer>
    <name>Term Consumer</name>
    <implementationName>org.apache.ctakes.dictionary.lookup2.consumer.DefaultTermConsumer</implementationName>
    <properties>
      <property key="codingScheme" value="custom"/>
    </properties>
  </rareWordConsumer>
</lookupSpecification>
*RaxaDefaultJCasTermAnnotator* is similar to
org.apache.ctakes.dictionary.lookup2.ae.*DefaultJCasTermAnnotator*; the only
change I have made is setting the _minimumLookupSpan variable of
AbstractJCasTermAnnotator to 1.
On Thu, Feb 21, 2019 at 11:41 AM Finan, Sean <
[email protected]> wrote:
> Hi Zakir,
>
> In order for me to help you, I need to know more about:
> Your primary dictionary:
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/tinyDictSpec.xml
>
> Your custom dictionary lookup #1:
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/drugConcept.xml
>
> Your custom dictionary lookup #2:
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/personName.xml
>
>
> As for your metrics,
> >For lookup span
> value of 3 (default), rest call was taking less than 2s for text like (
> Systolic blood pressure 180 ) is now taking around 5s.
>
> Does this mean that a document containing such text took 2 seconds, or
> that averaging over discovered annotations per took 2 seconds?
>
> I realize that moving from 3 characters to 1 means that every "a" "to"
> "in" "of" "an" "1" "2" ... is used for lookup. However, that should not
> multiply the processing time *2.5
>
>
> I have to wonder if the non-ctakes
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae
> .RaxaDefaultJCasTermAnnotator
> is doing something suspect.
>
>
> Sean
>
>
> ________________________________________
> From: Zakir Saifi <[email protected]>
> Sent: Thursday, February 21, 2019 12:18 AM
> To: [email protected]
> Subject: Making Ctakes Faster after Changing default lookup span value
> [EXTERNAL]
>
> Hi Everyone,
>
> I am using Ctakes for Structuring some clinical Text. In my clinical text,
> there are single characters word like *P 90 (Pulse 90) *etc. I want Ctakes
> to detect those. Since the default minimum span detected by Ctakes is 3.
> I was not able to detect these concepts. Therefore I have changed the Value
> of the _minimumLookupSpan to 1. Now I am able to detect the one character
> word using Ctakes after adding them to my Custom Dictionary.
>
> My Problem is that after changing the value of _minimumLookupSpan, ctakes
> has become slow.
> I am using Ctakes-web-Rest (Rest Service using Ctakes). For lookup span
> value of 3 (default), rest call was taking less than 2s for text like (
> Systolic blood pressure 180 ) is now taking around 5s.
>
> How can I make Ctakes faster?. Any configuration which helps to improve the
> performance without losing the current detection rate.
>
> Here is the content of my current Piper file.
>
> load DefaultFastPipeline
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae
> .RaxaDefaultJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/tinyDictSpec.xml
> add LabValueFinder
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/drugConcept.xml
> add org.apache.ctakes.drugner.ae.DrugMentionAnnotator
>
> STATUS_BOUNDARY_ANN_TYPE="org.apache.ctakes.typesystem.type.textsem.MedicationMention"
> add
> org.apache.ctakes.raxactakes.dictionary.lookup2.ae.RaxaJCasTermAnnotator
> LookupXml=org/apache/ctakes/dictionary/lookup/fast/personName.xml
> add org.apache.ctakes.raxactakes.core.ae.PersonNameFinder
>
> addDescription EventAnnotator
> addLogged BackwardsTimeAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/timeannotator/model.jar
> addLogged DocTimeRelAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/doctimerel/model.jar
> addLogged EventTimeRelationAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventtime/model.jar
> addLogged EventEventRelationAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventevent/model.jar
> addLogged ContextualModalityAnnotator
>
> classifierJarPath=/org/apache/ctakes/temporal/ae/contextualmodality/model.jar
> addLogged EventAnnotator
> classifierJarPath=/org/apache/ctakes/temporal/ae/eventannotator/model.jar
>
> --
> Regards
> Zakir Saifi
> (Software Developer at Raxa)
>
--
Regards
Zakir Saifi
(Software Developer at Raxa)