This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders
in repository https://gitbox.apache.org/repos/asf/uima-uimafit.git

commit 3196606a57e0fadf8ac49747797db2bc2f4cbda9
Author: Richard Eckart de Castilho <r...@apache.org>
AuthorDate: Wed Nov 23 09:26:34 2022 +0100

    Issue #215: Improve descriptor scanning performance when there are many classloaders
    
    - Implement a per-location caching of pre-parsed and pre-resolved type system descriptions
---
 .../fit/factory/TypeSystemDescriptionFactory.java  | 50 +++++++++++++++++++---
 .../org/apache/uima/fit/ComponentTestBase.java     |  4 --
 .../factory/TypeSystemDescriptionFactoryTest.java  |  6 ++-
 3 files changed, 49 insertions(+), 11 deletions(-)

diff --git 
a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
 
b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
index 74de8fb..1b611d8 100644
--- 
a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
+++ 
b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
@@ -24,7 +24,9 @@ import static 
org.apache.uima.util.CasCreationUtils.mergeTypeSystems;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.ServiceLoader;
 import java.util.WeakHashMap;
 
@@ -51,11 +53,16 @@ public final class TypeSystemDescriptionFactory {
 
   private static final Object CREATE_LOCK = new Object();
 
+  private static final TypeSystemDescription PLACEHOLDER = new 
TypeSystemDescription_impl();
+
+  private static WeakHashMap<String, TypeSystemDescription> typeDescriptors;
+
   private static WeakHashMap<ClassLoader, String[]> 
typeDescriptorLocationsByClassloader;
 
   private static WeakHashMap<ClassLoader, TypeSystemDescription> 
typeDescriptorByClassloader;
 
   static {
+    typeDescriptors = new WeakHashMap<>();
     typeDescriptorLocationsByClassloader = new WeakHashMap<>();
     typeDescriptorByClassloader = new WeakHashMap<>();
   }
@@ -124,13 +131,13 @@ public final class TypeSystemDescriptionFactory {
     TypeSystemDescription tsd = typeDescriptorByClassloader.get(cl);
     if (tsd == null) {
       synchronized (CREATE_LOCK) {
+        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         List<TypeSystemDescription> tsdList = new ArrayList<>();
 
-        loadTypeSystemDescriptionsFromScannedLocations(tsdList);
+        loadTypeSystemDescriptionsFromScannedLocations(tsdList, resMgr);
         loadTypeSystemDescriptionsFromSPIs(tsdList);
 
         LOG.trace("Merging type systems and resolving imports...");
-        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         tsd = mergeTypeSystems(tsdList, resMgr);
         typeDescriptorByClassloader.put(cl, tsd);
       }
@@ -138,12 +145,20 @@ public final class TypeSystemDescriptionFactory {
     return (TypeSystemDescription) tsd.clone();
   }
 
-  static void 
loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> 
tsdList)
-          throws ResourceInitializationException {
+  static void 
loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> 
tsdList,
+          ResourceManager aResMgr) throws ResourceInitializationException {
     for (String location : scanTypeDescriptors()) {
       try {
-        XMLInputSource xmlInputType1 = new XMLInputSource(location);
-        tsdList.add(getXMLParser().parseTypeSystemDescription(xmlInputType1));
+        TypeSystemDescription description = typeDescriptors.get(location);
+
+        if (description == PLACEHOLDER) {
+          // If the description has not yet been loaded, load it
+          description = getXMLParser().parseTypeSystemDescription(new 
XMLInputSource(location));
+          description.resolveImports(aResMgr);
+          typeDescriptors.put(location, description);
+        }
+
+        tsdList.add(description);
         LOG.debug("Detected type system at [{}]", location);
       } catch (IOException e) {
         throw new ResourceInitializationException(e);
@@ -178,14 +193,36 @@ public final class TypeSystemDescriptionFactory {
     synchronized (SCAN_LOCK) {
       ClassLoader cl = ClassLoaderUtils.findClassloader();
       String[] typeDescriptorLocations = 
typeDescriptorLocationsByClassloader.get(cl);
+
       if (typeDescriptorLocations == null) {
         typeDescriptorLocations = scanDescriptors(MetaDataType.TYPE_SYSTEM);
+
+        internTypeDescriptorLocations(typeDescriptorLocations);
+
         typeDescriptorLocationsByClassloader.put(cl, typeDescriptorLocations);
       }
+
       return typeDescriptorLocations;
     }
   }
 
+  private static void internTypeDescriptorLocations(String[] 
typeDescriptorLocations) {
+    // We "intern" the location strings because we will use them as keys in the WeakHashMap
+    // caching the parsed type systems. As part of this process, we put a PLACEHOLDER into the
+    // map which is replaced when the type system is actually loaded
+    Map<String, String> locationStrings = new HashMap<>();
+    typeDescriptors.keySet().stream().forEach(loc -> locationStrings.put(loc, 
loc));
+    for (int i = 0; i < typeDescriptorLocations.length; i++) {
+      String existingLocString = 
locationStrings.get(typeDescriptorLocations[i]);
+      if (existingLocString == null) {
+        typeDescriptors.put(typeDescriptorLocations[i], PLACEHOLDER);
+        locationStrings.put(typeDescriptorLocations[i], 
typeDescriptorLocations[i]);
+      } else {
+        typeDescriptorLocations[i] = existingLocString;
+      }
+    }
+  }
+
   /**
    * Force rescan of type descriptors. The next call to {@link #scanTypeDescriptors()} will rescan
    * all auto-import locations.
@@ -195,6 +232,7 @@ public final class TypeSystemDescriptionFactory {
     synchronized (SCAN_LOCK) {
       typeDescriptorLocationsByClassloader.clear();
       typeDescriptorByClassloader.clear();
+      typeDescriptors.clear();
     }
   }
 }
diff --git 
a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java 
b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
index f747d4a..13dc3cd 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
@@ -29,10 +29,6 @@ import 
org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.BeforeEach;
 
-/**
- * 
- * 
- */
 public class ComponentTestBase {
 
   private static ThreadLocal<JCas> JCAS = new ThreadLocal<JCas>();
diff --git 
a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
 
b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
index 052c36b..e7768e4 100644
--- 
a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
+++ 
b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
@@ -27,9 +27,11 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.uima.fit.factory.spi.TypeSystemDescriptionProviderForTesting;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.type.AnalyzedText;
 import org.apache.uima.fit.type.Sentence;
 import org.apache.uima.fit.type.Token;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.Test;
@@ -54,8 +56,10 @@ public class TypeSystemDescriptionFactoryTest {
 
   @Test
   public void testLoadingFromScannedLocations() throws Exception {
+    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+
     List<TypeSystemDescription> tsds = new ArrayList<>();
-    loadTypeSystemDescriptionsFromScannedLocations(tsds);
+    loadTypeSystemDescriptionsFromScannedLocations(tsds, resMgr);
     TypeSystemDescription tsd = CasCreationUtils.mergeTypeSystems(tsds);
 
     assertNotNull(tsd.getType(Token.class.getName()));

Reply via email to