This is an automated email from the ASF dual-hosted git repository. rec pushed a commit to branch feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders in repository https://gitbox.apache.org/repos/asf/uima-uimafit.git
commit 3196606a57e0fadf8ac49747797db2bc2f4cbda9 Author: Richard Eckart de Castilho <r...@apache.org> AuthorDate: Wed Nov 23 09:26:34 2022 +0100 Issue #215: Improve descriptor scanning performance when there are many classloaders - Implement a per-location caching of pre-parsed and pre-resolved type system descriptions --- .../fit/factory/TypeSystemDescriptionFactory.java | 50 +++++++++++++++++++--- .../org/apache/uima/fit/ComponentTestBase.java | 4 -- .../factory/TypeSystemDescriptionFactoryTest.java | 6 ++- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java index 74de8fb..1b611d8 100644 --- a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java +++ b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java @@ -24,7 +24,9 @@ import static org.apache.uima.util.CasCreationUtils.mergeTypeSystems; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.ServiceLoader; import java.util.WeakHashMap; @@ -51,11 +53,16 @@ public final class TypeSystemDescriptionFactory { private static final Object CREATE_LOCK = new Object(); + private static final TypeSystemDescription PLACEHOLDER = new TypeSystemDescription_impl(); + + private static WeakHashMap<String, TypeSystemDescription> typeDescriptors; + private static WeakHashMap<ClassLoader, String[]> typeDescriptorLocationsByClassloader; private static WeakHashMap<ClassLoader, TypeSystemDescription> typeDescriptorByClassloader; static { + typeDescriptors = new WeakHashMap<>(); typeDescriptorLocationsByClassloader = new WeakHashMap<>(); typeDescriptorByClassloader = new WeakHashMap<>(); } @@ -124,13 +131,13 @@ public final class TypeSystemDescriptionFactory { TypeSystemDescription tsd = typeDescriptorByClassloader.get(cl); if (tsd == null) { synchronized (CREATE_LOCK) { + ResourceManager resMgr = ResourceManagerFactory.newResourceManager(); List<TypeSystemDescription> tsdList = new ArrayList<>(); - loadTypeSystemDescriptionsFromScannedLocations(tsdList); + loadTypeSystemDescriptionsFromScannedLocations(tsdList, resMgr); loadTypeSystemDescriptionsFromSPIs(tsdList); LOG.trace("Merging type systems and resolving imports..."); - ResourceManager resMgr = ResourceManagerFactory.newResourceManager(); tsd = mergeTypeSystems(tsdList, resMgr); typeDescriptorByClassloader.put(cl, tsd); } @@ -138,12 +145,20 @@ public final class TypeSystemDescriptionFactory { return (TypeSystemDescription) tsd.clone(); } - static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList) - throws ResourceInitializationException { + static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList, + ResourceManager aResMgr) throws ResourceInitializationException { for (String location : scanTypeDescriptors()) { try { - XMLInputSource xmlInputType1 = new XMLInputSource(location); - tsdList.add(getXMLParser().parseTypeSystemDescription(xmlInputType1)); + TypeSystemDescription description = typeDescriptors.get(location); + + if (description == PLACEHOLDER) { + // If the description has not yet been loaded, load it + description = getXMLParser().parseTypeSystemDescription(new XMLInputSource(location)); + description.resolveImports(aResMgr); + typeDescriptors.put(location, description); + } + + tsdList.add(description); LOG.debug("Detected type system at [{}]", location); } catch (IOException e) { throw new ResourceInitializationException(e); @@ -178,14 +193,36 @@ public final class TypeSystemDescriptionFactory { synchronized (SCAN_LOCK) { ClassLoader cl = ClassLoaderUtils.findClassloader(); String[] typeDescriptorLocations = typeDescriptorLocationsByClassloader.get(cl); + if (typeDescriptorLocations == null) { typeDescriptorLocations = scanDescriptors(MetaDataType.TYPE_SYSTEM); + + internTypeDescriptorLocations(typeDescriptorLocations); + typeDescriptorLocationsByClassloader.put(cl, typeDescriptorLocations); } + return typeDescriptorLocations; } } + private static void internTypeDescriptorLocations(String[] typeDescriptorLocations) { + // We "intern" the location strings because we will use them as keys in the WeakHashMap + // caching the parsed type systems. As part of this process, we put a PLACEHOLDER into the + // map which is replaced when the type system is actually loaded + Map<String, String> locationStrings = new HashMap<>(); + typeDescriptors.keySet().stream().forEach(loc -> locationStrings.put(loc, loc)); + for (int i = 0; i < typeDescriptorLocations.length; i++) { + String existingLocString = locationStrings.get(typeDescriptorLocations[i]); + if (existingLocString == null) { + typeDescriptors.put(typeDescriptorLocations[i], PLACEHOLDER); + locationStrings.put(typeDescriptorLocations[i], typeDescriptorLocations[i]); + } else { + typeDescriptorLocations[i] = existingLocString; + } + } + } + /** * Force rescan of type descriptors. The next call to {@link #scanTypeDescriptors()} will rescan * all auto-import locations. @@ -195,6 +232,7 @@ public final class TypeSystemDescriptionFactory { synchronized (SCAN_LOCK) { typeDescriptorLocationsByClassloader.clear(); typeDescriptorByClassloader.clear(); + typeDescriptors.clear(); } } } diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java index f747d4a..13dc3cd 100644 --- a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java +++ b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java @@ -29,10 +29,6 @@ import org.apache.uima.resource.metadata.TypeSystemDescription; import org.apache.uima.util.CasCreationUtils; import org.junit.jupiter.api.BeforeEach; -/** - * - * - */ public class ComponentTestBase { private static ThreadLocal<JCas> JCAS = new ThreadLocal<JCas>(); diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java index 052c36b..e7768e4 100644 --- a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java +++ b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java @@ -27,9 +27,11 @@ import java.util.ArrayList; import java.util.List; import org.apache.uima.fit.factory.spi.TypeSystemDescriptionProviderForTesting; +import org.apache.uima.fit.internal.ResourceManagerFactory; import org.apache.uima.fit.type.AnalyzedText; import org.apache.uima.fit.type.Sentence; import org.apache.uima.fit.type.Token; +import org.apache.uima.resource.ResourceManager; import org.apache.uima.resource.metadata.TypeSystemDescription; import org.apache.uima.util.CasCreationUtils; import org.junit.jupiter.api.Test; @@ -54,8 +56,10 @@ public class TypeSystemDescriptionFactoryTest { @Test public void testLoadingFromScannedLocations() throws Exception { + ResourceManager resMgr = ResourceManagerFactory.newResourceManager(); + List<TypeSystemDescription> tsds = new ArrayList<>(); - loadTypeSystemDescriptionsFromScannedLocations(tsds); + loadTypeSystemDescriptionsFromScannedLocations(tsds, resMgr); TypeSystemDescription tsd = CasCreationUtils.mergeTypeSystems(tsds); assertNotNull(tsd.getType(Token.class.getName()));