Xikui Wang has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1416
Change subject: WIP - Load parser from classpath in ParserFactoryProvider ...................................................................... WIP - Load parser from classpath in ParserFactoryProvider Change the behavior of ParserFactoryProvider so that it can load parsers from the classpath. Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9 --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java A asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory M asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java M asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java A asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java 12 files changed, 175 insertions(+), 24 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/16/1416/1 diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java index 1fc97c9..3dd3903 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/api/IDataParserFactory.java @@ -58,4 +58,6 @@ * @param metaType */ public void setMetaType(ARecordType metaType); + + public String[] getFormats(); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java index efc9574..b0a1db2 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/ADMDataParserFactory.java @@ -30,6 +30,7 @@ public class ADMDataParserFactory extends AbstractRecordStreamParserFactory<char[]> { private static final long serialVersionUID = 1L; + private static String[] formats = { "adm", "json", "semi-structured" }; @Override public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) { @@ -56,4 +57,9 @@ public void setMetaType(ARecordType metaType) { } + @Override + public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java index f724b48..a502457 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java +++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java @@ -34,6 +34,7 @@ public class DelimitedDataParserFactory extends AbstractRecordStreamParserFactory<char[]> { private static final long serialVersionUID = 1L; + private static String[] formats = { "csv", "delimited-text" }; @Override public IRecordDataParser<char[]> createRecordParser(IHyracksTaskContext ctx) throws HyracksDataException { @@ -97,4 +98,9 @@ public void setMetaType(ARecordType metaType) { } + @Override + public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java index a4c8679..796eb9d 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/HiveDataParserFactory.java @@ -34,6 +34,7 @@ public class HiveDataParserFactory implements IRecordDataParserFactory<Writable> { private static final long serialVersionUID = 1L; + private static String[] formats = { "hive", "hive-parser"}; private Map<String, String> configuration; private ARecordType recordType; private String hiveSerdeClassName; @@ -71,4 +72,8 @@ public void setMetaType(ARecordType metaType) { } + @Override public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java index 7465455..25308f2 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java +++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RSSParserFactory.java @@ -31,6 +31,7 @@ public class RSSParserFactory implements IRecordDataParserFactory<SyndEntryImpl> { private static final long serialVersionUID = 1L; + private static String[] formats = { "rss" }; private ARecordType recordType; @Override @@ -58,4 +59,9 @@ public void setMetaType(ARecordType metaType) { } + @Override + public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java index 96c592a..af9aab9 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/RecordWithMetadataParserFactory.java @@ -37,6 +37,7 @@ public class RecordWithMetadataParserFactory<I, O> implements IRecordDataParserFactory<I> { private static final long serialVersionUID = 1L; + private static String[] formats = { "record-with-metadata" }; private ARecordType metaType; private ARecordType recordType; private IRecordDataParserFactory<O> recordParserFactory; @@ -82,6 +83,11 @@ } @Override + public String[] getFormats() { + return formats; + } + + @Override public Class<?> getRecordClass() { return converterFactory.getInputClass(); } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java index 3539f6e..771f56a 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java +++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/TweetParserFactory.java @@ -31,6 +31,7 @@ public class TweetParserFactory implements IRecordDataParserFactory<String> { private static final long serialVersionUID = 1L; + private static String[] formats = { "twitter-status" }; private ARecordType recordType; @Override @@ -59,4 +60,9 @@ // do nothing } + @Override + public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java index ebe3276..cbcbded 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/provider/ParserFactoryProvider.java @@ -18,6 +18,13 @@ */ package org.apache.asterix.external.provider; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.nio.charset.Charset; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashMap; import java.util.Map; import org.apache.asterix.common.exceptions.AsterixException; @@ -31,8 +38,15 @@ import org.apache.asterix.external.parser.factory.TweetParserFactory; import org.apache.asterix.external.util.ExternalDataConstants; import org.apache.asterix.external.util.ExternalDataUtils; +import org.apache.commons.io.IOUtils; +import org.codehaus.jettison.json.JSONArray; +import org.codehaus.jettison.json.JSONException; +import org.codehaus.jettison.json.JSONObject; public class ParserFactoryProvider { + + private static final String RESOURCE = "META-INF/services/org.apache.asterix.external.api.IDataParserFactory"; + private static Map<String, Class> factories = null; private ParserFactoryProvider() { } @@ -54,32 +68,60 @@ return parserFactory; } + 
protected static IDataParserFactory getInstance(Class clazz) throws AsterixException { + try { + return (IDataParserFactory) clazz.newInstance(); + } catch (IllegalAccessException | InstantiationException | ClassCastException e) { + throw new AsterixException("Cannot create: " + clazz.getSimpleName(), e); + } + } + @SuppressWarnings("rawtypes") public static IDataParserFactory getDataParserFactory(String parser) throws AsterixException { - switch (parser) { - case ExternalDataConstants.FORMAT_ADM: - case ExternalDataConstants.FORMAT_JSON: - case ExternalDataConstants.FORMAT_SEMISTRUCTURED: - return new ADMDataParserFactory(); - case ExternalDataConstants.FORMAT_DELIMITED_TEXT: - case ExternalDataConstants.FORMAT_CSV: - return new DelimitedDataParserFactory(); - case ExternalDataConstants.FORMAT_HIVE: - case ExternalDataConstants.PARSER_HIVE: - return new HiveDataParserFactory(); - case ExternalDataConstants.FORMAT_TWEET: - return new TweetParserFactory(); - case ExternalDataConstants.FORMAT_RSS: - return new RSSParserFactory(); - case ExternalDataConstants.FORMAT_RECORD_WITH_METADATA: - return new RecordWithMetadataParserFactory(); - default: - try { - return (IDataParserFactory) Class.forName(parser).newInstance(); - } catch (IllegalAccessException | ClassNotFoundException | InstantiationException - | ClassCastException e) { - throw new AsterixException("Unknown format: " + parser, e); - } + + if (factories == null) { + factories = initFactories(); } + + if (factories.containsKey(parser)) { + return getInstance(factories.get(parser)); + } + + try { + // ideally, this should not happen + return (IDataParserFactory) Class.forName(parser).newInstance(); + } catch (IllegalAccessException | ClassNotFoundException | InstantiationException | ClassCastException e) { + throw new AsterixException("Unknown format: " + parser, e); + } + } + + protected static Map<String, Class> initFactories() throws AsterixException { + Map<String, Class> factories = new HashMap<>(); + 
ClassLoader cl = ParserFactoryProvider.class.getClassLoader(); + final Charset encoding = Charset.forName("UTF-8"); + try { + Enumeration<URL> urls = cl.getResources(RESOURCE); + for (URL url : Collections.list(urls)) { + System.out.println(url); + InputStream is = url.openStream(); + String config = IOUtils.toString(is, encoding); + is.close(); + String[] classNames = config.split("\n"); + for (String className : classNames) { + final Class<?> clazz = Class.forName(className); + String[] formats = ((IDataParserFactory) clazz.newInstance()).getFormats(); + for (String format : formats) { + if (factories.containsKey(format)) { + throw new AsterixException("Duplicate format " + format); + } + factories.put(format, clazz); + } + } + } + } catch (IOException | ClassNotFoundException | InstantiationException + | IllegalAccessException e) { + throw new AsterixException(e); + } + return factories; } } diff --git a/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory new file mode 100644 index 0000000..840f619 --- /dev/null +++ b/asterixdb/asterix-external-data/src/main/resources/META-INF/services/org.apache.asterix.external.api.IDataParserFactory @@ -0,0 +1,6 @@ +org.apache.asterix.external.parser.factory.ADMDataParserFactory +org.apache.asterix.external.parser.factory.DelimitedDataParserFactory +org.apache.asterix.external.parser.factory.HiveDataParserFactory +org.apache.asterix.external.parser.factory.RecordWithMetadataParserFactory +org.apache.asterix.external.parser.factory.RSSParserFactory +org.apache.asterix.external.parser.factory.TweetParserFactory \ No newline at end of file diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java 
b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java index 17b83c2..d8cc3bb 100644 --- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/library/ClassAdParserFactory.java @@ -34,6 +34,7 @@ public class ClassAdParserFactory implements IRecordDataParserFactory<char[]> { private static final long serialVersionUID = 1L; + private static final String[] formats = { "line-separated" }; public static final String KEY_OLD_FORMAT = "old-format"; public static final String KEY_EVALUATE = "evaluate"; public static final String KEY_KEEP_EXPR = "keep-expr"; @@ -121,4 +122,9 @@ public void setMetaType(ARecordType metaType) { } + @Override + public String[] getFormats() { + return formats; + } + } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java index 5b23094..91919d1 100644 --- a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/factory/TestRecordWithPKParserFactory.java @@ -18,6 +18,7 @@ */ package org.apache.asterix.external.parser.factory; +import java.util.ArrayList; import java.util.Map; import java.util.TreeMap; @@ -37,6 +38,7 @@ public class TestRecordWithPKParserFactory<T> implements IRecordDataParserFactory<RecordWithPK<T>> { private static final long serialVersionUID = 1L; + private static final ArrayList<String> formats = new ArrayList<>(); private ARecordType recordType; private IRecordDataParserFactory<char[]> recordParserFactory; private String format; @@ -49,6 +51,7 @@ public void configure(Map<String, 
String> configuration) throws AsterixException { TreeMap<String, String> parserConf = new TreeMap<String, String>(); format = configuration.get(ExternalDataConstants.KEY_RECORD_FORMAT); + formats.add(format); parserConf.put(ExternalDataConstants.KEY_FORMAT, format); recordParserFactory = (IRecordDataParserFactory<char[]>) ParserFactoryProvider.getDataParserFactory(null, parserConf); @@ -75,4 +78,10 @@ @Override public void setMetaType(ARecordType metaType) { } + + @Override + public String[] getFormats() { + return (String[]) formats.toArray(); + } + } diff --git a/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java new file mode 100644 index 0000000..effb7cd --- /dev/null +++ b/asterixdb/asterix-external-data/src/test/java/org/apache/asterix/external/parser/test/ParserFactoryProviderLoadParserTest.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.external.parser.test; + +import org.apache.asterix.common.exceptions.AsterixException; +import org.apache.asterix.external.api.IDataParserFactory; +import org.apache.asterix.external.parser.factory.ADMDataParserFactory; +import org.apache.asterix.external.parser.factory.DelimitedDataParserFactory; +import org.apache.asterix.external.parser.factory.HiveDataParserFactory; +import org.apache.asterix.external.parser.factory.RSSParserFactory; +import org.apache.asterix.external.parser.factory.TweetParserFactory; +import org.apache.asterix.external.provider.ParserFactoryProvider; +import org.junit.Assert; +import org.junit.Test; + + +public class ParserFactoryProviderLoadParserTest { + + IDataParserFactory factory; + @Test + public void test() throws AsterixException { + boolean result = true; + factory = ParserFactoryProvider.getDataParserFactory("csv"); + result = result && factory instanceof DelimitedDataParserFactory; + factory = ParserFactoryProvider.getDataParserFactory("adm"); + result = result && factory instanceof ADMDataParserFactory; + factory = ParserFactoryProvider.getDataParserFactory("rss"); + result = result && factory instanceof RSSParserFactory; + factory = ParserFactoryProvider.getDataParserFactory("hive"); + result = result && factory instanceof HiveDataParserFactory; + factory = ParserFactoryProvider.getDataParserFactory("twitter-status"); + result = result && factory instanceof TweetParserFactory; + Assert.assertTrue(result); + } +} -- To view, visit https://asterix-gerrit.ics.uci.edu/1416 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I2ac039fe3daaf0636cf004289bd0c8a3229197a9 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Xikui Wang <xkk...@gmail.com>