This is an automated email from the ASF dual-hosted git repository. imaxon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 934ce9d903d36990d22febd69e08a81d0c8e40b2 Author: Rui Guo <ru...@uci.edu> AuthorDate: Tue Apr 14 12:17:13 2020 -0700 [NO ISSUE] Rename tokenizer factories Rename the variables to make things clear. Previously, tokenizer factories were named tokenizer which could be confusing. Change-Id: Ie72d420e18509489d0fc0f9c98b162202a62be55 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/5804 Contrib: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Ian Maxon <ima...@uci.edu> --- .../nontagged/BinaryTokenizerFactoryProvider.java | 27 ++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java index 5ef7702..6333e92 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java @@ -30,26 +30,29 @@ import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8Strin import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory; import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8WordTokenFactory; +// ToDo: maybe we can make the constructor method of the tokenizers private so that tokenizers have to be generated via this provider +// Currently, different call sites of tokenizers use **different parameters**, and this can be error-prone +// A centralized provider can avoid the bugs due to different parameters. public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryProvider { public static final BinaryTokenizerFactoryProvider INSTANCE = new BinaryTokenizerFactoryProvider(); - private static final IBinaryTokenizerFactory aqlStringTokenizer = + private static final IBinaryTokenizerFactory stringTokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, true, new UTF8WordTokenFactory(ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG)); - private static final IBinaryTokenizerFactory aqlStringNoTypeTagTokenizer = + private static final IBinaryTokenizerFactory stringNoTypeTagTokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false, new UTF8WordTokenFactory(ATypeTag.STRING.serialize(), ATypeTag.INTEGER.serialize())); - private static final IBinaryTokenizerFactory aqlHashingStringTokenizer = + private static final IBinaryTokenizerFactory stringHashingTokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, true, new HashedUTF8WordTokenFactory( ATypeTag.SERIALIZED_INT32_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG)); - private static final IBinaryTokenizerFactory orderedListTokenizer = + private static final IBinaryTokenizerFactory orderedListTokenizerFactory = new AOrderedListBinaryTokenizerFactory(new AListElementTokenFactory()); - private static final IBinaryTokenizerFactory unorderedListTokenizer = + private static final IBinaryTokenizerFactory unorderedListTokenizerFactory = new AUnorderedListBinaryTokenizerFactory(new AListElementTokenFactory()); @Override @@ -58,16 +61,16 @@ public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryPr switch (typeTag) { case STRING: if (hashedTokens) { - return aqlHashingStringTokenizer; + return stringHashingTokenizerFactory; } else if (!typeTageAlreadyRemoved) { - return aqlStringTokenizer; + return stringTokenizerFactory; } else { - return aqlStringNoTypeTagTokenizer; + return stringNoTypeTagTokenizerFactory; } case ARRAY: - return orderedListTokenizer; + return orderedListTokenizerFactory; case MULTISET: - return unorderedListTokenizer; + return unorderedListTokenizerFactory; default: return null; } @@ -86,9 +89,9 @@ public class BinaryTokenizerFactoryProvider implements IBinaryTokenizerFactoryPr ATypeTag.SERIALIZED_INT32_TYPE_TAG)); } case ARRAY: - return orderedListTokenizer; + return orderedListTokenizerFactory; case MULTISET: - return unorderedListTokenizer; + return unorderedListTokenizerFactory; default: return null; }