This is an automated email from the ASF dual-hosted git repository.

airborne pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


    from 25496122781 [ci](perf) adjust restart doris wait time to 5m (#52181)
     add 7b4d330af5c [feature](inverted index) add custom analyzer (#50143)

No new revisions were added by this update.

Summary of changes:
 be/src/agent/agent_server.cpp                      |   13 +
 be/src/agent/task_worker_pool.cpp                  |   27 +
 be/src/agent/task_worker_pool.h                    |    4 +
 be/src/common/config.cpp                           |    2 +
 be/src/common/config.h                             |    2 +
 be/src/olap/inverted_index_parser.cpp              |    9 +
 be/src/olap/inverted_index_parser.h                |    7 +
 be/src/olap/rowset/segment_v2/column_reader.cpp    |    8 +-
 ...unction_query.h => abstract_analysis_factory.h} |   27 +-
 .../inverted_index/analysis_factory_mgr.cpp        |   75 +
 .../inverted_index/analysis_factory_mgr.h          |   51 +
 .../inverted_index/analyzer/analyzer.cpp           |  148 +-
 .../segment_v2/inverted_index/analyzer/analyzer.h  |   19 +-
 .../analyzer/basic/basic_tokenizer.cpp             |    3 +-
 .../inverted_index/analyzer/custom_analyzer.cpp    |  108 ++
 .../inverted_index/analyzer/custom_analyzer.h      |   86 +
 .../analyzer/custom_analyzer_config.cpp            |   62 +
 .../analyzer/custom_analyzer_config.h              |   76 +
 .../inverted_index/query/conjunction_query.cpp     |   50 +-
 .../inverted_index/query/conjunction_query.h       |    6 +-
 .../inverted_index/query/disjunction_query.cpp     |   24 +-
 .../inverted_index/query/disjunction_query.h       |    2 +-
 .../inverted_index/query/phrase_edge_query.cpp     |   20 +-
 .../inverted_index/query/phrase_edge_query.h       |    2 +-
 .../inverted_index/query/phrase_prefix_query.cpp   |   41 +-
 .../inverted_index/query/phrase_query.cpp          |  152 +-
 .../segment_v2/inverted_index/query/phrase_query.h |    9 +-
 .../phrase_query/ordered_sloppy_phrase_matcher.cpp |   15 +-
 .../inverted_index/query/prefix_query.cpp          |   30 +-
 .../segment_v2/inverted_index/query/prefix_query.h |    5 +-
 .../rowset/segment_v2/inverted_index/query/query.h |   19 +-
 .../segment_v2/inverted_index/query/query_info.h   |   58 +
 .../inverted_index/query/regexp_query.cpp          |    8 +-
 .../inverted_index/query_v2/term_query.cpp         |    7 +-
 .../inverted_index/query_v2/term_query.h           |   10 +-
 .../rowset/segment_v2/inverted_index/setting.h     |  136 ++
 .../token_filter/ascii_folding_filter.cpp          | 2020 ++++++++++++++++++++
 .../ascii_folding_filter.h}                        |   35 +-
 .../ascii_folding_filter_factory.h}                |   32 +-
 .../token_filter/lower_case_filter.h               |   84 +
 .../lower_case_filter_factory.h}                   |   34 +-
 .../token_filter.h}                                |   28 +-
 .../token_filter_factory.h}                        |   28 +-
 .../token_filter/word_delimiter_filter.cpp         |  249 +++
 .../token_filter/word_delimiter_filter.h           |  143 ++
 .../token_filter/word_delimiter_filter_factory.h   |  201 ++
 .../token_filter/word_delimiter_iterator.cpp       |  279 +++
 .../token_filter/word_delimiter_iterator.h         |   88 +
 .../segment_v2/inverted_index/token_stream.h       |   60 +
 .../tokenizer/keyword/keyword_tokenizer.h          |   73 +
 .../keyword/keyword_tokenizer_factory.h}           |   33 +-
 .../inverted_index/tokenizer/ngram/char_matcher.h  |  110 ++
 .../ngram/edge_ngram_tokenizer.h}                  |   27 +-
 .../tokenizer/ngram/edge_ngram_tokenizer_factory.h |   65 +
 .../tokenizer/ngram/ngram_tokenizer.cpp            |  148 ++
 .../tokenizer/ngram/ngram_tokenizer.h              |   78 +
 .../tokenizer/ngram/ngram_tokenizer_factory.cpp    |   83 +
 .../tokenizer/ngram/ngram_tokenizer_factory.h      |   64 +
 .../tokenizer/standard/standard_tokenizer.h        |   81 +
 .../standard/standard_tokenizer_factory.h}         |   31 +-
 .../tokenizer/standard/standard_tokenizer_impl.cpp | 1982 +++++++++++++++++++
 .../tokenizer/standard/standard_tokenizer_impl.h   |  299 +++
 .../inverted_index/tokenizer/tokenizer.h           |   52 +
 .../tokenizer_factory.h}                           |   28 +-
 .../inverted_index/util/docid_set_iterator.h       |    2 +-
 .../segment_v2/inverted_index/util/term_iterator.h |   41 +-
 .../inverted_index/util/term_position_iterator.h   |   43 +-
 .../inverted_index/util/union_term_iterator.h      |   30 +-
 .../rowset/segment_v2/inverted_index_reader.cpp    |   19 +-
 .../rowset/segment_v2/inverted_index_writer.cpp    |   37 +-
 be/src/runtime/exec_env.h                          |    8 +
 be/src/runtime/exec_env_init.cpp                   |    5 +
 be/src/runtime/index_policy/index_policy_mgr.cpp   |  173 ++
 be/src/runtime/index_policy/index_policy_mgr.h     |   52 +
 be/src/vec/exprs/vmatch_predicate.cpp              |    1 +
 be/src/vec/exprs/vmatch_predicate.h                |    2 +-
 be/src/vec/functions/function_tokenize.cpp         |   97 +-
 be/src/vec/functions/function_tokenize.h           |    5 +-
 be/src/vec/functions/match.cpp                     |  104 +-
 be/src/vec/functions/match.h                       |   21 +-
 .../ananlyzer/custom_analyzer_test.cpp             |  357 ++++
 .../compaction/util/index_compaction_utils.cpp     |    4 +-
 .../query/phrase_edge_query_test.cpp               |   28 +-
 .../query/phrase_prefix_query_test.cpp             |   29 +-
 .../ordered_sloppy_phrase_matcher_test.cpp         |    2 +-
 .../inverted_index/query/phrase_query_test.cpp     |   70 +-
 .../segment_v2/inverted_index/setting_test.cpp     |  184 ++
 .../ascii_folding_filter_factory_test.cpp          |  215 +++
 .../lower_case_filter_factory_test.cpp             |  109 ++
 .../word_delimiter_filter_factory_test.cpp         |  147 ++
 .../token_filter/word_delimiter_filter_test.cpp    |  331 ++++
 .../token_filter/word_delimiter_iterator_test.cpp  |  267 +++
 .../tokenizer/edge_ngram_tokenizer_test.cpp        |  241 +++
 .../tokenizer/keyword_analyzer_test.cpp            |  147 ++
 .../tokenizer/ngram_tokenizer_test.cpp             |  205 ++
 .../tokenizer/standard_tokenizer_factory_test.cpp  |  149 ++
 .../util/union_term_iterator_test.cpp              |   39 +-
 .../runtime/index_policy/index_policy_mgr_test.cpp |  179 ++
 be/test/vec/function/function_match_test.cpp       |   10 +-
 ...lti_match.cpp => function_multi_match_test.cpp} |    0
 .../antlr4/org/apache/doris/nereids/DorisLexer.g4  |    3 +
 .../antlr4/org/apache/doris/nereids/DorisParser.g4 |   12 +
 fe/fe-core/src/main/cup/sql_parser.cup             |   46 +
 .../doris/analysis/CreateIndexPolicyStmt.java      |   97 +
 .../apache/doris/analysis/DropIndexPolicyStmt.java |   78 +
 .../apache/doris/analysis/InvertedIndexUtil.java   |   25 +-
 .../org/apache/doris/analysis/MatchPredicate.java  |    5 +
 .../apache/doris/analysis/ShowIndexPolicyStmt.java |   68 +
 .../main/java/org/apache/doris/catalog/Env.java    |   19 +
 .../main/java/org/apache/doris/catalog/Index.java  |    4 +
 .../java/org/apache/doris/common/FeNameFormat.java |    4 +
 .../AsciiFoldingTokenFilterValidator.java          |   48 +
 .../doris/indexpolicy/BasePolicyValidator.java     |   52 +
 .../doris/indexpolicy/DropIndexPolicyLog.java      |   51 +
 .../indexpolicy/EdgeNGramTokenizerValidator.java   |  106 +
 .../org/apache/doris/indexpolicy/IndexPolicy.java  |  120 ++
 .../apache/doris/indexpolicy/IndexPolicyMgr.java   |  430 +++++
 .../doris/indexpolicy/IndexPolicyTypeEnum.java     |   31 +-
 .../indexpolicy/KeywordTokenizerValidator.java     |   36 +-
 .../indexpolicy/LowerCaseTokenFilterValidator.java |   36 +-
 .../doris/indexpolicy/NGramTokenizerValidator.java |  106 +
 .../doris/indexpolicy/PolicyPropertyValidator.java |   23 +-
 .../indexpolicy/StandardTokenizerValidator.java    |   53 +
 .../WordDelimiterTokenFilterValidator.java         |  120 ++
 .../org/apache/doris/journal/JournalEntity.java    |   12 +
 .../org/apache/doris/master/ReportHandler.java     |   99 +-
 .../doris/nereids/parser/LogicalPlanBuilder.java   |   84 +
 .../expressions/functions/scalar/Tokenize.java     |    4 +-
 .../apache/doris/nereids/trees/plans/PlanType.java |   11 +-
 .../plans/commands/CreateIndexAnalyzerCommand.java |   75 +
 .../commands/CreateIndexTokenFilterCommand.java    |   75 +
 .../commands/CreateIndexTokenizerCommand.java      |   75 +
 .../plans/commands/DropIndexAnalyzerCommand.java   |   62 +
 .../commands/DropIndexTokenFilterCommand.java      |   62 +
 .../plans/commands/DropIndexTokenizerCommand.java  |   62 +
 .../plans/commands/ShowIndexAnalyzerCommand.java   |   60 +
 .../commands/ShowIndexTokenFilterCommand.java      |   60 +
 .../plans/commands/ShowIndexTokenizerCommand.java  |   60 +
 .../trees/plans/visitor/CommandVisitor.java        |   54 +
 .../java/org/apache/doris/persist/EditLog.java     |   20 +
 .../org/apache/doris/persist/OperationType.java    |    4 +
 .../doris/persist/meta/MetaPersistMethod.java      |    6 +
 .../doris/persist/meta/PersistMetaModules.java     |    2 +-
 .../main/java/org/apache/doris/qe/DdlExecutor.java |    6 +
 .../java/org/apache/doris/qe/ShowExecutor.java     |    9 +
 .../java/org/apache/doris/task/AgentBatchTask.java |   10 +
 .../org/apache/doris/task/PushIndexPolicyTask.java |   63 +
 fe/fe-core/src/main/jflex/sql_scanner.flex         |    3 +
 .../doris/indexpolicy/PolicyValidatorTests.java    |  203 ++
 gensrc/script/doris_builtins_functions.py          |    2 +-
 gensrc/thrift/AgentService.thrift                  |   19 +
 gensrc/thrift/Exprs.thrift                         |    1 +
 gensrc/thrift/MasterService.thrift                 |    1 +
 gensrc/thrift/Types.thrift                         |    1 +
 .../analyzer/test_custom_analyzer.out              |  Bin 0 -> 1217 bytes
 .../analyzer/test_custom_analyzer1.out             |  Bin 0 -> 528 bytes
 .../data/inverted_index_p0/test_tokenize.out       |  Bin 3771 -> 11439 bytes
 .../nereids_function_p0/scalar_function/Array.out  |  Bin 773605 -> 774955 bytes
 .../nereids_function_p0/scalar_function/Array1.out |  Bin 33282709 -> 33283025 bytes
 .../suites/function_p0/test_array_map.groovy       |    4 +-
 .../analyzer/test_custom_analyzer.groovy           |  141 ++
 .../analyzer/test_custom_analyzer1.groovy          |   72 +
 162 files changed, 13272 insertions(+), 832 deletions(-)
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> abstract_analysis_factory.h} (53%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analysis_factory_mgr.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer_config.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/query/query_info.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/setting.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter.cpp
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> token_filter/ascii_folding_filter.h} (51%)
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> token_filter/ascii_folding_filter_factory.h} (54%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter.h
 copy be/src/olap/rowset/segment_v2/inverted_index/{query_v2/term_query.cpp => 
token_filter/lower_case_filter_factory.h} (53%)
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> token_filter/token_filter.h} (53%)
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> token_filter/token_filter_factory.h} (53%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_iterator.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_iterator.h
 create mode 100644 be/src/olap/rowset/segment_v2/inverted_index/token_stream.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/keyword/keyword_tokenizer.h
 copy be/src/olap/rowset/segment_v2/inverted_index/{query_v2/term_query.h => 
tokenizer/keyword/keyword_tokenizer_factory.h} (51%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/char_matcher.h
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> tokenizer/ngram/edge_ngram_tokenizer.h} (53%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/edge_ngram_tokenizer_factory.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer_factory.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/ngram/ngram_tokenizer_factory.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer.h
 copy be/src/olap/rowset/segment_v2/inverted_index/{query_v2/term_query.h => 
tokenizer/standard/standard_tokenizer_factory.h} (50%)
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.cpp
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/standard/standard_tokenizer_impl.h
 create mode 100644 
be/src/olap/rowset/segment_v2/inverted_index/tokenizer/tokenizer.h
 copy be/src/olap/rowset/segment_v2/inverted_index/{query/disjunction_query.h 
=> tokenizer/tokenizer_factory.h} (53%)
 create mode 100644 be/src/runtime/index_policy/index_policy_mgr.cpp
 create mode 100644 be/src/runtime/index_policy/index_policy_mgr.h
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/ananlyzer/custom_analyzer_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/setting_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/token_filter/ascii_folding_filter_factory_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/token_filter/lower_case_filter_factory_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_factory_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_filter_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/token_filter/word_delimiter_iterator_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/tokenizer/edge_ngram_tokenizer_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/tokenizer/keyword_analyzer_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/tokenizer/ngram_tokenizer_test.cpp
 create mode 100644 
be/test/olap/rowset/segment_v2/inverted_index/tokenizer/standard_tokenizer_factory_test.cpp
 create mode 100644 be/test/runtime/index_policy/index_policy_mgr_test.cpp
 rename be/test/vec/function/{function_multi_match.cpp => 
function_multi_match_test.cpp} (100%)
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/analysis/CreateIndexPolicyStmt.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/analysis/DropIndexPolicyStmt.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/analysis/ShowIndexPolicyStmt.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/AsciiFoldingTokenFilterValidator.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/BasePolicyValidator.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/DropIndexPolicyLog.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/EdgeNGramTokenizerValidator.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicy.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyMgr.java
 copy be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp => 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/IndexPolicyTypeEnum.java 
(53%)
 copy be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp => 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/KeywordTokenizerValidator.java
 (54%)
 copy be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp => 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/LowerCaseTokenFilterValidator.java
 (54%)
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/NGramTokenizerValidator.java
 copy be/src/olap/rowset/segment_v2/inverted_index/query_v2/term_query.cpp => 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/PolicyPropertyValidator.java
 (53%)
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/StandardTokenizerValidator.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/indexpolicy/WordDelimiterTokenFilterValidator.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexAnalyzerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexTokenFilterCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateIndexTokenizerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexAnalyzerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexTokenFilterCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DropIndexTokenizerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexAnalyzerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexTokenFilterCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowIndexTokenizerCommand.java
 create mode 100644 
fe/fe-core/src/main/java/org/apache/doris/task/PushIndexPolicyTask.java
 create mode 100644 
fe/fe-core/src/test/java/org/apache/doris/indexpolicy/PolicyValidatorTests.java
 create mode 100644 
regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer.out
 create mode 100644 
regression-test/data/inverted_index_p0/analyzer/test_custom_analyzer1.out
 create mode 100644 
regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer.groovy
 create mode 100644 
regression-test/suites/inverted_index_p0/analyzer/test_custom_analyzer1.groovy


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to