hailin0 commented on code in PR #2821:
URL:
https://github.com/apache/incubator-seatunnel/pull/2821#discussion_r982454257
##########
seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/sink/ElasticsearchSink.java:
##########
@@ -45,8 +45,7 @@ public String getPluginName() {
}
@Override
- public void prepare(org.apache.seatunnel.shade.com.typesafe.config.Config
pluginConfig) throws
- PrepareFailException {
+ public void prepare(org.apache.seatunnel.shade.com.typesafe.config.Config
pluginConfig) throws PrepareFailException {
Review Comment:
import `org.apache.seatunnel.shade.com.typesafe.config.Config` at the top of the file and use the short name `Config` in the method signature
##########
docs/en/connector-v2/source/Elasticsearch.md:
##########
@@ -0,0 +1,64 @@
+# Elasticsearch
+
+> Elasticsearch source connector
+
+## Description
+
+Used to read data from Elasticsearch.
+
+Supports Elasticsearch versions >= 2.x and < 8.x.
+
+## Key features
+
+- [x] [batch](../../concept/connector-v2-features.md)
+- [x] [stream](../../concept/connector-v2-features.md)
+- [x] [exactly-once](../../concept/connector-v2-features.md)
+- [ ] [schema projection](../../concept/connector-v2-features.md)
+- [ ] [parallelism](../../concept/connector-v2-features.md)
+- [ ] [support user-defined split](../../concept/connector-v2-features.md)
+
+## Options
+
+| name | type | required | default value |
+|-------------|--------| -------- |---------------|
+| hosts | array | yes | - |
+| username | string | no | - |
+| password | string | no | - |
+| index | string | yes | - |
+| source | array | yes | - |
+| scroll_time | string | no | 1m |
+| scroll_size | int | no | 100 |
+
+
+
+### hosts [array]
+Elasticsearch cluster http address, the format is `host:port`, allowing
multiple hosts to be specified. Such as `["host1:9200", "host2:9200"]`.
+
+### username [string]
+x-pack username.
+
+### password [string]
+x-pack password.
+
+### index [string]
+Elasticsearch index name; supports `*` fuzzy matching.
+
+### source [array]
+The fields of index.
+You can get the document id by specifying the field `_id`. If you sink `_id` to another
index, you need to specify an alias for `_id` due to an Elasticsearch limitation.
+
+### scroll_time [string]
+Amount of time Elasticsearch will keep the search context alive for scroll
requests.
+
+### scroll_size [int]
+Maximum number of hits to be returned with each Elasticsearch scroll request.
+
+## Examples
+simple
Review Comment:
Please also add a complex example that covers the optional parameters (e.g. `username`, `password`, `scroll_time`, and `scroll_size`)
##########
seatunnel-connectors-v2/connector-elasticsearch/src/main/java/org/apache/seatunnel/connectors/seatunnel/elasticsearch/source/ElasticsearchSourceReader.java:
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.connectors.seatunnel.elasticsearch.source;
+
+import org.apache.seatunnel.api.source.Collector;
+import org.apache.seatunnel.api.source.SourceReader;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import
org.apache.seatunnel.connectors.seatunnel.elasticsearch.client.EsRestClient;
+import
org.apache.seatunnel.connectors.seatunnel.elasticsearch.dto.source.ScrollResult;
+import
org.apache.seatunnel.connectors.seatunnel.elasticsearch.dto.source.SourceIndexInfo;
+
+import org.apache.seatunnel.shade.com.typesafe.config.Config;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+public class ElasticsearchSourceReader implements SourceReader<SeaTunnelRow,
ElasticsearchSourceSplit> {
+
+ protected static final Logger LOG =
LoggerFactory.getLogger(ElasticsearchSourceReader.class);
+
+ SourceReader.Context context;
+
+ private Config pluginConfig;
+
+ private EsRestClient esRestClient;
+
+ Deque<ElasticsearchSourceSplit> splits = new LinkedList<>();
+ boolean noMoreSplit;
+
+ private final long pollNextWaitTime = 1000L;
+
+ public ElasticsearchSourceReader(SourceReader.Context context, Config
pluginConfig) {
+ this.context = context;
+ this.pluginConfig = pluginConfig;
+ }
+
+ @Override
+ public void open() {
+ esRestClient = EsRestClient.createInstance(this.pluginConfig);
+ }
+
+ @Override
+ public void close() throws IOException {
+ esRestClient.close();
+ }
+
+ @Override
+ public void pollNext(Collector<SeaTunnelRow> output) throws Exception {
+ ElasticsearchSourceSplit split = splits.poll();
+ if (null != split) {
+ SourceIndexInfo sourceIndexInfo = split.getSourceIndexInfo();
+
+ ScrollResult scrollResult =
esRestClient.searchByScroll(sourceIndexInfo.getIndex(),
sourceIndexInfo.getSource(), sourceIndexInfo.getScrollTime(),
sourceIndexInfo.getScrollSize());
+ outputFromScrollResult(scrollResult, sourceIndexInfo.getSource(),
output);
+ while (scrollResult.getDocs() != null &&
scrollResult.getDocs().size() > 0) {
+ scrollResult =
esRestClient.searchWithScrollId(scrollResult.getScrollId(),
sourceIndexInfo.getScrollTime());
+ outputFromScrollResult(scrollResult,
sourceIndexInfo.getSource(), output);
+ }
+ } else if (noMoreSplit) {
+ // signal to the source that we have reached the end of the data.
+ LOG.info("Closed the bounded ELasticsearch source");
+ context.signalNoMoreElement();
+ } else {
+ Thread.sleep(pollNextWaitTime);
+ }
+ }
+
+ private void outputFromScrollResult(ScrollResult scrollResult,
List<String> source, Collector<SeaTunnelRow> output) {
+ int sourceSize = source.size();
+ for (Map<String, Object> doc : scrollResult.getDocs()) {
+ SeaTunnelRow seaTunnelRow = new SeaTunnelRow(sourceSize);
+ for (int i = 0; i < sourceSize; i++) {
+ Object value = doc.get(source.get(i));
+ seaTunnelRow.setField(i, String.valueOf(value));
Review Comment:
`String.valueOf(value)` converts every field to a string. Could you instead map the
Elasticsearch data types to the corresponding SeaTunnel data types? For reference, see
https://github.com/apache/incubator-seatunnel/tree/dev/seatunnel-connectors-v2/connector-iotdb/src/main/java/org/apache/seatunnel/connectors/seatunnel/iotdb/serialize
##########
seatunnel-e2e/seatunnel-flink-connector-v2-e2e/connector-elasticsearch-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/elasticsearch/ElasticsearchSourceToConsoleIT.java:
##########
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.e2e.flink.v2.elasticsearch;
+
+import
org.apache.seatunnel.connectors.seatunnel.elasticsearch.client.EsRestClient;
+import org.apache.seatunnel.e2e.flink.FlinkContainer;
+
+import com.google.common.collect.Lists;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.Container;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+import org.testcontainers.utility.DockerImageName;
+
+import java.io.IOException;
+
+public class ElasticsearchSourceToConsoleIT extends FlinkContainer {
+
+ private static final Logger LOGGER =
LoggerFactory.getLogger(ElasticsearchSourceToConsoleIT.class);
+
+ private ElasticsearchContainer container;
+
+ @SuppressWarnings({"checkstyle:MagicNumber", "checkstyle:Indentation"})
+ @BeforeEach
+ public void startElasticsearchContainer() throws InterruptedException {
+ container = new
ElasticsearchContainer(DockerImageName.parse("elasticsearch:6.8.23").asCompatibleSubstituteFor("docker.elastic.co/elasticsearch/elasticsearch"))
+ .withNetwork(NETWORK)
+ .withNetworkAliases("elasticsearch")
+ .withLogConsumer(new Slf4jLogConsumer(LOGGER));
+ container.start();
+ LOGGER.info("Elasticsearch container started");
+ Thread.sleep(5000L);
+ createIndexDocs();
+ }
+
+ /**
+ * create a index,and bulk some documents
+ */
+ private void createIndexDocs() {
+ EsRestClient esRestClient =
EsRestClient.createInstance(Lists.newArrayList(container.getHttpHostAddress()),
"", "");
+ String requestBody =
"{\"index\":{\"_index\":\"st_index\",\"_type\":\"st\"}}\n" +
+ "{\"name\":\"EbvYoFkXtS\",\"age\":18}\n" +
+ "{\"index\":{\"_index\":\"st_index\",\"_type\":\"st\"}}\n" +
+ "{\"name\":\"LjFMprGLJZ\",\"age\":19}\n" +
+ "{\"index\":{\"_index\":\"st_index\",\"_type\":\"st\"}}\n" +
+ "{\"name\":\"uJTtAVuSyI\",\"age\":20}\n";
+ esRestClient.bulk(requestBody);
+ try {
+ esRestClient.close();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void testElasticsearchSourceToConsoleSink() throws IOException,
InterruptedException {
+ Container.ExecResult execResult =
executeSeaTunnelFlinkJob("/elasticsearch/elasticsearch_to_console.conf");
+ Assertions.assertEquals(0, execResult.getExitCode());
Review Comment:
Please validate the row count, values, and types of the data after the e2e job finishes.
For reference, see
https://github.com/apache/incubator-seatunnel/blob/dev/seatunnel-e2e/seatunnel-flink-connector-v2-e2e/connector-mongodb-flink-e2e/src/test/java/org/apache/seatunnel/e2e/flink/v2/mongodb/MongodbIT.java#L106
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]