[ https://issues.apache.org/jira/browse/NIFI-2417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15509910#comment-15509910 ]
ASF GitHub Bot commented on NIFI-2417:
--------------------------------------
Github user mattyb149 commented on a diff in the pull request:
https://github.com/apache/nifi/pull/733#discussion_r79826318
--- Diff: nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/QueryElasticsearchHttp.java ---
@@ -0,0 +1,410 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.elasticsearch;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.nifi.annotation.behavior.EventDriven;
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.util.StandardValidators;
+import org.apache.nifi.stream.io.ByteArrayInputStream;
+import org.codehaus.jackson.JsonNode;
+
+import okhttp3.HttpUrl;
+import okhttp3.OkHttpClient;
+import okhttp3.Response;
+import okhttp3.ResponseBody;
+
+@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED)
+@EventDriven
+@SupportsBatching
+@Tags({ "elasticsearch", "query", "read", "get", "http" })
+@CapabilityDescription("Queries Elasticsearch using the specified
connection properties. "
+ + "Note that the full body of each page of documents will be read
into memory before being "
+ + "written to Flow Files for transfer. Also note that the
Elasticsearch max_result_window index "
+ + "setting is the upper bound on the number of records that can be
retrieved using this query. "
+ + "To retrieve more records, use the ScrollElasticsearchHttp
processor.")
+@WritesAttributes({
+ @WritesAttribute(attribute = "filename", description = "The
filename attribute is set to the document identifier"),
+ @WritesAttribute(attribute = "es.index", description = "The
Elasticsearch index containing the document"),
+ @WritesAttribute(attribute = "es.type", description = "The
Elasticsearch document type"),
+ @WritesAttribute(attribute = "es.result.*", description = "If
Target is 'Flow file attributes', the JSON attributes of "
+ + "each result will be placed into corresponding
attributes with this prefix.") })
+public class QueryElasticsearchHttp extends
AbstractElasticsearchHttpProcessor {
+
+ private static final String FIELD_INCLUDE_QUERY_PARAM =
"_source_include";
+ private static final String QUERY_QUERY_PARAM = "q";
+ private static final String SORT_QUERY_PARAM = "sort";
+ private static final String FROM_QUERY_PARAM = "from";
+ private static final String SIZE_QUERY_PARAM = "size";
+
+ public static final String TARGET_FLOW_FILE_CONTENT = "Flow file
content";
+ public static final String TARGET_FLOW_FILE_ATTRIBUTES = "Flow file
attributes";
+ private static final String ATTRIBUTE_PREFIX = "es.result.";
+
+ public static final Relationship REL_SUCCESS = new
Relationship.Builder()
+ .name("success")
+ .description(
+ "All FlowFiles that are read from Elasticsearch are
routed to this relationship.")
+ .build();
+
+ public static final Relationship REL_FAILURE = new
Relationship.Builder()
+ .name("failure")
+ .description(
+ "All FlowFiles that cannot be read from Elasticsearch
are routed to this relationship. Note that only incoming "
+ + "flow files will be routed to
failure.").build();
+
+ public static final Relationship REL_RETRY = new Relationship.Builder()
+ .name("retry")
+ .description(
+ "A FlowFile is routed to this relationship if the
document cannot be fetched but attempting the operation again may "
+ + "succeed. Note that if the processor has no
incoming connections, flow files may still be sent to this relationship "
+ + "based on the processor properties and the
results of the fetch operation.")
+ .build();
+
+ public static final PropertyDescriptor QUERY = new
PropertyDescriptor.Builder()
+ .name("query-es-query").displayName("Query")
+ .description("The Lucene-style query to run against
ElasticSearch").required(true)
+
.expressionLanguageSupported(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor INDEX = new
PropertyDescriptor.Builder()
+ .name("query-es-index").displayName("Index")
+ .description("The name of the index to read
from").required(true)
+
.expressionLanguageSupported(true).addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
+ .build();
+
+ public static final PropertyDescriptor TYPE = new
PropertyDescriptor.Builder()
+ .name("query-es-type")
+ .displayName("Type")
+ .description(
+ "The (optional) type of this document, used by
Elasticsearch for indexing and searching. If the property is empty or set "
+ + "to _all, the first document matching the
identifier across all types will be retrieved.")
--- End diff --
I couldn't get this to work with the Type property set to _all, but that might
be caused by my Elasticsearch configuration. Any pointers?
> Implement Query and Scroll processors for ElasticSearch
> -------------------------------------------------------
>
> Key: NIFI-2417
> URL: https://issues.apache.org/jira/browse/NIFI-2417
> Project: Apache NiFi
> Issue Type: New Feature
> Components: Extensions
> Affects Versions: 1.0.0
> Reporter: Joseph Gresock
> Assignee: Joseph Gresock
> Priority: Minor
> Fix For: 1.1.0
>
>
> FetchElasticsearchHttp allows users to select a single document from
> Elasticsearch in NiFi, but there is no way to run a query to retrieve
> multiple documents.
> We should add a QueryElasticsearchHttp processor that runs a query and
> returns one flow file per result, intended for small result sets. It should
> be able to run both with and without an incoming flow file.
> A separate ScrollElasticsearchHttp processor would also be useful for
> scrolling through a huge result set. It should use the state manager to
> maintain the scroll_id value and pass that value as input when requesting
> the next scroll page. As a result, this processor should not allow flow
> file input, but should retrieve one page per run.
--
This message was sent by Atlassian JIRA (v6.3.4#6332)