[ https://issues.apache.org/jira/browse/DRILL-7437?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17059791#comment-17059791 ]
ASF GitHub Bot commented on DRILL-7437: --------------------------------------- cgivre commented on pull request #1892: DRILL-7437: Storage Plugin for Generic HTTP REST API URL: https://github.com/apache/drill/pull/1892#discussion_r392705743 ########## File path: contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/util/SimpleHttp.java ########## @@ -0,0 +1,240 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill.exec.store.http.util; + +import okhttp3.Cache; +import okhttp3.Credentials; +import okhttp3.FormBody; +import okhttp3.Interceptor; +import okhttp3.OkHttpClient; +import okhttp3.OkHttpClient.Builder; +import okhttp3.Request; +import okhttp3.Response; + +import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.store.http.HttpAPIConfig; +import org.apache.drill.exec.store.http.HttpStoragePluginConfig; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.TimeUnit; +import java.util.regex.Pattern; + + +/** + * This class performs the actual HTTP requests for the HTTP Storage Plugin. The core method is the getInputStream() + * method which accepts a url and opens an InputStream with that URL's contents. + */ +public class SimpleHttp { + private static final Logger logger = LoggerFactory.getLogger(SimpleHttp.class); + + private final OkHttpClient client; + + private final HttpStoragePluginConfig config; + + private final FragmentContext context; + + private final HttpAPIConfig apiConfig; + + public SimpleHttp(HttpStoragePluginConfig config, FragmentContext context, String connectionName) { + this.config = config; + this.context = context; + this.apiConfig = config.connections().get(connectionName); + client = setupHttpClient(); + } + + + + public InputStream getInputStream(String urlStr) { + Request.Builder requestBuilder; + + // The configuration does not allow for any other request types other than POST and GET. 
+ if (apiConfig.method().equals("get")) { + // Handle GET requests + requestBuilder = new Request.Builder().url(urlStr); + } else { + // Handle POST requests + FormBody.Builder formBodyBuilder = buildPostBody(); + requestBuilder = new Request.Builder() + .url(urlStr) + .post(formBodyBuilder.build()); + } + + // Add headers to request + if (apiConfig.headers() != null) { + for (Map.Entry<String, String> entry : apiConfig.headers().entrySet()) { + String key = entry.getKey(); + String value = entry.getValue(); + requestBuilder.addHeader(key, value); + } + } + + // Build the request object + Request request = requestBuilder.build(); + logger.debug("Headers: {}", request.headers()); + + try { + // Execute the request + Response response = client + .newCall(request) + .execute(); + + // If the request is unsuccessful, throw a UserException + if (!response.isSuccessful()) { + throw UserException + .dataReadError() + .message("Error retrieving data from HTTP Storage Plugin: " + response.code() + " " + response.message()) + .addContext("URL: ", urlStr) + .addContext("Response code: ", response.code()) + .build(logger); + } + logger.debug("HTTP Request for {} successful.", urlStr); + logger.debug("Response Headers: {} ", response.headers().toString()); + + // Return the InputStream of the response + return Objects.requireNonNull(response.body()).byteStream(); + } catch (IOException e) { + throw UserException + .dataReadError(e) + .message("Error retrieving data from HTTP Storage Plugin: %s", e.getMessage()) + .addContext("URL Requested:" + urlStr) + .build(logger); + } + } + + /** + * Function configures the OkHTTP3 server object with configuration info from the user. + * + * @return OkHttpClient configured server + */ + private OkHttpClient setupHttpClient() { + Builder builder = new OkHttpClient.Builder(); + + // Set up the HTTP Cache. Future possibilities include making the cache size and retention configurable but + // right now it is on or off. 
The writer will write to the Drill temp directory if it is accessible and + // output a warning if not. + if (config.cacheResults()) { + setupCache(builder); + } + + // If the API uses basic authentication add the authentication code. + if (apiConfig.authType().toLowerCase().equals("basic")) { + logger.debug("Adding Interceptor"); + builder.addInterceptor(new BasicAuthInterceptor(apiConfig.userName(), apiConfig.password())); + } + + // Set timeout + builder.connectTimeout(config.timeout(), TimeUnit.SECONDS); + builder.writeTimeout(config.timeout(), TimeUnit.SECONDS); + builder.readTimeout(config.timeout(), TimeUnit.SECONDS); + + return builder.build(); + } + + /** + * This function accepts a Builder object as input and configures response caching. In order for + * caching to work, the DRILL_TMP_DIR variable must be set either as a system environment variable or in the + * Drill configurations. + * <p> + * The function will attempt to get the DRILL_TMP_DIR from these places, and if it cannot, it will issue a warning in the logger. + * + * @param builder Builder the Builder object to which the cacheing is to be configured + */ + private void setupCache(Builder builder) { + int cacheSize = 10 * 1024 * 1024; // TODO Add cache size in MB to config + String drillTempDir; + + try { + if (context.getOptions().getOption(ExecConstants.DRILL_TMP_DIR) != null) { + drillTempDir = context.getOptions().getOption(ExecConstants.DRILL_TMP_DIR).string_val; + } else { + drillTempDir = System.getenv("DRILL_TMP_DIR"); + } + File cacheDirectory = new File(drillTempDir); + if (cacheDirectory == null) { + logger.warn("HTTP Storage plugin caching requires the DRILL_TMP_DIR to be configured. 
Please either set DRILL_TMP_DIR or disable HTTP caching."); + } else { + Cache cache = new Cache(cacheDirectory, cacheSize); + logger.debug("Caching HTTP Query Results at: {}", drillTempDir); + + builder.cache(cache); + } + } catch (Exception e) { + logger.warn("HTTP Storage plugin caching requires the DRILL_TMP_DIR to be configured. Please either set DRILL_TMP_DIR or disable HTTP caching."); + } + } + + /** + * This function accepts text from a post body in the format: + * key1=value1 + * key2=value2 + * + * and creates the appropriate headers. + * + * @return FormBodu.Builder The populated formbody builder Review comment: Fixed ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org > Storage Plugin for Generic HTTP REST API > ---------------------------------------- > > Key: DRILL-7437 > URL: https://issues.apache.org/jira/browse/DRILL-7437 > Project: Apache Drill > Issue Type: New Feature > Reporter: Charles Givre > Assignee: Charles Givre > Priority: Minor > Fix For: Future > > > In many data analytics situations there is a need to obtain reference data > which is volatile or hosted on a service with a REST API. > For instance, consider the case of a financial dataset on which you want to run > a currency conversion. Or in the security arena, an organization might have > a service that returns network information about an IT asset. The goal is > to enable Drill to quickly incorporate external data that is only accessible > via a REST API. > This plugin is not intended to be a substitute for dedicated storage plugins > for systems that use a REST API, such as Apache Solr or Elasticsearch. > This plugin is based on several projects that were posted on GitHub but never > completed or submitted to Drill. 
Posted here for attribution: > * [https://github.com/kevinlynx/drill-storage-http] > * [https://github.com/mayunSaicmotor/drill-storage-http] > -- This message was sent by Atlassian Jira (v8.3.4#803005)