This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit eb85c6eeca77c748a01d304625ea5d608a3e12c0
Author: Tim Armstrong <tarmstr...@cloudera.com>
AuthorDate: Sat May 9 21:27:41 2020 -0700

    IMPALA-9793: Impala quickstart cluster with docker-compose
    
    What works:
    * A single node cluster can be started up with docker-compose
    * HMS data is stored in Derby database in a docker volume
    * Filesystem data is stored in a shared docker volume, using the
      localfs support in the Hadoop client.
    * A Kudu cluster with a single master can be optionally added on
      to the Impala cluster.
    * TPC-DS data can be loaded automatically by a data loading container.
    
    We need to set up a docker network called quickstart-network,
    purely because docker-compose insists on generating network names
    with underscores, which are part of the FQDN and end up causing
    problems with Java's URL parsing, which rejects these technically
    invalid domain names.
    
    How to run:
    
    Instructions for running the quickstart cluster are in
    docker/README.md.
    
    How to build containers:
    
      ./buildall.sh -release -noclean -notests -ninja
      ninja quickstart_hms_image quickstart_client_image docker_images
    
    How to upload containers to dockerhub:
    
      IMPALA_QUICKSTART_IMAGE_PREFIX=timgarmstrong/
      for i in impalad_coord_exec impalad_coordinator statestored \
               impalad_executor catalogd impala_quickstart_client \
               impala_quickstart_hms
      do
        docker tag $i ${IMPALA_QUICKSTART_IMAGE_PREFIX}$i
        docker push ${IMPALA_QUICKSTART_IMAGE_PREFIX}$i
      done
    
    I pushed containers built from commit f260cce22, which
    was branched from 6cb7cecacf on master.
    
    Misc other stuff:
    * Added more metadata to all images.
    
    TODO:
    * Test and instructions to run against Kudu quickstart
    * Upload latest version of containers before merging.
    
    Change-Id: Ifc0b862af40a368381ada7ec2a355fe4b0aa778c
    Reviewed-on: http://gerrit.cloudera.org:8080/15966
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
---
 docker/CMakeLists.txt                            |   36 +-
 docker/README.md                                 |  144 ++-
 docker/docker-build.sh                           |   39 +
 docker/impala_base/Dockerfile                    |   10 +
 docker/quickstart-kudu-minimal.yml               |  128 +++
 docker/quickstart-load-data.yml                  |   38 +
 docker/quickstart.yml                            |  104 ++
 docker/quickstart_client/Dockerfile              |   70 ++
 docker/quickstart_client/data-load-entrypoint.sh |   86 ++
 docker/quickstart_client/load_tpcds_kudu.sql     |  877 +++++++++++++++
 docker/quickstart_client/load_tpcds_parquet.sql  | 1248 ++++++++++++++++++++++
 docker/quickstart_conf/hive-site.xml             |   74 ++
 docker/quickstart_hms/Dockerfile                 |   67 ++
 docker/quickstart_hms/hms-entrypoint.sh          |   68 ++
 14 files changed, 2985 insertions(+), 4 deletions(-)

diff --git a/docker/CMakeLists.txt b/docker/CMakeLists.txt
index 60fd8c2..7fe085b 100644
--- a/docker/CMakeLists.txt
+++ b/docker/CMakeLists.txt
@@ -19,6 +19,8 @@ set(IMPALA_BASE_BUILD_CONTEXT_DIR
   ${CMAKE_SOURCE_DIR}/docker/build_context
 )
 
+set(DOCKER_BUILD ${CMAKE_SOURCE_DIR}/docker/docker-build.sh)
+
 find_program(LSB_RELEASE_EXEC lsb_release)
 execute_process(COMMAND ${LSB_RELEASE_EXEC} -is
   OUTPUT_VARIABLE LSB_RELEASE_ID
@@ -38,6 +40,7 @@ else()
 endif()
 MESSAGE(STATUS "Picked docker base image based on host OS: 
${DISTRO_BASE_IMAGE}")
 
+
 if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
   # Add a target to build a base docker image for 'build_type'. 
'build_context_args' are
   # passed to the setup_build_context.py script.
@@ -58,7 +61,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
       # sent to the docker daemon. This allows the Dockerfile built to copy 
all necessary
       # dependencies.
       COMMAND tar cvh . -C ${CMAKE_SOURCE_DIR}/docker/impala_base/ . |
-              docker build -t impala_base_${build_type}
+              ${DOCKER_BUILD} -t impala_base_${build_type}
                   --build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -
       WORKING_DIRECTORY ${IMPALA_BASE_BUILD_CONTEXT_DIR}/${build_type}
       DEPENDS impala_base_build_context_${build_type} 
${CMAKE_SOURCE_DIR}/docker/impala_base/Dockerfile
@@ -88,7 +91,7 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
       # build context used for the base image is used for each daemon image. 
This allows
       # each daemon image to only copy in the dependencies it requires.
       COMMAND tar cvh . -C ${CMAKE_SOURCE_DIR}/docker/${daemon_name}/ . |
-              docker build --build-arg BASE_IMAGE=impala_base_${build_type}
+              ${DOCKER_BUILD} --build-arg BASE_IMAGE=impala_base_${build_type}
                   -t ${image_name} -
       WORKING_DIRECTORY ${IMPALA_BASE_BUILD_CONTEXT_DIR}/${build_type}
       DEPENDS impala_base_image_${build_type} ${build_dir}/Dockerfile
@@ -123,5 +126,34 @@ if (NOT ${DISTRO_BASE_IMAGE} STREQUAL "UNSUPPORTED")
 
   # Generate a text file with all of the release daemon images.
   file(WRITE ${CMAKE_SOURCE_DIR}/docker/docker-images.txt 
"${daemon_image_names}")
+
+  # HMS quickstart image, which requires Hive and Hadoop builds.
+  set(quickstart_hms_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_hms)
+  add_custom_target(quickstart_hms_build_setup
+    COMMAND rm -f ${quickstart_hms_build_dir}/hive 
${quickstart_hms_build_dir}/hadoop
+    COMMAND ${CMAKE_COMMAND} -E create_symlink $ENV{HIVE_HOME} 
${quickstart_hms_build_dir}/hive
+    COMMAND ${CMAKE_COMMAND} -E create_symlink $ENV{HADOOP_HOME} 
${quickstart_hms_build_dir}/hadoop
+  )
+  add_custom_target(quickstart_hms_image
+    # Supply the appropriate base image as an argument for the Dockerfile.
+    # Use tar with -h flag to assemble a tarball including all the symlinked 
files and
+    # directories in the build context.
+    COMMAND tar cvh . -C ${quickstart_hms_build_dir} . | ${DOCKER_BUILD} 
--build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t impala_quickstart_hms -
+    DEPENDS ${quickstart_hms_build_dir}/Dockerfile quickstart_hms_build_setup
+    COMMENT "Building quickstart HMS docker image."
+    VERBATIM
+  )
+
+  # Client quickstart image, which only requires some scripts.
+  set(quickstart_client_build_dir ${CMAKE_SOURCE_DIR}/docker/quickstart_client)
+  add_custom_target(quickstart_client_image
+    # Pipe the build context to the docker-build.sh wrapper, which adds the common
+    # metadata build args. tar's -h flag dereferences symlinks so that linked files
+    # and directories end up in the context. BASE_IMAGE selects the base image.
+    COMMAND tar cvh . -C ${quickstart_client_build_dir} . | ${DOCKER_BUILD} 
--build-arg BASE_IMAGE=${DISTRO_BASE_IMAGE} -t 
impala_quickstart_client -
+    DEPENDS ${quickstart_client_build_dir}/Dockerfile 
${quickstart_client_build_dir}/data-load-entrypoint.sh
+    COMMENT "Building quickstart client docker image."
+    VERBATIM
+  )
 endif()
 
diff --git a/docker/README.md b/docker/README.md
index 210b733..e56d8fa 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -5,7 +5,147 @@ containers, parallelizing the test execution across test 
suites. See that file
 for more details.
 
 This also contains infrastructure to build `impala_base`, `catalogd`,
-``statestored`, `impalad_coordinator`, `impalad_executor` and
+`statestored`, `impalad_coordinator`, `impalad_executor` and
 `impalad_coord_exec` container images from the output of an Impala build.
-The containers can be built via the CMake target docker_images. See
+The containers can be built via the CMake target `docker_images`. See
 CMakeLists.txt for the build targets.
+
+# Docker Quickstart with docker-compose
+Various docker-compose files in this directory provide a convenient way to run
+a basic Impala service with a single Impala Daemon and minimal set of 
supporting
+services. A Hive MetaStore service is used to manage metadata. All filesystem 
data
+is stored in Docker volumes. The default storage location for tables is in the
+`impala-quickstart-warehouse` volume, i.e. if you create a table in Impala, it 
will
+be stored in that volume by default.
+
+## Prerequisites:
+A docker network called `quickstart-network` must be created, and the 
`QUICKSTART_IP` and
+`QUICKSTART_LISTEN_ADDR` environment variables must be set.
+
+```bash
+docker network create -d bridge quickstart-network
+export QUICKSTART_IP=$(docker network inspect quickstart-network -f '{{(index 
.IPAM.Config 0).Gateway}}')
+export QUICKSTART_LISTEN_ADDR=$QUICKSTART_IP
+```
+
+If you want the cluster to be open to connections from other hosts, you can
+instead set `QUICKSTART_LISTEN_ADDR` to `0.0.0.0`:
+
+```bash
+export QUICKSTART_LISTEN_ADDR=0.0.0.0
+```
+
+You can optionally set `IMPALA_QUICKSTART_IMAGE_PREFIX` to pull prebuilt 
images from a DockerHub repo,
+for example:
+
+```bash
+  export IMPALA_QUICKSTART_IMAGE_PREFIX="timgarmstrong/"
+```
+
+Leave `IMPALA_QUICKSTART_IMAGE_PREFIX` unset to use images built from a local 
Impala dev environment.
+
+## Starting the cluster:
+
+To start the base quickstart cluster without Kudu:
+
+```bash
+  docker-compose -f docker/quickstart.yml up -d
+```
+
+To load data in background into Parquet and Kudu formats:
+
+```bash
+  docker-compose -f docker/quickstart.yml -f 
docker/quickstart-kudu-minimal.yml \
+                 -f docker/quickstart-load-data.yml up -d
+```
+
+To follow the data loading process, you can use the `docker logs` command, 
e.g.:
+
+```bash
+  docker logs -f docker_data-loader_1
+```
+
+## Connecting to the cluster:
+
+The Impala service can be reached at `$QUICKSTART_IP`, or if you set
+`QUICKSTART_LISTEN_ADDR=0.0.0.0`, you can connect to it on `localhost` or your
+machine's host name.
+
+## Connecting with containerized impala-shell:
+
+```bash
+  docker run --network=quickstart-network -it \
+     ${IMPALA_QUICKSTART_IMAGE_PREFIX}impala_quickstart_client impala-shell
+```
+
+Or with a pre-installed impala-shell:
+
+```bash
+  impala-shell -i ${QUICKSTART_IP}
+```
+
+## Accessing the Warehouse volume
+If you want to directly interact with the contents of the warehouse in the
+`impala-quickstart-warehouse` Docker volume or copy data from the host into the
+quickstart warehouse, you can mount the volume in another container. E.g. to 
run
+an Ubuntu 18.04 container with the warehouse directory mounted at
+`/user/hive/warehouse` and your home directory mounted at `/host_dir`, you
+can run the following command:
+
+```bash
+docker run -v ~:/host_dir -v 
docker_impala-quickstart-warehouse:/user/hive/warehouse \
+    -it ubuntu:18.04 /bin/bash
+```
+
+In the container, you can find the external and managed tablespaces stored in
+the `impala-quickstart-warehouse` volume, for example:
+
+```
+root@377747c68bfa:/# ls /user/hive/warehouse/external/tpcds_raw/
+call_center       customer_demographics   inventory  store_returns  web_sales
+catalog_page      date_dim                item       store_sales    web_site
+catalog_returns   dbgen_version           promotion  time_dim
+catalog_sales     generated               reason     warehouse
+customer          household_demographics  ship_mode  web_page
+customer_address  income_band             store      web_returns
+root@377747c68bfa:/# head -n2 
/user/hive/warehouse/external/tpcds_raw/time_dim/time_dim.dat
+0|AAAAAAAABAAAAAAA|0|0|0|0|AM|third|night||
+1|AAAAAAAACAAAAAAA|1|0|0|1|AM|third|night||
+```
+
+It is then possible to copy data files from the host into an external table.
+In impala-shell, create an external table:
+```sql
+create external table quickstart_example(s string)
+stored as textfile
+location '/user/hive/warehouse/external/quickstart_example';
+```
+
+Then in the host and container shells, create a text file and copy it into the
+external table directory.
+```bash
+# On host:
+echo 'hello world' > ~/hw.txt
+
+# In container:
+cp /host_dir/hw.txt /user/hive/warehouse/external/quickstart_example
+```
+
+You can then refresh the table to pick up the data file and query the table:
+```sql
+refresh quickstart_example;
+select * from quickstart_example;
+```
+
+## Environment Variable Overrides:
+
+The following environment variables influence the behaviour of the various
+quickstart docker compose files.
+* `KUDU_QUICKSTART_VERSION` - defaults to latest, can be overridden to a
+  different tag to use different Kudu images.
+* `IMPALA_QUICKSTART_VERSION` - defaults to latest, can be overridden to a
+  different tag to use different Impala images.
+* `IMPALA_QUICKSTART_IMAGE_PREFIX` - defaults to using local images, change to
+  `"timgarmstrong/"` to use the prebuilt images from that DockerHub account.
+* `QUICKSTART_LISTEN_ADDR` - can be set to either `$QUICKSTART_IP` to listen on
+  only the docker network interface, or `0.0.0.0` to listen on all interfaces.
diff --git a/docker/docker-build.sh b/docker/docker-build.sh
new file mode 100755
index 0000000..e05eb60
--- /dev/null
+++ b/docker/docker-build.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Wrapper that invokes "docker build" with the provided arguments
+# and additional, common, build arguments.
+
+set -euo pipefail
+
+ARGS=()
+
+# Add common metadata to docker images. Lookups are best-effort (empty on failure).
+VCS_REF=$(git rev-parse HEAD || echo "")
+VERSION=$(grep 'VERSION' "${IMPALA_HOME:-}/bin/version.info" | cut -d ' ' -f 2 || 
echo "")
+ARGS+=("--build-arg" 'MAINTAINER=Apache Impala <dev@impala.apache.org>')
+ARGS+=("--build-arg" 'URL=https://impala.apache.org')
+ARGS+=("--build-arg" 'VCS_TYPE=git')
+ARGS+=("--build-arg" 'VCS_URL=https://gitbox.apache.org/repos/asf/impala.git')
+ARGS+=("--build-arg" "VERSION=$VERSION")
+ARGS+=("--build-arg" "VCS_REF=$VCS_REF")
+
+# Add caller-provided arguments to end.
+ARGS+=("$@")
+
+exec docker build "${ARGS[@]}"
diff --git a/docker/impala_base/Dockerfile b/docker/impala_base/Dockerfile
index 955a46f..d15af90 100644
--- a/docker/impala_base/Dockerfile
+++ b/docker/impala_base/Dockerfile
@@ -50,3 +50,13 @@ RUN cd /opt/impala/bin && ln -s impalad statestored && ln -s 
impalad catalogd &&
     mkdir /opt/impala/rangercache
 
 WORKDIR /opt/impala/
+
+LABEL name="Apache Impala Daemon Base Image" \
+      description="Common base image for Apache Impala daemons." \
+      # Common labels.
+      org.label-schema.maintainer=$MAINTAINER \
+      org.label-schema.url=$URL \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-type=$VCS_TYPE \
+      org.label-schema.vcs-url=$VCS_URL \
+      org.label-schema.version=$VERSION
diff --git a/docker/quickstart-kudu-minimal.yml 
b/docker/quickstart-kudu-minimal.yml
new file mode 100644
index 0000000..1cb3ef0
--- /dev/null
+++ b/docker/quickstart-kudu-minimal.yml
@@ -0,0 +1,128 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Minimal Apache Kudu cluster for quickstart with a single master and three 
tservers.
+# The Kudu repository contains a docker compose file (docker/quickstart.yml) 
for a more
+# production-like Kudu cluster with multiple masters, etc. This file is 
derived from that
+# file.
+version: "3"
+services:
+  kudu-master-1:
+    image: apache/kudu:${KUDU_QUICKSTART_VERSION:-latest}
+    ports:
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:7051:7051"
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:8051:8051"
+    command: ["master"]
+    volumes:
+      - kudu-minimal-master-1:/var/lib/kudu
+    environment:
+      # TODO: Use `host.docker.internal` instead of QUICKSTART_IP when it
+      # works on Linux (https://github.com/docker/for-linux/issues/264)
+      - >
+        MASTER_ARGS=--fs_wal_dir=/var/lib/kudu/master
+        --rpc_bind_addresses=0.0.0.0:7051
+        --rpc_advertised_addresses=${QUICKSTART_IP:?Please set QUICKSTART_IP 
environment variable}:7051
+        --webserver_port=8051
+        --webserver_advertised_addresses=${QUICKSTART_IP}:8051
+        --webserver_doc_root=/opt/kudu/www
+        --stderrthreshold=0
+        --use_hybrid_clock=false
+        --memory_limit_hard_bytes=1073741824
+    networks:
+      - quickstart-network
+  kudu-tserver-1:
+    image: apache/kudu:${KUDU_QUICKSTART_VERSION:-latest}
+    depends_on:
+      - kudu-master-1
+    ports:
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:7050:7050"
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:8050:8050"
+    command: ["tserver"]
+    volumes:
+      - kudu-minimal-tserver-1:/var/lib/kudu
+    environment:
+      - KUDU_MASTERS=kudu-master-1:7051
+      - >
+        TSERVER_ARGS=--fs_wal_dir=/var/lib/kudu/tserver
+        --rpc_bind_addresses=0.0.0.0:7050
+        --rpc_advertised_addresses=${QUICKSTART_IP}:7050
+        --webserver_port=8050
+        --webserver_advertised_addresses=${QUICKSTART_IP}:8050
+        --webserver_doc_root=/opt/kudu/www
+        --stderrthreshold=0
+        --use_hybrid_clock=false
+        --memory_limit_hard_bytes=1073741824
+    networks:
+      - quickstart-network
+  kudu-tserver-2:
+    image: apache/kudu:${KUDU_QUICKSTART_VERSION:-latest}
+    depends_on:
+      - kudu-master-1
+    ports:
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:7150:7150"
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:8150:8150"
+    command: ["tserver"]
+    volumes:
+      - kudu-minimal-tserver-2:/var/lib/kudu
+    environment:
+      - KUDU_MASTERS=kudu-master-1:7051
+      - >
+        TSERVER_ARGS=--fs_wal_dir=/var/lib/kudu/tserver
+        --rpc_bind_addresses=0.0.0.0:7150
+        --rpc_advertised_addresses=${QUICKSTART_IP}:7150
+        --webserver_port=8150
+        --webserver_advertised_addresses=${QUICKSTART_IP}:8150
+        --webserver_doc_root=/opt/kudu/www
+        --stderrthreshold=0
+        --use_hybrid_clock=false
+        --memory_limit_hard_bytes=1073741824
+    networks:
+      - quickstart-network
+  kudu-tserver-3:
+    image: apache/kudu:${KUDU_QUICKSTART_VERSION:-latest}
+    depends_on:
+      - kudu-master-1
+    ports:
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:7250:7250"
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:8250:8250"
+    command: ["tserver"]
+    volumes:
+      - kudu-minimal-tserver-3:/var/lib/kudu
+    environment:
+      - KUDU_MASTERS=kudu-master-1:7051
+      - >
+        TSERVER_ARGS=--fs_wal_dir=/var/lib/kudu/tserver
+        --rpc_bind_addresses=0.0.0.0:7250
+        --rpc_advertised_addresses=${QUICKSTART_IP}:7250
+        --webserver_port=8250
+        --webserver_advertised_addresses=${QUICKSTART_IP}:8250
+        --webserver_doc_root=/opt/kudu/www
+        --stderrthreshold=0
+        --use_hybrid_clock=false
+        --memory_limit_hard_bytes=1073741824
+    networks:
+      - quickstart-network
+volumes:
+  kudu-minimal-master-1:
+  kudu-minimal-tserver-1:
+  kudu-minimal-tserver-2:
+  kudu-minimal-tserver-3:
+
+networks:
+  quickstart-network:
+    external: true
+
diff --git a/docker/quickstart-load-data.yml b/docker/quickstart-load-data.yml
new file mode 100644
index 0000000..d247510
--- /dev/null
+++ b/docker/quickstart-load-data.yml
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This launches a container that will load data into the Impala cluster that 
is defined
+# in quickstart.yml.
+version: "3"
+services:
+  data-loader:
+    image: 
${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_client:${IMPALA_QUICKSTART_VERSION:-latest}
+    depends_on:
+      - impalad-1
+    command: ["load_tpcds"]
+    volumes:
+      - impala-quickstart-warehouse:/user/hive/warehouse
+      - ./quickstart_conf:/opt/impala/conf:ro
+    networks:
+      - quickstart-network
+volumes:
+  impala-quickstart-warehouse:
+
+networks:
+  quickstart-network:
+    external: true
+
diff --git a/docker/quickstart.yml b/docker/quickstart.yml
new file mode 100644
index 0000000..e50056f
--- /dev/null
+++ b/docker/quickstart.yml
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This defines a basic Impala service with a single Impala Daemon and the 
minimal set of
+# services required to support it. A Hive MetaStore service is used to manage 
metadata.
+# All filesystem data is stored in Docker volumes. The default storage 
location for tables
+# is in the impala-quickstart-warehouse volume, i.e. if you create a table in 
Impala, it
+# will be stored in that volume by default.
+#
+# See README.md in this directory for usage instructions.
+version: "3"
+services:
+  hms:
+    image: 
${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impala_quickstart_hms:${IMPALA_QUICKSTART_VERSION:-latest}
+    # Give the HMS an explicit hostname to avoid issues with 
docker-compose-generated
+    # hostnames including underscore, which is rejected by Java's URL parser.
+    container_name: quickstart-hive-metastore
+    command: ["hms"]
+    volumes:
+      # Dedicated volume used to store the Apache Derby database.
+      - impala-quickstart-hms:/var/lib/hive
+      # Warehouse directory. HMS does file operations so needs access to the
+      # shared volume.
+      - impala-quickstart-warehouse:/user/hive/warehouse
+      - ./quickstart_conf:/opt/hive/conf:ro
+    networks:
+      - quickstart-network
+  statestored:
+    image: 
${IMPALA_QUICKSTART_IMAGE_PREFIX:-}statestored:${IMPALA_QUICKSTART_VERSION:-latest}
+    ports:
+      # Web debug UI
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:25010:25010"
+    command: ["-redirect_stdout_stderr=false", "-logtostderr", "-v=1"]
+    volumes:
+      - ./quickstart_conf:/opt/impala/conf:ro
+    networks:
+      - quickstart-network
+  catalogd:
+    depends_on:
+      - statestored
+      - hms
+    image: 
${IMPALA_QUICKSTART_IMAGE_PREFIX:-}catalogd:${IMPALA_QUICKSTART_VERSION:-latest}
+    ports:
+      # Web debug UI
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:25020:25020"
+    command: ["-redirect_stdout_stderr=false", "-logtostderr", "-v=1",
+              "-hms_event_polling_interval_s=1", 
"-invalidate_tables_timeout_s=999999"]
+    volumes:
+      # Warehouse directory. Catalog does file operations so needs access to 
the
+      # shared volume.
+      - impala-quickstart-warehouse:/user/hive/warehouse
+      - ./quickstart_conf:/opt/impala/conf:ro
+    networks:
+      - quickstart-network
+  impalad-1:
+    image: 
${IMPALA_QUICKSTART_IMAGE_PREFIX:-}impalad_coord_exec:${IMPALA_QUICKSTART_VERSION:-latest}
+    depends_on:
+      - statestored
+      - catalogd
+    ports:
+      # Beeswax endpoint (deprecated)
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:21000:21000"
+      # HS2 endpoint
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:21050:21050"
+      # Web debug UI
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:25000:25000"
+      # HS2 over HTTP endpoint.
+      - "${QUICKSTART_LISTEN_ADDR:?Please set QUICKSTART_LISTEN_ADDR 
environment variable}:28000:28000"
+    command: [ "-v=1",
+              "-redirect_stdout_stderr=false", "-logtostderr",
+              "-kudu_master_hosts=kudu-master-1:7051",
+              "-mt_dop_auto_fallback=true",
+              
"-default_query_options=mt_dop=4,default_file_format=parquet,default_transactional_type=insert_only",
+              "-mem_limit=4gb"]
+    environment:
+      # Keep the Java heap small to preserve memory for query execution.
+      - JAVA_TOOL_OPTIONS="-Xmx1g"
+    volumes:
+      - impala-quickstart-warehouse:/user/hive/warehouse
+      - ./quickstart_conf:/opt/impala/conf:ro
+    networks:
+      - quickstart-network
+volumes:
+  impala-quickstart-hms:
+  impala-quickstart-warehouse:
+
+networks:
+  quickstart-network:
+    external: true
+
diff --git a/docker/quickstart_client/Dockerfile 
b/docker/quickstart_client/Dockerfile
new file mode 100644
index 0000000..96554f2
--- /dev/null
+++ b/docker/quickstart_client/Dockerfile
@@ -0,0 +1,70 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build an image that runs a script to load data into the quickstart warehouse.
+# The data load script is os-independent, so only build for a fixed OS.
+ARG BASE_IMAGE=ubuntu:18.04
+FROM ${BASE_IMAGE}
+
+# Common label arguments.
+ARG MAINTAINER
+ARG URL
+ARG VCS_REF
+ARG VCS_TYPE
+ARG VCS_URL
+ARG VERSION
+
+# Install useful utilities. Set to non-interactive to avoid issues when 
installing tzdata.
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+  apt-get install -y \
+  sudo netcat-openbsd less curl iproute2 vim iputils-ping \
+  libsasl2-dev libsasl2-2 libsasl2-modules libsasl2-modules-gssapi-mit \
+  tzdata krb5-user python-pip && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Install impala-shell from pip.
+# TODO: consider if it would be better to use the latest impala-shell from the 
build
+# environment.
+RUN pip install impala-shell
+
+# Use a non-privileged impala user to run the client tools in the container.
+# That user owns the /opt/impala directory created below.
+RUN groupadd -r impala -g 1000 && useradd --no-log-init -r -u 1000 -g 1000 
impala && \
+    mkdir -p /opt/impala && chown impala /opt/impala
+USER impala
+
+# Copy the client entrypoint and dataload files.
+WORKDIR /opt/impala
+COPY --chown=impala data-load-entrypoint.sh /data-load-entrypoint.sh
+COPY --chown=impala *.sql /opt/impala/sql/
+
+USER impala
+
+# Add the entrypoint.
+ENTRYPOINT ["/data-load-entrypoint.sh"]
+
+LABEL name="Apache Impala Quickstart Client" \
+      description="Client tools for Impala quickstart, including impala-shell 
and data loading utilities." \
+      # Common labels.
+      org.label-schema.maintainer=$MAINTAINER \
+      org.label-schema.url=$URL \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-type=$VCS_TYPE \
+      org.label-schema.vcs-url=$VCS_URL \
+      org.label-schema.version=$VERSION
diff --git a/docker/quickstart_client/data-load-entrypoint.sh 
b/docker/quickstart_client/data-load-entrypoint.sh
new file mode 100755
index 0000000..f1f584d
--- /dev/null
+++ b/docker/quickstart_client/data-load-entrypoint.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+################################################################################
+#
+# This script follows the pattern described in the docker best practices here:
+# https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#entrypoint
+################################################################################
+
+set -euo pipefail
+set -x
+
+if [[ $# -eq 0 ]]; then
+  echo "Must provide at least one argument."
+  exit 1
+elif [[ "$1" = "load_tpcds" ]]; then
+  echo "Loading TPC-DS data"
+  IMPALA_TOOLCHAIN_BASE=https://native-toolchain.s3.amazonaws.com/build/7-f2ddef91e9/
+  TPCDS_VERSION=2.1.0
+  TPCDS_TARBALL=tpc-ds-${TPCDS_VERSION}-gcc-4.9.2-ec2-package-ubuntu-18-04.tar.gz
+  TPCDS_URL=${IMPALA_TOOLCHAIN_BASE}tpc-ds/${TPCDS_VERSION}-gcc-4.9.2/${TPCDS_TARBALL}
+
+  # The base directory for Hive external tables, in a mounted volume.
+  WAREHOUSE_EXTERNAL_DIR=/user/hive/warehouse/external
+  TPCDS_RAW_DIR=${WAREHOUSE_EXTERNAL_DIR}/tpcds_raw
+
+  # Use a marker file to skip the slow, serial dsdgen run (and the download of
+  # the generator itself) if the data is already present in the warehouse.
+  if [[ ! -e "${TPCDS_RAW_DIR}/generated" ]]; then
+    # --fail: exit non-zero on HTTP errors rather than saving the error page.
+    curl --fail "${TPCDS_URL}" --output tpcds.tar.gz
+    tar xzf tpcds.tar.gz
+    SCALE_FACTOR=1
+    # Generate the data. This creates one .dat file for each table.
+    ./tpc-ds-${TPCDS_VERSION}/bin/dsdgen -force -verbose -scale ${SCALE_FACTOR}
+
+    # Move the tables into the warehouse, one per subdirectory
+    for FILE in *.dat; do
+      FILE_DIR=${TPCDS_RAW_DIR}/${FILE%.dat}
+      rm -rf "${FILE_DIR}"
+      mkdir -p "${FILE_DIR}"
+      mv "${FILE}" "${FILE_DIR}"
+    done
+    touch "${TPCDS_RAW_DIR}/generated"
+  fi
+
+  IMPALA_SHELL="impala-shell --protocol=hs2 -i docker_impalad-1_1"
+
+  # Wait until Impala comes up (it started in parallel with the data loader).
+  for i in $(seq 300); do
+    if ${IMPALA_SHELL} -q 'select version()'; then
+      break
+    fi
+    echo "Waiting for impala to come up"
+    sleep 0.5
+  done
+
+  ${IMPALA_SHELL} -f /opt/impala/sql/load_tpcds_parquet.sql
+  # Load data into Kudu if the Kudu master is up.
+  if ping -c1 kudu-master-1; then
+    ${IMPALA_SHELL} -f /opt/impala/sql/load_tpcds_kudu.sql
+  fi
+elif [[ "$1" = "impala-shell" ]]; then
+  shift
+  # Execute impala-shell with any extra arguments provided.
+  exec impala-shell --protocol=hs2 --history_file=/tmp/impalahistory \
+       -i docker_impalad-1_1 "$@"
+else
+  # Execute the provided input as a command
+  exec "$@"
+fi
diff --git a/docker/quickstart_client/load_tpcds_kudu.sql b/docker/quickstart_client/load_tpcds_kudu.sql
new file mode 100644
index 0000000..a7b8ad6
--- /dev/null
+++ b/docker/quickstart_client/load_tpcds_kudu.sql
@@ -0,0 +1,877 @@
+---- Licensed to the Apache Software Foundation (ASF) under one
+---- or more contributor license agreements.  See the NOTICE file
+---- distributed with this work for additional information
+---- regarding copyright ownership.  The ASF licenses this file
+---- to you under the Apache License, Version 2.0 (the
+---- "License"); you may not use this file except in compliance
+---- with the License.  You may obtain a copy of the License at
+----
+----   http://www.apache.org/licenses/LICENSE-2.0
+----
+---- Unless required by applicable law or agreed to in writing,
+---- software distributed under the License is distributed on an
+---- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+---- KIND, either express or implied.  See the License for the
+---- specific language governing permissions and limitations
+---- under the License.
+
+---- Create TPC-DS tables in Kudu and load them from tpcds_parquet. Existing tables are
+---- dropped first. TODO: Improve partitioning schemes used for tables.
+CREATE DATABASE IF NOT EXISTS tpcds_kudu;
+DROP TABLE IF EXISTS tpcds_kudu.call_center;
+DROP TABLE IF EXISTS tpcds_kudu.catalog_page;
+DROP TABLE IF EXISTS tpcds_kudu.catalog_returns;
+DROP TABLE IF EXISTS tpcds_kudu.catalog_sales;
+DROP TABLE IF EXISTS tpcds_kudu.customer;
+DROP TABLE IF EXISTS tpcds_kudu.customer_address;
+DROP TABLE IF EXISTS tpcds_kudu.customer_demographics;
+DROP TABLE IF EXISTS tpcds_kudu.date_dim;
+DROP TABLE IF EXISTS tpcds_kudu.household_demographics;
+DROP TABLE IF EXISTS tpcds_kudu.income_band;
+DROP TABLE IF EXISTS tpcds_kudu.inventory;
+DROP TABLE IF EXISTS tpcds_kudu.item;
+DROP TABLE IF EXISTS tpcds_kudu.promotion;
+DROP TABLE IF EXISTS tpcds_kudu.ship_mode;
+DROP TABLE IF EXISTS tpcds_kudu.store;
+DROP TABLE IF EXISTS tpcds_kudu.store_returns;
+DROP TABLE IF EXISTS tpcds_kudu.store_sales;
+DROP TABLE IF EXISTS tpcds_kudu.time_dim;
+DROP TABLE IF EXISTS tpcds_kudu.warehouse;
+DROP TABLE IF EXISTS tpcds_kudu.web_page;
+DROP TABLE IF EXISTS tpcds_kudu.web_returns;
+DROP TABLE IF EXISTS tpcds_kudu.web_sales;
+DROP TABLE IF EXISTS tpcds_kudu.web_site;
+---- STORE_SALES: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.store_sales (
+  ss_ticket_number BIGINT,
+  ss_item_sk BIGINT,
+  ss_sold_date_sk BIGINT,
+  ss_sold_time_sk BIGINT,
+  ss_customer_sk BIGINT,
+  ss_cdemo_sk BIGINT,
+  ss_hdemo_sk BIGINT,
+  ss_addr_sk BIGINT,
+  ss_store_sk BIGINT,
+  ss_promo_sk BIGINT,
+  ss_quantity BIGINT,
+  ss_wholesale_cost DECIMAL(7,2),
+  ss_list_price DECIMAL(7,2),
+  ss_sales_price DECIMAL(7,2),
+  ss_ext_discount_amt DECIMAL(7,2),
+  ss_ext_sales_price DECIMAL(7,2),
+  ss_ext_wholesale_cost DECIMAL(7,2),
+  ss_ext_list_price DECIMAL(7,2),
+  ss_ext_tax DECIMAL(7,2),
+  ss_coupon_amt DECIMAL(7,2),
+  ss_net_paid DECIMAL(7,2),
+  ss_net_paid_inc_tax DECIMAL(7,2),
+  ss_net_profit DECIMAL(7,2),
+  PRIMARY KEY (ss_ticket_number, ss_item_sk)
+)
+PARTITION BY HASH (ss_ticket_number,ss_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.store_sales
+SELECT
+  ss_ticket_number,
+  ss_item_sk,
+  ss_sold_date_sk,
+  ss_sold_time_sk,
+  ss_customer_sk,
+  ss_cdemo_sk,
+  ss_hdemo_sk,
+  ss_addr_sk,
+  ss_store_sk,
+  ss_promo_sk,
+  ss_quantity,
+  ss_wholesale_cost,
+  ss_list_price,
+  ss_sales_price,
+  ss_ext_discount_amt,
+  ss_ext_sales_price,
+  ss_ext_wholesale_cost,
+  ss_ext_list_price,
+  ss_ext_tax,
+  ss_coupon_amt,
+  ss_net_paid,
+  ss_net_paid_inc_tax,ss_net_profit
+FROM tpcds_parquet.store_sales;
+
+---- WEB_SALES: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.web_sales (
+  ws_order_number BIGINT,
+  ws_item_sk BIGINT,
+  ws_sold_date_sk BIGINT,
+  ws_sold_time_sk BIGINT,
+  ws_ship_date_sk BIGINT,
+  ws_bill_customer_sk BIGINT,
+  ws_bill_cdemo_sk BIGINT,
+  ws_bill_hdemo_sk BIGINT,
+  ws_bill_addr_sk BIGINT,
+  ws_ship_customer_sk BIGINT,
+  ws_ship_cdemo_sk BIGINT,
+  ws_ship_hdemo_sk BIGINT,
+  ws_ship_addr_sk BIGINT,
+  ws_web_page_sk BIGINT,
+  ws_web_site_sk BIGINT,
+  ws_ship_mode_sk BIGINT,
+  ws_warehouse_sk BIGINT,
+  ws_promo_sk BIGINT,
+  ws_quantity BIGINT,
+  ws_wholesale_cost DECIMAL(7,2),
+  ws_list_price DECIMAL(7,2),
+  ws_sales_price DECIMAL(7,2),
+  ws_ext_discount_amt DECIMAL(7,2),
+  ws_ext_sales_price DECIMAL(7,2),
+  ws_ext_wholesale_cost DECIMAL(7,2),
+  ws_ext_list_price DECIMAL(7,2),
+  ws_ext_tax DECIMAL(7,2),
+  ws_coupon_amt DECIMAL(7,2),
+  ws_ext_ship_cost DECIMAL(7,2),
+  ws_net_paid DECIMAL(7,2),
+  ws_net_paid_inc_tax DECIMAL(7,2),
+  ws_net_paid_inc_ship DECIMAL(7,2),
+  ws_net_paid_inc_ship_tax DECIMAL(7,2),
+  ws_net_profit DECIMAL(7,2),
+  PRIMARY KEY (ws_order_number, ws_item_sk)
+)
+PARTITION BY HASH (ws_order_number,ws_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.web_sales
+SELECT
+  ws_order_number,
+  ws_item_sk,
+  ws_sold_date_sk,
+  ws_sold_time_sk,
+  ws_ship_date_sk,
+  ws_bill_customer_sk,
+  ws_bill_cdemo_sk,
+  ws_bill_hdemo_sk,
+  ws_bill_addr_sk,
+  ws_ship_customer_sk,
+  ws_ship_cdemo_sk,
+  ws_ship_hdemo_sk,
+  ws_ship_addr_sk,
+  ws_web_page_sk,
+  ws_web_site_sk,
+  ws_ship_mode_sk,
+  ws_warehouse_sk,
+  ws_promo_sk,
+  ws_quantity,
+  ws_wholesale_cost,
+  ws_list_price,
+  ws_sales_price,
+  ws_ext_discount_amt,
+  ws_ext_sales_price,
+  ws_ext_wholesale_cost,
+  ws_ext_list_price,
+  ws_ext_tax,
+  ws_coupon_amt,
+  ws_ext_ship_cost,
+  ws_net_paid,
+  ws_net_paid_inc_tax,
+  ws_net_paid_inc_ship,
+  ws_net_paid_inc_ship_tax,
+  ws_net_profit
+FROM tpcds_parquet.web_sales;
+
+---- CATALOG_SALES: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.catalog_sales (
+  cs_order_number BIGINT,
+  cs_item_sk BIGINT,
+  cs_sold_date_sk BIGINT,
+  cs_sold_time_sk BIGINT,
+  cs_ship_date_sk BIGINT,
+  cs_bill_customer_sk BIGINT,
+  cs_bill_cdemo_sk BIGINT,
+  cs_bill_hdemo_sk BIGINT,
+  cs_bill_addr_sk BIGINT,
+  cs_ship_customer_sk BIGINT,
+  cs_ship_cdemo_sk BIGINT,
+  cs_ship_hdemo_sk BIGINT,
+  cs_ship_addr_sk BIGINT,
+  cs_call_center_sk BIGINT,
+  cs_catalog_page_sk BIGINT,
+  cs_ship_mode_sk BIGINT,
+  cs_warehouse_sk BIGINT,
+  cs_promo_sk BIGINT,
+  cs_quantity BIGINT,
+  cs_wholesale_cost DECIMAL(7,2),
+  cs_list_price DECIMAL(7,2),
+  cs_sales_price DECIMAL(7,2),
+  cs_ext_discount_amt DECIMAL(7,2),
+  cs_ext_sales_price DECIMAL(7,2),
+  cs_ext_wholesale_cost DECIMAL(7,2),
+  cs_ext_list_price DECIMAL(7,2),
+  cs_ext_tax DECIMAL(7,2),
+  cs_coupon_amt DECIMAL(7,2),
+  cs_ext_ship_cost DECIMAL(7,2),
+  cs_net_paid DECIMAL(7,2),
+  cs_net_paid_inc_tax DECIMAL(7,2),
+  cs_net_paid_inc_ship DECIMAL(7,2),
+  cs_net_paid_inc_ship_tax DECIMAL(7,2),
+  cs_net_profit DECIMAL(7,2),
+  PRIMARY KEY (cs_order_number, cs_item_sk)
+)
+PARTITION BY HASH (cs_order_number,cs_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.catalog_sales
+SELECT
+  cs_order_number,
+  cs_item_sk,
+  cs_sold_date_sk,
+  cs_sold_time_sk,
+  cs_ship_date_sk,
+  cs_bill_customer_sk,
+  cs_bill_cdemo_sk,
+  cs_bill_hdemo_sk,
+  cs_bill_addr_sk,
+  cs_ship_customer_sk,
+  cs_ship_cdemo_sk,
+  cs_ship_hdemo_sk,
+  cs_ship_addr_sk,
+  cs_call_center_sk,
+  cs_catalog_page_sk,
+  cs_ship_mode_sk,
+  cs_warehouse_sk,
+  cs_promo_sk,
+  cs_quantity,
+  cs_wholesale_cost,
+  cs_list_price,
+  cs_sales_price,
+  cs_ext_discount_amt,
+  cs_ext_sales_price,
+  cs_ext_wholesale_cost,
+  cs_ext_list_price,
+  cs_ext_tax,
+  cs_coupon_amt,
+  cs_ext_ship_cost,
+  cs_net_paid,
+  cs_net_paid_inc_tax,
+  cs_net_paid_inc_ship,
+  cs_net_paid_inc_ship_tax,
+  cs_net_profit
+FROM tpcds_parquet.catalog_sales;
+
+---- STORE_RETURNS: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.store_returns (
+  sr_ticket_number BIGINT,
+  sr_item_sk BIGINT,
+  sr_returned_date_sk BIGINT,
+  sr_return_time_sk BIGINT,
+  sr_customer_sk BIGINT,
+  sr_cdemo_sk BIGINT,
+  sr_hdemo_sk BIGINT,
+  sr_addr_sk BIGINT,
+  sr_store_sk BIGINT,
+  sr_reason_sk BIGINT,
+  sr_return_quantity BIGINT,
+  sr_return_amt DECIMAL(7,2),
+  sr_return_tax DECIMAL(7,2),
+  sr_return_amt_inc_tax DECIMAL(7,2),
+  sr_fee DECIMAL(7,2),
+  sr_return_ship_cost DECIMAL(7,2),
+  sr_refunded_cash DECIMAL(7,2),
+  sr_reversed_charge DECIMAL(7,2),
+  sr_store_credit DECIMAL(7,2),
+  sr_net_loss DECIMAL(7,2),
+  PRIMARY KEY (sr_ticket_number, sr_item_sk)
+)
+PARTITION BY HASH (sr_ticket_number,sr_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.store_returns
+SELECT
+  sr_ticket_number,
+  sr_item_sk,
+  sr_returned_date_sk,
+  sr_return_time_sk,
+  sr_customer_sk,
+  sr_cdemo_sk,
+  sr_hdemo_sk,
+  sr_addr_sk,
+  sr_store_sk,
+  sr_reason_sk,
+  sr_return_quantity,
+  sr_return_amt,
+  sr_return_tax,
+  sr_return_amt_inc_tax,
+  sr_fee,
+  sr_return_ship_cost,
+  sr_refunded_cash,
+  sr_reversed_charge,
+  sr_store_credit,
+  sr_net_loss
+FROM tpcds_parquet.store_returns;
+
+---- WEB_RETURNS: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.web_returns (
+  wr_order_number BIGINT,
+  wr_item_sk BIGINT,
+  wr_returned_date_sk BIGINT,
+  wr_returned_time_sk BIGINT,
+  wr_refunded_customer_sk BIGINT,
+  wr_refunded_cdemo_sk BIGINT,
+  wr_refunded_hdemo_sk BIGINT,
+  wr_refunded_addr_sk BIGINT,
+  wr_returning_customer_sk BIGINT,
+  wr_returning_cdemo_sk BIGINT,
+  wr_returning_hdemo_sk BIGINT,
+  wr_returning_addr_sk BIGINT,
+  wr_web_page_sk BIGINT,
+  wr_reason_sk BIGINT,
+  wr_return_quantity BIGINT,
+  wr_return_amt DECIMAL(7,2),
+  wr_return_tax DECIMAL(7,2),
+  wr_return_amt_inc_tax DECIMAL(7,2),
+  wr_fee DECIMAL(7,2),
+  wr_return_ship_cost DECIMAL(7,2),
+  wr_refunded_cash DECIMAL(7,2),
+  wr_reversed_charge DECIMAL(7,2),
+  wr_account_credit DECIMAL(7,2),
+  wr_net_loss DECIMAL(7,2),
+  PRIMARY KEY (wr_order_number, wr_item_sk)
+)
+PARTITION BY HASH (wr_order_number,wr_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.web_returns
+SELECT
+  wr_order_number,
+  wr_item_sk,
+  wr_returned_date_sk,
+  wr_returned_time_sk,
+  wr_refunded_customer_sk,
+  wr_refunded_cdemo_sk,
+  wr_refunded_hdemo_sk,
+  wr_refunded_addr_sk,
+  wr_returning_customer_sk,
+  wr_returning_cdemo_sk,
+  wr_returning_hdemo_sk,
+  wr_returning_addr_sk,
+  wr_web_page_sk,
+  wr_reason_sk,
+  wr_return_quantity,
+  wr_return_amt,
+  wr_return_tax,
+  wr_return_amt_inc_tax,
+  wr_fee,
+  wr_return_ship_cost,
+  wr_refunded_cash,
+  wr_reversed_charge,
+  wr_account_credit,
+  wr_net_loss
+FROM tpcds_parquet.web_returns;
+
+---- CATALOG_RETURNS: key columns come first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.catalog_returns (
+  cr_order_number BIGINT,
+  cr_item_sk BIGINT,
+  cr_returned_date_sk BIGINT,
+  cr_returned_time_sk BIGINT,
+  cr_refunded_customer_sk BIGINT,
+  cr_refunded_cdemo_sk BIGINT,
+  cr_refunded_hdemo_sk BIGINT,
+  cr_refunded_addr_sk BIGINT,
+  cr_returning_customer_sk BIGINT,
+  cr_returning_cdemo_sk BIGINT,
+  cr_returning_hdemo_sk BIGINT,
+  cr_returning_addr_sk BIGINT,
+  cr_call_center_sk BIGINT,
+  cr_catalog_page_sk BIGINT,
+  cr_ship_mode_sk BIGINT,
+  cr_warehouse_sk BIGINT,
+  cr_reason_sk BIGINT,
+  cr_return_quantity BIGINT,
+  cr_return_amount DECIMAL(7,2),
+  cr_return_tax DECIMAL(7,2),
+  cr_return_amt_inc_tax DECIMAL(7,2),
+  cr_fee DECIMAL(7,2),
+  cr_return_ship_cost DECIMAL(7,2),
+  cr_refunded_cash DECIMAL(7,2),
+  cr_reversed_charge DECIMAL(7,2),
+  cr_store_credit DECIMAL(7,2),
+  cr_net_loss DECIMAL(7,2),
+  PRIMARY KEY (cr_order_number, cr_item_sk)
+)
+PARTITION BY HASH (cr_order_number,cr_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.catalog_returns
+SELECT
+  cr_order_number,
+  cr_item_sk,
+  cr_returned_date_sk,
+  cr_returned_time_sk,
+  cr_refunded_customer_sk,
+  cr_refunded_cdemo_sk,
+  cr_refunded_hdemo_sk,
+  cr_refunded_addr_sk,
+  cr_returning_customer_sk,
+  cr_returning_cdemo_sk,
+  cr_returning_hdemo_sk,
+  cr_returning_addr_sk,
+  cr_call_center_sk,
+  cr_catalog_page_sk,
+  cr_ship_mode_sk,
+  cr_warehouse_sk,
+  cr_reason_sk,
+  cr_return_quantity,
+  cr_return_amount,
+  cr_return_tax,
+  cr_return_amt_inc_tax,
+  cr_fee,
+  cr_return_ship_cost,
+  cr_refunded_cash,
+  cr_reversed_charge,
+  cr_store_credit,
+  cr_net_loss
+FROM tpcds_parquet.catalog_returns;
+
+---- INVENTORY: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.inventory (
+  inv_date_sk BIGINT,
+  inv_item_sk BIGINT,
+  inv_warehouse_sk BIGINT,
+  inv_quantity_on_hand BIGINT,
+  PRIMARY KEY (inv_date_sk, inv_item_sk, inv_warehouse_sk)
+)
+PARTITION BY HASH (inv_item_sk,inv_date_sk,inv_warehouse_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.inventory SELECT * FROM tpcds_parquet.inventory;
+
+---- CUSTOMER: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.customer (
+  c_customer_sk BIGINT PRIMARY KEY,
+  c_customer_id STRING,
+  c_current_cdemo_sk BIGINT,
+  c_current_hdemo_sk BIGINT,
+  c_current_addr_sk BIGINT,
+  c_first_shipto_date_sk BIGINT,
+  c_first_sales_date_sk BIGINT,
+  c_salutation STRING,
+  c_first_name STRING,
+  c_last_name STRING,
+  c_preferred_cust_flag STRING,
+  c_birth_day INT,
+  c_birth_month INT,
+  c_birth_year INT,
+  c_birth_country STRING,
+  c_login STRING,
+  c_email_address STRING,
+  c_last_review_date STRING
+)
+PARTITION BY HASH (c_customer_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.customer SELECT * FROM tpcds_parquet.customer;
+
+---- CUSTOMER_ADDRESS: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.customer_address (
+  ca_address_sk BIGINT PRIMARY KEY,
+  ca_address_id STRING,
+  ca_street_number STRING,
+  ca_street_name STRING,
+  ca_street_type STRING,
+  ca_suite_number STRING,
+  ca_city STRING,
+  ca_county STRING,
+  ca_state STRING,
+  ca_zip STRING,
+  ca_country STRING,
+  ca_gmt_offset DECIMAL(5,2),
+  ca_location_type STRING
+)
+PARTITION BY HASH (ca_address_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.customer_address
+SELECT * FROM tpcds_parquet.customer_address;
+
+---- CUSTOMER_DEMOGRAPHICS: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.customer_demographics (
+  cd_demo_sk BIGINT PRIMARY KEY,
+  cd_gender STRING,
+  cd_marital_status STRING,
+  cd_education_status STRING,
+  cd_purchase_estimate BIGINT,
+  cd_credit_rating STRING,
+  cd_dep_count BIGINT,
+  cd_dep_employed_count BIGINT,
+  cd_dep_college_count BIGINT
+)
+PARTITION BY HASH (cd_demo_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.customer_demographics
+SELECT * FROM tpcds_parquet.customer_demographics;
+
+---- DATE_DIM: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.date_dim (
+  d_date_sk BIGINT PRIMARY KEY,
+  d_date_id STRING,
+  d_date STRING,
+  d_month_seq BIGINT,
+  d_week_seq BIGINT,
+  d_quarter_seq BIGINT,
+  d_year BIGINT,
+  d_dow BIGINT,
+  d_moy BIGINT,
+  d_dom BIGINT,
+  d_qoy BIGINT,
+  d_fy_year BIGINT,
+  d_fy_quarter_seq BIGINT,
+  d_fy_week_seq BIGINT,
+  d_day_name STRING,
+  d_quarter_name STRING,
+  d_holiday STRING,
+  d_weekend STRING,
+  d_following_holiday STRING,
+  d_first_dom BIGINT,
+  d_last_dom BIGINT,
+  d_same_day_ly BIGINT,
+  d_same_day_lq BIGINT,
+  d_current_day STRING,
+  d_current_week STRING,
+  d_current_month STRING,
+  d_current_quarter STRING,
+  d_current_year STRING
+)
+PARTITION BY HASH (d_date_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.date_dim SELECT * FROM tpcds_parquet.date_dim;
+
+---- HOUSEHOLD_DEMOGRAPHICS: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.household_demographics (
+  hd_demo_sk BIGINT PRIMARY KEY,
+  hd_income_band_sk BIGINT,
+  hd_buy_potential STRING,
+  hd_dep_count BIGINT,
+  hd_vehicle_count BIGINT
+)
+PARTITION BY HASH (hd_demo_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.household_demographics
+SELECT * FROM tpcds_parquet.household_demographics;
+
+---- ITEM: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.item (
+  i_item_sk BIGINT PRIMARY KEY,
+  i_item_id STRING,
+  i_rec_start_date STRING,
+  i_rec_end_date STRING,
+  i_item_desc STRING,
+  i_current_price DECIMAL(7,2),
+  i_wholesale_cost DECIMAL(7,2),
+  i_brand_id BIGINT,
+  i_brand STRING,
+  i_class_id BIGINT,
+  i_class STRING,
+  i_category_id BIGINT,
+  i_category STRING,
+  i_manufact_id BIGINT,
+  i_manufact STRING,
+  i_size STRING,
+  i_formulation STRING,
+  i_color STRING,
+  i_units STRING,
+  i_container STRING,
+  i_manager_id BIGINT,
+  i_product_name STRING
+)
+PARTITION BY HASH (i_item_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.item SELECT * FROM tpcds_parquet.item;
+
+---- PROMOTION: key column comes first; the INSERT lists columns in schema order.
+CREATE TABLE tpcds_kudu.promotion (
+  p_promo_sk BIGINT PRIMARY KEY,
+  p_item_sk BIGINT,
+  p_start_date_sk BIGINT,
+  p_end_date_sk BIGINT,
+  p_promo_id STRING,
+  p_cost DECIMAL(15,2),
+  p_response_target BIGINT,
+  p_promo_name STRING,
+  p_channel_dmail STRING,
+  p_channel_email STRING,
+  p_channel_catalog STRING,
+  p_channel_tv STRING,
+  p_channel_radio STRING,
+  p_channel_press STRING,
+  p_channel_event STRING,
+  p_channel_demo STRING,
+  p_channel_details STRING,
+  p_purpose STRING,
+  p_discount_active STRING
+)
+PARTITION BY HASH (p_promo_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.promotion
+SELECT
+  p_promo_sk,
+  p_item_sk,
+  p_start_date_sk,
+  p_end_date_sk,
+  p_promo_id,
+  p_cost,
+  p_response_target,
+  p_promo_name,
+  p_channel_dmail,
+  p_channel_email,
+  p_channel_catalog,
+  p_channel_tv,
+  p_channel_radio,
+  p_channel_press,
+  p_channel_event,
+  p_channel_demo,
+  p_channel_details,
+  p_purpose,
+  p_discount_active
+FROM tpcds_parquet.promotion;
+
+---- STORE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.store (
+  s_store_sk BIGINT PRIMARY KEY,
+  s_store_id STRING,
+  s_rec_start_date STRING,
+  s_rec_end_date STRING,
+  s_closed_date_sk BIGINT,
+  s_store_name STRING,
+  s_number_employees BIGINT,
+  s_floor_space BIGINT,
+  s_hours STRING,
+  s_manager STRING,
+  s_market_id BIGINT,
+  s_geography_class STRING,
+  s_market_desc STRING,
+  s_market_manager STRING,
+  s_division_id BIGINT,
+  s_division_name STRING,
+  s_company_id BIGINT,
+  s_company_name STRING,
+  s_street_number STRING,
+  s_street_name STRING,
+  s_street_type STRING,
+  s_suite_number STRING,
+  s_city STRING,
+  s_county STRING,
+  s_state STRING,
+  s_zip STRING,
+  s_country STRING,
+  s_gmt_offset DECIMAL(5,2),
+  s_tax_precentage DECIMAL(5,2)
+)
+PARTITION BY HASH (s_store_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.store SELECT * FROM tpcds_parquet.store;
+
+---- TIME_DIM: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.time_dim (
+  t_time_sk BIGINT PRIMARY KEY,
+  t_time_id STRING,
+  t_time BIGINT,
+  t_hour BIGINT,
+  t_minute BIGINT,
+  t_second BIGINT,
+  t_am_pm STRING,
+  t_shift STRING,
+  t_sub_shift STRING,
+  t_meal_time STRING
+)
+PARTITION BY HASH (t_time_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.time_dim SELECT * FROM tpcds_parquet.time_dim;
+
+---- CALL_CENTER: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.call_center (
+  cc_call_center_sk BIGINT PRIMARY KEY,
+  cc_call_center_id STRING,
+  cc_rec_start_date STRING,
+  cc_rec_end_date STRING,
+  cc_closed_date_sk BIGINT,
+  cc_open_date_sk BIGINT,
+  cc_name STRING,
+  cc_class STRING,
+  cc_employees BIGINT,
+  cc_sq_ft BIGINT,
+  cc_hours STRING,
+  cc_manager STRING,
+  cc_mkt_id BIGINT,
+  cc_mkt_class STRING,
+  cc_mkt_desc STRING,
+  cc_market_manager STRING,
+  cc_division BIGINT,
+  cc_division_name STRING,
+  cc_company BIGINT,
+  cc_company_name STRING,
+  cc_street_number STRING,
+  cc_street_name STRING,
+  cc_street_type STRING,
+  cc_suite_number STRING,
+  cc_city STRING,
+  cc_county STRING,
+  cc_state STRING,
+  cc_zip STRING,
+  cc_country STRING,
+  cc_gmt_offset DECIMAL(5,2),
+  cc_tax_percentage DECIMAL(5,2)
+)
+PARTITION BY HASH (cc_call_center_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.call_center SELECT * FROM tpcds_parquet.call_center;
+
+---- CATALOG_PAGE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.catalog_page (
+  cp_catalog_page_sk BIGINT PRIMARY KEY,
+  cp_catalog_page_id STRING,
+  cp_start_date_sk BIGINT,
+  cp_end_date_sk BIGINT,
+  cp_department STRING,
+  cp_catalog_number BIGINT,
+  cp_catalog_page_number BIGINT,
+  cp_description STRING,
+  cp_type STRING
+)
+PARTITION BY HASH (cp_catalog_page_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.catalog_page SELECT * FROM tpcds_parquet.catalog_page;
+
+---- INCOME_BAND: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.income_band (
+  ib_income_band_sk BIGINT PRIMARY KEY,
+  ib_lower_bound BIGINT,
+  ib_upper_bound BIGINT
+)
+PARTITION BY HASH (ib_income_band_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.income_band SELECT * FROM tpcds_parquet.income_band;
+
+---- SHIP_MODE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.ship_mode (
+  sm_ship_mode_sk BIGINT PRIMARY KEY,
+  sm_ship_mode_id STRING,
+  sm_type STRING,
+  sm_code STRING,
+  sm_carrier STRING,
+  sm_contract STRING
+)
+PARTITION BY HASH (sm_ship_mode_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.ship_mode SELECT * FROM tpcds_parquet.ship_mode;
+
+---- WAREHOUSE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.warehouse (
+  w_warehouse_sk BIGINT PRIMARY KEY,
+  w_warehouse_id STRING,
+  w_warehouse_name STRING,
+  w_warehouse_sq_ft BIGINT,
+  w_street_number STRING,
+  w_street_name STRING,
+  w_street_type STRING,
+  w_suite_number STRING,
+  w_city STRING,
+  w_county STRING,
+  w_state STRING,
+  w_zip STRING,
+  w_country STRING,
+  w_gmt_offset DECIMAL(5,2)
+)
+PARTITION BY HASH (w_warehouse_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.warehouse SELECT * FROM tpcds_parquet.warehouse;
+
+---- WEB_PAGE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.web_page (
+  wp_web_page_sk BIGINT PRIMARY KEY,
+  wp_web_page_id STRING,
+  wp_rec_start_date STRING,
+  wp_rec_end_date STRING,
+  wp_creation_date_sk BIGINT,
+  wp_access_date_sk BIGINT,
+  wp_autogen_flag STRING,
+  wp_customer_sk BIGINT,
+  wp_url STRING,
+  wp_type STRING,
+  wp_char_count BIGINT,
+  wp_link_count BIGINT,
+  wp_image_count BIGINT,
+  wp_max_ad_count BIGINT
+)
+PARTITION BY HASH (wp_web_page_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.web_page SELECT * FROM tpcds_parquet.web_page;
+
+---- WEB_SITE: SELECT * is positional; column order must match the source.
+CREATE TABLE tpcds_kudu.web_site (
+  web_site_sk BIGINT PRIMARY KEY,
+  web_site_id STRING,
+  web_rec_start_date STRING,
+  web_rec_end_date STRING,
+  web_name STRING,
+  web_open_date_sk BIGINT,
+  web_close_date_sk BIGINT,
+  web_class STRING,
+  web_manager STRING,
+  web_mkt_id BIGINT,
+  web_mkt_class STRING,
+  web_mkt_desc STRING,
+  web_market_manager STRING,
+  web_company_id BIGINT,
+  web_company_name STRING,
+  web_street_number STRING,
+  web_street_name STRING,
+  web_street_type STRING,
+  web_suite_number STRING,
+  web_city STRING,
+  web_county STRING,
+  web_state STRING,
+  web_zip STRING,
+  web_country STRING,
+  web_gmt_offset DECIMAL(5,2),
+  web_tax_percentage DECIMAL(5,2)
+)
+PARTITION BY HASH (web_site_sk) PARTITIONS 12
+STORED AS KUDU;
+
+INSERT INTO tpcds_kudu.web_site SELECT * FROM tpcds_parquet.web_site;
+
+-- Compute stats on all the tables loaded above for optimal query performance.
+COMPUTE STATS tpcds_kudu.call_center;
+COMPUTE STATS tpcds_kudu.catalog_page;
+COMPUTE STATS tpcds_kudu.catalog_returns;
+COMPUTE STATS tpcds_kudu.catalog_sales;
+COMPUTE STATS tpcds_kudu.customer;
+COMPUTE STATS tpcds_kudu.customer_address;
+COMPUTE STATS tpcds_kudu.customer_demographics;
+COMPUTE STATS tpcds_kudu.date_dim;
+COMPUTE STATS tpcds_kudu.household_demographics;
+COMPUTE STATS tpcds_kudu.income_band;
+COMPUTE STATS tpcds_kudu.inventory;
+COMPUTE STATS tpcds_kudu.item;
+COMPUTE STATS tpcds_kudu.promotion;
+COMPUTE STATS tpcds_kudu.ship_mode;
+COMPUTE STATS tpcds_kudu.store;
+COMPUTE STATS tpcds_kudu.store_returns;
+COMPUTE STATS tpcds_kudu.store_sales;
+COMPUTE STATS tpcds_kudu.time_dim;
+COMPUTE STATS tpcds_kudu.warehouse;
+COMPUTE STATS tpcds_kudu.web_page;
+COMPUTE STATS tpcds_kudu.web_returns;
+COMPUTE STATS tpcds_kudu.web_sales;
+COMPUTE STATS tpcds_kudu.web_site;
diff --git a/docker/quickstart_client/load_tpcds_parquet.sql b/docker/quickstart_client/load_tpcds_parquet.sql
new file mode 100644
index 0000000..d8cd90c
--- /dev/null
+++ b/docker/quickstart_client/load_tpcds_parquet.sql
@@ -0,0 +1,1248 @@
+---- Licensed to the Apache Software Foundation (ASF) under one
+---- or more contributor license agreements.  See the NOTICE file
+---- distributed with this work for additional information
+---- regarding copyright ownership.  The ASF licenses this file
+---- to you under the Apache License, Version 2.0 (the
+---- "License"); you may not use this file except in compliance
+---- with the License.  You may obtain a copy of the License at
+----
+----   http://www.apache.org/licenses/LICENSE-2.0
+----
+---- Unless required by applicable law or agreed to in writing,
+---- software distributed under the License is distributed on an
+---- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+---- KIND, either express or implied.  See the License for the
+---- specific language governing permissions and limitations
+---- under the License.
+
+-- Create text tables on top of raw text data. Existing tpcds_raw tables are dropped first.
+CREATE DATABASE IF NOT EXISTS tpcds_raw;
+DROP TABLE IF EXISTS tpcds_raw.call_center;
+DROP TABLE IF EXISTS tpcds_raw.catalog_page;
+DROP TABLE IF EXISTS tpcds_raw.catalog_returns;
+DROP TABLE IF EXISTS tpcds_raw.catalog_sales;
+DROP TABLE IF EXISTS tpcds_raw.customer;
+DROP TABLE IF EXISTS tpcds_raw.customer_address;
+DROP TABLE IF EXISTS tpcds_raw.customer_demographics;
+DROP TABLE IF EXISTS tpcds_raw.date_dim;
+DROP TABLE IF EXISTS tpcds_raw.household_demographics;
+DROP TABLE IF EXISTS tpcds_raw.income_band;
+DROP TABLE IF EXISTS tpcds_raw.inventory;
+DROP TABLE IF EXISTS tpcds_raw.item;
+DROP TABLE IF EXISTS tpcds_raw.promotion;
+DROP TABLE IF EXISTS tpcds_raw.ship_mode;
+DROP TABLE IF EXISTS tpcds_raw.store;
+DROP TABLE IF EXISTS tpcds_raw.store_returns;
+DROP TABLE IF EXISTS tpcds_raw.store_sales;
+DROP TABLE IF EXISTS tpcds_raw.time_dim;
+DROP TABLE IF EXISTS tpcds_raw.warehouse;
+DROP TABLE IF EXISTS tpcds_raw.web_page;
+DROP TABLE IF EXISTS tpcds_raw.web_returns;
+DROP TABLE IF EXISTS tpcds_raw.web_sales;
+DROP TABLE IF EXISTS tpcds_raw.web_site;
+
+CREATE EXTERNAL TABLE tpcds_raw.call_center (  -- raw text data, read in place
+  cc_call_center_sk INT,
+  cc_call_center_id STRING,
+  cc_rec_start_date STRING,
+  cc_rec_end_date STRING,
+  cc_closed_date_sk INT,
+  cc_open_date_sk INT,
+  cc_name STRING,
+  cc_class STRING,
+  cc_employees INT,
+  cc_sq_ft INT,
+  cc_hours STRING,
+  cc_manager STRING,
+  cc_mkt_id INT,
+  cc_mkt_class STRING,
+  cc_mkt_desc STRING,
+  cc_market_manager STRING,
+  cc_division INT,
+  cc_division_name STRING,
+  cc_company INT,
+  cc_company_name STRING,
+  cc_street_number STRING,
+  cc_street_name STRING,
+  cc_street_type STRING,
+  cc_suite_number STRING,
+  cc_city STRING,
+  cc_county STRING,
+  cc_state STRING,
+  cc_zip STRING,
+  cc_country STRING,
+  cc_gmt_offset DECIMAL(5,2),
+  cc_tax_percentage DECIMAL(5,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/call_center'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.catalog_page (  -- raw text data, read in place
+  cp_catalog_page_sk INT,
+  cp_catalog_page_id STRING,
+  cp_start_date_sk INT,
+  cp_end_date_sk INT,
+  cp_department STRING,
+  cp_catalog_number INT,
+  cp_catalog_page_number INT,
+  cp_description STRING,
+  cp_type STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/catalog_page'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.catalog_returns (  -- raw text data, read in place
+  cr_returned_date_sk INT,
+  cr_returned_time_sk INT,
+  cr_item_sk BIGINT,
+  cr_refunded_customer_sk INT,
+  cr_refunded_cdemo_sk INT,
+  cr_refunded_hdemo_sk INT,
+  cr_refunded_addr_sk INT,
+  cr_returning_customer_sk INT,
+  cr_returning_cdemo_sk INT,
+  cr_returning_hdemo_sk INT,
+  cr_returning_addr_sk INT,
+  cr_call_center_sk INT,
+  cr_catalog_page_sk INT,
+  cr_ship_mode_sk INT,
+  cr_warehouse_sk INT,
+  cr_reason_sk INT,
+  cr_order_number BIGINT,
+  cr_return_quantity INT,
+  cr_return_amount DECIMAL(7,2),
+  cr_return_tax DECIMAL(7,2),
+  cr_return_amt_inc_tax DECIMAL(7,2),
+  cr_fee DECIMAL(7,2),
+  cr_return_ship_cost DECIMAL(7,2),
+  cr_refunded_cash DECIMAL(7,2),
+  cr_reversed_charge DECIMAL(7,2),
+  cr_store_credit DECIMAL(7,2),
+  cr_net_loss DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/catalog_returns'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.catalog_sales (  -- raw text data, read in place
+  cs_sold_date_sk INT,
+  cs_sold_time_sk INT,
+  cs_ship_date_sk INT,
+  cs_bill_customer_sk INT,
+  cs_bill_cdemo_sk INT,
+  cs_bill_hdemo_sk INT,
+  cs_bill_addr_sk INT,
+  cs_ship_customer_sk INT,
+  cs_ship_cdemo_sk INT,
+  cs_ship_hdemo_sk INT,
+  cs_ship_addr_sk INT,
+  cs_call_center_sk INT,
+  cs_catalog_page_sk INT,
+  cs_ship_mode_sk INT,
+  cs_warehouse_sk INT,
+  cs_item_sk BIGINT,
+  cs_promo_sk INT,
+  cs_order_number BIGINT,
+  cs_quantity INT,
+  cs_wholesale_cost DECIMAL(7,2),
+  cs_list_price DECIMAL(7,2),
+  cs_sales_price DECIMAL(7,2),
+  cs_ext_discount_amt DECIMAL(7,2),
+  cs_ext_sales_price DECIMAL(7,2),
+  cs_ext_wholesale_cost DECIMAL(7,2),
+  cs_ext_list_price DECIMAL(7,2),
+  cs_ext_tax DECIMAL(7,2),
+  cs_coupon_amt DECIMAL(7,2),
+  cs_ext_ship_cost DECIMAL(7,2),
+  cs_net_paid DECIMAL(7,2),
+  cs_net_paid_inc_tax DECIMAL(7,2),
+  cs_net_paid_inc_ship DECIMAL(7,2),
+  cs_net_paid_inc_ship_tax DECIMAL(7,2),
+  cs_net_profit DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/catalog_sales'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.customer (  -- raw text data, read in place
+  c_customer_sk INT,
+  c_customer_id STRING,
+  c_current_cdemo_sk INT,
+  c_current_hdemo_sk INT,
+  c_current_addr_sk INT,
+  c_first_shipto_date_sk INT,
+  c_first_sales_date_sk INT,
+  c_salutation STRING,
+  c_first_name STRING,
+  c_last_name STRING,
+  c_preferred_cust_flag STRING,
+  c_birth_day INT,
+  c_birth_month INT,
+  c_birth_year INT,
+  c_birth_country STRING,
+  c_login STRING,
+  c_email_address STRING,
+  c_last_review_date STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/customer'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.customer_address (  -- raw text data, read in place
+  ca_address_sk INT,
+  ca_address_id STRING,
+  ca_street_number STRING,
+  ca_street_name STRING,
+  ca_street_type STRING,
+  ca_suite_number STRING,
+  ca_city STRING,
+  ca_county STRING,
+  ca_state STRING,
+  ca_zip STRING,
+  ca_country STRING,
+  ca_gmt_offset DECIMAL(5,2),
+  ca_location_type STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/customer_address'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.customer_demographics (  -- raw text data, read in place
+  cd_demo_sk INT,
+  cd_gender STRING,
+  cd_marital_status STRING,
+  cd_education_status STRING,
+  cd_purchase_estimate INT,
+  cd_credit_rating STRING,
+  cd_dep_count INT,
+  cd_dep_employed_count INT,
+  cd_dep_college_count INT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/customer_demographics'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.date_dim (  -- raw text data, read in place
+  d_date_sk INT,
+  d_date_id STRING,
+  d_date STRING,
+  d_month_seq INT,
+  d_week_seq INT,
+  d_quarter_seq INT,
+  d_year INT,
+  d_dow INT,
+  d_moy INT,
+  d_dom INT,
+  d_qoy INT,
+  d_fy_year INT,
+  d_fy_quarter_seq INT,
+  d_fy_week_seq INT,
+  d_day_name STRING,
+  d_quarter_name STRING,
+  d_holiday STRING,
+  d_weekend STRING,
+  d_following_holiday STRING,
+  d_first_dom INT,
+  d_last_dom INT,
+  d_same_day_ly INT,
+  d_same_day_lq INT,
+  d_current_day STRING,
+  d_current_week STRING,
+  d_current_month STRING,
+  d_current_quarter STRING,
+  d_current_year STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/date_dim'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.household_demographics (  -- raw text data, read in place
+  hd_demo_sk INT,
+  hd_income_band_sk INT,
+  hd_buy_potential STRING,
+  hd_dep_count INT,
+  hd_vehicle_count INT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/household_demographics'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.income_band (  -- raw text data, read in place
+  ib_income_band_sk INT,
+  ib_lower_bound INT,
+  ib_upper_bound INT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/income_band'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.inventory (  -- raw text data, read in place
+  inv_date_sk INT,
+  inv_item_sk BIGINT,
+  inv_warehouse_sk INT,
+  inv_quantity_on_hand INT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/inventory'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.item (  -- raw text data, read in place
+  i_item_sk BIGINT,
+  i_item_id STRING,
+  i_rec_start_date STRING,
+  i_rec_end_date STRING,
+  i_item_desc STRING,
+  i_current_price DECIMAL(7,2),
+  i_wholesale_cost DECIMAL(7,2),
+  i_brand_id INT,
+  i_brand STRING,
+  i_class_id INT,
+  i_class STRING,
+  i_category_id INT,
+  i_category STRING,
+  i_manufact_id INT,
+  i_manufact STRING,
+  i_size STRING,
+  i_formulation STRING,
+  i_color STRING,
+  i_units STRING,
+  i_container STRING,
+  i_manager_id INT,
+  i_product_name STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/item'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.promotion (  -- raw text data, read in place
+  p_promo_sk INT,
+  p_promo_id STRING,
+  p_start_date_sk INT,
+  p_end_date_sk INT,
+  p_item_sk BIGINT,
+  p_cost DECIMAL(15,2),
+  p_response_target INT,
+  p_promo_name STRING,
+  p_channel_dmail STRING,
+  p_channel_email STRING,
+  p_channel_catalog STRING,
+  p_channel_tv STRING,
+  p_channel_radio STRING,
+  p_channel_press STRING,
+  p_channel_event STRING,
+  p_channel_demo STRING,
+  p_channel_details STRING,
+  p_purpose STRING,
+  p_discount_active STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/promotion'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.ship_mode (  -- raw text data, read in place
+  sm_ship_mode_sk INT,
+  sm_ship_mode_id STRING,
+  sm_type STRING,
+  sm_code STRING,
+  sm_carrier STRING,
+  sm_contract STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/ship_mode'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.store (  -- raw text data, read in place
+  s_store_sk INT,
+  s_store_id STRING,
+  s_rec_start_date STRING,
+  s_rec_end_date STRING,
+  s_closed_date_sk INT,
+  s_store_name STRING,
+  s_number_employees INT,
+  s_floor_space INT,
+  s_hours STRING,
+  s_manager STRING,
+  s_market_id INT,
+  s_geography_class STRING,
+  s_market_desc STRING,
+  s_market_manager STRING,
+  s_division_id INT,
+  s_division_name STRING,
+  s_company_id INT,
+  s_company_name STRING,
+  s_street_number STRING,
+  s_street_name STRING,
+  s_street_type STRING,
+  s_suite_number STRING,
+  s_city STRING,
+  s_county STRING,
+  s_state STRING,
+  s_zip STRING,
+  s_country STRING,
+  s_gmt_offset DECIMAL(5,2),
+  s_tax_precentage DECIMAL(5,2)  -- NOTE(review): same spelling is used in tpcds_parquet.store
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/store'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.store_returns (  -- raw text data, read in place
+  sr_returned_date_sk INT,
+  sr_return_time_sk INT,
+  sr_item_sk BIGINT,
+  sr_customer_sk INT,
+  sr_cdemo_sk INT,
+  sr_hdemo_sk INT,
+  sr_addr_sk INT,
+  sr_store_sk INT,
+  sr_reason_sk INT,
+  sr_ticket_number BIGINT,
+  sr_return_quantity INT,
+  sr_return_amt DECIMAL(7,2),
+  sr_return_tax DECIMAL(7,2),
+  sr_return_amt_inc_tax DECIMAL(7,2),
+  sr_fee DECIMAL(7,2),
+  sr_return_ship_cost DECIMAL(7,2),
+  sr_refunded_cash DECIMAL(7,2),
+  sr_reversed_charge DECIMAL(7,2),
+  sr_store_credit DECIMAL(7,2),
+  sr_net_loss DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/store_returns'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.store_sales (  -- raw text data, read in place
+  ss_sold_date_sk INT,  -- regular column here, partition key in tpcds_parquet.store_sales
+  ss_sold_time_sk INT,
+  ss_item_sk BIGINT,
+  ss_customer_sk INT,
+  ss_cdemo_sk INT,
+  ss_hdemo_sk INT,
+  ss_addr_sk INT,
+  ss_store_sk INT,
+  ss_promo_sk INT,
+  ss_ticket_number BIGINT,
+  ss_quantity INT,
+  ss_wholesale_cost DECIMAL(7,2),
+  ss_list_price DECIMAL(7,2),
+  ss_sales_price DECIMAL(7,2),
+  ss_ext_discount_amt DECIMAL(7,2),
+  ss_ext_sales_price DECIMAL(7,2),
+  ss_ext_wholesale_cost DECIMAL(7,2),
+  ss_ext_list_price DECIMAL(7,2),
+  ss_ext_tax DECIMAL(7,2),
+  ss_coupon_amt DECIMAL(7,2),
+  ss_net_paid DECIMAL(7,2),
+  ss_net_paid_inc_tax DECIMAL(7,2),
+  ss_net_profit DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/store_sales'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.time_dim (  -- raw text data, read in place
+  t_time_sk INT,
+  t_time_id STRING,
+  t_time INT,
+  t_hour INT,
+  t_minute INT,
+  t_second INT,
+  t_am_pm STRING,
+  t_shift STRING,
+  t_sub_shift STRING,
+  t_meal_time STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/time_dim'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.warehouse (  -- raw text data, read in place
+  w_warehouse_sk INT,
+  w_warehouse_id STRING,
+  w_warehouse_name STRING,
+  w_warehouse_sq_ft INT,
+  w_street_number STRING,
+  w_street_name STRING,
+  w_street_type STRING,
+  w_suite_number STRING,
+  w_city STRING,
+  w_county STRING,
+  w_state STRING,
+  w_zip STRING,
+  w_country STRING,
+  w_gmt_offset DECIMAL(5,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/warehouse'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.web_page (  -- raw text data, read in place
+  wp_web_page_sk INT,
+  wp_web_page_id STRING,
+  wp_rec_start_date STRING,
+  wp_rec_end_date STRING,
+  wp_creation_date_sk INT,
+  wp_access_date_sk INT,
+  wp_autogen_flag STRING,
+  wp_customer_sk INT,
+  wp_url STRING,
+  wp_type STRING,
+  wp_char_count INT,
+  wp_link_count INT,
+  wp_image_count INT,
+  wp_max_ad_count INT
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/web_page'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.web_returns (  -- raw text data, read in place
+  wr_returned_date_sk INT,
+  wr_returned_time_sk INT,
+  wr_item_sk BIGINT,
+  wr_refunded_customer_sk INT,
+  wr_refunded_cdemo_sk INT,
+  wr_refunded_hdemo_sk INT,
+  wr_refunded_addr_sk INT,
+  wr_returning_customer_sk INT,
+  wr_returning_cdemo_sk INT,
+  wr_returning_hdemo_sk INT,
+  wr_returning_addr_sk INT,
+  wr_web_page_sk INT,
+  wr_reason_sk INT,
+  wr_order_number BIGINT,
+  wr_return_quantity INT,
+  wr_return_amt DECIMAL(7,2),
+  wr_return_tax DECIMAL(7,2),
+  wr_return_amt_inc_tax DECIMAL(7,2),
+  wr_fee DECIMAL(7,2),
+  wr_return_ship_cost DECIMAL(7,2),
+  wr_refunded_cash DECIMAL(7,2),
+  wr_reversed_charge DECIMAL(7,2),
+  wr_account_credit DECIMAL(7,2),
+  wr_net_loss DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/web_returns'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.web_sales (  -- raw text data, read in place
+  ws_sold_date_sk INT,
+  ws_sold_time_sk INT,
+  ws_ship_date_sk INT,
+  ws_item_sk BIGINT,
+  ws_bill_customer_sk INT,
+  ws_bill_cdemo_sk INT,
+  ws_bill_hdemo_sk INT,
+  ws_bill_addr_sk INT,
+  ws_ship_customer_sk INT,
+  ws_ship_cdemo_sk INT,
+  ws_ship_hdemo_sk INT,
+  ws_ship_addr_sk INT,
+  ws_web_page_sk INT,
+  ws_web_site_sk INT,
+  ws_ship_mode_sk INT,
+  ws_warehouse_sk INT,
+  ws_promo_sk INT,
+  ws_order_number BIGINT,
+  ws_quantity INT,
+  ws_wholesale_cost DECIMAL(7,2),
+  ws_list_price DECIMAL(7,2),
+  ws_sales_price DECIMAL(7,2),
+  ws_ext_discount_amt DECIMAL(7,2),
+  ws_ext_sales_price DECIMAL(7,2),
+  ws_ext_wholesale_cost DECIMAL(7,2),
+  ws_ext_list_price DECIMAL(7,2),
+  ws_ext_tax DECIMAL(7,2),
+  ws_coupon_amt DECIMAL(7,2),
+  ws_ext_ship_cost DECIMAL(7,2),
+  ws_net_paid DECIMAL(7,2),
+  ws_net_paid_inc_tax DECIMAL(7,2),
+  ws_net_paid_inc_ship DECIMAL(7,2),
+  ws_net_paid_inc_ship_tax DECIMAL(7,2),
+  ws_net_profit DECIMAL(7,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/web_sales'
+TBLPROPERTIES('serialization.null.format'='');
+CREATE EXTERNAL TABLE tpcds_raw.web_site (  -- raw text data, read in place
+  web_site_sk INT,
+  web_site_id STRING,
+  web_rec_start_date STRING,
+  web_rec_end_date STRING,
+  web_name STRING,
+  web_open_date_sk INT,
+  web_close_date_sk INT,
+  web_class STRING,
+  web_manager STRING,
+  web_mkt_id INT,
+  web_mkt_class STRING,
+  web_mkt_desc STRING,
+  web_market_manager STRING,
+  web_company_id INT,
+  web_company_name STRING,
+  web_street_number STRING,
+  web_street_name STRING,
+  web_street_type STRING,
+  web_suite_number STRING,
+  web_city STRING,
+  web_county STRING,
+  web_state STRING,
+  web_zip STRING,
+  web_country STRING,
+  web_gmt_offset DECIMAL(5,2),
+  web_tax_percentage DECIMAL(5,2)
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'  -- pipe-separated fields
+WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
+STORED AS TEXTFILE  -- empty field reads as NULL, see TBLPROPERTIES below
+LOCATION '/user/hive/warehouse/external/tpcds_raw/web_site'
+TBLPROPERTIES('serialization.null.format'='');
+
+-- Compute stats on each tpcds_raw table so the planner has table and column statistics.
+COMPUTE STATS tpcds_raw.call_center;
+COMPUTE STATS tpcds_raw.catalog_page;
+COMPUTE STATS tpcds_raw.catalog_returns;
+COMPUTE STATS tpcds_raw.catalog_sales;
+COMPUTE STATS tpcds_raw.customer;
+COMPUTE STATS tpcds_raw.customer_address;
+COMPUTE STATS tpcds_raw.customer_demographics;
+COMPUTE STATS tpcds_raw.date_dim;
+COMPUTE STATS tpcds_raw.household_demographics;
+COMPUTE STATS tpcds_raw.income_band;
+COMPUTE STATS tpcds_raw.inventory;
+COMPUTE STATS tpcds_raw.item;
+COMPUTE STATS tpcds_raw.promotion;
+COMPUTE STATS tpcds_raw.ship_mode;
+COMPUTE STATS tpcds_raw.store;
+COMPUTE STATS tpcds_raw.store_returns;
+COMPUTE STATS tpcds_raw.store_sales;
+COMPUTE STATS tpcds_raw.time_dim;
+COMPUTE STATS tpcds_raw.warehouse;
+COMPUTE STATS tpcds_raw.web_page;
+COMPUTE STATS tpcds_raw.web_returns;
+COMPUTE STATS tpcds_raw.web_sales;
+COMPUTE STATS tpcds_raw.web_site;
+
+-- Create Parquet tables based on text tables. Existing tpcds_parquet tables are dropped first.
+CREATE DATABASE IF NOT EXISTS tpcds_parquet;
+DROP TABLE IF EXISTS tpcds_parquet.call_center;
+DROP TABLE IF EXISTS tpcds_parquet.catalog_page;
+DROP TABLE IF EXISTS tpcds_parquet.catalog_returns;
+DROP TABLE IF EXISTS tpcds_parquet.catalog_sales;
+DROP TABLE IF EXISTS tpcds_parquet.customer;
+DROP TABLE IF EXISTS tpcds_parquet.customer_address;
+DROP TABLE IF EXISTS tpcds_parquet.customer_demographics;
+DROP TABLE IF EXISTS tpcds_parquet.date_dim;
+DROP TABLE IF EXISTS tpcds_parquet.household_demographics;
+DROP TABLE IF EXISTS tpcds_parquet.income_band;
+DROP TABLE IF EXISTS tpcds_parquet.inventory;
+DROP TABLE IF EXISTS tpcds_parquet.item;
+DROP TABLE IF EXISTS tpcds_parquet.promotion;
+DROP TABLE IF EXISTS tpcds_parquet.ship_mode;
+DROP TABLE IF EXISTS tpcds_parquet.store;
+DROP TABLE IF EXISTS tpcds_parquet.store_returns;
+DROP TABLE IF EXISTS tpcds_parquet.store_sales;
+DROP TABLE IF EXISTS tpcds_parquet.time_dim;
+DROP TABLE IF EXISTS tpcds_parquet.warehouse;
+DROP TABLE IF EXISTS tpcds_parquet.web_page;
+DROP TABLE IF EXISTS tpcds_parquet.web_returns;
+DROP TABLE IF EXISTS tpcds_parquet.web_sales;
+DROP TABLE IF EXISTS tpcds_parquet.web_site;
+
+-- TODO: add sort by hints for better clustering.
+CREATE TABLE tpcds_parquet.call_center (  -- mirrors the tpcds_raw table
+  cc_call_center_sk INT,
+  cc_call_center_id STRING,
+  cc_rec_start_date STRING,
+  cc_rec_end_date STRING,
+  cc_closed_date_sk INT,
+  cc_open_date_sk INT,
+  cc_name STRING,
+  cc_class STRING,
+  cc_employees INT,
+  cc_sq_ft INT,
+  cc_hours STRING,
+  cc_manager STRING,
+  cc_mkt_id INT,
+  cc_mkt_class STRING,
+  cc_mkt_desc STRING,
+  cc_market_manager STRING,
+  cc_division INT,
+  cc_division_name STRING,
+  cc_company INT,
+  cc_company_name STRING,
+  cc_street_number STRING,
+  cc_street_name STRING,
+  cc_street_type STRING,
+  cc_suite_number STRING,
+  cc_city STRING,
+  cc_county STRING,
+  cc_state STRING,
+  cc_zip STRING,
+  cc_country STRING,
+  cc_gmt_offset DECIMAL(5,2),
+  cc_tax_percentage DECIMAL(5,2)
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.catalog_page (  -- mirrors the tpcds_raw table
+  cp_catalog_page_sk INT,
+  cp_catalog_page_id STRING,
+  cp_start_date_sk INT,
+  cp_end_date_sk INT,
+  cp_department STRING,
+  cp_catalog_number INT,
+  cp_catalog_page_number INT,
+  cp_description STRING,
+  cp_type STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.catalog_returns (  -- mirrors the tpcds_raw table
+  cr_returned_date_sk INT,
+  cr_returned_time_sk INT,
+  cr_item_sk BIGINT,
+  cr_refunded_customer_sk INT,
+  cr_refunded_cdemo_sk INT,
+  cr_refunded_hdemo_sk INT,
+  cr_refunded_addr_sk INT,
+  cr_returning_customer_sk INT,
+  cr_returning_cdemo_sk INT,
+  cr_returning_hdemo_sk INT,
+  cr_returning_addr_sk INT,
+  cr_call_center_sk INT,
+  cr_catalog_page_sk INT,
+  cr_ship_mode_sk INT,
+  cr_warehouse_sk INT,
+  cr_reason_sk INT,
+  cr_order_number BIGINT,
+  cr_return_quantity INT,
+  cr_return_amount DECIMAL(7,2),
+  cr_return_tax DECIMAL(7,2),
+  cr_return_amt_inc_tax DECIMAL(7,2),
+  cr_fee DECIMAL(7,2),
+  cr_return_ship_cost DECIMAL(7,2),
+  cr_refunded_cash DECIMAL(7,2),
+  cr_reversed_charge DECIMAL(7,2),
+  cr_store_credit DECIMAL(7,2),
+  cr_net_loss DECIMAL(7,2)
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.catalog_sales (  -- mirrors the tpcds_raw table
+  cs_sold_date_sk INT,
+  cs_sold_time_sk INT,
+  cs_ship_date_sk INT,
+  cs_bill_customer_sk INT,
+  cs_bill_cdemo_sk INT,
+  cs_bill_hdemo_sk INT,
+  cs_bill_addr_sk INT,
+  cs_ship_customer_sk INT,
+  cs_ship_cdemo_sk INT,
+  cs_ship_hdemo_sk INT,
+  cs_ship_addr_sk INT,
+  cs_call_center_sk INT,
+  cs_catalog_page_sk INT,
+  cs_ship_mode_sk INT,
+  cs_warehouse_sk INT,
+  cs_item_sk BIGINT,
+  cs_promo_sk INT,
+  cs_order_number BIGINT,
+  cs_quantity INT,
+  cs_wholesale_cost DECIMAL(7,2),
+  cs_list_price DECIMAL(7,2),
+  cs_sales_price DECIMAL(7,2),
+  cs_ext_discount_amt DECIMAL(7,2),
+  cs_ext_sales_price DECIMAL(7,2),
+  cs_ext_wholesale_cost DECIMAL(7,2),
+  cs_ext_list_price DECIMAL(7,2),
+  cs_ext_tax DECIMAL(7,2),
+  cs_coupon_amt DECIMAL(7,2),
+  cs_ext_ship_cost DECIMAL(7,2),
+  cs_net_paid DECIMAL(7,2),
+  cs_net_paid_inc_tax DECIMAL(7,2),
+  cs_net_paid_inc_ship DECIMAL(7,2),
+  cs_net_paid_inc_ship_tax DECIMAL(7,2),
+  cs_net_profit DECIMAL(7,2)
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.customer (  -- mirrors the tpcds_raw table
+  c_customer_sk INT,
+  c_customer_id STRING,
+  c_current_cdemo_sk INT,
+  c_current_hdemo_sk INT,
+  c_current_addr_sk INT,
+  c_first_shipto_date_sk INT,
+  c_first_sales_date_sk INT,
+  c_salutation STRING,
+  c_first_name STRING,
+  c_last_name STRING,
+  c_preferred_cust_flag STRING,
+  c_birth_day INT,
+  c_birth_month INT,
+  c_birth_year INT,
+  c_birth_country STRING,
+  c_login STRING,
+  c_email_address STRING,
+  c_last_review_date STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.customer_address (  -- mirrors the tpcds_raw table
+  ca_address_sk INT,
+  ca_address_id STRING,
+  ca_street_number STRING,
+  ca_street_name STRING,
+  ca_street_type STRING,
+  ca_suite_number STRING,
+  ca_city STRING,
+  ca_county STRING,
+  ca_state STRING,
+  ca_zip STRING,
+  ca_country STRING,
+  ca_gmt_offset DECIMAL(5,2),
+  ca_location_type STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.customer_demographics (  -- mirrors the tpcds_raw table
+  cd_demo_sk INT,
+  cd_gender STRING,
+  cd_marital_status STRING,
+  cd_education_status STRING,
+  cd_purchase_estimate INT,
+  cd_credit_rating STRING,
+  cd_dep_count INT,
+  cd_dep_employed_count INT,
+  cd_dep_college_count INT
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.date_dim (  -- mirrors the tpcds_raw table
+  d_date_sk INT,
+  d_date_id STRING,
+  d_date STRING,
+  d_month_seq INT,
+  d_week_seq INT,
+  d_quarter_seq INT,
+  d_year INT,
+  d_dow INT,
+  d_moy INT,
+  d_dom INT,
+  d_qoy INT,
+  d_fy_year INT,
+  d_fy_quarter_seq INT,
+  d_fy_week_seq INT,
+  d_day_name STRING,
+  d_quarter_name STRING,
+  d_holiday STRING,
+  d_weekend STRING,
+  d_following_holiday STRING,
+  d_first_dom INT,
+  d_last_dom INT,
+  d_same_day_ly INT,
+  d_same_day_lq INT,
+  d_current_day STRING,
+  d_current_week STRING,
+  d_current_month STRING,
+  d_current_quarter STRING,
+  d_current_year STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.household_demographics (  -- mirrors the tpcds_raw table
+  hd_demo_sk INT,
+  hd_income_band_sk INT,
+  hd_buy_potential STRING,
+  hd_dep_count INT,
+  hd_vehicle_count INT
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.income_band (  -- mirrors the tpcds_raw table
+  ib_income_band_sk INT,
+  ib_lower_bound INT,
+  ib_upper_bound INT
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.inventory (  -- mirrors the tpcds_raw table
+  inv_date_sk INT,
+  inv_item_sk BIGINT,
+  inv_warehouse_sk INT,
+  inv_quantity_on_hand INT
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.item (  -- mirrors the tpcds_raw table
+  i_item_sk BIGINT,
+  i_item_id STRING,
+  i_rec_start_date STRING,
+  i_rec_end_date STRING,
+  i_item_desc STRING,
+  i_current_price DECIMAL(7,2),
+  i_wholesale_cost DECIMAL(7,2),
+  i_brand_id INT,
+  i_brand STRING,
+  i_class_id INT,
+  i_class STRING,
+  i_category_id INT,
+  i_category STRING,
+  i_manufact_id INT,
+  i_manufact STRING,
+  i_size STRING,
+  i_formulation STRING,
+  i_color STRING,
+  i_units STRING,
+  i_container STRING,
+  i_manager_id INT,
+  i_product_name STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.promotion (  -- mirrors the tpcds_raw table
+  p_promo_sk INT,
+  p_promo_id STRING,
+  p_start_date_sk INT,
+  p_end_date_sk INT,
+  p_item_sk BIGINT,
+  p_cost DECIMAL(15,2),
+  p_response_target INT,
+  p_promo_name STRING,
+  p_channel_dmail STRING,
+  p_channel_email STRING,
+  p_channel_catalog STRING,
+  p_channel_tv STRING,
+  p_channel_radio STRING,
+  p_channel_press STRING,
+  p_channel_event STRING,
+  p_channel_demo STRING,
+  p_channel_details STRING,
+  p_purpose STRING,
+  p_discount_active STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.ship_mode (  -- mirrors the tpcds_raw table
+  sm_ship_mode_sk INT,
+  sm_ship_mode_id STRING,
+  sm_type STRING,
+  sm_code STRING,
+  sm_carrier STRING,
+  sm_contract STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.store (  -- mirrors the tpcds_raw table
+  s_store_sk INT,
+  s_store_id STRING,
+  s_rec_start_date STRING,
+  s_rec_end_date STRING,
+  s_closed_date_sk INT,
+  s_store_name STRING,
+  s_number_employees INT,
+  s_floor_space INT,
+  s_hours STRING,
+  s_manager STRING,
+  s_market_id INT,
+  s_geography_class STRING,
+  s_market_desc STRING,
+  s_market_manager STRING,
+  s_division_id INT,
+  s_division_name STRING,
+  s_company_id INT,
+  s_company_name STRING,
+  s_street_number STRING,
+  s_street_name STRING,
+  s_street_type STRING,
+  s_suite_number STRING,
+  s_city STRING,
+  s_county STRING,
+  s_state STRING,
+  s_zip STRING,
+  s_country STRING,
+  s_gmt_offset DECIMAL(5,2),
+  s_tax_precentage DECIMAL(5,2)  -- NOTE(review): same spelling is used in tpcds_raw.store
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.store_returns (  -- mirrors the tpcds_raw table
+  sr_returned_date_sk INT,
+  sr_return_time_sk INT,
+  sr_item_sk BIGINT,
+  sr_customer_sk INT,
+  sr_cdemo_sk INT,
+  sr_hdemo_sk INT,
+  sr_addr_sk INT,
+  sr_store_sk INT,
+  sr_reason_sk INT,
+  sr_ticket_number BIGINT,
+  sr_return_quantity INT,
+  sr_return_amt DECIMAL(7,2),
+  sr_return_tax DECIMAL(7,2),
+  sr_return_amt_inc_tax DECIMAL(7,2),
+  sr_fee DECIMAL(7,2),
+  sr_return_ship_cost DECIMAL(7,2),
+  sr_refunded_cash DECIMAL(7,2),
+  sr_reversed_charge DECIMAL(7,2),
+  sr_store_credit DECIMAL(7,2),
+  sr_net_loss DECIMAL(7,2)
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.store_sales (  -- tpcds_raw.store_sales columns minus ss_sold_date_sk
+  ss_sold_time_sk INT,
+  ss_item_sk BIGINT,
+  ss_customer_sk INT,
+  ss_cdemo_sk INT,
+  ss_hdemo_sk INT,
+  ss_addr_sk INT,
+  ss_store_sk INT,
+  ss_promo_sk INT,
+  ss_ticket_number BIGINT,
+  ss_quantity INT,
+  ss_wholesale_cost DECIMAL(7,2),
+  ss_list_price DECIMAL(7,2),
+  ss_sales_price DECIMAL(7,2),
+  ss_ext_discount_amt DECIMAL(7,2),
+  ss_ext_sales_price DECIMAL(7,2),
+  ss_ext_wholesale_cost DECIMAL(7,2),
+  ss_ext_list_price DECIMAL(7,2),
+  ss_ext_tax DECIMAL(7,2),
+  ss_coupon_amt DECIMAL(7,2),
+  ss_net_paid DECIMAL(7,2),
+  ss_net_paid_inc_tax DECIMAL(7,2),
+  ss_net_profit DECIMAL(7,2)
+)
+PARTITIONED BY (  -- the only table in this group that is partitioned
+  ss_sold_date_sk INT  -- first regular column in tpcds_raw.store_sales, partition key here
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.time_dim (  -- mirrors the tpcds_raw table
+  t_time_sk INT,
+  t_time_id STRING,
+  t_time INT,
+  t_hour INT,
+  t_minute INT,
+  t_second INT,
+  t_am_pm STRING,
+  t_shift STRING,
+  t_sub_shift STRING,
+  t_meal_time STRING
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.warehouse (  -- mirrors the tpcds_raw table
+  w_warehouse_sk INT,
+  w_warehouse_id STRING,
+  w_warehouse_name STRING,
+  w_warehouse_sq_ft INT,
+  w_street_number STRING,
+  w_street_name STRING,
+  w_street_type STRING,
+  w_suite_number STRING,
+  w_city STRING,
+  w_county STRING,
+  w_state STRING,
+  w_zip STRING,
+  w_country STRING,
+  w_gmt_offset DECIMAL(5,2)
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.web_page (  -- mirrors the tpcds_raw table
+  wp_web_page_sk INT,
+  wp_web_page_id STRING,
+  wp_rec_start_date STRING,
+  wp_rec_end_date STRING,
+  wp_creation_date_sk INT,
+  wp_access_date_sk INT,
+  wp_autogen_flag STRING,
+  wp_customer_sk INT,
+  wp_url STRING,
+  wp_type STRING,
+  wp_char_count INT,
+  wp_link_count INT,
+  wp_image_count INT,
+  wp_max_ad_count INT
+)
+STORED AS PARQUET;
+CREATE TABLE tpcds_parquet.web_returns (  -- mirrors the tpcds_raw table
+  wr_returned_date_sk INT,
+  wr_returned_time_sk INT,
+  wr_item_sk BIGINT,
+  wr_refunded_customer_sk INT,
+  wr_refunded_cdemo_sk INT,
+  wr_refunded_hdemo_sk INT,
+  wr_refunded_addr_sk INT,
+  wr_returning_customer_sk INT,
+  wr_returning_cdemo_sk INT,
+  wr_returning_hdemo_sk INT,
+  wr_returning_addr_sk INT,
+  wr_web_page_sk INT,
+  wr_reason_sk INT,
+  wr_order_number BIGINT,
+  wr_return_quantity INT,
+  wr_return_amt DECIMAL(7,2),
+  wr_return_tax DECIMAL(7,2),
+  wr_return_amt_inc_tax DECIMAL(7,2),
+  wr_fee DECIMAL(7,2),
+  wr_return_ship_cost DECIMAL(7,2),
+  wr_refunded_cash DECIMAL(7,2),
+  wr_reversed_charge DECIMAL(7,2),
+  wr_account_credit DECIMAL(7,2),
+  wr_net_loss DECIMAL(7,2)
+)
+STORED AS PARQUET;
+-- Parquet-format counterpart of tpcds_raw.web_sales. It is filled further down with
+-- INSERT ... SELECT * FROM tpcds_raw.web_sales, so the column order here must line up
+-- with the raw table's. The table is created without a PARTITIONED BY clause, so
+-- ws_sold_date_sk is an ordinary leading column.
+CREATE TABLE tpcds_parquet.web_sales (
+  ws_sold_date_sk INT,
+  ws_sold_time_sk INT,
+  ws_ship_date_sk INT,
+  ws_item_sk BIGINT,
+  ws_bill_customer_sk INT,
+  ws_bill_cdemo_sk INT,
+  ws_bill_hdemo_sk INT,
+  ws_bill_addr_sk INT,
+  ws_ship_customer_sk INT,
+  ws_ship_cdemo_sk INT,
+  ws_ship_hdemo_sk INT,
+  ws_ship_addr_sk INT,
+  ws_web_page_sk INT,
+  ws_web_site_sk INT,
+  ws_ship_mode_sk INT,
+  ws_warehouse_sk INT,
+  ws_promo_sk INT,
+  ws_order_number BIGINT,
+  ws_quantity INT,
+  ws_wholesale_cost DECIMAL(7,2),
+  ws_list_price DECIMAL(7,2),
+  ws_sales_price DECIMAL(7,2),
+  ws_ext_discount_amt DECIMAL(7,2),
+  ws_ext_sales_price DECIMAL(7,2),
+  ws_ext_wholesale_cost DECIMAL(7,2),
+  ws_ext_list_price DECIMAL(7,2),
+  ws_ext_tax DECIMAL(7,2),
+  ws_coupon_amt DECIMAL(7,2),
+  ws_ext_ship_cost DECIMAL(7,2),
+  ws_net_paid DECIMAL(7,2),
+  ws_net_paid_inc_tax DECIMAL(7,2),
+  ws_net_paid_inc_ship DECIMAL(7,2),
+  ws_net_paid_inc_ship_tax DECIMAL(7,2),
+  ws_net_profit DECIMAL(7,2)
+)
+STORED AS PARQUET;
+-- Parquet-format counterpart of tpcds_raw.web_site. It is filled further down with
+-- INSERT ... SELECT * FROM tpcds_raw.web_site, so the column order here must line up
+-- with the raw table's. The record start/end dates are kept as STRING columns.
+CREATE TABLE tpcds_parquet.web_site (
+  web_site_sk INT,
+  web_site_id STRING,
+  web_rec_start_date STRING,
+  web_rec_end_date STRING,
+  web_name STRING,
+  web_open_date_sk INT,
+  web_close_date_sk INT,
+  web_class STRING,
+  web_manager STRING,
+  web_mkt_id INT,
+  web_mkt_class STRING,
+  web_mkt_desc STRING,
+  web_market_manager STRING,
+  web_company_id INT,
+  web_company_name STRING,
+  web_street_number STRING,
+  web_street_name STRING,
+  web_street_type STRING,
+  web_suite_number STRING,
+  web_city STRING,
+  web_county STRING,
+  web_state STRING,
+  web_zip STRING,
+  web_country STRING,
+  web_gmt_offset DECIMAL(5,2),
+  web_tax_percentage DECIMAL(5,2)
+)
+STORED AS PARQUET;
+
+
+-- Populate the Parquet tables from their tpcds_raw counterparts. SELECT * maps columns
+-- by position, so each pair of tables must declare its columns in the same order.
+-- store_sales is not loaded here; it has its own partitioned INSERT below.
+INSERT INTO tpcds_parquet.call_center SELECT * FROM tpcds_raw.call_center;
+INSERT INTO tpcds_parquet.catalog_page SELECT * FROM tpcds_raw.catalog_page;
+INSERT INTO tpcds_parquet.catalog_returns SELECT * FROM tpcds_raw.catalog_returns;
+INSERT INTO tpcds_parquet.catalog_sales SELECT * FROM tpcds_raw.catalog_sales;
+INSERT INTO tpcds_parquet.customer SELECT * FROM tpcds_raw.customer;
+INSERT INTO tpcds_parquet.customer_address SELECT * FROM tpcds_raw.customer_address;
+INSERT INTO tpcds_parquet.customer_demographics SELECT * FROM tpcds_raw.customer_demographics;
+INSERT INTO tpcds_parquet.date_dim SELECT * FROM tpcds_raw.date_dim;
+INSERT INTO tpcds_parquet.household_demographics SELECT * FROM tpcds_raw.household_demographics;
+INSERT INTO tpcds_parquet.income_band SELECT * FROM tpcds_raw.income_band;
+INSERT INTO tpcds_parquet.inventory SELECT * FROM tpcds_raw.inventory;
+INSERT INTO tpcds_parquet.item SELECT * FROM tpcds_raw.item;
+INSERT INTO tpcds_parquet.promotion SELECT * FROM tpcds_raw.promotion;
+INSERT INTO tpcds_parquet.ship_mode SELECT * FROM tpcds_raw.ship_mode;
+INSERT INTO tpcds_parquet.store SELECT * FROM tpcds_raw.store;
+INSERT INTO tpcds_parquet.store_returns SELECT * FROM tpcds_raw.store_returns;
+INSERT INTO tpcds_parquet.time_dim SELECT * FROM tpcds_raw.time_dim;
+INSERT INTO tpcds_parquet.warehouse SELECT * FROM tpcds_raw.warehouse;
+INSERT INTO tpcds_parquet.web_page SELECT * FROM tpcds_raw.web_page;
+INSERT INTO tpcds_parquet.web_returns SELECT * FROM tpcds_raw.web_returns;
+INSERT INTO tpcds_parquet.web_sales SELECT * FROM tpcds_raw.web_sales;
+INSERT INTO tpcds_parquet.web_site SELECT * FROM tpcds_raw.web_site;
+
+-- store_sales is partitioned by ss_sold_date_sk. The columns are therefore listed
+-- explicitly with the partition key last, which is where a dynamic-partition INSERT
+-- takes the partition value from.
+INSERT INTO tpcds_parquet.store_sales PARTITION (ss_sold_date_sk)
+SELECT
+  ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk,
+  ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity,
+  ss_wholesale_cost, ss_list_price, ss_sales_price,
+  ss_ext_discount_amt, ss_ext_sales_price, ss_ext_wholesale_cost,
+  ss_ext_list_price, ss_ext_tax, ss_coupon_amt,
+  ss_net_paid, ss_net_paid_inc_tax, ss_net_profit,
+  ss_sold_date_sk
+FROM tpcds_raw.store_sales;
+
+
+-- Compute stats on all the tables for optimal performance.
+-- These statements come after the INSERTs above so that the statistics are gathered
+-- from the loaded data rather than from empty tables.
+COMPUTE STATS tpcds_parquet.call_center;
+COMPUTE STATS tpcds_parquet.catalog_page;
+COMPUTE STATS tpcds_parquet.catalog_returns;
+COMPUTE STATS tpcds_parquet.catalog_sales;
+COMPUTE STATS tpcds_parquet.customer;
+COMPUTE STATS tpcds_parquet.customer_address;
+COMPUTE STATS tpcds_parquet.customer_demographics;
+COMPUTE STATS tpcds_parquet.date_dim;
+COMPUTE STATS tpcds_parquet.household_demographics;
+COMPUTE STATS tpcds_parquet.income_band;
+COMPUTE STATS tpcds_parquet.inventory;
+COMPUTE STATS tpcds_parquet.item;
+COMPUTE STATS tpcds_parquet.promotion;
+COMPUTE STATS tpcds_parquet.ship_mode;
+COMPUTE STATS tpcds_parquet.store;
+COMPUTE STATS tpcds_parquet.store_returns;
+COMPUTE STATS tpcds_parquet.store_sales;
+COMPUTE STATS tpcds_parquet.time_dim;
+COMPUTE STATS tpcds_parquet.warehouse;
+COMPUTE STATS tpcds_parquet.web_page;
+COMPUTE STATS tpcds_parquet.web_returns;
+COMPUTE STATS tpcds_parquet.web_sales;
+COMPUTE STATS tpcds_parquet.web_site;
diff --git a/docker/quickstart_conf/hive-site.xml b/docker/quickstart_conf/hive-site.xml
new file mode 100644
index 0000000..aa41bf5
--- /dev/null
+++ b/docker/quickstart_conf/hive-site.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!--
+Hive configuration for Impala quickstart docker cluster.
+-->
+<configuration>
+      <property>
+        <!-- Required for automatic metadata sync. -->
+        <name>hive.metastore.dml.events</name>
+        <value>true</value>
+      </property>
+      <property>
+        <!-- User impala is not authorized to consume notifications by default, disable
+             authentication to work around this. -->
+        <name>hive.metastore.event.db.notification.api.auth</name>
+        <value>false</value>
+      </property>
+      <property>
+        <!-- Thrift URI that metastore clients connect to. -->
+        <name>hive.metastore.uris</name>
+        <value>thrift://quickstart-hive-metastore:9083</value>
+      </property>
+      <!-- Managed and external tablespaces must live on the Docker volumes that we
+           configure for the quickstart cluster. -->
+      <property>
+        <name>hive.metastore.warehouse.dir</name>
+        <value>/user/hive/warehouse/managed</value>
+      </property>
+      <property>
+        <name>hive.metastore.warehouse.external.dir</name>
+        <value>/user/hive/warehouse/external</value>
+      </property>
+      <property>
+        <!-- Required to enable Hive transactions -->
+        <name>hive.support.concurrency</name>
+        <value>true</value>
+      </property>
+      <property>
+        <!-- Required to enable Hive transactions -->
+        <name>hive.txn.manager</name>
+        <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
+      </property>
+      <property>
+        <!-- Use embedded Derby database -->
+        <name>javax.jdo.option.ConnectionDriverName</name>
+        <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+      </property>
+      <property>
+        <!-- Use embedded Derby database -->
+        <name>javax.jdo.option.ConnectionURL</name>
+        <value>jdbc:derby:;databaseName=/var/lib/hive/metastore/metastore_db;create=true</value>
+      </property>
+      <!-- Hive stats autogathering negatively affects latency of DDL operations, etc and
+           is not particularly useful for Impala -->
+      <property>
+        <name>hive.stats.autogather</name>
+        <value>false</value>
+      </property>
+</configuration>
diff --git a/docker/quickstart_hms/Dockerfile b/docker/quickstart_hms/Dockerfile
new file mode 100644
index 0000000..7807ce2
--- /dev/null
+++ b/docker/quickstart_hms/Dockerfile
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Build a base HMS image for quickstart purposes.
+
+ARG BASE_IMAGE=ubuntu:16.04
+FROM ${BASE_IMAGE}
+
+# Common label arguments. They are consumed by the LABEL instruction at the end of
+# this file.
+ARG MAINTAINER
+ARG URL
+ARG VCS_REF
+ARG VCS_TYPE
+ARG VCS_URL
+ARG VERSION
+
+# Install the headless JRE plus assorted command-line utilities, then drop the apt
+# caches and package lists in the same layer.
+RUN apt-get update && \
+  apt-get install -y openjdk-8-jre-headless \
+  sudo netcat-openbsd less curl iproute2 vim iputils-ping \
+  tzdata krb5-user && \
+  apt-get clean && \
+  rm -rf /var/lib/apt/lists/*
+
+# Use a non-privileged hive user to run the daemons in the container.
+# That user should own everything in the /opt/hive and /var/lib/hive subdirectories.
+# We use uid/gid 1000 to match the impala user in other containers so that it has
+# ownership over any files/directories in docker volumes.
+# NOTE(review): /etc/passwd is made world-writable below, presumably so an entry can
+# be added when the container runs under an arbitrary uid. Confirm it is still needed.
+RUN groupadd -r hive -g 1000 && useradd --no-log-init -r -u 1000 -g 1000 hive && \
+    mkdir -p /opt/hive && chown hive /opt/hive && \
+    mkdir -p /var/lib/hive && chown hive /var/lib/hive && \
+    chmod ugo+w /etc/passwd
+USER hive
+
+# Copy the Hive install, the Hadoop install and the entrypoint script. USER hive
+# above stays in effect for the rest of the build and for the running container.
+WORKDIR /opt/hive
+COPY --chown=hive hive /opt/hive
+COPY --chown=hive hadoop /opt/hadoop
+COPY --chown=hive hms-entrypoint.sh /hms-entrypoint.sh
+
+# Add the entrypoint.
+ENTRYPOINT ["/hms-entrypoint.sh"]
+
+LABEL name="Apache Impala HMS Quickstart" \
+      description="Basic HMS image for Impala quickstart." \
+      # Common labels.
+      org.label-schema.maintainer=$MAINTAINER \
+      org.label-schema.url=$URL \
+      org.label-schema.vcs-ref=$VCS_REF \
+      org.label-schema.vcs-type=$VCS_TYPE \
+      org.label-schema.vcs-url=$VCS_URL \
+      org.label-schema.version=$VERSION
diff --git a/docker/quickstart_hms/hms-entrypoint.sh b/docker/quickstart_hms/hms-entrypoint.sh
new file mode 100755
index 0000000..3e404f7
--- /dev/null
+++ b/docker/quickstart_hms/hms-entrypoint.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+################################################################################
+#
+# This script follows the pattern described in the docker best practices here:
+# https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#entrypoint
+################################################################################
+
+set -euo pipefail
+
+DATA_DIR="/var/lib/hive"
+LOG_DIR="$DATA_DIR/logs"
+HIVE_HOME="/opt/hive"
+export JAVA_HOME=$(which java | xargs readlink -f | sed "s:/bin/java::")
+export HADOOP_HOME=/opt/hadoop
+
+function print_help {
+  echo "Supported commands:"
+  echo "   hms           - start the hive metastore service"
+  echo "   help          - print useful information and exit"
+  echo ""
+  echo "Other commands can be specified to run shell commands."
+}
+
+function run_hive_metastore() {
+  # If the derby files do not exist, then initialize the schema.
+  if [ ! -d "${DATA_DIR}/metastore/metastore_db" ]; then
+    $HIVE_HOME/bin/schematool -dbType derby -initSchema
+  fi
+  # Start the Hive Metastore.
+  exec $HIVE_HOME/bin/hive --service metastore
+}
+
+# If no arguments are passed, print the help.
+if [[ $# -eq 0 ]]; then
+  print_help
+  exit 1
+fi
+
+# Make sure the data and log directories exist before running any command.
+mkdir -p "$DATA_DIR" "$LOG_DIR"
+
+case "$1" in
+  hms)
+    # run_hive_metastore ends in exec, so control never comes back here.
+    run_hive_metastore
+    ;;
+  help)
+    print_help
+    exit 0
+    ;;
+esac
+# Support calling anything else in the container.
+exec "$@"

Reply via email to