This is an automated email from the ASF dual-hosted git repository.
chamikara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new ed39cbbf709 beam-sql.sh, a standalone launcher for Beam SQL Shell
(#36305)
ed39cbbf709 is described below
commit ed39cbbf7098b6f027af870734e230f878317af1
Author: Talat UYARER <[email protected]>
AuthorDate: Tue Oct 14 16:41:52 2025 -0700
beam-sql.sh, a standalone launcher for Beam SQL Shell (#36305)
* Initial version of Beam SQL cli
* Added last 10 version list
* Updated Licence Header and add doc for beam-sql.sh
* refactoring the Beam SQL shell to make more accessible, reliable, and
maintainable
* Apply suggestion from @gemini-code-assist[bot]
Co-authored-by: gemini-code-assist[bot]
<176961590+gemini-code-assist[bot]@users.noreply.github.com>
* Apply suggestion from @gemini-code-assist[bot]
Co-authored-by: gemini-code-assist[bot]
<176961590+gemini-code-assist[bot]@users.noreply.github.com>
* Addressed Cham comments.
---------
Co-authored-by: gemini-code-assist[bot]
<176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
release/src/main/scripts/set_version.sh | 3 +
scripts/beam-sql.sh | 448 +++++++++++++++++++++
.../content/en/documentation/dsls/sql/shell.md | 114 +++++-
3 files changed, 547 insertions(+), 18 deletions(-)
diff --git a/release/src/main/scripts/set_version.sh
b/release/src/main/scripts/set_version.sh
index 73ca298c133..138275f20a3 100755
--- a/release/src/main/scripts/set_version.sh
+++ b/release/src/main/scripts/set_version.sh
@@ -91,6 +91,7 @@ if [[ -z "$IS_SNAPSHOT_VERSION" ]] ; then
sed -i -e "s/sdk_version=.*/sdk_version=$TARGET_VERSION/" gradle.properties
sed -i -e "s/SdkVersion = .*/SdkVersion = \"$TARGET_VERSION\"/"
sdks/go/pkg/beam/core/core.go
sed -i -e "s/\"version\": .*/\"version\": \"$TARGET_VERSION\",/"
sdks/typescript/package.json
+ sed -i -e
"s/DEFAULT_BEAM_VERSION=\".*\"/DEFAULT_BEAM_VERSION=\"$TARGET_VERSION\"/"
scripts/beam-sql.sh
else
# For snapshot version:
# Java/gradle appends -SNAPSHOT
@@ -103,6 +104,7 @@ else
sed -i -e "s/sdk_version=.*/sdk_version=$TARGET_VERSION.dev/"
gradle.properties
sed -i -e "s/SdkVersion = .*/SdkVersion = \"${TARGET_VERSION}.dev\"/"
sdks/go/pkg/beam/core/core.go
sed -i -e "s/\"version\": .*/\"version\": \"$TARGET_VERSION-SNAPSHOT\",/"
sdks/typescript/package.json
+ sed -i -e
"s/DEFAULT_BEAM_VERSION=\".*\"/DEFAULT_BEAM_VERSION=\"$TARGET_VERSION\"/"
scripts/beam-sql.sh
fi
if [[ "$GIT_ADD" == yes ]] ; then
@@ -112,4 +114,5 @@ if [[ "$GIT_ADD" == yes ]] ; then
git add sdks/go/pkg/beam/core/core.go
git add runners/google-cloud-dataflow-java/build.gradle
git add sdks/typescript/package.json
+ git add scripts/beam-sql.sh
fi
diff --git a/scripts/beam-sql.sh b/scripts/beam-sql.sh
new file mode 100755
index 00000000000..401cd471c08
--- /dev/null
+++ b/scripts/beam-sql.sh
@@ -0,0 +1,448 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# A simple launcher for the Apache Beam SQL Shell.
+# This script builds a self-contained JAR with all dependencies using Maven,
+# which correctly handles service loading for IOs, and caches the JAR.
set -e # Exit immediately if a command exits with a non-zero status.

# --- Configuration ---
# Beam version used when --version is not given. set_version.sh rewrites this
# line on release, so keep the DEFAULT_BEAM_VERSION="..." shape intact.
DEFAULT_BEAM_VERSION="2.67.0"
MAIN_CLASS="org.apache.beam.sdk.extensions.sql.jdbc.BeamSqlLine"
# Directory to store cached executable JAR files
CACHE_DIR="${HOME}/.beam/cache"

# Maven Wrapper Configuration
MAVEN_WRAPPER_VERSION="3.2.0"
MAVEN_VERSION="3.9.6"
MAVEN_WRAPPER_SCRIPT_URL="https://raw.githubusercontent.com/apache/maven-wrapper/refs/tags/maven-wrapper-${MAVEN_WRAPPER_VERSION}/maven-wrapper-distribution/src/resources/mvnw"
MAVEN_WRAPPER_JAR_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/${MAVEN_WRAPPER_VERSION}/maven-wrapper-${MAVEN_WRAPPER_VERSION}.jar"
MAVEN_DISTRIBUTION_URL="https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/${MAVEN_VERSION}/apache-maven-${MAVEN_VERSION}-bin.zip"

# Maven Plugin Configuration
MAVEN_SHADE_PLUGIN_VERSION="3.5.1"
mkdir -p "${CACHE_DIR}"

# Create a temporary directory for our Maven project (removed by the EXIT trap).
WORK_DIR=$(mktemp -d)
+
# Ensure cleanup on script exit.
# Removes the temporary Maven project directory; registered on EXIT so it
# also runs when set -e aborts the script.
cleanup() {
  # Guard against an empty/unset WORK_DIR so we never `rm -rf` the wrong path.
  if [ -n "${WORK_DIR:-}" ] && [ -d "${WORK_DIR}" ]; then
    rm -rf -- "${WORK_DIR}"
  fi
}
trap cleanup EXIT
+
# --- Helper Functions ---
# Downloads the Maven Wrapper script and supporting files into ~/.beam and
# caches them for subsequent runs. Sets the global MAVEN_CMD to the wrapper
# script path.
function setup_maven_wrapper() {
  local beam_dir="${HOME}/.beam"
  local maven_wrapper_dir="${beam_dir}/maven-wrapper"
  local mvnw_script="${maven_wrapper_dir}/mvnw"
  local wrapper_jar="${maven_wrapper_dir}/.mvn/wrapper/maven-wrapper.jar"
  local wrapper_props="${maven_wrapper_dir}/.mvn/wrapper/maven-wrapper.properties"

  # Check if Maven wrapper is already cached
  if [ -f "${mvnw_script}" ] && [ -f "${wrapper_jar}" ] && [ -f "${wrapper_props}" ]; then
    echo "🔧 Using cached Maven Wrapper from ${maven_wrapper_dir}"
    # Use the cached wrapper directly
    MAVEN_CMD="${mvnw_script}"
    return
  fi

  echo "🔧 Downloading Maven Wrapper for the first time..."
  mkdir -p "${maven_wrapper_dir}/.mvn/wrapper"

  # Create the properties file to specify a modern Maven version
  echo "distributionUrl=${MAVEN_DISTRIBUTION_URL}" > "${wrapper_props}"

  # Download the mvnw script and the wrapper JAR to the cache directory.
  # -f makes curl fail (non-zero, so set -e aborts) on HTTP errors instead of
  # silently caching an error page as the wrapper script/JAR.
  curl -fsSL -o "${mvnw_script}" "${MAVEN_WRAPPER_SCRIPT_URL}"
  curl -fsSL -o "${wrapper_jar}" "${MAVEN_WRAPPER_JAR_URL}"

  # Make the wrapper script executable
  chmod +x "${mvnw_script}"

  echo "✅ Maven Wrapper cached in ${maven_wrapper_dir} for future use"
  # Use the cached wrapper directly
  MAVEN_CMD="${mvnw_script}"
}
+
# Print the help text. Only reached via -h/--help, so exit 0 (success):
# explicitly requested help is not an error condition.
function usage() {
  echo "Usage: $0 [--version <beam_version>] [--runner <runner_name>] [--io <io_connector>] [--list-versions] [--list-ios] [--list-runners] [--debug] [-h|--help]"
  echo ""
  echo "A self-contained launcher for the Apache Beam SQL Shell."
  echo ""
  echo "Options:"
  echo "  --version         Specify the Apache Beam version (default: ${DEFAULT_BEAM_VERSION})."
  echo "  --runner          Specify the Beam runner to use (default: direct)."
  echo "                    Supported runners:"
  echo "                      direct   - DirectRunner (runs locally, good for development)"
  echo "                      dataflow - DataflowRunner (runs on Google Cloud Dataflow)"
  echo "  --io              Specify an IO connector to include. Can be used multiple times."
  echo "                    Available connectors: amazon-web-services2, amqp, azure,"
  echo "                    azure-cosmos, cassandra, cdap, clickhouse, csv, debezium, elasticsearch,"
  echo "                    google-ads, google-cloud-platform, hadoop-format, hbase, hcatalog, iceberg,"
  echo "                    influxdb, jdbc, jms, json, kafka, kinesis, kudu, mongodb, mqtt, neo4j,"
  echo "                    parquet, pulsar, rabbitmq, redis, singlestore, snowflake, solace, solr,"
  echo "                    sparkreceiver, splunk, synthetic, thrift, tika, xml"
  echo "  --list-versions   List all available Beam versions from Maven Central and exit."
  echo "  --list-ios        List all available IO connectors from Maven Central and exit."
  echo "  --list-runners    List all available runners and exit."
  echo "  --debug           Enable debug mode (sets bash -x flag)."
  echo "  -h, --help        Show this help message."
  exit 0
}
+
# Fetches the 10 most recent non-SNAPSHOT Beam versions from Maven Central
# metadata and prints them. Returns 1 if curl is missing or nothing is found.
function list_versions() {
  echo "🔎 Fetching the 10 most recent Apache Beam versions from Maven Central..."
  local metadata_url="https://repo1.maven.org/maven2/org/apache/beam/beam-sdks-java-core/maven-metadata.xml"

  if ! command -v curl &> /dev/null; then
    echo "❌ Error: 'curl' is required to fetch the version list." >&2
    return 1
  fi

  # Fetch, parse, filter, sort, and take the top 10.
  # -f stops curl from emitting an HTML error page into the pipeline.
  local versions
  versions=$(curl -fsS "${metadata_url}" | \
    grep '<version>' | \
    sed 's/.*<version>\(.*\)<\/version>.*/\1/' | \
    grep -v 'SNAPSHOT' | \
    sort -rV | \
    head -n 10) # Limit to the first 10 lines

  if [ -z "${versions}" ]; then
    echo "❌ Could not retrieve versions. Please check your internet connection or the Maven Central status." >&2
    return 1
  fi

  echo "✅ 10 latest versions:"
  echo "${versions}"
}
+
# Lists available IO connectors by querying the Maven Central search API.
# Falls back to a hard-coded list (and returns 1) if the query fails.
function list_ios() {
  echo "🔎 Fetching available Apache Beam IO connectors from Maven Central..."
  local search_url="https://search.maven.org/solrsearch/select?q=g:org.apache.beam+AND+a:beam-sdks-java-io-*&rows=100&wt=json"

  if ! command -v curl &> /dev/null; then
    echo "❌ Error: 'curl' is required to fetch the IO connector list." >&2
    return 1
  fi

  # Fetch and parse the JSON response to extract IO connector names.
  # -f keeps an HTTP error page out of the pipeline (result is then empty).
  local ios
  ios=$(curl -fsS "${search_url}" | \
    grep -o '"a":"beam-sdks-java-io-[^"]*"' | \
    sed 's/"a":"beam-sdks-java-io-\([^"]*\)"/\1/' | \
    grep -v -E '(tests?|expansion-service|parent|upgrade)' | \
    sort -u)

  if [ -z "${ios}" ]; then
    echo "❌ Could not retrieve IO connectors. Please check your internet connection or try again later." >&2
    echo "📋 Here are the known IO connectors (may not be complete):"
    echo "amazon-web-services2, amqp, azure, azure-cosmos, cassandra,"
    echo "cdap, clickhouse, csv, debezium, elasticsearch, google-ads, google-cloud-platform,"
    echo "hadoop-format, hbase, hcatalog, iceberg, influxdb, jdbc, jms, json, kafka, kinesis,"
    echo "kudu, mongodb, mqtt, neo4j, parquet, pulsar, rabbitmq, redis, singlestore, snowflake,"
    echo "solace, solr, sparkreceiver, splunk, synthetic, thrift, tika, xml"
    return 1
  fi

  echo "✅ Available IO connectors:"
  echo "${ios}" | tr '\n' ' ' | fold -s -w 80 | sed 's/^/  /'
}
+
# Lists available runner artifacts for ${BEAM_VERSION} by querying the Maven
# Central search API and prints a short description for each known runner.
# Returns 1 if curl is missing or the query yields nothing.
function list_runners() {
  echo "🚀 Fetching available Apache Beam runners for version ${BEAM_VERSION} from Maven Central..."
  local search_url="https://search.maven.org/solrsearch/select?q=g:org.apache.beam+AND+a:beam-runners-*+AND+v:${BEAM_VERSION}&rows=100&wt=json"

  if ! command -v curl &> /dev/null; then
    echo "❌ Error: 'curl' is required to fetch the runner list." >&2
    return 1
  fi

  # Fetch and parse the JSON response to extract runner names, excluding
  # internal/helper artifacts that are not user-selectable runners.
  # -f keeps an HTTP error page out of the pipeline (result is then empty).
  local runners
  runners=$(curl -fsS "${search_url}" | \
    grep -o '"a":"beam-runners-[^"]*"' | \
    sed 's/"a":"beam-runners-\([^"]*\)"/\1/' | \
    grep -v -E '(tests?|parent|core-construction|core-java|extensions|job-server|legacy-worker|windmill|examples|experimental|orchestrator|java-fn-execution|java-job-service|gcp-gcemd|gcp-gcsproxy|local-java-core|portability-java|prism-java|reference-java)' | \
    sort -u)

  if [ -z "${runners}" ]; then
    echo "❌ Could not retrieve runners for version ${BEAM_VERSION}. Please check your internet connection or try again later." >&2
    echo "📋 Here are the known runners for recent Beam versions (may not be complete):"
    echo ""
    echo "  direct          - DirectRunner (runs locally, good for development)"
    echo "  dataflow        - DataflowRunner (runs on Google Cloud Dataflow)"
    echo "  flink           - FlinkRunner (runs on Apache Flink)"
    echo "  spark           - SparkRunner (runs on Apache Spark)"
    echo "  samza           - SamzaRunner (runs on Apache Samza)"
    echo "  jet             - JetRunner (runs on Hazelcast Jet)"
    echo "  twister2        - Twister2Runner (runs on Twister2)"
    echo ""
    echo "💡 Usage: ./beam-sql.sh --runner <runner_name>"
    echo "   Default: direct"
    echo "   Note: Only 'direct' and 'dataflow' are currently supported by this script."
    return 1
  fi

  echo "✅ Available runners for Beam ${BEAM_VERSION}:"
  echo ""

  # Process each runner artifact and print a friendly description.
  while IFS= read -r runner; do
    case "$runner" in
      "direct-java")
        echo "  direct          - DirectRunner"
        echo "                    Runs locally on your machine. Good for development and testing."
        ;;
      "google-cloud-dataflow-java")
        echo "  dataflow        - DataflowRunner"
        echo "                    Runs on Google Cloud Dataflow for production workloads."
        ;;
      flink-*)
        # Declaration split from assignment so a failing substitution is not masked.
        local version
        version=$(echo "$runner" | sed 's/flink-//')
        echo "  flink-${version}      - FlinkRunner (Flink ${version})"
        echo "                    Runs on Apache Flink ${version} clusters."
        ;;
      flink_*)
        local version
        version=$(echo "$runner" | sed 's/flink_//')
        echo "  flink-${version}      - FlinkRunner (Flink ${version})"
        echo "                    Runs on Apache Flink ${version} clusters."
        ;;
      "spark")
        echo "  spark           - SparkRunner"
        echo "                    Runs on Apache Spark clusters."
        ;;
      "spark-3")
        echo "  spark-3         - SparkRunner (Spark 3.x)"
        echo "                    Runs on Apache Spark 3.x clusters."
        ;;
      "samza")
        echo "  samza           - SamzaRunner"
        echo "                    Runs on Apache Samza."
        ;;
      "jet")
        echo "  jet             - JetRunner"
        echo "                    Runs on Hazelcast Jet."
        ;;
      "twister2")
        echo "  twister2        - Twister2Runner"
        echo "                    Runs on Twister2."
        ;;
      "apex")
        echo "  apex            - ApexRunner"
        echo "                    Runs on Apache Apex."
        ;;
      "gearpump")
        echo "  gearpump        - GearpumpRunner"
        echo "                    Runs on Apache Gearpump."
        ;;
      "prism")
        echo "  prism           - PrismRunner"
        echo "                    Local runner for testing portable pipelines."
        ;;
      "reference")
        echo "  reference       - ReferenceRunner"
        echo "                    Reference implementation for testing."
        ;;
      "portability")
        echo "  portability     - PortabilityRunner"
        echo "                    For portable pipeline execution."
        ;;
      *)
        # For any other runners, clean up the name and show it
        local clean_name
        clean_name=$(echo "$runner" | sed -e 's/-java$//' -e 's/^gcp-//' -e 's/^local-//')
        echo "  ${clean_name} - ${runner}"
        ;;
    esac
  done <<< "$runners"

  echo ""
  echo "💡 Usage: ./beam-sql.sh --runner <runner_name>"
  echo "   Default: direct"
  echo "   Note: This script currently supports 'direct' and 'dataflow' runners."
  echo "         Other runners may require additional setup and dependencies."
}
+
+
# --- Argument Parsing ---
BEAM_VERSION="${DEFAULT_BEAM_VERSION}"
IO_CONNECTORS=()
BEAM_RUNNER="direct"
SQLLINE_ARGS=()   # unrecognized args are passed through to sqlline
DEBUG_MODE=false

while [[ "$#" -gt 0 ]]; do
  case $1 in
    --version)
      # Fail with a clear message when the value is missing; previously a
      # trailing --version died on a bare `shift` under set -e with no output.
      [[ -n "${2:-}" ]] || { echo "❌ Error: --version requires a value." >&2; exit 1; }
      BEAM_VERSION="$2"; shift ;;
    --runner)
      [[ -n "${2:-}" ]] || { echo "❌ Error: --runner requires a value." >&2; exit 1; }
      # Normalize to lower case so values like "Direct" or "DATAFLOW" work.
      BEAM_RUNNER=$(echo "$2" | tr '[:upper:]' '[:lower:]'); shift ;;
    --io)
      [[ -n "${2:-}" ]] || { echo "❌ Error: --io requires a value." >&2; exit 1; }
      IO_CONNECTORS+=("$2"); shift ;;
    --list-versions) list_versions; exit 0 ;;
    --list-ios) list_ios; exit 0 ;;
    --list-runners) list_runners; exit 0 ;;
    --debug) DEBUG_MODE=true ;;
    -h|--help) usage ;;
    *) SQLLINE_ARGS+=("$1") ;;
  esac
  shift
done

# Enable debug mode if requested
if [ "${DEBUG_MODE}" = true ]; then
  set -x
fi
+
# --- Prerequisite Check ---
# Java is always required (to run both the Maven wrapper and the shell itself).
if ! command -v java &> /dev/null; then
  echo "❌ Error: 'java' command not found. It is required to run the application." >&2
  exit 1
fi

# Curl is required for Maven wrapper setup.
if ! command -v curl &> /dev/null; then
  echo "❌ Error: 'curl' command not found. It is required to download the Maven wrapper." >&2
  exit 1
fi

# Sets MAVEN_CMD to the (cached or freshly downloaded) Maven wrapper script.
setup_maven_wrapper

echo "🚀 Preparing Beam SQL Shell v${BEAM_VERSION}..."
echo "   Runner: ${BEAM_RUNNER}"
if [ ${#IO_CONNECTORS[@]} -gt 0 ]; then
  echo "   Including IOs: ${IO_CONNECTORS[*]}"
fi
+
# --- Dependency Resolution & JAR Caching ---

# Create a unique key for the configuration to use as a cache filename.
# IO names are sorted so "--io a --io b" and "--io b --io a" share a cache entry.
sorted_ios_str=$(printf "%s\n" "${IO_CONNECTORS[@]}" | sort | tr '\n' '-' | sed 's/-$//')
CACHE_KEY="beam-${BEAM_VERSION}_runner-${BEAM_RUNNER}_ios-${sorted_ios_str}.jar"
CACHE_FILE="${CACHE_DIR}/${CACHE_KEY}"

# Check if a cached JAR already exists for this configuration.
if [ -f "${CACHE_FILE}" ]; then
  echo "✅ Found cached executable JAR. Skipping build."
  CP="${CACHE_FILE}"
else
  echo "🔎 No cache found. Building executable JAR (this might take a moment on first run)..."

  # --- Dynamic POM Generation ---
  POM_FILE="${WORK_DIR}/pom.xml"
  # Note: \${beam.version} is escaped so Maven (not the shell) expands it.
  cat > "${POM_FILE}" << EOL
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.apache.beam</groupId>
  <artifactId>beam-sql-shell-runner</artifactId>
  <version>1.0</version>
  <dependencies>
    <dependency>
      <groupId>org.apache.beam</groupId>
      <artifactId>beam-sdks-java-extensions-sql-jdbc</artifactId>
      <version>\${beam.version}</version>
    </dependency>
EOL
  # Add IO and Runner dependencies
  for io in "${IO_CONNECTORS[@]}"; do
    echo "    <dependency><groupId>org.apache.beam</groupId><artifactId>beam-sdks-java-io-${io}</artifactId><version>\${beam.version}</version></dependency>" >> "${POM_FILE}"
  done
  RUNNER_ARTIFACT=""
  case "${BEAM_RUNNER}" in
    dataflow) RUNNER_ARTIFACT="beam-runners-google-cloud-dataflow-java" ;;
    direct) ;;  # no extra artifact is added for the direct runner
    *) echo "❌ Error: Unsupported runner '${BEAM_RUNNER}'." >&2; exit 1 ;;
  esac
  if [ -n "${RUNNER_ARTIFACT}" ]; then
    echo "    <dependency><groupId>org.apache.beam</groupId><artifactId>${RUNNER_ARTIFACT}</artifactId><version>\${beam.version}</version></dependency>" >> "${POM_FILE}"
  fi

  # Complete the POM with the build section for the maven-shade-plugin.
  # ServicesResourceTransformer merges META-INF/services files so IO/runner
  # service loading works in the uber JAR; signature files are excluded.
  cat >> "${POM_FILE}" << EOL
  </dependencies>
  <properties>
    <beam.version>${BEAM_VERSION}</beam.version>
  </properties>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>${MAVEN_SHADE_PLUGIN_VERSION}</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
              </transformers>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
EOL

  # Use `mvn package` to build the uber JAR. MAVEN_CMD is quoted so a HOME
  # containing spaces does not break the invocation.
  "${MAVEN_CMD}" -f "${POM_FILE}" -q --batch-mode package

  UBER_JAR_PATH="${WORK_DIR}/target/beam-sql-shell-runner-1.0.jar"

  # Check if build was successful before caching
  if [ ! -f "${UBER_JAR_PATH}" ]; then
    echo "❌ Maven build failed. The uber JAR was not created." >&2
    exit 1
  fi

  # Copy the newly built JAR to our cache directory.
  cp "${UBER_JAR_PATH}" "${CACHE_FILE}"
  CP="${CACHE_FILE}"
  echo "💾 JAR built and cached for future use."
fi
+
# --- Launch Shell ---
# CP points at the cached uber JAR; any args not consumed by the parser are
# forwarded to sqlline unchanged.
echo "✅ Dependencies ready. Launching Beam SQL Shell..."
echo "----------------------------------------------------"

java -cp "${CP}" "${MAIN_CLASS}" "${SQLLINE_ARGS[@]}"

echo "----------------------------------------------------"
echo "👋 Exited Beam SQL Shell."
diff --git a/website/www/site/content/en/documentation/dsls/sql/shell.md
b/website/www/site/content/en/documentation/dsls/sql/shell.md
index 87fb9513e21..fcb560e138d 100644
--- a/website/www/site/content/en/documentation/dsls/sql/shell.md
+++ b/website/www/site/content/en/documentation/dsls/sql/shell.md
@@ -26,23 +26,89 @@ This page describes how to work with the shell, but does
not focus on specific f
## Quickstart
-To use Beam SQL shell, you must first clone the [Beam SDK
repository](https://github.com/apache/beam). Then, from the root of the
repository clone, execute the following commands to run the shell:
+The easiest way to get started with the Beam SQL shell is using the
`beam-sql.sh` script:
+### Using beam-sql.sh Script
+
+The `beam-sql.sh` script automatically downloads and sets up the Beam SQL
shell with all dependencies.
+
+#### Installation
+
+1. **Download the script:**
+ ```bash
+ curl -O
https://raw.githubusercontent.com/apache/beam/master/scripts/beam-sql.sh
+ chmod +x beam-sql.sh
+ ```
+
+2. **Run the shell:**
+ ```bash
+ ./beam-sql.sh
+ ```
+
+The script will automatically:
+- Download a recent stable Beam version by default
+- Build a self-contained JAR with all dependencies
+- Cache the JAR for future use (stored in `~/.beam/cache/`)
+- Launch the Beam SQL shell
+
+#### Prerequisites
+
+- **Java**: Java 11 or higher must be installed and available in your PATH
+- **curl**: Required for downloading the Maven wrapper and dependencies
+
+#### Command-line Options
+
+The `beam-sql.sh` script supports several options:
+
+```bash
+./beam-sql.sh [--version <beam_version>] [--runner <runner_name>] [--io
<io_connector>] [--list-versions] [--list-ios] [--list-runners] [--debug]
[-h|--help]
```
-./gradlew -p sdks/java/extensions/sql/jdbc
-Pbeam.sql.shell.bundled=':runners:flink:1.17,:sdks:java:io:kafka' installDist
-./sdks/java/extensions/sql/jdbc/build/install/jdbc/bin/jdbc
+**Options:**
+- `--version <beam_version>`: Specify the Apache Beam version (a recent stable
version is used by default).
+- `--runner <runner_name>`: Specify the Beam runner to use (default: direct).
+- `--io <io_connector>`: Specify an IO connector to include. Can be used
multiple times. Available connectors include: amazon-web-services2, amqp,
azure, azure-cosmos, cassandra, cdap, clickhouse, csv, debezium, elasticsearch,
google-ads, google-cloud-platform, hadoop-format, hbase, hcatalog, iceberg,
influxdb, jdbc, jms, json, kafka, kinesis, kudu, mongodb, mqtt, neo4j, parquet,
pulsar, rabbitmq, redis, singlestore, snowflake, solace, solr, sparkreceiver,
splunk, synthetic, thrift, tika, xml
+- `--list-versions`: List all available Beam versions from Maven Central and
exit
+- `--list-ios`: List all available IO connectors from Maven Central and exit
(provides the most up-to-date list)
+- `--list-runners`: List all available runners from Maven Central for the
specified Beam version with detailed descriptions and exit
+- `--debug`: Enable debug mode (sets bash -x flag)
+- `-h, --help`: Show help message
+
+**Examples:**
+
+```bash
+# Use a specific Beam version
+./beam-sql.sh --version 2.66.0
+
+# Include Kafka IO connector
+./beam-sql.sh --io kafka
+
+# Use Dataflow runner with multiple IO connectors
+./beam-sql.sh --runner dataflow --io kafka --io iceberg
+
+# List available versions
+./beam-sql.sh --list-versions
+
+# List available IO connectors
+./beam-sql.sh --list-ios
+
+# List available runners (for default version)
+./beam-sql.sh --list-runners
+
+# List available runners for a specific version
+./beam-sql.sh --version 2.66.0 --list-runners
```
-After you run the commands, the SQL shell starts and you can type queries:
+
+### Starting the Shell
+
+After you run the script, the SQL shell starts and you can type queries:
```
-Welcome to Beam SQL 2.66.0-SNAPSHOT (based on sqlline version 1.4.0)
+Welcome to Beam SQL 2.67.0 (based on sqlline version 1.4.0)
0: BeamSQL>
```
-_Note: If you haven't built the project before running the Gradle command, the
command will take a few minutes as Gradle must build all dependencies first._
-
The shell converts the queries into Beam pipelines, runs them using
`DirectRunner`, and returns the results as tables when the pipelines finish:
```
@@ -112,23 +178,35 @@ When you're satisfied with the logic of your SQL
statements, you can submit the
## Specifying the Runner
-By default, Beam uses the `DirectRunner` to run the pipeline on the machine
where you're executing the commands. If you want to run the pipeline with a
different runner, you must perform two steps:
+By default, Beam uses the `DirectRunner` to run the pipeline on the machine
where you're executing the commands. If you want to run the pipeline with a
different runner, you can specify it using the `beam-sql.sh` script:
-1. Make sure the SQL shell includes the desired runner. Add the corresponding
project id to the `-Pbeam.sql.shell.bundled` parameter of the Gradle invocation
([source
code](https://github.com/apache/beam/blob/master/sdks/java/extensions/sql/shell/build.gradle),
[project
ids](https://github.com/apache/beam/blob/master/settings.gradle.kts)). For
example, use the following command to include Flink runner and KafkaIO:
+### Using beam-sql.sh Script
- ```
- ./gradlew -p sdks/java/extensions/sql/jdbc
-Pbeam.sql.shell.bundled=':runners:flink:1.17,:sdks:java:io:kafka' installDist
- ```
+### How Runner Values are Determined
- _Note: You can bundle multiple runners (using a comma-separated list) or
other additional components in the same manner. For example, you can add
support for more I/Os._
+The `beam-sql.sh` script determines the runner in the following way:
-1. Then, specify the runner using the `SET` command ([reference
page](/documentation/dsls/sql/set/)):
+1. **Default**: If no `--runner` option is specified, it defaults to `direct`
(DirectRunner)
+2. **Command-line**: The `--runner` option accepts case-insensitive values
(`Direct`, `DATAFLOW`, etc.)
- ```
- 0: BeamSQL> SET runner='FlinkRunner';
- ```
+For example, use the following commands for the Dataflow runner when using the
`beam-sql.sh` script:
+
+```bash
+# Use Dataflow runner
+./beam-sql.sh --runner dataflow
+
+# Use Dataflow runner with specific IO connectors
+./beam-sql.sh --runner dataflow --io kafka --io iceberg
+```
+
+Then, configure the runner using the `SET` command ([reference
page](/documentation/dsls/sql/set/)):
+
+```
+0: BeamSQL> SET runner='DataflowRunner';
+0: BeamSQL> SET projectId='your-gcp-project';
+0: BeamSQL> SET tempLocation='gs://your-bucket/temp';
+```
-Beam will submit all future `INSERT` statements as pipelines to the specified
runner. In this case, the Beam SQL shell does not display the query results.
You must manage the submitted jobs through the corresponding runner's UI (for
example, using the Flink UI or command line).
## Specifying the PipelineOptions