This is an automated email from the ASF dual-hosted git repository. fokko pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/iceberg.git
commit d9498a00dd1d3f0a5468b4b7c7da84750a7636f1 Author: Brian Olsen <[email protected]> AuthorDate: Fri Jan 5 00:54:46 2024 -0600 Shift site build to use monorepo and gh-pages --- .github/workflows/flink-ci.yml | 1 + .github/workflows/hive-ci.yml | 1 + .github/workflows/java-ci.yml | 1 + .../variables.yml => .github/workflows/site-ci.yml | 31 ++- .github/workflows/spark-ci.yml | 3 +- .gitignore | 5 +- site/.gitignore | 119 ------------ site/Makefile | 34 ++++ site/README.md | 142 +++++++------- site/dev/build.sh | 23 +++ site/dev/clean.sh | 22 +++ site/dev/common.sh | 216 +++++++++++++++++++++ site/dev/deploy.sh | 24 +++ site/dev/serve.sh | 23 +++ site/dev/setup_env.sh | 26 +++ site/docs/blogs.md | 14 ++ site/docs/community.md | 4 +- site/docs/hive-quickstart.md | 2 +- site/docs/how-to-release.md | 5 +- site/docs/multi-engine-support.md | 4 +- site/docs/releases.md | 142 +++++++++++++- site/docs/roadmap.md | 53 ++--- site/docs/spec.md | 38 +++- site/docs/vendors.md | 9 + site/docs/view-spec.md | 72 ++++--- site/mkdocs.yml | 69 +++---- site/nav.yml | 48 +++++ site/requirements.txt | 2 +- 28 files changed, 812 insertions(+), 321 deletions(-) diff --git a/.github/workflows/flink-ci.yml b/.github/workflows/flink-ci.yml index 702ae9bc89..4ea0465505 100644 --- a/.github/workflows/flink-ci.yml +++ b/.github/workflows/flink-ci.yml @@ -40,6 +40,7 @@ on: - 'spark/**' - 'pig/**' - 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml index f582e516fc..0d8b62137b 100644 --- a/.github/workflows/hive-ci.yml +++ b/.github/workflows/hive-ci.yml @@ -38,6 +38,7 @@ on: - 'flink/**' - 'pig/**' - 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml index 4936e2b651..9e5ace5c84 100644 --- a/.github/workflows/java-ci.yml +++ b/.github/workflows/java-ci.yml @@ -35,6 +35,7 @@ on: - '.asf.yml' - 'dev/**' 
- 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/site/variables.yml b/.github/workflows/site-ci.yml similarity index 67% rename from site/variables.yml rename to .github/workflows/site-ci.yml index 4c1011ce2b..95a1fb3b94 100644 --- a/site/variables.yml +++ b/.github/workflows/site-ci.yml @@ -1,3 +1,4 @@ +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -14,13 +15,23 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -extra: - icebergVersion: 1.4.0 - social: - - icon: fontawesome/brands/github-alt - link: https://github.com/apache/iceberg - - icon: fontawesome/brands/youtube - link: https://www.youtube.com/@ApacheIceberg - - icon: fontawesome/brands/slack - link: https://join.slack.com/t/apache-iceberg/shared_invite/zt-1znkcg5zm-7_FE~pcox347XwZE3GNfPg +# +name: site-ci +on: + push: + branches: + - main + paths: + - site/** + workflow_dispatch: +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - name: Deploy Iceberg documentation + run: make deploy + working-directory: ./site diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index c77f95fe7a..45e63b6c81 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -33,6 +33,7 @@ on: - '.gitignore' - '.asf.yml' - 'dev/**' + - 'site/**' - 'mr/**' - 'hive3/**' - 'hive3-orc-bundle/**' @@ -141,4 +142,4 @@ jobs: with: name: test logs path: | - **/build/testlogs \ No newline at end of file + **/build/testlogs diff --git a/.gitignore b/.gitignore index 23febc6ccf..d9848cab06 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,9 @@ gradle/wrapper/gradle-wrapper.jar lib/ # web site build -site/site +site/site/ 
+#site/docs/docs/ +site/docs/javadoc/ # benchmark output spark/v3.3/spark/benchmark/* @@ -62,4 +64,3 @@ metastore_db/ # Spark/metastore files spark-warehouse/ derby.log - diff --git a/site/.gitignore b/site/.gitignore deleted file mode 100644 index cc9d8b1ced..0000000000 --- a/site/.gitignore +++ /dev/null @@ -1,119 +0,0 @@ -## Temp remove for first phase -.github/ - -## MkDocs -/site/ - -## Vale -.github/vale/ -.vale.ini - -## MacOS - -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes -.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -## Linux - -*~ - -# temporary files which can be created if a process still has a handle open of a deleted file -.fuse_hidden* - -# KDE directory preferences -.directory - -# Linux trash folder which might appear on any partition or disk -.Trash-* - -# .nfs files are created when an open file is removed but is still being accessed -.nfs* - -## Eclipse - -.metadata -tmp/ -*.tmp -*.bak -*.swp -*~.nib -local.properties -.settings/ -.loadpath -.recommenders - -# External tool builders -.externalToolBuilders/ - -# Locally stored "Eclipse launch configurations" -*.launch - -# PyDev specific (Python IDE for Eclipse) -*.pydevproject - -# CDT-specific (C/C++ Development Tooling) -.cproject - -# CDT- autotools -.autotools - -# Java annotation processor (APT) -.factorypath - -# PDT-specific (PHP Development Tools) -.buildpath - -# sbteclipse plugin -.target - -# Tern plugin -.tern-project - -# TeXlipse plugin -.texlipse - -# STS (Spring Tool Suite) -.springBeans - -# Code Recommenders -.recommenders/ - -# Annotation Processing -.apt_generated/ -.apt_generated_test/ - -# Scala IDE specific (Scala & Java development for 
Eclipse) -.cache-main -.scala_dependencies -.worksheet - -# Project description file. -# Typically, this file would be tracked if it contains build/dependency configurations: -.project - - - diff --git a/site/Makefile b/site/Makefile new file mode 100755 index 0000000000..ef66118ad5 --- /dev/null +++ b/site/Makefile @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.PHONY: help +help: # Show help for each of the Makefile recipes. + @grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done + +.PHONY: serve +serve: # Clean, build, and run the docs site locally. + dev/serve.sh + +.PHONY: build +build: # Clean and build the docs site locally. + dev/build.sh + +.PHONY: deploy +deploy: # Clean, build, and deploy the Iceberg docs site. + dev/deploy.sh + +.PHONY: clean +clean: # Clean the local docs site. 
+ dev/clean.sh diff --git a/site/README.md b/site/README.md index f78160861d..a58b988fcc 100644 --- a/site/README.md +++ b/site/README.md @@ -27,52 +27,64 @@ This subproject contains the [MkDocs projects](https://www.mkdocs.org/) that def ## Usage -The directory structure in this repository mimics the sitemap hierarchy of the website. This aims to help contributors find the source files needed to make their changes faster. To understand the layout and naming, it is helpful to have some basic understandings of the MkDocs framework defaults. +The directory structure in this repository aims to mimic the sitemap hierarchy of the website. This helps contributors find the source files needed when updating or adding new documentation. It's helpful to have some basic understanding of the MkDocs framework defaults. ### MkDocs background -In MkDocs, the [`docs_dir`](https://www.mkdocs.org/user-guide/configuration/#docs_dir) points to the root directory containing the source markdown files for an MkDocs project. By default, this points to the `docs` directory. When you build MkDocs `mkdocs build`, MkDocs generates the static site in the [`site_dir`](https://www.mkdocs.org/user-guide/configuration/#site_dir) becomes the root of that project for the generated site. +In MkDocs, the [`docs_dir`](https://www.mkdocs.org/user-guide/configuration/#docs_dir) points to the root directory containing the source markdown files for an MkDocs project. By default, this points to a directory named `docs` in the same location as the [`mkdocs.yml` file](https://www.mkdocs.org/user-guide/configuration/#introduction). The `mkdocs build` command is used to build the project. During the build, MkDocs generates the static site in the [`site_dir`](https://www.mkdocs.org/user-guide/ [...] ### Iceberg docs layout -In the Iceberg docs, since the top-level site and versioned docs are contained in the same directory, they all live under the `/site` directory of the main Iceberg repository.
The `/site/docs` directory is named this way to follow the [MkDocs convention](https://www.mkdocs.org/user-guide/configuration/#docs_dir), while the `/site/docs/docs` directory is an analog to the "Docs" navigation tab. Under this directory, you'll find the `/site/docs/docs/nightly` directory, which contains the st [...] +The static Iceberg website lives under the `/site` directory, while the versioned documentation lives under the `/docs` directory of the main Iceberg repository. The `/site/docs` directory is named that way to follow the [MkDocs convention](https://www.mkdocs.org/user-guide/configuration/#docs_dir). The `/docs` directory contains the current state of the versioned documentation with local revisions. Notice that the root `/site` and `/docs` just happen to share the same naming convention as MkDoc [...] + +The static Iceberg site pages are Markdown files that live at `/site/docs/*.md`. The versioned documentation pages are Markdown files that live at `/docs/docs/*.md`. You may ask where the older versions of the docs and javadocs are, which is covered later in the build section. + +``` +. +├── docs (versioned) +│ ├── docs +│ │ ├── assets +│ │ ├── api.md +│ │ ├── ... +│ │ └── table-migration.md +│ └── mkdocs.yml +└── site (non-versioned) + ├── docs + │ ├── about.md + │ ├── ... + │ └── view-spec.md + ├── ... + ├── Makefile + ├── mkdocs.yml + └── requirements.txt +``` +### Building the versioned docs + +The Iceberg versioned docs are committed in the [orphan `docs` branch](https://github.com/apache/iceberg/tree/docs) and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time. The `docs` branch contains the versioned documentation source files at the root. These versions are mounted at the `/site/docs/docs/<version>` directory at build time. The `latest` version is a soft link to the most recent [semver version](https://semver.org/) in the `docs` branch. There [...]
+ +The docs are built, run, and released using [make](https://www.gnu.org/software/make/manual/make.html). The [Makefile](Makefile) and the [common shell script](dev/common.sh) support the following command: -The non-versioned site pages are all the `/site/docs/.*md` files and the docs are the `/site/docs/docs/<version>/docs/*.md` files. Notice the location of the `mkdocs.yml`. Looking at this though, you may ask where the older versions and javadocs are. +``` site > make help``` +> [build](dev/build.sh): Clean and build the site locally. +> [clean](dev/clean.sh): Clean the local site. +> [deploy](dev/deploy.sh): Clean, build, and deploy the Iceberg docs site. +> help: Show help for each of the Makefile recipes. +> [release](dev/release.sh): Release the current `/docs` as `ICEBERG_VERSION` (`make release ICEBERG_VERSION=<MAJOR.MINOR.PATCH>`). +> [serve](dev/serve.sh): Clean, build, and run the site locally. + +To scaffold the versioned docs and build the project, run the `build` recipe. ``` -./site/ -├── docs -│ ├── assets -│ ├── docs -│ │ └── nightly -│ │ ├── docs -│ │ │ ├── assets -│ │ │ ├── api.md -│ │ │ ├── ... -│ │ │ └── table-migration.md -│ │ └── mkdocs.yml (versioned) -│ ├── about.md -│ ├── ... -│ └── view-spec.md -├── README.md -├── mkdocs.yml (non-versioned) -├── requirements.txt -└── variables.yml +make build ``` -### Building the versioned docs - -> [!IMPORTANT] -> This build process is currently missing older versions and the javadoc branches. -> Until these branches are merged, these steps will not work. -All previously versioned docs will be committed in `docs-<version>` branches and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time. The worktree will pull these versions in following the `/site/docs/docs/<version>` convention. The `latest` version, will be a secondary copy of the most recent build version in the worktree, but pointing to `/site/docs/docs/latest`. 
There is also a `javadoc` branch that contains all prior static generation versions of the jav [...] +This step will generate the following layout: ``` ./site/ └── docs ├── docs - │ ├── nightly - │ ├── latest + │ ├── latest (symlink to /site/docs/docs/1.4.0/) │ ├── 1.4.0 │ ├── 1.3.1 │ └── ... @@ -80,62 +92,41 @@ All previously versioned docs will be committed in `docs-<version>` branches and ├── latest ├── 1.4.0 ├── 1.3.1 - └── ... + └── ... ``` -### Install - -1. (Optional) Set up venv +To run this, run the `serve` recipe, which runs the `build` recipe and calls `mkdocs serve`. This will run locally at <http://localhost:8000>. ``` -python -m venv mkdocs_env -source mkdocs_env/bin/activate +make serve ``` -1. Install required Python libraries +To clear all build files, run `clean`. ``` -pip install -r requirements.txt +make clean ``` -#### Adding additional versioned documentation +#### Offline mode -To build locally with additional docs versions, add them to your working tree. -For now, I'm just adding a single version, and the javadocs directory. +One of the great advantages to the MkDocs material plugin is the [offline feature](https://squidfunk.github.io/mkdocs-material/plugins/offline). You can view the Iceberg docs without the need of a server. To enable offline builds, add the `OFFLINE` environment variable to either the `build` or `serve` recipe. ``` -git worktree add site/docs/docs/1.4.0 docs-1.4.0 -git worktree add site/docs/javadoc javadoc +make build OFFLINE=true ``` -## Build - -Run the build command in the root directory, and optionally add `--clean` to force MkDocs to clear previously generated pages. - -``` -mkdocs build [--clean] -``` - -## Run - -Start MkDocs server locally to verify the site looks good.
- -``` -mkdocs serve -``` +> [!WARNING] +> Building with offline mode disables the [use_directory_urls](https://www.mkdocs.org/user-guide/configuration/#use_directory_urls) setting, ensuring that users can open your documentation directly from the local file system. Do not enable this for releases or deployments. ## Release process -Deploying a version of the docs is a two step process: - 1. ~~Cut a new release from the current branch revision. This creates a new branch `docs-<version>`.~~ - +Deploying the docs is a two step process: + 1. Release a new version by copying the current `/docs` directory to a new version directory in the `docs` branch and a new javadoc build in the `javadoc` branch. ``` - .github/bin/deploy_docs.sh -v 1.4.0 + make release ICEBERG_VERSION=${ICEBERG_VERSION} + ``` + 1. Build and push the generated site to `asf-site`. + ``` + make deploy ``` - - ~~See [deploy_docs.sh](.github/bin/deploy_docs.sh) for more details.~~ - - 1. Make sure to add the new version to the list of versions to pull into git worktree. - 1. Follow the steps in [the build process](#build). - 1. Push the generated site to `gh-pages`. ## Validate Links @@ -147,15 +138,12 @@ As mentioned in the MkDocs section, when you build MkDocs `mkdocs build`, MkDocs ./site/ ├── docs │ ├── docs -│ │ ├── nightly -│ │ │ ├── docs -│ │ │ └── mkdocs.yml -│ │ ├── latest -│ │ │ ├── docs -│ │ │ └── mkdocs.yml -│ │ └── 1.4.0 -│ │ ├── docs -│ │ └── mkdocs.yml +│ │ ├── latest +│ │ │ ├── docs +│ │ │ └── mkdocs.yml +│ │ └── 1.4.0 +│ │ ├── docs +│ │ └── mkdocs.yml │ └─ javadoc │ ├── latest │ └── 1.4.0 diff --git a/site/dev/build.sh b/site/dev/build.sh new file mode 100755 index 0000000000..3b7c3acfaa --- /dev/null +++ b/site/dev/build.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +./dev/setup_env.sh + +mkdocs build diff --git a/site/dev/clean.sh b/site/dev/clean.sh new file mode 100755 index 0000000000..588cc4aaed --- /dev/null +++ b/site/dev/clean.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source dev/common.sh +set -e + +clean diff --git a/site/dev/common.sh b/site/dev/common.sh new file mode 100755 index 0000000000..59d6fcdc27 --- /dev/null +++ b/site/dev/common.sh @@ -0,0 +1,216 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +REMOTE="iceberg_docs" + +# Ensures the presence of a specified remote repository for documentation. +# If the remote doesn't exist, it adds it using the provided URL. +# Then, it fetches updates from the remote repository. +create_or_update_docs_remote () { + echo " --> create or update docs remote" + + # Check if the remote exists before attempting to add it + git config "remote.${REMOTE}.url" >/dev/null || + git remote add "${REMOTE}" https://github.com/apache/iceberg.git + + # Fetch updates from the remote repository + git fetch "${REMOTE}" +} + + +# Pulls updates from a specified branch of a remote repository. +# Arguments: +# $1: Branch name to pull updates from +pull_remote () { + echo " --> pull remote" + + local BRANCH="$1" + + # Ensure the branch argument is not empty + assert_not_empty "${BRANCH}" + + # Perform a pull from the specified branch of the remote repository + git pull "${REMOTE}" "${BRANCH}" +} + +# Pushes changes from a local branch to a specified branch of a remote repository. 
+# Arguments: +# $1: Branch name to push changes to +push_remote () { + echo " --> push remote" + + local BRANCH="$1" + + # Ensure the branch argument is not empty + assert_not_empty "${BRANCH}" + + # Push changes to the specified branch of the remote repository + git push "${REMOTE}" "${BRANCH}" +} + +# Installs or upgrades dependencies specified in the 'requirements.txt' file using pip. +install_deps () { + echo " --> install deps" + + # Use pip to install or upgrade dependencies from the 'requirements.txt' file quietly + pip -q install -r requirements.txt --upgrade +} + +# Checks if a provided argument is not empty. If empty, displays an error message and exits with a status code 1. +# Arguments: +# $1: Argument to check for emptiness +assert_not_empty () { + + if [ -z "$1" ]; then + echo "No argument supplied" + + # Exit with an error code if no argument is provided + exit 1 + fi +} + +# Finds and retrieves the latest version of the documentation based on the directory structure. +# Assumes the documentation versions are numeric folders within 'docs/docs/'. +get_latest_version () { + # Find the latest numeric folder within 'docs/docs/' structure + local latest=$(ls -d docs/docs/[0-9]* | sort -V | tail -1) + + # Extract the version number from the latest directory path + local latest_version=$(basename "${latest}") + + # Output the latest version number + echo "${latest_version}" +} + +# Creates a 'latest' version of the documentation based on a specified ICEBERG_VERSION. +# Arguments: +# $1: ICEBERG_VERSION - The version number of the documentation to be treated as the latest. 
+create_latest () { + echo " --> create latest" + + local ICEBERG_VERSION="$1" + + # Ensure ICEBERG_VERSION is not empty + assert_not_empty "${ICEBERG_VERSION}" + + # Output the provided ICEBERG_VERSION for verification + echo "${ICEBERG_VERSION}" + + # Remove any existing 'latest' directory and recreate it + rm -rf docs/docs/latest/ + mkdir docs/docs/latest/ + + # Create symbolic links and copy configuration files for the 'latest' documentation + ln -s "../${ICEBERG_VERSION}/docs" docs/docs/latest/docs + cp "docs/docs/${ICEBERG_VERSION}/mkdocs.yml" docs/docs/latest/ + + cd docs/docs/ + + # Update version information within the 'latest' documentation + update_version "latest" + cd - +} + +# Updates version information within the mkdocs.yml file for a specified ICEBERG_VERSION. +# Arguments: +# $1: ICEBERG_VERSION - The version number used for updating the mkdocs.yml file. +update_version () { + echo " --> update version" + + local ICEBERG_VERSION="$1" + + # Ensure ICEBERG_VERSION is not empty + assert_not_empty "${ICEBERG_VERSION}" + + # Update version information within the mkdocs.yml file using sed commands + if [ "$(uname)" == "Darwin" ] + then + sed -i '' -E "s/(^site\_name:[[:space:]]+docs\/).*$/\1${ICEBERG_VERSION}/" ${ICEBERG_VERSION}/mkdocs.yml + sed -i '' -E "s/(^[[:space:]]*-[[:space:]]+Javadoc:.*\/javadoc\/).*$/\1${ICEBERG_VERSION}/" ${ICEBERG_VERSION}/mkdocs.yml + elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ] + then + sed -i'' -E "s/(^site_name:[[:space:]]+docs\/)[^[:space:]]+/\1${ICEBERG_VERSION}/" "${ICEBERG_VERSION}/mkdocs.yml" + sed -i'' -E "s/(^[[:space:]]*-[[:space:]]+Javadoc:.*\/javadoc\/).*$/\1${ICEBERG_VERSION}/" "${ICEBERG_VERSION}/mkdocs.yml" + fi + +} + +# Excludes versioned documentation from search indexing by modifying .md files. +# Arguments: +# $1: ICEBERG_VERSION - The version number of the documentation to exclude from search indexing. 
+search_exclude_versioned_docs () { + echo " --> search exclude version docs" + local ICEBERG_VERSION="$1" + + # Ensure ICEBERG_VERSION is not empty + assert_not_empty "${ICEBERG_VERSION}" + + cd "${ICEBERG_VERSION}/docs/" + + # Modify .md files to exclude versioned documentation from search indexing + python3 -c "import os +for f in filter(lambda x: x.endswith('.md'), os.listdir()): lines = open(f).readlines(); open(f, 'w').writelines(lines[:2] + ['search:\n', ' exclude: true\n'] + lines[2:]);" + + cd - +} + +# Sets up local worktrees for the documentation and performs operations related to different versions. +pull_versioned_docs () { + echo " --> pull versioned docs" + + # Ensure the remote repository for documentation exists and is up-to-date + create_or_update_docs_remote + + rm -r docs/docs + + # Add local worktrees for documentation and javadoc from the remote repository + git worktree add -f docs/docs "${REMOTE}/docs" + git worktree add -f docs/javadoc "${REMOTE}/javadoc" + + # Retrieve the latest version of documentation for processing + local latest_version=$(get_latest_version) + + # Output the latest version for debugging purposes + echo "Latest version is: ${latest_version}" + + # Create the 'latest' version of documentation + create_latest "${latest_version}" +} + +# Cleans up artifacts and temporary files generated during documentation management. 
+clean () { + echo " --> clean" + + # Temporarily disable script exit on errors to ensure cleanup continues + set +e + + # Remove 'latest' directories and related Git worktrees + rm -rf docs/docs/latest &> /dev/null + git worktree remove docs/docs &> /dev/null + git worktree remove docs/javadoc &> /dev/null + + git restore docs/docs + + # Remove any additional temporary artifacts (e.g., 'site/' directory) + rm -rf site/ &> /dev/null + + set -e # Re-enable script exit on errors +} + diff --git a/site/dev/deploy.sh b/site/dev/deploy.sh new file mode 100755 index 0000000000..c55503d994 --- /dev/null +++ b/site/dev/deploy.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +./dev/setup_env.sh + +mkdocs gh-deploy --dirty # --remote-branch asf-site + diff --git a/site/dev/serve.sh b/site/dev/serve.sh new file mode 100755 index 0000000000..8901de92ab --- /dev/null +++ b/site/dev/serve.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +./dev/setup_env.sh + +mkdocs serve --dirty --watch . diff --git a/site/dev/setup_env.sh b/site/dev/setup_env.sh new file mode 100755 index 0000000000..cd228d1eab --- /dev/null +++ b/site/dev/setup_env.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source dev/common.sh +set -e + +clean + +install_deps + +pull_versioned_docs diff --git a/site/docs/blogs.md b/site/docs/blogs.md index a5ef127235..17aed7bd78 100644 --- a/site/docs/blogs.md +++ b/site/docs/blogs.md @@ -22,6 +22,20 @@ title: "Blogs" Here is a list of company blogs that talk about Iceberg. The blogs are ordered from most recent to oldest. 
+### [Apache Hive-4.x with Iceberg Branches & Tags](https://medium.com/@ayushtkn/apache-hive-4-x-with-iceberg-branches-tags-3d52293ac0bf/) +**Date**: October 12th, 2023, **Company**: Cloudera + +**Authors**: [Ayush Saxena](https://www.linkedin.com/in/ayush151/) + +### [Apache Hive 4.x With Apache Iceberg](https://medium.com/@ayushtkn/apache-hive-4-x-with-apache-iceberg-part-i-355e7a380725/) +**Date**: October 12th, 2023, **Company**: Cloudera + +**Authors**: [Ayush Saxena](https://www.linkedin.com/in/ayush151/) + +### [From Hive Tables to Iceberg Tables: Hassle-Free](https://blog.cloudera.com/from-hive-tables-to-iceberg-tables-hassle-free/) +**Date**: July 14th, 2023, **Company**: Cloudera + +**Authors**: [Srinivas Rishindra Pothireddi](https://www.linkedin.com/in/srinivas-rishindra/) ### [From Hive Tables to Iceberg Tables: Hassle-Free](https://blog.cloudera.com/from-hive-tables-to-iceberg-tables-hassle-free/) **Date**: July 14th, 2023, **Company**: Cloudera diff --git a/site/docs/community.md b/site/docs/community.md index bf5d4449b4..6d39ce96aa 100644 --- a/site/docs/community.md +++ b/site/docs/community.md @@ -40,13 +40,13 @@ Issues are tracked in GitHub: ## Slack -We use the [Apache Iceberg workspace](https://apache-iceberg.slack.com/) on Slack. To be invited, follow [this invite link](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1znkcg5zm-7_FE~pcox347XwZE3GNfPg). +We use the [Apache Iceberg workspace](https://apache-iceberg.slack.com/) on Slack. To be invited, follow [this invite link](https://join.slack.com/t/apache-iceberg/shared_invite/zt-287g3akar-K9Oe_En5j1UL7Y_Ikpai3A). Please note that this link may occasionally break when Slack does an upgrade. If you encounter problems using it, please let us know by sending an email to <[email protected]>. 
## Iceberg Community Events -This calendar contians two calendar feeds: +This calendar contains two calendar feeds: * Iceberg Community Events - Events such as conferences and meetups, aimed to educate and inspire Iceberg users. * Iceberg Dev Events - Events such as the triweekly Iceberg sync, aimed to discuss the project roadmap and how to implement features. diff --git a/site/docs/hive-quickstart.md b/site/docs/hive-quickstart.md index 57cc02157a..80247525f7 100644 --- a/site/docs/hive-quickstart.md +++ b/site/docs/hive-quickstart.md @@ -39,7 +39,7 @@ Take a look at the Tags tab in [Apache Hive docker images](https://hub.docker.co Set the version variable. ```sh -export HIVE_VERSION=4.0.0-alpha-2 +export HIVE_VERSION=4.0.0-beta-1 ``` Start the container, using the option `--platform linux/amd64` for a Mac with an M-Series chip: diff --git a/site/docs/how-to-release.md b/site/docs/how-to-release.md index e2d9ae4cee..8a774cc6ee 100644 --- a/site/docs/how-to-release.md +++ b/site/docs/how-to-release.md @@ -303,9 +303,10 @@ Thanks to everyone for contributing! Create a PR in the `iceberg` repo to make revapi run on the new release. For an example see [this PR](https://github.com/apache/iceberg/pull/6275). -#### Update github issue template +#### Update GitHub -Create a PR in the `iceberg` repo to add the new version to the github issue template. For an example see [this PR](https://github.com/apache/iceberg/pull/6287). +- Create a PR in the `iceberg` repo to add the new version to the GitHub issue template. For an example see [this PR](https://github.com/apache/iceberg/pull/6287). +- Draft [a new release to update GitHub](https://github.com/apache/iceberg/releases/new) to show the latest release. A changelog can be generated automatically using GitHub.
### Documentation Release diff --git a/site/docs/multi-engine-support.md b/site/docs/multi-engine-support.md index 7a4eb2ea88..cd7fddf322 100644 --- a/site/docs/multi-engine-support.md +++ b/site/docs/multi-engine-support.md @@ -63,8 +63,8 @@ Each engine version undergoes the following lifecycle stages: | ---------- | ------------------ | ----------------------- |------------------------| ------------------ | | 2.4 | End of Life | 0.7.0-incubating | 1.2.1 | [iceberg-spark-runtime-2.4](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-2.4/1.2.1/iceberg-spark-runtime-2.4-1.2.1.jar) | | 3.0 | End of Life | 0.9.0 | 1.0.0 | [iceberg-spark-runtime-3.0_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.0_2.12/1.0.0/iceberg-spark-runtime-3.0_2.12-1.0.0.jar) | -| 3.1 | Deprecated | 0.12.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.1_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.1_2.12-{{ icebergVersion }}.jar) [1] | -| 3.2 | Maintained | 0.13.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.2_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) | +| 3.1 | End of Life | 0.12.0 | 1.3.1 | [iceberg-spark-runtime-3.1_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/1.3.1/iceberg-spark-runtime-3.1_2.12-1.3.1.jar) [1] | +| 3.2 | Deprecated | 0.13.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.2_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) | | 3.3 | Maintained | 0.14.0 | {{ icebergVersion }} | 
[iceberg-spark-runtime-3.3_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.12-{{ icebergVersion }}.jar) | | 3.4 | Maintained | 1.3.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.4_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.12-{{ icebergVersion }}.jar) | diff --git a/site/docs/releases.md b/site/docs/releases.md index 264773b46d..13ee45edd9 100644 --- a/site/docs/releases.md +++ b/site/docs/releases.md @@ -23,14 +23,17 @@ title: "Releases" The latest version of Iceberg is [{{ icebergVersion }}](https://github.com/apache/iceberg/releases/tag/apache-iceberg-{{ icebergVersion }}). * [{{ icebergVersion }} source tar.gz](https://www.apache.org/dyn/closer.cgi/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz) -- [signature](https://downloads.apache.org/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz.asc) -- [sha512](https://downloads.apache.org/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz.sha512) +* [{{ icebergVersion }} Spark 3.5\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.12-{{ icebergVersion }}.jar) -- [3.5\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.4\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.12-{{ icebergVersion }}.jar) -- 
[3.4\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.3\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.12-{{ icebergVersion }}.jar) -- [3.3\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.2\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) -- [3.2\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.13-{{ icebergVersion }}.jar) -* [{{ icebergVersion }} Spark 3.1 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.1_2.12-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.17 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.17/{{ icebergVersion }}/iceberg-flink-runtime-1.17-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.16 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.16/{{ icebergVersion }}/iceberg-flink-runtime-1.16-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.15 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.15/{{ icebergVersion }}/iceberg-flink-runtime-1.15-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Hive runtime 
Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-hive-runtime/{{ icebergVersion }}/iceberg-hive-runtime-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} aws-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-aws-bundle/{{ icebergVersion }}/iceberg-aws-bundle-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} gcp-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-gcp-bundle/{{ icebergVersion }}/iceberg-gcp-bundle-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} azure-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-azure-bundle/{{ icebergVersion }}/iceberg-azure-bundle-{{ icebergVersion }}.jar) To use Iceberg in Spark or Flink, download the runtime JAR for your engine version and add it to the jars folder of your installation. @@ -64,7 +67,140 @@ To add a dependency on Iceberg in Maven, add the following to your `pom.xml`: </dependencies> ``` -## 1.3.1 release +### 1.4.3 Release + +Apache Iceberg 1.4.3 was released on December 27, 2023. The main issue it solves is missing files from a transaction retry with conflicting manifests. It is recommended to upgrade if you use transactions. 
+ +- Core: Scan only live entries in partitions table (#8969) by @Fokko in [#9197](https://github.com/apache/iceberg/pull/9197) +- Core: Fix missing files from transaction retries with conflicting manifest merges by [@nastra](https://github.com/nastra) in [#9337](https://github.com/apache/iceberg/pull/9337) +- JDBC Catalog: Fix namespaceExists check with special characters by [@ismailsimsek](https://github.com/ismailsimsek) in [#9291](https://github.com/apache/iceberg/pull/9291) +- Core: Expired Snapshot files in a transaction should be deleted by [@bartash](https://github.com/bartash) in [#9223](https://github.com/apache/iceberg/pull/9223) +- Core: Fix missing delete files from transaction by [@nastra](https://github.com/nastra) in [#9356](https://github.com/apache/iceberg/pull/9356) + + +## Past releases + +### 1.4.2 Release + +Apache Iceberg 1.4.2 was released on November 2, 2023. +The 1.4.2 patch release addresses fixing a remaining case where split offsets +should be ignored when they are deemed invalid. + +* Core + - Core: Ignore split offsets array when split offset is past file length ([\#8925](https://github.com/apache/iceberg/pull/8925)) + +### 1.4.1 Release + +Apache Iceberg 1.4.1 was released on October 23, 2023. +The 1.4.1 release addresses various issues identified in the 1.4.0 release. + +* Core + - Core: Do not use a lazy split offset list in manifests ([\#8834](https://github.com/apache/iceberg/pull/8834)) + - Core: Ignore split offsets when the last split offset is past the file length ([\#8860](https://github.com/apache/iceberg/pull/8860)) +* AWS + - Avoid static global credentials provider which doesn't play well with lifecycle management ([\#8677](https://github.com/apache/iceberg/pull/8677)) +* Flink + - Reverting the default custom partitioner for bucket column ([\#8848](https://github.com/apache/iceberg/pull/8848)) + +### 1.4.0 release + +Apache Iceberg 1.4.0 was released on October 4, 2023. 
+The 1.4.0 release adds a variety of new features and bug fixes. + +* API + - Implement bound expression sanitization ([\#8149](https://github.com/apache/iceberg/pull/8149)) + - Remove overflow checks in `DefaultCounter` causing performance issues ([\#8297](https://github.com/apache/iceberg/pull/8297)) + - Support incremental scanning with branch ([\#5984](https://github.com/apache/iceberg/pull/5984)) + - Add a validation API to `DeleteFiles` which validates files exist ([\#8525](https://github.com/apache/iceberg/pull/8525)) +* Core + - Use V2 format by default in new tables ([\#8381](https://github.com/apache/iceberg/pull/8381)) + - Use `zstd` compression for Parquet by default in new tables ([\#8593](https://github.com/apache/iceberg/pull/8593)) + - Add strict metadata cleanup mode and enable it by default ([\#8397](https://github.com/apache/iceberg/pull/8397)) ([\#8599](https://github.com/apache/iceberg/pull/8599)) + - Avoid generating huge manifests during commits ([\#6335](https://github.com/apache/iceberg/pull/6335)) + - Add a writer for unordered position deletes ([\#7692](https://github.com/apache/iceberg/pull/7692)) + - Optimize `DeleteFileIndex` ([\#8157](https://github.com/apache/iceberg/pull/8157)) + - Optimize lookup in `DeleteFileIndex` without useful bounds ([\#8278](https://github.com/apache/iceberg/pull/8278)) + - Optimize split offsets handling ([\#8336](https://github.com/apache/iceberg/pull/8336)) + - Optimize computing user-facing state in data tasks ([\#8346](https://github.com/apache/iceberg/pull/8346)) + - Don't persist useless file and position bounds for deletes ([\#8360](https://github.com/apache/iceberg/pull/8360)) + - Don't persist counts for paths and positions in position delete files ([\#8590](https://github.com/apache/iceberg/pull/8590)) + - Support setting system-level properties via environmental variables ([\#5659](https://github.com/apache/iceberg/pull/5659)) + - Add JSON parser for `ContentFile` and `FileScanTask` 
([\#6934](https://github.com/apache/iceberg/pull/6934)) + - Add REST spec and request for commits to multiple tables ([\#7741](https://github.com/apache/iceberg/pull/7741)) + - Add REST API for committing changes against multiple tables ([\#7569](https://github.com/apache/iceberg/pull/7569)) + - Default to exponential retry strategy in REST client ([\#8366](https://github.com/apache/iceberg/pull/8366)) + - Support registering tables with REST session catalog ([\#6512](https://github.com/apache/iceberg/pull/6512)) + - Add last updated timestamp and snapshot ID to partitions metadata table ([\#7581](https://github.com/apache/iceberg/pull/7581)) + - Add total data size to partitions metadata table ([\#7920](https://github.com/apache/iceberg/pull/7920)) + - Extend `ResolvingFileIO` to support bulk operations ([\#7976](https://github.com/apache/iceberg/pull/7976)) + - Key metadata in Avro format ([\#6450](https://github.com/apache/iceberg/pull/6450)) + - Add AES GCM encryption stream ([\#3231](https://github.com/apache/iceberg/pull/3231)) + - Fix a connection leak in streaming delete filters ([\#8132](https://github.com/apache/iceberg/pull/8132)) + - Fix lazy snapshot loading history ([\#8470](https://github.com/apache/iceberg/pull/8470)) + - Fix unicode handling in HTTPClient ([\#8046](https://github.com/apache/iceberg/pull/8046)) + - Fix paths for unpartitioned specs in writers ([\#7685](https://github.com/apache/iceberg/pull/7685)) + - Fix OOM caused by Avro decoder caching ([\#7791](https://github.com/apache/iceberg/pull/7791)) +* Spark + - Added support for Spark 3.5 + - Code for DELETE, UPDATE, and MERGE commands has moved to Spark, and all related extensions have been dropped from Iceberg. + - Support for WHEN NOT MATCHED BY SOURCE clause in MERGE. + - Column pruning in merge-on-read operations. + - Ability to request a bigger advisory partition size for the final write to produce well-sized output files without harming the job parallelism. 
+ - Dropped support for Spark 3.1 + - Deprecated support for Spark 3.2 + - Support vectorized reads for merge-on-read operations in Spark 3.4 and 3.5 ([\#8466](https://github.com/apache/iceberg/pull/8466)) + - Increase default advisory partition size for writes in Spark 3.5 ([\#8660](https://github.com/apache/iceberg/pull/8660)) + - Support distributed planning in Spark 3.4 and 3.5 ([\#8123](https://github.com/apache/iceberg/pull/8123)) + - Support pushing down system functions by V2 filters in Spark 3.4 and 3.5 ([\#7886](https://github.com/apache/iceberg/pull/7886)) + - Support fanout position delta writers in Spark 3.4 and 3.5 ([\#7703](https://github.com/apache/iceberg/pull/7703)) + - Use fanout writers for unsorted tables by default in Spark 3.5 ([\#8621](https://github.com/apache/iceberg/pull/8621)) + - Support multiple shuffle partitions per file in compaction in Spark 3.4 and 3.5 ([\#7897](https://github.com/apache/iceberg/pull/7897)) + - Output net changes across snapshots for carryover rows in CDC ([\#7326](https://github.com/apache/iceberg/pull/7326)) + - Display read metrics on Spark SQL UI ([\#7447](https://github.com/apache/iceberg/pull/7447)) ([\#8445](https://github.com/apache/iceberg/pull/8445)) + - Adjust split size to benefit from cluster parallelism in Spark 3.4 and 3.5 ([\#7714](https://github.com/apache/iceberg/pull/7714)) + - Add `fast_forward` procedure ([\#8081](https://github.com/apache/iceberg/pull/8081)) + - Support filters when rewriting position deletes ([\#7582](https://github.com/apache/iceberg/pull/7582)) + - Support setting current snapshot with ref ([\#8163](https://github.com/apache/iceberg/pull/8163)) + - Make backup table name configurable during migration ([\#8227](https://github.com/apache/iceberg/pull/8227)) + - Add write and SQL options to override compression config ([\#8313](https://github.com/apache/iceberg/pull/8313)) + - Correct partition transform functions to match the spec 
([\#8192](https://github.com/apache/iceberg/pull/8192)) + - Enable extra commit properties with metadata delete ([\#7649](https://github.com/apache/iceberg/pull/7649)) +* Flink + - Add possibility of ordering the splits based on the file sequence number ([\#7661](https://github.com/apache/iceberg/pull/7661)) + - Fix serialization in `TableSink` with anonymous object ([\#7866](https://github.com/apache/iceberg/pull/7866)) + - Switch to `FileScanTaskParser` for JSON serialization of `IcebergSourceSplit` ([\#7978](https://github.com/apache/iceberg/pull/7978)) + - Custom partitioner for bucket partitions ([\#7161](https://github.com/apache/iceberg/pull/7161)) + - Implement data statistics coordinator to aggregate data statistics from operator subtasks ([\#7360](https://github.com/apache/iceberg/pull/7360)) + - Support alter table column ([\#7628](https://github.com/apache/iceberg/pull/7628)) +* Parquet + - Add encryption config to read and write builders ([\#2639](https://github.com/apache/iceberg/pull/2639)) + - Skip writing bloom filters for deletes ([\#7617](https://github.com/apache/iceberg/pull/7617)) + - Cache codecs by name and level ([\#8182](https://github.com/apache/iceberg/pull/8182)) + - Fix decimal data reading from `ParquetAvroValueReaders` ([\#8246](https://github.com/apache/iceberg/pull/8246)) + - Handle filters with transforms by assuming data must be scanned ([\#8243](https://github.com/apache/iceberg/pull/8243)) +* ORC + - Handle filters with transforms by assuming the filter matches ([\#8244](https://github.com/apache/iceberg/pull/8244)) +* Vendor Integrations + - GCP: Fix single byte read in `GCSInputStream` ([\#8071](https://github.com/apache/iceberg/pull/8071)) + - GCP: Add properties for OAuth2 and update library ([\#8073](https://github.com/apache/iceberg/pull/8073)) + - GCP: Add prefix and bulk operations to `GCSFileIO` ([\#8168](https://github.com/apache/iceberg/pull/8168)) + - GCP: Add bundle jar for GCP-related dependencies 
([\#8231](https://github.com/apache/iceberg/pull/8231)) + - GCP: Add range reads to `GCSInputStream` ([\#8301](https://github.com/apache/iceberg/pull/8301)) + - AWS: Add bundle jar for AWS-related dependencies ([\#8261](https://github.com/apache/iceberg/pull/8261)) + - AWS: support config storage class for `S3FileIO` ([\#8154](https://github.com/apache/iceberg/pull/8154)) + - AWS: Add `FileIO` tracker/closer to Glue catalog ([\#8315](https://github.com/apache/iceberg/pull/8315)) + - AWS: Update S3 signer spec to allow an optional string body in `S3SignRequest` ([\#8361](https://github.com/apache/iceberg/pull/8361)) + - Azure: Add `FileIO` that supports ADLSv2 storage ([\#8303](https://github.com/apache/iceberg/pull/8303)) + - Azure: Make `ADLSFileIO` implement `DelegateFileIO` ([\#8563](https://github.com/apache/iceberg/pull/8563)) + - Nessie: Provide better commit message on table registration ([\#8385](https://github.com/apache/iceberg/pull/8385)) +* Dependencies + - Bump Nessie to 0.71.0 + - Bump ORC to 1.9.1 + - Bump Arrow to 12.0.1 + - Bump AWS Java SDK to 2.20.131 + + +### 1.3.1 release Apache Iceberg 1.3.1 was released on July 25, 2023. The 1.3.1 release addresses various issues identified in the 1.3.0 release. @@ -80,8 +216,6 @@ The 1.3.1 release addresses various issues identified in the 1.3.0 release. * Flink - FlinkCatalog creation no longer creates the default database ([\#8039](https://github.com/apache/iceberg/pull/8039)) -## Past releases - ### 1.3.0 release Apache Iceberg 1.3.0 was released on May 30th, 2023. diff --git a/site/docs/roadmap.md b/site/docs/roadmap.md index c0b0efe9dd..7df1114c76 100644 --- a/site/docs/roadmap.md +++ b/site/docs/roadmap.md @@ -20,28 +20,37 @@ title: "Roadmap" # Roadmap Overview -This roadmap outlines projects that the Iceberg community is working on, their priority, and a rough size estimate. 
-This is based on the latest [community priority discussion](https://lists.apache.org/thread.html/r84e80216c259c81f824c6971504c321cd8c785774c489d52d4fc123f%40%3Cdev.iceberg.apache.org%3E). +This roadmap outlines projects that the Iceberg community is working on. Each high-level item links to a GitHub project board that tracks the current status. Related design docs will be linked on the planning boards. -# Priority 1 - -* API: [Iceberg 1.0.0](https://github.com/apache/iceberg/projects/3) [medium] -* Python: [Pythonic refactor](https://github.com/apache/iceberg/projects/7) [medium] -* Spec: [Z-ordering / Space-filling curves](https://github.com/apache/iceberg/projects/16) [medium] -* Spec: [Snapshot tagging and branching](https://github.com/apache/iceberg/projects/4) [small] -* Views: [Spec](https://github.com/apache/iceberg/projects/6) [medium] -* Puffin: [Implement statistics information in table snapshot](https://github.com/apache/iceberg/pull/4741) [medium] -* Flink: [FLIP-27 based Iceberg source](https://github.com/apache/iceberg/projects/23) [large] - -# Priority 2 - -* ORC: [Support delete files stored as ORC](https://github.com/apache/iceberg/projects/13) [small] -* Spark: [DSv2 streaming improvements](https://github.com/apache/iceberg/projects/2) [small] -* Flink: [Inline file compaction](https://github.com/apache/iceberg/projects/14) [small] -* Flink: [Support UPSERT](https://github.com/apache/iceberg/projects/15) [small] -* Spec: [Secondary indexes](https://github.com/apache/iceberg/projects/17) [large] -* Spec v3: [Encryption](https://github.com/apache/iceberg/projects/5) [large] -* Spec v3: [Relative paths](https://github.com/apache/iceberg/projects/18) [large] -* Spec v3: [Default field values](https://github.com/apache/iceberg/projects/19) [medium] +# General + +* [Multi-table transaction support](https://github.com/apache/iceberg/projects/30) +* [Views Support](https://github.com/apache/iceberg/projects/29) +* [Change Data Capture (CDC) 
Support](https://github.com/apache/iceberg/projects/26) +* [Snapshot tagging and branching](https://github.com/apache/iceberg/projects/4) +* [Inline file compaction](https://github.com/apache/iceberg/projects/14) +* [Delete File compaction](https://github.com/apache/iceberg/projects/10) +* [Z-ordering / Space-filling curves](https://github.com/apache/iceberg/projects/16) +* [Support UPSERT](https://github.com/apache/iceberg/projects/15) + +# Clients +_Python, Rust, and Go projects are pointing to their respective repositories which include +their own issues as the implementations are not final._ + +* [Add the Iceberg Python Client](https://github.com/apache/iceberg-python) +* [Add the Iceberg Rust Client](https://github.com/apache/iceberg-rust) +* [Add the Iceberg Go Client](https://github.com/apache/iceberg-go) + +# Spec V2 + +* [Views Spec](https://github.com/apache/iceberg/projects/6) +* [DSv2 streaming improvements](https://github.com/apache/iceberg/projects/2) +* [Secondary indexes](https://github.com/apache/iceberg/projects/17) + +# Spec V3 + +* [Encryption](https://github.com/apache/iceberg/projects/5) +* [Relative paths](https://github.com/apache/iceberg/projects/18) +* [Default field values](https://github.com/apache/iceberg/projects/19) diff --git a/site/docs/spec.md b/site/docs/spec.md index e1c1ed1b49..9223bafda3 100644 --- a/site/docs/spec.md +++ b/site/docs/spec.md @@ -1,8 +1,5 @@ --- title: "Spec" -url: spec -toc: true -disableSidebar: true --- <!-- - Licensed to the Apache Software Foundation (ASF) under one or more @@ -1128,6 +1125,41 @@ Example ] } ] ``` +### Content File (Data and Delete) Serialization + +Content file (data or delete) is serialized as a JSON object according to the following table. 
+ +| Metadata field |JSON representation|Example| +|--------------------------|--- |--- | +| **`spec-id`** |`JSON int`|`1`| +| **`content`** |`JSON string`|`DATA`, `POSITION_DELETES`, `EQUALITY_DELETES`| +| **`file-path`** |`JSON string`|`"s3://b/wh/data.db/table"`| +| **`file-format`** |`JSON string`|`AVRO`, `ORC`, `PARQUET`| +| **`partition`** |`JSON object: Partition data tuple using partition field ids for the struct field ids`|`{"1000":1}`| +| **`record-count`** |`JSON long`|`1`| +| **`file-size-in-bytes`** |`JSON long`|`1024`| +| **`column-sizes`** |`JSON object: Map from column id to the total size on disk of all regions that store the column.`|`{"keys":[3,4],"values":[100,200]}`| +| **`value-counts`** |`JSON object: Map from column id to number of values in the column (including null and NaN values)`|`{"keys":[3,4],"values":[90,180]}`| +| **`null-value-counts`** |`JSON object: Map from column id to number of null values in the column`|`{"keys":[3,4],"values":[10,20]}`| +| **`nan-value-counts`** |`JSON object: Map from column id to number of NaN values in the column`|`{"keys":[3,4],"values":[0,0]}`| +| **`lower-bounds`** |`JSON object: Map from column id to lower bound binary in the column serialized as hexadecimal string`|`{"keys":[3,4],"values":["01000000","02000000"]}`| +| **`upper-bounds`** |`JSON object: Map from column id to upper bound binary in the column serialized as hexadecimal string`|`{"keys":[3,4],"values":["05000000","0A000000"]}`| +| **`key-metadata`** |`JSON string: Encryption key metadata binary serialized as hexadecimal string`|`00000000000000000000000000000000`| +| **`split-offsets`** |`JSON list of long: Split offsets for the data file`|`[128,256]`| +| **`equality-ids`** |`JSON list of int: Field ids used to determine row equality in equality delete files`|`[1]`| +| **`sort-order-id`** |`JSON int`|`1`| + +### File Scan Task Serialization + +File scan task is serialized as a JSON object according to the following table. 
+ +| Metadata field |JSON representation|Example| +|--------------------------|--- |--- | +| **`schema`** |`JSON object`|`See above, read schemas instead`| +| **`spec`** |`JSON object`|`See above, read partition specs instead`| +| **`data-file`** |`JSON object`|`See above, read content file instead`| +| **`delete-files`** |`JSON list of objects`|`See above, read content file instead`| +| **`residual-filter`** |`JSON object: residual filter expression`|`{"type":"eq","term":"id","value":1}`| ## Appendix D: Single-value serialization diff --git a/site/docs/vendors.md b/site/docs/vendors.md index 14554a37b2..ecafd8d8ae 100644 --- a/site/docs/vendors.md +++ b/site/docs/vendors.md @@ -26,6 +26,11 @@ This page contains some of the vendors who are shipping and supporting Apache Ic CelerData provides commercial offerings for [StarRocks](https://www.starrocks.io/), a distributed MPP SQL engine for enterprise analytics on Iceberg. With its fully vectorized technology, local caching, and intelligent materialized view, StarRocks delivers sub-second query latency for both batch and real-time analytics. CelerData offers both an [enterprise deployment](https://celerdata.com/celerdata-enterprise) and a [cloud service](https://celerdata.com/celerdata-cloud) to help customer [...] +### [ClickHouse](https://clickhouse.com/) +ClickHouse is a column-oriented database that enables its users to generate powerful analytics, using SQL queries, in real-time. ClickHouse integrates well with Iceberg and offers two options to work with it: +1. Via Iceberg [table function](https://clickhouse.com/docs/en/sql-reference/table-functions/iceberg): Provides a read-only table-like interface to Apache Iceberg tables in Amazon S3. +2. Via the Iceberg [table engine](https://clickhouse.com/docs/en/engines/table-engines/integrations/iceberg): An engine that provides a read-only integration with existing Apache Iceberg tables in Amazon S3. 
+ ### [Cloudera](http://cloudera.com) Cloudera Data Platform integrates Apache Iceberg to the following components: @@ -49,6 +54,10 @@ With Dremio, an organization can easily build and manage a data lakehouse in whi IOMETE is a fully-managed ready to use, batteries included Data Platform. IOMETE optimizes clustering, compaction, and access control to Apache Iceberg tables. Customer data remains on customer's account to prevent vendor lock-in. The core of IOMETE platform is a serverless Lakehouse that leverages Apache Iceberg as its core table format. IOMETE platform also includes Serverless Spark, an SQL Editor, A Data Catalog, and granular data access control. IOMETE supports Hybrid-multi-cloud setups. +### [PuppyGraph](https://puppygraph.com) + +PuppyGraph is a cloud-native graph analytics engine that enables users to query one or more relational data stores as a unified graph model. This eliminates the overhead of deploying and maintaining a siloed graph database system, with no ETL required. [PuppyGraph’s native Apache Iceberg integration](https://docs.puppygraph.com/user-manual/getting-started/iceberg) adds native graph capabilities to your existing data lake in an easy and performant way. + ### [Snowflake](http://snowflake.com/) [Snowflake](https://www.snowflake.com/data-cloud/) is a single, cross-cloud platform that enables every organization to mobilize their data with Snowflake’s Data Cloud. Snowflake supports Apache Iceberg by offering [native support for Iceberg Tables](https://www.snowflake.com/blog/iceberg-tables-powering-open-standards-with-snowflake-innovations/) for full DML as well as connectors to [External Tables](https://www.snowflake.com/blog/expanding-the-data-cloud-with-apache-iceberg/) for read [...] 
diff --git a/site/docs/view-spec.md b/site/docs/view-spec.md index 1a82c1ec9e..d50405cfe0 100644 --- a/site/docs/view-spec.md +++ b/site/docs/view-spec.md @@ -55,9 +55,9 @@ The view version metadata file has the following fields: | Requirement | Field name | Description | |-------------|----------------------|-------------| +| _required_ | `view-uuid` | A UUID that identifies the view, generated when the view is created. Implementations must throw an exception if a view's UUID does not match the expected UUID after refreshing metadata | | _required_ | `format-version` | An integer version number for the view format; must be 1 | | _required_ | `location` | The view's base location; used to create metadata file locations | -| _required_ | `current-schema-id` | ID of the current schema of the view, if known | | _required_ | `schemas` | A list of known schemas | | _required_ | `current-version-id` | ID of the current version of the view (`version-id`) | | _required_ | `versions` | A list of known [versions](#versions) of the view [1] | @@ -72,13 +72,17 @@ Notes: Each version in `versions` is a struct with the following fields: -| Requirement | Field name | Description | -|-------------|-------------------|--------------------------------------------------------------------------| -| _required_ | `version-id` | ID for the version | -| _required_ | `schema-id` | ID of the schema for the view version | -| _required_ | `timestamp-ms` | Timestamp when the version was created (ms from epoch) | -| _required_ | `summary` | A string to string map of [summary metadata](#summary) about the version | -| _required_ | `representations` | A list of [representations](#representations) for the view definition | +| Requirement | Field name | Description | +|-------------|---------------------|-------------------------------------------------------------------------------| +| _required_ | `version-id` | ID for the version | +| _required_ | `schema-id` | ID of the schema for the view 
version | +| _required_ | `timestamp-ms` | Timestamp when the version was created (ms from epoch) | +| _required_ | `summary` | A string to string map of [summary metadata](#summary) about the version | +| _required_ | `representations` | A list of [representations](#representations) for the view definition | +| _optional_ | `default-catalog` | Catalog name to use when a reference in the SELECT does not contain a catalog | +| _required_ | `default-namespace` | Namespace to use when a reference in the SELECT is a single identifier | + +When `default-catalog` is `null` or not set, the catalog in which the view is stored must be used as the default catalog. #### Summary @@ -114,10 +118,6 @@ A view version can have multiple SQL representations of different dialects, but | _required_ | `type` | `string` | Must be `sql` | | _required_ | `sql` | `string` | A SQL SELECT statement | | _required_ | `dialect` | `string` | The dialect of the `sql` SELECT statement (e.g., "trino" or "spark") | -| _optional_ | `default-catalog` | `string` | Catalog name to use when a reference in the SELECT does not contain a catalog | -| _optional_ | `default-namespace` | `list<string>` | Namespace to use when a reference in the SELECT is a single identifier | -| _optional_ | `field-aliases` | `list<string>` | Column names optionally specified in the create statement | -| _optional_ | `field-comments` | `list<string>` | Column descriptions (COMMENT) optionally specified in the create statement | For example: @@ -141,13 +141,11 @@ This create statement would produce the following `sql` representation metadata: | `type` | `"sql"` | | `sql` | `"SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2"` | | `dialect` | `"spark"` | -| `default-catalog` | `"prod"` | -| `default-namespace` | `["default"]` | -| `field-aliases` | `["event_count", "event_date"]` | -| `field-comments` | `["Count of events", null]` | If a create statement does not include column names or comments before `AS`, the 
fields should be omitted. +The `event_count` (with the `Count of events` comment) and `event_date` field aliases must be part of the view version's `schema`. + #### Version log The version log tracks changes to the view's current version. This is the view's history and allows reconstructing what version of the view would have been used at some point in time. @@ -192,6 +190,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json ``` ``` { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version" : 1, "location" : "s3://bucket/warehouse/default.db/event_agg", "current-version-id" : 1, @@ -202,6 +201,8 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json "version-id" : 1, "timestamp-ms" : 1573518431292, "schema-id" : 1, + "default-catalog" : "prod", + "default-namespace" : [ "default" ], "summary" : { "operation" : "create", "engine-name" : "Spark", @@ -210,25 +211,21 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json "representations" : [ { "type" : "sql", "sql" : "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", - "dialect" : "spark", - "default-catalog" : "prod", - "default-namespace" : [ "default" ], - "field-aliases" : ["event_count", "event_date"], - "field-comments" : ["Count of events", null] + "dialect" : "spark" } ] } ], - "current-schema-id": 1, "schemas": [ { "schema-id": 1, "type" : "struct", "fields" : [ { "id" : 1, - "name" : "col1", + "name" : "event_count", "required" : false, - "type" : "int" + "type" : "int", + "doc" : "Count of events" }, { "id" : 2, - "name" : "col2", + "name" : "event_date", "required" : false, "type" : "date" } ] @@ -261,6 +258,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json ``` ``` { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version" : 1, "location" : "s3://bucket/warehouse/default.db/event_agg", "current-version-id" : 1, @@ -271,6 +269,8 @@ 
s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json "version-id" : 1, "timestamp-ms" : 1573518431292, "schema-id" : 1, + "default-catalog" : "prod", + "default-namespace" : [ "default" ], "summary" : { "operation" : "create", "engine-name" : "Spark", @@ -279,15 +279,14 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json "representations" : [ { "type" : "sql", "sql" : "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", - "dialect" : "spark", - "default-catalog" : "prod", - "default-namespace" : [ "default" ], - "field-aliases" : ["event_count", "event_date"], - "field-comments" : ["Count of events", null] + "dialect" : "spark" } ] }, { "version-id" : 2, "timestamp-ms" : 1573518981593, + "schema-id" : 1, + "default-catalog" : "prod", + "default-namespace" : [ "default" ], "summary" : { "operation" : "create", "engine-name" : "Spark", @@ -296,24 +295,21 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json "representations" : [ { "type" : "sql", "sql" : "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM prod.default.events\nGROUP BY 2", - "dialect" : "spark", - "default-catalog" : "prod", - "default-namespace" : [ "default" ], - "field-aliases" : ["event_count", "event_date"] + "dialect" : "spark" } ] } ], - "current-schema-id": 1, "schemas": [ { "schema-id": 1, "type" : "struct", "fields" : [ { "id" : 1, - "name" : "col1", + "name" : "event_count", "required" : false, - "type" : "int" + "type" : "int", + "doc" : "Count of events" }, { "id" : 2, - "name" : "col2", + "name" : "event_date", "required" : false, "type" : "date" } ] diff --git a/site/mkdocs.yml b/site/mkdocs.yml index ed7ce06403..7b0be5d726 100644 --- a/site/mkdocs.yml +++ b/site/mkdocs.yml @@ -15,6 +15,10 @@ # specific language governing permissions and limitations # under the License. 
+INHERIT: ./nav.yml + +copyright: | + Apache Iceberg, Iceberg, Apache, the Apache feather logo, and the Apache Iceberg project logo are<br>either registered trademarks or trademarks of The Apache Software Foundation. Copyright © 2023<br>The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.<br><br> site_name: Apache Iceberg theme: @@ -24,10 +28,12 @@ theme: favicon: assets/images/favicon-96x96.png features: - navigation.tabs + - navigation.tabs.sticky - navigation.path - navigation.top - navigation.tracking - toc.follow + - offline - search.suggest - search.highlight - content.tabs.link @@ -36,40 +42,10 @@ theme: plugins: - search - - macros: - include_yaml: - - variables.yml + - macros - monorepo - -nav: - - Quickstart: - - Spark: spark-quickstart.md - - Hive: hive-quickstart.md - - Docs: - - nightly: '!include docs/docs/nightly/mkdocs.yml' -# - latest: '!include docs/docs/latest/mkdocs.yml' -# - 1.3.1: '!include docs/docs/1.3.1/mkdocs.yml' -# - 1.3.0: '!include docs/docs/1.3.0/mkdocs.yml' - - Releases: releases.md - - Roadmap: roadmap.md - - Blogs: blogs.md - - Talks: talks.md - - Vendors: vendors.md - - Project: - - Join: community.md - - Spec: spec.md - - View spec: view-spec.md - - Puffin spec: puffin-spec.md - - Multi-engine support: multi-engine-support.md - - How to release: how-to-release.md - - Terms: terms.md - - ASF: - - community.md - - Sponsorship: https://www.apache.org/foundation/sponsorship.html - - Events: https://www.apache.org/events/current-event.html - - License: https://www.apache.org/licenses/ - - Security: https://www.apache.org/security/ - - Sponsors: https://www.apache.org/foundation/thanks.html + - offline: + enabled: !ENV [OFFLINE, false] markdown_extensions: - pymdownx.highlight: @@ -87,10 +63,29 @@ markdown_extensions: - pymdownx.mark - attr_list - pymdownx.emoji: - emoji_index: !!python/name:materialx.emoji.twemoji - emoji_generator: 
!!python/name:materialx.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg - tables + - md_in_html + - toc: + permalink: 🔗 +extra: + icebergVersion: '1.4.2' + social: + - icon: fontawesome/regular/comments + link: './community' + title: community + - icon: fontawesome/brands/github + link: 'https://github.com/apache/iceberg' + title: github + - icon: fontawesome/brands/youtube + link: 'https://www.youtube.com/@ApacheIceberg' + title: youtube + - icon: fontawesome/brands/slack + link: 'https://join.slack.com/t/apache-iceberg/shared_invite/zt-287g3akar-K9Oe_En5j1UL7Y_Ikpai3A' + title: slack + +watch: + - nav.yml -copyright: | - Apache Iceberg, Iceberg, Apache, the Apache feather logo, and the Apache Iceberg project logo are</br>either registered trademarks or trademarks of The Apache Software Foundation. Copyright © 2023</br>The Apache Software Foundation, Licensed under the <a href="https://www.apache.org/licenses/">Apache License, Version 2.0</a>.</br></br> diff --git a/site/nav.yml b/site/nav.yml new file mode 100644 index 0000000000..f7eb81eff5 --- /dev/null +++ b/site/nav.yml @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +nav: + - Home: index.md + - Quickstart: + - Spark: spark-quickstart.md + - Hive: hive-quickstart.md + - Docs: + - latest: '!include docs/docs/latest/mkdocs.yml' + - 1.4.2: '!include docs/docs/1.4.2/mkdocs.yml' + - 1.4.1: '!include docs/docs/1.4.1/mkdocs.yml' + - 1.4.0: '!include docs/docs/1.4.0/mkdocs.yml' + - Releases: releases.md + - Roadmap: roadmap.md + - Blogs: blogs.md + - Talks: talks.md + - Vendors: vendors.md + - Project: + - Join: community.md + - Spec: spec.md + - View spec: view-spec.md + - Puffin spec: puffin-spec.md + - Multi-engine support: multi-engine-support.md + - How to release: how-to-release.md + - Terms: terms.md + - Concepts: + - Catalogs: catalog.md + - ASF: + - Sponsorship: https://www.apache.org/foundation/sponsorship.html + - Events: https://www.apache.org/events/current-event.html + - License: https://www.apache.org/licenses/ + - Security: https://www.apache.org/security/ + - Sponsors: https://www.apache.org/foundation/thanks.html diff --git a/site/requirements.txt b/site/requirements.txt index 72102cf409..abae9c1dc0 100644 --- a/site/requirements.txt +++ b/site/requirements.txt @@ -19,5 +19,5 @@ mkdocs-awesome-pages-plugin==2.9.2 mkdocs-macros-plugin==1.0.5 mkdocs-material==9.5.3 mkdocs-material-extensions==1.3 -mkdocs-monorepo-plugin==1.1.0 +mkdocs-monorepo-plugin @ git+https://github.com/bitsondatadev/mkdocs-monorepo-plugin@url-fix mkdocs-redirects==1.2.1
