This is an automated email from the ASF dual-hosted git repository.

djwang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git
commit da1253e9454e6b97fdc02998f8212b17be96ed62
Author: Dianjin Wang <[email protected]>
AuthorDate: Mon Feb 9 14:30:50 2026 +0800

    ASF: Update Apache compliance and branding migration

    This commit implements comprehensive changes to align the project with
    Apache Software Foundation incubation requirements and complete the
    migration from Greenplum to Apache Cloudberry branding.

    - Add DISCLAIMER file as required for Apache incubating projects
    - Update LICENSE file with comprehensive list of 245 files containing
      Apache License headers, organized by module (FDW, External Table,
      Server, Documentation, CI/Test templates)
    - Add Apache License headers to GitHub workflow files
    - Update CONTRIBUTING.md with Apache project contribution guidelines
    - Update README.md with Apache Cloudberry branding and simplified content
    - Update documentation templates in docs/content/ to use Cloudberry
    - Update automation and testing documentation
    - Migrate server script and Java component references
    - Update CI/CD workflows with proper Apache licensing
    - Clean up legacy CI documentation (remove ci/README.md)
    - Update build system references in Makefile
    - Enhance installation scripts to support both Cloudberry 2.0 and 2.1+
    - Add transition guide for Cloudberry migration
    - Update all user-facing documentation with correct branding
    - Simplify README.md to focus on essential information
    - Update book configuration for documentation generation

    This change ensures full compliance with Apache incubation requirements
    while completing the transition to the Apache Cloudberry ecosystem.
---
 .github/workflows/dependency-submission.yml        |  21 ++
 .github/workflows/pxf-ci.yml                       |  21 ++
 CONTRIBUTING.md                                    |  19 ++
 DISCLAIMER                                         |   9 +
 LICENSE                                            |  78 +++++++
 Makefile                                           |  38 ++--
 README.md                                          | 244 +++------------------
 automation/README.Docker.md                        |  19 ++
 automation/README.Linux.md                         |   7 +-
 automation/README.md                               |   2 +-
 automation/pxf_regress/README.md                   |  18 +-
 .../components/common/ShellSystemObject.java       |   2 +-
 .../pxf/automation/components/gpdb/Gpdb.java       |   6 +-
 .../resources/sut/LocalToIPAMultiNodeHadoopHA.xml  |   2 +-
 .../sut/MultiHadoopIPAMultiNodesCluster.xml        |   2 +-
 .../resources/sut/MultiHadoopMultiNodesCluster.xml |   2 +-
 .../src/test/resources/sut/MultiNodesCluster.xml   |   2 +-
 automation/src/test/resources/sut/default.xml      |   2 +-
 .../resources/templates/gpdb/gpinitsystem_config   |   8 +-
 ci/README.md                                       | 143 ------------
 .../ubuntu/script/entrypoint_kerberos.sh           |  18 +-
 ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh    |   2 +-
 docs/book/config.yml                               |  18 +-
 docs/content/access_hdfs.html.md.erb               |   2 +-
 docs/content/index.html.md.erb                     |   2 +-
 docs/content/instcfg_pxf.html.md.erb               |   2 +-
 docs/content/intro_pxf.html.md.erb                 |   2 +-
 docs/content/overview_pxf.html.md.erb              |   4 +-
 docs/content/ref/pxf-ref.html.md.erb               |   2 +-
 docs/content/transition_to_cloudberry.html.md.erb  |  19 ++
 docs/content/using_pxf.html.md.erb                 |   2 +-
 external-table/Makefile                            |   7 +-
 regression/README.md                               |   5 +-
 server/pxf-service/src/scripts/pxf                 |  18 +-
 server/pxf-service/src/scripts/pxf-post-gpupgrade  |  14 +-
 server/pxf-service/src/scripts/pxf-pre-gpupgrade   |  16 +-
 36 files changed, 317 insertions(+), 461 deletions(-)

diff --git a/.github/workflows/dependency-submission.yml b/.github/workflows/dependency-submission.yml
index 4f5b2b09..d0bce9dd 100644
--- a/.github/workflows/dependency-submission.yml
+++ b/.github/workflows/dependency-submission.yml
@@ -1,3 +1,24 @@
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# Apache Cloudberry PXF Dependency Submission Workflow
+# --------------------------------------------------------------------
 name: Dependency Submission

 on:

diff --git a/.github/workflows/pxf-ci.yml b/.github/workflows/pxf-ci.yml
index 83188f35..3666ab8e 100644
--- a/.github/workflows/pxf-ci.yml
+++ b/.github/workflows/pxf-ci.yml
@@ -1,3 +1,24 @@
+# --------------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed
+# with this work for additional information regarding copyright
+# ownership. The ASF licenses this file to You under the Apache
+# License, Version 2.0 (the "License"); you may not use this file
+# except in compliance with the License. You may obtain a copy of the
+# License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied. See the License for the specific language governing
+# permissions and limitations under the License.
+#
+# --------------------------------------------------------------------
+# Apache Cloudberry PXF CI Workflow
+# --------------------------------------------------------------------
 name: PXF CI Pipeline

 on:

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6af259c5..02a2c06c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,3 +1,22 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied. See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
 Apache Cloudberry community welcomes contributions from anyone, new and
 experienced! We appreciate your interest in contributing. This guide will
 help you get started with the contribution.
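The same ASF header now tops both workflow files. As a quick, hypothetical spot-check (not part of the commit itself), `grep -L` prints any workflow file that still lacks the header, matching on a phrase every header above contains:

```bash
# List any workflow file still missing the ASF license header added above.
grep -L "Licensed to the Apache Software Foundation" .github/workflows/*.yml
```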
diff --git a/DISCLAIMER b/DISCLAIMER new file mode 100644 index 00000000..14425376 --- /dev/null +++ b/DISCLAIMER @@ -0,0 +1,9 @@ +Apache Cloudberry is an effort undergoing incubation at The Apache +Software Foundation (ASF), sponsored by the Apache +Incubator. Incubation is required of all newly accepted projects until +a further review indicates that the infrastructure, communications, +and decision making process have stabilized in a manner consistent +with other successful ASF projects. While incubation status is not +necessarily a reflection of the completeness or stability of the code, +it does indicate that the project has yet to be fully endorsed by the +ASF. \ No newline at end of file diff --git a/LICENSE b/LICENSE index 4bd5496a..3c1f33f2 100644 --- a/LICENSE +++ b/LICENSE @@ -214,6 +214,84 @@ This product is derived from software originally developed by: notices and license terms. Your use of these subcomponents is subject to the terms and conditions of the subcomponent's license, as noted in the LICENSE file. + +The Greenplum Platform Extension Framework includes: + +---------------------------- + Apache License - Version 2.0 + +The following files are licensed under the Apache License, Version 2.0: + +FDW Module: + fdw/libchurl.c + fdw/libchurl.h + fdw/pxf_bridge.c + fdw/pxf_bridge.h + fdw/pxf_filter.c + fdw/pxf_filter.h + fdw/pxf_header.c + fdw/pxf_header.h + +External Table Module: + external-table/src/gpdbwritableformatter.c + external-table/src/libchurl.c + external-table/src/libchurl.h + external-table/src/pxfbridge.c + external-table/src/pxfbridge.h + external-table/src/pxffilters.c + external-table/src/pxffilters.h + external-table/src/pxfheaders.c + external-table/src/pxfheaders.h + external-table/src/pxfprotocol.c + external-table/src/pxfuriparser.c + external-table/src/pxfuriparser.h + external-table/test/pxffilters_test.c + external-table/test/pxfheaders_test.c + external-table/test/pxfprotocol_test.c + external-table/test/pxfuriparser_test.c + +Server Module (Java Sources): + server/build.gradle + server/gradle.properties + server/settings.gradle + server/pxf-api/src/main/java/org/apache/cloudberry/pxf/api/*.java + server/pxf-api/src/test/java/org/apache/cloudberry/pxf/api/**/*.java + server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/**/*.java + server/pxf-hbase/src/test/java/org/apache/cloudberry/pxf/plugins/hbase/**/*.java + server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/**/*.java + server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/**/*.java + server/pxf-hive/src/main/java/org/apache/cloudberry/pxf/plugins/hive/**/*.java + server/pxf-hive/src/test/java/org/apache/cloudberry/pxf/plugins/hive/**/*.java + server/pxf-jdbc/src/main/java/org/apache/cloudberry/pxf/plugins/jdbc/**/*.java + server/pxf-jdbc/src/test/java/org/apache/cloudberry/pxf/plugins/jdbc/**/*.java + server/pxf-json/src/main/java/org/apache/cloudberry/pxf/plugins/json/**/*.java + server/pxf-json/src/test/java/org/apache/cloudberry/pxf/plugins/json/**/*.java + server/pxf-service/src/main/java/org/apache/cloudberry/pxf/service/**/*.java + server/pxf-service/src/test/java/org/apache/cloudberry/pxf/service/**/*.java + +Documentation Templates: + docs/content/*.html.md.erb + +Configuration Files: + server/pxf-api/src/test/resources/pxf-profiles-default.xml + server/pxf-hive/src/test/resources/pxf-profiles-default.xml + server/pxf-jdbc/src/test/resources/log4j.properties + server/pxf-json/src/test/resources/log4j.properties + 
server/pxf-service/src/main/resources/pxf-profiles-default.xml + server/pxf-service/src/templates/conf/pxf-profiles.xml + server/pxf-service/src/test/resources/pxf-profiles-default.xml + +CI/Test Templates: + automation/src/test/resources/templates/zk/zoo.cfg + ci/singlecluster/templates/hadoop/etc/hadoop/core-site.xml + ci/singlecluster/templates/hadoop/etc/hadoop/hdfs-site.xml + ci/singlecluster/templates/hadoop/etc/hadoop/yarn-env.sh + ci/singlecluster/templates/hbase/conf/hbase-env.sh + ci/singlecluster/templates/hbase/conf/hbase-site.xml + ci/singlecluster/templates/ranger/install.properties + ci/singlecluster/templates/tez/conf/tez-site.xml + ci/singlecluster/templates/usersync/install.properties + ======================================================================= This product bundles Gradle Wrapper, which is licensed under diff --git a/Makefile b/Makefile index caaac20e..56c651e8 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,9 @@ install-server: stage: rm -rf build/stage + make -C $(SOURCE_EXTENSION_DIR) stage + make -C cli stage + make -C server stage ifneq ($(SKIP_EXTERNAL_TABLE_PACKAGE_REASON),) @echo "Skipping staging FDW extension because $(SKIP_EXTERNAL_TABLE_PACKAGE_REASON)" $(eval PXF_MODULES := $(filter-out external-table,$(PXF_MODULES))) @@ -100,7 +103,7 @@ endif cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ cp -a cli/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ cp -a server/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ - echo $$(git rev-parse --verify HEAD) > build/stage/$${PXF_PACKAGE_NAME}/pxf/commit.sha ;\ + echo $$(git rev-parse --verify HEAD) > build/stage/$${PXF_PACKAGE_NAME}/commit.sha ;\ cp package/install_binary build/stage/$${PXF_PACKAGE_NAME}/install_component ;\ echo "===> PXF staging is complete <===" @@ -116,15 +119,14 @@ gppkg-rpm: rpm GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) cat package/gppkg_spec.yml.in | sed "s,#arch,`arch`," | sed "s,#os,$(TEST_OS)," | sed "s,#gppkgver,1.0," | sed "s,#gpver,1," > gppkg/gppkg_spec.yml find build/rpmbuild/RPMS -name pxf-cbdb$(GP_MAJOR_VERSION)-*.rpm -exec cp {} gppkg/ \; - source $(GPHOME)/greenplum_path.sh && gppkg --build gppkg + source $(GPHOME)/greenplum_path.sh || source $(GPHOME)/cloudberry-env.sh && gppkg --build gppkg -rpm: - make -C $(SOURCE_EXTENSION_DIR) stage - make -C cli stage - make -C server stage +rpm: stage set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - PXF_FULL_VERSION=$${PXF_VERSION} ;\ + GP_BUILD_ARCH=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/build_arch) ;\ + PXF_PACKAGE_NAME=pxf-cbdb$${GP_MAJOR_VERSION}-${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ + PXF_FULL_VERSION=${PXF_VERSION} ;\ PXF_MAIN_VERSION=$$(echo $${PXF_FULL_VERSION} | sed -E 's/(-SNAPSHOT|-rc[0-9]+)$$//') ;\ if [[ $${PXF_FULL_VERSION} == *"-SNAPSHOT" ]]; then \ PXF_RELEASE=SNAPSHOT; \ @@ -135,7 +137,7 @@ rpm: fi ;\ rm -rf build/rpmbuild ;\ mkdir -p build/rpmbuild/{BUILD,RPMS,SOURCES,SPECS} ;\ - cp -a build/stage/$${PXF_PACKAGE_NAME}/pxf/* build/rpmbuild/SOURCES ;\ + cp -a build/stage/$${PXF_PACKAGE_NAME}/* build/rpmbuild/SOURCES ;\ cp package/*.spec build/rpmbuild/SPECS/ ;\ rpmbuild \ --define "_topdir $${PWD}/build/rpmbuild" \ @@ -150,7 +152,7 @@ rpm-tar: rpm mkdir -p build/{stagerpm,distrpm} set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - PXF_RPM_FILE=$$(find build/rpmbuild/RPMS -name pxf-cbdb$${GP_MAJOR_VERSION}-*.rpm) ;\ + PXF_RPM_FILE=$$(find 
build/rpmbuild/RPMS -name cloudberry-pxf-*.rpm) ;\ PXF_RPM_BASE_NAME=$$(basename $${PXF_RPM_FILE%*.rpm}) ;\ PXF_PACKAGE_NAME=$${PXF_RPM_BASE_NAME%.*} ;\ mkdir -p build/stagerpm/$${PXF_PACKAGE_NAME} ;\ @@ -165,24 +167,24 @@ deb: stage PXF_MAIN_VERSION=$${PXF_VERSION//-SNAPSHOT/} ;\ if [[ $${PXF_VERSION} == *"-SNAPSHOT" ]]; then PXF_RELEASE=SNAPSHOT; else PXF_RELEASE=1; fi ;\ rm -rf build/debbuild ;\ - mkdir -p build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ - cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ - cp -a cli/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ - cp -a server/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ - echo $$(git rev-parse --verify HEAD) > build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/commit.sha ;\ + mkdir -p build/debbuild/usr/local/cloudberry-pxf/$(TARGET_EXTENSION_DIR) ;\ + cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/debbuild/usr/local/cloudberry-pxf/ ;\ + cp -a cli/build/stage/* build/debbuild/usr/local/cloudberry-pxf ;\ + cp -a server/build/stage/* build/debbuild/usr/local/cloudberry-pxf ;\ + echo $$(git rev-parse --verify HEAD) > build/debbuild/usr/local/cloudberry-pxf/commit.sha ;\ mkdir build/debbuild/DEBIAN ;\ cp -a package/DEBIAN/* build/debbuild/DEBIAN/ ;\ - sed -i -e "s/%VERSION%/$${PXF_MAIN_VERSION}-$${PXF_RELEASE}/" -e "s/%MAINTAINER%/${VENDOR}/" build/debbuild/DEBIAN/control ;\ + sed -i -e "s/%VERSION%/$${PXF_MAIN_VERSION}-$${PXF_RELEASE}/" -e "s/%MAINTAINER%/${VENDOR}/" -e "s/%ARCH%/$$(dpkg --print-architecture)/" build/debbuild/DEBIAN/control ;\ dpkg-deb --build build/debbuild ;\ - mv build/debbuild.deb build/pxf-cbdb$${GP_MAJOR_VERSION}-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-ubuntu18.04-amd64.deb + mv build/debbuild.deb build/cloudberry-pxf-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-$$(lsb_release -si | tr '[:upper:]' '[:lower:]')$$(lsb_release -sr)-$$(dpkg --print-architecture).deb deb-tar: deb rm -rf build/{stagedeb,distdeb} mkdir -p build/{stagedeb,distdeb} set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - PXF_DEB_FILE=$$(find build/ -name pxf-cbdb$${GP_MAJOR_VERSION}*.deb) ;\ - PXF_PACKAGE_NAME=$$(dpkg-deb --field $${PXF_DEB_FILE} Package)-$$(dpkg-deb --field $${PXF_DEB_FILE} Version)-ubuntu18.04 ;\ + PXF_DEB_FILE=$$(find build/ -name cloudberry-pxf*.deb) ;\ + PXF_PACKAGE_NAME=$$(dpkg-deb --field $${PXF_DEB_FILE} Package)-$$(dpkg-deb --field $${PXF_DEB_FILE} Version)-$$(lsb_release -si | tr '[:upper:]' '[:lower:]')$$(lsb_release -rs) ;\ mkdir -p build/stagedeb/$${PXF_PACKAGE_NAME} ;\ cp $${PXF_DEB_FILE} build/stagedeb/$${PXF_PACKAGE_NAME} ;\ cp package/install_deb build/stagedeb/$${PXF_PACKAGE_NAME}/install_component ;\ diff --git a/README.md b/README.md index 26bbc1c4..e580cd5a 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # Platform Extension Framework (PXF) for Apache Cloudberry (Incubating) -[](https://communityinviter.com/apps/cloudberrydb/welcome) -[](https://twitter.com/cloudberrydb) -[](https://cloudberry.apache.org) +[](https://cloudberry.apache.org) +[](https://cloudberry.apache.org/docs) +[](https://inviter.co/apache-cloudberry) +[](https://twitter.com/ASFCloudberry) +[](https://cloudberry.apache.org/community/wechat) +[](https://youtube.com/@ApacheCloudberry) +[](https://github.com/apache/cloudberry/discussions) --- @@ -12,7 +16,7 @@ PXF is an extensible framework that allows a distributed database like 
Greenplum PXF includes built-in connectors for accessing data that exists inside HDFS files, Hive tables, HBase tables, JDBC-accessible databases and more. Users can also create their own connectors to other data storage or processing engines. -This project is forked from [greenplum/pxf](https://github.com/greenplum-db/pxf-archive) and customized for Apache Cloudberry. +This project is derived from [greenplum/pxf](https://github.com/greenplum-db/pxf-archive) and customized for Apache Cloudberry. ## Repository Contents @@ -23,20 +27,12 @@ This project is forked from [greenplum/pxf](https://github.com/greenplum-db/pxf- * `automation/` : Contains the automation and integration tests for PXF against the various datasources * `ci/` : Contains CI/CD environment and scripts (including singlecluster Hadoop environment) * `regression/` : Contains the end-to-end (integration) tests for PXF against the various datasources, utilizing the PostgreSQL testing framework `pg_regress` -* `downloads/` : An empty directory that serves as a staging location for Cloudberry RPMs for the development Docker image ## PXF Development Below are the steps to build and install PXF along with its dependencies including Cloudberry and Hadoop. -> [!Note] -> To start, ensure you have a `~/workspace` directory and have cloned the `pxf` and its prerequisites (shown below) under it. -(The name `workspace` is not strictly required but will be used throughout this guide.) - ```bash -mkdir -p ~/workspace -cd ~/workspace - git clone https://github.com/apache/cloudberry-pxf.git ``` @@ -49,22 +45,22 @@ To build PXF, you must have: Either download and install Cloudberry RPM or build Cloudberry from the source by following instructions in the [Cloudberry](https://github.com/apache/cloudberry). - Assuming you have installed Cloudberry into `/usr/local/cloudberrydb` directory, run its environment script: + Assuming you have installed Cloudberry into `/usr/local/cloudberry-db` directory, run its environment script: ``` - source /usr/local/cloudberrydb/greenplum_path.sh # For Cloudberry 2.0 - source /usr/local/cloudberrydb/cloudberry-env.sh # For Cloudberry 2.1+ + source /usr/local/cloudberry-db/greenplum_path.sh # For Cloudberry 2.0 + source /usr/local/cloudberry-db/cloudberry-env.sh # For Cloudberry 2.1+ ``` 3. JDK 1.8 or JDK 11 to compile/run Export your `JAVA_HOME`: ``` - export JAVA_HOME=<PATH_TO_YOUR_JAVA_HOME> + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk ``` 4. Go (1.9 or later) - To install Go on CentOS, `sudo yum install go`. For other platforms, see the [Go downloads page](https://golang.org/dl/). + You can download and install Go via [Go downloads page](https://golang.org/dl/). Make sure to export your `GOPATH` and add go to your `PATH`. For example: ```shell @@ -78,46 +74,37 @@ To build PXF, you must have: go install github.com/onsi/ginkgo/ginkgo@latest ``` -5. cURL (7.29 or later): - - To install cURL devel package on CentOS 7, `sudo yum install libcurl-devel`. - - Note that CentOS 6 provides an older, unsupported version of cURL (7.19). You should install a newer version from source if you are on CentOS 6. - -### How to Build PXF +### Build PXF PXF uses Makefiles to build its components. PXF server component uses Gradle that is wrapped into the Makefile for convenience. 
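As an aside, the `rpm` target in the Makefile diff above derives `PXF_MAIN_VERSION` by stripping pre-release suffixes with `sed`. A standalone sketch of that expression follows; the sample version strings are invented for illustration:

```bash
# Demonstrates the suffix-stripping used by the Makefile's rpm target above;
# the sample versions below are made up.
for v in 7.0.0 7.0.0-SNAPSHOT 7.0.0-rc3; do
  echo "$v -> $(echo "$v" | sed -E 's/(-SNAPSHOT|-rc[0-9]+)$//')"
done
# prints:
#   7.0.0 -> 7.0.0
#   7.0.0-SNAPSHOT -> 7.0.0
#   7.0.0-rc3 -> 7.0.0
```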
```bash -cd ~/workspace/pxf +cd cloudberry-pxf/ -# Compile & Test PXF +# Compile PXF make - -# Only run unit tests -make test ``` -### How to Install PXF +### Install PXF -To install PXF, first make sure that the user has sufficient permissions in the `$GPHOME` and `$PXF_HOME` directories to perform the installation. It's recommended to change ownership to match the installing user. For example, when installing PXF as user `gpadmin` under `/usr/local/cloudberrydb`: +To install PXF, first make sure that the user has sufficient permissions in the `$GPHOME` and `$PXF_HOME` directories to perform the installation. It's recommended to change ownership to match the installing user. For example, when installing PXF as user `gpadmin` under `/usr/local/cloudberry-db`: ```bash -export GPHOME=/usr/local/cloudberrydb -export PXF_HOME=/usr/local/pxf +mkdir -p /usr/local/cloudberry-pxf +export PXF_HOME=/usr/local/cloudberry-pxf export PXF_BASE=${HOME}/pxf-base -chown -R gpadmin:gpadmin "${GPHOME}" "${PXF_HOME}" -make -C ~/workspace/pxf install +chown -R gpadmin:gpadmin "${PXF_HOME}" +make install ``` NOTE: if `PXF_BASE` is not set, it will default to `PXF_HOME`, and server configurations, libraries or other configurations, might get deleted after a PXF re-install. -### How to Run PXF +### Run PXF -Ensure that PXF is in your path. This command can be added to your .bashrc +Ensure that PXF is in your path. This command can be added to your `.bashrc`: ```bash -export PATH=/usr/local/pxf/bin:$PATH +export PATH=/usr/local/cloudberry-pxf/bin:$PATH ``` Then you can prepare and start up PXF by doing the following. @@ -143,151 +130,13 @@ After PXF has been re-installed, you can restart the PXF instance using: pxf restart ``` -### How to demonstrate Hadoop Integration -In order to demonstrate end to end functionality you will need Hadoop installed. We have all the related hadoop components (hdfs, hive, hbase, zookeeper, etc) mapped into simple artifact named singlecluster. -You can [download from here](https://storage.googleapis.com/pxf-public/singlecluster-HDP.tar.gz) and untar the `singlecluster-HDP.tar.gz` file, which contains everything needed to run Hadoop. - -```bash -mv singlecluster-HDP.tar.gz ~/workspace/ -cd ~/workspace -tar xzf singlecluster-HDP.tar.gz -``` - -Create a symlink using `ln -s ~/workspace/singlecluster-HDP ~/workspace/singlecluster` and then follow the steps in [Setup Hadoop](####Setup-Hadoop). - -While PXF can run on either Java 8 or Java 11, please ensure that you are running Java 8 for hdfs, hadoop, etc. Please set your java version by seting your `JAVA_HOME` to the appropriate location. - -On a Mac, you can set your java version using `JAVA_HOME` like so: -``` -export JAVA_HOME=`/usr/libexec/java_home -v 1.8` -```` - -Initialize the default server configurations: -``` -cp ${PXF_HOME}/templates/*-site.xml ${PXF_BASE}/servers/default -``` - -### Development With Docker +## Development With Docker > [!Note] > Since the docker container will house all Single cluster Hadoop, Cloudberry > and PXF, we recommend that you have at least 4 cpus and 6GB memory allocated > to Docker. These settings are available under docker preferences. We provide a Docker-based development environment that includes Cloudberry, Hadoop, and PXF. See [automation/README.Docker.md](automation/README.Docker.md) for detailed instructions. 
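Before bringing the containers up, the resource note above can be verified with a minimal sketch like the following (assumes a recent Docker CLI; the 4-CPU/6 GB thresholds come from the note, not from the commit):

```bash
# Pre-flight check against the 4 CPU / 6 GB guidance in the note above.
read -r cpus mem <<<"$(docker info --format '{{.NCPU}} {{.MemTotal}}')"
if [ "${cpus}" -lt 4 ] || [ "${mem}" -lt $((6 * 1024 * 1024 * 1024)) ]; then
  echo "Docker reports ${cpus} CPUs and ${mem} bytes of RAM; allocate at least 4 CPUs / 6 GB."
fi
```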
-**Quick Start:** - -```bash -# Build and start the development container -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml build -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml up -d - -# Enter the container and run setup -docker exec -it pxf-cbdb-dev bash -c \ - "cd /home/gpadmin/workspace/cloudberry-pxf/ci/docker/pxf-cbdb-dev/ubuntu && ./script/entrypoint.sh" - -# Run tests -docker exec -it pxf-cbdb-dev bash -c \ - "cd /home/gpadmin/workspace/cloudberry-pxf/ci/docker/pxf-cbdb-dev/ubuntu && ./script/run_tests.sh" - -# Stop and clean up -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml down -v -``` - -#### Setup Hadoop -Hdfs will be needed to demonstrate functionality. You can choose to start additional hadoop components (hive/hbase) if you need them. - -Setup [User Impersonation](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Superusers.html) prior to starting the hadoop components (this allows the `gpadmin` user to access hadoop data). - -The Docker development environment automatically configures Hadoop. For manual setup, see [automation/README.Docker.md](automation/README.Docker.md). - -Setup and start HDFS -```bash -pushd ~/workspace/singlecluster/bin -echo y | ./init-gphd.sh -./start-hdfs.sh -popd -``` - -Start other optional components based on your need - -```bash -pushd ~/workspace/singlecluster/bin -# Start Hive -./start-yarn.sh -./start-hive.sh - -# Start HBase -./start-zookeeper.sh -./start-hbase.sh -popd -``` - -#### Setup Minio (optional) -Minio is an S3-API compatible local storage solution. The development docker image comes with Minio software pre-installed. MinIO is automatically started by the Docker development environment. - -After the server starts, you can access Minio UI at `http://localhost:9000` from the host OS. Use `admin` for the access key and `password` for the secret key when connecting to your local Minio instance. - -To run S3 automation tests, set `PROTOCOL=minio`. If later you would like to run Hadoop HDFS tests, unset this variable with `unset PROTOCOL` command. - -#### Setup PXF - -Install PXF Server -```bash -# Install PXF -make -C ~/workspace/pxf install - -# Start PXF -export PXF_JVM_OPTS="-Xmx512m -Xms256m" -$PXF_HOME/bin/pxf start -``` - -Install PXF client (ignore if this is already done) -```bash -psql -d template1 -c "create extension pxf" -``` - -#### Run PXF Tests -All tests use a database named `pxfautomation`. -```bash -pushd ~/workspace/pxf/automation - -# Initialize default server configs using template -cp ${PXF_HOME}/templates/{hdfs,mapred,yarn,core,hbase,hive}-site.xml ${PXF_BASE}/servers/default - -# Run specific tests. Example: Hdfs Smoke Test -make TEST=HdfsSmokeTest - -# Run all tests. This will be very time consuming. -make GROUP=gpdb - -# If you wish to run test(s) against a different storage protocol set the following variable (for eg: s3) -export PROTOCOL=s3 -popd -``` - -If you see any HBase failures, try copying `pxf-hbase-*.jar` to the HBase classpath, and restart HBase: - -``` -cp ${PXF_HOME}/lib/pxf-hbase-*.jar ~/workspace/singlecluster/hbase/lib/pxf-hbase.jar -~/workspace/singlecluster/bin/stop-hbase.sh -~/workspace/singlecluster/bin/start-hbase.sh -``` - -#### Make Changes to PXF - -To deploy your changes to PXF in the development environment. - -```bash -# $PXF_HOME folder is replaced each time you make install. -# So, if you have any config changes, you may want to back those up. 
-$PXF_HOME/bin/pxf stop -make -C ~/workspace/pxf install -# Make any config changes you had backed up previously -rm -rf $PXF_HOME/pxf-service -yes | $PXF_HOME/bin/pxf init -$PXF_HOME/bin/pxf start -``` - ## IDE Setup (IntelliJ) - Start IntelliJ. Click "Open" and select the directory to which you cloned the `pxf` repo. @@ -311,47 +160,6 @@ no JDK set for Gradle. Just cancel and retry. It goes away the second time. - Debug the new configuration in IntelliJ - Run a query in CloudberryDB that uses PXF to debug with IntelliJ -## To run a Kerberized Hadoop Cluster - -### Requirements - -- Download bin_gpdb (from any of the pipelines) -- Download pxf_tarball (from any of the pipelines) - -These instructions allow you to run a Kerberized cluster. See [automation/README.Docker.md](automation/README.Docker.md) for detailed Kerberos setup instructions. - -```bash -docker run --rm -it \ - --privileged \ - --hostname c6401.ambari.apache.org \ - -p 5432:5432 \ - -p 5888:5888 \ - -p 8000:8000 \ - -p 8080:8080 \ - -p 8020:8020 \ - -p 9000:9000 \ - -p 9090:9090 \ - -p 50070:50070 \ - -w /home/gpadmin/workspace \ - -v ~/workspace/cbdb:/home/gpadmin/workspace/gpdb_src \ - -v ~/workspace/pxf:/home/gpadmin/workspace/pxf_src \ - -v ~/workspace/singlecluster-HDP:/home/gpadmin/workspace/singlecluster \ - -v ~/Downloads/bin_cbdb:/home/gpadmin/workspace/bin_cbdb \ - -v ~/Downloads/pxf_tarball:/home/gpadmin/workspace/pxf_tarball \ - -e CLUSTER_NAME=hdp \ - -e NODE=c6401.ambari.apache.org \ - -e REALM=AMBARI.APACHE.ORG \ - gcr.io/$PROJECT_ID/gpdb-pxf-dev/gpdb6-centos7-test-pxf-hdp2 /bin/bash - -# Inside the container, you can use the scripts in ci/docker/pxf-cbdb-dev/ubuntu/script to set up and run tests. - -echo "+----------------------------------------------+" -echo "| Kerberos admin principal: admin/admin@$REALM |" -echo "| Kerberos admin password : admin |" -echo "+----------------------------------------------+" - -su - gpadmin -``` ## Contribute diff --git a/automation/README.Docker.md b/automation/README.Docker.md index db0c3bc9..83fba964 100644 --- a/automation/README.Docker.md +++ b/automation/README.Docker.md @@ -1,3 +1,22 @@ +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + # Running Automation in Docker ## Prerequisites diff --git a/automation/README.Linux.md b/automation/README.Linux.md index 2d7e557b..f4031c82 100644 --- a/automation/README.Linux.md +++ b/automation/README.Linux.md @@ -5,17 +5,18 @@ They are intended to be used in tandem with the information in the main README f ## Locale Setup -Automation creates a GPDB database using the `ru_RU.CP1251` locale. You can generate the required locale files with +Automation creates a Cloudberry database using the `ru_RU.CP1251` locale. 
You can generate the required locale files with ```sh sudo sed -i.bak -e 's/# ru_RU.CP1251.*/ru_RU.CP1251 CP1251/' /etc/locale.gen sudo locale-gen ``` -After generating the locale, restart your GPDB cluster +After generating the locale, restart your Cloudberry cluster ```sh -source $GPHOME/greenplum_path.sh +source $GPHOME/greenplum_path.sh # For Cloudberry 2.0 +source $GPHOME/cloudberry-env.sh # For Cloudberry 2.1+ gpstop -a gpstart -a ``` diff --git a/automation/README.md b/automation/README.md index c7236d02..8300d079 100755 --- a/automation/README.md +++ b/automation/README.md @@ -130,7 +130,7 @@ Note: If you get an error saying that the jar does not exist, ensure that you ha - `src/main/java` - contains related classes and utilities for the test - `src/test/java` - contains the TestNG cases. - `sqlrepo` - contains SQL test cases. -- `src/main/java/org/greenplum/pxf/automation/components` - contains all the supported services/components with simple API abstractions. +- `src/main/java/org/apache/cloudberry/pxf/automation/components` - contains all the supported services/components with simple API abstractions. ### General Automation Architecture diff --git a/automation/pxf_regress/README.md b/automation/pxf_regress/README.md index 645cf7ea..07820806 100644 --- a/automation/pxf_regress/README.md +++ b/automation/pxf_regress/README.md @@ -4,7 +4,7 @@ `pxf_regress` is a PSQL test runner written in Go that is heavily inspired by `pg_regress`. PXF's automation test framework sets up data in external data -storage (e.g., Hadoop, Amazon S3, etc), creates Greenplum external tables to +storage (e.g., Hadoop, Amazon S3, etc), creates Cloudberry external tables to work with these data sets, and then invokes `pxf_regress` to run SQL test cases via `psql` and compare the results with expected output. Instead of matching the features of `pg_regress` exactly, this utility currently implements the @@ -36,16 +36,16 @@ small_data └── query02.sql ``` -There are no command line flags; the GPDB cluster that `pxf_regress` connects +There are no command line flags; the Cloudberry cluster that `pxf_regress` connects to can be customized with standard [Postgres environment variables][1]. ### Why not use `pg_regress`? -Ideally, PXF would re-use `pg_regress` which is included with upstream GPDB; -however, PXF supports multiple GPDB versions (currently 5, 6, & 7) with a -single code base. Differences between the GPDB major versions and the included +Ideally, PXF would re-use `pg_regress` which is included with upstream Cloudberry; +however, PXF supports multiple Cloudberry versions with a +single code base. Differences between the Cloudberry major versions and the included `pg_regress` results in non-semantically meaningful (for PXF) differences. -GPDB's version of `pg_regress` uses a utility called `gpdiff.pl` to compare +Cloudberry's version of `pg_regress` uses a utility called `gpdiff.pl` to compare actual test output with expected test output. From the description of [`gpdiff.pl`][2]: @@ -56,9 +56,9 @@ actual test output with expected test output. From the description of > single PostgreSQL instance. When `pg_regress` runs `gpdiff.pl`, it runs the version of `gpdiff.pl` that is -included with GPDB (`$($GPHOME/bin/pg_config +included with Cloudberry (`$($GPHOME/bin/pg_config --libdir)/postgresql/pgxs/src/test/regress/gpdiff.pl`) with hard-coded options -that cannot be customized. Not only is `gpdiff.pl` different across GPDB major +that cannot be customized. 
Not only is `gpdiff.pl` different across Cloudberry major versions, the set of options that `pg_regress` runs it with will be different across major versions. @@ -119,4 +119,4 @@ $ tree smoke/small_data ``` [1]: https://www.postgresql.org/docs/12/libpq-envars.html -[2]: https://github.com/greenplum-db/gpdb/blob/main/src/test/regress/gpdiff.pl +[2]: https://github.com/apache/cloudberry/blob/main/src/test/regress/gpdiff.pl diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java index 3cdb1f15..e8dad0d4 100755 --- a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java @@ -57,7 +57,7 @@ public class ShellSystemObject extends BaseSystemObject { "GPHOME", "GPHD_ROOT", "GPDATA", - "MASTER_DATA_DIRECTORY", + "COORDINATOR_DATA_DIRECTORY", "PGPORT", "PGHOST", "PGDATABASE" diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java index 201881f6..ad04a97f 100755 --- a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java @@ -22,7 +22,7 @@ import java.util.List; public class Gpdb extends DbSystemObject { private static final String DEFAULT_PORT = "5432"; - private static final String GREENPLUM_DATABASE_PREFIX = "Greenplum Database "; + private static final String APACHE_CLOUDBERRY_PREFIX = "Apache Cloudberry "; private static final String IF_NOT_EXISTS_OPTION = "IF NOT EXISTS"; private String sshUserName; @@ -580,8 +580,8 @@ public class Gpdb extends DbSystemObject { res.next(); String fullVersion = res.getString(1); ReportUtils.report(report, getClass(), "Retrieved from Greenplum: [" + fullVersion + "]"); - int gpIndex = fullVersion.indexOf(GREENPLUM_DATABASE_PREFIX); // where the version prefix starts - String prefix = GREENPLUM_DATABASE_PREFIX; + int gpIndex = fullVersion.indexOf(APACHE_CLOUDBERRY_PREFIX); // where the version prefix starts + String prefix = APACHE_CLOUDBERRY_PREFIX; // Cloudberry forks print strings like: // "PostgreSQL 14.4 (Apache Cloudberry 3.0.0-devel build dev) ..." 
// fall back to the Cloudberry prefix if the Greenplum one is missing diff --git a/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml b/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml index 437ac5a3..284baf49 100644 --- a/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml +++ b/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml @@ -88,7 +88,7 @@ make TEST=HdfsHAFailoverTest <GPHOME></GPHOME> <GPHD_ROOT></GPHD_ROOT> <GPDATA></GPDATA> - <MASTER_DATA_DIRECTORY></MASTER_DATA_DIRECTORY> + <COORDINATOR_DATA_DIRECTORY></COORDINATOR_DATA_DIRECTORY> <PGPORT></PGPORT> <PGHOST></PGHOST> <PGDATABASE></PGDATABASE> diff --git a/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml b/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml index dc9ced06..6b18b05c 100644 --- a/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml @@ -128,7 +128,7 @@ <GPHOME></GPHOME> <GPHD_ROOT></GPHD_ROOT> <GPDATA></GPDATA> - <MASTER_DATA_DIRECTORY></MASTER_DATA_DIRECTORY> + <COORDINATOR_DATA_DIRECTORY></COORDINATOR_DATA_DIRECTORY> <PGPORT></PGPORT> <PGHOST></PGHOST> <PGDATABASE></PGDATABASE> diff --git a/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml b/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml index 167d1507..a6195c61 100644 --- a/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml @@ -113,7 +113,7 @@ <GPHOME></GPHOME> <GPHD_ROOT></GPHD_ROOT> <GPDATA></GPDATA> - <MASTER_DATA_DIRECTORY></MASTER_DATA_DIRECTORY> + <COORDINATOR_DATA_DIRECTORY></COORDINATOR_DATA_DIRECTORY> <PGPORT></PGPORT> <PGHOST></PGHOST> <PGDATABASE></PGDATABASE> diff --git a/automation/src/test/resources/sut/MultiNodesCluster.xml b/automation/src/test/resources/sut/MultiNodesCluster.xml index a0f01e56..5d3e0ff0 100644 --- a/automation/src/test/resources/sut/MultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiNodesCluster.xml @@ -87,7 +87,7 @@ <GPHOME></GPHOME> <GPHD_ROOT></GPHD_ROOT> <GPDATA></GPDATA> - <MASTER_DATA_DIRECTORY></MASTER_DATA_DIRECTORY> + <COORDINATOR_DATA_DIRECTORY></COORDINATOR_DATA_DIRECTORY> <PGPORT></PGPORT> <PGHOST></PGHOST> <PGDATABASE></PGDATABASE> diff --git a/automation/src/test/resources/sut/default.xml b/automation/src/test/resources/sut/default.xml index ed24017a..7c9c4b68 100644 --- a/automation/src/test/resources/sut/default.xml +++ b/automation/src/test/resources/sut/default.xml @@ -97,7 +97,7 @@ <GPHOME></GPHOME> <GPHD_ROOT></GPHD_ROOT> <GPDATA></GPDATA> - <MASTER_DATA_DIRECTORY></MASTER_DATA_DIRECTORY> + <COORDINATOR_DATA_DIRECTORY></COORDINATOR_DATA_DIRECTORY> <PGPORT></PGPORT> <PGDATABASE></PGDATABASE> </shellsystemobject> diff --git a/automation/src/test/resources/templates/gpdb/gpinitsystem_config b/automation/src/test/resources/templates/gpdb/gpinitsystem_config index bbd1b3a7..9940ce05 100755 --- a/automation/src/test/resources/templates/gpdb/gpinitsystem_config +++ b/automation/src/test/resources/templates/gpdb/gpinitsystem_config @@ -25,14 +25,14 @@ PORT_BASE=40000 DATA_DIRECTORY=(/data/gpdb/p1 /data/gpdb/p2) #### OS-configured hostname or IP address of the master host. -MASTER_HOSTNAME=centos64-1 +COORDINATOR_HOSTNAME=centos64-1 -#### File system location where the master data directory +#### File system location where the coordinator data directory #### will be created. 
-MASTER_DIRECTORY=/data/gpdb/master +COORDINATOR_DIRECTORY=/data/gpdb/coordinator #### Port number for the master instance. -MASTER_PORT=5432 +COORDINATOR_PORT=5432 #### Shell utility used to connect to remote hosts. TRUSTED_SHELL=ssh diff --git a/ci/README.md b/ci/README.md deleted file mode 100644 index 2af1578a..00000000 --- a/ci/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# Concourse pipeline deployment -To facilitate pipeline maintenance, a Python utility 'deploy` -is used to generate the different pipelines for PXF main, -PXF 5x and release pipelines. It also allows the generation -of acceptance and custom pipelines for developers to use. - -The utility uses the [Jinja2](http://jinja.pocoo.org/) template -engine for Python. This allows the generation of portions of the -pipeline from common blocks of pipeline code. Logic (Python code) can -be embedded to further manipulate the generated pipeline. - -# Deploy the `pxf-build` (release) pipeline - -To deploy the build pipeline for PXF, make sure PXF main branch is currently checked-out and run this command: - -```shell script -make -C "${HOME}/workspace/pxf/concourse" build -``` - -# Deploy the `pxf-certification` (release) pipeline - -To deploy the certifcation pipeline (forward compatibility) for PXF, make sure PXF main branch is currently checked-out and run this command: - -```shell script -make -C "${HOME}/workspace/pxf/concourse" certification -``` - -# Deploy the singlecluster pipeline - -The singlecluster pipeline generates the singlecluster tarball for CDH, HDP2, -and HDP3. The generated tarballs are then published to an S3 and GCS bucket. -The produced tarballs can then be consumed in the pxf-build pipelines. - -```shell script -make -C "${HOME}/workspace/pxf/concourse" singlecluster -``` - -# Deploy the cloudbuild pipeline - -```shell script -make -C "${HOME}/workspace/pxf/concourse" cloudbuild -``` - -# Deploy the pull-request pipeline - -```shell script -make -C "${HOME}/workspace/pxf/concourse" pr -``` - -# Deploy the performance pipelines - -10G Performance pipeline: - -```shell script -make SCALE=10 -C "${HOME}/workspace/pxf/concourse" perf -``` - -You can deploy a development version of the perf pipeline by substituting the name -of your development branch into `pxf-git-branch=main`. Also, make sure to change -the name of your development pipeline (i.e. `-p dev:<YOUR-PIPELINE>`). - -50G Performance pipeline: - -```shell script -make SCALE=50 -C "${HOME}/workspace/pxf/concourse" perf -``` - -500G Performance pipeline: - -```shell script -make SCALE=500 -C "${HOME}/workspace/pxf/concourse" perf -``` - -By default, these pipelines run perf on RHEL7. -If you would like to run pipelines using RHEL8, please include `REDHAT_MAJOR_VERSION=8` to the command. -Ex: `make SCALE=10 REDHAT_MAJOR_VERSION=8 -C "${HOME}/workspace/pxf/concourse" perf` - -# Deploy development PXF release pipelines - -The dev release pipeline performs most functions of the `pxf-build` release pipeline except for the tagging and bumping of the build version. - -To deploy dev release pipeline, use: - -```shell -make -C "${HOME}/workspace/pxf/concourse" dev-release -``` - -# Deploy development PXF pipelines - -The dev pipeline is an abbreviated version of the `pxf-build` pipeline. 
- -To deploy dev pipeline against gpdb 5X_STABLE and 6X_STABLE branches, use: - -```shell -make -C "${HOME}/workspace/pxf/concourse" dev -``` - -To deploy multi-node dev pipeline, you can specify the following options -* `MULTINODE_EL7=<true|false>` for EL7 -* `MULTINODE_EL8=<true|false>` for EL8 -* `MULTINODE_EL9=<true|false>` for EL9 -* `MULTINODE_NO_IMPERSONATION=<true|false>` for EL7, which will also run CLI tests - -```shell -MULTINODE_EL7=true make -C "${HOME}/workspace/pxf/concourse" dev -``` - -This command will automatically point the pipeline at your currently checked-out branch of PXF. - -# Deploy Longevity Testing PXF pipeline -The longevity testing pipeline is designed to work off a PXF tag that needs to be provided as a parameter when -creating the pipeline. The generated pipeline compiles PXF, creates a Greenplum CCP cluster and 2 secure dataproc clusters -and runs a multi-cluster security test every 15 minutes. CCP cluster is set with expiration time of more than 6 months, so -it needs to be cleaned manually and so do the dataproc clusters. - -```shell -YOUR_TAG=<YOUR_TAG> make -C "${HOME}/workspace/pxf/concourse" longevity -``` - -## Uploading a new Apache Maven 3 version - -The CI pipelines for PXF run automation tests using Apache Maven 3.x. Instead of downloading this directly from the Apache -mirrors or Apache archive, we store a copy in Google Cloud Storage to use when we create our images in Cloudbuild. -Typically, we will not be updating these values very often. However, if we need to upload a new version of Maven, you -can use a snippet like this one to download and then upload to GCS. - -```bash -./scripts/download-maven-from-apache-mirror.sh <MAVEN-VERSION> -gcloud storage cp ../downloads/apache-maven-<MAVEN-VERSION>-bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -# Example for Apache Maven 3.9.2 -./scripts/download-spark-from-apache-mirror.sh 3.9.2 -gcloud storage cp ../downloads/apache-maven-3.9.2-bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -# Example for Apache Maven 3 Latest -$ ./scripts/download-spark-from-apache-mirror.sh latest -> Looking for latest maven-3 version... -> Latest maven version determined to be: 3.9.3 -> Would you like to proceed (y/n)? y - -gcloud storage cp ../downloads/apache-maven-3.9.3-bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -``` diff --git a/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh b/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh index f64fabee..52a26f35 100755 --- a/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh +++ b/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh @@ -35,7 +35,7 @@ ADMIN_PASS=${ADMIN_PASS:-AdminPass@123} PXF_BASE=${PXF_BASE:-/home/gpadmin/pxf-base} GPHOME=${GPHOME:-/usr/local/cloudberry-db} # GPDB demo master path is required by pg_hba reloads; define a default up front. -MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} +COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} # Java locations vary by arch; prefer Java 8 for Hadoop runtime and Java 11 for builds if needed. JAVA_11_ARM=/usr/lib/jvm/java-11-openjdk-arm64 @@ -844,8 +844,8 @@ configure_pg_hba() { } | awk '!seen[$0]++' | sudo tee "${tmp_pg_hba}" >/dev/null sudo mv "${tmp_pg_hba}" "${PG_HBA}" # Reload cluster so new HBA rules take effect immediately for test users. 
- if [ -n "${MASTER_DATA_DIRECTORY}" ] && [ -x "${GPHOME}/bin/pg_ctl" ]; then - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + if [ -n "${COORDINATOR_DATA_DIRECTORY}" ] && [ -x "${GPHOME}/bin/pg_ctl" ]; then + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi } @@ -875,7 +875,7 @@ ensure_gpdb_databases() { sudo -u gpadmin env ${env_path} "${createdb_bin}" "${conn_flags[@]}" -E UTF8 pxfautomation_encoding >/dev/null 2>&1 || true fi - sudo -u gpadmin env MASTER_DATA_DIRECTORY="${mdd}" GPHOME="${gphome}" "${gphome}/bin/pg_ctl" reload -D "${mdd}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY="${mdd}" GPHOME="${gphome}" "${gphome}/bin/pg_ctl" reload -D "${mdd}" >/dev/null 2>&1 || true } verify_security_mode() { @@ -1074,7 +1074,7 @@ init_test_env() { export PGPORT=${PGPORT:-7000} export PGDATABASE=${PGDATABASE:-pxfautomation} export PGUSER=${PGUSER:-gpadmin} - export MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} + export COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} export GPHOME=${GPHOME:-/usr/local/cloudberry-db} export PATH=/usr/local/bin:${GPHOME}/bin:${PATH} export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/home/gpadmin/workspace/singlecluster/hadoop/etc/hadoop} @@ -1137,19 +1137,19 @@ EOS pgrep -f sshd >/dev/null 2>&1 || sudo service ssh start >/dev/null 2>&1 || true if ! pgrep -f "${GPHOME}/bin/postgres" >/dev/null 2>&1; then - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/gpstart" -a >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/gpstart" -a >/dev/null 2>&1 || true fi if [ -f "${PG_HBA}" ] && ! grep -q "mdw/32 trust" "${PG_HBA}"; then sed -i '1ihost all all mdw/32 trust' "${PG_HBA}" || echo "host all all mdw/32 trust" | sudo tee -a "${PG_HBA}" >/dev/null - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi if [ -f "${PG_HBA}" ] && ! 
grep -q "172.18.0.0/16" "${PG_HBA}"; then sed -i '1ihost all all 172.18.0.0/16 trust' "${PG_HBA}" || echo "host all all 172.18.0.0/16 trust" | sudo tee -a "${PG_HBA}" >/dev/null - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi sudo -u gpadmin env PGHOST=${PGHOST} PGPORT=${PGPORT} PGUSER=${PGUSER} "${GPHOME}/bin/createdb" -T template1 pxfautomation >/dev/null 2>&1 || true sudo -u gpadmin env PGHOST=${PGHOST} PGPORT=${PGPORT} PGUSER=${PGUSER} "${GPHOME}/bin/createdb" -T template0 --encoding=WIN1251 --lc-collate=C --lc-ctype=C pxfautomation_encoding >/dev/null 2>&1 || true - ensure_gpdb_databases "${PGHOST}" "${PGPORT}" "${GPHOME}" "${MASTER_DATA_DIRECTORY}" + ensure_gpdb_databases "${PGHOST}" "${PGPORT}" "${GPHOME}" "${COORDINATOR_DATA_DIRECTORY}" for stub in pxf-pre-gpupgrade pxf-post-gpupgrade; do if [ ! -x "/usr/local/bin/${stub}" ]; then sudo tee "/usr/local/bin/${stub}" >/dev/null <<'SH' diff --git a/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh b/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh index 54588516..2743d4a2 100755 --- a/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh +++ b/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh @@ -35,7 +35,7 @@ export COMMON_JAVA_OPTS=${COMMON_JAVA_OPTS:-} # -------------------------------------------------------------------- export PGHOST=${PGHOST:-localhost} export PGPORT=${PGPORT:-7000} -export MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} +export COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} # set cloudberry timezone utc export PGTZ=UTC diff --git a/docs/book/config.yml b/docs/book/config.yml index 361733b4..ec750a96 100644 --- a/docs/book/config.yml +++ b/docs/book/config.yml @@ -1,4 +1,4 @@ -book_repo: greenplum-db/docs/book +book_repo: apache/cloudberry-pxf/docs/book public_host: localhost:9292 @@ -9,13 +9,13 @@ sections: subnav_template: pxf-subnav template_variables: - book_title: Greenplum Database PXF Documentation - book_title_short: Greenplum Database PXF Docs - domain_name: greenplum.org - product_link: <div class="header-item"><a href="https://docs.greenplum.org">To Greenplum Database Docs</a></div> - product_url: https://greenplum.org - support_call_to_action: <a href="https://greenplum.org/community/" target="_blank">Need Support?</a> - support_link: <a href="https://github.com/greenplum-db/pxf/wiki">Wiki</a> - support_url: https://greenplum.org + book_title: Apache Cloudberry PXF Documentation + book_title_short: Apache Cloudberry PXF Docs + domain_name: cloudberry.apache.org + product_link: <div class="header-item"><a href="https://docs.cloudberry.apache.org">To Cloudberry Docs</a></div> + product_url: https://cloudberry.apache.org + support_call_to_action: <a href="https://cloudberry.apache.org/community/" target="_blank">Need Support?</a> + support_link: <a href="https://github.com/apache/cloudberry-pxf">GitHub</a> + support_url: https://cloudberry.apache.org broken_link_exclusions: iefix|arrowhead diff --git a/docs/content/access_hdfs.html.md.erb b/docs/content/access_hdfs.html.md.erb index 30babaa2..fd87c709 100644 --- 
a/docs/content/access_hdfs.html.md.erb +++ b/docs/content/access_hdfs.html.md.erb @@ -32,7 +32,7 @@ HDFS is the primary distributed storage mechanism used by Apache Hadoop. When a <span class="figtitleprefix">Figure: </span>PXF-to-Hadoop Architecture - + A PXF worker thread works on behalf of a segment instance. A worker thread uses its Greenplum Database `gp_segment_id` and the file block information described in the metadata to assign itself a specific portion of the query data. This data may reside on one or more HDFS DataNodes. diff --git a/docs/content/index.html.md.erb b/docs/content/index.html.md.erb index 480ee7e9..aa7c1cc3 100644 --- a/docs/content/index.html.md.erb +++ b/docs/content/index.html.md.erb @@ -21,7 +21,7 @@ specific language governing permissions and limitations under the License. --> -The Greenplum Platform Extension Framework (PXF) provides parallel, high throughput data access and federated queries across heterogeneous data sources via built-in connectors that map a Greenplum Database external table definition to an external data source. PXF has its roots in the Apache HAWQ project. +The Apache Cloudberry Platform Extension Framework (PXF) provides parallel, high throughput data access and federated queries across heterogeneous data sources via built-in connectors that map a Greenplum Database external table definition to an external data source. PXF has its roots in the Apache HAWQ project. - [Overview of PXF](overview_pxf.html) - [Transitioning to Apache Cloudberry](transition_to_cloudberry.html) diff --git a/docs/content/instcfg_pxf.html.md.erb b/docs/content/instcfg_pxf.html.md.erb index 4b7a0a0d..af9a4811 100644 --- a/docs/content/instcfg_pxf.html.md.erb +++ b/docs/content/instcfg_pxf.html.md.erb @@ -1,7 +1,7 @@ --- title: Configuring PXF --- -Your Greenplum Database deployment consists of a coordinator host, a standby coordinator host, and multiple segment hosts. After you configure the Greenplum Platform Extension Framework (PXF), you start a single PXF JVM process (PXF Service) on each Greenplum Database host. +Your Greenplum Database deployment consists of a coordinator host, a standby coordinator host, and multiple segment hosts. After you configure the Apache Cloudberry Platform Extension Framework (PXF), you start a single PXF JVM process (PXF Service) on each Greenplum Database host. PXF provides connectors to Hadoop, Hive, HBase, object stores, network file systems, and external SQL data stores. You must configure PXF to support the connectors that you plan to use. diff --git a/docs/content/intro_pxf.html.md.erb b/docs/content/intro_pxf.html.md.erb index 59c2ec7c..6f0c44c6 100644 --- a/docs/content/intro_pxf.html.md.erb +++ b/docs/content/intro_pxf.html.md.erb @@ -2,7 +2,7 @@ title: Introduction to PXF --- -The Greenplum Platform Extension Framework (PXF) provides *connectors* that enable you to access data stored in sources external to your Greenplum Database deployment. These connectors map an external data source to a Greenplum Database *external table* definition. When you create the Greenplum Database external table, you identify the external data store and the format of the data via a *server* name and a *profile* name that you provide in the command. +The Apache Cloudberry Platform Extension Framework (PXF) provides *connectors* that enable you to access data stored in sources external to your Greenplum Database deployment. These connectors map an external data source to a Greenplum Database *external table* definition. 
diff --git a/docs/content/overview_pxf.html.md.erb b/docs/content/overview_pxf.html.md.erb
index a1b43095..05517852 100644
--- a/docs/content/overview_pxf.html.md.erb
+++ b/docs/content/overview_pxf.html.md.erb
@@ -1,5 +1,5 @@
 ---
-title: Greenplum Platform Extension Framework (PXF)
+title: Apache Cloudberry Platform Extension Framework (PXF)
 ---
 
 <!--
@@ -27,7 +27,7 @@ The diagram below describes a data source that tracks monthly sales across many
 
-When multiple, related data sets exist in external systems, it is often more efficient to join data sets remotely and return only the results, rather than negotiate the time and storage requirements of performing a rather expensive full data load operation. The *Greenplum Platform Extension Framework (PXF)*, a Greenplum extension that provides parallel, high throughput data access and federated query processing, provides this capability.
+When multiple, related data sets exist in external systems, it is often more efficient to join data sets remotely and return only the results, rather than negotiate the time and storage requirements of performing a rather expensive full data load operation. The *Apache Cloudberry Platform Extension Framework (PXF)*, a Greenplum extension that provides parallel, high throughput data access and federated query processing, provides this capability.
 
 With PXF, you can use Greenplum and SQL to query these heterogeneous data sources:
diff --git a/docs/content/ref/pxf-ref.html.md.erb b/docs/content/ref/pxf-ref.html.md.erb
index 3a900d54..1d55e354 100644
--- a/docs/content/ref/pxf-ref.html.md.erb
+++ b/docs/content/ref/pxf-ref.html.md.erb
@@ -2,7 +2,7 @@
 title: Utility Reference
 ---
 
-The Greenplum Platform Extension Framework (PXF) includes the following utility reference pages:
+The Apache Cloudberry Platform Extension Framework (PXF) includes the following utility reference pages:
 
 - [pxf cluster](pxf-cluster.html)
diff --git a/docs/content/transition_to_cloudberry.html.md.erb b/docs/content/transition_to_cloudberry.html.md.erb
index 8cfcc6bd..502dd587 100644
--- a/docs/content/transition_to_cloudberry.html.md.erb
+++ b/docs/content/transition_to_cloudberry.html.md.erb
@@ -2,6 +2,25 @@
 title: Transitioning to Apache Cloudberry (Incubating)
 ---
 
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
 The transition of the PXF project to **Apache Cloudberry (Incubating)** involves a significant rebranding effort.
 
 As part of this transition, the Java package namespace has been changed from `org.greenplum` to `org.apache.cloudberry`. This is a user-facing breaking change.
 
 If you have customized PXF configuration files in your `$PXF_BASE/conf` directory, you must manually update these files to use the new package names.
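Since the transition page calls the `org.greenplum` to `org.apache.cloudberry` rename a user-facing breaking change, here is a hedged sketch of how an operator might locate and rewrite stale references in customized configs. Only `$PXF_BASE/conf` and the two package names come from the page above; the backup suffix and in-place sed are illustrative:

```bash
#!/usr/bin/env bash
# Hedged sketch: rewrite the renamed Java package namespace in customized
# PXF configuration files, keeping a .bak copy of each touched file.
cd "${PXF_BASE}/conf" || exit 1

for f in $(grep -rl 'org\.greenplum' . 2>/dev/null); do
    cp "$f" "$f.bak"                                     # keep a backup copy
    sed -i 's/org\.greenplum/org.apache.cloudberry/g' "$f"
    echo "updated: $f"
done
```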
diff --git a/docs/content/using_pxf.html.md.erb b/docs/content/using_pxf.html.md.erb
index 96dea333..82094676 100644
--- a/docs/content/using_pxf.html.md.erb
+++ b/docs/content/using_pxf.html.md.erb
@@ -21,7 +21,7 @@ specific language governing permissions and limitations
 under the License.
 -->
 
-The Greenplum Platform Extension Framework (PXF) implements a protocol named `pxf` that you can use to create an external table that references data in an external data store. The PXF protocol and Java service are packaged as a Greenplum Database extension.
+The Apache Cloudberry Platform Extension Framework (PXF) implements a protocol named `pxf` that you can use to create an external table that references data in an external data store. The PXF protocol and Java service are packaged as a Greenplum Database extension.
 
 You must enable the PXF extension in each database in which you plan to use the framework to access external data. You must also explicitly `GRANT` permission to the `pxf` protocol to those users/roles who require access.
diff --git a/external-table/Makefile b/external-table/Makefile
index 55ba4d92..ae5195dc 100644
--- a/external-table/Makefile
+++ b/external-table/Makefile
@@ -18,11 +18,14 @@ include $(PGXS)
 .PHONY: stage
 stage: pxf.so
 	mkdir -p build/stage/gpextable
+	mkdir -p build/metadata
 	install -c -m 755 pxf.so build/stage/gpextable/pxf.so
 	install -c -m 644 pxf.control build/stage/gpextable/
 	install -c -m 644 $(DATA) build/stage/gpextable/
-	@echo "cloudberry.version=$(CLB_VERSION)" > build/stage/gpextable/metadata
-	@echo "cloudberry.major-version=$(CLB_MAJORVERSION)" >> build/stage/gpextable/metadata
+	@echo "$(GP_MAJORVERSION)" > build/metadata/gp_major_version
+	@echo "$(shell uname -m)" > build/metadata/build_arch
+	@echo "cloudberry.version=$(GP_VERSION)" > build/stage/gpextable/metadata
+	@echo "cloudberry.major-version=$(GP_MAJORVERSION)" >> build/stage/gpextable/metadata
 
 .PHONY: clean-all
 clean-all: clean
diff --git a/regression/README.md b/regression/README.md
index e9ce5904..44e7ab8b 100644
--- a/regression/README.md
+++ b/regression/README.md
@@ -14,7 +14,7 @@ Running the tests
 ## Pre-requisites
 
 You need a running instance of Greenplum and PXF, along with a local installation of Greenplum (to be able to use the `pg_regress` framework).
-The variables `PGHOST` and `PGPORT` must be pointing at the Greenplum master node, and Greenplum environment scripts like `${GPHOME}/greenplum_path.sh` and `gpdb/gpAux/gpdemo/gpdemo-env.sh` should be sourced.
+The variables `PGHOST` and `PGPORT` must be pointing at the Greenplum master node, and Greenplum environment scripts like `${GPHOME}/greenplum_path.sh` (for Cloudberry 2.0) or `${GPHOME}/cloudberry-env.sh` (for Cloudberry 2.1+) should be sourced.
 `pg_config` must be on your path.
 
 For data prep, the appropriate CLIs are required, as we shell out from SQL to these CLIs. These include `hdfs`, `hbase`, and `beeline`.
@@ -46,8 +46,7 @@ By setting environment variables you can change the location of the Greenplum ma
 
 ### General environment variables
 
-All the general environment variables that come from `greenplum_path.sh` and
-`gpdemo-env.sh` must be set. Additionally, `PXF_BASE` must be set if different
+All the general environment variables that come from `greenplum_path.sh` (for Cloudberry 2.0) or `cloudberry-env.sh` (for Cloudberry 2.1+) must be set. Additionally, `PXF_BASE` must be set if different
 from `PXF_HOME`.
 
 * `PXF_TEST_DEBUG`: set to anything to prevent deletion of data, and to run `pg_regress` in debug mode (optional)
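The README change above distinguishes Cloudberry 2.0 (`greenplum_path.sh`) from 2.1+ (`cloudberry-env.sh`). A small sketch of how a test wrapper might source whichever script the installation ships, mirroring the existence checks this commit adds elsewhere (assumes GPHOME is set):

```bash
# Version-aware environment sourcing; which script exists depends on
# the installed Cloudberry release.
if [[ -f "${GPHOME}/cloudberry-env.sh" ]]; then
    source "${GPHOME}/cloudberry-env.sh"       # Cloudberry 2.1+
elif [[ -f "${GPHOME}/greenplum_path.sh" ]]; then
    source "${GPHOME}/greenplum_path.sh"       # Cloudberry 2.0
else
    echo "GPHOME (${GPHOME}) is not a valid Cloudberry installation" >&2
    exit 1
fi
```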
diff --git a/server/pxf-service/src/scripts/pxf b/server/pxf-service/src/scripts/pxf
index 0a422983..5e3f0905 100755
--- a/server/pxf-service/src/scripts/pxf
+++ b/server/pxf-service/src/scripts/pxf
@@ -217,7 +217,7 @@ function doHelp() {
   restart    restart the local PXF server instance (not supported for cluster)
   status     show the status of the local PXF server instance
   version    show the version of PXF server
-  register   install PXF extension under \$GPHOME (useful after upgrades of Greenplum server)
+  register   install PXF extension under \$GPHOME (useful after upgrades of Cloudberry server)
   prepare    prepares a new base directory specified by the \$PXF_BASE environment variable.
              It creates the servers, logs, lib, keytabs, and run directories inside \$PXF_BASE and copies configuration files.
@@ -253,11 +253,11 @@ function doReset() {
 function installExtensions() {
     if [[ -d ${parent_script_dir}/gpextable ]]; then
         if [[ -z "${GPHOME}" ]]; then
-            echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Greenplum External Table PXF Extension'
-        elif [[ ! -f ${GPHOME}/greenplum_path.sh ]]; then
-            echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Greenplum installation, skipping install of Greenplum External Table PXF Extension'"
+            echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Cloudberry External Table PXF Extension'
+        elif [[ ! -f ${GPHOME}/greenplum_path.sh && ! -f ${GPHOME}/cloudberry-env.sh ]]; then
+            echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Cloudberry installation, skipping install of Cloudberry External Table PXF Extension'"
         else
-            echoGreen "Installing Greenplum External Table PXF Extension into '${GPHOME}'"
+            echoGreen "Installing Cloudberry External Table PXF Extension into '${GPHOME}'"
             local target_control_file="${GPHOME}/share/postgresql/extension/pxf.control"
             install --verbose --mode=0644 "${parent_script_dir}/gpextable/pxf.control" "${target_control_file}" || fail "cannot install pxf.control to '${target_control_file}'"
@@ -265,11 +265,11 @@ function installExtensions() {
     fi
     if [[ -d ${parent_script_dir}/fdw ]]; then
         if [[ -z "${GPHOME}" ]]; then
-            echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Greenplum Foreign Data Wrapper PXF Extension'
-        elif [[ ! -f ${GPHOME}/greenplum_path.sh ]]; then
-            echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Greenplum installation, skipping install of Greenplum Foreign Data Wrapper PXF Extension'"
+            echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Cloudberry Foreign Data Wrapper PXF Extension'
+        elif [[ ! -f ${GPHOME}/greenplum_path.sh && ! -f ${GPHOME}/cloudberry-env.sh ]]; then
+            echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Cloudberry installation, skipping install of Cloudberry Foreign Data Wrapper PXF Extension'"
         else
-            echoGreen "Installing Greenplum Foreign Data Wrapper PXF Extension into '${GPHOME}'"
+            echoGreen "Installing Cloudberry Foreign Data Wrapper PXF Extension into '${GPHOME}'"
             local target_control_file="${GPHOME}/share/postgresql/extension/pxf_fdw.control"
             install --verbose --mode=0644 "${parent_script_dir}/fdw/pxf_fdw.control" "${target_control_file}" || fail "cannot install pxf_fdw.control to '${target_control_file}'"
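Per the help text above, `register` re-installs the extension files under `$GPHOME`, which is useful after the server binaries are upgraded. A hedged usage sketch for a single host (the GPHOME path is illustrative, and the register-then-restart ordering is an assumption, not prescribed by the script):

```bash
# Hypothetical post-upgrade flow on one host: point GPHOME at the
# upgraded server, re-install the extension files, restart local PXF.
export GPHOME=/usr/local/cloudberry            # illustrative path
pxf register
pxf restart
```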
diff --git a/server/pxf-service/src/scripts/pxf-post-gpupgrade b/server/pxf-service/src/scripts/pxf-post-gpupgrade
index 5f017da4..59aa3ad1 100755
--- a/server/pxf-service/src/scripts/pxf-post-gpupgrade
+++ b/server/pxf-service/src/scripts/pxf-post-gpupgrade
@@ -56,24 +56,24 @@ EOF
 metadata_file="${PXF_HOME}/gpextable/metadata"
 pxf_gpdb_major_version=""
 if [[ -f "${metadata_file}" ]]; then
-    pxf_gpdb_major_version="$(awk 'BEGIN { FS = \"=\" } /cloudberry.major-version/{ print $2 }' \"${metadata_file}\")"
+    pxf_gpdb_major_version="$(awk 'BEGIN { FS = "=" } /cloudberry.major-version/{ print $2 }' "${metadata_file}")"
 else
     echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/GPDB compatibility check" >>"${log_file}"
 fi
 
 gp_version="$(psql --no-align --tuples-only --command "SHOW server_version")"
 pxf_version="$(cat "${PXF_HOME}"/version)"
-echo "PXF ${pxf_version} compiled against GPDB major version '${pxf_gpdb_major_version}'" >>"${log_file}"
-echo "Running GPDB cluster is version '${gp_version}'" >>"${log_file}"
+echo "PXF ${pxf_version} compiled against Cloudberry major version '${pxf_gpdb_major_version}'" >>"${log_file}"
+echo "Running Cloudberry cluster is version '${gp_version}'" >>"${log_file}"
 
 if [[ -n "${pxf_gpdb_major_version}" && "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then
-    echo "ERROR: This version of PXF only works with GPDB ${pxf_gpdb_major_version}+ but the targeted GPDB cluster is ${gp_version}" | tee -a "${log_file}"
+    echo "ERROR: This version of PXF only works with Cloudberry ${pxf_gpdb_major_version}+ but the targeted Cloudberry cluster is ${gp_version}" | tee -a "${log_file}"
     exit 1
 fi
 
-master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1"
-export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${master_data_dir_query}")}"
-echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}"
+coordinator_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1"
+export COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${coordinator_data_dir_query}")}"
+echo "Cloudberry coordinator data directory is '${COORDINATOR_DATA_DIRECTORY}'" >>"${log_file}"
 
 if [[ -d "${PXF_HOME}/gpextable" ]]; then
     PXF_HOME_REGEX="(.*:)*\/gpextable.*"
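The gpupgrade hooks above implement a simple major-version guard, which this commit also fixes (the old awk invocation had incorrectly escaped quotes). The same logic, condensed into a standalone sketch (assumes PXF_HOME and a reachable cluster):

```bash
# Compare the major version PXF was built against with the running cluster.
metadata_file="${PXF_HOME}/gpextable/metadata"
pxf_major="$(awk 'BEGIN { FS = "=" } /cloudberry.major-version/ { print $2 }' "${metadata_file}")"
gp_version="$(psql --no-align --tuples-only --command "SHOW server_version")"

# ${gp_version%%.*} keeps the text before the first '.', e.g. "2.1.0" -> "2".
if [[ -n "${pxf_major}" && "${pxf_major}" != "${gp_version%%.*}" ]]; then
    echo "PXF was built for Cloudberry ${pxf_major}+, but the cluster reports ${gp_version}" >&2
    exit 1
fi
```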
diff --git a/server/pxf-service/src/scripts/pxf-pre-gpupgrade b/server/pxf-service/src/scripts/pxf-pre-gpupgrade
index 1306aa40..c7a69535 100755
--- a/server/pxf-service/src/scripts/pxf-pre-gpupgrade
+++ b/server/pxf-service/src/scripts/pxf-pre-gpupgrade
@@ -56,24 +56,24 @@ EOF
 metadata_file="${PXF_HOME}/gpextable/metadata"
 pxf_gpdb_major_version=""
 if [[ -f "${metadata_file}" ]]; then
-    pxf_gpdb_major_version="$(awk 'BEGIN { FS = \"=\" } /cloudberry.major-version/{ print $2 }' \"${metadata_file}\")"
+    pxf_gpdb_major_version="$(awk 'BEGIN { FS = "=" } /cloudberry.major-version/{ print $2 }' "${metadata_file}")"
 else
-    echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/GPDB compatibility check" >>"${log_file}"
+    echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/Cloudberry compatibility check" >>"${log_file}"
 fi
 
 gp_version="$(psql --no-align --tuples-only --command "SHOW server_version")"
 pxf_version="$(cat "${PXF_HOME}"/version)"
-echo "PXF ${pxf_version} compiled against GPDB major version '${pxf_gpdb_major_version}'" >>"${log_file}"
-echo "Running GPDB cluster is version '${gp_version}'" >>"${log_file}"
+echo "PXF ${pxf_version} compiled against Cloudberry major version '${pxf_gpdb_major_version}'" >>"${log_file}"
+echo "Running Cloudberry cluster is version '${gp_version}'" >>"${log_file}"
 
 if [[ -n "${pxf_gpdb_major_version}" && "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then
-    echo "ERROR: This version of PXF only works with GPDB ${pxf_gpdb_major_version}+ but the targeted GPDB cluster is ${gp_version}" | tee -a "${log_file}"
+    echo "ERROR: This version of PXF only works with Cloudberry ${pxf_gpdb_major_version}+ but the targeted Cloudberry cluster is ${gp_version}" | tee -a "${log_file}"
     exit 1
 fi
 
-master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1"
-export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${master_data_dir_query}")}"
-echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}"
+coordinator_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1"
+export COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${coordinator_data_dir_query}")}"
+echo "Cloudberry coordinator data directory is '${COORDINATOR_DATA_DIRECTORY}'" >>"${log_file}"
 
 if [[ -d "${PXF_HOME}/gpextable" ]]; then
     PXF_HOME_REGEX="(.*:)*\/gpextable.*"

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
