This is an automated email from the ASF dual-hosted git repository. djwang pushed a commit to branch merge-with-upstream in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git
commit 733ab55205b6043f07c8550cc23d9aaaa4547433 Author: kongfanshen <[email protected]> AuthorDate: Fri Oct 14 11:27:47 2022 +0800 Add support for Cloudberry Database This project is dedicated to supporting PXF in Cloudberry Database. In this PR, we updated the build and test files to make it support CloudberryDB 1.0+. You can see more details in the commit. Authored-by: Kongfanshen <[email protected]> Co-authored-by: Hao Wu <[email protected]> --- Makefile | 56 +++++++++------- cli/cmd/cluster.go | 5 ++ external-table/src/libchurl.c | 10 +-- external-table/src/pxffilters.c | 4 +- external-table/src/pxfheaders.c | 9 +++ package/pxf-cbdb1.spec | 79 +++++++++++++++++++++++ server/pxf-service/src/scripts/pxf-post-gpupgrade | 6 +- server/pxf-service/src/scripts/pxf-pre-gpupgrade | 6 +- version | 2 +- 9 files changed, 133 insertions(+), 44 deletions(-) diff --git a/Makefile b/Makefile index 907977ea..30b9d3f7 100644 --- a/Makefile +++ b/Makefile @@ -6,12 +6,8 @@ export PXF_MODULES PXF_VERSION ?= $(shell cat version) export PXF_VERSION -PG_CONFIG ?= pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -ifndef PGXS - $(error Make sure the Greenplum installation binaries are in your PATH. i.e. 
export PATH=<path to your Greenplum installation>/bin:$$PATH) -endif -include $(PGXS) +FDW_SUPPORT = $(shell $(PG_CONFIG) --version | egrep "PostgreSQL 1[2-5]") +FDW_SUPPORT = SOURCE_EXTENSION_DIR = external-table TARGET_EXTENSION_DIR = gpextable @@ -82,12 +78,13 @@ ifneq ($(SKIP_FDW_PACKAGE_REASON),) $(eval PXF_MODULES := $(filter-out fdw,$(PXF_MODULES))) endif set -e ;\ - mkdir -p build/stage/$${PXF_PACKAGE_NAME}/pxf ;\ - for module in $${PXF_MODULES[@]}; do \ - echo "===> Staging [$${module}] module <===" ;\ - make -C $${module} stage ;\ - cp -a "$${module}"/build/stage/* "build/stage/$${PXF_PACKAGE_NAME}/pxf" ;\ - done ;\ + GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ + GP_BUILD_ARCH=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/build_arch) ;\ + PXF_PACKAGE_NAME=pxf-cbdb$${GP_MAJOR_VERSION}-$${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ + mkdir -p build/stage/$${PXF_PACKAGE_NAME} ;\ + cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ + cp -a cli/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ + cp -a server/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ echo $$(git rev-parse --verify HEAD) > build/stage/$${PXF_PACKAGE_NAME}/pxf/commit.sha ;\ cp package/install_binary build/stage/$${PXF_PACKAGE_NAME}/install_component ;\ echo "===> PXF staging is complete <===" @@ -98,8 +95,18 @@ tar: stage tar -czf build/dist/$(PXF_PACKAGE_NAME).tar.gz -C build/stage $(PXF_PACKAGE_NAME) echo "===> PXF TAR file with binaries creation is complete <===" -rpm: stage - rm -rf build/rpmbuild +gppkg-rpm: rpm + rm -rf gppkg + mkdir -p gppkg/deps + GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) + cat package/gppkg_spec.yml.in | sed "s,#arch,`arch`," | sed "s,#os,$(TEST_OS)," | sed "s,#gppkgver,1.0," | sed "s,#gpver,1," > gppkg/gppkg_spec.yml + find build/rpmbuild/RPMS -name pxf-cbdb$(GP_MAJOR_VERSION)-*.rpm -exec cp {} gppkg/ \; + source $(GPHOME)/greenplum_path.sh && gppkg --build gppkg + 
+rpm: + make -C $(SOURCE_EXTENSION_DIR) stage + make -C cli/go/src/pxf-cli stage + make -C server stage set -e ;\ PXF_MAIN_VERSION=$${PXF_VERSION//-SNAPSHOT/} ;\ if [[ $${PXF_VERSION} == *"-SNAPSHOT" ]]; then PXF_RELEASE=SNAPSHOT; else PXF_RELEASE=1; fi ;\ @@ -112,14 +119,14 @@ rpm: stage --define "pxf_release $${PXF_RELEASE}" \ --define "license ${LICENSE}" \ --define "vendor ${VENDOR}" \ - -bb $${PWD}/build/rpmbuild/SPECS/pxf-gp$${GP_MAJORVERSION}.spec ;\ - echo "===> PXF RPM package creation is complete <===" + -bb $${PWD}/build/rpmbuild/SPECS/pxf-cbdb$${GP_MAJOR_VERSION}.spec rpm-tar: rpm rm -rf build/{stagerpm,distrpm} mkdir -p build/{stagerpm,distrpm} set -e ;\ - PXF_RPM_FILE=$$(find build/rpmbuild/RPMS -name pxf-gp$${GP_MAJORVERSION}-*.rpm) ;\ + GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ + PXF_RPM_FILE=$$(find build/rpmbuild/RPMS -name pxf-cbdb$${GP_MAJOR_VERSION}-*.rpm) ;\ PXF_RPM_BASE_NAME=$$(basename $${PXF_RPM_FILE%*.rpm}) ;\ PXF_PACKAGE_NAME=$${PXF_RPM_BASE_NAME%.*} ;\ mkdir -p build/stagerpm/$${PXF_PACKAGE_NAME} ;\ @@ -133,21 +140,24 @@ deb: stage set -e ;\ PXF_MAIN_VERSION=$${PXF_VERSION//-SNAPSHOT/} ;\ if [[ $${PXF_VERSION} == *"-SNAPSHOT" ]]; then PXF_RELEASE=SNAPSHOT; else PXF_RELEASE=1; fi ;\ - mkdir -p build/debbuild/usr/local/pxf-gp$${GP_MAJORVERSION} ;\ - cp -a build/stage/$${PXF_PACKAGE_NAME}/pxf/* build/debbuild/usr/local/pxf-gp$${GP_MAJORVERSION} ;\ + rm -rf build/debbuild ;\ + mkdir -p build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ + cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ + cp -a cli/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ + cp -a server/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ + echo $$(git rev-parse --verify HEAD) > build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/commit.sha ;\ mkdir build/debbuild/DEBIAN ;\ 
cp -a package/DEBIAN/* build/debbuild/DEBIAN/ ;\ sed -i -e "s/%VERSION%/$${PXF_MAIN_VERSION}-$${PXF_RELEASE}/" -e "s/%MAINTAINER%/${VENDOR}/" build/debbuild/DEBIAN/control ;\ dpkg-deb --build build/debbuild ;\ - mv build/debbuild.deb build/pxf-gp$${GP_MAJORVERSION}-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-ubuntu18.04-amd64.deb ;\ - echo "===> PXF DEB package creation is complete <===" - + mv build/debbuild.deb build/pxf-cbdb$${GP_MAJOR_VERSION}-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-ubuntu18.04-amd64.deb deb-tar: deb rm -rf build/{stagedeb,distdeb} mkdir -p build/{stagedeb,distdeb} set -e ;\ - PXF_DEB_FILE=$$(find build/ -name pxf-gp$${GP_MAJORVERSION}*.deb) ;\ + GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ + PXF_DEB_FILE=$$(find build/ -name pxf-cbdb$${GP_MAJOR_VERSION}*.deb) ;\ PXF_PACKAGE_NAME=$$(dpkg-deb --field $${PXF_DEB_FILE} Package)-$$(dpkg-deb --field $${PXF_DEB_FILE} Version)-ubuntu18.04 ;\ mkdir -p build/stagedeb/$${PXF_PACKAGE_NAME} ;\ cp $${PXF_DEB_FILE} build/stagedeb/$${PXF_PACKAGE_NAME} ;\ diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go index bb56384d..68a7b301 100644 --- a/cli/cmd/cluster.go +++ b/cli/cmd/cluster.go @@ -10,6 +10,7 @@ import ( "github.com/greenplum-db/gp-common-go-libs/dbconn" "github.com/greenplum-db/gp-common-go-libs/gplog" "github.com/spf13/cobra" + "github.com/blang/semver" ) // ClusterData is exported for testing @@ -141,6 +142,10 @@ func doSetup() (*ClusterData, error) { "Please make sure that your Greenplum database is running and you are on the coordinator node.", err.Error())) return nil, err } + + //set the fake version for cbdb. 
+ connection.Version = dbconn.GPDBVersion{VersionString: "7.1.0", SemVer: semver.MustParse("7.1.0")} + segConfigs, err := cluster.GetSegmentConfiguration(connection, true) if err != nil { gplog.Error(fmt.Sprintf("ERROR: Could not retrieve segment information from GPDB.\n%s\n" + err.Error())) diff --git a/external-table/src/libchurl.c b/external-table/src/libchurl.c index 40e03480..06a9906a 100644 --- a/external-table/src/libchurl.c +++ b/external-table/src/libchurl.c @@ -22,7 +22,8 @@ #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/guc.h" -#include "utils/jsonapi.h" +#include "common/jsonapi.h" +#include "utils/jsonfuncs.h" /* include libcurl without typecheck. * This allows wrapping curl_easy_setopt to be wrapped @@ -92,13 +93,6 @@ typedef struct struct curl_slist *headers; } churl_settings; -/* the null action object used for pure validation */ -static JsonSemAction nullSemAction = -{ - NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL -}; - churl_context *churl_new_context(void); static void create_curl_handle(churl_context *context); static void set_curl_option(churl_context *context, CURLoption option, const void *data); diff --git a/external-table/src/pxffilters.c b/external-table/src/pxffilters.c index ec24164a..3ef0f062 100644 --- a/external-table/src/pxffilters.c +++ b/external-table/src/pxffilters.c @@ -176,7 +176,7 @@ dbop_pxfop_map pxf_supported_opr_op_expr[] = {85 /* boolne */ , PXFOP_NE}, /* bpchar */ - {BPCharEqualOperator /* bpchareq */ , PXFOP_EQ}, + {BpcharEqualOperator /* bpchareq */ , PXFOP_EQ}, {1058 /* bpcharlt */ , PXFOP_LT}, {1060 /* bpchargt */ , PXFOP_GT}, {1059 /* bpcharle */ , PXFOP_LE}, @@ -238,7 +238,7 @@ dbop_pxfop_array_map pxf_supported_opr_scalar_array_op_expr[] = {1120 /* float48eq */ , PXFOP_IN, true}, /* bpchar */ - {BPCharEqualOperator /* bpchareq */ , PXFOP_IN, true}, + {BpcharEqualOperator /* bpchareq */ , PXFOP_IN, true}, }; diff --git a/external-table/src/pxfheaders.c 
b/external-table/src/pxfheaders.c index da7887b1..0c991eb7 100644 --- a/external-table/src/pxfheaders.c +++ b/external-table/src/pxfheaders.c @@ -1176,4 +1176,13 @@ parseCopyFormatString(Relation rel, char *fmtstr, char fmttype) errmsg("external table internal parse error at end of line"))); } +/* + * This function is copied from fileam.c in the 6X_STABLE branch. + * In version 6, this function is no longer required to be copied. + */ +static List * +appendCopyEncodingOption(List *copyFmtOpts, int encoding) +{ + return lappend(copyFmtOpts, makeDefElem("encoding", (Node *)makeString((char *)pg_encoding_to_char(encoding)))); +} #endif diff --git a/package/pxf-cbdb1.spec b/package/pxf-cbdb1.spec new file mode 100644 index 00000000..f2812e34 --- /dev/null +++ b/package/pxf-cbdb1.spec @@ -0,0 +1,79 @@ +# Disable repacking of jars, since it takes forever +%define __jar_repack %{nil} + +# Disable build-id in rpm +%define _build_id_links none +# Disable automatic dependency processing both for requirements and provides +AutoReqProv: no + +Name: pxf-cbdb1 +Version: %{pxf_version} +Release: 1%{?dist} +Summary: Cloudberry PXF framework for external data access +License: %{license} +URL: http://www.hashdata.cn +Vendor: %{vendor} + +Prefix: /usr/local/%{name} + +# .so file makes sense only when installing on Cloudberry node, so inherit Cloudberry's dependencies implicitly +# Java server can be installed on a new node, only bash is needed for management scripts +## cbdb has added this requirement, pxf may be installed under GPHOME, +# this requirement will cause installation to fail. +# Requires: bash + +%description +PXF is an extensible framework that allows a distributed database like Cloudberry to query external data files, +whose metadata is not managed by the database. PXF includes built-in connectors for accessing data that exists +inside HDFS files, Hive tables, HBase tables, databases that support JDBC, data stores (S3, GCS) and more. 
+ +%prep +# If the pxf_version macro is not defined, it gets interpreted as a literal string, need %% to escape it +if [ %{pxf_version} = '%%{pxf_version}' ] ; then + echo "The macro (variable) pxf_version must be supplied as rpmbuild ... --define='pxf_version [VERSION]'" + exit 1 +fi + +%install +%__mkdir -p %{buildroot}/%{prefix} +%__cp -R %{_sourcedir}/* %{buildroot}/%{prefix} + +%post +sed -i "s|directory =.*|directory = '${RPM_INSTALL_PREFIX}/fdw/'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" +sed -i "s|module_pathname =.*|module_pathname = '${RPM_INSTALL_PREFIX}/fdw/pxf_fdw'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" + +%files +%{prefix} + +# If a file is not marked as a config file, or if a file has not been altered +# since installation, then it will be silently replaced by the version from the +# RPM. + +# If a config file has been edited on disk, but is not actually different from +# the file in the RPM then the edited version will be silently left in place. + +# When a config file has been edited and is different from the file in +# the RPM, then the behavior is the following: +# - %config(noreplace): The edited version will be left in place, and the new +# version will be installed with an .rpmnew suffix. +# - %config: The new file will be installed, and the old edited version +# will be renamed with an .rpmsave suffix. + +# Configuration directories/files +%config(noreplace) %{prefix}/conf/pxf-application.properties +%config(noreplace) %{prefix}/conf/pxf-env.sh +%config(noreplace) %{prefix}/conf/pxf-log4j2.xml +%config(noreplace) %{prefix}/conf/pxf-profiles.xml + +%pre +# cleanup files and directories created by 'pxf init' command +# only applies for old installations (pre 6.0.0) +%__rm -f "${RPM_INSTALL_PREFIX}/conf/pxf-private.classpath" +%__rm -rf "${RPM_INSTALL_PREFIX}/pxf-service" + +%posttrans +# PXF v5 RPM installation removes the run directory during the %preun step. +# The lack of run directory prevents PXF v6+ from starting up. 
+# %posttrans of the new package is the only step that runs after the %preun +# of the old package +%{__install} -d -m 700 "${RPM_INSTALL_PREFIX}/run" diff --git a/server/pxf-service/src/scripts/pxf-post-gpupgrade b/server/pxf-service/src/scripts/pxf-post-gpupgrade index c1ca7552..e9f52419 100755 --- a/server/pxf-service/src/scripts/pxf-post-gpupgrade +++ b/server/pxf-service/src/scripts/pxf-post-gpupgrade @@ -57,11 +57,7 @@ if [[ "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then exit 1 fi -if [[ "${gp_version}" = 5* ]]; then - master_data_dir_query="SELECT fselocation FROM pg_catalog.pg_filespace_entry WHERE fsedbid = 1 AND fsefsoid = (SELECT oid FROM pg_catalog.pg_filespace WHERE fsname = 'pg_system')" -else - master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" -fi +master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${master_data_dir_query}")}" echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}" diff --git a/server/pxf-service/src/scripts/pxf-pre-gpupgrade b/server/pxf-service/src/scripts/pxf-pre-gpupgrade index e9aa2b88..24c352d6 100755 --- a/server/pxf-service/src/scripts/pxf-pre-gpupgrade +++ b/server/pxf-service/src/scripts/pxf-pre-gpupgrade @@ -57,11 +57,7 @@ if [[ "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then exit 1 fi -if [[ "${gp_version}" = 5* ]]; then - master_data_dir_query="SELECT fselocation FROM pg_catalog.pg_filespace_entry WHERE fsedbid = 1 AND fsefsoid = (SELECT oid FROM pg_catalog.pg_filespace WHERE fsname = 'pg_system')" -else - master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" -fi +master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only 
--command "${master_data_dir_query}")}" echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}" diff --git a/version b/version index 57d73f9f..3eefcb9d 100644 --- a/version +++ b/version @@ -1 +1 @@ -6.10.1-SNAPSHOT +1.0.0 --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
