From 9e9f6b39c5a9bfeec835a415ffff803c989d366c Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Mon, 9 Feb 2026 14:30:50 +0800 Subject: [PATCH 1/2] ASF: Update Apache compliance and branding migration This commit implements comprehensive changes to align the project with Apache Software Foundation incubation requirements and complete the migration from Greenplum to Apache Cloudberry branding. - Add DISCLAIMER file as required for Apache incubating projects - Update LICENSE file with comprehensive list of 245 files containing Apache License headers, organized by module (FDW, External Table, Server, Documentation, CI/Test templates) - Add Apache License headers to GitHub workflow files - Update CONTRIBUTING.md with Apache project contribution guidelines - Update README.md with Apache Cloudberry branding and simplified content - Update documentation templates in docs/content/ to use Cloudberry - Update automation and testing documentation - Migrate server scripts and Java components references - Update CI/CD workflows with proper Apache licensing - Clean up legacy CI documentation (remove ci/README.md) - Update build system references in Makefile - Enhance installation scripts to support both Cloudberry 2.0 and 2.1+ - Add transition guide for Cloudberry migration - Update all user-facing documentation with correct branding - Simplify README.md focusing on essential information - Update book configuration for documentation generation This change ensures full compliance with Apache incubation requirements while completing the transition to Apache Cloudberry ecosystem. 
--- .github/workflows/dependency-submission.yml | 21 ++ .github/workflows/pxf-ci.yml | 21 ++ CONTRIBUTING.md | 19 ++ DISCLAIMER | 9 + LICENSE | 78 ++++++ Makefile | 38 +-- README.md | 244 ++---------------- automation/README.Docker.md | 19 ++ automation/README.Linux.md | 7 +- automation/README.md | 2 +- automation/pxf_regress/README.md | 18 +- .../components/common/ShellSystemObject.java | 2 +- .../pxf/automation/components/gpdb/Gpdb.java | 6 +- .../sut/LocalToIPAMultiNodeHadoopHA.xml | 2 +- .../sut/MultiHadoopIPAMultiNodesCluster.xml | 2 +- .../sut/MultiHadoopMultiNodesCluster.xml | 2 +- .../test/resources/sut/MultiNodesCluster.xml | 2 +- automation/src/test/resources/sut/default.xml | 2 +- .../templates/gpdb/gpinitsystem_config | 8 +- ci/README.md | 143 ---------- .../ubuntu/script/entrypoint_kerberos.sh | 18 +- .../pxf-cbdb-dev/ubuntu/script/pxf-env.sh | 2 +- docs/book/config.yml | 18 +- docs/content/access_hdfs.html.md.erb | 2 +- docs/content/index.html.md.erb | 2 +- docs/content/instcfg_pxf.html.md.erb | 2 +- docs/content/intro_pxf.html.md.erb | 2 +- docs/content/overview_pxf.html.md.erb | 4 +- docs/content/ref/pxf-ref.html.md.erb | 2 +- .../transition_to_cloudberry.html.md.erb | 19 ++ docs/content/using_pxf.html.md.erb | 2 +- external-table/Makefile | 7 +- regression/README.md | 5 +- server/pxf-service/src/scripts/pxf | 18 +- .../src/scripts/pxf-post-gpupgrade | 14 +- .../pxf-service/src/scripts/pxf-pre-gpupgrade | 16 +- 36 files changed, 317 insertions(+), 461 deletions(-) create mode 100644 DISCLAIMER delete mode 100644 ci/README.md diff --git a/.github/workflows/dependency-submission.yml b/.github/workflows/dependency-submission.yml index 4f5b2b093..d0bce9dd5 100644 --- a/.github/workflows/dependency-submission.yml +++ b/.github/workflows/dependency-submission.yml @@ -1,3 +1,24 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license 
agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# -------------------------------------------------------------------- +# Apache Cloudberry PXF Dependency Submission Workflow +# -------------------------------------------------------------------- name: Dependency Submission on: diff --git a/.github/workflows/pxf-ci.yml b/.github/workflows/pxf-ci.yml index 83188f352..3666ab8e2 100644 --- a/.github/workflows/pxf-ci.yml +++ b/.github/workflows/pxf-ci.yml @@ -1,3 +1,24 @@ +# -------------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed +# with this work for additional information regarding copyright +# ownership. The ASF licenses this file to You under the Apache +# License, Version 2.0 (the "License"); you may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +# implied. See the License for the specific language governing +# permissions and limitations under the License. 
+# +# -------------------------------------------------------------------- +# Apache Cloudberry PXF CI Workflow +# -------------------------------------------------------------------- name: PXF CI Pipeline on: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6af259c58..02a2c06cc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,3 +1,22 @@ + + Apache Cloudberry community welcomes contributions from anyone, new and experienced! We appreciate your interest in contributing. This guide will help you get started with the contribution. diff --git a/DISCLAIMER b/DISCLAIMER new file mode 100644 index 000000000..144253767 --- /dev/null +++ b/DISCLAIMER @@ -0,0 +1,9 @@ +Apache Cloudberry is an effort undergoing incubation at The Apache +Software Foundation (ASF), sponsored by the Apache +Incubator. Incubation is required of all newly accepted projects until +a further review indicates that the infrastructure, communications, +and decision making process have stabilized in a manner consistent +with other successful ASF projects. While incubation status is not +necessarily a reflection of the completeness or stability of the code, +it does indicate that the project has yet to be fully endorsed by the +ASF. \ No newline at end of file diff --git a/LICENSE b/LICENSE index 4bd5496af..3c1f33f21 100644 --- a/LICENSE +++ b/LICENSE @@ -214,6 +214,84 @@ This product is derived from software originally developed by: notices and license terms. Your use of these subcomponents is subject to the terms and conditions of the subcomponent's license, as noted in the LICENSE file. 
+ +The Greenplum Platform Extension Framework includes: + +---------------------------- + Apache License - Version 2.0 + +The following files are licensed under the Apache License, Version 2.0: + +FDW Module: + fdw/libchurl.c + fdw/libchurl.h + fdw/pxf_bridge.c + fdw/pxf_bridge.h + fdw/pxf_filter.c + fdw/pxf_filter.h + fdw/pxf_header.c + fdw/pxf_header.h + +External Table Module: + external-table/src/gpdbwritableformatter.c + external-table/src/libchurl.c + external-table/src/libchurl.h + external-table/src/pxfbridge.c + external-table/src/pxfbridge.h + external-table/src/pxffilters.c + external-table/src/pxffilters.h + external-table/src/pxfheaders.c + external-table/src/pxfheaders.h + external-table/src/pxfprotocol.c + external-table/src/pxfuriparser.c + external-table/src/pxfuriparser.h + external-table/test/pxffilters_test.c + external-table/test/pxfheaders_test.c + external-table/test/pxfprotocol_test.c + external-table/test/pxfuriparser_test.c + +Server Module (Java Sources): + server/build.gradle + server/gradle.properties + server/settings.gradle + server/pxf-api/src/main/java/org/apache/cloudberry/pxf/api/*.java + server/pxf-api/src/test/java/org/apache/cloudberry/pxf/api/**/*.java + server/pxf-hbase/src/main/java/org/apache/cloudberry/pxf/plugins/hbase/**/*.java + server/pxf-hbase/src/test/java/org/apache/cloudberry/pxf/plugins/hbase/**/*.java + server/pxf-hdfs/src/main/java/org/apache/cloudberry/pxf/plugins/hdfs/**/*.java + server/pxf-hdfs/src/test/java/org/apache/cloudberry/pxf/plugins/hdfs/**/*.java + server/pxf-hive/src/main/java/org/apache/cloudberry/pxf/plugins/hive/**/*.java + server/pxf-hive/src/test/java/org/apache/cloudberry/pxf/plugins/hive/**/*.java + server/pxf-jdbc/src/main/java/org/apache/cloudberry/pxf/plugins/jdbc/**/*.java + server/pxf-jdbc/src/test/java/org/apache/cloudberry/pxf/plugins/jdbc/**/*.java + server/pxf-json/src/main/java/org/apache/cloudberry/pxf/plugins/json/**/*.java + 
server/pxf-json/src/test/java/org/apache/cloudberry/pxf/plugins/json/**/*.java + server/pxf-service/src/main/java/org/apache/cloudberry/pxf/service/**/*.java + server/pxf-service/src/test/java/org/apache/cloudberry/pxf/service/**/*.java + +Documentation Templates: + docs/content/*.html.md.erb + +Configuration Files: + server/pxf-api/src/test/resources/pxf-profiles-default.xml + server/pxf-hive/src/test/resources/pxf-profiles-default.xml + server/pxf-jdbc/src/test/resources/log4j.properties + server/pxf-json/src/test/resources/log4j.properties + server/pxf-service/src/main/resources/pxf-profiles-default.xml + server/pxf-service/src/templates/conf/pxf-profiles.xml + server/pxf-service/src/test/resources/pxf-profiles-default.xml + +CI/Test Templates: + automation/src/test/resources/templates/zk/zoo.cfg + ci/singlecluster/templates/hadoop/etc/hadoop/core-site.xml + ci/singlecluster/templates/hadoop/etc/hadoop/hdfs-site.xml + ci/singlecluster/templates/hadoop/etc/hadoop/yarn-env.sh + ci/singlecluster/templates/hbase/conf/hbase-env.sh + ci/singlecluster/templates/hbase/conf/hbase-site.xml + ci/singlecluster/templates/ranger/install.properties + ci/singlecluster/templates/tez/conf/tez-site.xml + ci/singlecluster/templates/usersync/install.properties + ======================================================================= This product bundles Gradle Wrapper, which is licensed under diff --git a/Makefile b/Makefile index caaac20ed..56c651e8d 100644 --- a/Makefile +++ b/Makefile @@ -84,6 +84,9 @@ install-server: stage: rm -rf build/stage + make -C $(SOURCE_EXTENSION_DIR) stage + make -C cli stage + make -C server stage ifneq ($(SKIP_EXTERNAL_TABLE_PACKAGE_REASON),) @echo "Skipping staging FDW extension because $(SKIP_EXTERNAL_TABLE_PACKAGE_REASON)" $(eval PXF_MODULES := $(filter-out external-table,$(PXF_MODULES))) @@ -100,7 +103,7 @@ endif cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ cp -a cli/build/stage/* build/stage/$${PXF_PACKAGE_NAME} 
;\ cp -a server/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ - echo $$(git rev-parse --verify HEAD) > build/stage/$${PXF_PACKAGE_NAME}/pxf/commit.sha ;\ + echo $$(git rev-parse --verify HEAD) > build/stage/$${PXF_PACKAGE_NAME}/commit.sha ;\ cp package/install_binary build/stage/$${PXF_PACKAGE_NAME}/install_component ;\ echo "===> PXF staging is complete <===" @@ -116,15 +119,14 @@ gppkg-rpm: rpm GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) cat package/gppkg_spec.yml.in | sed "s,#arch,`arch`," | sed "s,#os,$(TEST_OS)," | sed "s,#gppkgver,1.0," | sed "s,#gpver,1," > gppkg/gppkg_spec.yml find build/rpmbuild/RPMS -name pxf-cbdb$(GP_MAJOR_VERSION)-*.rpm -exec cp {} gppkg/ \; - source $(GPHOME)/greenplum_path.sh && gppkg --build gppkg + source $(GPHOME)/greenplum_path.sh || source $(GPHOME)/cloudberry-env.sh && gppkg --build gppkg -rpm: - make -C $(SOURCE_EXTENSION_DIR) stage - make -C cli stage - make -C server stage +rpm: stage set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - PXF_FULL_VERSION=$${PXF_VERSION} ;\ + GP_BUILD_ARCH=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/build_arch) ;\ + PXF_PACKAGE_NAME=pxf-cbdb$${GP_MAJOR_VERSION}-${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ + PXF_FULL_VERSION=${PXF_VERSION} ;\ PXF_MAIN_VERSION=$$(echo $${PXF_FULL_VERSION} | sed -E 's/(-SNAPSHOT|-rc[0-9]+)$$//') ;\ if [[ $${PXF_FULL_VERSION} == *"-SNAPSHOT" ]]; then \ PXF_RELEASE=SNAPSHOT; \ @@ -135,7 +137,7 @@ rpm: fi ;\ rm -rf build/rpmbuild ;\ mkdir -p build/rpmbuild/{BUILD,RPMS,SOURCES,SPECS} ;\ - cp -a build/stage/$${PXF_PACKAGE_NAME}/pxf/* build/rpmbuild/SOURCES ;\ + cp -a build/stage/$${PXF_PACKAGE_NAME}/* build/rpmbuild/SOURCES ;\ cp package/*.spec build/rpmbuild/SPECS/ ;\ rpmbuild \ --define "_topdir $${PWD}/build/rpmbuild" \ @@ -150,7 +152,7 @@ rpm-tar: rpm mkdir -p build/{stagerpm,distrpm} set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - 
PXF_RPM_FILE=$$(find build/rpmbuild/RPMS -name pxf-cbdb$${GP_MAJOR_VERSION}-*.rpm) ;\ + PXF_RPM_FILE=$$(find build/rpmbuild/RPMS -name cloudberry-pxf-*.rpm) ;\ PXF_RPM_BASE_NAME=$$(basename $${PXF_RPM_FILE%*.rpm}) ;\ PXF_PACKAGE_NAME=$${PXF_RPM_BASE_NAME%.*} ;\ mkdir -p build/stagerpm/$${PXF_PACKAGE_NAME} ;\ @@ -165,24 +167,24 @@ deb: stage PXF_MAIN_VERSION=$${PXF_VERSION//-SNAPSHOT/} ;\ if [[ $${PXF_VERSION} == *"-SNAPSHOT" ]]; then PXF_RELEASE=SNAPSHOT; else PXF_RELEASE=1; fi ;\ rm -rf build/debbuild ;\ - mkdir -p build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ - cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/$(TARGET_EXTENSION_DIR) ;\ - cp -a cli/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ - cp -a server/build/stage/pxf/* build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION} ;\ - echo $$(git rev-parse --verify HEAD) > build/debbuild/usr/local/pxf-cbdb$${GP_MAJOR_VERSION}/commit.sha ;\ + mkdir -p build/debbuild/usr/local/cloudberry-pxf/$(TARGET_EXTENSION_DIR) ;\ + cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/debbuild/usr/local/cloudberry-pxf/ ;\ + cp -a cli/build/stage/* build/debbuild/usr/local/cloudberry-pxf ;\ + cp -a server/build/stage/* build/debbuild/usr/local/cloudberry-pxf ;\ + echo $$(git rev-parse --verify HEAD) > build/debbuild/usr/local/cloudberry-pxf/commit.sha ;\ mkdir build/debbuild/DEBIAN ;\ cp -a package/DEBIAN/* build/debbuild/DEBIAN/ ;\ - sed -i -e "s/%VERSION%/$${PXF_MAIN_VERSION}-$${PXF_RELEASE}/" -e "s/%MAINTAINER%/${VENDOR}/" build/debbuild/DEBIAN/control ;\ + sed -i -e "s/%VERSION%/$${PXF_MAIN_VERSION}-$${PXF_RELEASE}/" -e "s/%MAINTAINER%/${VENDOR}/" -e "s/%ARCH%/$$(dpkg --print-architecture)/" build/debbuild/DEBIAN/control ;\ dpkg-deb --build build/debbuild ;\ - mv build/debbuild.deb build/pxf-cbdb$${GP_MAJOR_VERSION}-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-ubuntu18.04-amd64.deb + mv build/debbuild.deb 
build/cloudberry-pxf-$${PXF_MAIN_VERSION}-$${PXF_RELEASE}-$$(lsb_release -si | tr '[:upper:]' '[:lower:]')$$(lsb_release -sr)-$$(dpkg --print-architecture).deb deb-tar: deb rm -rf build/{stagedeb,distdeb} mkdir -p build/{stagedeb,distdeb} set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ - PXF_DEB_FILE=$$(find build/ -name pxf-cbdb$${GP_MAJOR_VERSION}*.deb) ;\ - PXF_PACKAGE_NAME=$$(dpkg-deb --field $${PXF_DEB_FILE} Package)-$$(dpkg-deb --field $${PXF_DEB_FILE} Version)-ubuntu18.04 ;\ + PXF_DEB_FILE=$$(find build/ -name cloudberry-pxf*.deb) ;\ + PXF_PACKAGE_NAME=$$(dpkg-deb --field $${PXF_DEB_FILE} Package)-$$(dpkg-deb --field $${PXF_DEB_FILE} Version)-$$(lsb_release -si | tr '[:upper:]' '[:lower:]')$$(lsb_release -rs) ;\ mkdir -p build/stagedeb/$${PXF_PACKAGE_NAME} ;\ cp $${PXF_DEB_FILE} build/stagedeb/$${PXF_PACKAGE_NAME} ;\ cp package/install_deb build/stagedeb/$${PXF_PACKAGE_NAME}/install_component ;\ diff --git a/README.md b/README.md index 26bbc1c4c..e580cd5a7 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # Platform Extension Framework (PXF) for Apache Cloudberry (Incubating) -[![Slack](https://img.shields.io/badge/Join_Slack-6a32c9)](https://communityinviter.com/apps/cloudberrydb/welcome) -[![Twitter Follow](https://img.shields.io/twitter/follow/cloudberrydb)](https://twitter.com/cloudberrydb) -[![Website](https://img.shields.io/badge/Visit%20Website-eebc46)](https://cloudberry.apache.org) +[![Website](https://img.shields.io/badge/Website-eebc46)](https://cloudberry.apache.org) +[![Documentation](https://img.shields.io/badge/Documentation-acd94a)](https://cloudberry.apache.org/docs) +[![Slack](https://img.shields.io/badge/Join_Slack-6a32c9)](https://inviter.co/apache-cloudberry) +[![Twitter Follow](https://img.shields.io/twitter/follow/ASFCloudberry)](https://twitter.com/ASFCloudberry) +[![WeChat](https://img.shields.io/badge/WeChat-eebc46)](https://cloudberry.apache.org/community/wechat) 
+[![Youtube](https://img.shields.io/badge/Youtube-gebc46)](https://youtube.com/@ApacheCloudberry) +[![GitHub Discussions](https://img.shields.io/github/discussions/apache/cloudberry)](https://github.com/apache/cloudberry/discussions) --- @@ -12,7 +16,7 @@ PXF is an extensible framework that allows a distributed database like Greenplum PXF includes built-in connectors for accessing data that exists inside HDFS files, Hive tables, HBase tables, JDBC-accessible databases and more. Users can also create their own connectors to other data storage or processing engines. -This project is forked from [greenplum/pxf](https://github.com/greenplum-db/pxf-archive) and customized for Apache Cloudberry. +This project is derived from [greenplum/pxf](https://github.com/greenplum-db/pxf-archive) and customized for Apache Cloudberry. ## Repository Contents @@ -23,20 +27,12 @@ This project is forked from [greenplum/pxf](https://github.com/greenplum-db/pxf- * `automation/` : Contains the automation and integration tests for PXF against the various datasources * `ci/` : Contains CI/CD environment and scripts (including singlecluster Hadoop environment) * `regression/` : Contains the end-to-end (integration) tests for PXF against the various datasources, utilizing the PostgreSQL testing framework `pg_regress` -* `downloads/` : An empty directory that serves as a staging location for Cloudberry RPMs for the development Docker image ## PXF Development Below are the steps to build and install PXF along with its dependencies including Cloudberry and Hadoop. -> [!Note] -> To start, ensure you have a `~/workspace` directory and have cloned the `pxf` and its prerequisites (shown below) under it. -(The name `workspace` is not strictly required but will be used throughout this guide.) 
- ```bash -mkdir -p ~/workspace -cd ~/workspace - git clone https://github.com/apache/cloudberry-pxf.git ``` @@ -49,22 +45,22 @@ To build PXF, you must have: Either download and install Cloudberry RPM or build Cloudberry from the source by following instructions in the [Cloudberry](https://github.com/apache/cloudberry). - Assuming you have installed Cloudberry into `/usr/local/cloudberrydb` directory, run its environment script: + Assuming you have installed Cloudberry into `/usr/local/cloudberry-db` directory, run its environment script: ``` - source /usr/local/cloudberrydb/greenplum_path.sh # For Cloudberry 2.0 - source /usr/local/cloudberrydb/cloudberry-env.sh # For Cloudberry 2.1+ + source /usr/local/cloudberry-db/greenplum_path.sh # For Cloudberry 2.0 + source /usr/local/cloudberry-db/cloudberry-env.sh # For Cloudberry 2.1+ ``` 3. JDK 1.8 or JDK 11 to compile/run Export your `JAVA_HOME`: ``` - export JAVA_HOME= + export JAVA_HOME=/usr/lib/jvm/java-11-openjdk ``` 4. Go (1.9 or later) - To install Go on CentOS, `sudo yum install go`. For other platforms, see the [Go downloads page](https://golang.org/dl/). + You can download and install Go via [Go downloads page](https://golang.org/dl/). Make sure to export your `GOPATH` and add go to your `PATH`. For example: ```shell @@ -78,46 +74,37 @@ To build PXF, you must have: go install github.com/onsi/ginkgo/ginkgo@latest ``` -5. cURL (7.29 or later): - - To install cURL devel package on CentOS 7, `sudo yum install libcurl-devel`. - - Note that CentOS 6 provides an older, unsupported version of cURL (7.19). You should install a newer version from source if you are on CentOS 6. - -### How to Build PXF +### Build PXF PXF uses Makefiles to build its components. PXF server component uses Gradle that is wrapped into the Makefile for convenience. 
```bash -cd ~/workspace/pxf +cd cloudberry-pxf/ -# Compile & Test PXF +# Compile PXF make - -# Only run unit tests -make test ``` -### How to Install PXF +### Install PXF -To install PXF, first make sure that the user has sufficient permissions in the `$GPHOME` and `$PXF_HOME` directories to perform the installation. It's recommended to change ownership to match the installing user. For example, when installing PXF as user `gpadmin` under `/usr/local/cloudberrydb`: +To install PXF, first make sure that the user has sufficient permissions in the `$GPHOME` and `$PXF_HOME` directories to perform the installation. It's recommended to change ownership to match the installing user. For example, when installing PXF as user `gpadmin` under `/usr/local/cloudberry-db`: ```bash -export GPHOME=/usr/local/cloudberrydb -export PXF_HOME=/usr/local/pxf +mkdir -p /usr/local/cloudberry-pxf +export PXF_HOME=/usr/local/cloudberry-pxf export PXF_BASE=${HOME}/pxf-base -chown -R gpadmin:gpadmin "${GPHOME}" "${PXF_HOME}" -make -C ~/workspace/pxf install +chown -R gpadmin:gpadmin "${PXF_HOME}" +make install ``` NOTE: if `PXF_BASE` is not set, it will default to `PXF_HOME`, and server configurations, libraries or other configurations, might get deleted after a PXF re-install. -### How to Run PXF +### Run PXF -Ensure that PXF is in your path. This command can be added to your .bashrc +Ensure that PXF is in your path. This command can be added to your `.bashrc`: ```bash -export PATH=/usr/local/pxf/bin:$PATH +export PATH=/usr/local/cloudberry-pxf/bin:$PATH ``` Then you can prepare and start up PXF by doing the following. @@ -143,151 +130,13 @@ After PXF has been re-installed, you can restart the PXF instance using: pxf restart ``` -### How to demonstrate Hadoop Integration -In order to demonstrate end to end functionality you will need Hadoop installed. We have all the related hadoop components (hdfs, hive, hbase, zookeeper, etc) mapped into simple artifact named singlecluster. 
-You can [download from here](https://storage.googleapis.com/pxf-public/singlecluster-HDP.tar.gz) and untar the `singlecluster-HDP.tar.gz` file, which contains everything needed to run Hadoop. - -```bash -mv singlecluster-HDP.tar.gz ~/workspace/ -cd ~/workspace -tar xzf singlecluster-HDP.tar.gz -``` - -Create a symlink using `ln -s ~/workspace/singlecluster-HDP ~/workspace/singlecluster` and then follow the steps in [Setup Hadoop](####Setup-Hadoop). - -While PXF can run on either Java 8 or Java 11, please ensure that you are running Java 8 for hdfs, hadoop, etc. Please set your java version by seting your `JAVA_HOME` to the appropriate location. - -On a Mac, you can set your java version using `JAVA_HOME` like so: -``` -export JAVA_HOME=`/usr/libexec/java_home -v 1.8` -```` - -Initialize the default server configurations: -``` -cp ${PXF_HOME}/templates/*-site.xml ${PXF_BASE}/servers/default -``` - -### Development With Docker +## Development With Docker > [!Note] > Since the docker container will house all Single cluster Hadoop, Cloudberry and PXF, we recommend that you have at least 4 cpus and 6GB memory allocated to Docker. These settings are available under docker preferences. We provide a Docker-based development environment that includes Cloudberry, Hadoop, and PXF. See [automation/README.Docker.md](automation/README.Docker.md) for detailed instructions. 
-**Quick Start:** - -```bash -# Build and start the development container -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml build -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml up -d - -# Enter the container and run setup -docker exec -it pxf-cbdb-dev bash -c \ - "cd /home/gpadmin/workspace/cloudberry-pxf/ci/docker/pxf-cbdb-dev/ubuntu && ./script/entrypoint.sh" - -# Run tests -docker exec -it pxf-cbdb-dev bash -c \ - "cd /home/gpadmin/workspace/cloudberry-pxf/ci/docker/pxf-cbdb-dev/ubuntu && ./script/run_tests.sh" - -# Stop and clean up -docker compose -f ci/docker/pxf-cbdb-dev/ubuntu/docker-compose.yml down -v -``` - -#### Setup Hadoop -Hdfs will be needed to demonstrate functionality. You can choose to start additional hadoop components (hive/hbase) if you need them. - -Setup [User Impersonation](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/Superusers.html) prior to starting the hadoop components (this allows the `gpadmin` user to access hadoop data). - -The Docker development environment automatically configures Hadoop. For manual setup, see [automation/README.Docker.md](automation/README.Docker.md). - -Setup and start HDFS -```bash -pushd ~/workspace/singlecluster/bin -echo y | ./init-gphd.sh -./start-hdfs.sh -popd -``` - -Start other optional components based on your need - -```bash -pushd ~/workspace/singlecluster/bin -# Start Hive -./start-yarn.sh -./start-hive.sh - -# Start HBase -./start-zookeeper.sh -./start-hbase.sh -popd -``` - -#### Setup Minio (optional) -Minio is an S3-API compatible local storage solution. The development docker image comes with Minio software pre-installed. MinIO is automatically started by the Docker development environment. - -After the server starts, you can access Minio UI at `http://localhost:9000` from the host OS. Use `admin` for the access key and `password` for the secret key when connecting to your local Minio instance. 
- -To run S3 automation tests, set `PROTOCOL=minio`. If later you would like to run Hadoop HDFS tests, unset this variable with `unset PROTOCOL` command. - -#### Setup PXF - -Install PXF Server -```bash -# Install PXF -make -C ~/workspace/pxf install - -# Start PXF -export PXF_JVM_OPTS="-Xmx512m -Xms256m" -$PXF_HOME/bin/pxf start -``` - -Install PXF client (ignore if this is already done) -```bash -psql -d template1 -c "create extension pxf" -``` - -#### Run PXF Tests -All tests use a database named `pxfautomation`. -```bash -pushd ~/workspace/pxf/automation - -# Initialize default server configs using template -cp ${PXF_HOME}/templates/{hdfs,mapred,yarn,core,hbase,hive}-site.xml ${PXF_BASE}/servers/default - -# Run specific tests. Example: Hdfs Smoke Test -make TEST=HdfsSmokeTest - -# Run all tests. This will be very time consuming. -make GROUP=gpdb - -# If you wish to run test(s) against a different storage protocol set the following variable (for eg: s3) -export PROTOCOL=s3 -popd -``` - -If you see any HBase failures, try copying `pxf-hbase-*.jar` to the HBase classpath, and restart HBase: - -``` -cp ${PXF_HOME}/lib/pxf-hbase-*.jar ~/workspace/singlecluster/hbase/lib/pxf-hbase.jar -~/workspace/singlecluster/bin/stop-hbase.sh -~/workspace/singlecluster/bin/start-hbase.sh -``` - -#### Make Changes to PXF - -To deploy your changes to PXF in the development environment. - -```bash -# $PXF_HOME folder is replaced each time you make install. -# So, if you have any config changes, you may want to back those up. -$PXF_HOME/bin/pxf stop -make -C ~/workspace/pxf install -# Make any config changes you had backed up previously -rm -rf $PXF_HOME/pxf-service -yes | $PXF_HOME/bin/pxf init -$PXF_HOME/bin/pxf start -``` - ## IDE Setup (IntelliJ) - Start IntelliJ. Click "Open" and select the directory to which you cloned the `pxf` repo. @@ -311,47 +160,6 @@ no JDK set for Gradle. Just cancel and retry. It goes away the second time. 
- Debug the new configuration in IntelliJ - Run a query in CloudberryDB that uses PXF to debug with IntelliJ -## To run a Kerberized Hadoop Cluster - -### Requirements - -- Download bin_gpdb (from any of the pipelines) -- Download pxf_tarball (from any of the pipelines) - -These instructions allow you to run a Kerberized cluster. See [automation/README.Docker.md](automation/README.Docker.md) for detailed Kerberos setup instructions. - -```bash -docker run --rm -it \ - --privileged \ - --hostname c6401.ambari.apache.org \ - -p 5432:5432 \ - -p 5888:5888 \ - -p 8000:8000 \ - -p 8080:8080 \ - -p 8020:8020 \ - -p 9000:9000 \ - -p 9090:9090 \ - -p 50070:50070 \ - -w /home/gpadmin/workspace \ - -v ~/workspace/cbdb:/home/gpadmin/workspace/gpdb_src \ - -v ~/workspace/pxf:/home/gpadmin/workspace/pxf_src \ - -v ~/workspace/singlecluster-HDP:/home/gpadmin/workspace/singlecluster \ - -v ~/Downloads/bin_cbdb:/home/gpadmin/workspace/bin_cbdb \ - -v ~/Downloads/pxf_tarball:/home/gpadmin/workspace/pxf_tarball \ - -e CLUSTER_NAME=hdp \ - -e NODE=c6401.ambari.apache.org \ - -e REALM=AMBARI.APACHE.ORG \ - gcr.io/$PROJECT_ID/gpdb-pxf-dev/gpdb6-centos7-test-pxf-hdp2 /bin/bash - -# Inside the container, you can use the scripts in ci/docker/pxf-cbdb-dev/ubuntu/script to set up and run tests. 
- -echo "+----------------------------------------------+" -echo "| Kerberos admin principal: admin/admin@$REALM |" -echo "| Kerberos admin password : admin |" -echo "+----------------------------------------------+" - -su - gpadmin -``` ## Contribute diff --git a/automation/README.Docker.md b/automation/README.Docker.md index db0c3bc96..83fba9640 100644 --- a/automation/README.Docker.md +++ b/automation/README.Docker.md @@ -1,3 +1,22 @@ + + # Running Automation in Docker ## Prerequisites diff --git a/automation/README.Linux.md b/automation/README.Linux.md index 2d7e557bb..f4031c825 100644 --- a/automation/README.Linux.md +++ b/automation/README.Linux.md @@ -5,17 +5,18 @@ They are intended to be used in tandem with the information in the main README f ## Locale Setup -Automation creates a GPDB database using the `ru_RU.CP1251` locale. You can generate the required locale files with +Automation creates a Cloudberry database using the `ru_RU.CP1251` locale. You can generate the required locale files with ```sh sudo sed -i.bak -e 's/# ru_RU.CP1251.*/ru_RU.CP1251 CP1251/' /etc/locale.gen sudo locale-gen ``` -After generating the locale, restart your GPDB cluster +After generating the locale, restart your Cloudberry cluster ```sh -source $GPHOME/greenplum_path.sh +source $GPHOME/greenplum_path.sh # For Cloudberry 2.0 +source $GPHOME/cloudberry-env.sh # For Cloudberry 2.1+ gpstop -a gpstart -a ``` diff --git a/automation/README.md b/automation/README.md index c7236d026..8300d0795 100755 --- a/automation/README.md +++ b/automation/README.md @@ -130,7 +130,7 @@ Note: If you get an error saying that the jar does not exist, ensure that you ha - `src/main/java` - contains related classes and utilities for the test - `src/test/java` - contains the TestNG cases. - `sqlrepo` - contains SQL test cases. -- `src/main/java/org/greenplum/pxf/automation/components` - contains all the supported services/components with simple API abstractions. 
+- `src/main/java/org/apache/cloudberry/pxf/automation/components` - contains all the supported services/components with simple API abstractions. ### General Automation Architecture diff --git a/automation/pxf_regress/README.md b/automation/pxf_regress/README.md index 645cf7eab..078208069 100644 --- a/automation/pxf_regress/README.md +++ b/automation/pxf_regress/README.md @@ -4,7 +4,7 @@ `pxf_regress` is a PSQL test runner written in Go that is heavily inspired by `pg_regress`. PXF's automation test framework sets up data in external data -storage (e.g., Hadoop, Amazon S3, etc), creates Greenplum external tables to +storage (e.g., Hadoop, Amazon S3, etc), creates Cloudberry external tables to work with these data sets, and then invokes `pxf_regress` to run SQL test cases via `psql` and compare the results with expected output. Instead of matching the features of `pg_regress` exactly, this utility currently implements the @@ -36,16 +36,16 @@ small_data └── query02.sql ``` -There are no command line flags; the GPDB cluster that `pxf_regress` connects +There are no command line flags; the Cloudberry cluster that `pxf_regress` connects to can be customized with standard [Postgres environment variables][1]. ### Why not use `pg_regress`? -Ideally, PXF would re-use `pg_regress` which is included with upstream GPDB; -however, PXF supports multiple GPDB versions (currently 5, 6, & 7) with a -single code base. Differences between the GPDB major versions and the included +Ideally, PXF would re-use `pg_regress` which is included with upstream Cloudberry; +however, PXF supports multiple Cloudberry versions with a +single code base. Differences between the Cloudberry major versions and the included `pg_regress` results in non-semantically meaningful (for PXF) differences. -GPDB's version of `pg_regress` uses a utility called `gpdiff.pl` to compare +Cloudberry's version of `pg_regress` uses a utility called `gpdiff.pl` to compare actual test output with expected test output. 
From the description of [`gpdiff.pl`][2]: @@ -56,9 +56,9 @@ actual test output with expected test output. From the description of > single PostgreSQL instance. When `pg_regress` runs `gpdiff.pl`, it runs the version of `gpdiff.pl` that is -included with GPDB (`$($GPHOME/bin/pg_config +included with Cloudberry (`$($GPHOME/bin/pg_config --libdir)/postgresql/pgxs/src/test/regress/gpdiff.pl`) with hard-coded options -that cannot be customized. Not only is `gpdiff.pl` different across GPDB major +that cannot be customized. Not only is `gpdiff.pl` different across Cloudberry major versions, the set of options that `pg_regress` runs it with will be different across major versions. @@ -119,4 +119,4 @@ $ tree smoke/small_data ``` [1]: https://www.postgresql.org/docs/12/libpq-envars.html -[2]: https://github.com/greenplum-db/gpdb/blob/main/src/test/regress/gpdiff.pl +[2]: https://github.com/apache/cloudberry/blob/main/src/test/regress/gpdiff.pl diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java index 3cdb1f156..e8dad0d46 100755 --- a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java +++ b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/common/ShellSystemObject.java @@ -57,7 +57,7 @@ public class ShellSystemObject extends BaseSystemObject { "GPHOME", "GPHD_ROOT", "GPDATA", - "MASTER_DATA_DIRECTORY", + "COORDINATOR_DATA_DIRECTORY", "PGPORT", "PGHOST", "PGDATABASE" diff --git a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java index 201881f6b..ad04a97f6 100755 --- a/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java +++ 
b/automation/src/main/java/org/apache/cloudberry/pxf/automation/components/gpdb/Gpdb.java @@ -22,7 +22,7 @@ public class Gpdb extends DbSystemObject { private static final String DEFAULT_PORT = "5432"; - private static final String GREENPLUM_DATABASE_PREFIX = "Greenplum Database "; + private static final String APACHE_CLOUDBERRY_PREFIX = "Apache Cloudberry "; private static final String IF_NOT_EXISTS_OPTION = "IF NOT EXISTS"; private String sshUserName; @@ -580,8 +580,8 @@ private int determineVersion() throws Exception { res.next(); String fullVersion = res.getString(1); ReportUtils.report(report, getClass(), "Retrieved from Greenplum: [" + fullVersion + "]"); - int gpIndex = fullVersion.indexOf(GREENPLUM_DATABASE_PREFIX); // where the version prefix starts - String prefix = GREENPLUM_DATABASE_PREFIX; + int gpIndex = fullVersion.indexOf(APACHE_CLOUDBERRY_PREFIX); // where the version prefix starts + String prefix = APACHE_CLOUDBERRY_PREFIX; // Cloudberry forks print strings like: // "PostgreSQL 14.4 (Apache Cloudberry 3.0.0-devel build dev) ..." 
// fall back to the Cloudberry prefix if the Greenplum one is missing diff --git a/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml b/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml index 437ac5a36..284baf493 100644 --- a/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml +++ b/automation/src/test/resources/sut/LocalToIPAMultiNodeHadoopHA.xml @@ -88,7 +88,7 @@ make TEST=HdfsHAFailoverTest - + diff --git a/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml b/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml index dc9ced06f..6b18b05c1 100644 --- a/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiHadoopIPAMultiNodesCluster.xml @@ -128,7 +128,7 @@ - + diff --git a/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml b/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml index 167d1507f..a6195c61f 100644 --- a/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiHadoopMultiNodesCluster.xml @@ -113,7 +113,7 @@ - + diff --git a/automation/src/test/resources/sut/MultiNodesCluster.xml b/automation/src/test/resources/sut/MultiNodesCluster.xml index a0f01e564..5d3e0ff08 100644 --- a/automation/src/test/resources/sut/MultiNodesCluster.xml +++ b/automation/src/test/resources/sut/MultiNodesCluster.xml @@ -87,7 +87,7 @@ - + diff --git a/automation/src/test/resources/sut/default.xml b/automation/src/test/resources/sut/default.xml index ed24017a1..7c9c4b689 100644 --- a/automation/src/test/resources/sut/default.xml +++ b/automation/src/test/resources/sut/default.xml @@ -97,7 +97,7 @@ - + diff --git a/automation/src/test/resources/templates/gpdb/gpinitsystem_config b/automation/src/test/resources/templates/gpdb/gpinitsystem_config index bbd1b3a78..9940ce05f 100755 --- a/automation/src/test/resources/templates/gpdb/gpinitsystem_config +++ 
b/automation/src/test/resources/templates/gpdb/gpinitsystem_config @@ -25,14 +25,14 @@ PORT_BASE=40000 DATA_DIRECTORY=(/data/gpdb/p1 /data/gpdb/p2) #### OS-configured hostname or IP address of the master host. -MASTER_HOSTNAME=centos64-1 +COORDINATOR_HOSTNAME=centos64-1 -#### File system location where the master data directory +#### File system location where the coordinator data directory #### will be created. -MASTER_DIRECTORY=/data/gpdb/master +COORDINATOR_DIRECTORY=/data/gpdb/coordinator #### Port number for the master instance. -MASTER_PORT=5432 +COORDINATOR_PORT=5432 #### Shell utility used to connect to remote hosts. TRUSTED_SHELL=ssh diff --git a/ci/README.md b/ci/README.md deleted file mode 100644 index 2af1578a1..000000000 --- a/ci/README.md +++ /dev/null @@ -1,143 +0,0 @@ -# Concourse pipeline deployment -To facilitate pipeline maintenance, a Python utility 'deploy` -is used to generate the different pipelines for PXF main, -PXF 5x and release pipelines. It also allows the generation -of acceptance and custom pipelines for developers to use. - -The utility uses the [Jinja2](http://jinja.pocoo.org/) template -engine for Python. This allows the generation of portions of the -pipeline from common blocks of pipeline code. Logic (Python code) can -be embedded to further manipulate the generated pipeline. - -# Deploy the `pxf-build` (release) pipeline - -To deploy the build pipeline for PXF, make sure PXF main branch is currently checked-out and run this command: - -```shell script -make -C "${HOME}/workspace/pxf/concourse" build -``` - -# Deploy the `pxf-certification` (release) pipeline - -To deploy the certifcation pipeline (forward compatibility) for PXF, make sure PXF main branch is currently checked-out and run this command: - -```shell script -make -C "${HOME}/workspace/pxf/concourse" certification -``` - -# Deploy the singlecluster pipeline - -The singlecluster pipeline generates the singlecluster tarball for CDH, HDP2, -and HDP3. 
The generated tarballs are then published to an S3 and GCS bucket. -The produced tarballs can then be consumed in the pxf-build pipelines. - -```shell script -make -C "${HOME}/workspace/pxf/concourse" singlecluster -``` - -# Deploy the cloudbuild pipeline - -```shell script -make -C "${HOME}/workspace/pxf/concourse" cloudbuild -``` - -# Deploy the pull-request pipeline - -```shell script -make -C "${HOME}/workspace/pxf/concourse" pr -``` - -# Deploy the performance pipelines - -10G Performance pipeline: - -```shell script -make SCALE=10 -C "${HOME}/workspace/pxf/concourse" perf -``` - -You can deploy a development version of the perf pipeline by substituting the name -of your development branch into `pxf-git-branch=main`. Also, make sure to change -the name of your development pipeline (i.e. `-p dev:`). - -50G Performance pipeline: - -```shell script -make SCALE=50 -C "${HOME}/workspace/pxf/concourse" perf -``` - -500G Performance pipeline: - -```shell script -make SCALE=500 -C "${HOME}/workspace/pxf/concourse" perf -``` - -By default, these pipelines run perf on RHEL7. -If you would like to run pipelines using RHEL8, please include `REDHAT_MAJOR_VERSION=8` to the command. -Ex: `make SCALE=10 REDHAT_MAJOR_VERSION=8 -C "${HOME}/workspace/pxf/concourse" perf` - -# Deploy development PXF release pipelines - -The dev release pipeline performs most functions of the `pxf-build` release pipeline except for the tagging and bumping of the build version. - -To deploy dev release pipeline, use: - -```shell -make -C "${HOME}/workspace/pxf/concourse" dev-release -``` - -# Deploy development PXF pipelines - -The dev pipeline is an abbreviated version of the `pxf-build` pipeline. 
- -To deploy dev pipeline against gpdb 5X_STABLE and 6X_STABLE branches, use: - -```shell -make -C "${HOME}/workspace/pxf/concourse" dev -``` - -To deploy multi-node dev pipeline, you can specify the following options -* `MULTINODE_EL7=` for EL7 -* `MULTINODE_EL8=` for EL8 -* `MULTINODE_EL9=` for EL9 -* `MULTINODE_NO_IMPERSONATION=` for EL7, which will also run CLI tests - -```shell -MULTINODE_EL7=true make -C "${HOME}/workspace/pxf/concourse" dev -``` - -This command will automatically point the pipeline at your currently checked-out branch of PXF. - -# Deploy Longevity Testing PXF pipeline -The longevity testing pipeline is designed to work off a PXF tag that needs to be provided as a parameter when -creating the pipeline. The generated pipeline compiles PXF, creates a Greenplum CCP cluster and 2 secure dataproc clusters -and runs a multi-cluster security test every 15 minutes. CCP cluster is set with expiration time of more than 6 months, so -it needs to be cleaned manually and so do the dataproc clusters. - -```shell -YOUR_TAG= make -C "${HOME}/workspace/pxf/concourse" longevity -``` - -## Uploading a new Apache Maven 3 version - -The CI pipelines for PXF run automation tests using Apache Maven 3.x. Instead of downloading this directly from the Apache -mirrors or Apache archive, we store a copy in Google Cloud Storage to use when we create our images in Cloudbuild. -Typically, we will not be updating these values very often. However, if we need to upload a new version of Maven, you -can use a snippet like this one to download and then upload to GCS. 
- -```bash -./scripts/download-maven-from-apache-mirror.sh -gcloud storage cp ../downloads/apache-maven--bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -# Example for Apache Maven 3.9.2 -./scripts/download-spark-from-apache-mirror.sh 3.9.2 -gcloud storage cp ../downloads/apache-maven-3.9.2-bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -# Example for Apache Maven 3 Latest -$ ./scripts/download-spark-from-apache-mirror.sh latest -> Looking for latest maven-3 version... -> Latest maven version determined to be: 3.9.3 -> Would you like to proceed (y/n)? y - -gcloud storage cp ../downloads/apache-maven-3.9.3-bin.tar.gz gs://data-gpdb-ud-pxf-build-resources/apache-maven - -``` diff --git a/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh b/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh index f64fabeea..52a26f351 100755 --- a/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh +++ b/ci/docker/pxf-cbdb-dev/ubuntu/script/entrypoint_kerberos.sh @@ -35,7 +35,7 @@ ADMIN_PASS=${ADMIN_PASS:-AdminPass@123} PXF_BASE=${PXF_BASE:-/home/gpadmin/pxf-base} GPHOME=${GPHOME:-/usr/local/cloudberry-db} # GPDB demo master path is required by pg_hba reloads; define a default up front. -MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} +COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} # Java locations vary by arch; prefer Java 8 for Hadoop runtime and Java 11 for builds if needed. JAVA_11_ARM=/usr/lib/jvm/java-11-openjdk-arm64 @@ -844,8 +844,8 @@ configure_pg_hba() { } | awk '!seen[$0]++' | sudo tee "${tmp_pg_hba}" >/dev/null sudo mv "${tmp_pg_hba}" "${PG_HBA}" # Reload cluster so new HBA rules take effect immediately for test users. 
- if [ -n "${MASTER_DATA_DIRECTORY}" ] && [ -x "${GPHOME}/bin/pg_ctl" ]; then - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + if [ -n "${COORDINATOR_DATA_DIRECTORY}" ] && [ -x "${GPHOME}/bin/pg_ctl" ]; then + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi } @@ -875,7 +875,7 @@ ensure_gpdb_databases() { sudo -u gpadmin env ${env_path} "${createdb_bin}" "${conn_flags[@]}" -E UTF8 pxfautomation_encoding >/dev/null 2>&1 || true fi - sudo -u gpadmin env MASTER_DATA_DIRECTORY="${mdd}" GPHOME="${gphome}" "${gphome}/bin/pg_ctl" reload -D "${mdd}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY="${mdd}" GPHOME="${gphome}" "${gphome}/bin/pg_ctl" reload -D "${mdd}" >/dev/null 2>&1 || true } verify_security_mode() { @@ -1074,7 +1074,7 @@ init_test_env() { export PGPORT=${PGPORT:-7000} export PGDATABASE=${PGDATABASE:-pxfautomation} export PGUSER=${PGUSER:-gpadmin} - export MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} + export COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} export GPHOME=${GPHOME:-/usr/local/cloudberry-db} export PATH=/usr/local/bin:${GPHOME}/bin:${PATH} export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/home/gpadmin/workspace/singlecluster/hadoop/etc/hadoop} @@ -1137,19 +1137,19 @@ EOS pgrep -f sshd >/dev/null 2>&1 || sudo service ssh start >/dev/null 2>&1 || true if ! 
pgrep -f "${GPHOME}/bin/postgres" >/dev/null 2>&1; then - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/gpstart" -a >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/gpstart" -a >/dev/null 2>&1 || true fi if [ -f "${PG_HBA}" ] && ! grep -q "mdw/32 trust" "${PG_HBA}"; then sed -i '1ihost all all mdw/32 trust' "${PG_HBA}" || echo "host all all mdw/32 trust" | sudo tee -a "${PG_HBA}" >/dev/null - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi if [ -f "${PG_HBA}" ] && ! grep -q "172.18.0.0/16" "${PG_HBA}"; then sed -i '1ihost all all 172.18.0.0/16 trust' "${PG_HBA}" || echo "host all all 172.18.0.0/16 trust" | sudo tee -a "${PG_HBA}" >/dev/null - sudo -u gpadmin env MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${MASTER_DATA_DIRECTORY}" >/dev/null 2>&1 || true + sudo -u gpadmin env COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY} GPHOME=${GPHOME} "${GPHOME}/bin/pg_ctl" reload -D "${COORDINATOR_DATA_DIRECTORY}" >/dev/null 2>&1 || true fi sudo -u gpadmin env PGHOST=${PGHOST} PGPORT=${PGPORT} PGUSER=${PGUSER} "${GPHOME}/bin/createdb" -T template1 pxfautomation >/dev/null 2>&1 || true sudo -u gpadmin env PGHOST=${PGHOST} PGPORT=${PGPORT} PGUSER=${PGUSER} "${GPHOME}/bin/createdb" -T template0 --encoding=WIN1251 --lc-collate=C --lc-ctype=C pxfautomation_encoding >/dev/null 2>&1 || true - ensure_gpdb_databases "${PGHOST}" "${PGPORT}" "${GPHOME}" "${MASTER_DATA_DIRECTORY}" + ensure_gpdb_databases "${PGHOST}" "${PGPORT}" "${GPHOME}" "${COORDINATOR_DATA_DIRECTORY}" for stub in 
pxf-pre-gpupgrade pxf-post-gpupgrade; do if [ ! -x "/usr/local/bin/${stub}" ]; then sudo tee "/usr/local/bin/${stub}" >/dev/null <<'SH' diff --git a/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh b/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh index 545885164..2743d4a26 100755 --- a/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh +++ b/ci/docker/pxf-cbdb-dev/ubuntu/script/pxf-env.sh @@ -35,7 +35,7 @@ export COMMON_JAVA_OPTS=${COMMON_JAVA_OPTS:-} # -------------------------------------------------------------------- export PGHOST=${PGHOST:-localhost} export PGPORT=${PGPORT:-7000} -export MASTER_DATA_DIRECTORY=${MASTER_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} +export COORDINATOR_DATA_DIRECTORY=${COORDINATOR_DATA_DIRECTORY:-/home/gpadmin/workspace/cloudberry/gpAux/gpdemo/datadirs/qddir/demoDataDir-1} # set cloudberry timezone utc export PGTZ=UTC diff --git a/docs/book/config.yml b/docs/book/config.yml index 361733b4e..ec750a963 100644 --- a/docs/book/config.yml +++ b/docs/book/config.yml @@ -1,4 +1,4 @@ -book_repo: greenplum-db/docs/book +book_repo: apache/cloudberry-pxf/docs/book public_host: localhost:9292 @@ -9,13 +9,13 @@ sections: subnav_template: pxf-subnav template_variables: - book_title: Greenplum Database PXF Documentation - book_title_short: Greenplum Database PXF Docs - domain_name: greenplum.org - product_link: - product_url: https://greenplum.org - support_call_to_action: Need Support? - support_link: Wiki - support_url: https://greenplum.org + book_title: Apache Cloudberry PXF Documentation + book_title_short: Apache Cloudberry PXF Docs + domain_name: cloudberry.apache.org + product_link: + product_url: https://cloudberry.apache.org + support_call_to_action: Need Support? 
+ support_link: GitHub + support_url: https://cloudberry.apache.org broken_link_exclusions: iefix|arrowhead diff --git a/docs/content/access_hdfs.html.md.erb b/docs/content/access_hdfs.html.md.erb index 30babaa21..fd87c7090 100644 --- a/docs/content/access_hdfs.html.md.erb +++ b/docs/content/access_hdfs.html.md.erb @@ -32,7 +32,7 @@ HDFS is the primary distributed storage mechanism used by Apache Hadoop. When a Figure: PXF-to-Hadoop Architecture -![Greenplum Platform Extenstion Framework to Hadoop Architecture](graphics/pxfarch.png "Greenplum Platform Extension Framework-to-Hadoop Architecture") +![Apache Cloudberry Platform Extension Framework to Hadoop Architecture](graphics/pxfarch.png "Apache Cloudberry Platform Extension Framework-to-Hadoop Architecture") A PXF worker thread works on behalf of a segment instance. A worker thread uses its Greenplum Database `gp_segment_id` and the file block information described in the metadata to assign itself a specific portion of the query data. This data may reside on one or more HDFS DataNodes. diff --git a/docs/content/index.html.md.erb b/docs/content/index.html.md.erb index 480ee7e91..aa7c1cc39 100644 --- a/docs/content/index.html.md.erb +++ b/docs/content/index.html.md.erb @@ -21,7 +21,7 @@ specific language governing permissions and limitations under the License. --> -The Greenplum Platform Extension Framework (PXF) provides parallel, high throughput data access and federated queries across heterogeneous data sources via built-in connectors that map a Greenplum Database external table definition to an external data source. PXF has its roots in the Apache HAWQ project. +The Apache Cloudberry Platform Extension Framework (PXF) provides parallel, high throughput data access and federated queries across heterogeneous data sources via built-in connectors that map a Greenplum Database external table definition to an external data source. PXF has its roots in the Apache HAWQ project.
- [Overview of PXF](overview_pxf.html) - [Transitioning to Apache Cloudberry](transition_to_cloudberry.html) diff --git a/docs/content/instcfg_pxf.html.md.erb b/docs/content/instcfg_pxf.html.md.erb index 4b7a0a0d8..af9a4811c 100644 --- a/docs/content/instcfg_pxf.html.md.erb +++ b/docs/content/instcfg_pxf.html.md.erb @@ -1,7 +1,7 @@ --- title: Configuring PXF --- -Your Greenplum Database deployment consists of a coordinator host, a standby coordinator host, and multiple segment hosts. After you configure the Greenplum Platform Extension Framework (PXF), you start a single PXF JVM process (PXF Service) on each Greenplum Database host. +Your Greenplum Database deployment consists of a coordinator host, a standby coordinator host, and multiple segment hosts. After you configure the Apache Cloudberry Platform Extension Framework (PXF), you start a single PXF JVM process (PXF Service) on each Greenplum Database host. PXF provides connectors to Hadoop, Hive, HBase, object stores, network file systems, and external SQL data stores. You must configure PXF to support the connectors that you plan to use. diff --git a/docs/content/intro_pxf.html.md.erb b/docs/content/intro_pxf.html.md.erb index 59c2ec7cf..6f0c44c69 100644 --- a/docs/content/intro_pxf.html.md.erb +++ b/docs/content/intro_pxf.html.md.erb @@ -2,7 +2,7 @@ title: Introduction to PXF --- -The Greenplum Platform Extension Framework (PXF) provides *connectors* that enable you to access data stored in sources external to your Greenplum Database deployment. These connectors map an external data source to a Greenplum Database *external table* definition. When you create the Greenplum Database external table, you identify the external data store and the format of the data via a *server* name and a *profile* name that you provide in the command. +The Apache Cloudberry Platform Extension Framework (PXF) provides *connectors* that enable you to access data stored in sources external to your Greenplum Database deployment. 
These connectors map an external data source to a Greenplum Database *external table* definition. When you create the Greenplum Database external table, you identify the external data store and the format of the data via a *server* name and a *profile* name that you provide in the command. You can query the external table via Greenplum Database, leaving the referenced data in place. Or, you can use the external table to load the data into Greenplum Database for higher performance. diff --git a/docs/content/overview_pxf.html.md.erb b/docs/content/overview_pxf.html.md.erb index a1b430958..05517852b 100644 --- a/docs/content/overview_pxf.html.md.erb +++ b/docs/content/overview_pxf.html.md.erb @@ -1,5 +1,5 @@ --- -title: Greenplum Platform Extension Framework (PXF) +title: Apache Cloudberry Platform Extension Framework (PXF) --- + The transition of the PXF project to **Apache Cloudberry (Incubating)** involves a significant rebranding effort. As part of this transition, the Java package namespace has been changed from `org.greenplum` to `org.apache.cloudberry`. This is a user-facing breaking change. If you have customized PXF configuration files in your `$PXF_BASE/conf` directory, you must manually update these files to use the new package names. diff --git a/docs/content/using_pxf.html.md.erb b/docs/content/using_pxf.html.md.erb index 96dea3335..82094676d 100644 --- a/docs/content/using_pxf.html.md.erb +++ b/docs/content/using_pxf.html.md.erb @@ -21,7 +21,7 @@ specific language governing permissions and limitations under the License. --> -The Greenplum Platform Extension Framework (PXF) implements a protocol named `pxf` that you can use to create an external table that references data in an external data store. The PXF protocol and Java service are packaged as a Greenplum Database extension. 
+The Apache Cloudberry Platform Extension Framework (PXF) implements a protocol named `pxf` that you can use to create an external table that references data in an external data store. The PXF protocol and Java service are packaged as a Greenplum Database extension. You must enable the PXF extension in each database in which you plan to use the framework to access external data. You must also explicitly `GRANT` permission to the `pxf` protocol to those users/roles who require access. diff --git a/external-table/Makefile b/external-table/Makefile index 55ba4d924..ae5195dcd 100644 --- a/external-table/Makefile +++ b/external-table/Makefile @@ -18,11 +18,14 @@ include $(PGXS) .PHONY: stage stage: pxf.so mkdir -p build/stage/gpextable + mkdir -p build/metadata install -c -m 755 pxf.so build/stage/gpextable/pxf.so install -c -m 644 pxf.control build/stage/gpextable/ install -c -m 644 $(DATA) build/stage/gpextable/ - @echo "cloudberry.version=$(CLB_VERSION)" > build/stage/gpextable/metadata - @echo "cloudberry.major-version=$(CLB_MAJORVERSION)" >> build/stage/gpextable/metadata + @echo "$(GP_MAJORVERSION)" > build/metadata/gp_major_version + @echo "$(shell uname -m)" > build/metadata/build_arch + @echo "cloudberry.version=$(GP_VERSION)" > build/stage/gpextable/metadata + @echo "cloudberry.major-version=$(GP_MAJORVERSION)" >> build/stage/gpextable/metadata .PHONY: clean-all clean-all: clean diff --git a/regression/README.md b/regression/README.md index e9ce59044..44e7ab8b3 100644 --- a/regression/README.md +++ b/regression/README.md @@ -14,7 +14,7 @@ Running the tests ## Pre-requisites You need a running instance of Greenplum and PXF, along with a local installation of Greenplum (to be able to use the `pg_regress` framework). -The variables `PGHOST` and `PGPORT` must be pointing at the Greenplum master node, and Greenplum environment scripts like `${GPHOME}/greenplum_path.sh` and `gpdb/gpAux/gpdemo/gpdemo-env.sh` should be sourced. 
+The variables `PGHOST` and `PGPORT` must be pointing at the Greenplum master node, and Greenplum environment scripts like `${GPHOME}/greenplum_path.sh` (for Cloudberry 2.0) or `${GPHOME}/cloudberry-env.sh` (for Cloudberry 2.1+) should be sourced. `pg_config` must be on your path. For data prep, the appropriate CLIs are required, as we shell out from SQL to these CLIs. These include `hdfs`, `hbase`, and `beeline`. @@ -46,8 +46,7 @@ By setting environment variables you can change the location of the Greenplum ma ### General environment variables -All the general environment variables that come from `greenplum_path.sh` and -`gpdemo-env.sh` must be set. Additionally, `PXF_BASE` must be set if different +All the general environment variables that come from `greenplum_path.sh` (for Cloudberry 2.0) or `cloudberry-env.sh` (for Cloudberry 2.1+) must be set. Additionally, `PXF_BASE` must be set if different from `PXF_HOME`. * `PXF_TEST_DEBUG`: set to anything to prevent deletion of data, and to run `pg_regress` in debug mode (optional) diff --git a/server/pxf-service/src/scripts/pxf b/server/pxf-service/src/scripts/pxf index 0a4229833..5e3f09054 100755 --- a/server/pxf-service/src/scripts/pxf +++ b/server/pxf-service/src/scripts/pxf @@ -217,7 +217,7 @@ function doHelp() { restart restart the local PXF server instance (not supported for cluster) status show the status of the local PXF server instance version show the version of PXF server - register install PXF extension under \$GPHOME (useful after upgrades of Greenplum server) + register install PXF extension under \$GPHOME (useful after upgrades of Cloudberry server) prepare prepares a new base directory specified by the \$PXF_BASE environment variable. It creates the servers, logs, lib, keytabs, and run directories inside \$PXF_BASE and copies configuration files. 
@@ -253,11 +253,11 @@ function doReset() { function installExtensions() { if [[ -d ${parent_script_dir}/gpextable ]]; then if [[ -z "${GPHOME}" ]]; then - echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Greenplum External Table PXF Extension' - elif [[ ! -f ${GPHOME}/greenplum_path.sh ]]; then - echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Greenplum installation, skipping install of Greenplum External Table PXF Extension'" + echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Cloudberry External Table PXF Extension' + elif [[ ! -f ${GPHOME}/greenplum_path.sh && ! -f ${GPHOME}/cloudberry-env.sh ]]; then + echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Cloudberry installation, skipping install of Cloudberry External Table PXF Extension" else - echoGreen "Installing Greenplum External Table PXF Extension into '${GPHOME}'" + echoGreen "Installing Cloudberry External Table PXF Extension into '${GPHOME}'" local target_control_file="${GPHOME}/share/postgresql/extension/pxf.control" install --verbose --mode=0644 "${parent_script_dir}/gpextable/pxf.control" "${target_control_file}" || fail "cannot install pxf.control to '${target_control_file}'" @@ -265,11 +265,11 @@ function installExtensions() { fi if [[ -d ${parent_script_dir}/fdw ]]; then if [[ -z "${GPHOME}" ]]; then - echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Greenplum Foreign Data Wrapper PXF Extension' - elif [[ ! -f ${GPHOME}/greenplum_path.sh ]]; then - echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Greenplum installation, skipping install of Greenplum Foreign Data Wrapper PXF Extension'" + echoYellow 'WARNING: environment variable GPHOME is not set, skipping install of Cloudberry Foreign Data Wrapper PXF Extension' + elif [[ ! -f ${GPHOME}/greenplum_path.sh && ! 
-f ${GPHOME}/cloudberry-env.sh ]]; then + echoYellow "WARNING: environment variable GPHOME (${GPHOME}) must be set to a valid Cloudberry installation, skipping install of Cloudberry Foreign Data Wrapper PXF Extension" else - echoGreen "Installing Greenplum Foreign Data Wrapper PXF Extension into '${GPHOME}'" + echoGreen "Installing Cloudberry Foreign Data Wrapper PXF Extension into '${GPHOME}'" local target_control_file="${GPHOME}/share/postgresql/extension/pxf_fdw.control" install --verbose --mode=0644 "${parent_script_dir}/fdw/pxf_fdw.control" "${target_control_file}" || fail "cannot install pxf_fdw.control to '${target_control_file}'" diff --git a/server/pxf-service/src/scripts/pxf-post-gpupgrade b/server/pxf-service/src/scripts/pxf-post-gpupgrade index 5f017da43..59aa3ad1e 100755 --- a/server/pxf-service/src/scripts/pxf-post-gpupgrade +++ b/server/pxf-service/src/scripts/pxf-post-gpupgrade @@ -56,24 +56,24 @@ EOF metadata_file="${PXF_HOME}/gpextable/metadata" pxf_gpdb_major_version="" if [[ -f "${metadata_file}" ]]; then - pxf_gpdb_major_version="$(awk 'BEGIN { FS = \"=\" } /cloudberry.major-version/{ print $2 }' \"${metadata_file}\")" + pxf_gpdb_major_version="$(awk 'BEGIN { FS = "=" } /cloudberry.major-version/{ print $2 }' "${metadata_file}")" else - echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/GPDB compatibility check" >>"${log_file}" + echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/Cloudberry compatibility check" >>"${log_file}" fi gp_version="$(psql --no-align --tuples-only --command "SHOW server_version")" pxf_version="$(cat "${PXF_HOME}"/version)" -echo "PXF ${pxf_version} compiled against GPDB major version '${pxf_gpdb_major_version}'" >>"${log_file}" -echo "Running GPDB cluster is version '${gp_version}'" >>"${log_file}" +echo "PXF ${pxf_version} compiled against Cloudberry major version '${pxf_gpdb_major_version}'" >>"${log_file}" +echo "Running Cloudberry cluster is version '${gp_version}'" >>"${log_file}" if [[ -n "${pxf_gpdb_major_version}" && "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then - echo "ERROR: 
This version of PXF only works with GPDB ${pxf_gpdb_major_version}+ but the targeted GPDB cluster is ${gp_version}" | tee -a "${log_file}" + echo "ERROR: This version of PXF only works with Cloudberry ${pxf_gpdb_major_version}+ but the targeted Cloudberry cluster is ${gp_version}" | tee -a "${log_file}" exit 1 fi -master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" -export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${master_data_dir_query}")}" -echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}" +coordinator_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" +export COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${coordinator_data_dir_query}")}" +echo "Cloudberry coordinator data directory is '${COORDINATOR_DATA_DIRECTORY}'" >>"${log_file}" if [[ -d "${PXF_HOME}/gpextable" ]]; then PXF_HOME_REGEX="(.*:)*\/gpextable.*" diff --git a/server/pxf-service/src/scripts/pxf-pre-gpupgrade b/server/pxf-service/src/scripts/pxf-pre-gpupgrade index 1306aa400..c7a69535e 100755 --- a/server/pxf-service/src/scripts/pxf-pre-gpupgrade +++ b/server/pxf-service/src/scripts/pxf-pre-gpupgrade @@ -56,24 +56,24 @@ EOF metadata_file="${PXF_HOME}/gpextable/metadata" pxf_gpdb_major_version="" if [[ -f "${metadata_file}" ]]; then - pxf_gpdb_major_version="$(awk 'BEGIN { FS = \"=\" } /cloudberry.major-version/{ print $2 }' \"${metadata_file}\")" + pxf_gpdb_major_version="$(awk 'BEGIN { FS = "=" } /cloudberry.major-version/{ print $2 }' "${metadata_file}")" else - echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/GPDB compatibility check" >>"${log_file}" + echo "WARNING: metadata file '${metadata_file}' not found; skipping PXF/Cloudberry compatibility check" >>"${log_file}" fi gp_version="$(psql --no-align --tuples-only --command "SHOW server_version")" 
pxf_version="$(cat "${PXF_HOME}"/version)" -echo "PXF ${pxf_version} compiled against GPDB major version '${pxf_gpdb_major_version}'" >>"${log_file}" -echo "Running GPDB cluster is version '${gp_version}'" >>"${log_file}" +echo "PXF ${pxf_version} compiled against Cloudberry major version '${pxf_gpdb_major_version}'" >>"${log_file}" +echo "Running Cloudberry cluster is version '${gp_version}'" >>"${log_file}" if [[ -n "${pxf_gpdb_major_version}" && "${pxf_gpdb_major_version}" != "${gp_version%%.*}" ]]; then - echo "ERROR: This version of PXF only works with GPDB ${pxf_gpdb_major_version}+ but the targeted GPDB cluster is ${gp_version}" | tee -a "${log_file}" + echo "ERROR: This version of PXF only works with Cloudberry ${pxf_gpdb_major_version}+ but the targeted Cloudberry cluster is ${gp_version}" | tee -a "${log_file}" exit 1 fi -master_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" -export MASTER_DATA_DIRECTORY="${MASTER_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${master_data_dir_query}")}" -echo "GPDB master data directory is '${MASTER_DATA_DIRECTORY}'" >>"${log_file}" +coordinator_data_dir_query="SELECT datadir FROM pg_catalog.gp_segment_configuration WHERE dbid = 1" +export COORDINATOR_DATA_DIRECTORY="${COORDINATOR_DATA_DIRECTORY:-$(psql --no-align --tuples-only --command "${coordinator_data_dir_query}")}" +echo "Cloudberry coordinator data directory is '${COORDINATOR_DATA_DIRECTORY}'" >>"${log_file}" if [[ -d "${PXF_HOME}/gpextable" ]]; then PXF_HOME_REGEX="(.*:)*\/gpextable.*" From 618128ebfb0586193637bb9ec3c5fbb50980638b Mon Sep 17 00:00:00 2001 From: Dianjin Wang Date: Mon, 9 Feb 2026 14:54:07 +0800 Subject: [PATCH 2/2] Package: update rpm and deb package files Main changes are included: * Rename package from pxf-gpX to cloudberry-pxf * Update installation paths to /usr/local/cloudberry-pxf-[VERSION] * Remove legacy pxf-gp7.spec and pxf-cbdb1.spec files * Update DEBIAN package control files with 
new naming * Standardize package configuration based on cloudberry-pxf.spec --- Makefile | 6 +-- package/DEBIAN/conffiles | 8 ++-- package/DEBIAN/control | 6 +-- package/DEBIAN/postinst | 4 +- package/DEBIAN/prerm | 6 +-- package/README.md | 50 +++++++++++------------ package/cloudberry-pxf.spec | 2 - package/install_binary | 12 +++--- package/install_deb | 4 +- package/install_rpm | 4 +- package/pxf-cbdb1.spec | 79 ------------------------------------- package/pxf-gp7.spec | 79 ------------------------------------- 12 files changed, 50 insertions(+), 210 deletions(-) delete mode 100644 package/pxf-cbdb1.spec delete mode 100644 package/pxf-gp7.spec diff --git a/Makefile b/Makefile index 56c651e8d..e62209da1 100644 --- a/Makefile +++ b/Makefile @@ -98,7 +98,7 @@ endif set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ GP_BUILD_ARCH=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/build_arch) ;\ - PXF_PACKAGE_NAME=pxf-cbdb$${GP_MAJOR_VERSION}-$${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ + PXF_PACKAGE_NAME=pxf-cloudberry$${GP_MAJOR_VERSION}-$${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ mkdir -p build/stage/$${PXF_PACKAGE_NAME} ;\ cp -a $(SOURCE_EXTENSION_DIR)/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ cp -a cli/build/stage/* build/stage/$${PXF_PACKAGE_NAME} ;\ @@ -118,14 +118,14 @@ gppkg-rpm: rpm mkdir -p gppkg/deps GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) cat package/gppkg_spec.yml.in | sed "s,#arch,`arch`," | sed "s,#os,$(TEST_OS)," | sed "s,#gppkgver,1.0," | sed "s,#gpver,1," > gppkg/gppkg_spec.yml - find build/rpmbuild/RPMS -name pxf-cbdb$(GP_MAJOR_VERSION)-*.rpm -exec cp {} gppkg/ \; + find build/rpmbuild/RPMS -name pxf-cloudberry$(GP_MAJOR_VERSION)-*.rpm -exec cp {} gppkg/ \; source $(GPHOME)/greenplum_path.sh || source $(GPHOME)/cloudberry-env.sh && gppkg --build gppkg rpm: stage set -e ;\ GP_MAJOR_VERSION=$$(cat $(SOURCE_EXTENSION_DIR)/build/metadata/gp_major_version) ;\ GP_BUILD_ARCH=$$(cat 
$(SOURCE_EXTENSION_DIR)/build/metadata/build_arch) ;\ - PXF_PACKAGE_NAME=pxf-cbdb$${GP_MAJOR_VERSION}-${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ + PXF_PACKAGE_NAME=pxf-cloudberry$${GP_MAJOR_VERSION}-${PXF_VERSION}-$${GP_BUILD_ARCH} ;\ PXF_FULL_VERSION=${PXF_VERSION} ;\ PXF_MAIN_VERSION=$$(echo $${PXF_FULL_VERSION} | sed -E 's/(-SNAPSHOT|-rc[0-9]+)$$//') ;\ if [[ $${PXF_FULL_VERSION} == *"-SNAPSHOT" ]]; then \ diff --git a/package/DEBIAN/conffiles b/package/DEBIAN/conffiles index 622d6eba5..a2f459ca8 100644 --- a/package/DEBIAN/conffiles +++ b/package/DEBIAN/conffiles @@ -1,4 +1,4 @@ -/usr/local/pxf-gp6/conf/pxf-application.properties -/usr/local/pxf-gp6/conf/pxf-env.sh -/usr/local/pxf-gp6/conf/pxf-log4j2.xml -/usr/local/pxf-gp6/conf/pxf-profiles.xml +/usr/local/cloudberry-pxf/conf/pxf-application.properties +/usr/local/cloudberry-pxf/conf/pxf-env.sh +/usr/local/cloudberry-pxf/conf/pxf-log4j2.xml +/usr/local/cloudberry-pxf/conf/pxf-profiles.xml diff --git a/package/DEBIAN/control b/package/DEBIAN/control index e3c10ee76..81a604a2f 100644 --- a/package/DEBIAN/control +++ b/package/DEBIAN/control @@ -1,5 +1,5 @@ -Package: pxf-gp6 +Package: cloudberry-pxf Version: %VERSION% -Architecture: amd64 +Architecture: %ARCH% Maintainer: %MAINTAINER% -Description: Greenplum PXF framework for external data access +Description: Apache Cloudberry PXF (Platform Extension Framework) for advanced data access diff --git a/package/DEBIAN/postinst b/package/DEBIAN/postinst index 8d38c996c..35578858e 100755 --- a/package/DEBIAN/postinst +++ b/package/DEBIAN/postinst @@ -1,4 +1,4 @@ #!/bin/sh -sed -i "s|directory =.*|directory = '/usr/local/pxf-gp6/gpextable/'|g" /usr/local/pxf-gp6/gpextable/pxf.control -sed -i "s|module_pathname =.*|module_pathname = '/usr/local/pxf-gp6/gpextable/pxf'|g" /usr/local/pxf-gp6/gpextable/pxf.control \ No newline at end of file +sed -i "s|directory =.*|directory = '/usr/local/cloudberry-pxf/gpextable/'|g" "/usr/local/cloudberry-pxf/gpextable/pxf.control" +sed -i 
"s|module_pathname =.*|module_pathname = '/usr/local/cloudberry-pxf/gpextable/pxf'|g" "/usr/local/cloudberry-pxf/gpextable/pxf.control" \ No newline at end of file diff --git a/package/DEBIAN/prerm b/package/DEBIAN/prerm index 1ec74420e..1c3d2d085 100755 --- a/package/DEBIAN/prerm +++ b/package/DEBIAN/prerm @@ -1,5 +1,5 @@ #!/bin/sh -rm -f /usr/local/pxf-gp6/conf/pxf-private.classpath -rm -rf /usr/local/pxf-gp6/pxf-service -rm -rf /usr/local/pxf-gp6/run +rm -f /usr/local/cloudberry-pxf/conf/pxf-private.classpath +rm -rf /usr/local/cloudberry-pxf/pxf-service +rm -rf /usr/local/cloudberry-pxf/run diff --git a/package/README.md b/package/README.md index 898ea8b1e..6eaa74e81 100644 --- a/package/README.md +++ b/package/README.md @@ -1,60 +1,60 @@ PXF Packaging ============ -PXF consists of 3 groups of artifacts, each developed using a different underlying technology: +Apache Cloudberry PXF (Platform Extension Framework) consists of 3 groups of artifacts, each developed using a different underlying technology: -* Greenplum extension -- written in C; when built, produces a `pxf.so` library and configuration files +* Apache Cloudberry extension -- written in C; when built, produces a `pxf.so` library and configuration files * PXF Server -- written in Java; when built, produces a `pxf.war` file, Tomcat server, dependent JAR files, templates and scripts * Script Cluster Plugin -- written in Go; when built, produces a `pxf-cli` executable -The PXF build system can create an RPM package on CentOs platform and a DEB package on Ubuntu platform, -respectively. PXF compiles against and generates a different package for every major Greenplum version. +The PXF build system can create an RPM package on CentOS platform and a DEB package on Ubuntu platform, +respectively. PXF compiles against and generates packages for Apache Cloudberry. 
-For example, `pxf-gp5-1.2.3-1.el7.x86_64.rpm` represents an RPM package of PXF version 1.2.3 intended to work with -Greenplum 5 on Centos / Redhat 7 operating systems. +For example, `cloudberry-pxf-1.2.3-1.el7.x86_64.rpm` represents an RPM package of PXF version 1.2.3 intended to work with +Apache Cloudberry on CentOS / Red Hat 7 operating systems. ## PXF RPM specification -On Centos platforms PXF product is packaged as an RPM. The specification on how to build the RPM is provided by the -`pxf-gpX.spec` files in this directory. The following key design decisions were made: +On CentOS platforms PXF product is packaged as an RPM. The specification on how to build the RPM is provided by the +`cloudberry-pxf.spec` file in this directory. The following key design decisions were made: -* the name of the RPM package is `pxf-gpX`, where X is the major Greenplum version (e.g. `pxf-gp5`, `pxf-gp6`) -* to install a newer RPM package for the same Greenplum major release, a user will have to upgrade the PXF RPM -* the RPM installs PXF server into `/usr/local/pxf-gpX` directory (e.g. `/usr/local/pxf-gp6`) +* the name of the RPM package is `cloudberry-pxf` +* to install a newer RPM package, a user will have to upgrade the PXF RPM +* the RPM installs PXF server into `/usr/local/cloudberry-pxf-[VERSION]` directory (e.g. 
`/usr/local/cloudberry-pxf-1.2.3`) * the RPM is relocatable, a user can specify --prefix option when installing the RPM to install the server into another directory -* the PXF greenplum extension is initially installed by RPM alongside the PXF server and is not initially active -* the PXF greenplum extension is copied into Greenplum install location during `pxf init` command issued by a user after the install +* the PXF Apache Cloudberry extension is initially installed by RPM alongside the PXF server and is not initially active +* the PXF Apache Cloudberry extension is copied into Cloudberry install location during `pxf init` command issued by a user after the install * the PXF RPM version number follows 3-number semantic versioning and must be provided during the RPM build process * the PXF RPM release number is usually specified as `1` -* example PXF RPM names are : `pxf-gp5-1.2.3-1.el6.x86_64.rpm` and `pxf-gp5-1.2.3-1.el7.x86_64.rpm` +* example PXF RPM names are : `cloudberry-pxf-1.2.3-1.el7.x86_64.rpm` and `cloudberry-pxf-1.2.3-1.el8.x86_64.rpm` ## PXF RPM build process To build an RPM, follow these steps: 1. Install the `rpm-build` package: `sudo yum install rpm-build` -2. Install Greenplum database -3. Run `source $GPHOME/greenplum_path.sh` to configure your `PATH` to be able to find `pg_config` program +2. Install Apache Cloudberry +3. Run `source $GPHOME/greenplum_path.sh`(for Cloudberry 2.0) or `source $GPHOME/cloudberry-env.sh` (for Cloudberry 2.1+) to configure your `PATH` to be able to find `pg_config` program 4. Run `make clean rpm` from the top-level directory to build artifacts and assemble the RPM 5. The RPM will be available in `build/rpmbuild/RPMS` directory ## PXF RPM installation process To install PXF from an RPM, follow these steps: -1. Build or download PXF RPM for the corresponding major version of Greenplum. The following example will assume - that PXF version `1.2.3` will be installed to work with with Greenplum 5. -2. 
Decide which OS user will own the PXF installation. If PXF is installed alongside Greenplum, the user that owns the PXF -installation should either be the same as the one owning the Greenplum installation or have write privilleges to the -Greenplum installation directory. This is necessary to be able to register the PXF Greenplum extension with Greenplum. +1. Build or download PXF RPM for Apache Cloudberry. The following example will assume + that PXF version `1.2.3` will be installed to work with Apache Cloudberry. +2. Decide which OS user will own the PXF installation. If PXF is installed alongside Apache Cloudberry, the user that owns the PXF +installation should either be the same as the one owning the Cloudberry installation or have write privileges to the +Cloudberry installation directory. This is necessary to be able to register the PXF Apache Cloudberry extension with Cloudberry. 3. If a previous PXF version has been installed, stop the PXF server. -4. As a superuser, run `rpm -Uvh pxf-gp5-1.2.3-1.el7.x86_64.rpm` to install the RPM into `/usr/local/pxf-gp5` -5. As a superuser, run `chown gpadmin:gpadmin /usr/local/pxf-gp5` to change ownership of PXF installation to the user `gpadmin`. +4. As a superuser, run `rpm -Uvh cloudberry-pxf-1.2.3-1.el7.x86_64.rpm` to install the RPM into `/usr/local/cloudberry-pxf-1.2.3` +5. As a superuser, run `chown gpadmin:gpadmin /usr/local/cloudberry-pxf-1.2.3` to change ownership of PXF installation to the user `gpadmin`. Specify a different user other than `gpadmin`, if desired. After these steps, the PXF product will be installed and is ready to be configured. If there was a previous installation of -PXF for the same major Greenplum version, the files and the runtime directories from the older version will be removed. +PXF, the files and the runtime directories from the older version will be removed. The PXF configuration directory should remain intact. You will need to have Java installed to run the PXF server. 
## PXF removal process To remove the installed PXF package, follow these steps: 1. Stop the PXF server. -2. As a superuser, run `rpm -e pxf-gp5` (or `rpm -e pxf-gp6`). This will remove all files installed by the RPM package +2. As a superuser, run `rpm -e cloudberry-pxf`. This will remove all files installed by the RPM package and the PXF runtime directories. The PXF configuration directory should remain intact. diff --git a/package/cloudberry-pxf.spec b/package/cloudberry-pxf.spec index 44a144e6e..37c8f462b 100644 --- a/package/cloudberry-pxf.spec +++ b/package/cloudberry-pxf.spec @@ -27,8 +27,6 @@ Requires: bash # installing on Cloudberry node, so inherit Cloudberry's dependencies # implicitly -Requires: cloudberry-db - # Weak dependencies either OpenJDK 8 or 11 Suggests: java-1.8.0-openjdk Suggests: java-11-openjdk diff --git a/package/install_binary b/package/install_binary index 574a851b6..fbed1c71d 100755 --- a/package/install_binary +++ b/package/install_binary @@ -4,9 +4,9 @@ INSTALL_COMPONENT_SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd ) PXF_VERSION=$(<"${INSTALL_COMPONENT_SCRIPT_DIR}/pxf/version") function show_help() { - echo "This script installs PXF Greenplum Extension and PXF Server" + echo "This script installs PXF Extension and PXF Server" echo "to install the PXF Extension :" - echo " set GPHOME environment variable to the installation directory of Greenplum Database" + echo " set GPHOME environment variable to the installation directory of Apache Cloudberry" echo "to install the PXF Server :" echo " either set PXF_HOME environment variable to the target PXF Server installation directory" echo " or set GPHOME environment variable to have PXF Server installed into \${GPHOME}/pxf" @@ -29,8 +29,8 @@ function check_gphome() { exit 1 fi else - if [[ ! -f ${GPHOME}/greenplum_path.sh ]]; then - echo "Error: environment variable GPHOME (${GPHOME}) must be set to a valid Greenplum installation" + if [[ ! 
-f ${GPHOME}/greenplum_path.sh ]] && [[ ! -f ${GPHOME}/cloudberry-env.sh ]]; then + echo "Error: environment variable GPHOME (${GPHOME}) must be set to a valid Apache Cloudberry installation" exit 1 fi if [[ -z "${PXF_HOME}" ]]; then @@ -58,11 +58,11 @@ function install_new_component() { echo "Installing PXF version ${PXF_VERSION} ..." if [[ ! -z "${GPHOME}" ]]; then - echo "... installing PXF Greenplum Extension into ${GPHOME} ..." + echo "... installing PXF Extension into ${GPHOME} ..." cp -av ${INSTALL_COMPONENT_SCRIPT_DIR}/{lib,share} ${GPHOME} check_status $? else - echo "... skipping PXF Greenplum Extension as GPHOME environment variable is not set ..." + echo "... skipping PXF Extension as GPHOME environment variable is not set ..." fi if [[ ! -z "${PXF_HOME}" ]]; then diff --git a/package/install_deb b/package/install_deb index de7db3880..9450b65c5 100755 --- a/package/install_deb +++ b/package/install_deb @@ -14,6 +14,6 @@ if [[ "${owner}" != "root" ]]; then fi ${sudo_if_needed} dpkg --install ${INSTALL_COMPONENT_SCRIPT_DIR}/${DEB_FILE} -${sudo_if_needed} chown --recursive ${owner} /usr/local/pxf-gp* +${sudo_if_needed} chown --recursive ${owner} /usr/local/cloudberry-pxf* -echo "Successfully installed PXF version $(cat /usr/local/pxf-gp*/version)" +echo "Successfully installed PXF version $(cat /usr/local/cloudberry-pxf*/version)" diff --git a/package/install_rpm b/package/install_rpm index 2c28767dc..d34d0e087 100755 --- a/package/install_rpm +++ b/package/install_rpm @@ -14,6 +14,6 @@ if [[ "${owner}" != "root" ]]; then fi ${sudo_if_needed} rpm -Uvh ${INSTALL_COMPONENT_SCRIPT_DIR}/${RPM_FILE} -${sudo_if_needed} chown -R ${owner} /usr/local/pxf-gp* +${sudo_if_needed} chown -R ${owner} /usr/local/cloudberry-pxf* -echo "Successfully installed PXF version $(cat /usr/local/pxf-gp*/version)" +echo "Successfully installed PXF version $(cat /usr/local/cloudberry-pxf*/version)" diff --git a/package/pxf-cbdb1.spec b/package/pxf-cbdb1.spec deleted file mode 100644 
index f2812e34e..000000000 --- a/package/pxf-cbdb1.spec +++ /dev/null @@ -1,79 +0,0 @@ -# Disable repacking of jars, since it takes forever -%define __jar_repack %{nil} - -# Disable build-id in rpm -%define _build_id_links none -# Disable automatic dependency processing both for requirements and provides -AutoReqProv: no - -Name: pxf-cbdb1 -Version: %{pxf_version} -Release: 1%{?dist} -Summary: Cloudberry PXF framework for external data access -License: %{license} -URL: http://www.hashdata.cn -Vendor: %{vendor} - -Prefix: /usr/local/%{name} - -# .so file makes sense only when installing on Cloudberry node, so inherit Cloudberry's dependencies implicitly -# Java server can be installed on a new node, only bash is needed for management scripts -## cbdb has added this requirement, pxf may installed under GPHOME, -# this requirement will cause installation fail. -# Requires: bash - -%description -PXF is an extensible framework that allows a distributed database like Cloudberry to query external data files, -whose metadata is not managed by the database. PXF includes built-in connectors for accessing data that exists -inside HDFS files, Hive tables, HBase tables, databases that support JDBC, data stores (S3, GCS) and more. - -%prep -# If the pxf_version macro is not defined, it gets interpreted as a literal string, need %% to escape it -if [ %{pxf_version} = '%%{pxf_version}' ] ; then - echo "The macro (variable) pxf_version must be supplied as rpmbuild ... 
--define='pxf_version [VERSION]'" - exit 1 -fi - -%install -%__mkdir -p %{buildroot}/%{prefix} -%__cp -R %{_sourcedir}/* %{buildroot}/%{prefix} - -%post -sed -i "s|directory =.*|directory = '${RPM_INSTALL_PREFIX}/fdw/'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" -sed -i "s|module_pathname =.*|module_pathname = '${RPM_INSTALL_PREFIX}/fdw/pxf_fdw'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" - -%files -%{prefix} - -# If a file is not marked as a config file, or if a file has not been altered -# since installation, then it will be silently replaced by the version from the -# RPM. - -# If a config file has been edited on disk, but is not actually different from -# the file in the RPM then the edited version will be silently left in place. - -# When a config file has been edited and is different from the file in -# the RPM, then the behavior is the following: -# - %config(noreplace): The edited version will be left in place, and the new -# version will be installed with an .rpmnew suffix. -# - %config: The new file will be installed, and the the old edited version -# will be renamed with an .rpmsave suffix. - -# Configuration directories/files -%config(noreplace) %{prefix}/conf/pxf-application.properties -%config(noreplace) %{prefix}/conf/pxf-env.sh -%config(noreplace) %{prefix}/conf/pxf-log4j2.xml -%config(noreplace) %{prefix}/conf/pxf-profiles.xml - -%pre -# cleanup files and directories created by 'pxf init' command -# only applies for old installations (pre 6.0.0) -%__rm -f "${RPM_INSTALL_PREFIX}/conf/pxf-private.classpath" -%__rm -rf "${RPM_INSTALL_PREFIX}/pxf-service" - -%posttrans -# PXF v5 RPM installation removes the run directory during the %preun step. -# The lack of run directory prevents PXF v6+ from starting up. 
-# %posttrans of the new package is the only step that runs after the %preun -# of the old package -%{__install} -d -m 700 "${RPM_INSTALL_PREFIX}/run" diff --git a/package/pxf-gp7.spec b/package/pxf-gp7.spec deleted file mode 100644 index 4b22e786a..000000000 --- a/package/pxf-gp7.spec +++ /dev/null @@ -1,79 +0,0 @@ -# Disable repacking of jars, since it takes forever -%define __jar_repack %{nil} - -# Disable automatic dependency processing both for requirements and provides -AutoReqProv: no - -Name: pxf-gp7 -Version: %{pxf_version} -Release: %{pxf_release}%{?dist} -Summary: Greenplum PXF framework for external data access -License: %{license} -URL: https://github.com/greenplum-db/pxf -Vendor: %{vendor} - -Prefix: /usr/local/%{name} - -# .so file makes sense only when installing on Greenplum node, so inherit Greenplum's dependencies implicitly -# Java server can be installed on a new node, only bash is needed for management scripts -Requires: bash - -%description -PXF is an extensible framework that allows a distributed database like Greenplum to query external data files, -whose metadata is not managed by the database. PXF includes built-in connectors for accessing data that exists -inside HDFS files, Hive tables, HBase tables, databases that support JDBC, data stores (S3, GCS) and more. - -%prep -# If the pxf_version macro is not defined, it gets interpreted as a literal string, need %% to escape it -if [ %{pxf_version} = '%%{pxf_version}' ] ; then - echo "The macro (variable) pxf_version must be supplied as rpmbuild ... 
--define='pxf_version [VERSION]'" - exit 1 -fi - -%install -%__mkdir -p %{buildroot}/%{prefix} -%__cp -R %{_sourcedir}/* %{buildroot}/%{prefix} - -%post -sed -i "s|directory =.*|directory = '${RPM_INSTALL_PREFIX}/gpextable/'|g" "${RPM_INSTALL_PREFIX}/gpextable/pxf.control" -sed -i "s|module_pathname =.*|module_pathname = '${RPM_INSTALL_PREFIX}/gpextable/pxf'|g" "${RPM_INSTALL_PREFIX}/gpextable/pxf.control" -sed -i "s|directory =.*|directory = '${RPM_INSTALL_PREFIX}/fdw/'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" -sed -i "s|module_pathname =.*|module_pathname = '${RPM_INSTALL_PREFIX}/fdw/pxf_fdw'|g" "${RPM_INSTALL_PREFIX}/fdw/pxf_fdw.control" -sed -i "s|directory =.*|directory = '${RPM_INSTALL_PREFIX}/gpextable/'|g" "${RPM_INSTALL_PREFIX}/gpextable/pxf.control" -sed -i "s|module_pathname =.*|module_pathname = '${RPM_INSTALL_PREFIX}/gpextable/pxf'|g" "${RPM_INSTALL_PREFIX}/gpextable/pxf.control" - -%files -%{prefix} - -# If a file is not marked as a config file, or if a file has not been altered -# since installation, then it will be silently replaced by the version from the -# RPM. - -# If a config file has been edited on disk, but is not actually different from -# the file in the RPM then the edited version will be silently left in place. - -# When a config file has been edited and is different from the file in -# the RPM, then the behavior is the following: -# - %config(noreplace): The edited version will be left in place, and the new -# version will be installed with an .rpmnew suffix. -# - %config: The new file will be installed, and the the old edited version -# will be renamed with an .rpmsave suffix. 
- -# Configuration directories/files -%config(noreplace) %{prefix}/conf/pxf-application.properties -%config(noreplace) %{prefix}/conf/pxf-env.sh -%config(noreplace) %{prefix}/conf/pxf-log4j2.xml -%config(noreplace) %{prefix}/conf/pxf-profiles.xml - -%pre -# cleanup files and directories created by 'pxf init' command -# only applies for old installations (pre 6.0.0) -%__rm -f "${RPM_INSTALL_PREFIX}/conf/pxf-private.classpath" -%__rm -rf "${RPM_INSTALL_PREFIX}/pxf-service" - -%posttrans -# PXF v5 RPM installation removes the run directory during the %preun step. -# The lack of run directory prevents PXF v6+ from starting up. -# %posttrans of the new package is the only step that runs after the %preun -# of the old package -%{__install} -d -m 700 "${RPM_INSTALL_PREFIX}/run"