diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 14e2943..a51d504 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -99,7 +99,7 @@ jobs: cat /etc/*-release DEBIAN_FRONTEND=noninteractive sudo apt-get update && sudo apt-get install -yq tzdata curl lsb-core lsb-release # > /dev/null . ./scripts/cloud_build_test_ubuntu.sh "3.8.12" - gsutil cp clouddq_patched.zip gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip - gsutil cp clouddq_patched.zip.hashsum gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip.hashsum - gsutil cp clouddq/integration/clouddq_pyspark_driver.py gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq_pyspark_driver.py + gcloud storage cp clouddq_patched.zip gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip + gcloud storage cp clouddq_patched.zip.hashsum gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq-executable.zip.hashsum + gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${{ secrets.GCS_BUCKET_NAME }}/build-artifacts/${{matrix.os}}/python3.8/`date -I'minutes'`/${{ steps.vars.outputs.branch }}_${{ steps.vars.outputs.sha_short }}/clouddq_pyspark_driver.py shell: bash diff --git a/cloudbuild-release-debian11.yaml b/cloudbuild-release-debian11.yaml index ec923fd..adc0bca 100644 --- a/cloudbuild-release-debian11.yaml +++ b/cloudbuild-release-debian11.yaml @@ -41,29 +41,29 @@ steps: source scripts/install_gcloud.sh - gsutil ls gs://$_GCS_RELEASE_BUCKET + gcloud storage ls gs://$_GCS_RELEASE_BUCKET - gsutil cp clouddq_patched.zip + gcloud storage cp clouddq_patched.zip gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip - gsutil cp clouddq_patched.zip.hashsum + gcloud storage cp clouddq_patched.zip.hashsum gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum - gsutil cp clouddq/integration/clouddq_pyspark_driver.py + gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${_GCS_RELEASE_BUCKET}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py - gsutil cp clouddq_patched.zip + gcloud storage cp clouddq_patched.zip gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip - gsutil cp clouddq_patched.zip.hashsum + gcloud storage cp clouddq_patched.zip.hashsum gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum - gsutil cp clouddq/integration/clouddq_pyspark_driver.py + gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${_GCS_BUCKET_NAME}/build-artifacts/debian11/python3.9/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py entrypoint: /bin/bash diff --git a/cloudbuild-release-ubuntu18.yaml b/cloudbuild-release-ubuntu18.yaml index 8b39b26..4922cc2 100644 --- a/cloudbuild-release-ubuntu18.yaml +++ b/cloudbuild-release-ubuntu18.yaml @@ -37,29 +37,29 @@ steps: source scripts/install_gcloud.sh - gsutil ls gs://$_GCS_RELEASE_BUCKET + gcloud storage ls gs://$_GCS_RELEASE_BUCKET - gsutil cp clouddq_patched.zip + gcloud storage cp clouddq_patched.zip gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip - gsutil cp clouddq_patched.zip.hashsum + gcloud storage cp clouddq_patched.zip.hashsum gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum - gsutil cp clouddq/integration/clouddq_pyspark_driver.py + gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${_GCS_RELEASE_BUCKET}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py - gsutil cp clouddq_patched.zip + gcloud storage cp clouddq_patched.zip gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip - gsutil cp clouddq_patched.zip.hashsum + gcloud storage cp clouddq_patched.zip.hashsum gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq-executable.zip.hashsum - gsutil cp clouddq/integration/clouddq_pyspark_driver.py + gcloud storage cp clouddq/integration/clouddq_pyspark_driver.py gs://${_GCS_BUCKET_NAME}/build-artifacts/ubuntu18.04/python3.8/main/`date -I'minutes'`_${TAG_NAME}_${SHORT_SHA}/clouddq_pyspark_driver.py entrypoint: /bin/bash diff --git a/docs/clouddq-as-dataproc-workflow-composer-dag.md b/docs/clouddq-as-dataproc-workflow-composer-dag.md index 75d5416..0086ed9 100644 --- a/docs/clouddq-as-dataproc-workflow-composer-dag.md +++ b/docs/clouddq-as-dataproc-workflow-composer-dag.md @@ -80,7 +80,7 @@ sed -i s/\/${CLOUDDQ_BIGQUERY_DATASET}/g ./configs/en This is the bucket where the Python executables and the configuration files will be pulled from for the Dataproc PySpark job. ```bash -gsutil mb -p ${PROJECT_ID} -l ${REGION} -b on gs://${GCS_BUCKET_NAME} +gcloud storage buckets create gs://${GCS_BUCKET_NAME} --project=${PROJECT_ID} --location=${REGION} --uniform-bucket-level-access ``` Ensure you have sufficient IAM privileges to create Cloud Storage Buckets in your project. @@ -182,7 +182,7 @@ sed -i s/\/${DATAPROC_WORKFLOW_NAME}/g ${DAG_PY_FILE} export DAG_BUCKET=$(gcloud composer environments describe --format="value(config.dagGcsPrefix)" \ --project ${PROJECT_ID} --location ${REGION} ${COMPOSER_ENVIRONMENT_NAME}) -gsutil cp ${DAG_PY_FILE} ${DAG_BUCKET} +gcloud storage cp ${DAG_PY_FILE} ${DAG_BUCKET} ``` ## 8. Check Airflow job status diff --git a/scripts/dataproc-workflow-composer/upload_clouddq_to_gcs.sh b/scripts/dataproc-workflow-composer/upload_clouddq_to_gcs.sh index 4cfc8f3..ab5001d 100755 --- a/scripts/dataproc-workflow-composer/upload_clouddq_to_gcs.sh +++ b/scripts/dataproc-workflow-composer/upload_clouddq_to_gcs.sh @@ -26,20 +26,20 @@ TARGET_PYTHON_INTERPRETER="${TARGET_PYTHON_INTERPRETER}" || err "Environment var function zip_configs_directory_and_upload_to_gcs() { zip -r clouddq-configs.zip ./configs - gsutil mv clouddq-configs.zip gs://"${GCS_BUCKET_NAME}"/clouddq-configs.zip - gsutil ls gs://"${GCS_BUCKET_NAME}"/clouddq_pyspark_driver.py || gsutil cp ./clouddq/integration/clouddq_pyspark_driver.py gs://"${GCS_BUCKET_NAME}" + gcloud storage mv clouddq-configs.zip gs://"${GCS_BUCKET_NAME}"/clouddq-configs.zip + gcloud storage ls gs://"${GCS_BUCKET_NAME}"/clouddq_pyspark_driver.py || gcloud storage cp ./clouddq/integration/clouddq_pyspark_driver.py gs://"${GCS_BUCKET_NAME}" } function upload_clouddq_zip_executable_to_gcs() { wget -O clouddq_executable.zip https://github.com/GoogleCloudPlatform/cloud-data-quality/releases/download/v"${CLOUDDQ_RELEASE_VERSION}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}"_"${TARGET_OS}"_python"${TARGET_PYTHON_INTERPRETER}".zip wget -O clouddq_executable.zip.hashsum https://github.com/GoogleCloudPlatform/cloud-data-quality/releases/download/v"${CLOUDDQ_RELEASE_VERSION}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}"_"${TARGET_OS}"_python"${TARGET_PYTHON_INTERPRETER}".zip.sha256sum - gsutil cp clouddq_executable.zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip - gsutil cp clouddq_executable.zip.hashsum gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum + gcloud storage cp clouddq_executable.zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip + gcloud storage cp clouddq_executable.zip.hashsum gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum } function main() { zip_configs_directory_and_upload_to_gcs - gsutil ls gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum || upload_clouddq_zip_executable_to_gcs + gcloud storage ls gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip gs://"${GCS_BUCKET_NAME}"/clouddq_executable_v"${CLOUDDQ_RELEASE_VERSION}".zip.hashsum || upload_clouddq_zip_executable_to_gcs } main "$@" \ No newline at end of file