diff --git a/.gitattributes b/.gitattributes index 8f4aec0b7..8e5c1acf0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,7 +1,7 @@ ############################### # Git Line Endings # # Set default behaviour to automatically normalize line endings. -* text eolf=lf +* text eol=lf # Force batch scripts to always use CRLF line endings so that if a repo is accessed # in Windows via a file share from Linux, the scripts will work. *.{cmd,[cC][mM][dD]} text eol=crlf diff --git a/.github/workflows/_docker-template.yml b/.github/workflows/_docker-template.yml index 7d186bddc..c41244868 100644 --- a/.github/workflows/_docker-template.yml +++ b/.github/workflows/_docker-template.yml @@ -114,7 +114,7 @@ jobs: password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - name: Docker metadata (again) - id: meta + id: meta_manifest uses: docker/metadata-action@v5 with: images: ${{ inputs.image }} @@ -141,4 +141,4 @@ jobs: --tag "${img}:${tag}" \ "${img}:${tag}-amd64" \ "${img}:${tag}-arm64" - done < <(printf "%s" "${{ steps.meta.outputs.tags }}") + done < <(printf "%s" "${{ steps.meta_manifest.outputs.tags }}") diff --git a/.gitignore b/.gitignore index 564452318..bae75c1c9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,18 @@ .vscode .venv .ruff_cache +.mypy_cache venv **__pycache__ **/venv +*.pyc +.pyc +build + +# Ignore setuptools metadata +*.egg-info/ +*.egg-info +**/*.egg-info/ # keys and certificates *.pem @@ -37,6 +46,7 @@ security/templates/** docs/build/* # Ignore all .env files at any level +.env *.env **/*.env !*.env.template diff --git a/deploy/database.env b/deploy/database.env index d4ad0d7fc..478e13c66 100644 --- a/deploy/database.env +++ b/deploy/database.env @@ -1,9 +1,13 @@ -# production db name -POSTGRES_DATABANK_DB=cogstack - POSTGRES_DB_MAX_CONNECTIONS=100 # Prefix of file names to load the DB schema for in /services/cogstack-db/(pgsql/mssql)/schemas/ folder POSTGRES_DB_SCHEMA_PREFIX="cogstack_db" -POSTGRES_SHM_SIZE="1g" +# production db name +DATABASE_DB_NAME=cogstack + +DATABASE_DOCKER_SHM_SIZE=1g + +DATABASE_DOCKER_CPU_MIN=1 +DATABASE_DOCKER_CPU_MAX=1 +DATABASE_DOCKER_RAM=1g diff --git a/deploy/elasticsearch.env b/deploy/elasticsearch.env index b084dba38..387409be5 100644 --- a/deploy/elasticsearch.env +++ b/deploy/elasticsearch.env @@ -9,10 +9,10 @@ ELASTICSEARCH_VERSION=opensearch # possible values : # - elasticsearch : docker.elastic.co/elasticsearch/elasticsearch:8.18.2 # - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-elasticsearch:latest -# - opensearch : opensearchproject/opensearch:3.2.0 +# - opensearch : opensearchproject/opensearch:3.3.0 # the custom cogstack image is always based on the last image of ES native -ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.2.0 +ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.3.0 ELASTICSEARCH_LOG_LEVEL=INFO @@ -88,9 +88,14 @@ ELASTICSEARCH_BACKUP_PARTITION_CONFIG=../data/es_snapshot_backups/config_backup ELASTICSEARCH_SECURITY_DIR=../security/certificates/elastic/ # MEMORY CONFIG -ELASTICSEARCH_JAVA_OPTS="-Xms2048m -Xmx2048m -Des.failure_store_feature_flag_enabled=true" +ELASTICSEARCH_JAVA_OPTS="-Xms512m -Xmx512m -Des.failure_store_feature_flag_enabled=true" + +ELASTICSEARCH_DOCKER_CPU_MIN=1 +ELASTICSEARCH_DOCKER_CPU_MAX=1 +ELASTICSEARCH_DOCKER_RAM=1g + +ELASTICSEARCH_DOCKER_SHM_SIZE=512m -ELASTICSEARCH_SHM_SIZE="1g" ELASTICSEARCH_DOCKER_LOG_SIZE_PER_FILE="1000m" ELASTICSEARCH_DOCKER_LOG_NUM_FILES=10 @@ -140,9 +145,6 @@ 
ELASTICSEARCH_HOSTS='["https://elasticsearch-1:9200","https://elasticsearch-2:92 KIBANA_HOST="https://kibana:5601" -KIBANA_SERVER_NAME="cogstack-kibana" - - ########################################################################## KIBANA Env vars ########################################################################### # NOTE: some variables from the Elasticsearch section are used # - ${ELASTICSEARCH_VERSION} is used for certificate paths, as well as kibana.yml config path. @@ -158,15 +160,15 @@ KIBANA_VERSION=opensearch-dashboards # - kibana # - opensearch_dashboards # make note of the underscore... -KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards +KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards # possible values: # - elasticsearch : docker.elastic.co/kibana/kibana:8.18.2 # - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-kibana:latest -# - opensearch : opensearchproject/opensearch-dashboards:3.2.0 +# - opensearch : opensearchproject/opensearch-dashboards:3.3.0 # the custom cogstack image is always based on the last image of ES native -ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.2.0 +ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.3.0 KIBANA_SERVER_NAME="cogstack-kibana" KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601" @@ -174,7 +176,11 @@ KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601" KIBANA_SERVER_HOST="0.0.0.0" KIBANA_SERVER_OUTPUT_PORT=5601 -KIBANA_SHM_SIZE="1g" +KIBANA_DOCKER_SHM_SIZE=512m +KIBANA_DOCKER_CPU_MIN=1 +KIBANA_DOCKER_CPU_MAX=1 +KIBANA_DOCKER_RAM=1g + # this is used in Kibana # it needs to be generated via the API @@ -201,6 +207,10 @@ ELASTICSEARCH_XPACK_SECURITY_REPORTING_ENCRYPTION_KEY="e0Y1gTxHWOopIWMTtpjQsDS6K METRICBEAT_IMAGE="docker.elastic.co/beats/metricbeat:8.18.2" +METRICBEAT_DOCKER_SHM=512m +METRICBEAT_DOCKER_CPU_MIN=1 +METRICBEAT_DOCKER_CPU_MAX=1 +METRICBEAT_DOCKER_RAM=1g ########################################################################## FILEBEAT Env vars ########################################################################### @@ -213,3 +223,9 @@ FILEBEAT_STARTUP_COMMAND="-e --strict.perms=false" FILEBEAT_HOST="https://elasticsearch-1:9200" FILEBEAT_IMAGE="docker.elastic.co/beats/filebeat:8.18.2" + + +FILEBEAT_DOCKER_SHM=512m +FILEBEAT_DOCKER_CPU_MIN=1 +FILEBEAT_DOCKER_CPU_MAX=1 +FILEBEAT_DOCKER_RAM=1g diff --git a/deploy/export_env_vars.sh b/deploy/export_env_vars.sh index ea8266095..58b446543 100755 --- a/deploy/export_env_vars.sh +++ b/deploy/export_env_vars.sh @@ -3,12 +3,15 @@ # Enable strict mode (without -e to avoid exit-on-error) set -uo pipefail +# Support being sourced in shells where BASH_SOURCE is unset (e.g. zsh) +SCRIPT_SOURCE="${BASH_SOURCE[0]-$0}" +SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_SOURCE")" && pwd)" +SCRIPT_NAME="$(basename "$SCRIPT_SOURCE")" -echo "🔧 Running $(basename "${BASH_SOURCE[0]}")..." +echo "🔧 Running $SCRIPT_NAME..." set -a -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" DEPLOY_DIR="$SCRIPT_DIR" SECURITY_DIR="$SCRIPT_DIR/../security/env" SERVICES_DIR="$SCRIPT_DIR/../services" @@ -38,6 +41,18 @@ env_files=( "$SERVICES_DIR/cogstack-nlp/medcat-service/env/medcat.env" ) +LINT_SCRIPT="$SCRIPT_DIR/../nifi/user_scripts/utils/lint_env.py" + +if [ -x "$LINT_SCRIPT" ]; then + echo "🔍 Validating env files..." + if ! python3 "$LINT_SCRIPT" "${env_files[@]}"; then + echo "❌ Env validation failed. Fix the errors above before continuing." 
+ exit 1 + fi +else + echo "⚠️ Skipping env validation; $LINT_SCRIPT not found or not executable." +fi + for env_file in "${env_files[@]}"; do if [ -f "$env_file" ]; then echo "✅ Sourcing $env_file" @@ -56,4 +71,4 @@ set +a # Restore safe defaults for interactive/dev shell set +u -set +o pipefail \ No newline at end of file +set +o pipefail diff --git a/deploy/gitea.env b/deploy/gitea.env index 0009d5759..e2ef85779 100644 --- a/deploy/gitea.env +++ b/deploy/gitea.env @@ -23,3 +23,8 @@ GITEA_LOCAL_PUB_KEY_PATH="$GITEA_LOCAL_KEY_PATH.pub" GITEA_LOCAL_KEY_TITLE="gitea-cogstack-$(hostname)-$(date +%s)" GITEA_DEFAULT_MAIN_REMOTE_NAME="cogstack-gitea" + +GITEA_DOCKER_SHM_SIZE=512m +GITEA_DOCKER_CPU_MIN=1 +GITEA_DOCKER_CPU_MAX=1 +GITEA_DOCKER_RAM=1g diff --git a/deploy/nginx.env b/deploy/nginx.env index aae2c825d..a08762ca2 100644 --- a/deploy/nginx.env +++ b/deploy/nginx.env @@ -1,3 +1,9 @@ NGINX_KIBANA_HOST=kibana NGINX_KIBANA_PROXY_PORT=5601 NGINX_ES_NODE_SOURCE_INSTANCE_NAME="elasticsearch-1" + + +NGINX_SHM_SIZE=1g +NGINX_DOCKER_CPU_MIN=1 +NGINX_DOCKER_CPU_MAX=1 +NGINX_DOCKER_RAM=1g diff --git a/deploy/nifi.env b/deploy/nifi.env index dcc710c06..cc3380ec8 100644 --- a/deploy/nifi.env +++ b/deploy/nifi.env @@ -1,3 +1,29 @@ + + +############################################################################################################################## +# IMPORTANT RESOURCE-SCOPED SETTINGS FOR DEPLOYMENTS +############################################################################################################################## +NIFI_JVM_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis=250 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom" +NIFI_JVM_HEAP_INIT=768m +NIFI_JVM_HEAP_MAX=1g + + +NIFI_DOCKER_SHM_SIZE=1g +NIFI_DOCKER_REGISTRY_SHM_SIZE=1g + +NIFI_DOCKER_CPU_MIN=1 +NIFI_DOCKER_CPU_MAX=1 +NIFI_DOCKER_RAM=1g + +NIFI_REGISTRY_DOCKER_CPU_MIN=1 +NIFI_REGISTRY_DOCKER_CPU_MAX=1 +NIFI_REGISTRY_DOCKER_RAM=1g + +NIFI_DOCKER_LOG_SIZE_PER_FILE="250m" +NIFI_DOCKER_LOG_NUM_FILES=10 + +############################################################################################################################## + # NiFi NIFI_ENV_FILE="./nifi.env" NIFI_SECURITY_DIR="../security/certificates/nifi/" @@ -6,11 +32,6 @@ NIFI_DATA_PATH="../data/" NIFI_VERSION="2.7.2" NIFI_TOOLKIT_VERSION=$NIFI_VERSION -NIFI_SHM_SIZE="1g" -NIFI_REGISTRY_SHM_SIZE="1g" -NIFI_DOCKER_LOG_SIZE_PER_FILE="250m" -NIFI_DOCKER_LOG_NUM_FILES=10 - #### Port and network settings NIFI_WEB_PROXY_CONTEXT_PATH="/nifi" diff --git a/deploy/services-dev.yml b/deploy/services-dev.yml index 593af6ec9..51fcf4db0 100644 --- a/deploy/services-dev.yml +++ b/deploy/services-dev.yml @@ -1,3 +1,67 @@ +#---------------------------------------------------------------------------# +# Common snippets / anchors # +#---------------------------------------------------------------------------# +x-nifi-logging-common: &nifi-logging-common + driver: "json-file" + options: + max-size: ${NIFI_DOCKER_LOG_SIZE_PER_FILE:-250m} + max-file: ${NIFI_DOCKER_LOG_NUM_FILES:-10} + +x-logging-common: &logging-common + driver: "json-file" + options: + max-size: ${DOCKER_LOG_SIZE_PER_FILE:-100m} + max-file: ${DOCKER_LOG_NUM_FILES:-10} + +x-all-env: &all-env + - ./project.env + - ./general.env + - ./nifi.env + - ./gitea.env + - ./nginx.env + - ./database.env + - ./elasticsearch.env + - ./network_settings.env + - ../security/env/users_nifi.env + - ../security/env/users_database.env + - ../security/env/users_nginx.env + - ../security/env/users_elasticsearch.env + - 
../security/env/certificates_general.env - ../security/env/certificates_elasticsearch.env - ../security/env/certificates_nifi.env x-es-env: &es-env - ./network_settings.env - ./elasticsearch.env - ../security/env/users_elasticsearch.env - ../security/env/certificates_elasticsearch.env x-common-hosts: &common-hosts - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0} - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0} - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0} - ${KIBANA_HOST_NAME:-test-4:0.0.0.0} - ${NIFI_HOST_NAME:-test-5:0.0.0.0} - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0} x-common-ulimits: &common-ulimits ulimits: nofile: soft: 65535 hard: 65535 nproc: 65535 memlock: soft: -1 hard: -1 x-nifi-common: &nifi-common <<: *common-ulimits restart: always env_file: *all-env extra_hosts: *common-hosts networks: - cognet #---------------------------------------------------------------------------# # Used services # #---------------------------------------------------------------------------# services: #---------------------------------------------------------------------------# # NiFi webapp # #---------------------------------------------------------------------------# nifi: - # image: cogstacksystems/cogstack-nifi:latest + <<: *nifi-common build: context: ../nifi/ args: HTTP_PROXY: $HTTP_PROXY HTTPS_PROXY: $HTTPS_PROXY no_proxy: $no_proxy container_name: cogstack-nifi hostname: nifi - restart: always - env_file: - - ./general.env - - ./project.env - - ./nifi.env - - ./elasticsearch.env - - ./network_settings.env - - ../security/users_nifi.env - - ../security/users_elasticsearch.env - - ../security/certificates_general.env - - ../security/certificates_elasticsearch.env - - ../security/certificates_nifi.env - shm_size: 1024mb + shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"} environment: - USER_ID=${NIFI_UID:-1000} - GROUP_ID=${NIFI_GID:-1000} @@ -37,30 +89,25 @@ - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443} - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082} - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000} - - NIFI_SECURITY_DIR=${NIFI_SECURITY_DIR:-../security/nifi_certificates/} - - ELASTICSEARCH_SECURITY_DIR=${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/} volumes: # INFO: drivers folder - ../nifi/drivers:/opt/nifi/drivers - + # INFO: if there are local changes, map these content from local host to container # (normally, these 3 directories below are bundled with our NiFi image) # N.B. The container user may not have the permission to read these directories/files. 
- - ../nifi/user-templates:/opt/nifi/nifi-current/conf/templates:rw - - ../nifi/user-scripts:/opt/nifi/user-scripts:rw - - ../nifi/user-schemas:/opt/nifi/user-schemas:rw + - ../nifi/user_templates:/opt/nifi/nifi-current/conf/templates:rw + - ../nifi/user_scripts:/opt/nifi/user_scripts:rw + - ../nifi/user_schemas:/opt/nifi/user_schemas:rw - # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.0.x - - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw + # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x + - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw # INFO: uncomment below to map security certificates if need to secure NiFi endpoints - - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}:/opt/nifi/nifi-current/nifi_certificates:ro - - ./${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/}:/opt/nifi/nifi-current/es_certificates:ro - - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi/nifi-current/conf/keystore.jks - - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks:/opt/nifi/nifi-current/conf/truststore.jks + - ../security:/security:ro # Security credentials scripts - - ../security/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro + - ../security/scripts/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro # # Nifi properties file: - ../nifi/conf/:/opt/nifi/nifi-current/conf/:rw @@ -72,7 +119,7 @@ services: - ../services/cogstack-db/:/opt/cogstack-db/:rw # medcat models - - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/nlp-services/medcat-service/models/}:/opt/models:rw + - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw # rest of volumes to persist the state - nifi-vol-logs:/opt/nifi/nifi-current/logs @@ -85,51 +132,22 @@ services: # errors generated during data processing - nifi-vol-errors:/opt/nifi/pipeline/flowfile-errors - extra_hosts: - - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0} - - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0} - - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0} - - ${KIBANA_HOST_NAME:-test-4:0.0.0.0} - - ${NIFI_HOST_NAME:-test-5:0.0.0.0} - - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0} - - # user: "${NIFI_UID:-1000}:${NIFI_GID:-1000}" - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 262144 - # INFO : Uncomment the below line to generate your own USERNAME and PASSWORD, # a bit messy this way as you will need to copy the credentials back # to the "login-identity-providers.xml" section. 
# entrypoint: bash -c "/opt/nifi/nifi-current/bin/nifi.sh set-single-user-credentials admin admincogstacknifi" - tty: true ports: - "${NIFI_OUTPUT_PORT:-8082}:${NIFI_INTERNAL_PORT:-8443}" - "${NIFI_INPUT_SOCKET_PORT:-10000}" - networks: - - cognet - + logging: *nifi-logging-common + nifi-registry-flow: image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.7.1} hostname: nifi-registry container_name: cogstack-nifi-registry-flow - restart: always + shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-"1g"} user: root - env_file: - - ./general.env - - ./network_settings.env - - ./nifi.env - - ./project.env - - ../security/users_nifi.env - - ../security/users_elasticsearch.env - - ../security/certificates_general.env - - ../security/certificates_elasticsearch.env - - ../security/certificates_nifi.env environment: - http_proxy=$HTTP_PROXY - https_proxy=$HTTPS_PROXY @@ -143,31 +161,20 @@ - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-./conf/truststore.jks} - TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks} - - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"CN=admin, OU=nifi"} + - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"} - AUTH=${NIFI_AUTH:-"tls"} - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database} #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file} - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage} volumes: - ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw - - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro - - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks://opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro + - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro + - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-truststore.jks:/opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro - nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database - nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage - nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work - nifi-registry-vol-logs:/opt/nifi-registry/nifi-registry-current/logs - extra_hosts: - - ${NIFI_HOST_NAME:-test-5:0.0.0.0} - - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0} - - ulimits: - memlock: - soft: -1 - hard: -1 - nofile: - soft: 65536 - hard: 262144 - + extra_hosts: *common-hosts tty: true ports: - "${NIFI_REGISTRY_FLOW_OUTPUT_PORT:-8083}:${NIFI_REGISTRY_FLOW_INPUT_PORT:-18443}" 
../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw - ../services/nginx/config/nginx.conf:/etc/nginx/nginx.conf:rw - - ../security/root_certificates:/etc/nginx/root_certificates:ro - - ../security/nifi_certificates:/etc/nginx/nifi_certificates:ro - - - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.crt.pem:/etc/nginx/es_certificates/elastic-stack-ca.crt.pem:ro - - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.key.pem:/etc/nginx/es_certificates/elastic-stack-ca.key.pem:ro - # - ../security/es_certificates/:/etc/nginx/es_certificates/:ro + - ../security/certificates:/certificates:ro ports: - "${NIFI_EXTERNAL_PORT_NGINX:-8443}:${NIFI_INTERNAL_PORT_NGINX:-8443}" - "${NIFI_REGISTRY_EXTERNAL_PORT_NGINX:-18443}:${NIFI_REGISTRY_INTERNAL_PORT_NGINX:-18443}" networks: - cognet command: /bin/bash -c "envsubst < /etc/nginx/config/nginx.conf.template > /etc/nginx/config/nginx.conf && nginx -g 'daemon off;'" + extra_hosts: *common-hosts + logging: *nifi-logging-common #---------------------------------------------------------------------------# # Docker named volumes # @@ -249,7 +240,6 @@ volumes: driver: local nifi-registry-vol-logs: driver: local - #---------------------------------------------------------------------------# # Docker networks. # #---------------------------------------------------------------------------# diff --git a/deploy/services.yml b/deploy/services.yml index 27c19bbfa..57266f692 100644 --- a/deploy/services.yml +++ b/deploy/services.yml @@ -42,6 +42,10 @@ x-es-env: &es-env - ../security/env/users_elasticsearch.env - ../security/env/certificates_elasticsearch.env +x-db-env: &db-env + - ./database.env + - ../security/env/users_database.env + x-common-hosts: &common-hosts - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0} - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0} @@ -62,12 +66,26 @@ x-common-ulimits: &common-ulimits x-nifi-common: &nifi-common <<: *common-ulimits - restart: always + restart: unless-stopped env_file: *all-env extra_hosts: *common-hosts networks: - cognet +x-db-common: &db-common + <<: *common-ulimits + shm_size: ${DATABASE_DOCKER_SHM_SIZE:-"1g"} + restart: unless-stopped + env_file: *db-env + deploy: + resources: + limits: + cpus: "${DATABASE_DOCKER_CPU_MAX}" + memory: "${DATABASE_DOCKER_RAM}" + reservations: + cpus: "${DATABASE_DOCKER_CPU_MIN}" + memory: "${DATABASE_DOCKER_RAM}" + x-es-common-volumes: &es-common-volumes # Shared configs - ../services/elasticsearch/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:ro @@ -93,9 +111,9 @@ x-es-common-volumes: &es-common-volumes x-es-common: &es-common <<: *common-ulimits - image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.2.0} - shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"} - restart: always + image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.3.0} + shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g} + restart: unless-stopped env_file: *es-env networks: - cognet @@ -108,12 +126,21 @@ x-es-common: &es-common OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver} ELASTICSEARCH_VERSION: ${ELASTICSEARCH_VERSION:-opensearch} logging: *es-logging-common + deploy: + resources: + limits: + cpus: "${ELASTICSEARCH_DOCKER_CPU_MAX}" + memory: "${ELASTICSEARCH_DOCKER_RAM}" + reservations: + cpus: "${ELASTICSEARCH_DOCKER_CPU_MIN}" + memory: "${ELASTICSEARCH_DOCKER_RAM}" 
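The `deploy.resources` pattern above recurs for every service in this file, and the new `*_DOCKER_CPU_MIN`/`*_DOCKER_CPU_MAX`/`*_DOCKER_RAM` variables carry no inline defaults, so they must be supplied by the env files added in this change. A minimal sketch for checking that Compose actually applied the limits to a running container, using `cogstack-kibana` (a container name defined below); `NanoCpus` is CPU count × 10⁹ and `Memory` is in bytes, with 0 meaning no limit:

```bash
# Show the CPU and memory limits Docker recorded for the container.
docker inspect --format \
  '{{.HostConfig.NanoCpus}} nanoCPUs, {{.HostConfig.Memory}} bytes' \
  cogstack-kibana

# One-shot snapshot of current usage against the applied memory limit.
docker stats --no-stream cogstack-kibana
```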
x-metricbeat-common: &metricbeat-common <<: *common-ulimits image: ${METRICBEAT_IMAGE:-docker.elastic.co/beats/metricbeat:8.18.2} command: -e --strict.perms=false restart: unless-stopped + shm_size: ${METRICBEAT_DOCKER_SHM:-1g} env_file: - ./elasticsearch.env - ../security/env/users_elasticsearch.env @@ -122,6 +149,14 @@ x-metricbeat-common: &metricbeat-common - METRICBEAT_USER=${METRICBEAT_USER:-elastic} - METRICBEAT_PASSWORD=${METRICBEAT_PASSWORD:-kibanaserver} - KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"} + deploy: + resources: + limits: + cpus: "${METRICBEAT_DOCKER_CPU_MAX}" + memory: "${METRICBEAT_DOCKER_RAM}" + reservations: + cpus: "${METRICBEAT_DOCKER_CPU_MIN}" + memory: "${METRICBEAT_DOCKER_RAM}" volumes: - ../services/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/usr/share/metricbeat/root-ca.crt:ro @@ -136,6 +171,7 @@ x-filebeat-common: &filebeat-common image: ${FILEBEAT_IMAGE:-docker.elastic.co/beats/filebeat:8.18.2} command: ${FILEBEAT_STARTUP_COMMAND:-'-e --strict.perms=false'} restart: unless-stopped + shm_size: ${FILEBEAT_DOCKER_SHM:-1g} env_file: - ./elasticsearch.env - ../security/env/users_elasticsearch.env @@ -144,6 +180,14 @@ x-filebeat-common: &filebeat-common - FILEBEAT_USER=${FILEBEAT_USER:-elastic} - FILEBEAT_PASSWORD=${FILEBEAT_PASSWORD:-kibanaserver} - KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"} + deploy: + resources: + limits: + cpus: "${FILEBEAT_DOCKER_CPU_MAX}" + memory: "${FILEBEAT_DOCKER_RAM}" + reservations: + cpus: "${FILEBEAT_DOCKER_CPU_MIN}" + memory: "${FILEBEAT_DOCKER_RAM}" volumes: - ../services/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:rw - ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/etc/pki/root/root-ca.crt:ro @@ -162,15 +206,10 @@ services: # Postgres container with sample data # #---------------------------------------------------------------------------# samples-db: - <<: *common-ulimits + <<: *db-common image: postgres:17.5-alpine container_name: cogstack-samples-db - shm_size: ${POSTGRES_SHM_SIZE:-"1g"} - restart: always platform: linux/amd64 - env_file: - - ./database.env - - ../security/env/users_database.env environment: # PG env vars - POSTGRES_USER=${POSTGRES_USER_SAMPLES:-test} @@ -194,19 +233,14 @@ services: # CogStack Databank / Cogstack-DB, production database # #---------------------------------------------------------------------------# cogstack-databank-db: - <<: *common-ulimits + <<: *db-common image: postgres:17.5-alpine container_name: cogstack-production-databank-db - shm_size: ${POSTGRES_SHM_SIZE:-"1g"} - restart: always platform: linux/amd64 - env_file: - - ./database.env - - ../security/env/users_database.env environment: - - POSTGRES_USER=${POSTGRES_USER:-admin} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-admin} - - POSTGRES_DATABANK_DB=${POSTGRES_DATABANK_DB:-cogstack} + - POSTGRES_USER=${DATABASE_USER:-admin} + - POSTGRES_PASSWORD=${DATABASE_PASSWORD:-admin} + - POSTGRES_DATABANK_DB=${DATABASE_DB_NAME:-cogstack} volumes: # mapping postgres data dump and initialization - ../services/cogstack-db/pgsql/schemas:/data/:ro @@ -222,14 +256,9 @@ services: - cognet cogstack-databank-db-mssql: - <<: *common-ulimits + <<: *db-common image: mcr.microsoft.com/mssql/server:2019-latest container_name: cogstack-production-databank-db-mssql - shm_size: ${POSTGRES_SHM_SIZE:-"1g"} - restart: always - env_file: - - ./database.env - - ../security/env/users_database.env environment: - ACCEPT_EULA=y - 
MSSQL_SA_USER=${MSSQL_SA_USER:-sa} @@ -252,7 +281,7 @@ services: es_native_create_certs: container_name: es_create_certs image: docker.elastic.co/elasticsearch/elasticsearch:8.18.2 - shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"} + shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g} env_file: *es-env restart: "no" command: bash -c "bash /usr/share/elasticsearch/es_native_cert_generator.sh" @@ -287,7 +316,7 @@ services: ports: - "${ELASTICSEARCH_NODE_1_OUTPUT_PORT:-9200}:9200" - "${ELASTICSEARCH_NODE_1_COMM_OUTPUT_PORT:-9300}:9300" - - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600" # required for Performance Analyzer + - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600" elasticsearch-2: extends: @@ -306,7 +335,7 @@ services: ports: - "${ELASTICSEARCH_NODE_2_OUTPUT_PORT:-9201}:9200" - "${ELASTICSEARCH_NODE_2_COMM_OUTPUT_PORT:-9301}:9300" - - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600" # required for Performance Analyzer + - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600" elasticsearch-3: extends: @@ -325,7 +354,7 @@ services: ports: - "${ELASTICSEARCH_NODE_3_OUTPUT_PORT:-9202}:9200" - "${ELASTICSEARCH_NODE_3_COMM_OUTPUT_PORT:-9302}:9300" - - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600" # required for Performance Analyzer + - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600" metricbeat-1: <<: *metricbeat-common @@ -350,9 +379,9 @@ services: container_name: cogstack-metricbeat-3 volumes: - metricbeat-data-3:/usr/share/metricbeat/data - - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.p12:/usr/share/metricbeat/esnode.p12:ro - - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.crt:/usr/share/metricbeat/esnode.crt:ro - - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.key:/usr/share/metricbeat/esnode.key:ro + - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.p12:/usr/share/metricbeat/esnode.p12:ro + - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.crt:/usr/share/metricbeat/esnode.crt:ro + - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.key:/usr/share/metricbeat/esnode.key:ro filebeat-1: <<: *filebeat-common @@ -389,9 +418,9 @@ services: #---------------------------------------------------------------------------# kibana: <<: *common-ulimits - image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.2.0} + image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.3.0} container_name: cogstack-kibana - shm_size: ${KIBANA_SHM_SIZE:-"1g"} + shm_size: ${KIBANA_DOCKER_SHM_SIZE:-1g} restart: always env_file: *es-env environment: @@ -401,7 +430,14 @@ services: # INFO: uncomment below to enable SSL keys SERVER_SSL_ENABLED: ${ELASTICSEARCH_SSL_ENABLED:-"true"} OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver} - + deploy: + resources: + limits: + cpus: "${KIBANA_DOCKER_CPU_MAX}" + memory: "${KIBANA_DOCKER_RAM}" + reservations: + cpus: "${KIBANA_DOCKER_CPU_MIN}" + memory: "${KIBANA_DOCKER_RAM}" volumes: # INFO: Kibana 
configuration mapped via volume (make sure to comment this and uncomment the next line if you are using NATIVE kibana deployment) - ../services/kibana/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/${KIBANA_CONFIG_FILE_VERSION:-opensearch_dashboards}.yml:ro @@ -434,7 +470,7 @@ services: image: cogstacksystems/cogstack-nifi:latest container_name: cogstack-nifi hostname: nifi - shm_size: ${NIFI_SHM_SIZE:-"1g"} + shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"} environment: - USER_ID=${NIFI_UID:-1000} - GROUP_ID=${NIFI_GID:-1000} @@ -443,6 +479,15 @@ services: - NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443} - NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082} - NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000} + - JVM_OPTS="${NIFI_JVM_OPTS:--XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom}" + deploy: + resources: + limits: + cpus: "${NIFI_DOCKER_CPU_MAX}" + memory: "${NIFI_DOCKER_RAM}" + reservations: + cpus: "${NIFI_DOCKER_CPU_MIN}" + memory: "${NIFI_DOCKER_RAM}" volumes: # INFO: drivers folder - ../nifi/drivers:/opt/nifi/drivers @@ -450,12 +495,12 @@ services: # INFO: if there are local changes, map these content from local host to container # (normally, these 3 directories below are bundled with our NiFi image) # N.B. The container user may not have the permission to read these directories/files. - - ../nifi/user-templates:/opt/nifi/nifi-current/conf/templates:rw - - ../nifi/user-scripts:/opt/nifi/user-scripts:rw - - ../nifi/user-schemas:/opt/nifi/user-schemas:rw + - ../nifi/user_templates:/opt/nifi/nifi-current/conf/templates:rw + - ../nifi/user_scripts:/opt/nifi/user_scripts:rw + - ../nifi/user_schemas:/opt/nifi/user_schemas:rw # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x - - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw + - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw # INFO: uncomment below to map security certificates if need to secure NiFi endpoints - ../security:/security:ro @@ -501,7 +546,7 @@ services: image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.7.2} hostname: nifi-registry container_name: cogstack-nifi-registry-flow - shm_size: ${NIFI_REGISTRY_SHM_SIZE:-"1g"} + shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g} user: root environment: - http_proxy=$HTTP_PROXY @@ -509,11 +554,11 @@ services: - no_proxy=$no_proxy - USER_ID=${NIFI_UID:-1000} - GROUP_ID=${NIFI_GID:-1000} - - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-./conf/keystore.jks} + - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-/security/certificates/nifi/nifi-keystore.jks} - KEYSTORE_TYPE=${NIFI_KEYSTORE_TYPE:-jks} - KEYSTORE_PASSWORD=${NIFI_KEYSTORE_PASSWORD:-"cogstackNifi"} - TRUSTSTORE_PASSWORD=${NIFI_TRUSTSTORE_PASSWORD:-"cogstackNifi"} - - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-./conf/truststore.jks} + - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-/security/certificates/nifi/nifi-truststore.jks} - TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks} - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"} @@ -521,10 +566,18 @@ services: - NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database} #- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file} - NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage} + deploy: + resources: + limits: + cpus: 
"${NIFI_REGISTRY_DOCKER_CPU_MAX}" + memory: "${NIFI_REGISTRY_DOCKER_RAM}" + reservations: + cpus: "${NIFI_REGISTRY_DOCKER_CPU_MIN}" + memory: "${NIFI_REGISTRY_DOCKER_RAM}" volumes: - ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw - - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro - - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-truststore.jks://opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro + - ../security:/security:ro + - nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database - nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage - nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work @@ -545,8 +598,16 @@ services: image: cogstacksystems/nifi-nginx:latest container_name: cogstack-nifi-nginx restart: always - shm_size: 512mb + shm_size: ${NGINX_SHM_SIZE:-1g} env_file: *all-env + deploy: + resources: + limits: + cpus: "${NGINX_DOCKER_CPU_MAX}" + memory: "${NGINX_DOCKER_RAM}" + reservations: + cpus: "${NGINX_DOCKER_CPU_MIN}" + memory: "${NGINX_DOCKER_RAM}" volumes: - ../services/nginx/sites-enabled:/etc/nginx/sites-enabled:ro - ../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw @@ -587,12 +648,20 @@ services: <<: *common-ulimits container_name: cogstack-gitea image: gitea/gitea:1.23-rootless - shm_size: ${DOCKER_SHM_SIZE:-"1g"} + shm_size: ${GITEA_DOCKER_SHM_SIZE:-"1g"} restart: always environment: - http_proxy=$HTTP_PROXY - https_proxy=$HTTPS_PROXY - no_proxy=$no_proxy + deploy: + resources: + limits: + cpus: "${GITEA_DOCKER_CPU_MAX}" + memory: "${GITEA_DOCKER_RAM}" + reservations: + cpus: "${GITEA_DOCKER_CPU_MIN}" + memory: "${GITEA_DOCKER_RAM}" volumes: # app config - ../services/gitea/app.ini:/etc/gitea/app.ini:rw diff --git a/docs/deploy/troubleshooting.md b/docs/deploy/troubleshooting.md index 15dc8c94f..b448a3dad 100644 --- a/docs/deploy/troubleshooting.md +++ b/docs/deploy/troubleshooting.md @@ -75,15 +75,15 @@ ERROR: [1] bootstrap checks failed To solve this one needs to simply execute :
- on Linux/Mac OS X : - ```sysctl -w vm.max_map_count=262144``` in terminal. - To make the same change systemwide plase add ```vm.max_map_count=262144``` to /etc/sysctl.conf and restart the dockerservice/machine. + `sysctl -w vm.max_map_count=262144` in terminal. + To make the same change system-wide, please add `vm.max_map_count=262144` to /etc/sysctl.conf and restart the Docker service/machine. An example of this can be found under /services/elasticsearch/sysctl.conf
- on Windows you need to enter the following commands in a powershell instance:
- ```wsl -d docker-desktop``` + `wsl -d docker-desktop`
- ```sysctl -w vm.max_map_count=262144``` + `sysctl -w vm.max_map_count=262144` For more on this issue please read: https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html diff --git a/docs/deploy/workflows.md b/docs/deploy/workflows.md index 0ad86ff0b..ddc7ee78d 100644 --- a/docs/deploy/workflows.md +++ b/docs/deploy/workflows.md @@ -260,7 +260,7 @@ Given a document content encoded as JSON, it will return payload containing the There are several NiFi components involved in this process which stand out: 1. `ConvertAvroToJSON` - converts the AVRO records to JSON format using a generic format transcoder, 2. `ExecuteScript-ConvertRecordToMedCATinput` - prepares the JSON payload for MedCAT Service, this is Jython script, it has several configurable process properties: - - `document_id_field\ = `docid` , the exact name of the unique Id column for the DB/ES record + - `document_id_field` = `docid`, the exact name of the unique Id column for the DB/ES record - `document_text_field` = `document`, field/column name containing free text - `log_file_name` = `nlp_request_bulk_parse_medical_text.log`, creates a log file in the repo folder `/nifi/user-scripts/` - `log_invalid_records_to_file` = `True`, enable/disable logging errors to logfile with the above mentioned file name @@ -403,4 +403,3 @@ Prerequisites for this workflow: 4. datetime fields must have the same format. The script used for this process is located here: `nifi/user-scripts/cogstack_cohort_generate_data.py`. Please read all the info provided in the NiFi template. - diff --git a/docs/main.md b/docs/main.md index 9ab7119c7..ab98b59f8 100644 --- a/docs/main.md +++ b/docs/main.md @@ -1,2 +1,3 @@ ```{include} ../README.md +``` diff --git a/docs/nifi/main.md b/docs/nifi/main.md index 42b1ae1fc..879c05359 100644 --- a/docs/nifi/main.md +++ b/docs/nifi/main.md @@ -25,9 +25,9 @@ Avro Schema:[official documentation](https://avro.apache.org/docs/1.11.1/) ├── devel - custom folder that is mounted on the NiFi container where you may place your own scripts, again, read & write permissions required ├── drivers - drivers used for DB connections, currently PostgreSQL and MSSQL ├── nifi-app.log - log file mounted directly from the container for easy log checking -├── user-schemas - Avro schemas used within workflows, it can also contain other schemas used in specific custom processors -├── user-scripts - custom scripts used in workflows, you can put them here -└── user-templates - here we store the fully exported templates of the workflows within NiFi +├── user_schemas - Avro schemas used within workflows, it can also contain other schemas used in specific custom processors +├── user_scripts - custom scripts used in workflows, you can put them here +└── user_templates - here we store the fully exported templates of the workflows within NiFi ``` ## Custom Docker image @@ -85,14 +85,14 @@ nifi.flow.configuration.archive.max.time=1 days nifi.flow.configuration.archive.max.storage=12 GB ``` -By default, the flowfiles thar are out of the processing queues will be archived for a set period of time. The ```nifi.flow.configuration.archive.max.time``` sets the max duration, max size configurable via ```nifi.flow.configuration.archive.max.storage```, take note of these properties, the storage limit can quickly be hit if you have a high flow-file throughput. +By default, the flowfiles that are out of the processing queues will be archived for a set period of time. 
The `nifi.flow.configuration.archive.max.time` property sets the maximum archive duration, while the maximum size is configurable via `nifi.flow.configuration.archive.max.storage`; take note of these properties, as the storage limit can quickly be hit if you have a high flow-file throughput. Make sure to check the archive storage and flowfile storage settings as these will be the first to impact the space used for logging.

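Before tuning the archive limits above, it can help to see how much disk the archive actually holds. A quick sketch, assuming the default NiFi archive location under the NiFi home and the container name used in this repo's compose files:

```bash
# Disk used by the flow configuration archive (default path; adjust if
# nifi.flow.configuration.archive.dir is overridden in nifi.properties).
docker exec cogstack-nifi du -sh /opt/nifi/nifi-current/conf/archive

# The flowfile and content repositories are the other big space consumers.
docker exec cogstack-nifi du -sh \
  /opt/nifi/nifi-current/flowfile_repository \
  /opt/nifi/nifi-current/content_repository
```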
#### IMPORTANT NOTE about nifi properties -:::{admonition} IMPORTANT NOTE about `nifi.properties +:::{admonition} IMPORTANT NOTE about `nifi.properties` :class: warning For Linux users : This is a file that will get modified on runtime as when the container is up some of the properties within the file will get changed ( `nifi.cluster.node.address` for example). Some permission error's might pop out as the UID and GID of the folder permissions are different from that of the user within the container, which is using UID=1000 and GID=1000, declared in the `Dockerfile` and in `deploy/services.yml` under the `nifi` service section. To avoid permission issues, on the host container you will need to create a group with the GID 1000, assign the user that is running the docker command to the created group, and everything should work. ::: @@ -302,4 +302,4 @@ This section covers dealing with data type issues depending on DB types and/or o ### MySQL Issues have been found with MySQL: -- allows zero dates in DateTime fields -> solution: can be overcome in the URL connection string using parameters \ No newline at end of file +- allows zero dates in DateTime fields -> solution: can be overcome in the URL connection string using parameters diff --git a/docs/security/nifi.md b/docs/security/nifi.md index b5406bd7e..eb0aa63d5 100644 --- a/docs/security/nifi.md +++ b/docs/security/nifi.md @@ -41,7 +41,7 @@ Before starting the NIFI container it's important to take note of the following - **(OPTIONAL, DO NOT USE FOR NIFI VERSION >= 2.0)** the `nifi_toolkit_security.sh` script is used to download the nifi toolkit and generate new certificates and keys that are used by the container, take note that inside the `localhost` folder there is another nifi.properties file that is generated, we must look to the following setttings which are generated randomly and copy them to the `nifi/conf/nifi.properties` file. - the trust/store keys generated for production will be in the `nifi_certificates/localhost` folder and the `nifi-cert.pem` + `nifi-key.key` files. in the base `nifi_certificates` folder. -- as part of the security process the `nifi.sensitive.props.key` should be set to a random string or a password of minimum 12 characters. Once this is set do NOT modify it as all the other sensitive passwords will be hashed with this string. By default this is set to ```cogstackNiFipass``` +- as part of the security process the `nifi.sensitive.props.key` should be set to a random string or a password of minimum 12 characters. Once this is set do NOT modify it as all the other sensitive passwords will be hashed with this string. By default this is set to `cogstackNiFipass` Example (`nifi/conf/nifi.properties`): ```properties @@ -54,7 +54,7 @@ Example (`nifi/conf/nifi.properties`): ### Setting up access via user account (SINGLE USER CREDETIAL) -This is entirely optional, if you have configered the security certs as described in ```security/README.md``` then you are good to go. +This is entirely optional; if you have configured the security certs as described in `security/README.md`, then you are good to go.
Default username :
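For reference, single-user credentials can be (re)set with the same toolkit command that appears commented out in `deploy/services-dev.yml`; a sketch with placeholder credentials (NiFi rejects passwords shorter than 12 characters):

```bash
# Set the NiFi single-user login inside the running container, then restart
# so the regenerated login-identity-providers configuration takes effect.
docker exec -it cogstack-nifi \
  /opt/nifi/nifi-current/bin/nifi.sh set-single-user-credentials \
  admin 'change-me-12chars-min'

docker restart cogstack-nifi
```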
diff --git a/nifi/Dockerfile b/nifi/Dockerfile index 746c57988..cd335892a 100644 --- a/nifi/Dockerfile +++ b/nifi/Dockerfile @@ -2,6 +2,8 @@ ARG NIFI_VERSION=2.7.2 FROM apache/nifi:${NIFI_VERSION} +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + ARG HTTP_PROXY="" ARG HTTPS_PROXY="" ARG no_proxy="" @@ -14,7 +16,7 @@ ARG GID=${NIFI_GID:-1000} ARG TZ="Europe/London" ARG NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY="/opt/nifi/nifi-current/python/framework" ARG NIFI_PYTHON_EXTENSIONS_SOURCE_DIRECTORY_DEFAULT="/opt/nifi/nifi-current/python_extensions" -ARG NIFI_PYTHON_WORKING_DIRECTORY="/opt/nifi/user-scripts" +ARG NIFI_PYTHON_WORKING_DIRECTORY="/opt/nifi/user_scripts" ENV TZ=${TZ} ENV NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} @@ -25,34 +27,65 @@ ENV NIFI_PYTHON_WORKING_DIRECTORY=${NIFI_PYTHON_WORKING_DIRECTORY} ENV PIP_PREFER_BINARY=1 ENV PIP_DISABLE_PIP_VERSION_CHECK=1 ENV PIP_NO_CACHE_DIR=1 +# Enables Python to generate .pyc files in the container +ENV PYTHONDONTWRITEBYTECODE=0 +# Turns off buffering for easier container logging +ENV PYTHONUNBUFFERED=1 # default env vars to prevent NiFi from running on HTTP ENV NIFI_WEB_HTTP_PORT="" ENV NIFI_WEB_HTTP_HOST="" -RUN echo "GID=${GID}" -RUN echo "UID=${UID}" - USER root -# run updates and install some base utility packages along with python support -RUN apt-get update && apt-get upgrade -y --no-install-recommends && apt-get install -y --no-install-recommends iputils-ping libssl-dev openssl apt-transport-https apt-utils curl software-properties-common wget git build-essential make cmake ca-certificates zip unzip tzdata jq - -RUN echo "deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources - -RUN echo "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources -RUN echo "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources - -# Microsoft repos -RUN wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg -RUN echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" | tee -a /etc/apt/sources.list - -RUN apt-get update && apt-get install --no-install-recommends -y ssl-cert libsqlite3-dev python3-dev python3-pip python3.11 python3.11-dev python3-venv sqlite3 postgresql-server-dev-all +# add repositories, install tooling, and clean up apt metadata in one layer +RUN set -eux; \ + apt-get update -y; \ + apt-get install -y --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + wget; \ + printf '%s\n' \ + "deb 
http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \ + "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \ + "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \ + "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \ + "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \ + "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \ + "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \ + "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \ + > /etc/apt/sources.list.d/debian.list; \ + wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg; \ + echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" > /etc/apt/sources.list.d/microsoft.list; \ + apt-get update -y; \ + apt-get upgrade -y --no-install-recommends; \ + apt-get install -y --no-install-recommends \ + apt-utils \ + build-essential \ + cmake \ + git \ + iputils-ping \ + jq \ + libsqlite3-dev \ + libssl-dev \ + make \ + openssl \ + postgresql-server-dev-all \ + python3.11 \ + python3.11-dev \ + python3-dev \ + python3-pip \ + python3-venv \ + software-properties-common \ + sqlite3 \ + ssl-cert \ + tzdata \ + unzip \ + zip; \ + apt-get clean; \ + rm -rf /var/lib/apt/lists/* # bust cache ENV UV_VERSION=latest @@ -60,33 +93,31 @@ ENV UV_VERSION=latest # install rust, medcat requirement, install UV ENV HOME=/root ENV PATH="/root/.cargo/bin:${PATH}" +ENV UV_INSTALL_DIR=/usr/local/bin -RUN curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \ - && chmod +x /tmp/rustup-init.sh \ - && /tmp/rustup-init.sh -y \ - && rm /tmp/rustup-init.sh - -RUN curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh \ - && bash /tmp/install_uv.sh - -RUN UV_PATH=$(find / -name uv -type f | head -n1) && \ - ln -s "$UV_PATH" /usr/local/bin/uv +RUN set -eux; \ + curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh; \ + chmod +x /tmp/rustup-init.sh; \ + /tmp/rustup-init.sh -y; \ + rm /tmp/rustup-init.sh -# clean up apt -RUN apt-get clean autoclean && apt-get autoremove --purge -y +RUN set -eux; \ + curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh; \ + bash /tmp/install_uv.sh; \ + rm /tmp/install_uv.sh ######################################## Python / PIP SECTION ######################################## RUN uv pip install --no-cache-dir --break-system-packages --system --upgrade pip setuptools wheel -# install util packages used in NiFi scripts (such as MedCAT, avro, nifyapi, etc.) +# install util packages used in NiFi scripts (such as avro, nipyapi, etc.) 
COPY ./requirements.txt ./requirements.txt -RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt" +RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt" --index-url https://pypi.org/simple ####################################################################################################### # solve groovy grape proxy issues, grape ignores the current environment's proxy settings -RUN export JAVA_OPTS="-Dhttp.proxyHost=$HTTP_PROXY -Dhttps.proxyHost=$HTTPS_PROXY -Dhttp.nonProxyHosts=$no_proxy" +ENV JAVA_OPTS="-Dhttp.proxyHost=${HTTP_PROXY} -Dhttps.proxyHost=${HTTPS_PROXY} -Dhttp.nonProxyHosts=${no_proxy}" # INSTALL NAR extensions WORKDIR /opt/nifi/nifi-current/lib/ diff --git a/nifi/user-schemas/avro/.keep b/nifi/__init__.py similarity index 100% rename from nifi/user-schemas/avro/.keep rename to nifi/__init__.py diff --git a/nifi/conf/nifi.properties b/nifi/conf/nifi.properties index 1363e7deb..6be2c3176 100644 --- a/nifi/conf/nifi.properties +++ b/nifi/conf/nifi.properties @@ -48,9 +48,9 @@ nifi.templates.enabled=true nifi.python.command=python3.11 nifi.python.framework.source.directory=/opt/nifi/nifi-current/python/framework nifi.python.extensions.source.directory.default=/opt/nifi/nifi-current/python_extensions -nifi.python.working.directory=/opt/nifi/user-scripts -nifi.python.logs.directory=./logs -nifi.python.max.processes.per.extension.type=10 +nifi.python.working.directory=/opt/nifi/user_scripts +nifi.python.logs.directory=./logs +nifi.python.max.processes.per.extension.type=10 nifi.python.max.processes=100 #################### @@ -362,4 +362,3 @@ nifi.diagnostics.on.shutdown.max.filecount=10 # The diagnostics folder's maximum permitted size in bytes. If the limit is exceeded, the oldest files are deleted. 
nifi.diagnostics.on.shutdown.max.directory.size=10 MB - diff --git a/nifi/nifi-registry/nifi-registry.properties b/nifi/nifi-registry/nifi-registry.properties index 802c3a987..40bff0249 100644 --- a/nifi/nifi-registry/nifi-registry.properties +++ b/nifi/nifi-registry/nifi-registry.properties @@ -33,11 +33,11 @@ nifi.registry.web.proxy.context.path=/nifi-registry nifi.registry.web.proxy.host=localhost:18443,nifi-registry:18443,nifi-registry-flow:18443,cogstack-nifi-registry-flow:18443,cogstack-nifi-registry:18443,nginx.local:18443 # security properties # -nifi.registry.security.keystore=/opt/nifi-registry/nifi-registry-current/conf/keystore.jks +nifi.registry.security.keystore=/security/certificates/nifi/nifi-keystore.jks nifi.registry.security.keystoreType=JKS nifi.registry.security.keystorePasswd=cogstackNifi nifi.registry.security.keyPasswd=cogstackNifi -nifi.registry.security.truststore=/opt/nifi-registry/nifi-registry-current/conf/truststore.jks +nifi.registry.security.truststore=/security/certificates/nifi/nifi-truststore.jks nifi.registry.security.truststoreType=JKS nifi.registry.security.truststorePasswd=cogstackNifi nifi.registry.security.needClientAuth=false @@ -124,4 +124,4 @@ nifi.registry.security.user.authorizer=managed-authorizer # revision management # # This feature should remain disabled until a future NiFi release that supports the revision API changes -nifi.registry.revisions.enabled=false \ No newline at end of file +nifi.registry.revisions.enabled=false diff --git a/nifi/requirements-dev.txt b/nifi/requirements-dev.txt new file mode 100644 index 000000000..5547e70bf --- /dev/null +++ b/nifi/requirements-dev.txt @@ -0,0 +1,7 @@ +ruff==0.12.12 +mypy==1.17.0 +mypy-extensions==1.1.0 +types-aiofiles==24.1.0.20250708 +types-PyYAML==6.0.12.20250516 +types-setuptools==80.9.0.20250529 +timeout-decorator==0.5.0 diff --git a/nifi/requirements.txt b/nifi/requirements.txt index 1632088d4..6f7f981f4 100644 --- a/nifi/requirements.txt +++ b/nifi/requirements.txt @@ -1,45 +1,32 @@ +wheel==0.45.1 + # data science pkgs -seaborn==0.13.2 -matplotlib==3.10.6 -graphviz==0.21 -plotly==6.3.0 -keras==3.12.0 nltk==3.9.1 -numpy>=1.26.0,<2.0.0 -pandas==1.5.3 -dill>=0.3.6,<1.0.0 -bokeh==3.8.0 -psycopg[c,binary]==3.2.9 -overrides==7.0.0 +numpy==2.3.5 +pandas==2.3.3 +pyarrow==22.0.0 # used in NiFi scripts: geolocation, avro conversion etc. 
py4j==0.10.9.9 rancoord==0.0.6 geocoder==1.38.1 -avro==1.12.0 +avro==1.12.1 nipyapi==1.1.0 py7zr==1.0.0 -ipyparallel==9.0.1 -cython==3.1.3 -tqdm==4.67.1 jsonpickle==4.1.1 -certifi==2025.8.3 -xlsxwriter==3.2.5 -mysql-connector-python==9.4.0 -pymssql==2.3.7 +xlsxwriter==3.2.9 +mysql-connector-python==9.5.0 +pymssql==2.3.9 +psycopg[c,binary]==3.2.9 +requests==2.32.5 +PyYAML==6.0.3 +pydantic==2.12.5 +overrides==7.0.0 # other utils xnat==0.7.2 # ElasticSearch/OpenSearch packages -opensearch-py==3.0.0 elasticsearch9==9.1.0 +opensearch-py==3.0.0 neo4j==5.28.2 - -# git utils -dvc==3.62.0 -GitPython==3.1.45 -PyYAML==6.0.2 - -# code utils -ruff==0.12.12 diff --git a/nifi/user-scripts/dto/nifi_api_config.py b/nifi/user-scripts/dto/nifi_api_config.py deleted file mode 100644 index 303bdd1b1..000000000 --- a/nifi/user-scripts/dto/nifi_api_config.py +++ /dev/null @@ -1,45 +0,0 @@ -import os - - -class NiFiAPIConfig: - NIFI_URL_SCHEME: str = "https" - NIFI_HOST: str = "localhost" - NIFI_PORT: int = 8443 - NIFI_REGISTRY_PORT: int = 18443 - - NIFI_USERNAME: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin") - NIFI_PASSWORD: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi") - - ROOT_CERT_CA_PATH: str = os.path.abspath("../../../../security/certificates/root/root-ca.pem") - NIFI_CERT_PEM_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.pem") - NIFI_CERT_KEY_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.key") - - VERIFY_SSL: bool = True - - @property - def nifi_base_url(self) -> str: - """Full NiFi base URL, e.g. https://localhost:8443""" - return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_PORT}" - - @property - def nifi_api_url(self) -> str: - """"NiFi REST API root, e.g. https://localhost:8443/nifi-api""" - return f"{self.nifi_base_url}/nifi-api" - - @property - def nifi_registry_base_url(self) -> str: - """"NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry""" - return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry/" - - @property - def nifi_registry_api_url(self) -> str: - """"NiFi Registry REST API root, e.g. 
https://localhost:18443/nifi-registry/nifi-registry-api""" - return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry-api" - - def auth_credentials(self) -> tuple[str, str]: - """Convenience for requests auth=(user, password).""" - return (self.NIFI_USERNAME, self.NIFI_PASSWORD) - - def get_nifi_ssl_certs(self) -> tuple[str, str]: - """Convenience for requests cert=(cert_path, key_path).""" - return (self.NIFI_CERT_PEM_PATH, self.NIFI_CERT_KEY_PATH) diff --git a/nifi/user-scripts/dto/pg_config.py b/nifi/user-scripts/dto/pg_config.py deleted file mode 100644 index 19f15d029..000000000 --- a/nifi/user-scripts/dto/pg_config.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel, Field - - -class PGConfig(BaseModel): - host: str = Field(default="localhost") - port: int = Field(default=5432) - db: str = Field(default="samples_db") - user: str = Field(default="test") - password: str = Field(default="test") - timeout: int = Field(default=50) diff --git a/nifi/user-scripts/dto/service_health.py b/nifi/user-scripts/dto/service_health.py deleted file mode 100644 index 5f6455dbb..000000000 --- a/nifi/user-scripts/dto/service_health.py +++ /dev/null @@ -1,51 +0,0 @@ -from datetime import datetime -from typing import Literal - -from pydantic import BaseModel, Field - - -class ServiceHealth(BaseModel): - """ - Base health check model shared by all services. - """ - - service: str = Field(..., description="Service name, e.g. NiFi, PostgreSQL, OpenSearch/ElasticSearch, etc.") - status: Literal["healthy", "unhealthy", "degraded"] = Field( - ..., description="Current service status" - ) - message: str | None = Field(None, description="Optional status message") - timestamp: datetime = Field(default_factory=datetime.utcnow) - avg_processing_ms: float | None = Field(None) - service_info: str | None = Field(None) - connected: bool | None = Field(None) - - class Config: - extra = "ignore" - -class MLServiceHealth(ServiceHealth): - model_name: str | None = Field(None, description="Name of the ML model") - model_version: str | None = Field(None, description="Version of the ML model") - model_card: str | None = Field(None, description="URL or path to the model card") - -class NiFiHealth(ServiceHealth): - active_threads: int | None = Field(None, description="Number of active threads") - queued_bytes: int | None = Field(None, description="Total queued bytes") - queued_count: int | None = Field(None, description="Number of queued flowfiles") - -class ElasticsearchHealth(ServiceHealth): - cluster_status: str | None = Field(None, description="Cluster health status") - node_count: int | None = Field(None) - active_shards: int | None = Field(None) - -class PostgresHealth(ServiceHealth): - version: str | None = Field(None) - latency_ms: float | None = Field(None, description="Ping latency in milliseconds") - db_name: str | None = Field(None, description="Database name") - -class MedCATTrainerHealth(ServiceHealth): - """Health check model for MedCAT Trainer web service.""" - app_version: str | None = Field(None, description="MedCAT Trainer app version") - -class CogstackCohortHealth(ServiceHealth): - """Health check model for CogStack Cohort service.""" - pass diff --git a/nifi/user-scripts/logs/.gitignore b/nifi/user-scripts/logs/.gitignore deleted file mode 100644 index f59ec20aa..000000000 --- a/nifi/user-scripts/logs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/nifi/user-scripts/utils/helpers/nifi_api_client.py 
b/nifi/user-scripts/utils/helpers/nifi_api_client.py deleted file mode 100644 index 1c353d2c1..000000000 --- a/nifi/user-scripts/utils/helpers/nifi_api_client.py +++ /dev/null @@ -1,82 +0,0 @@ -from logging import Logger - -from dto.nifi_api_config import NiFiAPIConfig -from nipyapi import canvas, security -from nipyapi.nifi import ApiClient, ProcessGroupsApi -from nipyapi.nifi.configuration import Configuration as NiFiConfiguration -from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity -from nipyapi.nifi.models.processor_entity import ProcessorEntity -from nipyapi.registry import ApiClient as RegistryApiClient -from nipyapi.registry import BucketsApi -from nipyapi.registry.configuration import Configuration as RegistryConfiguration -from utils.generic import get_logger - - -class NiFiRegistryClient: - def __init__(self, config: NiFiAPIConfig) -> None: - self.config = config or NiFiAPIConfig() - self.nipyapi_config = RegistryConfiguration() - self.nipyapi_config.host = self.config.nifi_registry_api_url - self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL - self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH # type: ignore - self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH # type: ignore - self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH # type: ignore - - self.logger: Logger = get_logger(self.__class__.__name__) - - self.api_client = RegistryApiClient(self.nipyapi_config.host) - self.buckets_api = BucketsApi(self.api_client) - - def list_buckets(self): - buckets = self.buckets_api.get_buckets() - for b in buckets: - self.logger.info("Bucket: %s (%s)", b.name, b.identifier) - return buckets - - -class NiFiClient: - def __init__(self, config: NiFiAPIConfig) -> None: - self.config = config or NiFiAPIConfig() - self.nipyapi_config = NiFiConfiguration() - self.nipyapi_config.host = self.config.nifi_api_url - self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL - self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH # type: ignore - self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH # type: ignore - self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH # type: ignore - - self.logger: Logger = get_logger(self.__class__.__name__) - - self.api_client = ApiClient(self.nipyapi_config) - self.process_group_api = ProcessGroupsApi(self.api_client) - - self._login() - - def _login(self) -> None: - security.service_login( - service='nifi', - username=self.config.NIFI_USERNAME, - password=self.config.NIFI_PASSWORD - ) - self.logger.info("✅ Logged in to NiFi") - - def get_root_process_group_id(self) -> str: - return canvas.get_root_pg_id() - - def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object: - return canvas.get_process_group(process_group_name, identifier_type="nam") - - def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity: - return canvas.get_process_group(process_group_id, identifier_type="id") - - def start_process_group(self, process_group_id: str) -> bool: - return canvas.schedule_process_group(process_group_id, True) - - def stop_process_group(self, process_group_id: str) -> bool: - return canvas.schedule_process_group(process_group_id, False) - - def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]: - parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id") - return canvas.list_all_process_groups(parent_pg.id) - - def 
get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]: - return canvas.list_all_processors(process_group_id) diff --git a/nifi/user-scripts/utils/helpers/service.py b/nifi/user-scripts/utils/helpers/service.py deleted file mode 100644 index 9d4b28080..000000000 --- a/nifi/user-scripts/utils/helpers/service.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import time - -import psycopg2 -from psycopg2 import sql - -sys.path.append("../../dto/") - -from dto.pg_config import PGConfig - - -def check_postgres(cfg: PGConfig) -> tuple[bool, float | None, str | None]: - """Return (is_healthy, latency_ms, error_detail)""" - start = time.perf_counter() - try: - conn = psycopg2.connect( - host=cfg.host, - port=cfg.port, - dbname=cfg.db, - user=cfg.user, - password=cfg.password, - connect_timeout=cfg.timeout - ) - with conn.cursor() as cur: - cur.execute(sql.SQL("SELECT 1;")) - result = cur.fetchone() - conn.close() - if result != (1,): - return False, None, f"Unexpected result: {result}" - latency = (time.perf_counter() - start) * 1000 - return True, latency, None - except Exception as e: - return False, None, str(e) diff --git a/nifi/user-python-extensions/convert_avro_binary_field_to_base64.py b/nifi/user_python_extensions/convert_avro_binary_field_to_base64.py similarity index 99% rename from nifi/user-python-extensions/convert_avro_binary_field_to_base64.py rename to nifi/user_python_extensions/convert_avro_binary_field_to_base64.py index 0558dbbd4..99a80ec47 100644 --- a/nifi/user-python-extensions/convert_avro_binary_field_to_base64.py +++ b/nifi/user_python_extensions/convert_avro_binary_field_to_base64.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import base64 import copy diff --git a/nifi/user-python-extensions/convert_json_record_schema.py b/nifi/user_python_extensions/convert_json_record_schema.py similarity index 99% rename from nifi/user-python-extensions/convert_json_record_schema.py rename to nifi/user_python_extensions/convert_json_record_schema.py index da8434acc..e3e41859c 100644 --- a/nifi/user-python-extensions/convert_json_record_schema.py +++ b/nifi/user_python_extensions/convert_json_record_schema.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import json import traceback diff --git a/nifi/user-python-extensions/parse_service_response.py b/nifi/user_python_extensions/parse_service_response.py similarity index 99% rename from nifi/user-python-extensions/parse_service_response.py rename to nifi/user_python_extensions/parse_service_response.py index 7307b7273..6b3ca6fa4 100644 --- a/nifi/user-python-extensions/parse_service_response.py +++ b/nifi/user_python_extensions/parse_service_response.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import json import traceback diff --git a/nifi/user-python-extensions/prepare_record_for_nlp.py b/nifi/user_python_extensions/prepare_record_for_nlp.py similarity index 99% rename from nifi/user-python-extensions/prepare_record_for_nlp.py rename to nifi/user_python_extensions/prepare_record_for_nlp.py index 0f36f7069..f700fc45c 100644 --- a/nifi/user-python-extensions/prepare_record_for_nlp.py +++ b/nifi/user_python_extensions/prepare_record_for_nlp.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import io import json 
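These extension renames switch the shared-code import root from /opt/nifi/user-scripts to /opt/nifi/user_scripts. A minimal sketch of the import pattern the extensions rely on (the `get_logger` helper does exist in `user_scripts/utils/generic.py`; the extension name here is illustrative):

```python
import sys

# Inside the NiFi container the shared scripts are mounted at /opt/nifi/user_scripts;
# prepending it lets the `dto.*` and `utils.*` modules resolve as top-level packages.
sys.path.insert(0, "/opt/nifi/user_scripts")

from utils.generic import get_logger  # resolved via the path prepended above

logger = get_logger("sample_extension")
logger.info("user_scripts import root wired up")
```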
diff --git a/nifi/user-python-extensions/prepare_record_for_ocr.py b/nifi/user_python_extensions/prepare_record_for_ocr.py similarity index 99% rename from nifi/user-python-extensions/prepare_record_for_ocr.py rename to nifi/user_python_extensions/prepare_record_for_ocr.py index 0e29fa77d..3a3d513be 100644 --- a/nifi/user-python-extensions/prepare_record_for_ocr.py +++ b/nifi/user_python_extensions/prepare_record_for_ocr.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import base64 import io diff --git a/nifi/user-python-extensions/record_add_geolocation.py b/nifi/user_python_extensions/record_add_geolocation.py similarity index 99% rename from nifi/user-python-extensions/record_add_geolocation.py rename to nifi/user_python_extensions/record_add_geolocation.py index b29218ec6..62ca06200 100644 --- a/nifi/user-python-extensions/record_add_geolocation.py +++ b/nifi/user_python_extensions/record_add_geolocation.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import csv import json diff --git a/nifi/user-python-extensions/record_decompress_cerner_blob.py b/nifi/user_python_extensions/record_decompress_cerner_blob.py similarity index 99% rename from nifi/user-python-extensions/record_decompress_cerner_blob.py rename to nifi/user_python_extensions/record_decompress_cerner_blob.py index a7cb0afc5..ef5e257bf 100644 --- a/nifi/user-python-extensions/record_decompress_cerner_blob.py +++ b/nifi/user_python_extensions/record_decompress_cerner_blob.py @@ -1,6 +1,6 @@ import sys -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import base64 import json diff --git a/nifi/user-python-extensions/sample_processor.py b/nifi/user_python_extensions/sample_processor.py similarity index 99% rename from nifi/user-python-extensions/sample_processor.py rename to nifi/user_python_extensions/sample_processor.py index ddee7e475..da5ef3b74 100644 --- a/nifi/user-python-extensions/sample_processor.py +++ b/nifi/user_python_extensions/sample_processor.py @@ -1,7 +1,7 @@ import sys from typing import Any -sys.path.insert(0, "/opt/nifi/user-scripts") +sys.path.insert(0, "/opt/nifi/user_scripts") import io import json diff --git a/nifi/user-schemas/elasticsearch/indices/.keep b/nifi/user_schemas/avro/.keep similarity index 100% rename from nifi/user-schemas/elasticsearch/indices/.keep rename to nifi/user_schemas/avro/.keep diff --git a/nifi/user-schemas/elasticsearch/base_index_settings.json b/nifi/user_schemas/elasticsearch/base_index_settings.json similarity index 100% rename from nifi/user-schemas/elasticsearch/base_index_settings.json rename to nifi/user_schemas/elasticsearch/base_index_settings.json diff --git a/nifi/user-schemas/elasticsearch/templates/.keep b/nifi/user_schemas/elasticsearch/indices/.keep similarity index 100% rename from nifi/user-schemas/elasticsearch/templates/.keep rename to nifi/user_schemas/elasticsearch/indices/.keep diff --git a/nifi/user-schemas/json/.keep b/nifi/user_schemas/elasticsearch/templates/.keep similarity index 100% rename from nifi/user-schemas/json/.keep rename to nifi/user_schemas/elasticsearch/templates/.keep diff --git a/nifi/user-scripts/db/.gitignore b/nifi/user_schemas/json/.keep similarity index 100% rename from nifi/user-scripts/db/.gitignore rename to nifi/user_schemas/json/.keep diff --git a/nifi/user-schemas/legacy/annotation-medcat.avsc 
b/nifi/user_schemas/legacy/annotation-medcat.avsc similarity index 100% rename from nifi/user-schemas/legacy/annotation-medcat.avsc rename to nifi/user_schemas/legacy/annotation-medcat.avsc diff --git a/nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json b/nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json similarity index 100% rename from nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json rename to nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json diff --git a/nifi/user-schemas/legacy/cogstack_common_schema.avsc b/nifi/user_schemas/legacy/cogstack_common_schema.avsc similarity index 100% rename from nifi/user-schemas/legacy/cogstack_common_schema.avsc rename to nifi/user_schemas/legacy/cogstack_common_schema.avsc diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json b/nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json similarity index 100% rename from nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json rename to nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_full.avsc b/nifi/user_schemas/legacy/cogstack_common_schema_full.avsc similarity index 100% rename from nifi/user-schemas/legacy/cogstack_common_schema_full.avsc rename to nifi/user_schemas/legacy/cogstack_common_schema_full.avsc diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_mapping.json b/nifi/user_schemas/legacy/cogstack_common_schema_mapping.json similarity index 100% rename from nifi/user-schemas/legacy/cogstack_common_schema_mapping.json rename to nifi/user_schemas/legacy/cogstack_common_schema_mapping.json diff --git a/nifi/user-schemas/legacy/document.avsc b/nifi/user_schemas/legacy/document.avsc similarity index 100% rename from nifi/user-schemas/legacy/document.avsc rename to nifi/user_schemas/legacy/document.avsc diff --git a/nifi/user-schemas/legacy/document_all_fields.avsc b/nifi/user_schemas/legacy/document_all_fields.avsc similarity index 100% rename from nifi/user-schemas/legacy/document_all_fields.avsc rename to nifi/user_schemas/legacy/document_all_fields.avsc diff --git a/nifi/user-scripts/logs/parse_json/.gitkeep b/nifi/user_scripts/__init__.py similarity index 100% rename from nifi/user-scripts/logs/parse_json/.gitkeep rename to nifi/user_scripts/__init__.py diff --git a/nifi/user-scripts/bootstrap_external_lib_imports.py b/nifi/user_scripts/bootstrap_external_lib_imports.py similarity index 80% rename from nifi/user-scripts/bootstrap_external_lib_imports.py rename to nifi/user_scripts/bootstrap_external_lib_imports.py index 830743b06..4978ba944 100644 --- a/nifi/user-scripts/bootstrap_external_lib_imports.py +++ b/nifi/user_scripts/bootstrap_external_lib_imports.py @@ -14,6 +14,6 @@ def running_in_docker() -> bool: # we need to add it to the sys imports if running_in_docker(): - sys.path.insert(0, "/opt/nifi/user-scripts") + sys.path.insert(0, "/opt/nifi/user_scripts") else: - sys.path.insert(0, "./user-scripts") + sys.path.insert(0, "./user_scripts") diff --git a/nifi/user-scripts/clean_doc.py b/nifi/user_scripts/clean_doc.py similarity index 100% rename from nifi/user-scripts/clean_doc.py rename to nifi/user_scripts/clean_doc.py diff --git a/nifi/user-scripts/cogstack_cohort_generate_data.py b/nifi/user_scripts/cogstack_cohort_generate_data.py similarity index 100% rename from 
nifi/user-scripts/cogstack_cohort_generate_data.py rename to nifi/user_scripts/cogstack_cohort_generate_data.py diff --git a/nifi/user-scripts/cogstack_cohort_generate_random_data.py b/nifi/user_scripts/cogstack_cohort_generate_random_data.py similarity index 100% rename from nifi/user-scripts/cogstack_cohort_generate_random_data.py rename to nifi/user_scripts/cogstack_cohort_generate_random_data.py diff --git a/nifi/user-scripts/tmp/.gitignore b/nifi/user_scripts/db/.gitignore similarity index 100% rename from nifi/user-scripts/tmp/.gitignore rename to nifi/user_scripts/db/.gitignore diff --git a/nifi/user_scripts/dto/__init__.py b/nifi/user_scripts/dto/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nifi/user_scripts/dto/database_config.py b/nifi/user_scripts/dto/database_config.py new file mode 100644 index 000000000..0999ebb88 --- /dev/null +++ b/nifi/user_scripts/dto/database_config.py @@ -0,0 +1,31 @@ +from pathlib import Path +from typing import Any + +from pydantic import AliasChoices, Field, PositiveInt, SecretStr +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class DatabaseConfig(BaseSettings): + model_config = SettingsConfigDict( + env_prefix="DATABASE_", + env_file=[Path(__file__).resolve().parents[3] / "deploy" / "database.env", + Path(__file__).resolve().parents[3] / "security" / "env" / "users_database.env", + ], + extra="ignore", + env_ignore_empty=True, + populate_by_name=True + ) + + host: str = Field(default="localhost", validation_alias=AliasChoices("POSTGRES_HOST")) + port: int = Field(default=5432,validation_alias=AliasChoices("POSTGRES_PORT"), ge=1, le=65535) + + database_name : str = Field(default="db_samples", validation_alias=AliasChoices("DB", "DB_NAME")) + username: str = Field(default="test", validation_alias=AliasChoices("POSTGRES_USER_SAMPLES", "POSTGRES_USER")) + password: SecretStr = Field(default_factory=lambda: SecretStr("test"), + validation_alias=AliasChoices("POSTGRES_PASSWORD_SAMPLES", + "password", + "POSTGRES_PASSWORD")) + timeout: PositiveInt = Field(default=60, validation_alias=AliasChoices("TIMEOUT")) + + def get_field_values_kwargs(self) -> dict[str, Any]: + return self.model_dump() diff --git a/nifi/user_scripts/dto/elastic_config.py b/nifi/user_scripts/dto/elastic_config.py new file mode 100644 index 000000000..3ce08f0e2 --- /dev/null +++ b/nifi/user_scripts/dto/elastic_config.py @@ -0,0 +1,78 @@ +import json +from pathlib import Path +from typing import ClassVar + +from pydantic import AliasChoices, Field, SecretStr, field_validator +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class ElasticConfig(BaseSettings): + + ROOT_DIR: ClassVar = Path(__file__).resolve().parents[3] + CERT_ROOT_DIR: ClassVar = ROOT_DIR / "security" / "certificates" / "elastic" + + model_config = SettingsConfigDict( + env_prefix="ELASTICSEARCH_", + env_file=[ROOT_DIR / "deploy" / "elasticsearch.env", + ROOT_DIR / "security" / "env" / "users_elasticsearch.env", + ], + extra="ignore", + env_ignore_empty=True, + populate_by_name=True + ) + + elasticsearch_version: str = Field(default="opensearch", validation_alias=AliasChoices("VERSION")) + kibana_version: str = Field(default="opensearch-dashboards", validation_alias=AliasChoices("KIBANA_VERSION")) + + es_port_1: int = Field(default=9200, + validation_alias=AliasChoices("ELASTICSEARCH_NODE_1_OUTPUT_PORT"), ge=1, le=65535) + es_port_2: int = Field(default=9201, + validation_alias=AliasChoices("ELASTICSEARCH_NODE_2_OUTPUT_PORT"), ge=1, le=65535) + 
es_port_3: int = Field(default=9202, + validation_alias=AliasChoices("ELASTICSEARCH_NODE_3_OUTPUT_PORT"), ge=1, le=65535) + + kibana_host: str = Field(default="https://localhost:5601", + validation_alias=AliasChoices("KIBANA_HOST", "kibana_host")) + + kibana_port: int = Field(default=5601, + validation_alias=AliasChoices("KIBANA_SERVER_OUTPUT_PORT"), ge=1, le=65535) + + hosts: list[str] = Field(default_factory=list) + timeout: int = Field(default=60) + verify_ssl: bool = Field(default=False, validation_alias=AliasChoices("SSL_ENABLED", "ELASTICSEARCH_SSL_ENABLED")) + user: str = Field(default="admin", validation_alias=AliasChoices("ELASTIC_USER")) + password: SecretStr = Field(default_factory=lambda: SecretStr("admin"), + validation_alias=AliasChoices("ELASTIC_PASSWORD", + "password", + "ELASTICSEARCH_PASSWORD", + "OPENSEARCH_INITIAL_ADMIN_PASSWORD")) + + elastic_root_cert_ca_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "elastic-stack-ca.crt.pem").as_posix() + elastic_node_cert_key_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / + "elasticsearch/elasticsearch-1/elasticsearch-1.key").as_posix() + elastic_node_cert_pem_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / + "elasticsearch/elasticsearch-1/elasticsearch-1.crt").as_posix() + + kibana_client_cert_key_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "es_kibana_client.key").as_posix() + kibana_client_cert_pem_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "es_kibana_client.pem").as_posix() + + @field_validator("hosts", mode="before") + def parse_list(cls, v): + if isinstance(v, str): + return json.loads(v) + return v + + @property + def ports(self) -> list[int]: + return [self.es_port_1, self.es_port_2, self.es_port_3] + + def auth_credentials(self) -> tuple[str, str]: + """convenience for requests auth=(user, password).""" + return (self.user, self.password.get_secret_value()) + + def get_ssl_certs_paths(self) -> tuple[str, str]: + """convenience for requests cert=(cert_path, key_path).""" + return (self.elastic_node_cert_pem_path, self.elastic_node_cert_key_path) + + def get_kibana_ssl_certs_path(self) -> tuple[str, str]: + return (self.kibana_client_cert_pem_path, self.kibana_client_cert_key_path) \ No newline at end of file diff --git a/nifi/user_scripts/dto/nifi_api_config.py b/nifi/user_scripts/dto/nifi_api_config.py new file mode 100644 index 000000000..869444c87 --- /dev/null +++ b/nifi/user_scripts/dto/nifi_api_config.py @@ -0,0 +1,47 @@ +import os +from pathlib import Path + +CERTS_ROOT = Path(__file__).resolve().parents[3] / "security" / "certificates" + + +class NiFiAPIConfig: + + def __init__(self): + self.nifi_url_scheme = "https" + self.nifi_host = "localhost" + self.nifi_port = 8443 + self.nifi_registry_port = 18443 + self.nifi_username = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin") + self.nifi_password = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi") + self.root_cert_ca_path = (CERTS_ROOT / "root" / "root-ca.pem").as_posix() + self.nifi_cert_pem_path = (CERTS_ROOT / "nifi" / "nifi.pem").as_posix() + self.nifi_cert_key_path = (CERTS_ROOT / "nifi" / "nifi.key").as_posix() + self.verify_ssl = True + + @property + def nifi_base_url(self) -> str: + """Full NiFi base URL, e.g. https://localhost:8443""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_port}" + + @property + def nifi_api_url(self) -> str: + """NiFi REST API root, e.g. 
https://localhost:8443/nifi-api""" + return f"{self.nifi_base_url}/nifi-api" + + @property + def nifi_registry_base_url(self) -> str: + """NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry/""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry/" + + @property + def nifi_registry_api_url(self) -> str: + """nifi registry rest api root, e.g. https://localhost:18443/nifi-registry/nifi-registry-api""" + return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry-api/" + + def auth_credentials(self) -> tuple[str, str]: + """convenience for requests auth=(user, password).""" + return (self.nifi_username, self.nifi_password) + + def get_ssl_certs_paths(self) -> tuple[str, str]: + """convenience for requests cert=(cert_path, key_path).""" + return (self.nifi_cert_pem_path, self.nifi_cert_key_path) diff --git a/nifi/user_scripts/dto/service_health.py b/nifi/user_scripts/dto/service_health.py new file mode 100644 index 000000000..878467afc --- /dev/null +++ b/nifi/user_scripts/dto/service_health.py @@ -0,0 +1,51 @@ +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, Field + + +class ServiceHealth(BaseModel): + """ + Base health check model shared by all services. + """ + + service: str = Field(..., description="Service name, e.g. NiFi, PostgreSQL, OpenSearch/ElasticSearch, etc.") + status: Literal["healthy", "unhealthy", "degraded"] = Field(description="Current service status", + default="unhealthy") + + message: str | None = Field(default=None, description="Optional status message") + timestamp: datetime | None = Field(default_factory=datetime.now) + avg_processing_ms: float | None = Field(default=None) + service_info: str | None = Field(default=None) + connected: bool = Field(default=False) + latency_ms: float = Field(default=0.0, description="Ping latency in milliseconds") + + class Config: + extra = "ignore" + +class MLServiceHealth(ServiceHealth): + model_name: str | None = Field(None, description="Name of the ML model") + model_version: str | None = Field(None, description="Version of the ML model") + model_card: str | None = Field(None, description="URL or path to the model card") + +class NiFiHealth(ServiceHealth): + active_threads: int | None = Field(default=None, description="Number of active threads") + queued_bytes: int | None = Field(default=None, description="Total queued bytes") + queued_count: int | None = Field(default=None, description="Number of queued flowfiles") + +class ElasticHealth(ServiceHealth): + cluster_status: str | None = Field(default=None, description="Cluster health status") + node_count: int | None = Field(default=None) + active_shards: int | None = Field(default=None) + +class DatabaseHealth(ServiceHealth): + version: str | None = Field(default=None, description="Database version, e.g PgSQL 17, MSSQL 21, etc.") + db_name: str | None = Field(default=None, description="Database name") + +class MedCATTrainerHealth(ServiceHealth): + """Health check model for MedCAT Trainer web service.""" + app_version: str | None = Field(None, description="MedCAT Trainer app version") + +class CogstackCohortHealth(ServiceHealth): + """Health check model for CogStack Cohort service.""" + pass diff --git a/nifi/user_scripts/elastic_schema_converter.py b/nifi/user_scripts/elastic_schema_converter.py new file mode 100644 index 000000000..ea880673d --- /dev/null +++ b/nifi/user_scripts/elastic_schema_converter.py @@ -0,0 +1,64 @@ +import json +import re 
+import sys +import traceback +from logging import Logger + +logger: Logger = Logger(__name__) + +origin_index_mapping = json.loads(sys.stdin.read()) + +INPUT_INDEX_NAME = "" +OUTPUT_INDEX_NAME = "" +OUTPUT_FILE_NAME = "" +JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = "" +TRANSFORM_KEYS_LOWER_CASE = False + +for arg in sys.argv: + _arg = arg.split("=", 1) + _arg[0] = _arg[0].lower() + if _arg[0] == "input_index_name": + INPUT_INDEX_NAME = _arg[1] + if _arg[0] == "output_index_name": + OUTPUT_INDEX_NAME = _arg[1] + if _arg[0] == "output_file_name": + OUTPUT_FILE_NAME = _arg[1] + if _arg[0] == "json_field_mapper_schema_file_path": + JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = _arg[1] + +try: + with open(JSON_FIELD_MAPPER_SCHEMA_FILE_PATH) as file: + json_field_mapper: dict = json.load(file) + + origin_index_name = INPUT_INDEX_NAME if INPUT_INDEX_NAME else list(origin_index_mapping.keys())[0] + output_index_name = OUTPUT_INDEX_NAME if OUTPUT_INDEX_NAME else origin_index_name + + # re-map each top-level field of the origin mapping through the JSON field mapper + output_properties: dict = {} + for origin_field_name, origin_field_es_properties in origin_index_mapping["mappings"]["properties"].items(): + field_name = str(origin_field_name).lower() if TRANSFORM_KEYS_LOWER_CASE else str(origin_field_name) + mapped_name = json_field_mapper.get(field_name) + if isinstance(mapped_name, str): + # flat field: keep the original ES properties under the new name + output_properties[mapped_name] = origin_field_es_properties + elif isinstance(mapped_name, dict): + # nested field: re-map only the sub-fields present in the mapper + nested_properties = {} + for nested_field_name, nested_field_value in origin_field_es_properties.get("properties", {}).items(): + if nested_field_name in mapped_name: + nested_properties[mapped_name[nested_field_name]] = nested_field_value + output_properties[field_name] = {"properties": nested_properties} + + output_index_mapping: dict = {output_index_name: {"mappings": {"properties": output_properties}}} + +except Exception as exception: + logger.error("Exception during flowfile processing: " + traceback.format_exc()) + raise exception + +# Output the converted mapping as UTF-8 JSON +sys.stdout.buffer.write(json.dumps(output_index_mapping, ensure_ascii=False).encode("utf-8")) diff --git a/nifi/user-scripts/generate_location.py b/nifi/user_scripts/generate_location.py similarity index 85% rename from nifi/user-scripts/generate_location.py rename to nifi/user_scripts/generate_location.py index 5d88f9af7..4eb9426f7 100644 --- a/nifi/user-scripts/generate_location.py +++ b/nifi/user_scripts/generate_location.py @@ -37,21 +37,21 @@ def poly_creator(city: str): def main(): input_stream = sys.stdin.read() + log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME)) + output_stream = [] try: - log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME)) patients = json.loads(input_stream) locations = [poly_creator(location) for location in LOCATIONS.split(",")] - - output_stream = [] for patient in patients: to_append = {} id = patient["_source"][SUBJECT_ID_FIELD_NAME] - idx = randrange(len(locations)) # pick a random location specified - lat, lon, _ = rc.coordinates_randomizer(polygon = locations[idx], num_locations = 1) # generate latitude and longitude - + # pick a random location specified + idx = randrange(len(locations)) + # generate latitude and longitude + lat, lon, _ = rc.coordinates_randomizer(polygon = locations[idx], num_locations = 1) to_append[SUBJECT_ID_FIELD_NAME] = id to_append[LOCATION_NAME_FIELD] = "POINT (" + str(lon[0]) + " " + str(lat[0]) + ")" output_stream.append(to_append) @@ -62,8 +62,8 @@ else: with
open(log_file_path, "a+") as log_file: log_file.write("\n" + str(traceback.print_exc())) - finally: - return output_stream + return output_stream sys.stdout.write(json.dumps(main())) + diff --git a/nifi/user-scripts/get_files_from_storage.py b/nifi/user_scripts/get_files_from_storage.py similarity index 98% rename from nifi/user-scripts/get_files_from_storage.py rename to nifi/user_scripts/get_files_from_storage.py index f1aefbbb2..2dc9e1de7 100644 --- a/nifi/user-scripts/get_files_from_storage.py +++ b/nifi/user_scripts/get_files_from_storage.py @@ -170,7 +170,8 @@ def get_files_and_metadata(): if generate_pseudo_doc_id is not False: _file_id_dict["document_Pseudo_Id"] = str(uuid.uuid4().hex) - txt_file_df = pandas.concat([txt_file_df, pandas.DataFrame.from_dict([_file_id_dict], orient="columns")]) + txt_file_df = pandas.concat([txt_file_df, + pandas.DataFrame.from_dict([_file_id_dict], orient="columns")]) folders_ingested[root].append(file_id) else: diff --git a/nifi/user_scripts/legacy_scripts/__init__.py b/nifi/user_scripts/legacy_scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nifi/user-scripts/legacy_scripts/annotation_creator.py b/nifi/user_scripts/legacy_scripts/annotation_creator.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/annotation_creator.py rename to nifi/user_scripts/legacy_scripts/annotation_creator.py diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager.py b/nifi/user_scripts/legacy_scripts/annotation_manager.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/annotation_manager.py rename to nifi/user_scripts/legacy_scripts/annotation_manager.py diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager_docs.py b/nifi/user_scripts/legacy_scripts/annotation_manager_docs.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/annotation_manager_docs.py rename to nifi/user_scripts/legacy_scripts/annotation_manager_docs.py diff --git a/nifi/user-scripts/legacy_scripts/anonymise_doc.py b/nifi/user_scripts/legacy_scripts/anonymise_doc.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/anonymise_doc.py rename to nifi/user_scripts/legacy_scripts/anonymise_doc.py diff --git a/nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py b/nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py rename to nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py diff --git a/nifi/user-scripts/legacy_scripts/ingest_into_es.py b/nifi/user_scripts/legacy_scripts/ingest_into_es.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/ingest_into_es.py rename to nifi/user_scripts/legacy_scripts/ingest_into_es.py diff --git a/nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py b/nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py rename to nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py diff --git a/nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py b/nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py rename to nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py diff --git 
a/nifi/user-scripts/legacy_scripts/parse-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-json-to-avro.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/parse-json-to-avro.py rename to nifi/user_scripts/legacy_scripts/parse-json-to-avro.py diff --git a/nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py rename to nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py diff --git a/nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py b/nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py rename to nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py diff --git a/nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py b/nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py similarity index 100% rename from nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py rename to nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py diff --git a/nifi/user-scripts/tests/generate_big_ann_file.py b/nifi/user_scripts/tests/generate_big_ann_file.py similarity index 100% rename from nifi/user-scripts/tests/generate_big_ann_file.py rename to nifi/user_scripts/tests/generate_big_ann_file.py diff --git a/nifi/user-scripts/tests/generate_files.py b/nifi/user_scripts/tests/generate_files.py similarity index 100% rename from nifi/user-scripts/tests/generate_files.py rename to nifi/user_scripts/tests/generate_files.py diff --git a/nifi/user-scripts/tests/get_ingested_files.py b/nifi/user_scripts/tests/get_ingested_files.py similarity index 100% rename from nifi/user-scripts/tests/get_ingested_files.py rename to nifi/user_scripts/tests/get_ingested_files.py diff --git a/nifi/user_scripts/tests/nifi/test_nifi.py b/nifi/user_scripts/tests/nifi/test_nifi.py new file mode 100644 index 000000000..0242088a0 --- /dev/null +++ b/nifi/user_scripts/tests/nifi/test_nifi.py @@ -0,0 +1,149 @@ +import json +import os +import sys +import tempfile +import unittest +from pathlib import Path + +sys.path.insert(0, "../../") + +import requests +from dto.nifi_api_config import NiFiAPIConfig +from dto.database_config import DatabaseConfig +from dto.service_health import NiFiHealth, DatabaseHealth +from nipyapi import config as nipy_config +from nipyapi import security, versioning +from utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient +from utils.helpers.service import check_postgres + + +class TestServices(unittest.TestCase): + """Service connectivity and health checks.""" + + @classmethod + def setUpClass(cls): + pass # minimal body keeps the module importable; the commented-out setup below is kept for reference + + # cls.pg_cfg = PGConfig() + # cls.nifi_api_config = NiFiAPIConfig() + # cls.nifi_client = NiFiClient(config=cls.nifi_api_config, login_on_init=False) + # cls.nifi_registry_client = NiFiRegistryClient(config=cls.nifi_api_config) + # cls.pg_config = PGConfig() + # cls.registry_bucket_name = os.environ.get("NIFI_REGISTRY_BUCKET", "cogstack") + # cls.flow_name = "opensearch_ingest_docs_db_to_es" + # cls.template_path = ( + # Path(__file__).resolve().parents[4] + # / "nifi" + # / "user-templates" + # / f"{cls.flow_name}.json" + # ) + # cls.es_hosts = 
os.environ.get("OPENSEARCH_URLS", "http://localhost:9200") + # cls.es_username = os.environ.get("OPENSEARCH_USERNAME", "admin") + # cls.es_password = os.environ.get("OPENSEARCH_PASSWORD", "admin") + + #@classmethod + #def _configure_nipyapi(cls) -> None: + # """Apply SSL + host config so nipyapi uses the same creds as the raw client.""" + # nipy_config.nifi_config.host = cls.nifi_api_config.nifi_api_url + # nipy_config.registry_config.host = cls.nifi_api_config.nifi_registry_api_url + # + # for cfg in (nipy_config.nifi_config, nipy_config.registry_config): + # cfg.verify_ssl = cls.nifi_api_config.VERIFY_SSL + # cfg.cert_file = cls.nifi_api_config.NIFI_CERT_PEM_PATH + # cfg.key_file = cls.nifi_api_config.NIFI_CERT_KEY_PATH + # cfg.ssl_ca_cert = cls.nifi_api_config.ROOT_CERT_CA_PATH + # + #def _prepare_snapshot_with_env_defaults(self) -> Path: + # """ + # Load the opensearch template and pre-fill controller service properties + # using env/default configs so the flow can start without manual clicks. + # """ + # with self.template_path.open() as fp: + # snapshot = json.load(fp) + # + # db_url = f"jdbc:postgresql://{self.pg_cfg.host}:{self.pg_cfg.port}/{self.pg_cfg.db}" + # + # for controller_service in snapshot.get("flowContents", {}).get("controllerServices", []): + # name = controller_service.get("name") + # properties = controller_service.setdefault("properties", {}) + # + # if name == "DBCPConnectionPool": + # properties["Database Connection URL"] = db_url + # properties["Database User"] = self.pg_cfg.user + # properties["Password"] = self.pg_cfg.password + # elif name == "ElasticSearchClientServiceImpl": + # properties["el-cs-http-hosts"] = self.es_hosts + # properties["el-cs-username"] = self.es_username + # properties["el-cs-password"] = self.es_password + # + # fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="nifi-template-") + # with os.fdopen(fd, "w") as tmp_file: + # json.dump(snapshot, tmp_file) + # + # return Path(tmp_path) + # + #def test_nifi_health(self) -> None: + # result = self.nifi_client._login() + # self.assertTrue(result) + # + #def test_nifi_registry_health(self) -> None: + # result = requests.head( + # url=self.nifi_api_config.nifi_registry_base_url, + # auth=self.nifi_api_config.auth_credentials(), + # cert=self.nifi_api_config.get_nifi_ssl_certs_paths(), + # verify=self.nifi_api_config.ROOT_CERT_CA_PATH, + # timeout=15, + # ) + # self.assertEqual(result.status_code, 200) + # + #def test_postgres_health(self): + # result, latency, err = check_postgres(self.pg_config) + # self.assertTrue(result, f"PostgreSQL unhealthy: {err}") + # print(f"✅ PostgreSQL OK, latency {latency:.2f} ms") + # + #def test_import_opensearch_template_and_configure_controller_services(self) -> None: + # """ + # Bring the opensearch template into the local NiFi Registry bucket and + # patch the controller services so they use local PG/ES credentials. 
+ # """ + # self.assertTrue(self.nifi_client._login()) + # self._configure_nipyapi() + # + # security.service_login( + # service="registry", + # username=self.nifi_api_config.NIFI_USERNAME, + # password=self.nifi_api_config.NIFI_PASSWORD, + # ) + # + # bucket = versioning.get_bucket(self.registry_bucket_name) + # if bucket is None: + # bucket = versioning.create_bucket( + # bucket_name=self.registry_bucket_name, + # bucket_desc="Auto-created for test imports", + # ) + # + # flow = versioning.get_flow_in_bucket( + # bucket_id=bucket.identifier, + # identifier=self.flow_name, + # identifier_type="name", + # ) + # if flow is None: + # flow = versioning.create_flow( + # bucket_id=bucket.identifier, + # flow_name=self.flow_name, + # desc="Auto-imported from user-templates", + # ) + # + # snapshot_path = self._prepare_snapshot_with_env_defaults() + # + # try: + # snapshot = versioning.import_flow_version( + # bucket_id=bucket.identifier, + # flow_id=flow.identifier, + # file_path=str(snapshot_path), + # ) + # finally: + # snapshot_path.unlink(missing_ok=True) + # + # self.assertIsNotNone(snapshot) + # \ No newline at end of file diff --git a/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py new file mode 100644 index 000000000..a26238800 --- /dev/null +++ b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py @@ -0,0 +1,31 @@ +import unittest +from io import BytesIO + + +class DummyFlowFile: + def __init__(self, content: str): + self._data = BytesIO(content.encode()) + + def read(self): + return self._data.getvalue() + + def write(self, data): + self._data = BytesIO(data) + return self + +class UppercaseProcessor: + """Minimal stand-in processor so the test below is runnable end-to-end.""" + + def transform(self, context, flowfile): + return flowfile.write(flowfile.read().upper()) + +class TestMyProcessor(unittest.TestCase): + def test_uppercase(self): + proc = UppercaseProcessor() + ff_in = DummyFlowFile("hello nifi") + ff_out = proc.transform({}, ff_in) + + self.assertEqual(ff_out.read().decode(), "HELLO NIFI") + +if __name__ == "__main__": + unittest.main() diff --git a/nifi/user_scripts/tests/nifi/test_service_health.py b/nifi/user_scripts/tests/nifi/test_service_health.py new file mode 100644 index 000000000..0c993587a --- /dev/null +++ b/nifi/user_scripts/tests/nifi/test_service_health.py @@ -0,0 +1,54 @@ +import unittest + +from pydantic import SecretStr + +from nifi.user_scripts.dto.database_config import DatabaseConfig +from nifi.user_scripts.dto.elastic_config import ElasticConfig +from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig +from nifi.user_scripts.dto.service_health import DatabaseHealth, ElasticHealth, NiFiHealth +from nifi.user_scripts.utils.generic import get_logger +from nifi.user_scripts.utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient +from nifi.user_scripts.utils.helpers.service import check_elasticsearch, check_kibana, check_postgres + + +class TestServices(unittest.TestCase): + """Service connectivity and health checks.""" + + logger = get_logger(__name__) + + @classmethod + def setUpClass(cls): + cls.nifi_api_config: NiFiAPIConfig = NiFiAPIConfig() + cls.nifi_client: NiFiClient = NiFiClient(config=cls.nifi_api_config, health_check_on_init=False) + cls.nifi_registry_client: NiFiRegistryClient = NiFiRegistryClient(config=cls.nifi_api_config) + cls.pg_config: DatabaseConfig = DatabaseConfig(port=5554) + cls.elastic_config: ElasticConfig = ElasticConfig(user="admin", + hosts=["https://localhost:9200"], + password=SecretStr("admin"), + kibana_host="https://localhost:5601", + kibana_version="opensearch-dashboards") + + def test_nifi_health(self) -> None: + health: NiFiHealth = 
self.nifi_client.health_check() + self.assertTrue(health.connected) + self.assertEqual(health.status, "healthy") + + def test_nifi_registry_health(self) -> None: + nifi_health: NiFiHealth = self.nifi_registry_client.health_check() + self.assertTrue(nifi_health.connected) + self.assertEqual(nifi_health.status, "healthy") + + def test_postgres_health(self): + database_health: DatabaseHealth = check_postgres(self.pg_config) + self.assertTrue(database_health.connected) + self.assertEqual(database_health.status, "healthy") + + def test_elastic_health(self): + elastic_health: ElasticHealth = check_elasticsearch(self.elastic_config) + self.assertTrue(elastic_health.connected) + self.assertEqual(elastic_health.status, "healthy") + + def test_kibana_health(self): + elastic_health: ElasticHealth = check_kibana(self.elastic_config) + self.assertTrue(elastic_health.connected) + self.assertEqual(elastic_health.status, "healthy") \ No newline at end of file diff --git a/nifi/user-scripts/tests/test_files/ex1.pdf b/nifi/user_scripts/tests/resources/ex1.pdf old mode 100755 new mode 100644 similarity index 100% rename from nifi/user-scripts/tests/test_files/ex1.pdf rename to nifi/user_scripts/tests/resources/ex1.pdf diff --git a/nifi/user_scripts/tests/test_avro.py b/nifi/user_scripts/tests/test_avro.py new file mode 100644 index 000000000..7f893747e --- /dev/null +++ b/nifi/user_scripts/tests/test_avro.py @@ -0,0 +1,59 @@ +import io +import json + +import avro.schema +from avro.datafile import DataFileWriter +from avro.io import DatumWriter + +""" + Use this script to test avro schemas etc with python3 +""" + +json_mapper_schema = json.loads(open("../user_schemas/legacy/cogstack_common_schema_mapping.json").read()) +avro_cogstack_schema = avro.schema.parse(open("../user_schemas/legacy/cogstack_common_schema_full.avsc", "rb").read(), validate_enum_symbols=False) + +test_records = [{ "docid" : "1", + "sampleid" : 1041, + "dct" : "2020-05-11 10:52:25.273518", + "binarydoc": "blablabla" }, + { "docid" : "1", + "sampleid" : 1041, + "dct" : "2020-05-11 10:52:25.273518", + "binarydoc": "blablabla" }] + +schema_fields = avro_cogstack_schema.props["fields"] +dict_fields_types = {} +for field in schema_fields: + dict_fields_types[field.name] = "" + tmp_list = json.loads(str(field.type)) + if len(tmp_list) > 1 and type(tmp_list) is not str: + if type(tmp_list[1]) is dict: + dict_fields_types[field.name] = tmp_list[1]["type"] + else: + dict_fields_types[field.name] = tmp_list[1] + else: + dict_fields_types[field.name] = field.type + +available_mapping_keys = {} +for k, v in json_mapper_schema.items(): + if v: + available_mapping_keys[k] = v + +bytes_io = io.BytesIO(bytes("", encoding="UTF-8")) + +type_mapping = {"boolean": "bool", "long": "int", "int": "int", "float" : "float", "byte":"bytes", "string": "str", "double": "float"} + + +print(avro_cogstack_schema) + +with DataFileWriter(bytes_io, DatumWriter(), avro_cogstack_schema) as writer: + # re-map the value to the new keys + + for _record in test_records: + record = {} + + for k, v in available_mapping_keys.items(): + if v in _record.keys(): + record[k] = _record[v] #getattr(__builtins__, type_mapping[dict_fields_types[k]])(_record[v]) + + writer.append(record) diff --git a/nifi/user_scripts/tmp/.gitignore b/nifi/user_scripts/tmp/.gitignore new file mode 100644 index 000000000..e69de29bb diff --git a/nifi/user_scripts/utils/__init__.py b/nifi/user_scripts/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/nifi/user-scripts/utils/cerner_blob.py b/nifi/user_scripts/utils/cerner_blob.py similarity index 100% rename from nifi/user-scripts/utils/cerner_blob.py rename to nifi/user_scripts/utils/cerner_blob.py diff --git a/nifi/user-scripts/utils/ethnicity_map.py b/nifi/user_scripts/utils/ethnicity_map.py similarity index 100% rename from nifi/user-scripts/utils/ethnicity_map.py rename to nifi/user_scripts/utils/ethnicity_map.py diff --git a/nifi/user-scripts/utils/generic.py b/nifi/user_scripts/utils/generic.py similarity index 98% rename from nifi/user-scripts/utils/generic.py rename to nifi/user_scripts/utils/generic.py index 5d5be2ead..963f3773e 100644 --- a/nifi/user-scripts/utils/generic.py +++ b/nifi/user_scripts/utils/generic.py @@ -79,7 +79,7 @@ def dict2jsonl_file(input_dict: dict | defaultdict, file_path: str) -> None: print('', file=outfile) -def get_logger(name: str) -> logging.Logger: +def get_logger(name: str, propagate: bool = False) -> logging.Logger: """Return a configured logger shared across all NiFi clients.""" level_name = os.getenv("NIFI_LOG_LEVEL", "INFO").upper() level = getattr(logging, level_name, logging.INFO) @@ -94,7 +94,7 @@ handler.setFormatter(fmt) logger.addHandler(handler) logger.setLevel(level) - logger.propagate = False + logger.propagate = propagate return logger def download_file_from_url(url: str, output_path: str, ssl_verify: bool = False, chunk_size: int = 8192) -> None: diff --git a/nifi/user-scripts/utils/helpers/avro_json_encoder.py b/nifi/user_scripts/utils/helpers/avro_json_encoder.py similarity index 100% rename from nifi/user-scripts/utils/helpers/avro_json_encoder.py rename to nifi/user_scripts/utils/helpers/avro_json_encoder.py diff --git a/nifi/user-scripts/utils/helpers/base_nifi_processor.py b/nifi/user_scripts/utils/helpers/base_nifi_processor.py similarity index 100% rename from nifi/user-scripts/utils/helpers/base_nifi_processor.py rename to nifi/user_scripts/utils/helpers/base_nifi_processor.py diff --git a/nifi/user_scripts/utils/helpers/nifi_api_client.py b/nifi/user_scripts/utils/helpers/nifi_api_client.py new file mode 100644 index 000000000..c8127b511 --- /dev/null +++ b/nifi/user_scripts/utils/helpers/nifi_api_client.py @@ -0,0 +1,142 @@ +import time +from logging import Logger + +import requests +from nipyapi import canvas, security +from nipyapi.nifi import ApiClient, ProcessGroupsApi +from nipyapi.nifi.configuration import Configuration as NiFiConfiguration +from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity +from nipyapi.nifi.models.processor_entity import ProcessorEntity +from nipyapi.registry import ApiClient as RegistryApiClient +from nipyapi.registry import BucketsApi +from nipyapi.registry.configuration import Configuration as RegistryConfiguration + +from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig +from nifi.user_scripts.dto.service_health import NiFiHealth +from nifi.user_scripts.utils.generic import get_logger + + +class NiFiRegistryClient: + + def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None: + self.config = config or NiFiAPIConfig() + self.nipyapi_config = RegistryConfiguration() + self.nipyapi_config.host = self.config.nifi_registry_api_url + self.nipyapi_config.verify_ssl = self.config.verify_ssl + self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path # type: ignore + self.nipyapi_config.key_file = self.config.nifi_cert_key_path # type: ignore + self.nipyapi_config.ssl_ca_cert = 
self.config.root_cert_ca_path # type: ignore + + self.logger: Logger = get_logger(self.__class__.__name__) + + self.api_client = RegistryApiClient(self.nipyapi_config.host) + self.buckets_api = BucketsApi(self.api_client) + + def list_buckets(self): + buckets = self.buckets_api.get_buckets() + for b in buckets: + self.logger.info("Bucket: %s (%s)", b.name, b.identifier) + return buckets + + def health_check(self, timeout: int = 15) -> NiFiHealth: + start = time.perf_counter() + nifi_health = NiFiHealth( + service="nifi-registry", + service_info=self.config.nifi_registry_base_url + ) + + try: + response = requests.head( + url=self.config.nifi_registry_base_url, + auth=self.config.auth_credentials(), + cert=self.config.get_ssl_certs_paths(), + verify=self.config.root_cert_ca_path, + timeout=timeout + ) + + nifi_health.latency_ms = (time.perf_counter() - start) * 1000 + nifi_health.connected = response.ok + + if response.status_code == 200: + nifi_health.status = "healthy" + self.logger.info(f"✅ Logged in to NiFi Registry, latency {nifi_health.latency_ms:.2f} ms") + else: + nifi_health.message = f"❌ Unexpected status code {response.status_code}" + + except Exception as exc: + nifi_health.message = str(exc) + self.logger.error("❌ Failed to reach NiFi Registry: %s", exc) + + return nifi_health + + +class NiFiClient: + + def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None: + self.config = config or NiFiAPIConfig() + self.nipyapi_config = NiFiConfiguration() + self.nipyapi_config.host = self.config.nifi_api_url + self.nipyapi_config.verify_ssl = self.config.verify_ssl + self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path # type: ignore + self.nipyapi_config.key_file = self.config.nifi_cert_key_path # type: ignore + self.nipyapi_config.ssl_ca_cert = self.config.root_cert_ca_path # type: ignore + + self.logger: Logger = get_logger(self.__class__.__name__) + + self.api_client = ApiClient(self.nipyapi_config) + self.process_group_api = ProcessGroupsApi(self.api_client) + + if health_check_on_init: + self.health_check() + + def health_check(self) -> NiFiHealth: + start = time.perf_counter() + nifi_health = NiFiHealth( + service="nifi", + service_info=self.config.nifi_api_url + ) + + try: + result = security.service_login( + service='nifi', + username=self.config.nifi_username, + password=self.config.nifi_password + ) + + nifi_health.connected = bool(result) + nifi_health.latency_ms = (time.perf_counter() - start) * 1000 + + if result: + nifi_health.status = "healthy" + self.logger.info(f"✅ Logged in to NiFi, latency {nifi_health.latency_ms:.2f} ms") + else: + nifi_health.message = "Authentication returned False" + self.logger.info("❌ Failed to log in to NiFi") + + except Exception as exc: + nifi_health.message = str(exc) + self.logger.error("❌ Failed to log in to NiFi: %s", exc) + + return nifi_health + + def get_root_process_group_id(self) -> str: + return canvas.get_root_pg_id() + + def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object: + return canvas.get_process_group(process_group_name, identifier_type="name") + + def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity: + return canvas.get_process_group(process_group_id, identifier_type="id") + + def start_process_group(self, process_group_id: str) -> bool: + return canvas.schedule_process_group(process_group_id, True) + + def stop_process_group(self, process_group_id: str) -> bool: + return canvas.schedule_process_group(process_group_id, False) + + 
def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]: + parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id") + return canvas.list_all_process_groups(parent_pg.id) + + def get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]: + return canvas.list_all_processors(process_group_id) diff --git a/nifi/user_scripts/utils/helpers/service.py b/nifi/user_scripts/utils/helpers/service.py new file mode 100644 index 000000000..512f8034b --- /dev/null +++ b/nifi/user_scripts/utils/helpers/service.py @@ -0,0 +1,113 @@ +import time + +import psycopg +import requests +from opensearchpy import OpenSearch + +from nifi.user_scripts.dto.database_config import DatabaseConfig +from nifi.user_scripts.dto.elastic_config import ElasticConfig +from nifi.user_scripts.dto.service_health import DatabaseHealth, ElasticHealth +from nifi.user_scripts.utils.generic import get_logger + +logger = get_logger(__name__) + +def check_kibana(config: ElasticConfig) -> ElasticHealth: + + elastic_health: ElasticHealth = ElasticHealth(service=config.kibana_version) + start = time.perf_counter() + + try: + if config.kibana_version == "kibana": + raise NotImplementedError + + response = requests.get(config.kibana_host + "/api/status", + auth=config.auth_credentials(), + timeout=config.timeout, + cert=config.get_kibana_ssl_certs_path(), + verify=config.elastic_root_cert_ca_path + ) + + elastic_health.latency_ms = (time.perf_counter() - start) * 1000 + elastic_health.connected = response.ok + + if response.status_code == 200: + elastic_health.status = "healthy" + logger.info(f"✅ {config.kibana_version} OK, latency {elastic_health.latency_ms:.2f} ms") + else: + elastic_health.message = f"❌ Failed to query {config.kibana_version}" + + except Exception as e: + elastic_health.message = str(e) + logger.error(f"❌ Failed to query {config.kibana_version}: %s", str(e)) + + return elastic_health + +def check_elasticsearch(config: ElasticConfig) -> ElasticHealth: + + elastic_health: ElasticHealth = ElasticHealth(service=config.elasticsearch_version) + start = time.perf_counter() + + try: + elastic_connection = OpenSearch(hosts=config.hosts, + use_ssl=config.verify_ssl, + verify_certs=False, + http_auth=config.auth_credentials(), + ssl_show_warn=False, + ssl_assert_hostname=False, + ca_certs=config.elastic_root_cert_ca_path, + client_cert=config.elastic_node_cert_pem_path, + client_key=config.elastic_node_cert_key_path + ) + + if config.elasticsearch_version == "elasticsearch": + raise NotImplementedError + + if elastic_connection.ping(): + elastic_health.connected = True + elastic_health.status = "healthy" + elastic_health.service_info = str(elastic_connection.nodes.info()) + elastic_health.latency_ms = (time.perf_counter() - start) * 1000 + logger.info(f"✅ {config.elasticsearch_version} OK, latency {elastic_health.latency_ms:.2f} ms") + else: + elastic_health.message = f"❌ Failed to query {config.elasticsearch_version}" + except Exception as e: + elastic_health.message = str(e) + logger.error(f"❌ Failed to query {config.elasticsearch_version}: %s", str(e)) + + return elastic_health + +def check_postgres(config: DatabaseConfig) -> DatabaseHealth: + + start = time.perf_counter() + database_health = DatabaseHealth(service="cogstack-samples-db", + db_name=config.database_name, + version=None + ) + + try: + with psycopg.connect( + host=config.host, + port=config.port, + user=config.username, + password=config.password.get_secret_value(), 
+ dbname=config.database_name, + connect_timeout=config.timeout, + ) as connection, connection.cursor() as cursor: + cursor.execute("SELECT version();") + result = cursor.fetchone() + + if result and result[0]: + database_health.version = result[0] + database_health.status = "healthy" + database_health.connected = True + database_health.latency_ms = (time.perf_counter() - start) * 1000 + logger.info(f"✅ PostgreSQL OK, latency {database_health.latency_ms:.2f} ms") + else: + database_health.message = "No version returned from database" + database_health.status = "unhealthy" + database_health.connected = True + + except Exception as e: + database_health.message = str(e) + logger.error("❌ Failed to query PostgreSQL: %s", str(e)) + return database_health diff --git a/nifi/user_scripts/utils/lint_env.py b/nifi/user_scripts/utils/lint_env.py new file mode 100644 index 000000000..8918e9152 --- /dev/null +++ b/nifi/user_scripts/utils/lint_env.py @@ -0,0 +1,115 @@ +#!/usr/bin/env python3 +""" + Lightweight env file validator used by deploy/export_env_vars.sh. +""" + +from __future__ import annotations + +import sys +from collections.abc import Iterable +from pathlib import Path + +PORT_SUFFIXES = ("_PORT", "_OUTPUT_PORT", "_INPUT_PORT") +BOOL_SUFFIXES = ("_ENABLED", "_SSL_ENABLED", "_BAKE") +BOOL_VALUES = {"true", "false", "1", "0", "yes", "no", "on", "off"} + + +def strip_quotes(value: str) -> str: + if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")): + return value[1:-1] + return value + + +def parse_env_file(path: Path) -> tuple[list[str], list[str], list[tuple[str, str, int]]]: + errors: list[str] = [] + warnings: list[str] = [] + entries: list[tuple[str, str, int]] = [] + + for lineno, raw_line in enumerate(path.read_text().splitlines(), start=1): + line = raw_line.strip() + if not line or line.startswith("#"): + continue + + if line.startswith("export "): + line = line[len("export ") :].strip() + + if "=" not in line: + errors.append(f"{path}:{lineno}: missing '=' (got: {raw_line})") + continue + + key, value = line.split("=", 1) + key = key.strip() + value = value.strip() + + if not key: + errors.append(f"{path}:{lineno}: empty key (got: {raw_line})") + continue + + entries.append((key, value, lineno)) + + seen: dict[str, int] = {} + for key, _, lineno in entries: + if key in seen: + warnings.append(f"{path}:{lineno}: duplicate key '{key}' (also on line {seen[key]})") + else: + seen[key] = lineno + + return errors, warnings, entries + + +def validate_entries(path: Path, entries: Iterable[tuple[str, str, int]]) -> list[str]: + errors: list[str] = [] + + for key, value, lineno in entries: + normalized = strip_quotes(value) + + if any(key.endswith(suffix) for suffix in PORT_SUFFIXES): + if not normalized.isdigit(): + errors.append(f"{path}:{lineno}: '{key}' should be an integer port (got '{value}')") + + if any(key.endswith(suffix) for suffix in BOOL_SUFFIXES): + if normalized.lower() not in BOOL_VALUES: + errors.append( + f"{path}:{lineno}: '{key}' should be one of {sorted(BOOL_VALUES)} (got '{value}')" + ) + + return errors + + +def main(args: list[str]) -> int: + if not args: + script = Path(__file__).name + print(f"Usage: {script} <env_file> [<env_file> ...]") + return 1 + + warnings: list[str] = [] + errors: list[str] = [] + checked_files = 0 + + for path_str in args: + path = Path(path_str).resolve() + if not path.exists(): + warnings.append(f"Skipping missing env file: {path}") + continue + + checked_files += 1 + parse_errors, parse_warnings, entries = parse_env_file(path) + 
diff --git a/nifi/user-scripts/utils/pgsql_query.py b/nifi/user_scripts/utils/pgsql_query.py
similarity index 71%
rename from nifi/user-scripts/utils/pgsql_query.py
rename to nifi/user_scripts/utils/pgsql_query.py
index 12e32d96e..33c639f2f 100644
--- a/nifi/user-scripts/utils/pgsql_query.py
+++ b/nifi/user_scripts/utils/pgsql_query.py
@@ -1,6 +1,6 @@
-import psycopg2
+import psycopg
 
-conn = psycopg2.connect(
+conn = psycopg.connect(
     host="localhost",
-    database="suppliers",
+    dbname="suppliers",
     user="YourUsername",
diff --git a/nifi/user-scripts/utils/sqlite_query.py b/nifi/user_scripts/utils/sqlite_query.py
similarity index 100%
rename from nifi/user-scripts/utils/sqlite_query.py
rename to nifi/user_scripts/utils/sqlite_query.py
diff --git a/nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml b/nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml
rename to nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
diff --git a/nifi/user-templates/dt4h/raw_ingest_dt4h.xml b/nifi/user_templates/dt4h/raw_ingest_dt4h.xml
similarity index 100%
rename from nifi/user-templates/dt4h/raw_ingest_dt4h.xml
rename to nifi/user_templates/dt4h/raw_ingest_dt4h.xml
diff --git a/nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml b/nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml
rename to nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
diff --git a/nifi/user-templates/legacy/Common_schema_example_ingest.xml b/nifi/user_templates/legacy/Common_schema_example_ingest.xml
similarity index 100%
rename from nifi/user-templates/legacy/Common_schema_example_ingest.xml
rename to nifi/user_templates/legacy/Common_schema_example_ingest.xml
diff --git a/nifi/user-templates/legacy/DEID_sample_pipeline.xml b/nifi/user_templates/legacy/DEID_sample_pipeline.xml
similarity index 100%
rename from nifi/user-templates/legacy/DEID_sample_pipeline.xml
rename to nifi/user_templates/legacy/DEID_sample_pipeline.xml
diff --git a/nifi/user-templates/legacy/Generate_location_ES.xml b/nifi/user_templates/legacy/Generate_location_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/Generate_location_ES.xml
rename to nifi/user_templates/legacy/Generate_location_ES.xml
diff --git a/nifi/user-templates/legacy/Grab_non_annotated_docs.xml b/nifi/user_templates/legacy/Grab_non_annotated_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/Grab_non_annotated_docs.xml
rename to nifi/user_templates/legacy/Grab_non_annotated_docs.xml
diff --git a/nifi/user-templates/legacy/HealTAC_23.xml b/nifi/user_templates/legacy/HealTAC_23.xml
similarity index 100%
rename from nifi/user-templates/legacy/HealTAC_23.xml
rename to nifi/user_templates/legacy/HealTAC_23.xml
diff --git a/nifi/user-templates/legacy/OS_annotate_per_doc.xml b/nifi/user_templates/legacy/OS_annotate_per_doc.xml
similarity index 100%
rename from nifi/user-templates/legacy/OS_annotate_per_doc.xml
rename to nifi/user_templates/legacy/OS_annotate_per_doc.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
diff --git a/nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml b/nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
similarity index 100%
rename from nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
rename to nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
diff --git a/nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json b/nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json
rename to nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_to_es.json
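One caveat on the pgsql_query.py migration above: psycopg 3 dropped psycopg2's database= alias for the libpq dbname parameter, which is why the hunk switches that keyword as well. For new code, the context-manager form is the more idiomatic psycopg 3 pattern; a sketch with placeholder host and credentials:

    # Placeholder connection parameters; shows the psycopg 3 context-manager
    # idiom, which closes the cursor and commits/closes the connection on exit.
    import psycopg

    with psycopg.connect(
        host="localhost",
        dbname="suppliers",
        user="YourUsername",
        password="YourPassword",
    ) as conn, conn.cursor() as cur:
        cur.execute("SELECT version();")
        print(cur.fetchone()[0])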
diff --git a/pyproject.toml b/pyproject.toml
index 6fae60f28..2b563d437 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.ruff]
 line-length = 120
-exclude = ["nifi/user-scripts/legacy_scripts", "services"]
+exclude = ["nifi/user_scripts/legacy_scripts", "services"]
 target-version = "py311"
 indent-width = 4
 
@@ -25,15 +25,30 @@ fixable = ["ALL"]
 
 [tool.mypy]
 plugins = ["pydantic.mypy"]
+python_version = "3.11"
 ignore_missing_imports = true
 strict = false
 files = "."
 mypy_path = "./typings/"
+warn_unused_configs = true
 
-[tool.isort]
-line_length = 120
-skip = ["venv", "venv-test", "envs", "docker", "models"]
+[tool.setuptools.packages.find]
+include = ["nifi*"]
+exclude = [
+    "*egg-info*",
+    "build*",
+    "dist*",
+    "nifi/conf*",
+    "nifi/drivers*",
+    "nifi/user_schemas*",
+    "nifi/user_templates*",
+]
+
+[project]
+name = "cogstack_nifi"
+version = "0.0.1"
+requires-python = ">=3.11"
 
-[tool.flake8]
-max-line-length = 120
-exclude = ["venv", "venv-test", "envs", "docker", "models"]
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
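With the [project] table and build-system above, the repository becomes installable (e.g. pip install -e . from the repo root), with discovery restricted to the nifi tree. A rough way to sanity-check what setuptools would pick up — this is a sketch, not the build itself: find_packages() matches dotted package names (nifi.conf rather than nifi/conf), and only directories containing __init__.py count as packages:

    # Sketch: approximates the [tool.setuptools.packages.find] rules above
    # in dotted form; run from the repository root.
    from setuptools import find_packages

    print(find_packages(
        include=["nifi*"],
        exclude=["nifi.conf*", "nifi.drivers*", "nifi.user_schemas*", "nifi.user_templates*"],
    ))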
diff --git a/security/env/users_database.env b/security/env/users_database.env
index 9da92b17c..b2140eccf 100644
--- a/security/env/users_database.env
+++ b/security/env/users_database.env
@@ -3,8 +3,8 @@ POSTGRES_USER_SAMPLES=test
 POSTGRES_PASSWORD_SAMPLES=test
 
 # Production DATABASE user
-POSTGRES_USER=admin
-POSTGRES_PASSWORD=admin
+DATABASE_USER=admin
+DATABASE_PASSWORD=admin
 
 # Production DATABASE MSSQL user
 MSSQL_SA_USER=sa
diff --git a/security/env/users_elasticsearch.env b/security/env/users_elasticsearch.env
index 918334a3f..3ba87ca1a 100644
--- a/security/env/users_elasticsearch.env
+++ b/security/env/users_elasticsearch.env
@@ -42,4 +42,3 @@ ES_LOGSTASH_PASS=kibanaserver
 ES_KIBANARO_PASS=kibanaserver
 ES_READALL_PASS=kibanaserver
 ES_SNAPSHOTRESTORE_PASS=kibanaserver
-
diff --git a/security/scripts/create_opensearch_client_admin_certs.sh b/security/scripts/create_opensearch_client_admin_certs.sh
index 7a8528808..1d864ab8e 100644
--- a/security/scripts/create_opensearch_client_admin_certs.sh
+++ b/security/scripts/create_opensearch_client_admin_certs.sh
@@ -49,7 +49,7 @@ echo "==========================================================================
 CA_ROOT_CERT="${ROOT_CERTIFICATES_FOLDER}"$ROOT_CERTIFICATE_NAME".pem"
 CA_ROOT_KEY="${ROOT_CERTIFICATES_FOLDER}"$ROOT_CERTIFICATE_NAME".key"
 
-EXT_FILE= "${SECURITY_TEMPLATES_FOLDER}ssl-extensions-x509.cnf"
+EXT_FILE="${SECURITY_TEMPLATES_FOLDER}ssl-extensions-x509.cnf"
 
 # === Client cert ===
 echo "Generating a key for: $ES_CLIENT_CERT_NAME"
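Two closing notes. First, the POSTGRES_USER/POSTGRES_PASSWORD → DATABASE_USER/DATABASE_PASSWORD rename will break any out-of-tree tooling that still reads the old names; a purely illustrative bridge while both conventions are in the wild:

    # Illustrative only: prefer the new names, fall back to the legacy ones.
    import os

    db_user = os.environ.get("DATABASE_USER") or os.environ.get("POSTGRES_USER", "")
    db_password = os.environ.get("DATABASE_PASSWORD") or os.environ.get("POSTGRES_PASSWORD", "")

Second, the EXT_FILE fix in the last hunk is more than cosmetic: with a space after the =, the shell assigns an empty EXT_FILE and then tries to execute the quoted .cnf path as a command, so the variable was never actually set.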