diff --git a/.gitattributes b/.gitattributes
index 8f4aec0b7..8e5c1acf0 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,7 +1,7 @@
###############################
# Git Line Endings #
# Set default behaviour to automatically normalize line endings.
-* text eolf=lf
+* text eol=lf
# Force batch scripts to always use CRLF line endings so that if a repo is accessed
# in Windows via a file share from Linux, the scripts will work.
*.{cmd,[cC][mM][dD]} text eol=crlf
diff --git a/.github/workflows/_docker-template.yml b/.github/workflows/_docker-template.yml
index 7d186bddc..c41244868 100644
--- a/.github/workflows/_docker-template.yml
+++ b/.github/workflows/_docker-template.yml
@@ -114,7 +114,7 @@ jobs:
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Docker metadata (again)
- id: meta
+ id: meta_manifest
uses: docker/metadata-action@v5
with:
images: ${{ inputs.image }}
@@ -141,4 +141,4 @@ jobs:
--tag "${img}:${tag}" \
"${img}:${tag}-amd64" \
"${img}:${tag}-arm64"
- done < <(printf "%s" "${{ steps.meta.outputs.tags }}")
+ done < <(printf "%s" "${{ steps.meta_manifest.outputs.tags }}")
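
Renaming the second metadata step to `meta_manifest` removes the duplicate `meta` id, so the manifest loop above resolves the intended step's output. A minimal sketch of how that loop consumes the newline-separated tag list emitted by `docker/metadata-action` (the tag values are hypothetical; note that `read` drops a final line lacking a trailing newline, which is why this sketch appends one with `printf "%s\n"`):

```bash
#!/usr/bin/env bash
set -euo pipefail

# Hypothetical stand-in for ${{ steps.meta_manifest.outputs.tags }}.
tags=$'cogstacksystems/cogstack-nifi:latest\ncogstacksystems/cogstack-nifi:2.7.2'
img="cogstacksystems/cogstack-nifi"

while IFS= read -r ref; do
  tag="${ref##*:}"   # strip the image name, keep only the tag
  echo docker buildx imagetools create \
    --tag "${img}:${tag}" \
    "${img}:${tag}-amd64" \
    "${img}:${tag}-arm64"
done < <(printf "%s\n" "$tags")
```
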
diff --git a/.gitignore b/.gitignore
index 564452318..bae75c1c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,9 +5,18 @@
.vscode
.venv
.ruff_cache
+.mypy_cache
venv
**__pycache__
**/venv
+*.pyc
+.pyc
+build
+
+# Ignore setuptools metadata
+*.egg-info/
+*.egg-info
+**/*.egg-info/
# keys and certificates
*.pem
@@ -37,6 +46,7 @@ security/templates/**
docs/build/*
# Ignore all .env files at any level
+.env
*.env
**/*.env
!*.env.template
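
With these patterns in place, `git check-ignore -v` shows which rule wins for a given path; the negation `!*.env.template` keeps templates tracked even though `*.env` and `**/*.env` match them. The paths below are illustrative:

```bash
# Which rule ignores a real env file? Prints the matching .gitignore line.
git check-ignore -v deploy/database.env

# Templates are re-included by !*.env.template, so this exits non-zero
# and prints nothing.
git check-ignore -v deploy/database.env.template || echo "not ignored"
```
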
diff --git a/deploy/database.env b/deploy/database.env
index d4ad0d7fc..478e13c66 100644
--- a/deploy/database.env
+++ b/deploy/database.env
@@ -1,9 +1,13 @@
-# production db name
-POSTGRES_DATABANK_DB=cogstack
-
POSTGRES_DB_MAX_CONNECTIONS=100
# Prefix of file names to load the DB schema for in /services/cogstack-db/(pgsql/mssql)/schemas/ folder
POSTGRES_DB_SCHEMA_PREFIX="cogstack_db"
-POSTGRES_SHM_SIZE="1g"
+# production db name
+DATABASE_DB_NAME=cogstack
+
+DATABASE_DOCKER_SHM_SIZE=1g
+
+DATABASE_DOCKER_CPU_MIN=1
+DATABASE_DOCKER_CPU_MAX=1
+DATABASE_DOCKER_RAM=1g
diff --git a/deploy/elasticsearch.env b/deploy/elasticsearch.env
index b084dba38..387409be5 100644
--- a/deploy/elasticsearch.env
+++ b/deploy/elasticsearch.env
@@ -9,10 +9,10 @@ ELASTICSEARCH_VERSION=opensearch
# possible values :
# - elasticsearch : docker.elastic.co/elasticsearch/elasticsearch:8.18.2
# - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-elasticsearch:latest
-# - opensearch : opensearchproject/opensearch:3.2.0
+# - opensearch : opensearchproject/opensearch:3.3.0
# the custom cogstack image is always based on the last image of ES native
-ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.2.0
+ELASTICSEARCH_DOCKER_IMAGE=opensearchproject/opensearch:3.3.0
ELASTICSEARCH_LOG_LEVEL=INFO
@@ -88,9 +88,14 @@ ELASTICSEARCH_BACKUP_PARTITION_CONFIG=../data/es_snapshot_backups/config_backup
ELASTICSEARCH_SECURITY_DIR=../security/certificates/elastic/
# MEMORY CONFIG
-ELASTICSEARCH_JAVA_OPTS="-Xms2048m -Xmx2048m -Des.failure_store_feature_flag_enabled=true"
+ELASTICSEARCH_JAVA_OPTS="-Xms512m -Xmx512m -Des.failure_store_feature_flag_enabled=true"
+
+ELASTICSEARCH_DOCKER_CPU_MIN=1
+ELASTICSEARCH_DOCKER_CPU_MAX=1
+ELASTICSEARCH_DOCKER_RAM=1g
+
+ELASTICSEARCH_DOCKER_SHM_SIZE=512m
-ELASTICSEARCH_SHM_SIZE="1g"
ELASTICSEARCH_DOCKER_LOG_SIZE_PER_FILE="1000m"
ELASTICSEARCH_DOCKER_LOG_NUM_FILES=10
@@ -140,9 +145,6 @@ ELASTICSEARCH_HOSTS='["https://elasticsearch-1:9200","https://elasticsearch-2:92
KIBANA_HOST="https://kibana:5601"
-KIBANA_SERVER_NAME="cogstack-kibana"
-
-
########################################################################## KIBANA Env vars ###########################################################################
# NOTE: some variables from the Elasticsearch section are used
# - ${ELASTICSEARCH_VERSION} is used for certificate paths, as well as kibana.yml config path.
@@ -158,15 +160,15 @@ KIBANA_VERSION=opensearch-dashboards
# - kibana
# - opensearch_dashboards # make note of the underscore...
-KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards
+KIBANA_CONFIG_FILE_VERSION=opensearch_dashboards
# possible values:
# - elasticsearch : docker.elastic.co/kibana/kibana:8.18.2
# - elasticsearch (custom cogstack image) : cogstacksystems/cogstack-kibana:latest
-# - opensearch : opensearchproject/opensearch-dashboards:3.2.0
+# - opensearch : opensearchproject/opensearch-dashboards:3.3.0
# the custom cogstack image is always based on the last image of ES native
-ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.2.0
+ELASTICSEARCH_KIBANA_DOCKER_IMAGE=opensearchproject/opensearch-dashboards:3.3.0
KIBANA_SERVER_NAME="cogstack-kibana"
KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601"
@@ -174,7 +176,11 @@ KIBANA_PUBLIC_BASE_URL="https://elasticsearch-1:5601"
KIBANA_SERVER_HOST="0.0.0.0"
KIBANA_SERVER_OUTPUT_PORT=5601
-KIBANA_SHM_SIZE="1g"
+KIBANA_DOCKER_SHM_SIZE=512m
+KIBANA_DOCKER_CPU_MIN=1
+KIBANA_DOCKER_CPU_MAX=1
+KIBANA_DOCKER_RAM=1g
+
# this is used in Kibana
# it needs to be generated via the API
@@ -201,6 +207,10 @@ ELASTICSEARCH_XPACK_SECURITY_REPORTING_ENCRYPTION_KEY="e0Y1gTxHWOopIWMTtpjQsDS6K
METRICBEAT_IMAGE="docker.elastic.co/beats/metricbeat:8.18.2"
+METRICBEAT_DOCKER_SHM=512m
+METRICBEAT_DOCKER_CPU_MIN=1
+METRICBEAT_DOCKER_CPU_MAX=1
+METRICBEAT_DOCKER_RAM=1g
########################################################################## FILEBEAT Env vars ###########################################################################
@@ -213,3 +223,9 @@ FILEBEAT_STARTUP_COMMAND="-e --strict.perms=false"
FILEBEAT_HOST="https://elasticsearch-1:9200"
FILEBEAT_IMAGE="docker.elastic.co/beats/filebeat:8.18.2"
+
+
+FILEBEAT_DOCKER_SHM=512m
+FILEBEAT_DOCKER_CPU_MIN=1
+FILEBEAT_DOCKER_CPU_MAX=1
+FILEBEAT_DOCKER_RAM=1g
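
The heap drop from 2048m to 512m keeps the JVM at roughly half of the new 1g container limit, in line with the usual Elasticsearch/OpenSearch guidance that the rest of the memory is left for off-heap use and the filesystem cache. A minimal sketch of that rule of thumb, with values mirroring the env file:

```bash
# Rule-of-thumb sketch: derive the heap from the container RAM limit,
# keeping it at roughly half (1g RAM -> 512m heap, as configured above).
ram_mb=1024                      # ELASTICSEARCH_DOCKER_RAM=1g
heap_mb=$(( ram_mb / 2 ))
echo "ELASTICSEARCH_JAVA_OPTS=\"-Xms${heap_mb}m -Xmx${heap_mb}m\""
```
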
diff --git a/deploy/export_env_vars.sh b/deploy/export_env_vars.sh
index ea8266095..58b446543 100755
--- a/deploy/export_env_vars.sh
+++ b/deploy/export_env_vars.sh
@@ -3,12 +3,15 @@
# Enable strict mode (without -e to avoid exit-on-error)
set -uo pipefail
+# Support being sourced in shells where BASH_SOURCE is unset (e.g. zsh)
+SCRIPT_SOURCE="${BASH_SOURCE[0]-$0}"
+SCRIPT_DIR="$(cd "$(dirname "$SCRIPT_SOURCE")" && pwd)"
+SCRIPT_NAME="$(basename "$SCRIPT_SOURCE")"
-echo "🔧 Running $(basename "${BASH_SOURCE[0]}")..."
+echo "🔧 Running $SCRIPT_NAME..."
set -a
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
DEPLOY_DIR="$SCRIPT_DIR"
SECURITY_DIR="$SCRIPT_DIR/../security/env"
SERVICES_DIR="$SCRIPT_DIR/../services"
@@ -38,6 +41,18 @@ env_files=(
"$SERVICES_DIR/cogstack-nlp/medcat-service/env/medcat.env"
)
+LINT_SCRIPT="$SCRIPT_DIR/../nifi/user_scripts/utils/lint_env.py"
+
+if [ -x "$LINT_SCRIPT" ]; then
+ echo "🔍 Validating env files..."
+ if ! python3 "$LINT_SCRIPT" "${env_files[@]}"; then
+ echo "❌ Env validation failed. Fix the errors above before continuing."
+ exit 1
+ fi
+else
+ echo "⚠️ Skipping env validation; $LINT_SCRIPT not found or not executable."
+fi
+
for env_file in "${env_files[@]}"; do
if [ -f "$env_file" ]; then
echo "✅ Sourcing $env_file"
@@ -56,4 +71,4 @@ set +a
# Restore safe defaults for interactive/dev shell
set +u
-set +o pipefail
\ No newline at end of file
+set +o pipefail
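
The `${BASH_SOURCE[0]-$0}` fallback lets the script be sourced from shells that do not define `BASH_SOURCE` (such as zsh), while `set -a` auto-exports every assignment the env files make. A minimal usage sketch (the variable echoed at the end is just an example of one the env files define):

```bash
# Run from the repo root; sourcing keeps the exported vars in this shell.
source deploy/export_env_vars.sh
echo "NiFi RAM limit: ${NIFI_DOCKER_RAM:-<unset>}"
```
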
diff --git a/deploy/gitea.env b/deploy/gitea.env
index 0009d5759..e2ef85779 100644
--- a/deploy/gitea.env
+++ b/deploy/gitea.env
@@ -23,3 +23,8 @@ GITEA_LOCAL_PUB_KEY_PATH="$GITEA_LOCAL_KEY_PATH.pub"
GITEA_LOCAL_KEY_TITLE="gitea-cogstack-$(hostname)-$(date +%s)"
GITEA_DEFAULT_MAIN_REMOTE_NAME="cogstack-gitea"
+
+GITEA_DOCKER_SHM_SIZE=512m
+GITEA_DOCKER_CPU_MIN=1
+GITEA_DOCKER_CPU_MAX=1
+GITEA_DOCKER_RAM=1g
diff --git a/deploy/nginx.env b/deploy/nginx.env
index aae2c825d..a08762ca2 100644
--- a/deploy/nginx.env
+++ b/deploy/nginx.env
@@ -1,3 +1,9 @@
NGINX_KIBANA_HOST=kibana
NGINX_KIBANA_PROXY_PORT=5601
NGINX_ES_NODE_SOURCE_INSTANCE_NAME="elasticsearch-1"
+
+
+NGINX_SHM_SIZE=1g
+NGINX_DOCKER_CPU_MIN=1
+NGINX_DOCKER_CPU_MAX=1
+NGINX_DOCKER_RAM=1g
diff --git a/deploy/nifi.env b/deploy/nifi.env
index dcc710c06..cc3380ec8 100644
--- a/deploy/nifi.env
+++ b/deploy/nifi.env
@@ -1,3 +1,29 @@
+
+
+##############################################################################################################################
+# IMPORTANT SETTINGS FOR DEPLOYMENTS RESOURCE SCOPED
+##############################################################################################################################
+NIFI_JVM_OPTS="-XX:+UseG1GC -XX:MaxGCPauseMillis=250 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom"
+NIFI_JVM_HEAP_INIT=768m
+NIFI_JVM_HEAP_MAX=1g
+
+
+NIFI_DOCKER_SHM_SIZE=1g
+NIFI_DOCKER_REGISTRY_SHM_SIZE=1g
+
+NIFI_DOCKER_CPU_MIN=1
+NIFI_DOCKER_CPU_MAX=1
+NIFI_DOCKER_RAM=1g
+
+NIFI_REGISTRY_DOCKER_CPU_MIN=1
+NIFI_REGISTRY_DOCKER_CPU_MAX=1
+NIFI_REGISTRY_DOCKER_RAM=1g
+
+NIFI_DOCKER_LOG_SIZE_PER_FILE="250m"
+NIFI_DOCKER_LOG_NUM_FILES=10
+
+##############################################################################################################################
+
# NiFi
NIFI_ENV_FILE="./nifi.env"
NIFI_SECURITY_DIR="../security/certificates/nifi/"
@@ -6,11 +32,6 @@ NIFI_DATA_PATH="../data/"
NIFI_VERSION="2.7.2"
NIFI_TOOLKIT_VERSION=$NIFI_VERSION
-NIFI_SHM_SIZE="1g"
-NIFI_REGISTRY_SHM_SIZE="1g"
-NIFI_DOCKER_LOG_SIZE_PER_FILE="250m"
-NIFI_DOCKER_LOG_NUM_FILES=10
-
#### Port and network settings
NIFI_WEB_PROXY_CONTEXT_PATH="/nifi"
diff --git a/deploy/services-dev.yml b/deploy/services-dev.yml
index 593af6ec9..51fcf4db0 100644
--- a/deploy/services-dev.yml
+++ b/deploy/services-dev.yml
@@ -1,3 +1,67 @@
+#---------------------------------------------------------------------------#
+# Common snippets / anchors #
+#---------------------------------------------------------------------------#
+x-nifi-logging-common: &nifi-logging-common
+ driver: "json-file"
+ options:
+ max-size: ${NIFI_DOCKER_LOG_SIZE_PER_FILE:-250m}
+ max-file: ${NIFI_DOCKER_LOG_NUM_FILES:-10}
+
+x-logging-common: &logging-common
+ driver: "json-file"
+ options:
+ max-size: ${DOCKER_LOG_SIZE_PER_FILE:-100m}
+ max-file: ${DOCKER_LOG_NUM_FILES:-10}
+
+x-all-env: &all-env
+ - ./project.env
+ - ./general.env
+ - ./nifi.env
+ - ./gitea.env
+ - ./nginx.env
+ - ./database.env
+ - ./elasticsearch.env
+ - ./network_settings.env
+ - ../security/env/users_nifi.env
+ - ../security/env/users_database.env
+ - ../security/env/users_nginx.env
+ - ../security/env/users_elasticsearch.env
+ - ../security/env/certificates_general.env
+ - ../security/env/certificates_elasticsearch.env
+ - ../security/env/certificates_nifi.env
+
+x-es-env: &es-env
+ - ./network_settings.env
+ - ./elasticsearch.env
+ - ../security/env/users_elasticsearch.env
+ - ../security/env/certificates_elasticsearch.env
+
+x-common-hosts: &common-hosts
+ - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
+ - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
+ - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0}
+ - ${KIBANA_HOST_NAME:-test-4:0.0.0.0}
+ - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
+ - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
+
+x-common-ulimits: &common-ulimits
+ ulimits:
+ nofile:
+ soft: 65535
+ hard: 65535
+ nproc: 65535
+ memlock:
+ soft: -1
+ hard: -1
+
+x-nifi-common: &nifi-common
+ <<: *common-ulimits
+ restart: always
+ env_file: *all-env
+ extra_hosts: *common-hosts
+ networks:
+ - cognet
+
#---------------------------------------------------------------------------#
# Used services #
#---------------------------------------------------------------------------#
@@ -7,7 +71,7 @@ services:
# NiFi webapp #
#---------------------------------------------------------------------------#
nifi:
- # image: cogstacksystems/cogstack-nifi:latest
+ <<: *nifi-common
build:
context: ../nifi/
args:
@@ -16,19 +80,7 @@ services:
no_proxy: $no_proxy
container_name: cogstack-nifi
hostname: nifi
- restart: always
- env_file:
- - ./general.env
- - ./project.env
- - ./nifi.env
- - ./elasticsearch.env
- - ./network_settings.env
- - ../security/users_nifi.env
- - ../security/users_elasticsearch.env
- - ../security/certificates_general.env
- - ../security/certificates_elasticsearch.env
- - ../security/certificates_nifi.env
- shm_size: 1024mb
+ shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"}
environment:
- USER_ID=${NIFI_UID:-1000}
- GROUP_ID=${NIFI_GID:-1000}
@@ -37,30 +89,25 @@ services:
- NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443}
- NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082}
- NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000}
- - NIFI_SECURITY_DIR=${NIFI_SECURITY_DIR:-../security/nifi_certificates/}
- - ELASTICSEARCH_SECURITY_DIR=${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/}
volumes:
# INFO: drivers folder
- ../nifi/drivers:/opt/nifi/drivers
-
+
# INFO: if there are local changes, map these content from local host to container
# (normally, these 3 directories below are bundled with our NiFi image)
# N.B. The container user may not have the permission to read these directories/files.
- - ../nifi/user-templates:/opt/nifi/nifi-current/conf/templates:rw
- - ../nifi/user-scripts:/opt/nifi/user-scripts:rw
- - ../nifi/user-schemas:/opt/nifi/user-schemas:rw
+ - ../nifi/user_templates:/opt/nifi/nifi-current/conf/templates:rw
+ - ../nifi/user_scripts:/opt/nifi/user_scripts:rw
+ - ../nifi/user_schemas:/opt/nifi/user_schemas:rw
- # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.0.x
- - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw
+ # this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x
+ - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw
# INFO: uncomment below to map security certificates if need to secure NiFi endpoints
- - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}:/opt/nifi/nifi-current/nifi_certificates:ro
- - ./${ELASTICSEARCH_SECURITY_DIR:-../security/es_certificates/}:/opt/nifi/nifi-current/es_certificates:ro
- - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi/nifi-current/conf/keystore.jks
- - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks:/opt/nifi/nifi-current/conf/truststore.jks
+ - ../security:/security:ro
# Security credentials scripts
- - ../security/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro
+ - ../security/scripts/nifi_create_single_user_auth.sh:/opt/nifi/nifi-current/security_scripts/nifi_create_single_user_auth.sh:ro
# # Nifi properties file:
- ../nifi/conf/:/opt/nifi/nifi-current/conf/:rw
@@ -72,7 +119,7 @@ services:
- ../services/cogstack-db/:/opt/cogstack-db/:rw
# medcat models
- - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/nlp-services/medcat-service/models/}:/opt/models:rw
+ - ./${RES_MEDCAT_SERVICE_MODEL_PRODUCTION_PATH:-../services/cogstack-nlp/medcat-service/models/}:/opt/models:rw
# rest of volumes to persist the state
- nifi-vol-logs:/opt/nifi/nifi-current/logs
@@ -85,51 +132,22 @@ services:
# errors generated during data processing
- nifi-vol-errors:/opt/nifi/pipeline/flowfile-errors
- extra_hosts:
- - ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
- - ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
- - ${ELASTICSEARCH_3_HOST_NAME:-test-3:0.0.0.0}
- - ${KIBANA_HOST_NAME:-test-4:0.0.0.0}
- - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
- - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
-
- # user: "${NIFI_UID:-1000}:${NIFI_GID:-1000}"
- ulimits:
- memlock:
- soft: -1
- hard: -1
- nofile:
- soft: 65536
- hard: 262144
-
# INFO : Uncomment the below line to generate your own USERNAME and PASSWORD,
# a bit messy this way as you will need to copy the credentials back
# to the "login-identity-providers.xml" section.
# entrypoint: bash -c "/opt/nifi/nifi-current/bin/nifi.sh set-single-user-credentials admin admincogstacknifi"
-
tty: true
ports:
- "${NIFI_OUTPUT_PORT:-8082}:${NIFI_INTERNAL_PORT:-8443}"
- "${NIFI_INPUT_SOCKET_PORT:-10000}"
- networks:
- - cognet
-
+ logging: *nifi-logging-common
+
nifi-registry-flow:
image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.7.1}
hostname: nifi-registry
container_name: cogstack-nifi-registry-flow
- restart: always
+ shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-"1g"}
user: root
- env_file:
- - ./general.env
- - ./network_settings.env
- - ./nifi.env
- - ./project.env
- - ../security/users_nifi.env
- - ../security/users_elasticsearch.env
- - ../security/certificates_general.env
- - ../security/certificates_elasticsearch.env
- - ../security/certificates_nifi.env
environment:
- http_proxy=$HTTP_PROXY
- https_proxy=$HTTPS_PROXY
@@ -143,31 +161,20 @@ services:
- TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-./conf/truststore.jks}
- TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks}
- - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"CN=admin, OU=nifi"}
+ - INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"}
- AUTH=${NIFI_AUTH:-"tls"}
- NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database}
#- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file}
- NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage}
volumes:
- ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw
- - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
- - ./${NIFI_SECURITY_DIR:-../security/nifi_certificates/}nifi-truststore.jks://opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro
+ - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
+ - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-truststore.jks:/opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro
- nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database
- nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage
- nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work
- nifi-registry-vol-logs:/opt/nifi-registry/nifi-registry-current/logs
- extra_hosts:
- - ${NIFI_HOST_NAME:-test-5:0.0.0.0}
- - ${NIFI_REGISTRY_HOST_NAME:-test-6:0.0.0.0}
-
- ulimits:
- memlock:
- soft: -1
- hard: -1
- nofile:
- soft: 65536
- hard: 262144
-
+ extra_hosts: *common-hosts
tty: true
ports:
- "${NIFI_REGISTRY_FLOW_OUTPUT_PORT:-8083}:${NIFI_REGISTRY_FLOW_INPUT_PORT:-18443}"
@@ -177,43 +184,27 @@ services:
chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/work && \
chown -R nifi:nifi /opt/nifi-registry/nifi-registry-current/logs && \
bash /opt/nifi-registry/scripts/start.sh"
-
- networks:
- - cognet
-
+ logging: *nifi-logging-common
+
nifi-nginx:
- # image: cogstacksystems/nifi-nginx:latest
- build:
- context: ../services/nginx/
- args:
- HTTP_PROXY: $HTTP_PROXY
- HTTPS_PROXY: $HTTPS_PROXY
- no_proxy: $no_proxy
+ image: cogstacksystems/nifi-nginx:latest
container_name: cogstack-nifi-nginx
restart: always
- env_file:
- - ./network_settings.env
- - ./nginx.env
- - ./nifi.env
- - ./elasticsearch.env
- - ./project.env
- - ./nlp_service.env
+ shm_size: 512mb
+ env_file: *all-env
volumes:
- ../services/nginx/sites-enabled:/etc/nginx/sites-enabled:ro
- ../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw
- ../services/nginx/config/nginx.conf:/etc/nginx/nginx.conf:rw
- - ../security/root_certificates:/etc/nginx/root_certificates:ro
- - ../security/nifi_certificates:/etc/nginx/nifi_certificates:ro
-
- - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.crt.pem:/etc/nginx/es_certificates/elastic-stack-ca.crt.pem:ro
- - ../security/es_certificates/${ELASTICSEARCH_VERSION:-opensearch}/elastic-stack-ca.key.pem:/etc/nginx/es_certificates/elastic-stack-ca.key.pem:ro
- # - ../security/es_certificates/:/etc/nginx/es_certificates/:ro
+ - ../security/certificates:/certificates:ro
ports:
- "${NIFI_EXTERNAL_PORT_NGINX:-8443}:${NIFI_INTERNAL_PORT_NGINX:-8443}"
- "${NIFI_REGISTRY_EXTERNAL_PORT_NGINX:-18443}:${NIFI_REGISTRY_INTERNAL_PORT_NGINX:-18443}"
networks:
- cognet
command: /bin/bash -c "envsubst < /etc/nginx/config/nginx.conf.template > /etc/nginx/config/nginx.conf && nginx -g 'daemon off;'"
+ extra_hosts: *common-hosts
+ logging: *nifi-logging-common
#---------------------------------------------------------------------------#
# Docker named volumes #
@@ -249,7 +240,6 @@ volumes:
driver: local
nifi-registry-vol-logs:
driver: local
-
#---------------------------------------------------------------------------#
# Docker networks. #
#---------------------------------------------------------------------------#
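
The `x-*` extension fields with `&`/`*` anchors deduplicate the per-service `env_file`, `extra_hosts`, `ulimits`, and logging stanzas, and `<<: *common-ulimits` merges a whole mapping into a service. Rendering the resolved config is the quickest way to confirm the merges behave as intended (run from `deploy/`):

```bash
# Print the fully-interpolated nifi service and inspect the merged logging block.
docker compose -f services-dev.yml config nifi | grep -A4 'logging:'
```
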
diff --git a/deploy/services.yml b/deploy/services.yml
index 27c19bbfa..57266f692 100644
--- a/deploy/services.yml
+++ b/deploy/services.yml
@@ -42,6 +42,10 @@ x-es-env: &es-env
- ../security/env/users_elasticsearch.env
- ../security/env/certificates_elasticsearch.env
+x-db-env: &db-env
+ - ./database.env
+ - ../security/env/users_database.env
+
x-common-hosts: &common-hosts
- ${ELASTICSEARCH_1_HOST_NAME:-test-1:0.0.0.0}
- ${ELASTICSEARCH_2_HOST_NAME:-test-2:0.0.0.0}
@@ -62,12 +66,26 @@ x-common-ulimits: &common-ulimits
x-nifi-common: &nifi-common
<<: *common-ulimits
- restart: always
+ restart: unless-stopped
env_file: *all-env
extra_hosts: *common-hosts
networks:
- cognet
+x-db-common: &db-common
+ <<: *common-ulimits
+ shm_size: ${DATABASE_DOCKER_SHM_SIZE:-"1g"}
+ restart: unless-stopped
+ env_file: *db-env
+ deploy:
+ resources:
+ limits:
+ cpus: "${DATABASE_DOCKER_CPU_MAX}"
+ memory: "${DATABASE_DOCKER_RAM}"
+ reservations:
+ cpus: "${DATABASE_DOCKER_CPU_MIN}"
+ memory: "${DATABASE_DOCKER_RAM}"
+
x-es-common-volumes: &es-common-volumes
# Shared configs
- ../services/elasticsearch/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${ELASTICSEARCH_VERSION:-opensearch}/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:ro
@@ -93,9 +111,9 @@ x-es-common-volumes: &es-common-volumes
x-es-common: &es-common
<<: *common-ulimits
- image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.2.0}
- shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"}
- restart: always
+ image: ${ELASTICSEARCH_DOCKER_IMAGE:-opensearchproject/opensearch:3.3.0}
+ shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g}
+ restart: unless-stopped
env_file: *es-env
networks:
- cognet
@@ -108,12 +126,21 @@ x-es-common: &es-common
OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver}
ELASTICSEARCH_VERSION: ${ELASTICSEARCH_VERSION:-opensearch}
logging: *es-logging-common
+ deploy:
+ resources:
+ limits:
+ cpus: "${ELASTICSEARCH_DOCKER_CPU_MAX}"
+ memory: "${ELASTICSEARCH_DOCKER_RAM}"
+ reservations:
+ cpus: "${ELASTICSEARCH_DOCKER_CPU_MIN}"
+ memory: "${ELASTICSEARCH_DOCKER_RAM}"
x-metricbeat-common: &metricbeat-common
<<: *common-ulimits
image: ${METRICBEAT_IMAGE:-docker.elastic.co/beats/metricbeat:8.18.2}
command: -e --strict.perms=false
restart: unless-stopped
+ shm_size: ${METRICBEAT_DOCKER_SHM:-1g}
env_file:
- ./elasticsearch.env
- ../security/env/users_elasticsearch.env
@@ -122,6 +149,14 @@ x-metricbeat-common: &metricbeat-common
- METRICBEAT_USER=${METRICBEAT_USER:-elastic}
- METRICBEAT_PASSWORD=${METRICBEAT_PASSWORD:-kibanaserver}
- KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"}
+ deploy:
+ resources:
+ limits:
+ cpus: "${METRICBEAT_DOCKER_CPU_MAX}"
+ memory: "${METRICBEAT_DOCKER_RAM}"
+ reservations:
+ cpus: "${METRICBEAT_DOCKER_CPU_MIN}"
+ memory: "${METRICBEAT_DOCKER_RAM}"
volumes:
- ../services/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro
- ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/usr/share/metricbeat/root-ca.crt:ro
@@ -136,6 +171,7 @@ x-filebeat-common: &filebeat-common
image: ${FILEBEAT_IMAGE:-docker.elastic.co/beats/filebeat:8.18.2}
command: ${FILEBEAT_STARTUP_COMMAND:-'-e --strict.perms=false'}
restart: unless-stopped
+ shm_size: ${FILEBEAT_DOCKER_SHM:-1g}
env_file:
- ./elasticsearch.env
- ../security/env/users_elasticsearch.env
@@ -144,6 +180,14 @@ x-filebeat-common: &filebeat-common
- FILEBEAT_USER=${FILEBEAT_USER:-elastic}
- FILEBEAT_PASSWORD=${FILEBEAT_PASSWORD:-kibanaserver}
- KIBANA_HOST=${KIBANA_HOST:-"https://kibana:5601"}
+ deploy:
+ resources:
+ limits:
+ cpus: "${FILEBEAT_DOCKER_CPU_MAX}"
+ memory: "${FILEBEAT_DOCKER_RAM}"
+ reservations:
+ cpus: "${FILEBEAT_DOCKER_CPU_MIN}"
+ memory: "${FILEBEAT_DOCKER_RAM}"
volumes:
- ../services/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:rw
- ../security/certificates/elastic/elasticsearch/elastic-stack-ca.crt.pem:/etc/pki/root/root-ca.crt:ro
@@ -162,15 +206,10 @@ services:
# Postgres container with sample data #
#---------------------------------------------------------------------------#
samples-db:
- <<: *common-ulimits
+ <<: *db-common
image: postgres:17.5-alpine
container_name: cogstack-samples-db
- shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
- restart: always
platform: linux/amd64
- env_file:
- - ./database.env
- - ../security/env/users_database.env
environment:
# PG env vars
- POSTGRES_USER=${POSTGRES_USER_SAMPLES:-test}
@@ -194,19 +233,14 @@ services:
# CogStack Databank / Cogstack-DB, production database #
#---------------------------------------------------------------------------#
cogstack-databank-db:
- <<: *common-ulimits
+ <<: *db-common
image: postgres:17.5-alpine
container_name: cogstack-production-databank-db
- shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
- restart: always
platform: linux/amd64
- env_file:
- - ./database.env
- - ../security/env/users_database.env
environment:
- - POSTGRES_USER=${POSTGRES_USER:-admin}
- - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-admin}
- - POSTGRES_DATABANK_DB=${POSTGRES_DATABANK_DB:-cogstack}
+ - POSTGRES_USER=${DATABASE_USER:-admin}
+ - POSTGRES_PASSWORD=${DATABASE_PASSWORD:-admin}
+ - POSTGRES_DATABANK_DB=${DATABASE_DB_NAME:-cogstack}
volumes:
# mapping postgres data dump and initialization
- ../services/cogstack-db/pgsql/schemas:/data/:ro
@@ -222,14 +256,9 @@ services:
- cognet
cogstack-databank-db-mssql:
- <<: *common-ulimits
+ <<: *db-common
image: mcr.microsoft.com/mssql/server:2019-latest
container_name: cogstack-production-databank-db-mssql
- shm_size: ${POSTGRES_SHM_SIZE:-"1g"}
- restart: always
- env_file:
- - ./database.env
- - ../security/env/users_database.env
environment:
- ACCEPT_EULA=y
- MSSQL_SA_USER=${MSSQL_SA_USER:-sa}
@@ -252,7 +281,7 @@ services:
es_native_create_certs:
container_name: es_create_certs
image: docker.elastic.co/elasticsearch/elasticsearch:8.18.2
- shm_size: ${ELASTICSEARCH_SHM_SIZE:-"1g"}
+ shm_size: ${ELASTICSEARCH_DOCKER_SHM_SIZE:-1g}
env_file: *es-env
restart: "no"
command: bash -c "bash /usr/share/elasticsearch/es_native_cert_generator.sh"
@@ -287,7 +316,7 @@ services:
ports:
- "${ELASTICSEARCH_NODE_1_OUTPUT_PORT:-9200}:9200"
- "${ELASTICSEARCH_NODE_1_COMM_OUTPUT_PORT:-9300}:9300"
- - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600" # required for Performance Analyzer
+ - "${ELASTICSEARCH_NODE_1_ANALYZER_OUTPUT_PORT:-9600}:9600"
elasticsearch-2:
extends:
@@ -306,7 +335,7 @@ services:
ports:
- "${ELASTICSEARCH_NODE_2_OUTPUT_PORT:-9201}:9200"
- "${ELASTICSEARCH_NODE_2_COMM_OUTPUT_PORT:-9301}:9300"
- - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600" # required for Performance Analyzer
+ - "${ELASTICSEARCH_NODE_2_ANALYZER_OUTPUT_PORT:-9601}:9600"
elasticsearch-3:
extends:
@@ -325,7 +354,7 @@ services:
ports:
- "${ELASTICSEARCH_NODE_3_OUTPUT_PORT:-9202}:9200"
- "${ELASTICSEARCH_NODE_3_COMM_OUTPUT_PORT:-9302}:9300"
- - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600" # required for Performance Analyzer
+ - "${ELASTICSEARCH_NODE_3_ANALYZER_OUTPUT_PORT:-9602}:9600"
metricbeat-1:
<<: *metricbeat-common
@@ -350,9 +379,9 @@ services:
container_name: cogstack-metricbeat-3
volumes:
- metricbeat-data-3:/usr/share/metricbeat/data
- - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.p12:/usr/share/metricbeat/esnode.p12:ro
- - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.crt:/usr/share/metricbeat/esnode.crt:ro
- - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME3:-elasticsearch-3}.key:/usr/share/metricbeat/esnode.key:ro
+ - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.p12:/usr/share/metricbeat/esnode.p12:ro
+ - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.crt:/usr/share/metricbeat/esnode.crt:ro
+ - ../security/certificates/elastic/elasticsearch/elasticsearch/${ES_INSTANCE_NAME_3:-elasticsearch-3}/${ES_INSTANCE_NAME_3:-elasticsearch-3}.key:/usr/share/metricbeat/esnode.key:ro
filebeat-1:
<<: *filebeat-common
@@ -389,9 +418,9 @@ services:
#---------------------------------------------------------------------------#
kibana:
<<: *common-ulimits
- image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.2.0}
+ image: ${ELASTICSEARCH_KIBANA_DOCKER_IMAGE:-opensearchproject/opensearch-dashboards:3.3.0}
container_name: cogstack-kibana
- shm_size: ${KIBANA_SHM_SIZE:-"1g"}
+ shm_size: ${KIBANA_DOCKER_SHM_SIZE:-1g}
restart: always
env_file: *es-env
environment:
@@ -401,7 +430,14 @@ services:
# INFO: uncomment below to enable SSL keys
SERVER_SSL_ENABLED: ${ELASTICSEARCH_SSL_ENABLED:-"true"}
OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${OPENSEARCH_INITIAL_ADMIN_PASSWORD:-kibanaserver}
-
+ deploy:
+ resources:
+ limits:
+ cpus: "${KIBANA_DOCKER_CPU_MAX}"
+ memory: "${KIBANA_DOCKER_RAM}"
+ reservations:
+ cpus: "${KIBANA_DOCKER_CPU_MIN}"
+ memory: "${KIBANA_DOCKER_RAM}"
volumes:
# INFO: Kibana configuration mapped via volume (make sure to comment this and uncomment the next line if you are using NATIVE kibana deployment)
- ../services/kibana/config/${ELASTICSEARCH_VERSION:-opensearch}.yml:/usr/share/${KIBANA_VERSION:-opensearch-dashboards}/config/${KIBANA_CONFIG_FILE_VERSION:-opensearch_dashboards}.yml:ro
@@ -434,7 +470,7 @@ services:
image: cogstacksystems/cogstack-nifi:latest
container_name: cogstack-nifi
hostname: nifi
- shm_size: ${NIFI_SHM_SIZE:-"1g"}
+ shm_size: ${NIFI_DOCKER_SHM_SIZE:-"1g"}
environment:
- USER_ID=${NIFI_UID:-1000}
- GROUP_ID=${NIFI_GID:-1000}
@@ -443,6 +479,15 @@ services:
- NIFI_INTERNAL_PORT=${NIFI_INTERNAL_PORT:-8443}
- NIFI_OUTPUT_PORT=${NIFI_OUTPUT_PORT:-8082}
- NIFI_INPUT_SOCKET_PORT=${NIFI_INPUT_SOCKET_PORT:-10000}
+ - JVM_OPTS="${NIFI_JVM_OPTS:--XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:+ParallelRefProcEnabled -Djava.security.egd=file:/dev/./urandom}"
+ deploy:
+ resources:
+ limits:
+ cpus: "${NIFI_DOCKER_CPU_MAX}"
+ memory: "${NIFI_DOCKER_RAM}"
+ reservations:
+ cpus: "${NIFI_DOCKER_CPU_MIN}"
+ memory: "${NIFI_DOCKER_RAM}"
volumes:
# INFO: drivers folder
- ../nifi/drivers:/opt/nifi/drivers
@@ -450,12 +495,12 @@ services:
# INFO: if there are local changes, map these content from local host to container
# (normally, these 3 directories below are bundled with our NiFi image)
# N.B. The container user may not have the permission to read these directories/files.
- - ../nifi/user-templates:/opt/nifi/nifi-current/conf/templates:rw
- - ../nifi/user-scripts:/opt/nifi/user-scripts:rw
- - ../nifi/user-schemas:/opt/nifi/user-schemas:rw
+ - ../nifi/user_templates:/opt/nifi/nifi-current/conf/templates:rw
+ - ../nifi/user_scripts:/opt/nifi/user_scripts:rw
+ - ../nifi/user_schemas:/opt/nifi/user_schemas:rw
# this is a direct mapping to where we store the NiFi python processors as of NiFi 2.x.x
- - ../nifi/user-python-extensions:/opt/nifi/nifi-current/python_extensions:rw
+ - ../nifi/user_python_extensions:/opt/nifi/nifi-current/python_extensions:rw
# INFO: uncomment below to map security certificates if need to secure NiFi endpoints
- ../security:/security:ro
@@ -501,7 +546,7 @@ services:
image: apache/nifi-registry:${NIFI_REGISTRY_VERSION:-2.7.2}
hostname: nifi-registry
container_name: cogstack-nifi-registry-flow
- shm_size: ${NIFI_REGISTRY_SHM_SIZE:-"1g"}
+ shm_size: ${NIFI_DOCKER_REGISTRY_SHM_SIZE:-1g}
user: root
environment:
- http_proxy=$HTTP_PROXY
@@ -509,11 +554,11 @@ services:
- no_proxy=$no_proxy
- USER_ID=${NIFI_UID:-1000}
- GROUP_ID=${NIFI_GID:-1000}
- - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-./conf/keystore.jks}
+ - KEYSTORE_PATH=${NIFI_REGISTRY_KEYSTORE_PATH:-/security/certificates/nifi/nifi-keystore.jks}
- KEYSTORE_TYPE=${NIFI_KEYSTORE_TYPE:-jks}
- KEYSTORE_PASSWORD=${NIFI_KEYSTORE_PASSWORD:-"cogstackNifi"}
- TRUSTSTORE_PASSWORD=${NIFI_TRUSTSTORE_PASSWORD:-"cogstackNifi"}
- - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-./conf/truststore.jks}
+ - TRUSTSTORE_PATH=${NIFI_REGISTRY_TRUSTSTORE_PATH:-/security/certificates/nifi/nifi-truststore.jks}
- TRUSTSTORE_TYPE=${NIFI_TRUSTSTORE_TYPE:-jks}
- INITIAL_ADMIN_IDENTITY=${NIFI_INITIAL_ADMIN_IDENTITY:-"cogstack"}
@@ -521,10 +566,18 @@ services:
- NIFI_REGISTRY_DB_DIR=${NIFI_REGISTRY_DB_DIR:-/opt/nifi-registry/nifi-registry-current/database}
#- NIFI_REGISTRY_FLOW_PROVIDER=${NIFI_REGISTRY_FLOW_PROVIDER:-file}
- NIFI_REGISTRY_FLOW_STORAGE_DIR=${NIFI_REGISTRY_FLOW_STORAGE_DIR:-/opt/nifi-registry/nifi-registry-current/flow_storage}
+ deploy:
+ resources:
+ limits:
+ cpus: "${NIFI_REGISTRY_DOCKER_CPU_MAX}"
+ memory: "${NIFI_REGISTRY_DOCKER_RAM}"
+ reservations:
+ cpus: "${NIFI_REGISTRY_DOCKER_CPU_MIN}"
+ memory: "${NIFI_REGISTRY_DOCKER_RAM}"
volumes:
- ../nifi/nifi-registry/:/opt/nifi-registry/nifi-registry-current/conf/:rw
- - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-keystore.jks:/opt/nifi-registry/nifi-registry-current/conf/keystore.jks:ro
- - ./${NIFI_SECURITY_DIR:-../security/certificates/nifi/}nifi-truststore.jks://opt/nifi-registry/nifi-registry-current/conf/truststore.jks:ro
+ - ../security:/security:ro
+
- nifi-registry-vol-database:/opt/nifi-registry/nifi-registry-current/database
- nifi-registry-vol-flow-storage:/opt/nifi-registry/nifi-registry-current/flow_storage
- nifi-registry-vol-work:/opt/nifi-registry/nifi-registry-current/work
@@ -545,8 +598,16 @@ services:
image: cogstacksystems/nifi-nginx:latest
container_name: cogstack-nifi-nginx
restart: always
- shm_size: 512mb
+ shm_size: ${NGINX_SHM_SIZE:-1g}
env_file: *all-env
+ deploy:
+ resources:
+ limits:
+ cpus: "${NGINX_DOCKER_CPU_MAX}"
+ memory: "${NGINX_DOCKER_RAM}"
+ reservations:
+ cpus: "${NGINX_DOCKER_CPU_MIN}"
+ memory: "${NGINX_DOCKER_RAM}"
volumes:
- ../services/nginx/sites-enabled:/etc/nginx/sites-enabled:ro
- ../services/nginx/config/nginx.conf.template:/etc/nginx/config/nginx.conf.template:rw
@@ -587,12 +648,20 @@ services:
<<: *common-ulimits
container_name: cogstack-gitea
image: gitea/gitea:1.23-rootless
- shm_size: ${DOCKER_SHM_SIZE:-"1g"}
+ shm_size: ${GITEA_DOCKER_SHM_SIZE:-"1g"}
restart: always
environment:
- http_proxy=$HTTP_PROXY
- https_proxy=$HTTPS_PROXY
- no_proxy=$no_proxy
+ deploy:
+ resources:
+ limits:
+ cpus: "${GITEA_DOCKER_CPU_MAX}"
+ memory: "${GITEA_DOCKER_RAM}"
+ reservations:
+ cpus: "${GITEA_DOCKER_CPU_MIN}"
+ memory: "${GITEA_DOCKER_RAM}"
volumes:
# app config
- ../services/gitea/app.ini:/etc/gitea/app.ini:rw
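
Unlike the `shm_size` settings, the new `deploy.resources` blocks interpolate variables such as `${NIFI_DOCKER_CPU_MAX}` without `:-` fallbacks, so they must be set (normally by sourcing `export_env_vars.sh`) before Compose parses the file. A hedged pre-flight check, covering a representative subset of the variables:

```bash
#!/usr/bin/env bash
# Fail fast if a resource variable referenced by services.yml is unset.
for v in NIFI_DOCKER_CPU_MIN NIFI_DOCKER_CPU_MAX NIFI_DOCKER_RAM \
         ELASTICSEARCH_DOCKER_CPU_MIN ELASTICSEARCH_DOCKER_CPU_MAX \
         ELASTICSEARCH_DOCKER_RAM; do
  : "${!v:?$v must be set (see the deploy/*.env files)}"
done
docker compose -f services.yml config --quiet && echo "compose config OK"
```
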
diff --git a/docs/deploy/troubleshooting.md b/docs/deploy/troubleshooting.md
index 15dc8c94f..b448a3dad 100644
--- a/docs/deploy/troubleshooting.md
+++ b/docs/deploy/troubleshooting.md
@@ -75,15 +75,15 @@ ERROR: [1] bootstrap checks failed
To solve this one needs to simply execute :
- on Linux/Mac OS X :
- ```sysctl -w vm.max_map_count=262144``` in terminal.
- To make the same change systemwide plase add ```vm.max_map_count=262144``` to /etc/sysctl.conf and restart the dockerservice/machine.
+ `sysctl -w vm.max_map_count=262144` in a terminal.
+ To make the same change system-wide, please add `vm.max_map_count=262144` to /etc/sysctl.conf and restart the Docker service/machine.
An example of this can be found under /services/elasticsearch/sysctl.conf
- on Windows you need to enter the following commands in a powershell instance:
- ```wsl -d docker-desktop```
+ `wsl -d docker-desktop`
- ```sysctl -w vm.max_map_count=262144```
+ `sysctl -w vm.max_map_count=262144`
For more on this issue please read: https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html
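
To make the setting survive reboots on Linux, a drop-in under `/etc/sysctl.d/` is the usual route (the file name below is an arbitrary choice):

```bash
echo 'vm.max_map_count=262144' | sudo tee /etc/sysctl.d/99-elasticsearch.conf
sudo sysctl --system   # reload all sysctl configuration
```
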
diff --git a/docs/deploy/workflows.md b/docs/deploy/workflows.md
index 0ad86ff0b..ddc7ee78d 100644
--- a/docs/deploy/workflows.md
+++ b/docs/deploy/workflows.md
@@ -260,7 +260,7 @@ Given a document content encoded as JSON, it will return payload containing the
There are several NiFi components involved in this process which stand out:
1. `ConvertAvroToJSON` - converts the AVRO records to JSON format using a generic format transcoder,
2. `ExecuteScript-ConvertRecordToMedCATinput` - prepares the JSON payload for MedCAT Service, this is Jython script, it has several configurable process properties:
- - `document_id_field\ = `docid` , the exact name of the unique Id column for the DB/ES record
+ - `document_id_field` = `docid`, the exact name of the unique Id column for the DB/ES record
- `document_text_field` = `document`, field/column name containing free text
- `log_file_name` = `nlp_request_bulk_parse_medical_text.log`, creates a log file in the repo folder `/nifi/user-scripts/`
- `log_invalid_records_to_file` = `True`, enable/disable logging errors to logfile with the above mentioned file name
@@ -403,4 +403,3 @@ Prerequisites for this workflow:
4. datetime fields must have the same format.
The script used for this process is located here: `nifi/user-scripts/cogstack_cohort_generate_data.py`. Please read all the info provided in the NiFi template.
-
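
Conceptually, `ExecuteScript-ConvertRecordToMedCATinput` maps each record to the two configured fields before the payload is sent on. A hypothetical one-liner showing the same mapping outside NiFi (field names follow the defaults above; the output shape is illustrative, not the exact MedCAT Service schema):

```bash
# record.json is a single DB/ES record; pull out the id and free-text fields.
jq -c '{document_id: .docid, text: .document}' record.json
```
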
diff --git a/docs/main.md b/docs/main.md
index 9ab7119c7..ab98b59f8 100644
--- a/docs/main.md
+++ b/docs/main.md
@@ -1,2 +1,3 @@
```{include} ../README.md
+```
diff --git a/docs/nifi/main.md b/docs/nifi/main.md
index 42b1ae1fc..879c05359 100644
--- a/docs/nifi/main.md
+++ b/docs/nifi/main.md
@@ -25,9 +25,9 @@ Avro Schema:[official documentation](https://avro.apache.org/docs/1.11.1/)
├── devel - custom folder that is mounted on the NiFi container where you may place your own scripts, again, read & write permissions required
├── drivers - drivers used for DB connections, currently PostgreSQL and MSSQL
├── nifi-app.log - log file mounted directly from the container for easy log checking
-├── user-schemas - Avro schemas used within workflows, it can also contain other schemas used in specific custom processors
-├── user-scripts - custom scripts used in workflows, you can put them here
-└── user-templates - here we store the fully exported templates of the workflows within NiFi
+├── user_schemas - Avro schemas used within workflows, it can also contain other schemas used in specific custom processors
+├── user_scripts - custom scripts used in workflows, you can put them here
+└── user_templates - here we store the fully exported templates of the workflows within NiFi
```
## Custom Docker image
@@ -85,14 +85,14 @@ nifi.flow.configuration.archive.max.time=1 days
nifi.flow.configuration.archive.max.storage=12 GB
```
-By default, the flowfiles thar are out of the processing queues will be archived for a set period of time. The ```nifi.flow.configuration.archive.max.time``` sets the max duration, max size configurable via ```nifi.flow.configuration.archive.max.storage```, take note of these properties, the storage limit can quickly be hit if you have a high flow-file throughput.
+By default, flowfiles that are out of the processing queues will be archived for a set period of time. `nifi.flow.configuration.archive.max.time` sets the maximum duration and `nifi.flow.configuration.archive.max.storage` the maximum size; take note of these properties, as the storage limit can quickly be hit if you have a high flow-file throughput.
Make sure to check the archive storage and flowfile storage settings as these will be the first to impact the space used for logging.
#### IMPORTANT NOTE about nifi properties
-:::{admonition} IMPORTANT NOTE about `nifi.properties
+:::{admonition} IMPORTANT NOTE about `nifi.properties`
:class: warning
For Linux users : This is a file that will get modified on runtime as when the container is up some of the properties within the file will get changed ( `nifi.cluster.node.address` for example). Some permission error's might pop out as the UID and GID of the folder permissions are different from that of the user within the container, which is using UID=1000 and GID=1000, declared in the `Dockerfile` and in `deploy/services.yml` under the `nifi` service section. To avoid permission issues, on the host container you will need to create a group with the GID 1000, assign the user that is running the docker command to the created group, and everything should work.
:::
@@ -302,4 +302,4 @@ This section covers dealing with data type issues depending on DB types and/or o
### MySQL
Issues have been found with MySQL:
-- allows zero dates in DateTime fields -> solution: can be overcome in the URL connection string using parameters
\ No newline at end of file
+- allows zero dates in DateTime fields -> solution: can be overcome in the URL connection string using parameters
diff --git a/docs/security/nifi.md b/docs/security/nifi.md
index b5406bd7e..eb0aa63d5 100644
--- a/docs/security/nifi.md
+++ b/docs/security/nifi.md
@@ -41,7 +41,7 @@ Before starting the NIFI container it's important to take note of the following
- **(OPTIONAL, DO NOT USE FOR NIFI VERSION >= 2.0)** the `nifi_toolkit_security.sh` script is used to download the nifi toolkit and generate new certificates and keys that are used by the container, take note that inside the `localhost` folder there is another nifi.properties file that is generated, we must look to the following setttings which are generated randomly and copy them to the `nifi/conf/nifi.properties` file.
- the trust/store keys generated for production will be in the `nifi_certificates/localhost` folder and the `nifi-cert.pem` + `nifi-key.key` files. in the base `nifi_certificates` folder.
-- as part of the security process the `nifi.sensitive.props.key` should be set to a random string or a password of minimum 12 characters. Once this is set do NOT modify it as all the other sensitive passwords will be hashed with this string. By default this is set to ```cogstackNiFipass```
+- as part of the security process the `nifi.sensitive.props.key` should be set to a random string or a password of at least 12 characters. Once this is set, do NOT modify it, as all the other sensitive passwords will be hashed with this string. By default this is set to `cogstackNiFipass`.
Example (`nifi/conf/nifi.properties`):
```properties
@@ -54,7 +54,7 @@ Example (`nifi/conf/nifi.properties`):
### Setting up access via user account (SINGLE USER CREDETIAL)
-This is entirely optional, if you have configered the security certs as described in ```security/README.md``` then you are good to go.
+This is entirely optional; if you have configured the security certs as described in `security/README.md`, then you are good to go.
Default username :
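
NiFi ships a CLI helper for setting `nifi.sensitive.props.key` before first start, which avoids hand-editing `nifi.properties` (a hedged sketch; the value shown is this repo's documented default, not a recommendation to reuse):

```bash
# Inside the NiFi container (or against a local install):
/opt/nifi/nifi-current/bin/nifi.sh set-sensitive-properties-key "cogstackNiFipass"
```
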
diff --git a/nifi/Dockerfile b/nifi/Dockerfile
index 746c57988..cd335892a 100644
--- a/nifi/Dockerfile
+++ b/nifi/Dockerfile
@@ -2,6 +2,8 @@ ARG NIFI_VERSION=2.7.2
FROM apache/nifi:${NIFI_VERSION}
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+
ARG HTTP_PROXY=""
ARG HTTPS_PROXY=""
ARG no_proxy=""
@@ -14,7 +16,7 @@ ARG GID=${NIFI_GID:-1000}
ARG TZ="Europe/London"
ARG NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY="/opt/nifi/nifi-current/python/framework"
ARG NIFI_PYTHON_EXTENSIONS_SOURCE_DIRECTORY_DEFAULT="/opt/nifi/nifi-current/python_extensions"
-ARG NIFI_PYTHON_WORKING_DIRECTORY="/opt/nifi/user-scripts"
+ARG NIFI_PYTHON_WORKING_DIRECTORY="/opt/nifi/user_scripts"
ENV TZ=${TZ}
ENV NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY}
@@ -25,34 +27,65 @@ ENV NIFI_PYTHON_WORKING_DIRECTORY=${NIFI_PYTHON_WORKING_DIRECTORY}
ENV PIP_PREFER_BINARY=1
ENV PIP_DISABLE_PIP_VERSION_CHECK=1
ENV PIP_NO_CACHE_DIR=1
+# Enables Python to generate .pyc files in the container
+ENV PYTHONDONTWRITEBYTECODE=0
+# Turns off buffering for easier container logging
+ENV PYTHONUNBUFFERED=1
# default env vars to prevent NiFi from running on HTTP
ENV NIFI_WEB_HTTP_PORT=""
ENV NIFI_WEB_HTTP_HOST=""
-RUN echo "GID=${GID}"
-RUN echo "UID=${UID}"
-
USER root
-# run updates and install some base utility packages along with python support
-RUN apt-get update && apt-get upgrade -y --no-install-recommends && apt-get install -y --no-install-recommends iputils-ping libssl-dev openssl apt-transport-https apt-utils curl software-properties-common wget git build-essential make cmake ca-certificates zip unzip tzdata jq
-
-RUN echo "deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-RUN echo "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" >> /etc/apt/sources.list.d/debian.sources
-
-# Microsoft repos
-RUN wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg
-RUN echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" | tee -a /etc/apt/sources.list
-
-RUN apt-get update && apt-get install --no-install-recommends -y ssl-cert libsqlite3-dev python3-dev python3-pip python3.11 python3.11-dev python3-venv sqlite3 postgresql-server-dev-all
+# add repositories, install tooling, and clean up apt metadata in one layer
+RUN set -eux; \
+ apt-get update -y; \
+ apt-get install -y --no-install-recommends \
+ apt-transport-https \
+ ca-certificates \
+ curl \
+ gnupg \
+ wget; \
+ printf '%s\n' \
+ "deb http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \
+ "deb http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \
+ "deb http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \
+ "deb http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \
+ "deb-src http://deb.debian.org/debian/ bookworm main contrib non-free non-free-firmware" \
+ "deb-src http://deb.debian.org/debian/ bookworm-updates main contrib non-free non-free-firmware" \
+ "deb-src http://deb.debian.org/debian/ bookworm-backports main contrib non-free non-free-firmware" \
+ "deb-src http://security.debian.org/debian-security/ bookworm-security main contrib non-free non-free-firmware" \
+ > /etc/apt/sources.list.d/debian.list; \
+ wget -q -O- https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor -o /etc/apt/trusted.gpg.d/packages.microsoft.gpg; \
+ echo "deb [arch=amd64,armhf,arm64] https://packages.microsoft.com/ubuntu/22.04/prod jammy main" > /etc/apt/sources.list.d/microsoft.list; \
+ apt-get update -y; \
+ apt-get upgrade -y --no-install-recommends; \
+ apt-get install -y --no-install-recommends \
+ apt-utils \
+ build-essential \
+ cmake \
+ git \
+ iputils-ping \
+ jq \
+ libsqlite3-dev \
+ libssl-dev \
+ make \
+ openssl \
+ postgresql-server-dev-all \
+ python3.11 \
+ python3.11-dev \
+ python3-dev \
+ python3-pip \
+ python3-venv \
+ software-properties-common \
+ sqlite3 \
+ ssl-cert \
+ tzdata \
+ unzip \
+ zip; \
+ apt-get clean; \
+ rm -rf /var/lib/apt/lists/*
# bust cache
ENV UV_VERSION=latest
@@ -60,33 +93,31 @@ ENV UV_VERSION=latest
# install rust, medcat requirement, install UV
ENV HOME=/root
ENV PATH="/root/.cargo/bin:${PATH}"
+ENV UV_INSTALL_DIR=/usr/local/bin
-RUN curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh \
- && chmod +x /tmp/rustup-init.sh \
- && /tmp/rustup-init.sh -y \
- && rm /tmp/rustup-init.sh
-
-RUN curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh \
- && bash /tmp/install_uv.sh
-
-RUN UV_PATH=$(find / -name uv -type f | head -n1) && \
- ln -s "$UV_PATH" /usr/local/bin/uv
+RUN set -eux; \
+ curl -sSf https://sh.rustup.rs -o /tmp/rustup-init.sh; \
+ chmod +x /tmp/rustup-init.sh; \
+ /tmp/rustup-init.sh -y; \
+ rm /tmp/rustup-init.sh
-# clean up apt
-RUN apt-get clean autoclean && apt-get autoremove --purge -y
+RUN set -eux; \
+ curl -Ls https://astral.sh/uv/install.sh -o /tmp/install_uv.sh; \
+ bash /tmp/install_uv.sh; \
+ rm /tmp/install_uv.sh
######################################## Python / PIP SECTION ########################################
RUN uv pip install --no-cache-dir --break-system-packages --system --upgrade pip setuptools wheel
-# install util packages used in NiFi scripts (such as MedCAT, avro, nifyapi, etc.)
+# install utility packages used in NiFi scripts (such as avro, nipyapi, etc.)
COPY ./requirements.txt ./requirements.txt
-RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt"
+RUN uv pip install --no-cache-dir --break-system-packages --target=${NIFI_PYTHON_FRAMEWORK_SOURCE_DIRECTORY} -r "./requirements.txt" --index-url https://pypi.org/simple
#######################################################################################################
# solve groovy grape proxy issues, grape ignores the current environment's proxy settings
-RUN export JAVA_OPTS="-Dhttp.proxyHost=$HTTP_PROXY -Dhttps.proxyHost=$HTTPS_PROXY -Dhttp.nonProxyHosts=$no_proxy"
+ENV JAVA_OPTS="-Dhttp.proxyHost=${HTTP_PROXY} -Dhttps.proxyHost=${HTTPS_PROXY} -Dhttp.nonProxyHosts=${no_proxy}"
# INSTALL NAR extensions
WORKDIR /opt/nifi/nifi-current/lib/
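
The `SHELL ... -o pipefail` directive matters for lines like `wget ... | gpg --dearmor`: without it, a failed download is masked by the exit status of the pipeline's last command. A hedged illustration of the difference in plain bash, using a deliberately bogus URL:

```bash
set -o pipefail
# With pipefail, the pipeline's exit status reflects the failing wget
# rather than whatever gpg returns for empty input.
if ! wget -q -O- https://invalid.example/key.asc | gpg --dearmor -o /dev/null; then
  echo "download failed and was caught"
fi
```
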
diff --git a/nifi/user-schemas/avro/.keep b/nifi/__init__.py
similarity index 100%
rename from nifi/user-schemas/avro/.keep
rename to nifi/__init__.py
diff --git a/nifi/conf/nifi.properties b/nifi/conf/nifi.properties
index 1363e7deb..6be2c3176 100644
--- a/nifi/conf/nifi.properties
+++ b/nifi/conf/nifi.properties
@@ -48,9 +48,9 @@ nifi.templates.enabled=true
nifi.python.command=python3.11
nifi.python.framework.source.directory=/opt/nifi/nifi-current/python/framework
nifi.python.extensions.source.directory.default=/opt/nifi/nifi-current/python_extensions
-nifi.python.working.directory=/opt/nifi/user-scripts
-nifi.python.logs.directory=./logs
-nifi.python.max.processes.per.extension.type=10
+nifi.python.working.directory=/opt/nifi/user_scripts
+nifi.python.logs.directory=./logs
+nifi.python.max.processes.per.extension.type=10
nifi.python.max.processes=100
####################
@@ -362,4 +362,3 @@ nifi.diagnostics.on.shutdown.max.filecount=10
# The diagnostics folder's maximum permitted size in bytes. If the limit is exceeded, the oldest files are deleted.
nifi.diagnostics.on.shutdown.max.directory.size=10 MB
-
diff --git a/nifi/nifi-registry/nifi-registry.properties b/nifi/nifi-registry/nifi-registry.properties
index 802c3a987..40bff0249 100644
--- a/nifi/nifi-registry/nifi-registry.properties
+++ b/nifi/nifi-registry/nifi-registry.properties
@@ -33,11 +33,11 @@ nifi.registry.web.proxy.context.path=/nifi-registry
nifi.registry.web.proxy.host=localhost:18443,nifi-registry:18443,nifi-registry-flow:18443,cogstack-nifi-registry-flow:18443,cogstack-nifi-registry:18443,nginx.local:18443
# security properties #
-nifi.registry.security.keystore=/opt/nifi-registry/nifi-registry-current/conf/keystore.jks
+nifi.registry.security.keystore=/security/certificates/nifi/nifi-keystore.jks
nifi.registry.security.keystoreType=JKS
nifi.registry.security.keystorePasswd=cogstackNifi
nifi.registry.security.keyPasswd=cogstackNifi
-nifi.registry.security.truststore=/opt/nifi-registry/nifi-registry-current/conf/truststore.jks
+nifi.registry.security.truststore=/security/certificates/nifi/nifi-truststore.jks
nifi.registry.security.truststoreType=JKS
nifi.registry.security.truststorePasswd=cogstackNifi
nifi.registry.security.needClientAuth=false
@@ -124,4 +124,4 @@ nifi.registry.security.user.authorizer=managed-authorizer
# revision management #
# This feature should remain disabled until a future NiFi release that supports the revision API changes
-nifi.registry.revisions.enabled=false
\ No newline at end of file
+nifi.registry.revisions.enabled=false
diff --git a/nifi/requirements-dev.txt b/nifi/requirements-dev.txt
new file mode 100644
index 000000000..5547e70bf
--- /dev/null
+++ b/nifi/requirements-dev.txt
@@ -0,0 +1,7 @@
+ruff==0.12.12
+mypy==1.17.0
+mypy-extensions==1.1.0
+types-aiofiles==24.1.0.20250708
+types-PyYAML==6.0.12.20250516
+types-setuptools==80.9.0.20250529
+timeout-decorator==0.5.0
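
A short usage sketch for the new dev-only pins (run from the repo root; the target directory matches the renamed `user_scripts` layout):

```bash
pip install -r nifi/requirements-dev.txt
ruff check nifi/user_scripts   # lint
mypy nifi/user_scripts         # type-check
```
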
diff --git a/nifi/requirements.txt b/nifi/requirements.txt
index 1632088d4..6f7f981f4 100644
--- a/nifi/requirements.txt
+++ b/nifi/requirements.txt
@@ -1,45 +1,32 @@
+wheel==0.45.1
+
# data science pkgs
-seaborn==0.13.2
-matplotlib==3.10.6
-graphviz==0.21
-plotly==6.3.0
-keras==3.12.0
nltk==3.9.1
-numpy>=1.26.0,<2.0.0
-pandas==1.5.3
-dill>=0.3.6,<1.0.0
-bokeh==3.8.0
-psycopg[c,binary]==3.2.9
-overrides==7.0.0
+numpy==2.3.5
+pandas==2.3.3
+pyarrow==22.0.0
# used in NiFi scripts: geolocation, avro conversion etc.
py4j==0.10.9.9
rancoord==0.0.6
geocoder==1.38.1
-avro==1.12.0
+avro==1.12.1
nipyapi==1.1.0
py7zr==1.0.0
-ipyparallel==9.0.1
-cython==3.1.3
-tqdm==4.67.1
jsonpickle==4.1.1
-certifi==2025.8.3
-xlsxwriter==3.2.5
-mysql-connector-python==9.4.0
-pymssql==2.3.7
+xlsxwriter==3.2.9
+mysql-connector-python==9.5.0
+pymssql==2.3.9
+psycopg[c,binary]==3.2.9
+requests==2.32.5
+PyYAML==6.0.3
+pydantic==2.12.5
+overrides==7.0.0
# other utils
xnat==0.7.2
# ElasticSearch/OpenSearch packages
-opensearch-py==3.0.0
elasticsearch9==9.1.0
+opensearch-py==3.0.0
neo4j==5.28.2
-
-# git utils
-dvc==3.62.0
-GitPython==3.1.45
-PyYAML==6.0.2
-
-# code utils
-ruff==0.12.12
diff --git a/nifi/user-scripts/dto/nifi_api_config.py b/nifi/user-scripts/dto/nifi_api_config.py
deleted file mode 100644
index 303bdd1b1..000000000
--- a/nifi/user-scripts/dto/nifi_api_config.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import os
-
-
-class NiFiAPIConfig:
- NIFI_URL_SCHEME: str = "https"
- NIFI_HOST: str = "localhost"
- NIFI_PORT: int = 8443
- NIFI_REGISTRY_PORT: int = 18443
-
- NIFI_USERNAME: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin")
- NIFI_PASSWORD: str = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi")
-
- ROOT_CERT_CA_PATH: str = os.path.abspath("../../../../security/certificates/root/root-ca.pem")
- NIFI_CERT_PEM_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.pem")
- NIFI_CERT_KEY_PATH: str = os.path.abspath("../../../../security/certificates/nifi/nifi.key")
-
- VERIFY_SSL: bool = True
-
- @property
- def nifi_base_url(self) -> str:
- """Full NiFi base URL, e.g. https://localhost:8443"""
- return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_PORT}"
-
- @property
- def nifi_api_url(self) -> str:
- """"NiFi REST API root, e.g. https://localhost:8443/nifi-api"""
- return f"{self.nifi_base_url}/nifi-api"
-
- @property
- def nifi_registry_base_url(self) -> str:
- """"NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry"""
- return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry/"
-
- @property
- def nifi_registry_api_url(self) -> str:
- """"NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry/nifi-registry-api"""
- return f"{self.NIFI_URL_SCHEME}://{self.NIFI_HOST}:{self.NIFI_REGISTRY_PORT}/nifi-registry-api"
-
- def auth_credentials(self) -> tuple[str, str]:
- """Convenience for requests auth=(user, password)."""
- return (self.NIFI_USERNAME, self.NIFI_PASSWORD)
-
- def get_nifi_ssl_certs(self) -> tuple[str, str]:
- """Convenience for requests cert=(cert_path, key_path)."""
- return (self.NIFI_CERT_PEM_PATH, self.NIFI_CERT_KEY_PATH)
diff --git a/nifi/user-scripts/dto/pg_config.py b/nifi/user-scripts/dto/pg_config.py
deleted file mode 100644
index 19f15d029..000000000
--- a/nifi/user-scripts/dto/pg_config.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from pydantic import BaseModel, Field
-
-
-class PGConfig(BaseModel):
- host: str = Field(default="localhost")
- port: int = Field(default=5432)
- db: str = Field(default="samples_db")
- user: str = Field(default="test")
- password: str = Field(default="test")
- timeout: int = Field(default=50)
diff --git a/nifi/user-scripts/dto/service_health.py b/nifi/user-scripts/dto/service_health.py
deleted file mode 100644
index 5f6455dbb..000000000
--- a/nifi/user-scripts/dto/service_health.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from datetime import datetime
-from typing import Literal
-
-from pydantic import BaseModel, Field
-
-
-class ServiceHealth(BaseModel):
- """
- Base health check model shared by all services.
- """
-
- service: str = Field(..., description="Service name, e.g. NiFi, PostgreSQL, OpenSearch/ElasticSearch, etc.")
- status: Literal["healthy", "unhealthy", "degraded"] = Field(
- ..., description="Current service status"
- )
- message: str | None = Field(None, description="Optional status message")
- timestamp: datetime = Field(default_factory=datetime.utcnow)
- avg_processing_ms: float | None = Field(None)
- service_info: str | None = Field(None)
- connected: bool | None = Field(None)
-
- class Config:
- extra = "ignore"
-
-class MLServiceHealth(ServiceHealth):
- model_name: str | None = Field(None, description="Name of the ML model")
- model_version: str | None = Field(None, description="Version of the ML model")
- model_card: str | None = Field(None, description="URL or path to the model card")
-
-class NiFiHealth(ServiceHealth):
- active_threads: int | None = Field(None, description="Number of active threads")
- queued_bytes: int | None = Field(None, description="Total queued bytes")
- queued_count: int | None = Field(None, description="Number of queued flowfiles")
-
-class ElasticsearchHealth(ServiceHealth):
- cluster_status: str | None = Field(None, description="Cluster health status")
- node_count: int | None = Field(None)
- active_shards: int | None = Field(None)
-
-class PostgresHealth(ServiceHealth):
- version: str | None = Field(None)
- latency_ms: float | None = Field(None, description="Ping latency in milliseconds")
- db_name: str | None = Field(None, description="Database name")
-
-class MedCATTrainerHealth(ServiceHealth):
- """Health check model for MedCAT Trainer web service."""
- app_version: str | None = Field(None, description="MedCAT Trainer app version")
-
-class CogstackCohortHealth(ServiceHealth):
- """Health check model for CogStack Cohort service."""
- pass
diff --git a/nifi/user-scripts/logs/.gitignore b/nifi/user-scripts/logs/.gitignore
deleted file mode 100644
index f59ec20aa..000000000
--- a/nifi/user-scripts/logs/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*
\ No newline at end of file
diff --git a/nifi/user-scripts/utils/helpers/nifi_api_client.py b/nifi/user-scripts/utils/helpers/nifi_api_client.py
deleted file mode 100644
index 1c353d2c1..000000000
--- a/nifi/user-scripts/utils/helpers/nifi_api_client.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from logging import Logger
-
-from dto.nifi_api_config import NiFiAPIConfig
-from nipyapi import canvas, security
-from nipyapi.nifi import ApiClient, ProcessGroupsApi
-from nipyapi.nifi.configuration import Configuration as NiFiConfiguration
-from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity
-from nipyapi.nifi.models.processor_entity import ProcessorEntity
-from nipyapi.registry import ApiClient as RegistryApiClient
-from nipyapi.registry import BucketsApi
-from nipyapi.registry.configuration import Configuration as RegistryConfiguration
-from utils.generic import get_logger
-
-
-class NiFiRegistryClient:
- def __init__(self, config: NiFiAPIConfig) -> None:
- self.config = config or NiFiAPIConfig()
- self.nipyapi_config = RegistryConfiguration()
- self.nipyapi_config.host = self.config.nifi_registry_api_url
- self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL
- self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH # type: ignore
- self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH # type: ignore
- self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH # type: ignore
-
- self.logger: Logger = get_logger(self.__class__.__name__)
-
- self.api_client = RegistryApiClient(self.nipyapi_config.host)
- self.buckets_api = BucketsApi(self.api_client)
-
- def list_buckets(self):
- buckets = self.buckets_api.get_buckets()
- for b in buckets:
- self.logger.info("Bucket: %s (%s)", b.name, b.identifier)
- return buckets
-
-
-class NiFiClient:
- def __init__(self, config: NiFiAPIConfig) -> None:
- self.config = config or NiFiAPIConfig()
- self.nipyapi_config = NiFiConfiguration()
- self.nipyapi_config.host = self.config.nifi_api_url
- self.nipyapi_config.verify_ssl = self.config.VERIFY_SSL
- self.nipyapi_config.cert_file = self.config.NIFI_CERT_PEM_PATH # type: ignore
- self.nipyapi_config.key_file = self.config.NIFI_CERT_KEY_PATH # type: ignore
- self.nipyapi_config.ssl_ca_cert = self.config.ROOT_CERT_CA_PATH # type: ignore
-
- self.logger: Logger = get_logger(self.__class__.__name__)
-
- self.api_client = ApiClient(self.nipyapi_config)
- self.process_group_api = ProcessGroupsApi(self.api_client)
-
- self._login()
-
- def _login(self) -> None:
- security.service_login(
- service='nifi',
- username=self.config.NIFI_USERNAME,
- password=self.config.NIFI_PASSWORD
- )
- self.logger.info("✅ Logged in to NiFi")
-
- def get_root_process_group_id(self) -> str:
- return canvas.get_root_pg_id()
-
- def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object:
- return canvas.get_process_group(process_group_name, identifier_type="nam")
-
- def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity:
- return canvas.get_process_group(process_group_id, identifier_type="id")
-
- def start_process_group(self, process_group_id: str) -> bool:
- return canvas.schedule_process_group(process_group_id, True)
-
- def stop_process_group(self, process_group_id: str) -> bool:
- return canvas.schedule_process_group(process_group_id, False)
-
- def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]:
- parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id")
- return canvas.list_all_process_groups(parent_pg.id)
-
- def get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]:
- return canvas.list_all_processors(process_group_id)
diff --git a/nifi/user-scripts/utils/helpers/service.py b/nifi/user-scripts/utils/helpers/service.py
deleted file mode 100644
index 9d4b28080..000000000
--- a/nifi/user-scripts/utils/helpers/service.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import sys
-import time
-
-import psycopg2
-from psycopg2 import sql
-
-sys.path.append("../../dto/")
-
-from dto.pg_config import PGConfig
-
-
-def check_postgres(cfg: PGConfig) -> tuple[bool, float | None, str | None]:
- """Return (is_healthy, latency_ms, error_detail)"""
- start = time.perf_counter()
- try:
- conn = psycopg2.connect(
- host=cfg.host,
- port=cfg.port,
- dbname=cfg.db,
- user=cfg.user,
- password=cfg.password,
- connect_timeout=cfg.timeout
- )
- with conn.cursor() as cur:
- cur.execute(sql.SQL("SELECT 1;"))
- result = cur.fetchone()
- conn.close()
- if result != (1,):
- return False, None, f"Unexpected result: {result}"
- latency = (time.perf_counter() - start) * 1000
- return True, latency, None
- except Exception as e:
- return False, None, str(e)
diff --git a/nifi/user-python-extensions/convert_avro_binary_field_to_base64.py b/nifi/user_python_extensions/convert_avro_binary_field_to_base64.py
similarity index 99%
rename from nifi/user-python-extensions/convert_avro_binary_field_to_base64.py
rename to nifi/user_python_extensions/convert_avro_binary_field_to_base64.py
index 0558dbbd4..99a80ec47 100644
--- a/nifi/user-python-extensions/convert_avro_binary_field_to_base64.py
+++ b/nifi/user_python_extensions/convert_avro_binary_field_to_base64.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import base64
import copy
diff --git a/nifi/user-python-extensions/convert_json_record_schema.py b/nifi/user_python_extensions/convert_json_record_schema.py
similarity index 99%
rename from nifi/user-python-extensions/convert_json_record_schema.py
rename to nifi/user_python_extensions/convert_json_record_schema.py
index da8434acc..e3e41859c 100644
--- a/nifi/user-python-extensions/convert_json_record_schema.py
+++ b/nifi/user_python_extensions/convert_json_record_schema.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import json
import traceback
diff --git a/nifi/user-python-extensions/parse_service_response.py b/nifi/user_python_extensions/parse_service_response.py
similarity index 99%
rename from nifi/user-python-extensions/parse_service_response.py
rename to nifi/user_python_extensions/parse_service_response.py
index 7307b7273..6b3ca6fa4 100644
--- a/nifi/user-python-extensions/parse_service_response.py
+++ b/nifi/user_python_extensions/parse_service_response.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import json
import traceback
diff --git a/nifi/user-python-extensions/prepare_record_for_nlp.py b/nifi/user_python_extensions/prepare_record_for_nlp.py
similarity index 99%
rename from nifi/user-python-extensions/prepare_record_for_nlp.py
rename to nifi/user_python_extensions/prepare_record_for_nlp.py
index 0f36f7069..f700fc45c 100644
--- a/nifi/user-python-extensions/prepare_record_for_nlp.py
+++ b/nifi/user_python_extensions/prepare_record_for_nlp.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import io
import json
diff --git a/nifi/user-python-extensions/prepare_record_for_ocr.py b/nifi/user_python_extensions/prepare_record_for_ocr.py
similarity index 99%
rename from nifi/user-python-extensions/prepare_record_for_ocr.py
rename to nifi/user_python_extensions/prepare_record_for_ocr.py
index 0e29fa77d..3a3d513be 100644
--- a/nifi/user-python-extensions/prepare_record_for_ocr.py
+++ b/nifi/user_python_extensions/prepare_record_for_ocr.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import base64
import io
diff --git a/nifi/user-python-extensions/record_add_geolocation.py b/nifi/user_python_extensions/record_add_geolocation.py
similarity index 99%
rename from nifi/user-python-extensions/record_add_geolocation.py
rename to nifi/user_python_extensions/record_add_geolocation.py
index b29218ec6..62ca06200 100644
--- a/nifi/user-python-extensions/record_add_geolocation.py
+++ b/nifi/user_python_extensions/record_add_geolocation.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import csv
import json
diff --git a/nifi/user-python-extensions/record_decompress_cerner_blob.py b/nifi/user_python_extensions/record_decompress_cerner_blob.py
similarity index 99%
rename from nifi/user-python-extensions/record_decompress_cerner_blob.py
rename to nifi/user_python_extensions/record_decompress_cerner_blob.py
index a7cb0afc5..ef5e257bf 100644
--- a/nifi/user-python-extensions/record_decompress_cerner_blob.py
+++ b/nifi/user_python_extensions/record_decompress_cerner_blob.py
@@ -1,6 +1,6 @@
import sys
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import base64
import json
diff --git a/nifi/user-python-extensions/sample_processor.py b/nifi/user_python_extensions/sample_processor.py
similarity index 99%
rename from nifi/user-python-extensions/sample_processor.py
rename to nifi/user_python_extensions/sample_processor.py
index ddee7e475..da5ef3b74 100644
--- a/nifi/user-python-extensions/sample_processor.py
+++ b/nifi/user_python_extensions/sample_processor.py
@@ -1,7 +1,7 @@
import sys
from typing import Any
-sys.path.insert(0, "/opt/nifi/user-scripts")
+sys.path.insert(0, "/opt/nifi/user_scripts")
import io
import json
diff --git a/nifi/user-schemas/elasticsearch/indices/.keep b/nifi/user_schemas/avro/.keep
similarity index 100%
rename from nifi/user-schemas/elasticsearch/indices/.keep
rename to nifi/user_schemas/avro/.keep
diff --git a/nifi/user-schemas/elasticsearch/base_index_settings.json b/nifi/user_schemas/elasticsearch/base_index_settings.json
similarity index 100%
rename from nifi/user-schemas/elasticsearch/base_index_settings.json
rename to nifi/user_schemas/elasticsearch/base_index_settings.json
diff --git a/nifi/user-schemas/elasticsearch/templates/.keep b/nifi/user_schemas/elasticsearch/indices/.keep
similarity index 100%
rename from nifi/user-schemas/elasticsearch/templates/.keep
rename to nifi/user_schemas/elasticsearch/indices/.keep
diff --git a/nifi/user-schemas/json/.keep b/nifi/user_schemas/elasticsearch/templates/.keep
similarity index 100%
rename from nifi/user-schemas/json/.keep
rename to nifi/user_schemas/elasticsearch/templates/.keep
diff --git a/nifi/user-scripts/db/.gitignore b/nifi/user_schemas/json/.keep
similarity index 100%
rename from nifi/user-scripts/db/.gitignore
rename to nifi/user_schemas/json/.keep
diff --git a/nifi/user-schemas/legacy/annotation-medcat.avsc b/nifi/user_schemas/legacy/annotation-medcat.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/annotation-medcat.avsc
rename to nifi/user_schemas/legacy/annotation-medcat.avsc
diff --git a/nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json b/nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json
similarity index 100%
rename from nifi/user-schemas/legacy/annotation_elasticsearch_index_mapping.json
rename to nifi/user_schemas/legacy/annotation_elasticsearch_index_mapping.json
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema.avsc b/nifi/user_schemas/legacy/cogstack_common_schema.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema.avsc
rename to nifi/user_schemas/legacy/cogstack_common_schema.avsc
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json b/nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
rename to nifi/user_schemas/legacy/cogstack_common_schema_elasticsearch_index_mapping_template.json
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_full.avsc b/nifi/user_schemas/legacy/cogstack_common_schema_full.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_full.avsc
rename to nifi/user_schemas/legacy/cogstack_common_schema_full.avsc
diff --git a/nifi/user-schemas/legacy/cogstack_common_schema_mapping.json b/nifi/user_schemas/legacy/cogstack_common_schema_mapping.json
similarity index 100%
rename from nifi/user-schemas/legacy/cogstack_common_schema_mapping.json
rename to nifi/user_schemas/legacy/cogstack_common_schema_mapping.json
diff --git a/nifi/user-schemas/legacy/document.avsc b/nifi/user_schemas/legacy/document.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/document.avsc
rename to nifi/user_schemas/legacy/document.avsc
diff --git a/nifi/user-schemas/legacy/document_all_fields.avsc b/nifi/user_schemas/legacy/document_all_fields.avsc
similarity index 100%
rename from nifi/user-schemas/legacy/document_all_fields.avsc
rename to nifi/user_schemas/legacy/document_all_fields.avsc
diff --git a/nifi/user-scripts/logs/parse_json/.gitkeep b/nifi/user_scripts/__init__.py
similarity index 100%
rename from nifi/user-scripts/logs/parse_json/.gitkeep
rename to nifi/user_scripts/__init__.py
diff --git a/nifi/user-scripts/bootstrap_external_lib_imports.py b/nifi/user_scripts/bootstrap_external_lib_imports.py
similarity index 80%
rename from nifi/user-scripts/bootstrap_external_lib_imports.py
rename to nifi/user_scripts/bootstrap_external_lib_imports.py
index 830743b06..4978ba944 100644
--- a/nifi/user-scripts/bootstrap_external_lib_imports.py
+++ b/nifi/user_scripts/bootstrap_external_lib_imports.py
@@ -14,6 +14,6 @@ def running_in_docker() -> bool:
# we need to add it to the sys imports
if running_in_docker():
- sys.path.insert(0, "/opt/nifi/user-scripts")
+ sys.path.insert(0, "/opt/nifi/user_scripts")
else:
- sys.path.insert(0, "./user-scripts")
+ sys.path.insert(0, "./user_scripts")
diff --git a/nifi/user-scripts/clean_doc.py b/nifi/user_scripts/clean_doc.py
similarity index 100%
rename from nifi/user-scripts/clean_doc.py
rename to nifi/user_scripts/clean_doc.py
diff --git a/nifi/user-scripts/cogstack_cohort_generate_data.py b/nifi/user_scripts/cogstack_cohort_generate_data.py
similarity index 100%
rename from nifi/user-scripts/cogstack_cohort_generate_data.py
rename to nifi/user_scripts/cogstack_cohort_generate_data.py
diff --git a/nifi/user-scripts/cogstack_cohort_generate_random_data.py b/nifi/user_scripts/cogstack_cohort_generate_random_data.py
similarity index 100%
rename from nifi/user-scripts/cogstack_cohort_generate_random_data.py
rename to nifi/user_scripts/cogstack_cohort_generate_random_data.py
diff --git a/nifi/user-scripts/tmp/.gitignore b/nifi/user_scripts/db/.gitignore
similarity index 100%
rename from nifi/user-scripts/tmp/.gitignore
rename to nifi/user_scripts/db/.gitignore
diff --git a/nifi/user_scripts/dto/__init__.py b/nifi/user_scripts/dto/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user_scripts/dto/database_config.py b/nifi/user_scripts/dto/database_config.py
new file mode 100644
index 000000000..0999ebb88
--- /dev/null
+++ b/nifi/user_scripts/dto/database_config.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+from typing import Any
+
+from pydantic import AliasChoices, Field, PositiveInt, SecretStr
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class DatabaseConfig(BaseSettings):
+ model_config = SettingsConfigDict(
+ env_prefix="DATABASE_",
+ env_file=[Path(__file__).resolve().parents[3] / "deploy" / "database.env",
+ Path(__file__).resolve().parents[3] / "security" / "env" / "users_database.env",
+ ],
+ extra="ignore",
+ env_ignore_empty=True,
+ populate_by_name=True
+ )
+
+ host: str = Field(default="localhost", validation_alias=AliasChoices("POSTGRES_HOST"))
+    port: int = Field(default=5432, validation_alias=AliasChoices("POSTGRES_PORT"), ge=1, le=65535)
+
+    database_name: str = Field(default="db_samples", validation_alias=AliasChoices("DB", "DB_NAME"))
+ username: str = Field(default="test", validation_alias=AliasChoices("POSTGRES_USER_SAMPLES", "POSTGRES_USER"))
+ password: SecretStr = Field(default_factory=lambda: SecretStr("test"),
+ validation_alias=AliasChoices("POSTGRES_PASSWORD_SAMPLES",
+ "password",
+ "POSTGRES_PASSWORD"))
+ timeout: PositiveInt = Field(default=60, validation_alias=AliasChoices("TIMEOUT"))
+
+ def get_field_values_kwargs(self) -> dict[str, Any]:
+ return self.model_dump()
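+
+
+# Illustrative sketch: values resolve from the env files above via the
+# POSTGRES_* aliases and fall back to the field defaults shown.
+if __name__ == "__main__":
+    cfg = DatabaseConfig()
+    print(cfg.host, cfg.port, cfg.database_name, cfg.password)  # SecretStr prints masked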
diff --git a/nifi/user_scripts/dto/elastic_config.py b/nifi/user_scripts/dto/elastic_config.py
new file mode 100644
index 000000000..3ce08f0e2
--- /dev/null
+++ b/nifi/user_scripts/dto/elastic_config.py
@@ -0,0 +1,78 @@
+import json
+from pathlib import Path
+from typing import ClassVar
+
+from pydantic import AliasChoices, Field, SecretStr, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class ElasticConfig(BaseSettings):
+
+ ROOT_DIR: ClassVar = Path(__file__).resolve().parents[3]
+ CERT_ROOT_DIR: ClassVar = ROOT_DIR / "security" / "certificates" / "elastic"
+
+ model_config = SettingsConfigDict(
+ env_prefix="ELASTICSEARCH_",
+ env_file=[ROOT_DIR / "deploy" / "elasticsearch.env",
+ ROOT_DIR / "security" / "env" / "users_elasticsearch.env",
+ ],
+ extra="ignore",
+ env_ignore_empty=True,
+ populate_by_name=True
+ )
+
+ elasticsearch_version: str = Field(default="opensearch", validation_alias=AliasChoices("VERSION"))
+ kibana_version: str = Field(default="opensearch-dashboards", validation_alias=AliasChoices("KIBANA_VERSION"))
+
+ es_port_1: int = Field(default=9200,
+ validation_alias=AliasChoices("ELASTICSEARCH_NODE_1_OUTPUT_PORT"), ge=1, le=65535)
+ es_port_2: int = Field(default=9201,
+ validation_alias=AliasChoices("ELASTICSEARCH_NODE_2_OUTPUT_PORT"), ge=1, le=65535)
+ es_port_3: int = Field(default=9202,
+ validation_alias=AliasChoices("ELASTICSEARCH_NODE_3_OUTPUT_PORT"), ge=1, le=65535)
+
+ kibana_host: str = Field(default="https://localhost:5601",
+ validation_alias=AliasChoices("KIBANA_HOST", "kibana_host"))
+
+ kibana_port: int = Field(default=5601,
+ validation_alias=AliasChoices("KIBANA_SERVER_OUTPUT_PORT"), ge=1, le=65535)
+
+ hosts: list[str] = Field(default_factory=list)
+ timeout: int = Field(default=60)
+ verify_ssl: bool = Field(default=False, validation_alias=AliasChoices("SSL_ENABLED", "ELASTICSEARCH_SSL_ENABLED"))
+ user: str = Field(default="admin", validation_alias=AliasChoices("ELASTIC_USER"))
+ password: SecretStr = Field(default_factory=lambda: SecretStr("admin"),
+ validation_alias=AliasChoices("ELASTIC_PASSWORD",
+ "password",
+ "ELASTICSEARCH_PASSWORD",
+ "OPENSEARCH_INITIAL_ADMIN_PASSWORD"))
+
+ elastic_root_cert_ca_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "elastic-stack-ca.crt.pem").as_posix()
+ elastic_node_cert_key_path: ClassVar = (CERT_ROOT_DIR / "opensearch" /
+ "elasticsearch/elasticsearch-1/elasticsearch-1.key").as_posix()
+ elastic_node_cert_pem_path: ClassVar = (CERT_ROOT_DIR / "opensearch" /
+ "elasticsearch/elasticsearch-1/elasticsearch-1.crt").as_posix()
+
+ kibana_client_cert_key_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "es_kibana_client.key").as_posix()
+ kibana_client_cert_pem_path: ClassVar = (CERT_ROOT_DIR / "opensearch" / "es_kibana_client.pem").as_posix()
+
+ @field_validator("hosts", mode="before")
+ def parse_list(cls, v):
+ if isinstance(v, str):
+ return json.loads(v)
+ return v
+
+ @property
+ def ports(self) -> list[int]:
+ return [self.es_port_1, self.es_port_2, self.es_port_3]
+
+ def auth_credentials(self) -> tuple[str, str]:
+ """convenience for requests auth=(user, password)."""
+ return (self.user, self.password.get_secret_value())
+
+ def get_ssl_certs_paths(self) -> tuple[str, str]:
+ """convenience for requests cert=(cert_path, key_path)."""
+ return (self.elastic_node_cert_pem_path, self.elastic_node_cert_key_path)
+
+ def get_kibana_ssl_certs_path(self) -> tuple[str, str]:
+ return (self.kibana_client_cert_pem_path, self.kibana_client_cert_key_path)
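+
+
+# Illustrative sketch of how the helpers feed a raw HTTP probe (URL assumed):
+#     import requests
+#     cfg = ElasticConfig()
+#     requests.get(f"https://localhost:{cfg.ports[0]}", auth=cfg.auth_credentials(),
+#                  cert=cfg.get_ssl_certs_paths(), verify=cfg.elastic_root_cert_ca_path)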
\ No newline at end of file
diff --git a/nifi/user_scripts/dto/nifi_api_config.py b/nifi/user_scripts/dto/nifi_api_config.py
new file mode 100644
index 000000000..869444c87
--- /dev/null
+++ b/nifi/user_scripts/dto/nifi_api_config.py
@@ -0,0 +1,47 @@
+import os
+from pathlib import Path
+
+CERTS_ROOT = Path(__file__).resolve().parents[3] / "security" / "certificates"
+
+
+class NiFiAPIConfig:
+
+ def __init__(self):
+ self.nifi_url_scheme = "https"
+ self.nifi_host = "localhost"
+ self.nifi_port = 8443
+ self.nifi_registry_port = 18443
+ self.nifi_username = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_USERNAME", "admin")
+ self.nifi_password = os.environ.get("NIFI_SINGLE_USER_CREDENTIALS_PASSWORD", "cogstackNiFi")
+ self.root_cert_ca_path = (CERTS_ROOT / "root" / "root-ca.pem").as_posix()
+ self.nifi_cert_pem_path = (CERTS_ROOT / "nifi" / "nifi.pem").as_posix()
+ self.nifi_cert_key_path = (CERTS_ROOT / "nifi" / "nifi.key").as_posix()
+ self.verify_ssl = True
+
+ @property
+ def nifi_base_url(self) -> str:
+ """Full NiFi base URL, e.g. https://localhost:8443"""
+ return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_port}"
+
+ @property
+ def nifi_api_url(self) -> str:
+ """NiFi REST API root, e.g. https://localhost:8443/nifi-api"""
+ return f"{self.nifi_base_url}/nifi-api"
+
+ @property
+ def nifi_registry_base_url(self) -> str:
+ """NiFi Registry REST API root, e.g. https://localhost:18443/nifi-registry/"""
+ return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry/"
+
+ @property
+ def nifi_registry_api_url(self) -> str:
+ """nifi registry rest api root, e.g. https://localhost:18443/nifi-registry/nifi-registry-api"""
+ return f"{self.nifi_url_scheme}://{self.nifi_host}:{self.nifi_registry_port}/nifi-registry-api/"
+
+ def auth_credentials(self) -> tuple[str, str]:
+ """convenience for requests auth=(user, password)."""
+ return (self.nifi_username, self.nifi_password)
+
+ def get_ssl_certs_paths(self) -> tuple[str, str]:
+ """convenience for requests cert=(cert_path, key_path)."""
+ return (self.nifi_cert_pem_path, self.nifi_cert_key_path)
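+
+
+# Illustrative sketch: the helpers plug straight into requests (endpoint assumed):
+#     import requests
+#     cfg = NiFiAPIConfig()
+#     requests.get(f"{cfg.nifi_api_url}/flow/about", auth=cfg.auth_credentials(),
+#                  cert=cfg.get_ssl_certs_paths(), verify=cfg.root_cert_ca_path)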
diff --git a/nifi/user_scripts/dto/service_health.py b/nifi/user_scripts/dto/service_health.py
new file mode 100644
index 000000000..878467afc
--- /dev/null
+++ b/nifi/user_scripts/dto/service_health.py
@@ -0,0 +1,51 @@
+from datetime import datetime
+from typing import Literal
+
+from pydantic import BaseModel, Field
+
+
+class ServiceHealth(BaseModel):
+ """
+ Base health check model shared by all services.
+ """
+
+ service: str = Field(..., description="Service name, e.g. NiFi, PostgreSQL, OpenSearch/ElasticSearch, etc.")
+ status: Literal["healthy", "unhealthy", "degraded"] = Field(description="Current service status",
+ default="unhealthy")
+
+ message: str | None = Field(default=None, description="Optional status message")
+ timestamp: datetime | None = Field(default_factory=datetime.now)
+ avg_processing_ms: float | None = Field(default=None)
+ service_info: str | None = Field(default=None)
+ connected: bool = Field(default=False)
+ latency_ms: float = Field(default=0.0, description="Ping latency in milliseconds")
+
+ class Config:
+ extra = "ignore"
+
+class MLServiceHealth(ServiceHealth):
+ model_name: str | None = Field(None, description="Name of the ML model")
+ model_version: str | None = Field(None, description="Version of the ML model")
+ model_card: str | None = Field(None, description="URL or path to the model card")
+
+class NiFiHealth(ServiceHealth):
+ active_threads: int | None = Field(default=None, description="Number of active threads")
+ queued_bytes: int | None = Field(default=None, description="Total queued bytes")
+ queued_count: int | None = Field(default=None, description="Number of queued flowfiles")
+
+class ElasticHealth(ServiceHealth):
+ cluster_status: str | None = Field(default=None, description="Cluster health status")
+ node_count: int | None = Field(default=None)
+ active_shards: int | None = Field(default=None)
+
+class DatabaseHealth(ServiceHealth):
+ version: str | None = Field(default=None, description="Database version, e.g PgSQL 17, MSSQL 21, etc.")
+ db_name: str | None = Field(default=None, description="Database name")
+
+class MedCATTrainerHealth(ServiceHealth):
+ """Health check model for MedCAT Trainer web service."""
+ app_version: str | None = Field(None, description="MedCAT Trainer app version")
+
+class CogstackCohortHealth(ServiceHealth):
+ """Health check model for CogStack Cohort service."""
+ pass
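+
+
+# Illustrative sketch: the health-check helpers populate these models, e.g.
+if __name__ == "__main__":
+    health = DatabaseHealth(service="cogstack-samples-db", db_name="samples",
+                            status="healthy", connected=True, latency_ms=3.2)
+    print(health.model_dump_json())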
diff --git a/nifi/user_scripts/elastic_schema_converter.py b/nifi/user_scripts/elastic_schema_converter.py
new file mode 100644
index 000000000..ea880673d
--- /dev/null
+++ b/nifi/user_scripts/elastic_schema_converter.py
@@ -0,0 +1,64 @@
+import json
+import sys
+import traceback
+from logging import Logger, getLogger
+
+logger: Logger = getLogger(__name__)
+
+origin_index_mapping = json.loads(sys.stdin.read())
+
+INPUT_INDEX_NAME = ""
+OUTPUT_INDEX_NAME = ""
+OUTPUT_FILE_NAME = ""
+JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = ""
+TRANSFORM_KEYS_LOWER_CASE = False
+
+for arg in sys.argv:
+ _arg = arg.split("=", 1)
+ _arg[0] = _arg[0].lower()
+ if _arg[0] == "input_index_name":
+ INPUT_INDEX_NAME = _arg[1]
+ if _arg[0] == "output_index_name":
+ OUTPUT_INDEX_NAME = _arg[1]
+ if _arg[0] == "output_file_name":
+ OUTPUT_FILE_NAME = _arg[1]
+ if _arg[0] == "json_field_mapper_schema_file_path":
+ JSON_FIELD_MAPPER_SCHEMA_FILE_PATH = _arg[1]
+
+try:
+ json_field_mapper: dict = {}
+ with open(JSON_FIELD_MAPPER_SCHEMA_FILE_PATH) as file:
+ json_field_mapper = json.load(file)
+
+ output_index_mapping: dict = {}
+
+    # ES/OpenSearch returns mappings keyed by index name, e.g.
+    # {"my_index": {"mappings": {"properties": {...}}}}
+    origin_index_name = INPUT_INDEX_NAME if INPUT_INDEX_NAME else \
+        list(origin_index_mapping.keys())[0]
+
+    mappings_root = origin_index_mapping.get(origin_index_name, origin_index_mapping)
+    new_properties: dict = {}
+
+    for curr_field_name, curr_field_value in mappings_root["mappings"]["properties"].items():
+        if TRANSFORM_KEYS_LOWER_CASE:
+            curr_field_name = str(curr_field_name).lower()
+        if curr_field_name in json_field_mapper:
+            # plain field: rename it according to the mapper schema
+            if isinstance(json_field_mapper[curr_field_name], str):
+                new_properties[json_field_mapper[curr_field_name]] = curr_field_value
+            elif isinstance(json_field_mapper[curr_field_name], dict):
+                # nested field: rename each mapped sub-field, dropping unmapped ones
+                new_properties[curr_field_name] = {}
+                for nested_field_name, nested_field_value in curr_field_value.items():
+                    if nested_field_name in json_field_mapper[curr_field_name]:
+                        new_properties[curr_field_name][
+                            json_field_mapper[curr_field_name][nested_field_name]] = nested_field_value
+
+    output_index_mapping = {
+        OUTPUT_INDEX_NAME if OUTPUT_INDEX_NAME else origin_index_name:
+            {"mappings": {"properties": new_properties}}
+    }
+
+except Exception as exception:
+ logger.error("Exception during flowfile processing: " + traceback.format_exc())
+ raise exception
+
+# Output cleaned JSON as UTF-8
+sys.stdout.buffer.write(json.dumps(output_index_mapping, ensure_ascii=False).encode("utf-8"))
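+
+# Illustrative invocation (index and file names below are placeholders):
+#     cat mapping.json | python3 elastic_schema_converter.py \
+#         input_index_name=docs_raw output_index_name=docs_clean \
+#         json_field_mapper_schema_file_path=user_schemas/json/field_map.json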
diff --git a/nifi/user-scripts/generate_location.py b/nifi/user_scripts/generate_location.py
similarity index 85%
rename from nifi/user-scripts/generate_location.py
rename to nifi/user_scripts/generate_location.py
index 5d88f9af7..4eb9426f7 100644
--- a/nifi/user-scripts/generate_location.py
+++ b/nifi/user_scripts/generate_location.py
@@ -37,21 +37,21 @@ def poly_creator(city: str):
def main():
input_stream = sys.stdin.read()
+ log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME))
+ output_stream = []
try:
- log_file_path = os.path.join(NIFI_USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME))
patients = json.loads(input_stream)
locations = [poly_creator(location) for location in LOCATIONS.split(",")]
-
- output_stream = []
for patient in patients:
to_append = {}
id = patient["_source"][SUBJECT_ID_FIELD_NAME]
- idx = randrange(len(locations)) # pick a random location specified
- lat, lon, _ = rc.coordinates_randomizer(polygon = locations[idx], num_locations = 1) # generate latitude and longitude
-
+ # pick a random location specified
+ idx = randrange(len(locations))
+ # generate latitude and longitude
+ lat, lon, _ = rc.coordinates_randomizer(polygon = locations[idx], num_locations = 1)
to_append[SUBJECT_ID_FIELD_NAME] = id
to_append[LOCATION_NAME_FIELD] = "POINT (" + str(lon[0]) + " " + str(lat[0]) + ")"
output_stream.append(to_append)
@@ -62,8 +62,8 @@ def main():
else:
with open(log_file_path, "a+") as log_file:
log_file.write("\n" + str(traceback.print_exc()))
- finally:
- return output_stream
+ return output_stream
sys.stdout.write(json.dumps(main()))
+
diff --git a/nifi/user-scripts/get_files_from_storage.py b/nifi/user_scripts/get_files_from_storage.py
similarity index 98%
rename from nifi/user-scripts/get_files_from_storage.py
rename to nifi/user_scripts/get_files_from_storage.py
index f1aefbbb2..2dc9e1de7 100644
--- a/nifi/user-scripts/get_files_from_storage.py
+++ b/nifi/user_scripts/get_files_from_storage.py
@@ -170,7 +170,8 @@ def get_files_and_metadata():
if generate_pseudo_doc_id is not False:
_file_id_dict["document_Pseudo_Id"] = str(uuid.uuid4().hex)
- txt_file_df = pandas.concat([txt_file_df, pandas.DataFrame.from_dict([_file_id_dict], orient="columns")])
+ txt_file_df = pandas.concat([txt_file_df,
+ pandas.DataFrame.from_dict([_file_id_dict], orient="columns")])
folders_ingested[root].append(file_id)
else:
diff --git a/nifi/user_scripts/legacy_scripts/__init__.py b/nifi/user_scripts/legacy_scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user-scripts/legacy_scripts/annotation_creator.py b/nifi/user_scripts/legacy_scripts/annotation_creator.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_creator.py
rename to nifi/user_scripts/legacy_scripts/annotation_creator.py
diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager.py b/nifi/user_scripts/legacy_scripts/annotation_manager.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_manager.py
rename to nifi/user_scripts/legacy_scripts/annotation_manager.py
diff --git a/nifi/user-scripts/legacy_scripts/annotation_manager_docs.py b/nifi/user_scripts/legacy_scripts/annotation_manager_docs.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/annotation_manager_docs.py
rename to nifi/user_scripts/legacy_scripts/annotation_manager_docs.py
diff --git a/nifi/user-scripts/legacy_scripts/anonymise_doc.py b/nifi/user_scripts/legacy_scripts/anonymise_doc.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/anonymise_doc.py
rename to nifi/user_scripts/legacy_scripts/anonymise_doc.py
diff --git a/nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py b/nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/flowfile_to_attribute_with_content.py
rename to nifi/user_scripts/legacy_scripts/flowfile_to_attribute_with_content.py
diff --git a/nifi/user-scripts/legacy_scripts/ingest_into_es.py b/nifi/user_scripts/legacy_scripts/ingest_into_es.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/ingest_into_es.py
rename to nifi/user_scripts/legacy_scripts/ingest_into_es.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py b/nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
rename to nifi/user_scripts/legacy_scripts/parse-anns-from-nlp-response-bulk.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py b/nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
rename to nifi/user_scripts/legacy_scripts/parse-es-db-result-for-nlp-request-bulk.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-json-to-avro.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-json-to-avro.py
rename to nifi/user_scripts/legacy_scripts/parse-json-to-avro.py
diff --git a/nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py b/nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/parse-tika-result-json-to-avro.py
rename to nifi/user_scripts/legacy_scripts/parse-tika-result-json-to-avro.py
diff --git a/nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py b/nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
rename to nifi/user_scripts/legacy_scripts/prepare-db-record-for-tika-request-single.py
diff --git a/nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py b/nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
similarity index 100%
rename from nifi/user-scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
rename to nifi/user_scripts/legacy_scripts/prepare-file-for-tika-request-single-keep-db-fields.py
diff --git a/nifi/user-scripts/tests/generate_big_ann_file.py b/nifi/user_scripts/tests/generate_big_ann_file.py
similarity index 100%
rename from nifi/user-scripts/tests/generate_big_ann_file.py
rename to nifi/user_scripts/tests/generate_big_ann_file.py
diff --git a/nifi/user-scripts/tests/generate_files.py b/nifi/user_scripts/tests/generate_files.py
similarity index 100%
rename from nifi/user-scripts/tests/generate_files.py
rename to nifi/user_scripts/tests/generate_files.py
diff --git a/nifi/user-scripts/tests/get_ingested_files.py b/nifi/user_scripts/tests/get_ingested_files.py
similarity index 100%
rename from nifi/user-scripts/tests/get_ingested_files.py
rename to nifi/user_scripts/tests/get_ingested_files.py
diff --git a/nifi/user_scripts/tests/nifi/test_nifi.py b/nifi/user_scripts/tests/nifi/test_nifi.py
new file mode 100644
index 000000000..0242088a0
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_nifi.py
@@ -0,0 +1,149 @@
+import json
+import os
+import sys
+import tempfile
+import unittest
+from pathlib import Path
+
+sys.path.insert(0, "../../")
+
+import requests
+from dto.nifi_api_config import NiFiAPIConfig
+from dto.database_config import DatabaseConfig
+from dto.service_health import NiFiHealth, DatabaseHealth
+from nipyapi import config as nipy_config
+from nipyapi import security, versioning
+from utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient
+from utils.helpers.service import check_postgres
+
+
+class TestServices(unittest.TestCase):
+ """Service connectivity and health checks."""
+
+ @classmethod
+    def setUpClass(cls):
+        # the fixtures below are kept commented out until the suite is re-enabled
+        pass
+
+ # cls.pg_cfg = PGConfig()
+ # cls.nifi_api_config = NiFiAPIConfig()
+ # cls.nifi_client = NiFiClient(config=cls.nifi_api_config, login_on_init=False)
+ # cls.nifi_registry_client = NiFiRegistryClient(config=cls.nifi_api_config)
+ # cls.pg_config = PGConfig()
+ # cls.registry_bucket_name = os.environ.get("NIFI_REGISTRY_BUCKET", "cogstack")
+ # cls.flow_name = "opensearch_ingest_docs_db_to_es"
+ # cls.template_path = (
+ # Path(__file__).resolve().parents[4]
+ # / "nifi"
+ # / "user-templates"
+ # / f"{cls.flow_name}.json"
+ # )
+ # cls.es_hosts = os.environ.get("OPENSEARCH_URLS", "http://localhost:9200")
+ # cls.es_username = os.environ.get("OPENSEARCH_USERNAME", "admin")
+ # cls.es_password = os.environ.get("OPENSEARCH_PASSWORD", "admin")
+
+ #@classmethod
+ #def _configure_nipyapi(cls) -> None:
+ # """Apply SSL + host config so nipyapi uses the same creds as the raw client."""
+ # nipy_config.nifi_config.host = cls.nifi_api_config.nifi_api_url
+ # nipy_config.registry_config.host = cls.nifi_api_config.nifi_registry_api_url
+ #
+ # for cfg in (nipy_config.nifi_config, nipy_config.registry_config):
+ # cfg.verify_ssl = cls.nifi_api_config.VERIFY_SSL
+ # cfg.cert_file = cls.nifi_api_config.NIFI_CERT_PEM_PATH
+ # cfg.key_file = cls.nifi_api_config.NIFI_CERT_KEY_PATH
+ # cfg.ssl_ca_cert = cls.nifi_api_config.ROOT_CERT_CA_PATH
+ #
+ #def _prepare_snapshot_with_env_defaults(self) -> Path:
+ # """
+ # Load the opensearch template and pre-fill controller service properties
+ # using env/default configs so the flow can start without manual clicks.
+ # """
+ # with self.template_path.open() as fp:
+ # snapshot = json.load(fp)
+ #
+ # db_url = f"jdbc:postgresql://{self.pg_cfg.host}:{self.pg_cfg.port}/{self.pg_cfg.db}"
+ #
+ # for controller_service in snapshot.get("flowContents", {}).get("controllerServices", []):
+ # name = controller_service.get("name")
+ # properties = controller_service.setdefault("properties", {})
+ #
+ # if name == "DBCPConnectionPool":
+ # properties["Database Connection URL"] = db_url
+ # properties["Database User"] = self.pg_cfg.user
+ # properties["Password"] = self.pg_cfg.password
+ # elif name == "ElasticSearchClientServiceImpl":
+ # properties["el-cs-http-hosts"] = self.es_hosts
+ # properties["el-cs-username"] = self.es_username
+ # properties["el-cs-password"] = self.es_password
+ #
+ # fd, tmp_path = tempfile.mkstemp(suffix=".json", prefix="nifi-template-")
+ # with os.fdopen(fd, "w") as tmp_file:
+ # json.dump(snapshot, tmp_file)
+ #
+ # return Path(tmp_path)
+ #
+ #def test_nifi_health(self) -> None:
+ # result = self.nifi_client._login()
+ # self.assertTrue(result)
+ #
+ #def test_nifi_registry_health(self) -> None:
+ # result = requests.head(
+ # url=self.nifi_api_config.nifi_registry_base_url,
+ # auth=self.nifi_api_config.auth_credentials(),
+ # cert=self.nifi_api_config.get_nifi_ssl_certs_paths(),
+ # verify=self.nifi_api_config.ROOT_CERT_CA_PATH,
+ # timeout=15,
+ # )
+ # self.assertEqual(result.status_code, 200)
+ #
+ #def test_postgres_health(self):
+ # result, latency, err = check_postgres(self.pg_config)
+ # self.assertTrue(result, f"PostgreSQL unhealthy: {err}")
+ # print(f"✅ PostgreSQL OK, latency {latency:.2f} ms")
+ #
+ #def test_import_opensearch_template_and_configure_controller_services(self) -> None:
+ # """
+ # Bring the opensearch template into the local NiFi Registry bucket and
+ # patch the controller services so they use local PG/ES credentials.
+ # """
+ # self.assertTrue(self.nifi_client._login())
+ # self._configure_nipyapi()
+ #
+ # security.service_login(
+ # service="registry",
+ # username=self.nifi_api_config.NIFI_USERNAME,
+ # password=self.nifi_api_config.NIFI_PASSWORD,
+ # )
+ #
+ # bucket = versioning.get_bucket(self.registry_bucket_name)
+ # if bucket is None:
+ # bucket = versioning.create_bucket(
+ # bucket_name=self.registry_bucket_name,
+ # bucket_desc="Auto-created for test imports",
+ # )
+ #
+ # flow = versioning.get_flow_in_bucket(
+ # bucket_id=bucket.identifier,
+ # identifier=self.flow_name,
+ # identifier_type="name",
+ # )
+ # if flow is None:
+ # flow = versioning.create_flow(
+ # bucket_id=bucket.identifier,
+ # flow_name=self.flow_name,
+ # desc="Auto-imported from user-templates",
+ # )
+ #
+ # snapshot_path = self._prepare_snapshot_with_env_defaults()
+ #
+ # try:
+ # snapshot = versioning.import_flow_version(
+ # bucket_id=bucket.identifier,
+ # flow_id=flow.identifier,
+ # file_path=str(snapshot_path),
+ # )
+ # finally:
+ # snapshot_path.unlink(missing_ok=True)
+ #
+ # self.assertIsNotNone(snapshot)
+ #
\ No newline at end of file
diff --git a/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py
new file mode 100644
index 000000000..a26238800
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_opensearch_ingest.py
@@ -0,0 +1,25 @@
+import unittest
+from io import BytesIO
+
+
+class DummyFlowFile:
+ def __init__(self, content: str):
+ self._data = BytesIO(content.encode())
+
+ def read(self):
+ return self._data.getvalue()
+
+ def write(self, data):
+ self._data = BytesIO(data)
+ return self
+
+class UppercaseProcessor:
+    """Minimal stand-in (assumed here) for a NiFi Python processor that
+    uppercases flowfile content; swap in the real processor under test."""
+
+    def transform(self, context, flowfile):
+        return flowfile.write(flowfile.read().decode().upper().encode())
+
+
+class TestMyProcessor(unittest.TestCase):
+    def test_uppercase(self):
+        proc = UppercaseProcessor()
+        ff_in = DummyFlowFile("hello nifi")
+        ff_out = proc.transform({}, ff_in)
+
+        self.assertEqual(ff_out.read().decode(), "HELLO NIFI")
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/nifi/user_scripts/tests/nifi/test_service_health.py b/nifi/user_scripts/tests/nifi/test_service_health.py
new file mode 100644
index 000000000..0c993587a
--- /dev/null
+++ b/nifi/user_scripts/tests/nifi/test_service_health.py
@@ -0,0 +1,54 @@
+import unittest
+
+from pydantic import SecretStr
+
+from nifi.user_scripts.dto.database_config import DatabaseConfig
+from nifi.user_scripts.dto.elastic_config import ElasticConfig
+from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig
+from nifi.user_scripts.dto.service_health import DatabaseHealth, ElasticHealth, NiFiHealth
+from nifi.user_scripts.utils.generic import get_logger
+from nifi.user_scripts.utils.helpers.nifi_api_client import NiFiClient, NiFiRegistryClient
+from nifi.user_scripts.utils.helpers.service import check_elasticsearch, check_kibana, check_postgres
+
+
+class TestServices(unittest.TestCase):
+ """Service connectivity and health checks."""
+
+ logger = get_logger(__name__)
+
+ @classmethod
+ def setUpClass(cls):
+ cls.nifi_api_config: NiFiAPIConfig = NiFiAPIConfig()
+        cls.nifi_client: NiFiClient = NiFiClient(config=cls.nifi_api_config, health_check_on_init=False)
+ cls.nifi_registry_client: NiFiRegistryClient = NiFiRegistryClient(config=cls.nifi_api_config)
+ cls.pg_config: DatabaseConfig = DatabaseConfig(port=5554)
+ cls.elastic_config: ElasticConfig = ElasticConfig(user="admin",
+ hosts=["https://localhost:9200"],
+ password=SecretStr("admin"),
+ kibana_host="https://localhost:5601",
+ kibana_version="opensearch-dashboards")
+
+ def test_nifi_health(self) -> None:
+ health: NiFiHealth = self.nifi_client.health_check()
+ self.assertTrue(health.connected)
+ self.assertEqual(health.status, "healthy")
+
+ def test_nifi_registry_health(self) -> None:
+ nifi_health: NiFiHealth = self.nifi_registry_client.health_check()
+ self.assertTrue(nifi_health.connected)
+ self.assertEqual(nifi_health.status, "healthy")
+
+ def test_postgres_health(self):
+ database_health: DatabaseHealth = check_postgres(self.pg_config)
+ self.assertTrue(database_health.connected)
+ self.assertEqual(database_health.status, "healthy")
+
+ def test_elastic_health(self):
+ elastic_health: ElasticHealth = check_elasticsearch(self.elastic_config)
+ self.assertTrue(elastic_health.connected)
+ self.assertEqual(elastic_health.status, "healthy")
+
+ def test_kibana_health(self):
+ elastic_health: ElasticHealth = check_kibana(self.elastic_config)
+ self.assertTrue(elastic_health.connected)
+ self.assertEqual(elastic_health.status, "healthy")
\ No newline at end of file
diff --git a/nifi/user-scripts/tests/test_files/ex1.pdf b/nifi/user_scripts/tests/resources/ex1.pdf
old mode 100755
new mode 100644
similarity index 100%
rename from nifi/user-scripts/tests/test_files/ex1.pdf
rename to nifi/user_scripts/tests/resources/ex1.pdf
diff --git a/nifi/user_scripts/tests/test_avro.py b/nifi/user_scripts/tests/test_avro.py
new file mode 100644
index 000000000..7f893747e
--- /dev/null
+++ b/nifi/user_scripts/tests/test_avro.py
@@ -0,0 +1,61 @@
+import io
+import json
+
+import avro
+from avro.datafile import DataFileWriter
+from avro.io import DatumWriter
+
+"""
+ Use this script to test avro schemas etc with python3
+"""
+
+stream = object()
+
+json_mapper_schema = json.loads(open("../user_schemas/legacy/cogstack_common_schema_mapping.json").read())
+avro_cogstack_schema = avro.schema.parse(
+    open("../user_schemas/legacy/cogstack_common_schema_full.avsc", "rb").read(),
+    validate_enum_symbols=False)
+
+test_records = [{ "docid" : "1",
+ "sampleid" : 1041,
+ "dct" : "2020-05-11 10:52:25.273518",
+ "binarydoc": "blablabla" },
+ { "docid" : "1",
+ "sampleid" : 1041,
+ "dct" : "2020-05-11 10:52:25.273518",
+ "binarydoc": "blablabla" }]
+
+schema_fields = avro_cogstack_schema.props["fields"]
+dict_fields_types = {}
+for field in schema_fields:
+ dict_fields_types[field.name] = ""
+ tmp_list = json.loads(str(field.type))
+ if len(tmp_list) > 1 and type(tmp_list) is not str:
+ if type(tmp_list[1]) is dict:
+ dict_fields_types[field.name] = tmp_list[1]["type"]
+ else:
+ dict_fields_types[field.name] = tmp_list[1]
+ else:
+ dict_fields_types[field.name] = field.type
+
+available_mapping_keys = {}
+for k, v in json_mapper_schema.items():
+ if v:
+ available_mapping_keys[k] = v
+
+bytes_io = io.BytesIO(bytes("", encoding="UTF-8"))
+
+type_mapping = {"boolean": "bool", "long": "int", "int": "int", "float" : "float", "byte":"bytes", "string": "str", "double": "float"}
+
+
+print(avro_cogstack_schema)
+
+with DataFileWriter(bytes_io, DatumWriter(), avro_cogstack_schema) as writer:
+ # re-map the value to the new keys
+
+ for _record in test_records:
+ record = {}
+
+ for k, v in available_mapping_keys.items():
+ if v in _record.keys():
+                record[k] = _record[v]
+
+ writer.append(record)
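+
+# Illustrative read-back of what was just written (DataFileWriter closes
+# bytes_io when the with-block exits, so capture the payload before that):
+#     from avro.datafile import DataFileReader
+#     from avro.io import DatumReader
+#     payload = bytes_io.getvalue()  # call this inside the with-block above
+#     with DataFileReader(io.BytesIO(payload), DatumReader()) as reader:
+#         print(list(reader))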
diff --git a/nifi/user_scripts/tmp/.gitignore b/nifi/user_scripts/tmp/.gitignore
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user_scripts/utils/__init__.py b/nifi/user_scripts/utils/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/nifi/user-scripts/utils/cerner_blob.py b/nifi/user_scripts/utils/cerner_blob.py
similarity index 100%
rename from nifi/user-scripts/utils/cerner_blob.py
rename to nifi/user_scripts/utils/cerner_blob.py
diff --git a/nifi/user-scripts/utils/ethnicity_map.py b/nifi/user_scripts/utils/ethnicity_map.py
similarity index 100%
rename from nifi/user-scripts/utils/ethnicity_map.py
rename to nifi/user_scripts/utils/ethnicity_map.py
diff --git a/nifi/user-scripts/utils/generic.py b/nifi/user_scripts/utils/generic.py
similarity index 98%
rename from nifi/user-scripts/utils/generic.py
rename to nifi/user_scripts/utils/generic.py
index 5d5be2ead..963f3773e 100644
--- a/nifi/user-scripts/utils/generic.py
+++ b/nifi/user_scripts/utils/generic.py
@@ -79,7 +79,7 @@ def dict2jsonl_file(input_dict: dict | defaultdict, file_path: str) -> None:
print('', file=outfile)
-def get_logger(name: str) -> logging.Logger:
+def get_logger(name: str, propagate: bool = False) -> logging.Logger:
"""Return a configured logger shared across all NiFi clients."""
level_name = os.getenv("NIFI_LOG_LEVEL", "INFO").upper()
level = getattr(logging, level_name, logging.INFO)
@@ -94,7 +94,7 @@ def get_logger(name: str) -> logging.Logger:
handler.setFormatter(fmt)
logger.addHandler(handler)
logger.setLevel(level)
- logger.propagate = False
+ logger.propagate = propagate
return logger
def download_file_from_url(url: str, output_path: str, ssl_verify: bool = False, chunk_size: int = 8192) -> None:
diff --git a/nifi/user-scripts/utils/helpers/avro_json_encoder.py b/nifi/user_scripts/utils/helpers/avro_json_encoder.py
similarity index 100%
rename from nifi/user-scripts/utils/helpers/avro_json_encoder.py
rename to nifi/user_scripts/utils/helpers/avro_json_encoder.py
diff --git a/nifi/user-scripts/utils/helpers/base_nifi_processor.py b/nifi/user_scripts/utils/helpers/base_nifi_processor.py
similarity index 100%
rename from nifi/user-scripts/utils/helpers/base_nifi_processor.py
rename to nifi/user_scripts/utils/helpers/base_nifi_processor.py
diff --git a/nifi/user_scripts/utils/helpers/nifi_api_client.py b/nifi/user_scripts/utils/helpers/nifi_api_client.py
new file mode 100644
index 000000000..c8127b511
--- /dev/null
+++ b/nifi/user_scripts/utils/helpers/nifi_api_client.py
@@ -0,0 +1,142 @@
+import time
+from logging import Logger
+
+import requests
+from nipyapi import canvas, security
+from nipyapi.nifi import ApiClient, ProcessGroupsApi
+from nipyapi.nifi.configuration import Configuration as NiFiConfiguration
+from nipyapi.nifi.models.process_group_entity import ProcessGroupEntity
+from nipyapi.nifi.models.processor_entity import ProcessorEntity
+from nipyapi.registry import ApiClient as RegistryApiClient
+from nipyapi.registry import BucketsApi
+from nipyapi.registry.configuration import Configuration as RegistryConfiguration
+
+from nifi.user_scripts.dto.nifi_api_config import NiFiAPIConfig
+from nifi.user_scripts.dto.service_health import NiFiHealth
+from nifi.user_scripts.utils.generic import get_logger
+
+
+class NiFiRegistryClient:
+
+    def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None:
+ self.config = config or NiFiAPIConfig()
+ self.nipyapi_config = RegistryConfiguration()
+ self.nipyapi_config.host = self.config.nifi_registry_api_url
+ self.nipyapi_config.verify_ssl = self.config.verify_ssl
+ self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path # type: ignore
+ self.nipyapi_config.key_file = self.config.nifi_cert_key_path # type: ignore
+ self.nipyapi_config.ssl_ca_cert = self.config.root_cert_ca_path # type: ignore
+
+ self.logger: Logger = get_logger(self.__class__.__name__)
+
+        self.api_client = RegistryApiClient(self.nipyapi_config.host)
+        self.buckets_api = BucketsApi(self.api_client)
+
+        if health_check_on_init:
+            self.health_check()
+
+ def list_buckets(self):
+ buckets = self.buckets_api.get_buckets()
+ for b in buckets:
+ self.logger.info("Bucket: %s (%s)", b.name, b.identifier)
+ return buckets
+
+ def health_check(self, timeout: int = 15) -> NiFiHealth:
+ start = time.perf_counter()
+ nifi_health = NiFiHealth(
+ service="nifi-registry",
+ service_info=self.config.nifi_registry_base_url
+ )
+
+ try:
+ response = requests.head(
+ url=self.config.nifi_registry_base_url,
+ auth=self.config.auth_credentials(),
+ cert=self.config.get_ssl_certs_paths(),
+ verify=self.config.root_cert_ca_path,
+ timeout=timeout
+ )
+
+ nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+ nifi_health.connected = response.ok
+
+ if response.status_code == 200:
+ nifi_health.status = "healthy"
+ self.logger.info(f"✅ Logged in to NiFi Registry, latency {nifi_health.latency_ms:.2f} ms")
+ else:
+ nifi_health.message = f"❌ Unexpected status code {response.status_code}"
+
+        except Exception as exc:
+            nifi_health.message = str(exc)
+            self.logger.error("❌ Failed to reach NiFi Registry: %s", exc)
+
+ return nifi_health
+
+
+class NiFiClient:
+
+    def __init__(self, config: NiFiAPIConfig, health_check_on_init: bool = True) -> None:
+ self.config = config or NiFiAPIConfig()
+ self.nipyapi_config = NiFiConfiguration()
+ self.nipyapi_config.host = self.config.nifi_api_url
+ self.nipyapi_config.verify_ssl = self.config.verify_ssl
+ self.nipyapi_config.cert_file = self.config.nifi_cert_pem_path # type: ignore
+ self.nipyapi_config.key_file = self.config.nifi_cert_key_path # type: ignore
+ self.nipyapi_config.ssl_ca_cert = self.config.root_cert_ca_path # type: ignore
+
+ self.logger: Logger = get_logger(self.__class__.__name__)
+
+ self.api_client = ApiClient(self.nipyapi_config)
+ self.process_group_api = ProcessGroupsApi(self.api_client)
+
+        if health_check_on_init:
+ self.health_check()
+
+ def health_check(self) -> NiFiHealth:
+ start = time.perf_counter()
+ nifi_health = NiFiHealth(
+ service="nifi",
+ service_info=self.config.nifi_api_url
+ )
+
+ try:
+ result = security.service_login(
+ service='nifi',
+ username=self.config.nifi_username,
+ password=self.config.nifi_password
+ )
+
+ nifi_health.connected = bool(result)
+ nifi_health.latency_ms = (time.perf_counter() - start) * 1000
+
+ if result:
+ nifi_health.status = "healthy"
+ self.logger.info(f"✅ Logged in to NiFi, latency {nifi_health.latency_ms:.2f} ms")
+ else:
+ nifi_health.message = "Authentication returned False"
+ self.logger.info("❌ Failed to log in to NiFi")
+
+        except Exception as exc:
+            nifi_health.message = str(exc)
+            self.logger.error("❌ Failed to log in to NiFi: %s", exc)
+
+ return nifi_health
+
+ def get_root_process_group_id(self) -> str:
+ return canvas.get_root_pg_id()
+
+ def get_process_group_by_name(self, process_group_name: str) -> None | list[object] | object:
+        return canvas.get_process_group(process_group_name, identifier_type="name")
+
+ def get_process_group_by_id(self, process_group_id: str) -> ProcessGroupEntity:
+ return canvas.get_process_group(process_group_id, identifier_type="id")
+
+ def start_process_group(self, process_group_id: str) -> bool:
+ return canvas.schedule_process_group(process_group_id, True)
+
+ def stop_process_group(self, process_group_id: str) -> bool:
+ return canvas.schedule_process_group(process_group_id, False)
+
+ def get_child_process_groups_from_parent_id(self, parent_process_group_id: str) -> list[ProcessGroupEntity]:
+ parent_pg = canvas.get_process_group(parent_process_group_id, identifier_type="id")
+ return canvas.list_all_process_groups(parent_pg.id)
+
+ def get_all_processors_in_process_group(self, process_group_id: str) -> list[ProcessorEntity]:
+ return canvas.list_all_processors(process_group_id)
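+
+
+# Illustrative sketch of a control session against a local NiFi (the flow of
+# calls is an assumption, not part of the deployed pipelines):
+if __name__ == "__main__":
+    client = NiFiClient(config=NiFiAPIConfig(), health_check_on_init=False)
+    if client.health_check().connected:
+        root_id = client.get_root_process_group_id()
+        for processor in client.get_all_processors_in_process_group(root_id):
+            client.logger.info("Processor: %s", processor.component.name)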
diff --git a/nifi/user_scripts/utils/helpers/service.py b/nifi/user_scripts/utils/helpers/service.py
new file mode 100644
index 000000000..512f8034b
--- /dev/null
+++ b/nifi/user_scripts/utils/helpers/service.py
@@ -0,0 +1,113 @@
+import time
+
+import psycopg
+import requests
+from opensearchpy import OpenSearch
+
+from nifi.user_scripts.dto.database_config import DatabaseConfig
+from nifi.user_scripts.dto.elastic_config import ElasticConfig
+from nifi.user_scripts.dto.service_health import DatabaseHealth, ElasticHealth
+from nifi.user_scripts.utils.generic import get_logger
+
+logger = get_logger(__name__)
+
+def check_kibana(config: ElasticConfig) -> ElasticHealth:
+
+ elastic_health: ElasticHealth = ElasticHealth(service=config.kibana_version)
+ start = time.perf_counter()
+
+ try:
+ if config.kibana_version == "kibana":
+ raise NotImplementedError
+
+ response = requests.get(config.kibana_host + "/api/status",
+ auth=config.auth_credentials(),
+ timeout=config.timeout,
+ cert=config.get_kibana_ssl_certs_path(),
+ verify=config.elastic_root_cert_ca_path
+ )
+
+ elastic_health.latency_ms = (time.perf_counter() - start) * 1000
+ elastic_health.connected = response.ok
+
+ if response.status_code == 200:
+ elastic_health.status = "healthy"
+ logger.info(f"✅ {config.kibana_version} OK, latency {elastic_health.latency_ms:.2f} ms")
+ else:
+ elastic_health.message = f"❌ Failed to query {config.kibana_version}"
+
+ except Exception as e:
+ elastic_health.message = str(e)
+        logger.error("❌ Failed to query %s: %s", config.kibana_version, e)
+
+ return elastic_health
+
+def check_elasticsearch(config: ElasticConfig) -> ElasticHealth:
+
+ elastic_health: ElasticHealth = ElasticHealth(service=config.elasticsearch_version)
+ start = time.perf_counter()
+
+ try:
+        if config.elasticsearch_version == "elasticsearch":
+            raise NotImplementedError
+
+        elastic_connection = OpenSearch(hosts=config.hosts,
+                                        use_ssl=config.verify_ssl,
+                                        verify_certs=False,
+                                        http_auth=config.auth_credentials(),
+                                        ssl_show_warn=False,
+                                        ssl_assert_hostname=False,
+                                        ca_certs=config.elastic_root_cert_ca_path,
+                                        client_cert=config.elastic_node_cert_pem_path,
+                                        client_key=config.elastic_node_cert_key_path
+                                        )
+
+ if elastic_connection.ping():
+ elastic_health.connected = True
+ elastic_health.status = "healthy"
+ elastic_health.service_info = elastic_connection.nodes.info()
+ elastic_health.latency_ms = (time.perf_counter() - start) * 1000
+ logger.info(f"✅ {config.elasticsearch_version} OK, latency {elastic_health.latency_ms:.2f} ms")
+ else:
+ elastic_health.message = f"❌ Failed to query {config.elasticsearch_version}"
+ except Exception as e:
+ elastic_health.message = str(e)
+        logger.error("❌ Failed to query %s: %s", config.elasticsearch_version, e)
+
+ return elastic_health
+
+def check_postgres(config: DatabaseConfig) -> DatabaseHealth:
+
+ start = time.perf_counter()
+ database_health = DatabaseHealth(service="cogstack-samples-db",
+ db_name=config.database_name,
+ version=None
+ )
+
+ try:
+ with psycopg.connect(
+ host=config.host,
+ port=config.port,
+ user=config.username,
+ password=config.password.get_secret_value(),
+ dbname=config.database_name,
+ connect_timeout=config.timeout,
+ ) as connection, connection.cursor() as cursor:
+ cursor.execute("SELECT version();")
+ result = cursor.fetchone()
+
+ if result and result[0]:
+ database_health.version = result[0]
+ database_health.status = "healthy"
+ database_health.connected = True
+ database_health.latency_ms = (time.perf_counter() - start) * 1000
+ logger.info(f"✅ PostgreSQL OK, latency {database_health.latency_ms:.2f} ms")
+ else:
+ database_health.message = "No version returned from database"
+ database_health.status = "unhealthy"
+ database_health.connected = True
+
+ except Exception as e:
+ database_health.message = str(e)
+        logger.error("❌ Failed to query PostgreSQL: %s", e)
+ return database_health
diff --git a/nifi/user_scripts/utils/lint_env.py b/nifi/user_scripts/utils/lint_env.py
new file mode 100644
index 000000000..8918e9152
--- /dev/null
+++ b/nifi/user_scripts/utils/lint_env.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+"""
+ Lightweight env file validator used by deploy/export_env_vars.sh.
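+
+    Usage (illustrative invocation; pass any .env file paths):
+        python3 nifi/user_scripts/utils/lint_env.py deploy/database.env
+
+    Exits with status 1 when any checked file contains a malformed entry.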
+"""
+
+from __future__ import annotations
+
+import sys
+from collections.abc import Iterable
+from pathlib import Path
+
+PORT_SUFFIXES = ("_PORT", "_OUTPUT_PORT", "_INPUT_PORT")
+BOOL_SUFFIXES = ("_ENABLED", "_SSL_ENABLED", "_BAKE")
+BOOL_VALUES = {"true", "false", "1", "0", "yes", "no", "on", "off"}
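+# Illustrative examples of the suffix checks below (variable names are hypothetical):
+#   SOME_SERVICE_PORT=8443     -> accepted (digits after quote-stripping)
+#   SOME_SERVICE_PORT="8443"   -> accepted
+#   SOME_FEATURE_ENABLED=maybe -> rejected (not in BOOL_VALUES)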
+
+
+def strip_quotes(value: str) -> str:
+ if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
+ return value[1:-1]
+ return value
+
+
+def parse_env_file(path: Path) -> tuple[list[str], list[str], list[tuple[str, str, int]]]:
+ errors: list[str] = []
+ warnings: list[str] = []
+ entries: list[tuple[str, str, int]] = []
+
+ for lineno, raw_line in enumerate(path.read_text().splitlines(), start=1):
+ line = raw_line.strip()
+ if not line or line.startswith("#"):
+ continue
+
+ if line.startswith("export "):
+ line = line[len("export ") :].strip()
+
+ if "=" not in line:
+ errors.append(f"{path}:{lineno}: missing '=' (got: {raw_line})")
+ continue
+
+ key, value = line.split("=", 1)
+ key = key.strip()
+ value = value.strip()
+
+ if not key:
+ errors.append(f"{path}:{lineno}: empty key (got: {raw_line})")
+ continue
+
+ entries.append((key, value, lineno))
+
+    seen: dict[str, int] = {}
+ for key, _, lineno in entries:
+ if key in seen:
+ warnings.append(f"{path}:{lineno}: duplicate key '{key}' (also on line {seen[key]})")
+ else:
+ seen[key] = lineno
+
+ return errors, warnings, entries
+
+
+def validate_entries(path: Path, entries: Iterable[tuple[str, str, int]]) -> list[str]:
+ errors: list[str] = []
+
+ for key, value, lineno in entries:
+ normalized = strip_quotes(value)
+
+ if any(key.endswith(suffix) for suffix in PORT_SUFFIXES):
+ if not normalized.isdigit():
+ errors.append(f"{path}:{lineno}: '{key}' should be an integer port (got '{value}')")
+
+ if any(key.endswith(suffix) for suffix in BOOL_SUFFIXES):
+ if normalized.lower() not in BOOL_VALUES:
+ errors.append(
+ f"{path}:{lineno}: '{key}' should be one of {sorted(BOOL_VALUES)} (got '{value}')"
+ )
+
+ return errors
+
+
+def main(args: list[str]) -> int:
+ if not args:
+ script = Path(__file__).name
+ print(f"Usage: {script} [ ...]")
+ return 1
+
+ warnings: list[str] = []
+ errors: list[str] = []
+ checked_files = 0
+
+ for path_str in args:
+ path = Path(path_str).resolve()
+ if not path.exists():
+ warnings.append(f"Skipping missing env file: {path}")
+ continue
+
+ checked_files += 1
+ parse_errors, parse_warnings, entries = parse_env_file(path)
+ errors.extend(parse_errors)
+ warnings.extend(parse_warnings)
+ errors.extend(validate_entries(path, entries))
+
+ for warning in warnings:
+ print(f"⚠️ {warning}")
+
+ if errors:
+ print("❌ Env validation failed:")
+ for err in errors:
+ print(f" - {err}")
+ return 1
+
+ print(f"✅ Env validation passed ({checked_files} files checked)")
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/nifi/user-scripts/utils/pgsql_query.py b/nifi/user_scripts/utils/pgsql_query.py
similarity index 71%
rename from nifi/user-scripts/utils/pgsql_query.py
rename to nifi/user_scripts/utils/pgsql_query.py
index 12e32d96e..33c639f2f 100644
--- a/nifi/user-scripts/utils/pgsql_query.py
+++ b/nifi/user_scripts/utils/pgsql_query.py
@@ -1,6 +1,6 @@
-import psycopg2
+import psycopg
-conn = psycopg2.connect(
+conn = psycopg.connect(
host="localhost",
database="suppliers",
user="YourUsername",
diff --git a/nifi/user-scripts/utils/sqlite_query.py b/nifi/user_scripts/utils/sqlite_query.py
similarity index 100%
rename from nifi/user-scripts/utils/sqlite_query.py
rename to nifi/user_scripts/utils/sqlite_query.py
diff --git a/nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml b/nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/dt4h/annotate_dt4h_ann_manager.xml
rename to nifi/user_templates/dt4h/annotate_dt4h_ann_manager.xml
diff --git a/nifi/user-templates/dt4h/raw_ingest_dt4h.xml b/nifi/user_templates/dt4h/raw_ingest_dt4h.xml
similarity index 100%
rename from nifi/user-templates/dt4h/raw_ingest_dt4h.xml
rename to nifi/user_templates/dt4h/raw_ingest_dt4h.xml
diff --git a/nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml b/nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/CogStack_Cohort_create_source_docs.xml
rename to nifi/user_templates/legacy/CogStack_Cohort_create_source_docs.xml
diff --git a/nifi/user-templates/legacy/Common_schema_example_ingest.xml b/nifi/user_templates/legacy/Common_schema_example_ingest.xml
similarity index 100%
rename from nifi/user-templates/legacy/Common_schema_example_ingest.xml
rename to nifi/user_templates/legacy/Common_schema_example_ingest.xml
diff --git a/nifi/user-templates/legacy/DEID_sample_pipeline.xml b/nifi/user_templates/legacy/DEID_sample_pipeline.xml
similarity index 100%
rename from nifi/user-templates/legacy/DEID_sample_pipeline.xml
rename to nifi/user_templates/legacy/DEID_sample_pipeline.xml
diff --git a/nifi/user-templates/legacy/Generate_location_ES.xml b/nifi/user_templates/legacy/Generate_location_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/Generate_location_ES.xml
rename to nifi/user_templates/legacy/Generate_location_ES.xml
diff --git a/nifi/user-templates/legacy/Grab_non_annotated_docs.xml b/nifi/user_templates/legacy/Grab_non_annotated_docs.xml
similarity index 100%
rename from nifi/user-templates/legacy/Grab_non_annotated_docs.xml
rename to nifi/user_templates/legacy/Grab_non_annotated_docs.xml
diff --git a/nifi/user-templates/legacy/HealTAC_23.xml b/nifi/user_templates/legacy/HealTAC_23.xml
similarity index 100%
rename from nifi/user-templates/legacy/HealTAC_23.xml
rename to nifi/user_templates/legacy/HealTAC_23.xml
diff --git a/nifi/user-templates/legacy/OS_annotate_per_doc.xml b/nifi/user_templates/legacy/OS_annotate_per_doc.xml
similarity index 100%
rename from nifi/user-templates/legacy/OS_annotate_per_doc.xml
rename to nifi/user_templates/legacy/OS_annotate_per_doc.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_Ingest_DB_OCR_service_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_DB_to_ES_and_DB_ann_manager.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_annotate_ES_MedCATService_to_ES.xml
diff --git a/nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml b/nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
similarity index 100%
rename from nifi/user-templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
rename to nifi/user_templates/legacy/OpenSearch_ingest_docs_DB_to_ES.xml
diff --git a/nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml b/nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
similarity index 100%
rename from nifi/user-templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
rename to nifi/user_templates/legacy/Raw_file_read_from_disk_ocr_custom.xml
diff --git a/nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json b/nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_docs_ingest_annotations_to_es.json
rename to nifi/user_templates/opensearch_docs_ingest_annotations_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_ocr_service_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_ocr_service_to_es.json
diff --git a/nifi/user-templates/opensearch_ingest_docs_db_to_es.json b/nifi/user_templates/opensearch_ingest_docs_db_to_es.json
similarity index 100%
rename from nifi/user-templates/opensearch_ingest_docs_db_to_es.json
rename to nifi/user_templates/opensearch_ingest_docs_db_to_es.json
diff --git a/pyproject.toml b/pyproject.toml
index 6fae60f28..2b563d437 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.ruff]
line-length = 120
-exclude = ["nifi/user-scripts/legacy_scripts", "services"]
+exclude = ["nifi/user_scripts/legacy_scripts", "services"]
target-version = "py311"
indent-width = 4
@@ -25,15 +25,30 @@ fixable = ["ALL"]
[tool.mypy]
plugins = ["pydantic.mypy"]
+python_version = "3.11"
ignore_missing_imports = true
strict = false
files = "."
mypy_path = "./typings/"
+warn_unused_configs = true
-[tool.isort]
-line_length = 120
-skip = ["venv", "venv-test", "envs", "docker", "models"]
+[tool.setuptools.packages.find]
+include = ["nifi*"]
+exclude = [
+ "*egg-info*",
+ "build*",
+ "dist*",
+ "nifi/conf*",
+ "nifi/drivers*",
+ "nifi/user_schemas*",
+ "nifi/user_templates*",
+]
+
+[project]
+name = "cogstack_nifi"
+version = "0.0.1"
+requires-python = ">=3.11"
-[tool.flake8]
-max-line-length = 120
-exclude = ["venv", "venv-test", "envs", "docker", "models"]
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
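+
+# Illustrative check of the packaging config above: `pip install -e .`
+# (or `python -m build`) should discover only the nifi/user_scripts packages,
+# given the include/exclude patterns in [tool.setuptools.packages.find].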
diff --git a/security/env/users_database.env b/security/env/users_database.env
index 9da92b17c..b2140eccf 100644
--- a/security/env/users_database.env
+++ b/security/env/users_database.env
@@ -3,8 +3,8 @@ POSTGRES_USER_SAMPLES=test
POSTGRES_PASSWORD_SAMPLES=test
# Production DATABASE user
-POSTGRES_USER=admin
-POSTGRES_PASSWORD=admin
+DATABASE_USER=admin
+DATABASE_PASSWORD=admin
# Production DATABASE MSSQL user
MSSQL_SA_USER=sa
diff --git a/security/env/users_elasticsearch.env b/security/env/users_elasticsearch.env
index 918334a3f..3ba87ca1a 100644
--- a/security/env/users_elasticsearch.env
+++ b/security/env/users_elasticsearch.env
@@ -42,4 +42,3 @@ ES_LOGSTASH_PASS=kibanaserver
ES_KIBANARO_PASS=kibanaserver
ES_READALL_PASS=kibanaserver
ES_SNAPSHOTRESTORE_PASS=kibanaserver
-
diff --git a/security/scripts/create_opensearch_client_admin_certs.sh b/security/scripts/create_opensearch_client_admin_certs.sh
index 7a8528808..1d864ab8e 100644
--- a/security/scripts/create_opensearch_client_admin_certs.sh
+++ b/security/scripts/create_opensearch_client_admin_certs.sh
@@ -49,7 +49,7 @@ echo "==========================================================================
CA_ROOT_CERT="${ROOT_CERTIFICATES_FOLDER}"$ROOT_CERTIFICATE_NAME".pem"
CA_ROOT_KEY="${ROOT_CERTIFICATES_FOLDER}"$ROOT_CERTIFICATE_NAME".key"
-EXT_FILE= "${SECURITY_TEMPLATES_FOLDER}ssl-extensions-x509.cnf"
+EXT_FILE="${SECURITY_TEMPLATES_FOLDER}ssl-extensions-x509.cnf"
# === Client cert ===
echo "Generating a key for: $ES_CLIENT_CERT_NAME"