From 5295c23e25a36c5fbd77a73f5a80479daf0854d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Jim=C3=A9nez?=
Date: Mon, 22 Dec 2025 14:40:28 +0000
Subject: [PATCH] Create web-compatible script based on existing one

---
 resources/bin/rb_backup_segments.sh | 476 ++++++++++++++++++++++++++++
 1 file changed, 476 insertions(+)
 create mode 100644 resources/bin/rb_backup_segments.sh

diff --git a/resources/bin/rb_backup_segments.sh b/resources/bin/rb_backup_segments.sh
new file mode 100644
index 00000000..681da2d1
--- /dev/null
+++ b/resources/bin/rb_backup_segments.sh
@@ -0,0 +1,476 @@
+#!/usr/bin/env bash
+
+function set_color() {
+    green="echo -en \\033[1;32m"
+    red="echo -en \\033[1;31m"
+    yellow="echo -en \\033[1;33m"
+    orange="echo -en \\033[0;33m"
+    blue="echo -en \\033[1;34m"
+    black="echo -en \\033[1;30m"
+    white="echo -en \\033[1;37m"
+    cyan="echo -en \\033[0;36m"
+    purple="echo -en \\033[0;35m"
+    brown="echo -en \\033[0;33m"
+    gray="echo -en \\033[0;37m"
+    norm="echo -en \\033[1;0m"
+    eval \$$1
+}
+
+e_ok() {
+    echo -n " ["
+    set_color green
+    echo -n $"OK"
+    set_color norm
+    echo -n "]"
+    echo -ne "\r"
+    echo
+    return 0
+}
+
+e_fail() {
+    echo -n " ["
+    set_color red
+    echo -n $"FAILED"
+    set_color norm
+    echo -n "]"
+    echo -ne "\r"
+    echo
+    return 1
+}
+
+function print_result(){
+    if [ "x$1" == "x0" ]; then
+        echo " [OK]"
+    else
+        echo " [FAILED]"
+    fi
+}
+
+function usage(){
+    echo "$0 [-h] [-r -f filename] [-x start date] [-y stop date]"
+    echo " export segments from the local s3 storage (s3://bucket/) to /var/backup/segments"
+    echo " start and stop dates must use the following format: 2025-05-27T09:00:00.000Z"
+    echo ""
+    echo " -h: print this help"
+    echo " -r: import a backup file (-f is mandatory)"
+    echo " -e: enable imported segments"
+    echo " -f: import from the specified file"
+    echo " -t: export to a specific tar file"
+    echo " -g: regex grep filter to export only the files that match the filter"
+    echo " -x: start date of the segments to import/export"
+    echo " -y: end date of the segments to import/export"
+    echo " -s: segment ids to export, comma separated"
+    echo ""
+    echo " -n: do not ask, just do it"
+    echo " -b: restore the full druid database / this deletes the current druid database in postgresql!! You will lose the current druid data in postgresql!!"
+    echo " -v: be verbose (debug)"
+    exit 1
+}
+
+function print_system() {
+    if [ -f "$1" ]; then
+        dirfilename=$(dirname "$1")
+        echo "Disk usage: $(df -h "$dirfilename" | grep / | awk '{print $5}') (available: $(df -h "$dirfilename" | grep / | awk '{print $4}')) load average: $(uptime|sed 's/.*load average: //')"
+    fi
+}
+
+source /etc/profile
+
+import=0
+enableallsegments=0
+filename=""
+debug=0
+filter="."
+ask=1
+restoredb=0
+segment_ids=""
+
+newerthan=""
+olderthan=""
+
+start_time=$(date +%s)
+hostname=$(hostname -s)
+currenttime="$(date +"%Y%m%d%H%M")"
+tmpdir="/tmp/segment.tmp-${currenttime}.$$"
+
+renice -n 19 $$ &>/dev/null
+
+while getopts "href:t:nbg:vx:y:s:" name
+do
+    case $name in
+        h) usage;;
+        r) import=1;;
+        e) enableallsegments=1;;
+        f) filename="$OPTARG";;
+        t) exportfile="$OPTARG";;
+        g) filter="$OPTARG";;
+        v) debug=1;;
+        n) ask=0;;
+        b) restoredb=1;;
+        x) startdate="$OPTARG";;
+        y) stopdate="$OPTARG";;
+        s) segment_ids="$OPTARG";;
+        *) usage;;
+    esac
+done
+
+if [ ! -f /etc/druid/_common/common.runtime.properties ]; then
+    echo "ERROR: /etc/druid/_common/common.runtime.properties is missing. This file is needed to connect to the druid database, cannot continue..."
+    exit 1
+fi
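+
+# For reference, the druid properties consumed by this script typically look
+# like the following (illustrative values, not taken from any real deployment):
+#   druid.storage.bucket=bucket
+#   druid.storage.baseKey=rbdata
+#   druid.metadata.storage.connector.connectURI=jdbc:postgresql://127.0.0.1:5432/druid
+#   druid.metadata.storage.connector.password=secret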
+
+if [ "x$exportfile" != "x" ]; then
+    exportpath=$(dirname "$exportfile")
+    if [ ! -d "$exportpath" ]; then
+        echo "ERROR: the directory where the tar file should be stored does not exist!"
+        exit 1
+    fi
+
+    if [[ "$exportfile" != *.tar ]]; then
+        echo "ERROR: the export file name must end with .tar"
+        exit 1
+    fi
+fi
+
+# get druid connection data
+s3currentbucket=$(grep "^druid.storage.bucket=" /etc/druid/_common/common.runtime.properties 2>/dev/null | cut -d'=' -f2)
+[ -z "$s3currentbucket" ] && s3currentbucket="bucket"
+s3basekey=$(grep "^druid.storage.baseKey=" /etc/druid/_common/common.runtime.properties 2>/dev/null | cut -d'=' -f2)
+[ -z "$s3basekey" ] && s3basekey="rbdata"
+druid_db_uri=$(grep "^druid.metadata.storage.connector.connectURI=" /etc/druid/_common/common.runtime.properties 2>/dev/null | cut -d'=' -f2)
+druid_db_pass=$(grep "^druid.metadata.storage.connector.password=" /etc/druid/_common/common.runtime.properties 2>/dev/null | cut -d'=' -f2)
+
+# we are going to import the segments into the local s3
+if [ $import -eq 1 ]; then
+    confirm=0
+    if [ -z "$filename" ]; then
+        echo "ERROR: The option -f is mandatory to import segments"
+        exit 1
+    elif [ -d "$filename" ]; then
+        echo "ERROR: The selected file is a directory"
+        exit 1
+    elif [ ! -f "$filename" ]; then
+        echo "ERROR: The selected file $filename doesn't exist"
+        exit 1
+    else
+        if file "$filename" | grep -q "gzip compressed data"; then
+            echo "WARNING: Restoring backup from file $filename"
+            print_system "$filename"
+            if [ $ask -eq 1 ]; then
+                echo -n "Would you like to continue? (y/N) "
+                read -r VAR
+                if [ "$VAR" == "y" ] || [ "$VAR" == "Y" ]; then
+                    confirm=1
+                else
+                    confirm=0
+                fi
+            else
+                confirm=1
+            fi
+        else
+            echo "ERROR: The selected file $filename is not a segment backup file"
+            exit 1
+        fi
+    fi
+
+    if [ $confirm -eq 1 ]; then
+        if [ -e "$tmpdir" ]; then
+            echo "ERROR: The temporary directory $tmpdir already exists!!"
+            exit 1
+        else
+            mkdir -p "$tmpdir"
+
+            echo -n "- uncompress file $filename"
+            nice -n 19 ionice -c2 -n7 tar xzf "$filename" -C "$tmpdir"
+            RET1=$?
+            print_result $RET1
+
+            if [ $RET1 -eq 0 ]; then
+                if [ ! -f "$tmpdir/conf/db-druid-dump.psql" ]; then
+                    echo "ERROR: postgresql database segments file not found!"
+                elif [ ! -d "$tmpdir/segments" ]; then
+                    echo "ERROR: segments directory not found!"
+                else
+                    FILES_COUNT=$(find "$tmpdir/segments" -type f -name "*.zip" | wc -l)
+                    if [ "$FILES_COUNT" -eq 0 ]; then
+                        echo "ERROR: there are no segments in this backup"
+                    else
+                        if [ $restoredb -eq 1 ]; then
+                            # extract host and port from a JDBC URI such as jdbc:postgresql://host:5432/druid
+                            PGHOSTNAME="$(echo "$druid_db_uri" | sed 's|jdbc:postgresql://||' | sed 's/:.*//')"
+                            PGPORT="$(echo "$druid_db_uri" | sed 's|jdbc:postgresql://||' | sed 's/.*://' | sed 's|/.*||')"
+
+                            echo -n "- delete previous segments from database !!"
+                            echo "delete from druid_segments;" | PGPASSWORD="$druid_db_pass" psql -h "$PGHOSTNAME" -p "$PGPORT" -U druid -d druid &>/dev/null
+                            RET2=$?
+                            print_result $RET2
+
+                            echo -n "- restore segments database :"
+                            PGPASSWORD="$druid_db_pass" pg_restore -t druid_segments -h "$PGHOSTNAME" -p "$PGPORT" -U druid -d druid --data-only -F c "$tmpdir/conf/db-druid-dump.psql"
+                            RET3=$?
+                            print_result $RET3
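+                            # Illustrative check (not part of the original flow): the restore
+                            # can be verified by hand with the same credentials, e.g.
+                            #   echo "select count(*) from druid_segments;" | \
+                            #     PGPASSWORD="$druid_db_pass" psql -h "$PGHOSTNAME" -p "$PGPORT" -U druid -d druid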
+                        else
+                            RET2=0
+                            RET3=0
+                        fi
+
+                        if [ $RET2 -eq 0 ] && [ $RET3 -eq 0 ]; then
+                            echo -n "- preparing data : "
+                            mapfile -t segments_to_sync < <(find "$tmpdir/segments" -type f -name "*.zip")
+
+                            if [[ -n "$startdate" || -n "$stopdate" ]]; then
+                                filtered_sync=()
+                                for n in "${segments_to_sync[@]}"; do
+                                    # paths look like $tmpdir/segments/<datasource>/<interval>/<version>/...;
+                                    # strip the prefix and compare against the interval start timestamp
+                                    timestamp=$(echo "${n#"$tmpdir"/segments/}" | awk -F '/' '{print $2}' | cut -d'_' -f1)
+                                    if [[ -n "$startdate" && "$timestamp" < "$startdate" ]]; then
+                                        continue
+                                    fi
+                                    if [[ -n "$stopdate" && "$timestamp" > "$stopdate" ]]; then
+                                        continue
+                                    fi
+                                    filtered_sync+=("$n")
+                                done
+                                segments_to_sync=("${filtered_sync[@]}")
+                            fi
+                            echo "100%"
+
+                            counter=0
+                            RET4=0
+                            echo -n "- import segments : "
+                            if [ $debug -eq 1 ]; then
+                                echo ""
+                            fi
+                            for n in "${segments_to_sync[@]}"; do
+                                ((counter++))
+                                progress=$(printf "%.0f" "$(echo "scale=2; $counter / ${#segments_to_sync[@]} * 100" | bc)")
+                                if [ $debug -eq 1 ]; then
+                                    echo -n " sync $n to s3 : [${progress}%]"
+                                fi
+                                s3_target_path=$(echo "$n" | sed "s|${tmpdir}/segments/||")
+                                nice -n 19 ionice -c2 -n7 /usr/local/bin/mcli --quiet cp "$n" "${hostname}/${s3currentbucket}/${s3basekey}/${s3_target_path}"
+                                RET4=$?
+                                if [ $debug -eq 1 ]; then
+                                    print_result $RET4
+                                else
+                                    printf "\r- import segments : %.0f%%" "$progress"
+                                fi
+                            done
+                            echo ""
+
+                            if [ $restoredb -ne 1 ]; then
+                                RET5=0
+                                echo -n "- import/update druid metadata : "
+                                mapfile -t rule_files < <(find "$tmpdir/segments" -type f -name "rule.json")
+                                counter=0
+                                numberofrules=${#rule_files[@]}
+                                for rule in "${rule_files[@]}"; do
+                                    ((counter++))
+                                    progress=$(printf "%.0f" "$(echo "scale=2; $counter * 100 / $numberofrules" | bc)")
+                                    if [ "$enableallsegments" -eq 1 ]; then
+                                        if [ $debug -eq 1 ]; then
+                                            echo -n " enable segment and "
+                                        fi
+                                        sed -i 's/"used": "f",/"used": "t",/' "$rule"
+                                    fi
+                                    if [ $debug -eq 1 ]; then
+                                        echo -n "add $rule in druid database... [$progress%]"
+                                    else
+                                        printf "\r- import/update druid metadata : %.0f%%" "$progress"
+                                    fi
+                                    rvm ruby-2.7.5@web do rb_druid_metadata -f "$rule" &>/dev/null
+                                    RET5=$?
+                                    if [ $debug -eq 1 ]; then
+                                        print_result $RET5
+                                    fi
+                                done
+                            fi
+                        fi
+                    fi
+                fi
+            fi
+            echo -n "- remove temporary files :"
+            rm -rf "$tmpdir"
+            print_result $?
+
+            end_time=$(date +%s)
+            elapsed_seconds=$((end_time - start_time))
+            elapsed_time=$(printf "%d:%02d:%02d" "$((elapsed_seconds / 3600))" "$(( (elapsed_seconds % 3600) / 60 ))" "$((elapsed_seconds % 60))")
+            echo "- total runtime: $elapsed_time (HH:MM:SS)"
+        fi
+    fi
+else # we are going to export the segments to a tar
+    if [ -z "$exportfile" ]; then
+        filename="/var/backup/segments/segment-${currenttime}.tar"
+        mkdir -p /var/backup/segments/
+    else
+        filename=${exportfile}
+    fi
+
+    confirm=1
+    if [ -f "$filename" ]; then
+        print_system "$filename"
+        if [ $ask -eq 1 ]; then
+            echo -n "The file $filename exists. Would you like to overwrite it? (y/N) "
+            read -r VAR
+            if [ "$VAR" == "y" ] || [ "$VAR" == "Y" ]; then
+                confirm=1
+            else
+                confirm=0
+            fi
+        else
+            confirm=1
+        fi
+    fi
+
+    if [ $confirm -eq 1 ]; then
+        rm -f "$filename"
+
+        if [ -e "$tmpdir" ]; then
+            echo "ERROR: The temporary dir $tmpdir already exists!!"
+        else
+            mkdir -p "$tmpdir/conf"
+            echo -n "- backup full druid database to $tmpdir/conf/db-druid-dump.psql"
+
+            # extract host and port from a JDBC URI such as jdbc:postgresql://host:5432/druid
+            PGHOSTNAME="$(echo "$druid_db_uri" | sed 's|jdbc:postgresql://||' | sed 's/:.*//')"
+            PGPORT="$(echo "$druid_db_uri" | sed 's|jdbc:postgresql://||' | sed 's/.*://' | sed 's|/.*||')"
+            PGPASSWORD="$druid_db_pass" pg_dump -U druid -h "$PGHOSTNAME" -p "$PGPORT" -F c -b -f "$tmpdir/conf/db-druid-dump.psql"
+            RET1=$?
+            print_result $RET1
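+            # Note: "-F c" writes a pg_dump custom-format archive, which is what the
+            # import path restores via "pg_restore ... --data-only -F c". A manual
+            # restore of just the segments table (illustrative) would be:
+            #   PGPASSWORD=... pg_restore -t druid_segments --data-only -F c \
+            #     -h "$PGHOSTNAME" -p "$PGPORT" -U druid -d druid db-druid-dump.psql
+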
+            mkdir -p "$tmpdir/segments"
+            pushd "$tmpdir/segments" &>/dev/null
+
+            echo "- getting segments info from s3: "
+
+            # the find command is built as a string so the optional date flags can be
+            # appended, then executed later with eval
+            minio_find_cmd="nice -n 19 ionice -c2 -n7 /usr/local/bin/mcli find \"${hostname}/${s3currentbucket}/${s3basekey}/\" --regex \"$filter\""
+
+            if [ -n "$startdate" ]; then
+                minio_find_cmd+=" --newer-than \"${startdate}\""
+            fi
+            if [ -n "$stopdate" ]; then
+                minio_find_cmd+=" --older-than \"${stopdate}\""
+            fi
+
+            if [ -n "$segment_ids" ]; then
+                miniofiles=()
+                IFS=',' read -ra ids <<< "$segment_ids"
+                for id in "${ids[@]}"; do
+                    # a segment id looks like <datasource>_<intervalStart>_<intervalEnd>_<version>;
+                    # the datasource itself may contain underscores, so take the last
+                    # three parts and join the rest back together as the datasource
+                    IFS='_' read -ra id_parts <<< "$id"
+                    num_parts=${#id_parts[@]}
+                    version=${id_parts[num_parts-1]}
+                    interval_end=${id_parts[num_parts-2]}
+                    interval_start=${id_parts[num_parts-3]}
+                    interval="${interval_start}_${interval_end}"
+                    datasource_parts=("${id_parts[@]:0:num_parts-3}")
+                    datasource=$(IFS=_ ; echo "${datasource_parts[*]}")
+                    s3_path_prefix="${datasource}/${interval}/${version}"
+
+                    mapfile -t segment_files < <(nice -n 19 ionice -c2 -n7 /usr/local/bin/mcli find "${hostname}/${s3currentbucket}/${s3basekey}/${s3_path_prefix}/" --regex ".*" | sed "s|${hostname}/${s3currentbucket}/${s3basekey}/||")
+                    miniofiles+=("${segment_files[@]}")
+                done
+                print_result $?
+            else
+                mapfile -t miniofiles < <(eval "$minio_find_cmd" | sed "s|${hostname}/${s3currentbucket}/${s3basekey}/||")
+                print_result $?
+            fi
+
+            counter=0
+            numberofsegments=${#miniofiles[@]}
+            batch_size=8
+            pids=()
+            RET2=0
+
+            echo -n "- copy segment data :"
+            if [ $debug -eq 1 ]; then
+                echo ""
+            fi
+
+            # download up to batch_size segments in parallel, then wait for the batch
+            for i in "${!miniofiles[@]}"; do
+                n="${miniofiles[$i]}"
+
+                mkdir -p "./$(dirname "$n")"
+
+                (
+                    nice -n 19 ionice -c2 -n7 /usr/local/bin/mcli --quiet get "${hostname}/${s3currentbucket}/${s3basekey}/$n" "$n"
+                    if [ $? -ne 0 ]; then echo "ERROR: Download of $n failed" >&2; exit 1; fi
+                ) &
+                pids+=($!)
+
+                if (( ${#pids[@]} >= batch_size || i == numberofsegments - 1 )); then
+                    for pid in "${pids[@]}"; do
+                        wait "$pid" || RET2=1
+                    done
+                    pids=()
+
+                    processed_count=$((i + 1))
+                    progress=$(printf "%.0f" "$(echo "scale=2; $processed_count * 100 / $numberofsegments" | bc)")
+                    if [ $debug -eq 0 ]; then
+                        printf "\r- copy segment data : %d%%" "$progress"
+                    fi
+                fi
+            done
+
+            echo ""
+
+            counter=0
+            echo -n "- create metadata :"
+            if [ $debug -eq 1 ]; then
+                echo ""
+            fi
+
+            mapfile -t zip_files < <(find . -type f -name "index.zip")
+            numberofzips=${#zip_files[@]}
+
+            for n in "${zip_files[@]}"; do
+                ((counter++))
+                progress=$(printf "%.0f" "$(echo "scale=2; $counter * 100 / $numberofzips" | bc)")
+                if [ $debug -eq 1 ]; then
+                    echo -n " creating $(dirname "$n")/rule.json: [$progress%]"
+                else
+                    printf "\r- create metadata : %d%%" "$progress"
+                fi
+                if [ -s "$n" ]; then
+                    # relative paths look like <datasource>/<interval>/<version>/<partition>/index.zip;
+                    # rebuild the segment id as <datasource>_<interval>_<version> to match the -s format
+                    IFS='/' read -ra parts <<< "${n#./}"
+                    descriptorid="${parts[0]}_${parts[1]}_${parts[2]}"
+                    if [ -n "$descriptorid" ]; then
+                        rule_file="$(dirname "$n")/rule.json"
+                        rvm ruby-2.7.5@web do rb_druid_metadata -i "$descriptorid" > "$rule_file" 2>/dev/null
+                        if [ $debug -eq 1 ]; then
+                            if [ -s "$rule_file" ]; then
+                                print_result 0
+                            else
+                                print_result 1
+                            fi
+                        fi
+                    elif [ $debug -eq 1 ]; then
+                        print_result 1
+                    fi
+                elif [ $debug -eq 1 ]; then
+                    print_result 1
+                fi
+            done
+            echo ""
+            popd &>/dev/null
+
+            echo -n "- compress data into $(basename "$filename")"
+            nice -n 19 ionice -c2 -n7 tar czf "$filename" -C "$tmpdir" .
+            RET3=$?
+            print_result $RET3
+
+            echo -n "- deleting temporary data $tmpdir"
+            rm -rf "$tmpdir"
+            print_result $?
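+            # RET1 (pg_dump), RET2 (segment downloads) and RET3 (tar) are combined
+            # into the overall status reported below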
+            echo ""
+            echo -n "Backup file $filename saved"
+            if [ $RET1 -eq 0 ] && [ $RET2 -eq 0 ] && [ $RET3 -eq 0 ]; then
+                print_result 0
+            else
+                echo -n " (with errors) "
+                print_result 1
+            fi
+
+            end_time=$(date +%s)
+            elapsed_seconds=$((end_time - start_time))
+            elapsed_time=$(printf "%d:%02d:%02d" "$((elapsed_seconds / 3600))" "$(( (elapsed_seconds % 3600) / 60 ))" "$((elapsed_seconds % 60))")
+            echo "- total runtime: $elapsed_time (HH:MM:SS)"
+        fi
+    fi
+fi
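+
+# Example invocations (illustrative dates and paths, not from a real deployment):
+#   rb_backup_segments.sh -x 2025-05-27T09:00:00.000Z -y 2025-05-28T09:00:00.000Z
+#   rb_backup_segments.sh -t /var/backup/segments/weekly.tar -g "my_datasource"
+#   rb_backup_segments.sh -r -f /var/backup/segments/segment-202505280900.tar -e -n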