|
| 1 | +#!/bin/bash |
| 2 | +# |
| 3 | +# compare-bloat.sh — Compare database bloat between SOURCE and TARGET |
| 4 | +# |
| 5 | +# Run on the migration instance where both databases are accessible. |
| 6 | +# Reads connection strings from ~/.env (PGCOPYDB_SOURCE_PGURI, PGCOPYDB_TARGET_PGURI) |
| 7 | +# |
| 8 | +# Compares table heap, TOAST, and index sizes between source and target to |
| 9 | +# quantify bloat reduction after migration. Uses only catalog queries — |
| 10 | +# no table scans, no writes, no locks, safe for production. |
| 11 | +# |
| 12 | +# Usage: ./compare-bloat.sh [--min-size-mb N] [--top-indexes N] |
| 13 | +# |
set -euo pipefail

# --- Load environment ---
# Connection strings are read from ~/.env. While the file is sourced:
#   * allexport (set -a) is on, so every assignment it makes is exported and
#     inherited by child processes such as psql;
#   * nounset is off, so a reference to an unset variable inside the file
#     does not abort this script.
ENV_FILE=~/.env

# Fail with a clear message instead of the raw "No such file" error that
# `source` would otherwise produce under `set -e`.
if [[ ! -r "$ENV_FILE" ]]; then
  printf 'ERROR: cannot read environment file %s\n' "$ENV_FILE" >&2
  exit 1
fi

set +u
set -a
# shellcheck disable=SC1090  # path is only known at run time
source "$ENV_FILE"
set +a
set -u

# Both URIs are required by every query that follows; diagnostics go to stderr.
if [[ -z "${PGCOPYDB_SOURCE_PGURI:-}" || -z "${PGCOPYDB_TARGET_PGURI:-}" ]]; then
  printf '%s\n' "ERROR: PGCOPYDB_SOURCE_PGURI and PGCOPYDB_TARGET_PGURI must be set in ~/.env" >&2
  exit 1
fi
| 27 | +# --- loaded --- |
| 28 | + |
# --- Colors ---
# ANSI escape sequences kept as literal backslash strings; they are turned
# into real escapes later by printf format strings and `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BOLD='\033[1m'
NC='\033[0m'

# --- Config (overridable via flags) ---
MIN_TABLE_SIZE_MB=100
TOP_INDEX_COUNT=20

#######################################
# Abort unless an option value is a non-negative decimal integer.
# The values end up inside $(( )) and inside SQL text, so anything other than
# plain digits must be rejected before it is used.
# Arguments: $1 - option name (for the error message)
#            $2 - value supplied for the option (may be missing)
# Outputs:   error message on stderr when the value is invalid
# Returns:   exits the shell with status 1 on invalid input
#######################################
_require_uint() {
  if [[ ! "${2:-}" =~ ^[0-9]+$ ]]; then
    printf 'ERROR: %s requires a non-negative integer argument\n' "$1" >&2
    exit 1
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --min-size-mb)
      _require_uint "$1" "${2:-}"
      # 10# forces base 10 so a value such as "08" is not parsed as octal.
      MIN_TABLE_SIZE_MB=$((10#$2))
      shift 2
      ;;
    --top-indexes)
      _require_uint "$1" "${2:-}"
      TOP_INDEX_COUNT=$((10#$2))
      shift 2
      ;;
    *)
      printf 'Unknown option: %s\n' "$1" >&2
      exit 1
      ;;
  esac
done

MIN_TABLE_SIZE_BYTES=$((MIN_TABLE_SIZE_MB * 1024 * 1024))
| 49 | + |
| 50 | +# --- Helper functions --- |
#######################################
# Run one SQL statement against the SOURCE database.
# Globals:   PGCOPYDB_SOURCE_PGURI (read)
# Arguments: $1 - SQL text
# Outputs:   stdout - result rows, tuples only, unaligned, fields separated
#                     by '|'; an empty line if psql fails
#            stderr - a warning if psql fails (psql's own stderr is discarded)
# Returns:   0 always, so callers under `set -e` keep running
#######################################
src_query() {
  local sql="$1"
  # -X skips ~/.psqlrc so user settings cannot add text to the parsed output.
  if ! psql -X "$PGCOPYDB_SOURCE_PGURI" -t -A -F'|' -c "$sql" 2>/dev/null; then
    printf 'WARNING: query against SOURCE failed; treating result as empty\n' >&2
    echo ""
  fi
}
| 54 | + |
#######################################
# Run one SQL statement against the TARGET database.
# Globals:   PGCOPYDB_TARGET_PGURI (read)
# Arguments: $1 - SQL text
# Outputs:   stdout - result rows, tuples only, unaligned, fields separated
#                     by '|'; an empty line if psql fails
#            stderr - a warning if psql fails (psql's own stderr is discarded)
# Returns:   0 always, so callers under `set -e` keep running
#######################################
tgt_query() {
  local sql="$1"
  # -X skips ~/.psqlrc so user settings cannot add text to the parsed output.
  if ! psql -X "$PGCOPYDB_TARGET_PGURI" -t -A -F'|' -c "$sql" 2>/dev/null; then
    printf 'WARNING: query against TARGET failed; treating result as empty\n' >&2
    echo ""
  fi
}
| 58 | + |
#######################################
# Format a byte count with IEC binary units (KiB, MiB, ...).
# Arguments: $1 - byte count; may be negative; defaults to 0 when omitted
# Outputs:   "0 B" for zero, otherwise numfmt's rendering; if numfmt is
#            missing or rejects the value, the raw value followed by " B"
#######################################
human_size() {
  local bytes="${1:-0}"
  # The comparison's own error output (non-numeric input) is discarded; such
  # input simply falls through to numfmt and then to the raw fallback.
  if [ "$bytes" -eq 0 ] 2>/dev/null; then
    echo "0 B"
    return
  fi
  # "--" ends option parsing so a negative count (target larger than source)
  # is formatted as a number instead of being rejected as an unknown option.
  numfmt --to=iec-i --suffix=B -- "$bytes" 2>/dev/null || echo "${bytes} B"
}
| 67 | + |
#######################################
# Integer percentage of num relative to den (truncating division).
# Arguments: $1 - numerator   (integer, may be negative; default 0)
#            $2 - denominator (integer; default 0)
# Outputs:   "<n>%" on stdout, or "—" when the denominator is zero or either
#            argument is not a plain decimal integer
#######################################
pct() {
  local num="${1:-0}"
  local den="${2:-0}"
  # Only canonical decimal integers are accepted: anything else would either
  # break $(( )) or be evaluated as a variable name / octal literal.
  local int_re='^-?(0|[1-9][0-9]*)$'

  if [[ ! "$num" =~ $int_re || ! "$den" =~ $int_re ]]; then
    echo "—"
    return
  fi
  if (( den == 0 )); then
    echo "—"
    return
  fi
  echo "$((num * 100 / den))%"
}
| 77 | + |
| 78 | +# ══════════════════════════════════════════════════════════════════ |
NOW=$(date -u '+%Y-%m-%d %H:%M:%S UTC')

echo ""
echo "══════════════════════════════════════════════════════════════════"
echo " Database Bloat Comparison — $NOW"
echo "══════════════════════════════════════════════════════════════════"

# --- Section 1: Database Overview ---
# Prints server version and total database size for both sides, then the
# overall size difference (source minus target).
echo ""
echo " DATABASE OVERVIEW"
echo " ────────────────────────────────────────────────────────────────"

# -X keeps ~/.psqlrc from adding text to values that are parsed below.
SRC_VER=$(psql -X "$PGCOPYDB_SOURCE_PGURI" -t -A -c "SHOW server_version;" 2>/dev/null || echo "unknown")
TGT_VER=$(psql -X "$PGCOPYDB_TARGET_PGURI" -t -A -c "SHOW server_version;" 2>/dev/null || echo "unknown")

SRC_DB_SIZE=$(psql -X "$PGCOPYDB_SOURCE_PGURI" -t -A -c "SELECT pg_database_size(current_database());" 2>/dev/null || echo "0")
TGT_DB_SIZE=$(psql -X "$PGCOPYDB_TARGET_PGURI" -t -A -c "SELECT pg_database_size(current_database());" 2>/dev/null || echo "0")

# The sizes feed $(( )) below; anything that is not a plain digit string
# (empty result, stray output) is treated as 0 rather than aborting the run.
[[ "$SRC_DB_SIZE" =~ ^[0-9]+$ ]] || SRC_DB_SIZE=0
[[ "$TGT_DB_SIZE" =~ ^[0-9]+$ ]] || TGT_DB_SIZE=0

# Positive when the target is smaller than the source.
DB_DIFF=$((SRC_DB_SIZE - TGT_DB_SIZE))
DB_PCT=$(pct "$DB_DIFF" "$SRC_DB_SIZE")

echo ""
printf " %-12s %-20s %s\n" "" "SOURCE" "TARGET"
printf " %-12s %-20s %s\n" "Version" "PostgreSQL $SRC_VER" "PostgreSQL $TGT_VER"
printf " %-12s %-20s %s\n" "Total size" "$(human_size "$SRC_DB_SIZE")" "$(human_size "$TGT_DB_SIZE")"
echo ""
printf " ${BOLD}Size reduction: %s (%s)${NC}\n" "$(human_size "$DB_DIFF")" "$DB_PCT"
| 106 | + |
# --- Section 2: Per-Table Comparison ---
# For every source table whose main fork exceeds the size threshold, prints
# heap / TOAST / index sizes on both sides with the percentage saved, and
# accumulates the TOTAL_* counters that the summary section reads.
echo ""
echo ""
echo " PER-TABLE COMPARISON (tables > ${MIN_TABLE_SIZE_MB} MB on source)"
echo " ────────────────────────────────────────────────────────────────"

#######################################
# Emit the per-table size query.
# Arguments: $1 - minimum main-fork size in bytes (exclusive); -1 selects
#                 every ordinary table
# Outputs:   SQL text on stdout; columns are
#            schema.table | heap bytes | toast bytes | index bytes | reltuples
#######################################
_table_size_query() {
  local min_heap_bytes="$1"
  cat <<SQL
SELECT
  n.nspname || '.' || c.relname,
  pg_relation_size(c.oid, 'main'),
  COALESCE(pg_relation_size(c.reltoastrelid), 0),
  pg_indexes_size(c.oid),
  c.reltuples::bigint
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind = 'r'
  AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_toast')
  AND pg_relation_size(c.oid, 'main') > ${min_heap_bytes}
ORDER BY pg_total_relation_size(c.oid) DESC;
SQL
}

TABLE_QUERY=$(_table_size_query "$MIN_TABLE_SIZE_BYTES")
# The size threshold applies to the SOURCE only. Filtering the target with
# the same threshold would drop any table that shrank below it, and the
# lookup below would then report that table as 0 bytes / 100% reduction.
TGT_TABLE_QUERY=$(_table_size_query -1)

SRC_TABLES=$(src_query "$TABLE_QUERY")
TGT_TABLES=$(tgt_query "$TGT_TABLE_QUERY")

# Parse target into associative arrays keyed by "schema.table"
declare -A TGT_HEAP TGT_TOAST TGT_IDX TGT_ROWS
while IFS='|' read -r name heap toast idx rows; do
  [[ -n "$name" ]] || continue
  TGT_HEAP["$name"]="$heap"
  TGT_TOAST["$name"]="$toast"
  TGT_IDX["$name"]="$idx"
  TGT_ROWS["$name"]="$rows"
done <<< "$TGT_TABLES"

# Accumulators for summary
TOTAL_SRC_HEAP=0
TOTAL_TGT_HEAP=0
TOTAL_SRC_TOAST=0
TOTAL_TGT_TOAST=0
TOTAL_SRC_IDX=0
TOTAL_TGT_IDX=0
TABLE_COUNT=0

echo ""
printf " ${BOLD}%-40s %10s %10s %6s %10s %10s %6s %10s %10s %6s${NC}\n" \
  "Table" "Src Heap" "Tgt Heap" "Heap%" "Src TOAST" "Tgt TOAST" "TOAST%" "Src Idx" "Tgt Idx" "Idx%"
printf " %-40s %10s %10s %6s %10s %10s %6s %10s %10s %6s\n" \
  "────────────────────────────────────────" "──────────" "──────────" "──────" "──────────" "──────────" "──────" "──────────" "──────────" "──────"

while IFS='|' read -r name src_heap src_toast src_idx src_rows; do
  [[ -n "$name" ]] || continue

  # A table that does not exist on the target at all is counted as 0 bytes.
  tgt_heap="${TGT_HEAP[$name]:-0}"
  tgt_toast="${TGT_TOAST[$name]:-0}"
  tgt_idx="${TGT_IDX[$name]:-0}"

  TOTAL_SRC_HEAP=$((TOTAL_SRC_HEAP + src_heap))
  TOTAL_TGT_HEAP=$((TOTAL_TGT_HEAP + tgt_heap))
  TOTAL_SRC_TOAST=$((TOTAL_SRC_TOAST + src_toast))
  TOTAL_TGT_TOAST=$((TOTAL_TGT_TOAST + tgt_toast))
  TOTAL_SRC_IDX=$((TOTAL_SRC_IDX + src_idx))
  TOTAL_TGT_IDX=$((TOTAL_TGT_IDX + tgt_idx))
  TABLE_COUNT=$((TABLE_COUNT + 1))

  # Truncate long table names so the columns stay aligned
  display_name="$name"
  if [[ ${#display_name} -gt 40 ]]; then
    display_name="${display_name:0:37}..."
  fi

  heap_diff=$((src_heap - tgt_heap))
  toast_diff=$((src_toast - tgt_toast))
  idx_diff=$((src_idx - tgt_idx))

  printf " %-40s %10s %10s %6s %10s %10s %6s %10s %10s %6s\n" \
    "$display_name" \
    "$(human_size "$src_heap")" "$(human_size "$tgt_heap")" "$(pct "$heap_diff" "$src_heap")" \
    "$(human_size "$src_toast")" "$(human_size "$tgt_toast")" "$(pct "$toast_diff" "$src_toast")" \
    "$(human_size "$src_idx")" "$(human_size "$tgt_idx")" "$(pct "$idx_diff" "$src_idx")"
done <<< "$SRC_TABLES"

# An empty listing is otherwise indistinguishable from a failed source query.
if [[ "$TABLE_COUNT" -eq 0 ]]; then
  echo " (no source tables above the size threshold, or the source query failed)"
fi

echo ""
HEAP_DIFF_TOTAL=$((TOTAL_SRC_HEAP - TOTAL_TGT_HEAP))
TOAST_DIFF_TOTAL=$((TOTAL_SRC_TOAST - TOTAL_TGT_TOAST))
IDX_DIFF_TOTAL=$((TOTAL_SRC_IDX - TOTAL_TGT_IDX))

printf " ${BOLD}%-40s %10s %10s %6s %10s %10s %6s %10s %10s %6s${NC}\n" \
  "TOTALS ($TABLE_COUNT tables)" \
  "$(human_size "$TOTAL_SRC_HEAP")" "$(human_size "$TOTAL_TGT_HEAP")" "$(pct "$HEAP_DIFF_TOTAL" "$TOTAL_SRC_HEAP")" \
  "$(human_size "$TOTAL_SRC_TOAST")" "$(human_size "$TOTAL_TGT_TOAST")" "$(pct "$TOAST_DIFF_TOTAL" "$TOTAL_SRC_TOAST")" \
  "$(human_size "$TOTAL_SRC_IDX")" "$(human_size "$TOTAL_TGT_IDX")" "$(pct "$IDX_DIFF_TOTAL" "$TOTAL_SRC_IDX")"
| 198 | + |
# --- Section 3: Top Indexes by Size Difference ---
# Lists the TOP_INDEX_COUNT indexes with the largest (source - target) size
# difference. Indexes are matched between the two sides by schema.indexname.
echo ""
echo ""
echo " TOP ${TOP_INDEX_COUNT} INDEXES BY SIZE DIFFERENCE"
echo " ────────────────────────────────────────────────────────────────"

# pg_toast is excluded: TOAST index names embed the owning table's OID, which
# differs between clusters, so they can never be matched by name and would
# always show up as a full-size "reduction".
INDEX_QUERY="
SELECT
  n.nspname || '.' || ci.relname,
  ct.relname,
  pg_relation_size(ci.oid)
FROM pg_class ci
JOIN pg_index i ON i.indexrelid = ci.oid
JOIN pg_class ct ON ct.oid = i.indrelid
JOIN pg_namespace n ON n.oid = ci.relnamespace
WHERE ci.relkind = 'i'
  AND n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_toast')
ORDER BY pg_relation_size(ci.oid) DESC;
"

SRC_INDEXES=$(src_query "$INDEX_QUERY")
TGT_INDEXES=$(tgt_query "$INDEX_QUERY")

# Parse target indexes into a name -> size lookup
declare -A TGT_IDX_SIZE
while IFS='|' read -r idx_name tbl_name idx_size; do
  [[ -n "$idx_name" ]] || continue
  TGT_IDX_SIZE["$idx_name"]="$idx_size"
done <<< "$TGT_INDEXES"

# Build array of "diff|name|src_size|tgt_size|table" records
declare -a IDX_DIFFS=()
while IFS='|' read -r idx_name tbl_name src_size; do
  [[ -n "$idx_name" ]] || continue
  # An index with no same-named counterpart on the target counts as 0 bytes.
  tgt_size="${TGT_IDX_SIZE[$idx_name]:-0}"
  diff=$((src_size - tgt_size))
  IDX_DIFFS+=("${diff}|${idx_name}|${src_size}|${tgt_size}|${tbl_name}")
done <<< "$SRC_INDEXES"

# Sort by diff descending and take top N.
# awk consumes all of sort's output, unlike `head`, which exits early and can
# kill sort with SIGPIPE; under `set -o pipefail` that would abort the script.
# The length guard avoids expanding an empty array, which older Bash versions
# reject under `set -u`.
SORTED_IDXS=""
if [[ ${#IDX_DIFFS[@]} -gt 0 ]]; then
  SORTED_IDXS=$(printf '%s\n' "${IDX_DIFFS[@]}" \
    | sort -t'|' -k1,1 -rn \
    | awk -v n="$TOP_INDEX_COUNT" 'NR <= n')
fi

echo ""
printf " ${BOLD}%-50s %-20s %10s %10s %10s${NC}\n" \
  "Index" "Table" "Source" "Target" "Reduction"
printf " %-50s %-20s %10s %10s %10s\n" \
  "──────────────────────────────────────────────────" "────────────────────" "──────────" "──────────" "──────────"

while IFS='|' read -r diff idx_name src_size tgt_size tbl_name; do
  [[ -n "$idx_name" ]] || continue

  # Truncate over-long names so the columns stay aligned
  display_idx="$idx_name"
  if [[ ${#display_idx} -gt 50 ]]; then
    display_idx="${display_idx:0:47}..."
  fi
  display_tbl="$tbl_name"
  if [[ ${#display_tbl} -gt 20 ]]; then
    display_tbl="${display_tbl:0:17}..."
  fi

  printf " %-50s %-20s %10s %10s %10s\n" \
    "$display_idx" "$display_tbl" \
    "$(human_size "$src_size")" "$(human_size "$tgt_size")" \
    "$(human_size "$diff")"
done <<< "$SORTED_IDXS"
| 264 | + |
# --- Section 4: Summary ---
# Final roll-up of the per-table totals (heap, TOAST, indexes), followed by
# the database-level figures computed in the overview section.
HEAP_DIFF=$((TOTAL_SRC_HEAP - TOTAL_TGT_HEAP))
TOAST_DIFF=$((TOTAL_SRC_TOAST - TOTAL_TGT_TOAST))
IDX_DIFF=$((TOTAL_SRC_IDX - TOTAL_TGT_IDX))
TOTAL_DIFF=$((HEAP_DIFF + TOAST_DIFF + IDX_DIFF))
TOTAL_SRC=$((TOTAL_SRC_HEAP + TOTAL_SRC_TOAST + TOTAL_SRC_IDX))
# Target total expressed as source total minus everything that was saved.
TOTAL_TGT=$((TOTAL_SRC - TOTAL_DIFF))

# Print one five-column row of the summary table.
# Arguments: component, source, target, reduction, percentage (all strings)
_summary_row() {
  printf " %-20s %12s %12s %12s %8s\n" "$@"
}

# Print the horizontal rule used under the header and above the total.
_summary_rule() {
  _summary_row "────────────────────" "────────────" "────────────" "────────────" "────────"
}

echo ""
echo ""
echo " ══════════════════════════════════════════════════════════════════"
printf " ${BOLD}BLOAT REDUCTION SUMMARY${NC} (tables > %s MB)\n" "${MIN_TABLE_SIZE_MB}"
echo " ══════════════════════════════════════════════════════════════════"
echo ""
_summary_row "Component" "Source" "Target" "Reduction" "Pct"
_summary_rule
_summary_row "Table heap" \
  "$(human_size "$TOTAL_SRC_HEAP")" "$(human_size "$TOTAL_TGT_HEAP")" \
  "$(human_size "$HEAP_DIFF")" "$(pct "$HEAP_DIFF" "$TOTAL_SRC_HEAP")"
_summary_row "TOAST data" \
  "$(human_size "$TOTAL_SRC_TOAST")" "$(human_size "$TOTAL_TGT_TOAST")" \
  "$(human_size "$TOAST_DIFF")" "$(pct "$TOAST_DIFF" "$TOTAL_SRC_TOAST")"
_summary_row "Indexes" \
  "$(human_size "$TOTAL_SRC_IDX")" "$(human_size "$TOTAL_TGT_IDX")" \
  "$(human_size "$IDX_DIFF")" "$(pct "$IDX_DIFF" "$TOTAL_SRC_IDX")"
_summary_rule
printf " ${BOLD}%-20s %12s %12s %12s %8s${NC}\n" \
  "TOTAL" \
  "$(human_size "$TOTAL_SRC")" "$(human_size "$TOTAL_TGT")" \
  "$(human_size "$TOTAL_DIFF")" "$(pct "$TOTAL_DIFF" "$TOTAL_SRC")"
echo ""
echo " Database-level: $(human_size "$SRC_DB_SIZE") → $(human_size "$TGT_DB_SIZE") ($(human_size "$DB_DIFF") / $DB_PCT reduction)"
echo ""
echo "══════════════════════════════════════════════════════════════════"
echo ""
0 commit comments