#!/usr/bin/env bash
#
# Print a housekeeping report for the repository that contains this script:
# top-level folder sizes, the largest files, orphan slip images under bets/,
# and which legacy archive folders exist.
#
# The repository root is taken to be the parent of this script's directory,
# so the script can be started from any working directory.
set -euo pipefail

# Resolve the root as an absolute path. CDPATH is cleared for this one `cd`:
# with CDPATH set, a relative target may resolve against a CDPATH entry, and
# `cd` then also prints the directory it chose, which would end up inside ROOT.
ROOT="$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
cd -- "$ROOT"

echo "== clawd housekeeping report =="
echo "Root: $ROOT"
echo

echo "-- Top-level folder sizes --"
# Show the 25 largest non-hidden top-level entries, smallest first.
# - `--` keeps entries whose names start with "-" from being read as options.
# - du exits non-zero when it cannot read something, or when the directory is
#   empty and the glob stays literal. Under `set -e -o pipefail` that would
#   abort the whole report, so its status is deliberately ignored here (its
#   error messages are already discarded).
{ du -sh -- * 2>/dev/null || true; } | sort -h | tail -n 25

echo

echo "-- Largest files (top 30) --"

# Print "<bytes> <path>" for every regular file at most three levels below the
# current directory. The stat format flag differs by platform (GNU: -c,
# BSD/macOS: -f), so probe once and use whichever this system understands.
# `-exec ... {} +` runs nothing when no file matches.
list_file_sizes() {
  if stat -c '%s' . >/dev/null 2>&1; then
    find . -maxdepth 3 -type f -exec stat -c '%s %n' {} + 2>/dev/null
  else
    find . -maxdepth 3 -type f -exec stat -f '%z %N' {} + 2>/dev/null
  fi
}

# Read "<bytes> <path>" lines on stdin and print the $1 (default 30) largest as
# "<size> MB  <path>", biggest first.
# The limit is applied inside awk rather than with `head`: awk reads all of its
# input, so `sort` is never killed by SIGPIPE, which `set -o pipefail` would
# turn into a script failure. The size is stripped with sub() on the whole
# line so that runs of spaces inside a path are kept as they are.
format_largest_files() {
  sort -nr \
    | awk -v limit="${1:-30}" '
        NR <= limit {
          size = $1
          sub(/^[0-9]+ /, "")
          printf("%10.1f MB  %s\n", size / 1024 / 1024, $0)
        }'
}

# A file that vanishes or cannot be stat'ed must not abort the report.
{ list_file_sizes || true; } | format_largest_files 30

echo

echo "-- Orphan slip images (images not referenced by any ticket.md) --"

# Print every backtick-quoted image path found in text files under directory
# $1: one per line, backticks stripped, sorted and de-duplicated.
list_referenced_images() {
  # The pattern has to be single-quoted: inside double quotes the shell treats
  # every backtick as the start of a command substitution.
  #   -E  enables the `+` and `(a|b)` operators used by the pattern
  #   -i  matches extensions in any case, like the -iname file listing does
  #   -I  skips binary files such as the images themselves
  # grep exits non-zero when nothing matches or the directory is missing;
  # neither is an error for this report.
  { grep -RIhoiE '`[^`]+\.(jpg|jpeg|png|webp)`' -- "$1" 2>/dev/null || true; } \
    | sed -E 's/^`//; s/`$//' \
    | sort -u
}

# Print the path, relative to directory $1, of every image file below it,
# sorted and de-duplicated.
list_image_files() {
  (
    # CDPATH is cleared so a relative $1 is resolved against the current
    # directory only and `cd` prints nothing.
    CDPATH= cd -- "$1" \
      && find . -type f \( -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' \)
  ) \
    | sed 's#^\./##' \
    | sort -u
}

# Copy stdin to stdout, but stop printing after $1 lines (default 100) and add
# "(truncated)" if more followed. awk reads all of its input, so the producer
# is never killed by SIGPIPE (which `set -o pipefail` would turn into a script
# failure), and the list only has to be computed once.
cap_lines() {
  awk -v limit="${1:-100}" '
    NR <= limit { print }
    END { if (NR > limit) print "(truncated)" }'
}

TMP_REF="$(mktemp)"
TMP_ALL="$(mktemp)"
trap 'rm -f "$TMP_REF" "$TMP_ALL"' EXIT

# A missing bets/ directory yields empty lists instead of aborting the report.
list_referenced_images bets > "$TMP_REF" || true
list_image_files bets > "$TMP_ALL" || true

# Both lists are sorted, so `comm -23` prints the lines found only in the
# first one: image files that nothing references.
comm -23 "$TMP_ALL" "$TMP_REF" | cap_lines 100

echo

echo "-- Legacy folders present? --"
# Folders checked relative to the current directory. Only the ones that exist
# are listed; a missing folder produces no output.
legacy_dirs=(archive/legacy_wins archive/legacy_losses)
for legacy_dir in "${legacy_dirs[@]}"; do
  [ -d "$legacy_dir" ] || continue
  echo "OK: $legacy_dir"
done
