#!/usr/bin/env bash
set -euo pipefail

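# Non-destructive archiver: moves older raw inflow into archive/raw/<area>/YYYY-MM/
# Default recommendations:
# - imports/ and inbox/: archive top-level items older than 30 days (--cutoff-days)
# - health/: keep last 7 days "hot", archive older day-folders (--health-hot-days)
# - _tmp/: archive top-level items older than 14 days (--tmp-cutoff-days)
# - with --recursive: also archive individual files inside imports/ and inbox/
#   that are older than 60 days (--recursive-cutoff-days)
#
# Usage:
#   scripts/housekeeping_move_to_archive.sh --dry-run
#   scripts/housekeeping_move_to_archive.sh --cutoff-days 30 --health-hot-days 7
#   scripts/housekeeping_move_to_archive.sh --recursive --recursive-cutoff-days 60 --tmp-cutoff-days 14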

# Resolve the repository root as the parent of the directory that holds this
# script, then work from there so the relative paths used by the rest of the
# script (imports/, inbox/, health/, _tmp/, archive/raw/) are repo-relative.
# CDPATH is cleared for the lookup: with CDPATH set, `cd` on a relative path can
# land in a different directory and echoes its target, which would end up in the
# captured value. `--` keeps a path starting with '-' from being read as an option.
ROOT="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
cd -- "$ROOT"

# Defaults; every one of them can be overridden on the command line.
DRY_RUN=0                 # 1 = only print the moves that would be made
CUTOFF_DAYS=30            # age threshold (days) for top-level imports/ and inbox/ entries
HEALTH_HOT_DAYS=7         # health/ day-folders modified within this many days stay in place
RECURSIVE=0               # 1 = also run the recursive leaf-file pass
RECURSIVE_CUTOFF_DAYS=60  # age threshold (days) for the recursive leaf-file pass
TMP_CUTOFF_DAYS=14        # age threshold (days) for top-level _tmp/ entries

# Check that a day-count option was given a usable value.
# Arguments: $1 - option name, $2 - its value (may be absent)
# Exits with status 2 and a message on stderr when the value is missing or is
# not a non-negative integer.
require_day_count() {
  local opt="$1"
  if [[ $# -lt 2 ]]; then
    echo "Missing value for ${opt}" >&2
    exit 2
  fi
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "Invalid value for ${opt}: '$2' (expected a non-negative integer)" >&2
    exit 2
  fi
}

# Parse command-line options into the globals above.
# Arguments: the script's arguments ("$@")
# Exits 0 after printing help, 2 on an unknown option or a bad option value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run)
        DRY_RUN=1
        shift
        ;;
      --cutoff-days)
        require_day_count "$@"
        # 10# normalises leading zeros ("030" -> 30) instead of treating them as octal.
        CUTOFF_DAYS=$((10#$2))
        shift 2
        ;;
      --health-hot-days)
        require_day_count "$@"
        HEALTH_HOT_DAYS=$((10#$2))
        shift 2
        ;;
      --recursive)
        RECURSIVE=1
        shift
        ;;
      --recursive-cutoff-days)
        require_day_count "$@"
        RECURSIVE_CUTOFF_DAYS=$((10#$2))
        shift 2
        ;;
      --tmp-cutoff-days)
        require_day_count "$@"
        TMP_CUTOFF_DAYS=$((10#$2))
        shift 2
        ;;
      -h|--help)
        # Help is the leading part of this script (its header comment included).
        sed -n '1,160p' "$0"
        exit 0
        ;;
      *)
        echo "Unknown arg: $1" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"

# Epoch-time helpers. Plain shell arithmetic: no python3 and no BSD-only
# `date -v` are needed, and the argument is never spliced into other code.

# Print the current time as seconds since the Unix epoch.
now_epoch() { date +%s; }

# Print the epoch timestamp that lies <days> * 86400 seconds before now.
# Arguments: $1 - non-negative integer number of days (leading zeros allowed)
# Outputs:   the timestamp on stdout
# Returns:   2, with a message on stderr and nothing on stdout, when $1 is
#            missing or not a non-negative integer (so a caller never receives
#            an empty cutoff without noticing)
days_ago_epoch() {
  local days="${1-}"
  if [[ ! "$days" =~ ^[0-9]+$ ]]; then
    echo "days_ago_epoch: invalid day count: '${days}'" >&2
    return 2
  fi
  local now
  now=$(now_epoch)
  # 10# forces base 10 so a value such as "08" is not rejected as bad octal.
  echo $(( now - 10#$days * 86400 ))
}

# Age thresholds as epoch seconds. The archive passes move an entry only when
# its mtime is below the matching threshold.
cutoff_epoch=$(days_ago_epoch "$CUTOFF_DAYS")
health_cutoff_epoch=$(days_ago_epoch "$HEALTH_HOT_DAYS")
recursive_cutoff_epoch=$(days_ago_epoch "$RECURSIVE_CUTOFF_DAYS")
tmp_cutoff_epoch=$(days_ago_epoch "$TMP_CUTOFF_DAYS")

# Stop here if any threshold is not a plain integer: the age comparisons in the
# archive passes would be meaningless with an empty or garbled value.
for _epoch_var in cutoff_epoch health_cutoff_epoch recursive_cutoff_epoch tmp_cutoff_epoch; do
  if [[ ! "${!_epoch_var}" =~ ^-?[0-9]+$ ]]; then
    echo "Could not compute ${_epoch_var} (got: '${!_epoch_var}')" >&2
    exit 1
  fi
done
unset _epoch_var

# Create the archive area roots up front, except in dry-run mode, which must
# leave the filesystem untouched.
if [[ $DRY_RUN -ne 1 ]]; then
  mkdir -p archive/raw/imports archive/raw/inbox archive/raw/health archive/raw/tmp
fi

# Move one file or directory into an archive directory, or only report the move
# when in dry-run mode.
# Globals:   DRY_RUN (read)
# Arguments: $1 - path to move
#            $2 - destination directory (created on demand for a real move)
# Outputs:   dry-run: the mv command that would run, on stdout
#            skip: a "[skip]" notice on stderr
# Returns:   0 after a move, a dry-run report or a skip; otherwise the failing
#            mkdir/mv status
move_item() {
  local src="$1" dest_dir="$2"

  # Name the item will have inside dest_dir (tolerates one trailing slash on src).
  local name="${src%/}"
  name="${name##*/}"
  local target="${dest_dir}/${name}"

  # Never replace something already archived under the same name; leave the
  # source where it is so nothing is lost. -L also catches a dangling symlink.
  if [[ -e "$target" || -L "$target" ]]; then
    echo "[skip] \"$target\" already exists; leaving \"$src\" in place" >&2
    return 0
  fi

  if [[ $DRY_RUN -eq 1 ]]; then
    # Report only: a dry run must not create directories either.
    echo "[dry-run] mv \"$src\" \"$dest_dir/\""
    return 0
  fi

  mkdir -p -- "$dest_dir"
  mv -- "$src" "$dest_dir/"
}

# Archive a single file while keeping its repo-relative directory below the
# month folder: the destination is
#   archive/raw/<area>/<YYYY-MM of mtime>/<directory part of the repo-relative path>
# Globals:   ROOT (read)
# Arguments: $1 - source path (absolute, or relative to the current directory)
#            $2 - archive area name
#            $3 - the file's mtime in epoch seconds
# Outputs / Returns: those of move_item
move_file_preserve_rel() {
  local src="$1" area="$2" mtime="$3"

  local ym
  # BSD/macOS date: -r <seconds> formats an epoch timestamp.
  ym=$(date -r "$mtime" +%Y-%m)

  # Repo-relative path with symlinks resolved. The paths travel as arguments
  # (sys.argv), never as part of the Python source, so quotes or backslashes in
  # a file name can neither break the snippet nor be run as code.
  # Best effort on purpose: when python3 is missing or fails, rel stays empty
  # and the source path is used as given.
  local rel
  rel=$(python3 - "$ROOT" "$src" <<'PY' 2>/dev/null || true
import os
import sys
root = os.path.realpath(sys.argv[1])
src = os.path.realpath(sys.argv[2])
print(os.path.relpath(src, root))
PY
)
  if [[ -z "$rel" ]]; then
    rel="$src"
  fi

  local rel_dir
  rel_dir=$(dirname "$rel")
  move_item "$src" "archive/raw/${area}/${ym}/${rel_dir}"
}

# --- imports/ ---
# Move files/dirs in imports/ older than cutoff, excluding a small allowlist of stable paths.
# Archive top-level entries of imports/ whose mtime is older than the cutoff
# into archive/raw/imports/<YYYY-MM of mtime>/.
# Pinned paths are never moved. Only depth-1 entries are listed here, so a
# pinned file is protected by also skipping any directory that contains it;
# matching the listed entries against the full pinned path alone would never
# match anything.
# Globals:   CUTOFF_DAYS, cutoff_epoch (read)
# Returns:   0 when imports/ does not exist
archive_imports() {
  [[ -d imports ]] || return 0
  echo "== imports (>${CUTOFF_DAYS}d) =="

  # Stable/current pointers and state files that must stay where they are.
  local pinned=(
    "imports/healthconnect/health_connect_export.db"
    "imports/healthconnect/ingest_state.json"
    "imports/bet365/ingest_state.json"
  )

  local entry pin keep mtime ym
  while IFS= read -r -d '' entry; do
    # Keep the entry if it is a pinned path or an ancestor directory of one.
    keep=0
    for pin in "${pinned[@]}"; do
      if [[ "$pin" == "$entry" || "$pin" == "$entry"/* ]]; then
        keep=1
        break
      fi
    done
    if [[ $keep -eq 1 ]]; then
      continue
    fi

    # BSD/macOS stat: %m is the modification time in epoch seconds.
    mtime=$(stat -f %m "$entry")
    if [[ "$mtime" -ge "$cutoff_epoch" ]]; then
      continue
    fi
    ym=$(date -r "$mtime" +%Y-%m)
    move_item "$entry" "archive/raw/imports/$ym"
  done < <(find imports -mindepth 1 -maxdepth 1 -print0)
}

# --- inbox/ ---
# Archive top-level entries of inbox/ whose mtime is older than the cutoff into
# archive/raw/inbox/<YYYY-MM of mtime>/.
# Globals:   CUTOFF_DAYS, cutoff_epoch (read)
# Returns:   0 when inbox/ does not exist
archive_inbox() {
  if [[ ! -d inbox ]]; then
    return 0
  fi

  local ym mtime entry
  echo "== inbox (>${CUTOFF_DAYS}d) =="

  while IFS= read -r -d '' entry; do
    # BSD/macOS stat: %m is the modification time in epoch seconds.
    mtime=$(stat -f %m "$entry")
    # Entries at or after the cutoff are still fresh and stay in place.
    if ! [[ "$mtime" -ge "$cutoff_epoch" ]]; then
      ym=$(date -r "$mtime" +%Y-%m)
      move_item "$entry" "archive/raw/inbox/$ym"
    fi
  done < <(find inbox -mindepth 1 -maxdepth 1 -print0)
}

# --- health/ ---
# Keep last HEALTH_HOT_DAYS in place; move older dated folders health/YYYY-MM-DD/* into archive/raw/health/YYYY-MM/
# Archive dated day-folders health/YYYY-MM-DD into archive/raw/health/YYYY-MM/.
# A folder is moved when its mtime is older than the hot-window cutoff; the
# month bucket is taken from the folder name, not from the mtime.
# Sub-directories whose name is not a YYYY-MM-DD date are left alone.
# Globals:   HEALTH_HOT_DAYS, health_cutoff_epoch (read)
# Returns:   0 when health/ does not exist
archive_health() {
  [[ -d health ]] || return 0
  echo "== health (keep last ${HEALTH_HOT_DAYS}d hot) =="

  # [0-9] rather than \d: \d is not part of POSIX extended regular expressions
  # and is only understood by some grep/regex implementations.
  local date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'

  local dir base mtime ym
  while IFS= read -r -d '' dir; do
    base="${dir##*/}"
    # only archive dated folders
    if [[ ! "$base" =~ $date_re ]]; then
      continue
    fi

    # BSD/macOS stat: %m is the modification time in epoch seconds.
    mtime=$(stat -f %m "$dir")
    if [[ "$mtime" -ge "$health_cutoff_epoch" ]]; then
      continue
    fi

    ym="${base:0:7}"  # the YYYY-MM prefix of the folder name
    move_item "$dir" "archive/raw/health/$ym"
  done < <(find health -mindepth 1 -maxdepth 1 -type d -print0)
}

# Archive top-level entries of _tmp/ whose mtime is older than the _tmp cutoff
# into archive/raw/tmp/<YYYY-MM of mtime>/.
# Globals:   TMP_CUTOFF_DAYS, tmp_cutoff_epoch (read)
# Returns:   0 when _tmp/ does not exist
archive_tmp() {
  if ! [[ -d _tmp ]]; then
    return 0
  fi

  local ym
  local mtime
  local item

  echo "== _tmp (>${TMP_CUTOFF_DAYS}d) =="

  while IFS= read -r -d '' item; do
    # BSD/macOS stat: %m is the modification time in epoch seconds.
    mtime=$(stat -f %m "$item")

    # Only entries strictly older than the cutoff are moved.
    if ! [[ "$mtime" -ge "$tmp_cutoff_epoch" ]]; then
      ym=$(date -r "$mtime" +%Y-%m)
      move_item "$item" "archive/raw/tmp/$ym"
    fi
  done < <(find _tmp -mindepth 1 -maxdepth 1 -print0)
}

# Optional pass (enabled by RECURSIVE=1): walk imports/ and inbox/ recursively
# and archive every regular file older than the recursive cutoff through
# move_file_preserve_rel. Pinned paths and anything under node_modules/ or
# .git/ are never moved.
# Globals:   RECURSIVE, RECURSIVE_CUTOFF_DAYS, recursive_cutoff_epoch, ROOT (read)
# Returns:   0 immediately when the pass is not enabled
archive_recursive_leaf_files() {
  [[ $RECURSIVE -eq 1 ]] || return 0

  echo "== recursive leaf archiving (imports + inbox, >${RECURSIVE_CUTOFF_DAYS}d) =="

  # Pinned paths (never move)
  local pinned=(
    "imports/healthconnect/health_connect_export.db"
    "imports/healthconnect/ingest_state.json"
    "imports/bet365/ingest_state.json"
  )
  # Skip node_modules and git internals if ever under these
  local vendored_re='(^|/)node_modules/|(^|/)\.git/'

  # All loop variables are local so nothing leaks into the rest of the script.
  local area f rel pin is_pinned mtime

  for area in imports inbox; do
    [[ -d "$area" ]] || continue

    while IFS= read -r -d '' f; do
      # Repo-relative path with symlinks resolved. The paths are handed over as
      # arguments (sys.argv) rather than pasted into the Python source, so a
      # file name containing quotes cannot change what is compared below or be
      # run as code. Best effort: if python3 is missing or fails, fall back to
      # the path as find reported it.
      rel=$(python3 - "$ROOT" "$f" <<'PY' 2>/dev/null || true
import os
import sys
root = os.path.realpath(sys.argv[1])
src = os.path.realpath(sys.argv[2])
print(os.path.relpath(src, root))
PY
)
      [[ -n "$rel" ]] || rel="$f"

      is_pinned=0
      for pin in "${pinned[@]}"; do
        if [[ "$rel" == "$pin" ]]; then
          is_pinned=1
          break
        fi
      done
      if [[ $is_pinned -eq 1 ]]; then
        continue
      fi

      if [[ "$rel" =~ $vendored_re ]]; then
        continue
      fi

      # BSD/macOS stat: %m is the modification time in epoch seconds.
      mtime=$(stat -f %m "$f")
      if [[ "$mtime" -ge "$recursive_cutoff_epoch" ]]; then
        continue
      fi

      move_file_preserve_rel "$f" "$area" "$mtime"
    done < <(find "$area" -type f -print0)
  done
}

# Run every archiving pass in its fixed order. Each pass returns at once when
# its source directory is missing (or, for the recursive pass, when RECURSIVE
# is not 1).
for archive_step in \
  archive_imports \
  archive_inbox \
  archive_health \
  archive_tmp \
  archive_recursive_leaf_files; do
  "$archive_step"
done
unset archive_step

# Closing summary: tell the user whether anything was actually moved.
echo
if ! [[ $DRY_RUN -eq 1 ]]; then
  echo "Archiving complete."
else
  echo "Dry run complete. Re-run without --dry-run to apply."
fi
