#!/usr/bin/env python3
"""
Health Connect → Dashboard Sync
================================
Reads the Health Connect export (SQLite DB inside a zip),
extracts weight, body composition, steps, sleep, exercise,
heart rate, blood pressure, and updates dashboard-data.json.

Usage:
  python3 ~/clawd/health_sync.py                          # auto-finds zip in Google Drive
  python3 ~/clawd/health_sync.py "/path/to/Health Connect.zip"  # explicit path

Designed to run daily via launchd after Google Drive syncs.
"""

import sys
import json
import sqlite3
import zipfile
import tempfile
import logging
from pathlib import Path
from datetime import datetime, timezone, timedelta, date
from zoneinfo import ZoneInfo

# Display timezone: every timestamp written to the dashboard is converted
# to Eastern time via ms_to_dt().
EASTERN = ZoneInfo("America/New_York")

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [HEALTH] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("health-sync")

# dashboard-data.json lives next to this script and is read-modify-written
# by main(). logs/ is created eagerly but not referenced elsewhere in this
# file — presumably the launchd job's stdout/stderr target (TODO confirm).
DASHBOARD_JSON = Path(__file__).resolve().parent / "dashboard-data.json"
LOGS_DIR = Path(__file__).resolve().parent / "logs"
LOGS_DIR.mkdir(exist_ok=True)

# Where Google Drive for Desktop syncs files (common paths)
GDRIVE_SEARCH_PATHS = [
    Path.home() / "Google Drive" / "My Drive",
    Path.home() / "Library" / "CloudStorage" / "GoogleDrive-syrros@gmail.com" / "My Drive",
    Path.home() / "Google Drive",
    Path.home() / "GoogleDrive",
]

# Weight-loss anchors, in pounds. The baseline date surfaced to the
# dashboard is hard-coded as "February 1st" in process_weight().
BASELINE_WEIGHT = 245.0
GOAL_WEIGHT = 220.0

# Health Connect exercise type mapping
# Bill's actual activities: Cycling, Hockey, Cross Country Skiing, Hiking
# Oura auto-detects and often maps wrong codes, so we override:
#   Type 58 (Snowboarding) → XC Skiing (similar motion)
#   Type 53 (Treadmill) → Hockey (short high-intensity)
#   Type 21 (Exercise Class) → Workout (generic catch-all)
EXERCISE_TYPES = {
    0: "Other", 4: "Cycling", 5: "Cycling (Stationary)",
    21: "Workout", 29: "Golf", 34: "Hiking", 36: "Hockey",
    53: "Hockey", 56: "XC Skiing", 58: "XC Skiing",
    66: "Strength Training", 76: "Walking", 78: "Weightlifting",
    80: "Yoga",
}


def find_health_connect_zip():
    """Find the Health Connect zip file in Google Drive.

    Tries exact filenames first (avoids macOS TCC permission issues with
    directory listing in launchd), then falls back to glob/iterdir.

    Returns:
        Path to the most recently modified matching zip, or None if no
        candidate was found in any search path.
    """
    # Pass 1: Try exact known filenames (no directory listing needed).
    # Google Drive saves duplicate uploads as "Health Connect (1).zip" etc.,
    # so the newest export may live under a suffixed name. Collect every
    # existing candidate and keep the newest by mtime, instead of returning
    # the first name that happens to exist (which could be a stale export).
    KNOWN_NAMES = ["Health Connect.zip", "Health Connect (1).zip", "Health Connect (2).zip"]
    candidates = []
    for base in GDRIVE_SEARCH_PATHS:
        for name in KNOWN_NAMES:
            candidate = base / name
            try:
                # > 1000 bytes filters out placeholder/truncated files.
                if candidate.exists() and candidate.stat().st_size > 1000:
                    # Record (mtime, path) now so we never re-stat outside
                    # the PermissionError guard.
                    candidates.append((candidate.stat().st_mtime, candidate))
            except PermissionError:
                continue
    if candidates:
        newest = max(candidates)[1]
        log.info(f"Found via exact path: {newest}")
        return newest

    # Pass 2: Glob patterns (needs directory listing permission)
    for base in GDRIVE_SEARCH_PATHS:
        if not base.exists():
            continue
        try:
            for pattern in ["Health Connect*.zip", "health_connect*.zip"]:
                matches = list(base.glob(pattern))
                if matches:
                    matches.sort(key=lambda p: p.stat().st_mtime, reverse=True)
                    return matches[0]
        except PermissionError:
            log.warning(f"Permission denied listing {base} — try granting Full Disk Access")
            continue

    return None


def extract_db(zip_path):
    """Extract the first .db member of the Health Connect zip.

    The member is unpacked into a fresh temp directory (left for the OS to
    reclaim) and the path to the extracted file is returned.
    """
    dest = tempfile.mkdtemp()
    with zipfile.ZipFile(zip_path, "r") as archive:
        members = [name for name in archive.namelist() if name.endswith(".db")]
        if not members:
            raise ValueError(f"No .db file found in {zip_path}")
        first_db = members[0]
        archive.extract(first_db, dest)
        return Path(dest) / first_db


def ms_to_dt(ms):
    """Convert epoch milliseconds to a timezone-aware datetime in Eastern time."""
    return datetime.fromtimestamp(ms / 1000, tz=timezone.utc).astimezone(EASTERN)


def grams_to_lbs(g):
    """Convert a mass in grams to pounds, rounded to one decimal place."""
    kg = g / 1000
    return round(kg * 2.20462, 1)


def process_weight(cursor):
    """Extract weight and body composition data.

    Builds the dict that dashboard-data.json stores under "weight": current
    weight, 7-day average, 30-day daily series, pace/ETA estimates, and any
    body composition readings taken near the latest weigh-in.

    Returns:
        dict of dashboard fields, or None when the export has no weight records.
    """
    # All weight records, most recent first. Keep the raw rows so the epoch-ms
    # timestamp of the newest weigh-in is available below — the original
    # re-queried the table just to get this value back.
    cursor.execute("SELECT time, weight FROM weight_record_table ORDER BY time DESC")
    rows = cursor.fetchall()
    if not rows:
        return None

    weights = []
    for ts_ms, grams in rows:
        dt = ms_to_dt(ts_ms)
        weights.append({"date": dt.strftime("%Y-%m-%d"), "time": dt.strftime("%H:%M"), "lbs": grams_to_lbs(grams)})

    current = weights[0]
    latest_time = rows[0][0]  # epoch ms of the most recent weigh-in

    # Body composition readings closest in time to the latest weigh-in.
    # Each probe is (SQL, output key, value converter); all use the same
    # nearest-by-|time delta| lookup.
    body_comp = {}
    if latest_time:
        probes = [
            ("SELECT percentage FROM body_fat_record_table ORDER BY ABS(time - ?) LIMIT 1",
             "bodyFat", lambda v: round(v, 1)),
            ("SELECT mass FROM lean_body_mass_record_table ORDER BY ABS(time - ?) LIMIT 1",
             "leanMass", grams_to_lbs),
            ("SELECT mass FROM bone_mass_record_table ORDER BY ABS(time - ?) LIMIT 1",
             "boneMass", grams_to_lbs),
            ("SELECT body_water_mass FROM body_water_mass_record_table ORDER BY ABS(time - ?) LIMIT 1",
             "bodyWater", grams_to_lbs),
        ]
        for sql, key, convert in probes:
            cursor.execute(sql, (latest_time,))
            row = cursor.fetchone()
            if row:
                body_comp[key] = convert(row[0])

        # BMR gets an extra sanity check on the value itself.
        cursor.execute("SELECT basal_metabolic_rate FROM basal_metabolic_rate_record_table ORDER BY ABS(time - ?) LIMIT 1", (latest_time,))
        row = cursor.fetchone()
        if row and row[0] > 500:  # Filter out partial-day readings
            body_comp["bmr"] = round(row[0], 0)

    # Build 30-day series: one value per calendar day. Because `weights` is
    # sorted most-recent-first, the newest weigh-in of each day wins.
    thirty_days_ago = (date.today() - timedelta(days=30)).isoformat()
    series = []
    seen_dates = set()
    for w in weights:
        if w["date"] >= thirty_days_ago and w["date"] not in seen_dates:
            series.append(w["lbs"])
            seen_dates.add(w["date"])
    series.reverse()  # chronological order

    # Calculate stats
    last_7 = series[-7:] if len(series) >= 7 else series
    avg7d = round(sum(last_7) / len(last_7), 1) if last_7 else current["lbs"]

    prev_weight = series[-2] if len(series) >= 2 else current["lbs"]
    delta = round(current["lbs"] - prev_weight, 1)

    lost_so_far = round(BASELINE_WEIGHT - current["lbs"], 1)
    remaining = current["lbs"] - GOAL_WEIGHT

    # Weekly pace from 7-day endpoints (negative = losing weight)
    if len(series) >= 7:
        weekly_pace = round(series[-1] - series[-7], 1)
    else:
        weekly_pace = -1.5  # default estimate

    # ETA only makes sense while losing weight and still above goal.
    if weekly_pace < 0 and remaining > 0:
        eta_weeks = round(remaining / abs(weekly_pace))
    else:
        eta_weeks = 0

    return {
        "current": current["lbs"],
        "avg7d": avg7d,
        "goal": GOAL_WEIGHT,
        "baseline": BASELINE_WEIGHT,
        "baselineDate": "February 1st",
        "deltaVsLast": delta,
        "pace": weekly_pace,
        "paceUnit": "lb/wk",
        "etaWeeks": eta_weeks,
        "lastWeighIn": current["date"],
        "series30d": series,
        "lostSoFar": lost_so_far,
        "weeklyPace": abs(weekly_pace),
        "projectedWeeks": float(eta_weeks),
        "date": current["date"],
        "synced_at": datetime.now().isoformat(),
        "source": "Health Connect",
        **body_comp,
    }


def process_steps(cursor):
    """Extract daily step counts for last 7 days."""
    cursor.execute("""
        SELECT local_date, SUM(count)
        FROM steps_record_table
        GROUP BY local_date
        ORDER BY local_date DESC
        LIMIT 7
    """)
    # local_date is a day index; day_idx * 86400 is midnight UTC of that day.
    days = [
        {
            "date": datetime.fromtimestamp(day_idx * 86400, tz=timezone.utc).strftime("%Y-%m-%d"),
            "steps": total,
        }
        for day_idx, total in cursor.fetchall()
    ]
    days.reverse()

    if days:
        today_steps = days[-1]["steps"]
        avg = round(sum(entry["steps"] for entry in days) / len(days))
    else:
        today_steps = 0
        avg = 0

    return {"today": today_steps, "avg7d": avg, "days": days}


def process_sleep(cursor):
    """Extract recent sleep sessions in the format the dashboard expects.

    Returns None when the export has no sleep sessions; otherwise a dict
    with the most recent night's totals and stage breakdown, the average
    HRV sampled during that session, and a 14-night hours series keyed by
    the bedtime's local date.
    """
    # Get most recent sleep session for the main display
    cursor.execute("""
        SELECT s.row_id, s.start_time, s.end_time, s.title
        FROM sleep_session_record_table s
        ORDER BY s.start_time DESC
        LIMIT 1
    """)
    row = cursor.fetchone()
    if not row:
        return None

    last_id, start_ms, end_ms, title = row
    start = ms_to_dt(start_ms)
    end = ms_to_dt(end_ms)
    # Whole session window, including any awake time inside it.
    total_hours = round((end_ms - start_ms) / 3600000, 1)

    # lstrip("0") turns "08:05 PM" into "8:05 PM" for display.
    bedtime = start.strftime("%I:%M %p").lstrip("0")
    wake_time = end.strftime("%I:%M %p").lstrip("0")

    # Get sleep stages for the most recent session
    # Stage types: 1=Awake, 2=Sleep, 3=OutOfBed, 4=Light, 5=Deep, 6=REM
    cursor.execute("""
        SELECT stage_type, SUM(stage_end_time - stage_start_time) as duration_ms
        FROM sleep_stages_table
        WHERE parent_key = ?
        GROUP BY stage_type
    """, (last_id,))

    stages = {}
    # Only the stages the dashboard shows; generic "Sleep" (2) and
    # "OutOfBed" (3) rows are deliberately dropped by the .get() below.
    stage_map = {1: "awake_min", 4: "light_min", 5: "deep_min", 6: "rem_min"}
    for srow in cursor.fetchall():
        key = stage_map.get(srow[0])
        if key:
            stages[key] = round(srow[1] / 60000)  # ms to minutes

    # Get HRV for last night: average of all RMSSD samples taken inside the
    # session window (stays None if there were no samples).
    avg_hrv = None
    cursor.execute("""
        SELECT AVG(heart_rate_variability_millis)
        FROM heart_rate_variability_rmssd_record_table
        WHERE time BETWEEN ? AND ?
    """, (start_ms, end_ms))
    hrv_row = cursor.fetchone()
    if hrv_row and hrv_row[0]:
        avg_hrv = round(hrv_row[0], 1)

    # Build 14-night series: hours per night keyed by the bedtime's date.
    cursor.execute("""
        SELECT start_time, end_time
        FROM sleep_session_record_table
        ORDER BY start_time DESC
        LIMIT 14
    """)
    series14d = {}
    for srow in cursor.fetchall():
        d = ms_to_dt(srow[0]).strftime("%Y-%m-%d")
        h = round((srow[1] - srow[0]) / 3600000, 1)
        if d not in series14d:  # one entry per night (rows are newest-first, so the latest session wins)
            series14d[d] = h

    return {
        "total_hours": total_hours,
        "bedtime": bedtime,
        "wake_time": wake_time,
        "source": title or "Oura",
        "stages": stages if stages else None,
        "avg_hrv": avg_hrv,
        "series14d": series14d,
    }


def process_exercise(cursor):
    """Extract recent exercise sessions."""
    cursor.execute("""
        SELECT start_time, end_time, exercise_type, title
        FROM exercise_session_record_table
        ORDER BY start_time DESC
        LIMIT 10
    """)
    sessions = []
    for start_ms, end_ms, raw_type, db_title in cursor.fetchall():
        started = ms_to_dt(start_ms)
        duration_min = round((end_ms - start_ms) / 60000)
        # Map the Health Connect type code to a friendly name; unknown codes
        # fall through as "Type N" so they remain visible.
        exercise_name = EXERCISE_TYPES.get(raw_type, f"Type {raw_type}")
        log.info(f"  Exercise: type={raw_type} mapped='{exercise_name}' title='{db_title}' dur={duration_min}m")

        sessions.append({
            "date": started.strftime("%Y-%m-%d"),
            "time": started.strftime("%H:%M"),
            "duration_min": duration_min,
            "type": exercise_name,
            "raw_type": raw_type,
            "title": db_title or exercise_name,
        })

    # "This week" starts on Monday of the current week.
    monday = (date.today() - timedelta(days=date.today().weekday())).isoformat()
    weekly = [s for s in sessions if s["date"] >= monday]

    return {"thisWeek": len(weekly), "sessions": sessions}


def process_heart_rate(cursor):
    """Extract heart rate summary."""
    # Min/max/avg restricted to series samples whose parent record started
    # within the last 24 hours.
    cutoff = int((datetime.now(timezone.utc) - timedelta(hours=24)).timestamp() * 1000)
    cursor.execute("""
        SELECT MIN(s.beats_per_minute), MAX(s.beats_per_minute), AVG(s.beats_per_minute)
        FROM heart_rate_record_series_table s
        JOIN heart_rate_record_table h ON s.parent_key = h.row_id
        WHERE h.start_time > ?
    """, (cutoff,))
    summary = cursor.fetchone()

    result = {}
    if summary and summary[0]:
        result["min24h"] = summary[0]
        result["max24h"] = summary[1]
        result["avg24h"] = round(summary[2])

    # Most recent resting HR, if any
    cursor.execute("SELECT beats_per_minute FROM resting_heart_rate_record_table ORDER BY time DESC LIMIT 1")
    resting_row = cursor.fetchone()
    if resting_row:
        result["resting"] = resting_row[0]

    # Most recent HRV (RMSSD) reading, if any
    cursor.execute("SELECT heart_rate_variability_millis FROM heart_rate_variability_rmssd_record_table ORDER BY time DESC LIMIT 1")
    hrv_row = cursor.fetchone()
    if hrv_row:
        result["hrv"] = round(hrv_row[0], 1)

    return result


def process_blood_pressure(cursor):
    """Extract latest blood pressure reading.

    Returns a dict with systolic/diastolic/date, or None if no readings exist.
    """
    cursor.execute("SELECT time, systolic, diastolic FROM blood_pressure_record_table ORDER BY time DESC LIMIT 1")
    latest = cursor.fetchone()
    if not latest:
        return None
    ts_ms, systolic, diastolic = latest
    return {
        "systolic": int(systolic),
        "diastolic": int(diastolic),
        "date": ms_to_dt(ts_ms).strftime("%Y-%m-%d"),
    }


def process_oxygen(cursor):
    """Extract latest SpO2 reading.

    Returns a dict with spo2/date, or None if no readings exist.
    """
    cursor.execute("SELECT time, percentage FROM oxygen_saturation_record_table ORDER BY time DESC LIMIT 1")
    latest = cursor.fetchone()
    if not latest:
        return None
    ts_ms, pct = latest
    return {"spo2": round(pct, 1), "date": ms_to_dt(ts_ms).strftime("%Y-%m-%d")}


def main():
    # Find the zip
    if len(sys.argv) > 1:
        zip_path = Path(sys.argv[1])
    else:
        zip_path = find_health_connect_zip()

    if not zip_path or not zip_path.exists():
        log.error("Health Connect zip not found. Pass the path explicitly or set up Google Drive sync.")
        log.info("Searched: " + ", ".join(str(p) for p in GDRIVE_SEARCH_PATHS))
        sys.exit(1)

    log.info(f"Processing: {zip_path}")
    log.info(f"File modified: {datetime.fromtimestamp(zip_path.stat().st_mtime).strftime('%Y-%m-%d %H:%M')}")

    # Extract and connect
    db_path = extract_db(zip_path)
    conn = sqlite3.connect(str(db_path))
    cursor = conn.cursor()

    # Process all health data
    weight = process_weight(cursor)
    steps = process_steps(cursor)
    sleep = process_sleep(cursor)
    exercise = process_exercise(cursor)
    heart = process_heart_rate(cursor)
    bp = process_blood_pressure(cursor)
    o2 = process_oxygen(cursor)

    conn.close()

    # Update dashboard
    data = json.loads(DASHBOARD_JSON.read_text()) if DASHBOARD_JSON.exists() else {}

    # ── FRESHNESS GUARD ──────────────────────────────────────
    # Never overwrite weight/body data with an older date.
    # This prevents stale Health Connect exports from clobbering
    # manual updates or more recent data in dashboard-data.json.
    existing_weight_date = data.get("weight", {}).get("date", "1970-01-01")
    incoming_weight_date = weight["date"] if weight else "1970-01-01"

    if weight and incoming_weight_date >= existing_weight_date:
        data["weight"] = weight
        data["date"] = weight["date"]
        log.info(f"Weight: {weight['current']} lbs (BF: {weight.get('bodyFat', '?')}%, Lean: {weight.get('leanMass', '?')} lbs)")
    elif weight:
        log.warning(
            f"SKIPPED weight update: incoming date {incoming_weight_date} is older "
            f"than existing {existing_weight_date} — keeping current dashboard data"
        )
    # ─────────────────────────────────────────────────────────

    # Write exercise in the format the dashboard expects: data.exercise.recent[].name
    # Only update if we have sessions and they're not all older than what's already there
    new_exercise = {
        "recent": [
            {"date": s["date"], "name": s["title"], "duration_min": s["duration_min"]}
            for s in exercise.get("sessions", [])
        ],
        "thisWeek": exercise.get("thisWeek", 0),
    }
    existing_exercise_date = (data.get("exercise", {}).get("recent", [{}]) or [{}])[0].get("date", "1970-01-01")
    incoming_exercise_date = (new_exercise["recent"][0]["date"] if new_exercise["recent"] else "1970-01-01")
    if incoming_exercise_date >= existing_exercise_date:
        data["exercise"] = new_exercise
    else:
        log.warning(f"SKIPPED exercise update: incoming {incoming_exercise_date} older than existing {existing_exercise_date}")

    # Write sleep — only if incoming is newer
    if sleep:
        existing_sleep_date = data.get("sleep", {}).get("series14d", {})
        incoming_sleep_dates = sleep.get("series14d", {})
        # Compare most recent sleep date
        existing_latest = max(existing_sleep_date.keys()) if existing_sleep_date else "1970-01-01"
        incoming_latest = max(incoming_sleep_dates.keys()) if incoming_sleep_dates else "1970-01-01"
        if incoming_latest >= existing_latest:
            data["sleep"] = sleep
        else:
            log.warning(f"SKIPPED sleep update: incoming {incoming_latest} older than existing {existing_latest}")

    # Write all health data (steps, heart, BP, O2) under data.health
    data["health"] = {
        "steps": steps,
        "heartRate": heart,
        "bloodPressure": bp,
        "oxygen": o2,
        "updated": datetime.now().isoformat(),
        "source": "Health Connect via Google Drive",
    }

    DASHBOARD_JSON.write_text(json.dumps(data, indent=2))

    # Summary
    log.info(f"Steps today: {steps['today']:,} (7d avg: {steps['avg7d']:,})")
    log.info(f"Sleep last night: {sleep['total_hours'] if sleep else '?'}h (HRV: {sleep.get('avg_hrv', '?') if sleep else '?'}ms)")
    log.info(f"Exercise this week: {exercise['thisWeek']} sessions")
    if heart:
        log.info(f"Heart: avg {heart.get('avg24h', '?')} bpm, HRV {heart.get('hrv', '?')}ms")
    if bp:
        log.info(f"BP: {bp['systolic']}/{bp['diastolic']} ({bp['date']})")
    log.info("Dashboard updated!")


if __name__ == "__main__":
    main()
