#!/usr/bin/env python3
"""
Kitzu — iHealth Vitals Collector

Collects blood pressure, weight, and body composition data from multiple sources:
  1. iHealth MyVitals app CSV exports (direct from iHealth BP monitor)
  2. Normalized blood_pressure.csv (pre-processed BP data)
  3. Health Connect weight/body composition exports
  4. Manual weight and body fat CSVs

All data is normalized into the Kitzu unified profile format.
Deduplication ensures no duplicate readings across sources.

Usage:
    python3 collect_ihealth.py              # Import all available data
    python3 collect_ihealth.py --status     # Show current vitals data summary
    python3 collect_ihealth.py --scan       # Scan for new iHealth export files
"""

import csv
import json
import logging
import argparse
from datetime import datetime, date
from pathlib import Path
from typing import Optional

# ── Setup ────────────────────────────────────────────────────
log = logging.getLogger("kitzu.ihealth")
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(message)s")

# Paths (this file lives one level below the repo root, so parent.parent)
KITZU_ROOT = Path(__file__).resolve().parent.parent
DATA_ROOT = KITZU_ROOT / "data"
VITALS_DIR = DATA_ROOT / "vitals"
VITALS_DIR.mkdir(parents=True, exist_ok=True)  # ensure archive dir exists at import time

# Add parent to sys.path so the sibling schema module resolves when run as a script
import sys
sys.path.insert(0, str(KITZU_ROOT))
from schema import load_profile, save_profile

# ── Source file locations ────────────────────────────────────
HOME = Path.home()
CLAWD = HOME / "clawd"

# Known data sources (searched in order; missing directories are skipped)
IHEALTH_EXPORT_DIRS = [
    CLAWD / "inbox" / "health",           # iHealth app email exports
    CLAWD / "data" / "health",            # Processed health data
    HOME / "Downloads",                    # Fresh downloads
]

# Fixed-location inputs; each is optional and checked with .exists() before use
BP_CSV_PATH = CLAWD / "data" / "health" / "blood_pressure.csv"
MANUAL_WEIGHT_PATH = CLAWD / "data" / "health" / "manual_weight.csv"
MANUAL_BODYFAT_PATH = CLAWD / "data" / "health" / "manual_bodyfat.csv"
DASHBOARD_PATH = CLAWD / "_Organized" / "Data" / "dashboard-data.json"


# ── BP Reading dataclass ─────────────────────────────────────

class BPReading:
    """A single normalized blood-pressure measurement from any source."""

    def __init__(self, dt: datetime, systolic: int, diastolic: int, pulse: int,
                 note: str = "", source: str = "iHealth"):
        # Plain value object; fields mirror the constructor arguments.
        self.dt = dt
        self.systolic = systolic
        self.diastolic = diastolic
        self.pulse = pulse
        self.note = note
        self.source = source

    @property
    def date_str(self) -> str:
        """Reading date as YYYY-MM-DD."""
        return format(self.dt, "%Y-%m-%d")

    @property
    def time_str(self) -> str:
        """Reading time as HH:MM (24-hour clock)."""
        return format(self.dt, "%H:%M")

    @property
    def key(self) -> str:
        """Deduplication key: date, time, and both pressures joined."""
        parts = (self.date_str, self.time_str, str(self.systolic), str(self.diastolic))
        return "_".join(parts)

    def to_dict(self) -> dict:
        """Serialize into the profile's BP-entry shape."""
        return dict(
            systolic=self.systolic,
            diastolic=self.diastolic,
            pulse=self.pulse,
            date=self.date_str,
            time=self.time_str,
            note=self.note,
            source=self.source,
        )

    def __repr__(self):
        return f"BP({self.date_str} {self.time_str}: {self.systolic}/{self.diastolic} HR:{self.pulse})"


class WeightReading:
    """Normalized weight/body composition reading.

    Only ``weight_lb`` is required; all body-composition fields are
    optional (``None`` when not measured) and are omitted from the
    serialized dict so consumers can tell "absent" from zero.
    """
    def __init__(self, dt: datetime, weight_lb: float,
                 body_fat_pct: Optional[float] = None,
                 muscle_mass: Optional[float] = None,
                 lean_mass: Optional[float] = None,
                 bone_mass: Optional[float] = None,
                 body_water: Optional[float] = None,
                 visceral_fat: Optional[float] = None,
                 bmr: Optional[float] = None,
                 bmi: Optional[float] = None,
                 source: str = "Health Connect"):
        self.dt = dt
        self.weight_lb = weight_lb
        self.body_fat_pct = body_fat_pct
        self.muscle_mass = muscle_mass
        self.lean_mass = lean_mass
        self.bone_mass = bone_mass
        self.body_water = body_water
        self.visceral_fat = visceral_fat
        self.bmr = bmr
        self.bmi = bmi
        self.source = source

    @property
    def date_str(self) -> str:
        """Reading date as YYYY-MM-DD."""
        return self.dt.strftime("%Y-%m-%d")

    @property
    def key(self) -> str:
        """Deduplication key: one entry per (date, weight) pair."""
        return f"{self.date_str}_{self.weight_lb}"

    def to_dict(self) -> dict:
        """Serialize into the profile's weight-entry shape.

        Optional body-composition fields are included only when set.
        """
        d = {
            "date": self.date_str,
            "weight_lb": self.weight_lb,
            "source": self.source,
        }
        optional = {
            "body_fat_pct": self.body_fat_pct,
            "muscle_mass": self.muscle_mass,
            "lean_mass": self.lean_mass,
            "bone_mass": self.bone_mass,
            "body_water": self.body_water,
            "visceral_fat": self.visceral_fat,
            "bmr": self.bmr,
            "bmi": self.bmi,
        }
        d.update({k: v for k, v in optional.items() if v is not None})
        return d


# ── Parsers ──────────────────────────────────────────────────

def parse_ihealth_export(filepath: Path) -> list[BPReading]:
    """Parse iHealth MyVitals app CSV export.

    Format: Date,Time,SYS(mmHg),DIA(mmHg),Pulse(Beats/Min),Note
    Example: "Feb 17, 2026",7:14 AM,143,87,49,

    Rows without a positive systolic/diastolic pair are dropped;
    malformed rows are logged and skipped instead of aborting the file.
    """
    readings = []
    try:
        # utf-8-sig strips the BOM some app exports prepend to the header
        # (a BOM would break the "Date" column lookup); newline="" is the
        # csv-module requirement for correct quoting/newline handling.
        with open(filepath, "r", newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    # Short rows yield None for missing columns, hence "or".
                    date_str = (row.get("Date") or "").strip().strip('"')
                    time_str = (row.get("Time") or "").strip()

                    # Parse "Feb 17, 2026 7:14 AM"; fall back to 24h time,
                    # then to date-only (midnight) if the time is unusable.
                    dt_str = f"{date_str} {time_str}"
                    try:
                        dt = datetime.strptime(dt_str, "%b %d, %Y %I:%M %p")
                    except ValueError:
                        try:
                            dt = datetime.strptime(dt_str, "%b %d, %Y %H:%M")
                        except ValueError:
                            dt = datetime.strptime(date_str, "%b %d, %Y")

                    sys_val = int(row.get("SYS(mmHg)") or 0)
                    dia_val = int(row.get("DIA(mmHg)") or 0)
                    pulse = int(row.get("Pulse(Beats/Min)") or 0)
                    note = (row.get("Note") or "").strip()

                    if sys_val > 0 and dia_val > 0:
                        readings.append(BPReading(
                            dt=dt, systolic=sys_val, diastolic=dia_val,
                            pulse=pulse, note=note, source="iHealth Export"
                        ))
                # TypeError added: DictReader fills missing cells with None,
                # and int(None)/None.strip() raise TypeError/AttributeError
                # which the original handler let escape.
                except (ValueError, KeyError, TypeError) as e:
                    log.warning(f"Skipping row in {filepath.name}: {e}")
    except Exception as e:
        log.error(f"Failed to parse {filepath}: {e}")

    log.info(f"  Parsed {len(readings)} readings from {filepath.name}")
    return readings


def parse_normalized_bp_csv(filepath: Path) -> list[BPReading]:
    """Parse the normalized blood_pressure.csv format.

    Format: date,time,sys,dia,pulse,notes
    Example: 2026-02-17,06:59,173,108,56,

    Rows without a positive systolic/diastolic pair are dropped;
    malformed rows are logged and skipped instead of aborting the file.
    """
    readings = []
    try:
        # newline="" per the csv module docs; utf-8-sig tolerates a BOM.
        with open(filepath, "r", newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    # Short rows yield None for missing columns, hence "or";
                    # an empty time now falls back to midnight (00:00).
                    d = (row.get("date") or "").strip()
                    t = (row.get("time") or "00:00").strip()
                    dt = datetime.strptime(f"{d} {t}", "%Y-%m-%d %H:%M")

                    sys_val = int(row.get("sys") or 0)
                    dia_val = int(row.get("dia") or 0)
                    pulse = int(row.get("pulse") or 0)
                    note = (row.get("notes") or "").strip()

                    if sys_val > 0 and dia_val > 0:
                        readings.append(BPReading(
                            dt=dt, systolic=sys_val, diastolic=dia_val,
                            pulse=pulse, note=note, source="iHealth CSV"
                        ))
                # TypeError added: None cells from short rows previously
                # raised uncaught TypeError inside the loop.
                except (ValueError, KeyError, TypeError) as e:
                    log.warning(f"Skipping row: {e}")
    except Exception as e:
        log.error(f"Failed to parse {filepath}: {e}")

    log.info(f"  Parsed {len(readings)} readings from {filepath.name}")
    return readings


def parse_dashboard_bp(filepath: Path) -> list[BPReading]:
    """Extract BP readings from the InsideTracker dashboard-data.json."""
    readings = []
    try:
        payload = json.loads(filepath.read_text())
        bp = payload.get("bloodPressure", {})

        # Top-level "current" reading (present only when systolic is set).
        if bp.get("systolic"):
            day = bp.get("date", "")
            clock = bp.get("time", "08:00")
            try:
                when = datetime.strptime(f"{day} {clock}", "%Y-%m-%d %H:%M")
            except ValueError:
                # Time unusable — fall back to date-only (midnight).
                when = datetime.strptime(day, "%Y-%m-%d")
            readings.append(BPReading(
                dt=when,
                systolic=bp["systolic"],
                diastolic=bp["diastolic"],
                pulse=bp.get("pulse", 0),
                source="InsideTracker/iHealth",
            ))

        # Historical readings; malformed entries are skipped silently.
        for item in bp.get("history", []):
            try:
                stamp = f'{item.get("date", "")} {item.get("time", "08:00")}'
                when = datetime.strptime(stamp, "%Y-%m-%d %H:%M")
                readings.append(BPReading(
                    dt=when,
                    systolic=item["systolic"],
                    diastolic=item["diastolic"],
                    pulse=item.get("pulse", 0),
                    source="InsideTracker/iHealth",
                ))
            except (ValueError, KeyError):
                pass

    except Exception as e:
        log.error(f"Failed to parse dashboard BP: {e}")

    log.info(f"  Parsed {len(readings)} readings from dashboard-data.json")
    return readings


def parse_dashboard_weight(filepath: Path) -> list[WeightReading]:
    """Extract weight/body-composition readings from dashboard-data.json."""
    readings = []
    try:
        weight = json.loads(filepath.read_text()).get("weight", {})
        if not weight:
            return readings

        # Current weigh-in: the richest entry, carrying full body comp.
        current = weight.get("current")
        if current:
            stamp = weight.get("lastWeighIn") or weight.get("date", "")
            try:
                when = datetime.strptime(stamp, "%Y-%m-%d")
            except ValueError:
                when = datetime.now()

            readings.append(WeightReading(
                dt=when, weight_lb=current,
                body_fat_pct=weight.get("bodyFat"),
                muscle_mass=weight.get("muscleMass"),
                lean_mass=weight.get("leanMass"),
                bone_mass=weight.get("boneMass"),
                body_water=weight.get("bodyWater"),
                visceral_fat=weight.get("visceralFat"),
                bmr=weight.get("bmr"),
                bmi=weight.get("bmi"),
                source="Health Connect"
            ))

        series = weight.get("series30d", [])
        if series and isinstance(series[0], (int, float)):
            # Bare list of weights with no dates: assign dates counting
            # backwards one day at a time from the last weigh-in date.
            anchor = weight.get("lastWeighIn") or weight.get("date", "")
            try:
                from datetime import timedelta
                end_dt = datetime.strptime(anchor, "%Y-%m-%d")
                for offset, value in enumerate(reversed(series)):
                    if value and value > 0:
                        readings.append(WeightReading(
                            dt=end_dt - timedelta(days=offset),
                            weight_lb=float(value),
                            source="Health Connect"
                        ))
            except (ValueError, TypeError):
                pass
        elif series:
            # Fallback: entries are {"date": ..., "weight": ...} dicts.
            for item in series:
                try:
                    if isinstance(item, dict):
                        readings.append(WeightReading(
                            dt=datetime.strptime(item["date"], "%Y-%m-%d"),
                            weight_lb=float(item.get("weight", 0)),
                            source="Health Connect"
                        ))
                except (ValueError, KeyError, TypeError):
                    pass

    except Exception as e:
        log.error(f"Failed to parse dashboard weight: {e}")

    log.info(f"  Parsed {len(readings)} weight readings from dashboard-data.json")
    return readings


def parse_manual_weight(filepath: Path) -> list[WeightReading]:
    """Parse manual_weight.csv for weight readings.

    Expects columns ``timestamp_iso`` (ISO-8601, possibly Z-suffixed)
    and ``weight`` (pounds). Bad rows are logged and skipped.
    """
    readings = []
    try:
        # newline="" per the csv module docs; utf-8-sig tolerates a BOM.
        with open(filepath, "r", newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    # Short rows yield None for missing columns, hence "or".
                    ts = (row.get("timestamp_iso") or "").strip().strip('"')
                    # Normalize a trailing Z to an explicit UTC offset, then
                    # drop tzinfo so readings compare against naive now().
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00")).replace(tzinfo=None)
                    w = float(row.get("weight") or 0)
                    if w > 0:
                        readings.append(WeightReading(
                            dt=dt, weight_lb=w, source="Manual"
                        ))
                # TypeError added: float(None) previously escaped the handler.
                except (ValueError, KeyError, TypeError) as e:
                    log.warning(f"Skipping manual weight row: {e}")
    except Exception as e:
        log.error(f"Failed to parse {filepath}: {e}")

    if readings:
        log.info(f"  Parsed {len(readings)} readings from {filepath.name}")
    return readings


def parse_manual_bodyfat(filepath: Path) -> list[dict]:
    """Parse manual_bodyfat.csv for body fat readings (merged with weight later).

    Returns dicts of ``{"date": "YYYY-MM-DD", "body_fat_pct": float}``.
    Parsing is deliberately best-effort: bad rows are skipped, and an
    unreadable file yields an empty list (logged at debug, not swallowed
    invisibly as before).
    """
    entries = []
    try:
        # newline="" per the csv module docs; utf-8-sig tolerates a BOM.
        with open(filepath, "r", newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            for row in reader:
                try:
                    # Short rows yield None for missing columns, hence "or".
                    ts = (row.get("timestamp_iso") or "").strip().strip('"')
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00")).replace(tzinfo=None)
                    bf = float(row.get("body_fat_pct") or 0)
                    if bf > 0:
                        entries.append({"date": dt.strftime("%Y-%m-%d"), "body_fat_pct": bf})
                # TypeError added: float(None) previously escaped to the outer
                # handler and silently truncated the remaining rows.
                except (ValueError, KeyError, TypeError):
                    pass  # best-effort per-row
    except Exception as e:
        log.debug(f"manual bodyfat parse skipped: {e}")
    return entries


# ── Scanner ──────────────────────────────────────────────────

def scan_ihealth_exports() -> list[Path]:
    """Return a sorted, de-duplicated list of iHealth BP export CSVs.

    Searches every directory in IHEALTH_EXPORT_DIRS recursively for the
    iHealth app's BP_Data_*.csv files and generic *BloodPressure*.csv
    exports; directories that do not exist are skipped.
    """
    patterns = ("BP_Data_*.csv", "*BloodPressure*.csv")
    hits = set()
    for base in IHEALTH_EXPORT_DIRS:
        if base.exists():
            for pattern in patterns:
                hits.update(base.rglob(pattern))
    return sorted(hits)


# ── Main Collection Logic ────────────────────────────────────

def collect_all_bp() -> list[BPReading]:
    """Gather BP readings from every source, dedupe, and sort by time.

    Sources, in order: iHealth app export files, the normalized
    blood_pressure.csv, and the InsideTracker dashboard JSON. On key
    collisions the earliest-seen reading wins.
    """
    raw = []

    # 1. iHealth app export files found on disk
    log.info("Scanning for iHealth exports...")
    for export_path in scan_ihealth_exports():
        raw.extend(parse_ihealth_export(export_path))

    # 2. Pre-normalized CSV, if present
    if BP_CSV_PATH.exists():
        log.info("Reading normalized blood_pressure.csv...")
        raw.extend(parse_normalized_bp_csv(BP_CSV_PATH))

    # 3. InsideTracker dashboard JSON
    if DASHBOARD_PATH.exists():
        log.info("Reading dashboard-data.json BP...")
        raw.extend(parse_dashboard_bp(DASHBOARD_PATH))

    # First occurrence wins per (date, time, sys, dia) key
    by_key = {}
    for reading in raw:
        by_key.setdefault(reading.key, reading)

    unique = sorted(by_key.values(), key=lambda r: r.dt)

    log.info(f"Total unique BP readings: {len(unique)} (from {len(raw)} raw)")
    return unique


def collect_all_weight() -> list[WeightReading]:
    """Gather weight/body-comp readings from all sources, deduplicated.

    Dashboard data is read first (richest source), then manual entries;
    on key collisions the earliest-seen reading wins. Manual body-fat
    values are folded into matching-day weight readings afterwards.
    """
    raw = []

    # 1. Dashboard weight + body comp (richest source)
    if DASHBOARD_PATH.exists():
        log.info("Reading dashboard-data.json weight...")
        raw.extend(parse_dashboard_weight(DASHBOARD_PATH))

    # 2. Manual weight entries
    if MANUAL_WEIGHT_PATH.exists():
        log.info("Reading manual_weight.csv...")
        raw.extend(parse_manual_weight(MANUAL_WEIGHT_PATH))

    # First occurrence wins per (date, weight) key
    by_key = {}
    for reading in raw:
        by_key.setdefault(reading.key, reading)
    unique = sorted(by_key.values(), key=lambda r: r.dt)

    # Attach each manual body-fat entry to the first same-day weight
    # reading that does not already carry a body-fat value.
    if MANUAL_BODYFAT_PATH.exists():
        for bf in parse_manual_bodyfat(MANUAL_BODYFAT_PATH):
            for reading in unique:
                if reading.date_str == bf["date"] and reading.body_fat_pct is None:
                    reading.body_fat_pct = bf["body_fat_pct"]
                    break

    log.info(f"Total unique weight readings: {len(unique)}")
    return unique


def update_profile(bp_readings: list[BPReading], weight_readings: list[WeightReading]):
    """Write collected vitals data into the unified Kitzu profile.

    Args:
        bp_readings: deduplicated BP readings, sorted oldest-first.
        weight_readings: deduplicated weight readings, sorted oldest-first.

    Side effects:
        - mutates profile["vitals"] and saves it via save_profile()
        - writes a dated raw-readings archive JSON into VITALS_DIR
    NOTE(review): assumes profile["vitals"] already exists — presumably
    load_profile() seeds it; confirm against schema.py.
    """
    profile = load_profile()

    # ── Blood Pressure ──
    if bp_readings:
        # Inputs are sorted oldest-first, so [-1] is the newest reading.
        latest = bp_readings[-1]
        profile["vitals"]["blood_pressure"] = {
            "systolic": latest.systolic,
            "diastolic": latest.diastolic,
            "pulse": latest.pulse,
            "date": latest.date_str,
            "time": latest.time_str,
            "source": latest.source,
        }
        profile["vitals"]["bp_history"] = [r.to_dict() for r in bp_readings]

        # Rolling 30-day aggregates (naive local datetimes throughout)
        recent_30 = [r for r in bp_readings if (datetime.now() - r.dt).days <= 30]
        if recent_30:
            avg_sys = round(sum(r.systolic for r in recent_30) / len(recent_30))
            avg_dia = round(sum(r.diastolic for r in recent_30) / len(recent_30))
            avg_pulse = round(sum(r.pulse for r in recent_30) / len(recent_30))
            profile["vitals"]["bp_stats_30d"] = {
                "avg_systolic": avg_sys,
                "avg_diastolic": avg_dia,
                "avg_pulse": avg_pulse,
                "reading_count": len(recent_30),
                "min_systolic": min(r.systolic for r in recent_30),
                "max_systolic": max(r.systolic for r in recent_30),
                "min_diastolic": min(r.diastolic for r in recent_30),
                "max_diastolic": max(r.diastolic for r in recent_30),
            }
            log.info(f"  30-day BP avg: {avg_sys}/{avg_dia} ({len(recent_30)} readings)")

    # ── Weight / Body Composition ──
    if weight_readings:
        latest = weight_readings[-1]
        # Prefer the newest reading that carries body-fat data; fall back
        # to the latest plain reading if none has body composition.
        detailed = max(
            (r for r in weight_readings if r.body_fat_pct is not None),
            key=lambda r: r.dt,
            default=latest
        )

        profile["vitals"]["weight"] = detailed.to_dict()
        profile["vitals"]["weight_history"] = [r.to_dict() for r in weight_readings]

        # Weight trend over the last 7 days (first vs. last reading)
        recent_7 = [r for r in weight_readings if (datetime.now() - r.dt).days <= 7]
        if len(recent_7) >= 2:
            first_w = recent_7[0].weight_lb
            last_w = recent_7[-1].weight_lb
            delta = round(last_w - first_w, 1)
            profile["vitals"]["weight_trend_7d"] = {
                "start": first_w,
                "end": last_w,
                "delta_lb": delta,
                # ±0.5 lb dead-band so scale noise reads as "stable"
                "direction": "down" if delta < -0.5 else "up" if delta > 0.5 else "stable",
            }

    # Store raw vitals data to disk as well (one archive file per day;
    # re-running on the same day overwrites that day's archive)
    vitals_archive = {
        "collected_at": datetime.now().isoformat(),
        "bp_readings": [r.to_dict() for r in bp_readings],
        "weight_readings": [r.to_dict() for r in weight_readings],
    }
    archive_path = VITALS_DIR / f"{date.today().isoformat()}_vitals.json"
    archive_path.write_text(json.dumps(vitals_archive, indent=2))
    log.info(f"  Archived to {archive_path.name}")

    profile["vitals"]["last_sync"] = datetime.now().isoformat()
    profile["vitals"]["sources"] = list(set(
        [r.source for r in bp_readings] + [r.source for r in weight_readings]
    ))
    save_profile(profile)
    log.info("Unified profile updated (vitals section)")


def show_status():
    """Print a boxed, human-readable summary of the profile's vitals section.

    Read-only: loads the profile, prints BP / weight / sync info, then
    lists any iHealth export files currently on disk.
    """
    profile = load_profile()
    v = profile.get("vitals", {})

    print("\n╔══════════════════════════════════════════╗")
    print("║       Kitzu iHealth Vitals Status        ║")
    print("╠══════════════════════════════════════════╣")

    # Latest blood-pressure reading (written by update_profile)
    bp = v.get("blood_pressure", {})
    if bp:
        print(f"║  Latest BP: {bp.get('systolic','?')}/{bp.get('diastolic','?')} mmHg")
        print(f"║  Pulse: {bp.get('pulse','?')} bpm")
        print(f"║  Date: {bp.get('date','?')} {bp.get('time','')}")
        print(f"║  Source: {bp.get('source','?')}")
    else:
        print("║  No BP data")

    # 30-day BP aggregates, if computed
    stats = v.get("bp_stats_30d", {})
    if stats:
        print(f"║  30d avg: {stats.get('avg_systolic','?')}/{stats.get('avg_diastolic','?')}")
        print(f"║  Range: {stats.get('min_systolic','?')}-{stats.get('max_systolic','?')}/{stats.get('min_diastolic','?')}-{stats.get('max_diastolic','?')}")
        print(f"║  Readings: {stats.get('reading_count','?')}")

    bp_hist = v.get("bp_history", [])
    print(f"║  Total history: {len(bp_hist)} readings")

    print("║──────────────────────────────────────────")

    # Latest weight / body-composition entry
    w = v.get("weight", {})
    if w:
        print(f"║  Weight: {w.get('weight_lb','?')} lbs")
        if w.get("body_fat_pct"):
            print(f"║  Body Fat: {w['body_fat_pct']}%")
        if w.get("muscle_mass"):
            print(f"║  Muscle: {w['muscle_mass']} lbs")
        if w.get("bmi"):
            print(f"║  BMI: {w['bmi']}")
        print(f"║  Date: {w.get('date','?')}")
    else:
        print("║  No weight data")

    trend = v.get("weight_trend_7d", {})
    if trend:
        print(f"║  7d trend: {trend.get('delta_lb',0):+.1f} lbs ({trend.get('direction','?')})")

    w_hist = v.get("weight_history", [])
    print(f"║  Total history: {len(w_hist)} readings")

    print("║──────────────────────────────────────────")
    print(f"║  Last sync: {v.get('last_sync', 'never')}")
    sources = v.get("sources", [])
    if sources:
        print(f"║  Sources: {', '.join(sources)}")

    print("╚══════════════════════════════════════════╝\n")

    # Also surface any importable export files found on disk
    exports = scan_ihealth_exports()
    if exports:
        print(f"Found {len(exports)} iHealth export file(s):")
        for p in exports:
            print(f"  → {p}")


def main():
    """CLI entry point: parse flags, then collect and store vitals.

    --status prints the current profile summary; --scan only lists
    candidate export files; with no flags, runs the full collection
    and writes the unified profile.
    """
    parser = argparse.ArgumentParser(description="Kitzu iHealth Vitals Collector")
    parser.add_argument("--status", action="store_true", help="Show vitals status")
    parser.add_argument("--scan", action="store_true", help="Scan for new export files")
    args = parser.parse_args()

    if args.status:
        show_status()
        return

    if args.scan:
        exports = scan_ihealth_exports()
        if exports:
            print(f"\nFound {len(exports)} iHealth export(s):")
            for p in exports:
                print(f"  → {p}")
        else:
            print("\nNo iHealth exports found. Export from the iHealth MyVitals app.")
        return

    print("\n🩺 Kitzu iHealth Vitals Collector")
    print("=" * 42)

    # Collect from all sources
    bp_readings = collect_all_bp()
    weight_readings = collect_all_weight()

    if not bp_readings and not weight_readings:
        log.warning("No vitals data found from any source")
        return

    # Update unified profile
    update_profile(bp_readings, weight_readings)

    # Summary (readings are sorted oldest-first, so [-1] is the newest)
    print(f"\n✅ Collection complete:")
    print(f"   BP readings:     {len(bp_readings)}")
    print(f"   Weight readings:  {len(weight_readings)}")
    if bp_readings:
        latest_bp = bp_readings[-1]
        print(f"   Latest BP:       {latest_bp.systolic}/{latest_bp.diastolic} ({latest_bp.date_str})")
    if weight_readings:
        latest_w = weight_readings[-1]
        print(f"   Latest weight:   {latest_w.weight_lb} lbs ({latest_w.date_str})")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
