"""
Kitzu Wellness Intelligence Platform — Unified Data Schema

This module defines the canonical data structures for the Kitzu data lake.
All collectors normalize their source data into these formats before storage.
The inference engine reads only from these normalized structures.

Design principles:
  - Every record has: source, timestamp, confidence
  - Units are standardized (metric where possible, mg/dL for blood)
  - Historical data is append-only (never overwrite, always add)
  - Schema is extensible — new fields don't break existing code
"""

from dataclasses import dataclass, field, asdict
from datetime import date, datetime
from typing import Optional
from pathlib import Path
import json

# ── Base paths ───────────────────────────────────────────────
# All paths are derived from this module's location, so the data lake
# lives next to the code regardless of the current working directory.
KITZU_ROOT = Path(__file__).resolve().parent
DATA_ROOT = KITZU_ROOT / "data"

# Data source directories — one subdirectory per collector/source.
OURA_DIR = DATA_ROOT / "oura"              # daily wearable records (one JSON per date)
BLOOD_DIR = DATA_ROOT / "blood"            # blood test results ({date}_{source}.json)
GENETICS_DIR = DATA_ROOT / "genetics"
MICROBIOME_DIR = DATA_ROOT / "microbiome"
VITALS_DIR = DATA_ROOT / "vitals"
CGM_DIR = DATA_ROOT / "cgm"
MODALITIES_DIR = DATA_ROOT / "modalities"  # holds the shared sessions.json log
UNIFIED_DIR = DATA_ROOT / "unified"        # cross-source merged profile lives here


# ── Unified Profile ──────────────────────────────────────────

def load_profile() -> dict:
    """Return the unified profile from disk, or a fresh blank one if absent."""
    profile_file = UNIFIED_DIR / "profile.json"
    if not profile_file.exists():
        return _empty_profile()
    return json.loads(profile_file.read_text())


def save_profile(profile: dict):
    """Write the unified profile to disk, stamping it with the current time."""
    target = UNIFIED_DIR / "profile.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    # Mutates the caller's dict: last_updated reflects this save.
    profile["last_updated"] = datetime.now().isoformat()
    serialized = json.dumps(profile, indent=2, default=str)
    target.write_text(serialized)


def _empty_profile() -> dict:
    """Create a blank unified profile structure."""
    return {
        "version": "1.0",
        "created": datetime.now().isoformat(),
        "last_updated": None,

        # Latest snapshot from each source
        "oura": {
            "sleep": None,
            "readiness": None,
            "heart_rate": None,
            "steps": None,
            "exercise": None,
            "hrv_trend": [],
            "last_sync": None,
        },
        "blood": {
            "latest_test": None,
            "markers": {},          # {marker_name: {value, unit, range, trend[], date}}
            "inner_age": None,
            "test_history": [],     # [{date, source, marker_count}]
            "last_sync": None,
        },
        "genetics": {
            "source": None,         # "23andMe", "AncestryDNA", etc.
            "snp_count": 0,
            "health_snps": {},      # {rsID: {genotype, gene, category, interpretation}}
            "carrier_status": [],   # [{condition, variant, status}]
            "last_sync": None,
        },
        "microbiome": {
            "source": None,         # "Viome", "Thorne", etc.
            "food_recs": {
                "superfoods": [],
                "enjoy": [],
                "minimize": [],
                "avoid": [],
            },
            "scores": {},           # {pathway_name: score}
            "last_sync": None,
        },
        "vitals": {
            "blood_pressure": None, # {systolic, diastolic, pulse, date}
            "weight": None,         # {value_lb, body_fat_pct, muscle_mass, date}
            "bp_history": [],
            "weight_history": [],
            "last_sync": None,
        },
        "cgm": {
            "active": False,
            "sensor": None,         # "Freestyle Libre 3", etc.
            "daily_avg": None,
            "time_in_range": None,  # percentage
            "spikes": [],           # [{time, value, trigger}]
            "last_sync": None,
        },
        "modalities": {
            "sessions": [],         # [{date, modality, duration_min, notes, pre_score, post_score}]
            "response_profiles": {},# {modality: {avg_hrv_delta, sleep_impact, recovery_impact}}
            "last_session": None,
        },

        # Cross-reference outputs (populated by inference engine)
        "insights": [],             # [{date, type, sources[], finding, confidence, action}]
        "active_protocol": None,    # Current adaptive protocol
        "compliance": {
            "score_7d": None,
            "detected_sessions": [],
            "missed_protocols": [],
        },
    }


# ── Data Record Types ────────────────────────────────────────

@dataclass
class OuraDailyRecord:
    """One day of Oura data — sleep, readiness, activity metrics."""
    date: str  # calendar day; also used as the storage filename
    sleep_hours: Optional[float] = None
    sleep_score: Optional[int] = None
    deep_sleep_min: Optional[int] = None
    rem_sleep_min: Optional[int] = None
    light_sleep_min: Optional[int] = None
    awake_min: Optional[int] = None
    bedtime: Optional[str] = None
    wake_time: Optional[str] = None
    avg_hrv: Optional[float] = None
    resting_hr: Optional[int] = None
    readiness_score: Optional[int] = None
    steps: Optional[int] = None
    active_calories: Optional[int] = None
    exercises: list = field(default_factory=list)
    source: str = "Oura"
    synced_at: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize to a plain dict, omitting any field left at None."""
        serialized = {}
        for name, value in asdict(self).items():
            if value is not None:
                serialized[name] = value
        return serialized


@dataclass
class BloodMarker:
    """A single blood biomarker reading."""
    name: str
    value: float
    unit: str
    optimal_range: str           # "65-99" format
    status: str                  # "optimal", "at_risk", "needs_work", "critical"
    date: str
    source: str = "InsideTracker"
    trend: list = field(default_factory=list)  # Historical values
    action: Optional[str] = None               # Suggested follow-up, if any

    def to_dict(self) -> dict:
        """Return every field as a plain dict — None values are kept."""
        plain = asdict(self)
        return plain


@dataclass
class BloodTest:
    """A complete blood test result."""
    date: str
    source: str
    markers: list                # List of BloodMarker; items without .to_dict() pass through as-is
    inner_age: Optional[float] = None
    marker_count: int = 0

    def to_dict(self) -> dict:
        """Serialize, normalizing each marker to a plain dict."""
        serialized = asdict(self)
        plain_markers = []
        for marker in self.markers:
            plain_markers.append(marker.to_dict() if hasattr(marker, "to_dict") else marker)
        serialized["markers"] = plain_markers
        return serialized


@dataclass
class ModalitySession:
    """A single wellness modality session."""
    date: str
    modality: str                # "red_light", "cold_plunge", "sauna", etc.
    sku: str                     # "KZ-RLT", "KZ-CP", etc.
    duration_min: int
    time_of_day: Optional[str] = None
    pre_score: Optional[int] = None    # 1-10 subjective
    post_score: Optional[int] = None   # 1-10 subjective
    device_readings: dict = field(default_factory=dict)  # {metric: value}
    notes: Optional[str] = None

    def to_dict(self) -> dict:
        """Serialize to a dict, dropping fields that were never set (None)."""
        compact = {}
        for name, value in asdict(self).items():
            if value is not None:
                compact[name] = value
        return compact


# ── Storage Helpers ──────────────────────────────────────────

def store_daily_oura(record: OuraDailyRecord):
    """Write one day's Oura record to its own dated JSON file.

    A re-sync of the same date replaces that date's file.
    """
    target = OURA_DIR / f"{record.date}.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(record.to_dict(), indent=2)
    target.write_text(payload)


def store_blood_test(test: BloodTest):
    """Persist a blood test result under a date+source filename."""
    # e.g. "2025-01-01_insidetracker.json" — spaces in the source become underscores.
    source_slug = test.source.lower().replace(" ", "_")
    target = BLOOD_DIR / f"{test.date}_{source_slug}.json"
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(test.to_dict(), indent=2, default=str))


def store_modality_session(session: ModalitySession):
    """Append one session to the shared sessions.json log."""
    log_path = MODALITIES_DIR / "sessions.json"
    log_path.parent.mkdir(parents=True, exist_ok=True)

    # Read-modify-write of the whole log; starts fresh if no log exists yet.
    sessions = json.loads(log_path.read_text()) if log_path.exists() else []
    sessions.append(session.to_dict())
    log_path.write_text(json.dumps(sessions, indent=2, default=str))


def load_oura_history(days: int = 30) -> list:
    """Return up to `days` most-recent Oura daily records, newest first.

    Files are date-named, so a reverse filename sort yields newest-first.
    """
    if not OURA_DIR.exists():
        return []

    recent_files = sorted(OURA_DIR.glob("*.json"), reverse=True)[:days]
    history = []
    for daily_file in recent_files:
        try:
            history.append(json.loads(daily_file.read_text()))
        except Exception:
            # Best-effort: skip unreadable or corrupt files rather than fail the load.
            continue
    return history


def load_blood_history() -> list:
    """Return every stored blood test record, in filename (date) order."""
    if not BLOOD_DIR.exists():
        return []

    tests = []
    for test_file in sorted(BLOOD_DIR.glob("*.json")):
        try:
            tests.append(json.loads(test_file.read_text()))
        except Exception:
            # Best-effort: skip unreadable or corrupt files rather than fail the load.
            continue
    return tests


# ── Quick check ──────────────────────────────────────────────

def data_lake_status() -> dict:
    """Summarize per-source file counts plus unified-profile freshness."""
    source_dirs = {
        "oura": OURA_DIR,
        "blood": BLOOD_DIR,
        "genetics": GENETICS_DIR,
        "microbiome": MICROBIOME_DIR,
        "vitals": VITALS_DIR,
        "cgm": CGM_DIR,
        "modalities": MODALITIES_DIR,
    }

    status = {}
    for name, directory in source_dirs.items():
        entry = {"files": 0, "latest": None}
        if directory.exists():
            json_files = list(directory.glob("*.json"))
            entry["files"] = len(json_files)
            if json_files:
                # max(stem) is the lexicographically last filename —
                # the newest when names are date-prefixed.
                entry["latest"] = max(f.stem for f in json_files)
        status[name] = entry

    # Unified profile: report existence, and its timestamp when readable.
    profile_path = UNIFIED_DIR / "profile.json"
    status["profile"] = {
        "exists": profile_path.exists(),
        "last_updated": None,
    }
    if profile_path.exists():
        try:
            loaded = json.loads(profile_path.read_text())
            status["profile"]["last_updated"] = loaded.get("last_updated")
        except Exception:
            # A corrupt profile still reports exists=True with no timestamp.
            pass

    return status


if __name__ == "__main__":
    # Quick diagnostic — print the data lake status.
    # (The original had a bare triple-quoted string here; under an `if` it is
    # a no-op string-literal statement, not a docstring, so it is now a comment.)
    import pprint
    print("Kitzu Data Lake Status")
    print("=" * 40)
    pprint.pprint(data_lake_status())
    print()
    print(f"Data root: {DATA_ROOT}")
    print(f"Profile:   {UNIFIED_DIR / 'profile.json'}")
