"""
Pre-Game Validator Module for Luke.

Validates betting picks against pregame intelligence before they reach
Bill's dashboard or WhatsApp.

Validation Pipeline:
  1. Tempo filter produces a pick with 3 legs and a rating (existing flow)
  2. Validator enhances with Claude API (primary) + web scraping (supplementary)
  3. Each pick gets a confidence tag:
     ★ VALIDATED  — pregame intel confirms the tempo read
     ⚠ CAUTION   — mixed signals, proceed with care
     ❌ DROPPED   — pregame intel contradicts the tempo read
  4. VALIDATED and CAUTION show prominently; DROPPED are collapsed

Architecture:
  BASELINE:  Tempo classification data already computed (always available)
  PRIMARY:   Claude API (Haiku) — grounded by live RSS headlines
  RSS:       ESPN, TSN, Sportsnet, BBC, Sky Sports, Guardian, Goal.com
  FALLBACK:  Heuristic rules using baseline tempo data if Claude is down
"""

import json
import logging
import re
from datetime import datetime, timezone

import requests

from config import Config

logger = logging.getLogger("pregame-validator")

# ── Configuration ─────────────────────────────────────────────

CACHE_TTL_SECONDS = 7200  # 2 hours — how long a cached validation stays fresh
CLAUDE_TIMEOUT = 15  # seconds allowed for the Anthropic API HTTP request

# ── In-Memory Cache ───────────────────────────────────────────

# Maps "home_away" (lowercased, stripped) → {"data": validation dict,
# "timestamp": datetime}. Process-local only — cleared on restart.
_validation_cache = {}


def _cache_key(home, away):
    return f"{home.lower().strip()}_{away.lower().strip()}"


def _get_cached(home, away):
    """Return the cached validation for this matchup, or None if absent or stale."""
    entry = _validation_cache.get(_cache_key(home, away))
    if entry is None:
        return None
    age_seconds = (datetime.now() - entry["timestamp"]).total_seconds()
    return entry["data"] if age_seconds < CACHE_TTL_SECONDS else None


def _set_cached(home, away, data):
    """Store a validation result for this matchup, timestamped now."""
    _validation_cache[_cache_key(home, away)] = {
        "data": data,
        "timestamp": datetime.now(),
    }


# ── Team Name Helper ──────────────────────────────────────────

def _team_short(name):
    for prefix in ["FC ", "AC ", "AS ", "SS ", "SSC ", "AFC "]:
        if name.startswith(prefix):
            name = name[len(prefix):]
    for suffix in [" FC", " SC", " CF"]:
        if name.endswith(suffix):
            name = name[:-len(suffix)]
    return name.strip()


# ── Tempo-Based Baseline Validation ──────────────────────────
# Uses the same data sports_betting.py already computed.
# This ALWAYS works — no external calls needed.

def _baseline_from_tempo(home, away, sport, pick_data=None):
    """
    Generate a baseline validation using the tempo classification
    that sports_betting.py already computed for this game.
    """
    try:
        from sports_betting import (
            NHL_TEAM_PROFILES, SOCCER_LEAGUE_TIERS,
            SportsBettingAnalyst
        )
        analyst = SportsBettingAnalyst()
    except ImportError:
        return None

    if sport == "nhl":
        tempo = analyst.classify_nhl_matchup(home, away)
        home_p = NHL_TEAM_PROFILES.get(home, {})
        away_p = NHL_TEAM_PROFILES.get(away, {})
        home_label = home_p.get("label", "unknown")
        away_label = away_p.get("label", "unknown")

        if tempo == "controlled":
            return {
                "verdict": "VALIDATED",
                "confidence": 7,
                "reason": f"Both teams are low-event ({home_label} + {away_label})",
                "key_factor": "Controlled tempo matchup",
                "injury_impact": "low",
                "form_supports_under": True,
            }
        elif tempo == "mixed":
            anchor = home if home_p.get("tempo") == "low" else away
            anchor_label = NHL_TEAM_PROFILES.get(anchor, {}).get("label", "defensive")
            return {
                "verdict": "CAUTION",
                "confidence": 5,
                "reason": f"{anchor} ({anchor_label}) anchors the pace, but risk from the other side",
                "key_factor": f"{anchor} controls tempo",
                "injury_impact": "low",
                "form_supports_under": True,
            }
        else:
            return {
                "verdict": "DROPPED",
                "confidence": 2,
                "reason": "Both teams are high-event — chaotic matchup",
                "key_factor": "High-offense tempo profile",
                "injury_impact": "low",
                "form_supports_under": False,
            }

    else:
        # Soccer — use league tier
        game = {"sport": sport}
        league_class = analyst.classify_soccer_matchup(game)
        tier = league_class.get("tier", 2)
        label = league_class.get("label", sport.upper())
        is_cup = league_class.get("is_cup", False)

        if tier == 1:
            reason = f"{label} — prime under territory"
            if is_cup:
                reason = f"{label} knockout — teams protect leads"
            return {
                "verdict": "VALIDATED",
                "confidence": 7,
                "reason": reason,
                "key_factor": f"Tier 1 {'cup' if is_cup else 'league'} environment",
                "injury_impact": "low",
                "form_supports_under": True,
            }
        elif tier == 2:
            return {
                "verdict": "CAUTION",
                "confidence": 5,
                "reason": f"{label} — usable with filtering, check team quality",
                "key_factor": "Tier 2 league needs careful matchup selection",
                "injury_impact": "low",
                "form_supports_under": None,
            }
        else:
            return {
                "verdict": "DROPPED",
                "confidence": 2,
                "reason": f"{label} — high-scoring league, avoid unders",
                "key_factor": "Tier 3 league, goals come in bunches",
                "injury_impact": "low",
                "form_supports_under": False,
            }


# ── Live RSS Headlines (grounding data for Claude) ───────────

def _fetch_live_headlines(home, away, sport):
    """
    Collect live RSS headlines (team-specific chatter plus general sport
    news) into one text blob, used to ground the Claude prompt.

    Strictly best-effort: returns "" when pregame_chatter is unavailable
    or any part of the fetch fails.
    """
    try:
        from pregame_chatter import get_pregame_chatter, get_general_sport_headlines

        chatter = get_pregame_chatter(home, away, sport, max_items=8)
        general = get_general_sport_headlines(sport, max_items=5)

        lines = []

        if chatter:
            lines.append("TEAM-SPECIFIC NEWS (from today's RSS feeds):")
            for item in chatter:
                relevance = item.get("relevance", "")
                tag = f"[{relevance.upper()}]" if relevance else ""
                lines.append(f"  {tag} {item.get('title', '')}")
                snippet = item.get("summary", "")[:120]
                # Skip trivially short summaries — they add no grounding value.
                if len(snippet) > 20:
                    lines.append(f"    {snippet}")
                lines.append(f"    — {item.get('source', '')}")

        if general:
            lines.append("\nGENERAL SPORT HEADLINES:")
            lines.extend(
                f"  {item.get('title', '')} — {item.get('source', '')}"
                for item in general[:3]
            )

        return "\n".join(lines) if lines else ""

    except ImportError:
        logger.debug("pregame_chatter not available for RSS headlines")
        return ""
    except Exception as e:
        logger.debug(f"RSS headline fetch failed (non-critical): {e}")
        return ""


# ── Claude API Validation (primary enhancer) ──────────────────

def _validate_with_claude(home, away, sport, baseline):
    """
    Use Claude Haiku to enhance the baseline validation with deeper analysis.
    Grounded by LIVE RSS headlines so Claude doesn't hallucinate stale rosters.

    Args:
        home: Home team name.
        away: Away team name.
        sport: Internal sport key (e.g. "nhl", "seriea", "ucl").
        baseline: Tempo-based validation dict; its verdict/reason are quoted
            in the prompt so Claude reacts to the tempo read.

    Returns:
        Enhanced validation dict (verdict/confidence/reason/...), or None if
        the API key is missing, the request fails, or the response isn't
        usable JSON.
    """
    api_key = Config.CLAUDE_API_KEY
    if not api_key:
        return None

    under_line = "Under 6.5 goals" if sport == "nhl" else "Under 2.5 goals"

    # Map internal sport keys to human-readable league names for the prompt.
    league_map = {
        "seriea": "Serie A", "ligue1": "Ligue 1", "liga_portugal": "Liga Portugal",
        "epl": "EPL", "laliga": "La Liga", "mls": "MLS",
        "europa": "Europa League", "ucl": "Champions League",
        "conference": "Conference League", "nhl": "NHL",
    }
    league = league_map.get(sport, sport.upper())

    # Fetch live RSS headlines for grounding
    live_headlines = _fetch_live_headlines(home, away, sport)

    # Build the live data section (omitted entirely when no headlines found,
    # so the prompt's "no live data" rule kicks in)
    live_section = ""
    if live_headlines:
        live_section = "\n\nLIVE DATA (from today's feeds — use THIS, not your training data):\n" + live_headlines

    today = datetime.now().strftime("%B %d, %Y")

    prompt = f"""You are a sharp sports betting analyst. Today is {today}.

MATCHUP: {away} @ {home} ({league})
BET: {under_line}
STRATEGY: "Predictable Tempo" — we bet on controlled, low-scoring game environments.

Our tempo filter rated this: {baseline.get('verdict', 'UNKNOWN')} ({baseline.get('reason', '')}){live_section}

CRITICAL RULES:
- ONLY reference players, injuries, and roster info that appears in the LIVE DATA above
- If no live data mentions a player, do NOT name them — your training data may be outdated (trades, injuries)
- Focus on team-level factors: defensive structure, scoring trends, tactical matchup
- If you have no live data, base your analysis on the tempo profile and general team style

Respond in EXACT JSON only (no markdown, no commentary):
{{"verdict":"VALIDATED" or "CAUTION" or "DROPPED","confidence":1-10,"reason":"Max 80 chars","key_factor":"Max 60 chars","injury_impact":"low" or "medium" or "high","form_supports_under":true or false}}"""

    try:
        resp = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers={
                "x-api-key": api_key,
                "anthropic-version": "2023-06-01",
                "content-type": "application/json",
            },
            json={
                "model": "claude-haiku-4-5-20251001",
                "max_tokens": 200,
                "messages": [{"role": "user", "content": prompt}],
            },
            timeout=CLAUDE_TIMEOUT,
        )

        if resp.status_code == 200:
            content = resp.json()["content"][0]["text"].strip()
            try:
                result = json.loads(content)
            except json.JSONDecodeError:
                # Claude sometimes wraps the JSON in prose — grab the first
                # flat {...} span (the expected schema has no nested objects).
                m = re.search(r'\{[^}]+\}', content)
                if m:
                    result = json.loads(m.group())
                else:
                    logger.warning(f"Claude non-JSON: {content[:100]}")
                    return None

            # Sanity-check the minimum schema before trusting the result.
            if "verdict" in result and "confidence" in result:
                logger.info(f"Claude: {away}@{home} → {result['verdict']} ({result['confidence']}/10)")
                return result
        else:
            logger.warning(f"Claude API {resp.status_code}")

    except Exception as e:
        logger.warning(f"Claude error: {e}")

    return None


# ── Main Validation Function ─────────────────────────────────

def validate_pick(home, away, sport, pick_data=None):
    """
    Run the full validation pipeline for one matchup. Always returns a
    meaningful validation dict (never None).

    Steps:
      1. Serve a fresh cached result when one exists.
      2. Build a baseline verdict from the tempo classification (always works).
      3. Ask Claude (grounded by live RSS headlines) to enhance the baseline,
         deferring to the tempo read when Claude is unsure.
    """
    cached = _get_cached(home, away)
    if cached:
        return cached

    logger.info(f"Validating: {away} @ {home} ({sport})")
    sources = ["Tempo Profile"]

    # Baseline from tempo data, with a neutral fallback when unavailable.
    baseline = _baseline_from_tempo(home, away, sport, pick_data) or {
        "verdict": "CAUTION",
        "confidence": 5,
        "reason": "Tempo profile supports controlled environment",
        "key_factor": "Passed tempo filter",
        "injury_impact": "unknown",
        "form_supports_under": None,
    }

    claude = _validate_with_claude(home, away, sport, baseline)
    if not claude:
        analysis = baseline
    else:
        baseline_says = baseline.get("verdict", "CAUTION")
        claude_says = claude.get("verdict", "CAUTION")
        claude_unsure = claude.get("confidence", 5) <= 5

        if baseline_says == "VALIDATED" and claude_says == "CAUTION" and claude_unsure:
            # Claude is lukewarm — keep the tempo verdict, note the caution.
            analysis = baseline
            analysis["reason"] = f"{baseline['reason']} (Claude: mild caution)"
            sources.append("Claude AI (deferred to baseline)")
        elif baseline_says == "DROPPED" and claude_says != "DROPPED" and claude_unsure:
            # Baseline says avoid; Claude isn't confident enough to overrule.
            analysis = baseline
            sources.append("Claude AI (deferred to baseline)")
        else:
            # Claude agrees or holds a strong opinion — take Claude's result.
            analysis = claude
            sources.append("Claude AI")
        sources.append("RSS Headlines")

    analysis["sources"] = sources
    _set_cached(home, away, analysis)
    return analysis


def validate_picks_batch(picks):
    """
    Validate a batch of picks and return the filtered, sorted list.

    Per pick:
      - Games that already started are marked DROPPED and later removed.
      - Picks the tempo filter already rated "avoid" are DROPPED without
        spending a validation call.
      - Everything else runs through validate_pick(); a DROPPED verdict
        downgrades the pick's rating, a high-confidence VALIDATED verdict
        upgrades a "playable" pick to "prime".

    The returned list keeps only today's games with validation confidence
    >= 7, sorted by rating tier then confidence (descending within a tier).
    """
    # Timezone-aware "now"; datetime.utcnow() is deprecated and its naive
    # result forced tzinfo-stripping on every parsed game time.
    now = datetime.now(timezone.utc)

    for pick in picks:
        # Skip games that have already started.
        commence = pick.get("commence_time", "")
        if commence:
            try:
                game_time = datetime.fromisoformat(commence.replace("Z", "+00:00"))
                if game_time.tzinfo is None:
                    # Feed gave no UTC offset — assume UTC (matches prior behavior).
                    game_time = game_time.replace(tzinfo=timezone.utc)
                if game_time < now:
                    pick["validation"] = {
                        "verdict": "DROPPED",
                        "confidence": 0,
                        "reason": "Game already started — skipping",
                        "key_factor": "In progress",
                        "sources": [],
                    }
                    pick["rating"] = "avoid"
                    logger.debug(f"Skipping {pick.get('away', '')} @ {pick.get('home', '')} — already started")
                    continue
            except (ValueError, TypeError):
                pass  # Can't parse time — proceed with validation

        rating = pick.get("rating", "neutral")
        if rating == "avoid":
            # Tempo filter already rejected this — no validation call needed.
            pick["validation"] = {
                "verdict": "DROPPED",
                "confidence": 0,
                "reason": "Tempo filter flagged as avoid",
                "key_factor": "Chaotic tempo",
                "sources": [],
            }
            continue

        home = pick.get("home", "")
        away = pick.get("away", "")
        sport = pick.get("sport", "").lower()

        # Map display league names to the internal sport keys the validator uses.
        sport_key_map = {
            "nhl": "nhl", "epl": "epl", "serie a": "seriea",
            "ligue 1": "ligue1", "la liga": "laliga",
            "liga portugal": "liga_portugal", "mls": "mls",
            "bundesliga": "bundesliga",
            "europa league": "europa", "conference league": "conference",
            "champions league": "ucl",
        }
        sport_key = sport_key_map.get(sport, sport)

        try:
            validation = validate_pick(home, away, sport_key, pick)
            pick["validation"] = validation

            if validation["verdict"] == "DROPPED" and rating in ("prime", "playable"):
                pick["rating"] = "avoid"
                pick["tag"] = f"❌ DROPPED — {validation.get('reason', '')[:50]}"
            elif validation["verdict"] == "VALIDATED" and rating == "playable":
                if validation.get("confidence", 0) >= 7:
                    pick["rating"] = "prime"
                    pick["tag"] = f"★ VALIDATED — {pick.get('tag', '')}"

        except Exception as e:
            # Fall back to a neutral tempo-based verdict so the pick survives.
            logger.warning(f"Validation error {home} vs {away}: {e}")
            pick["validation"] = {
                "verdict": "CAUTION",
                "confidence": 5,
                "reason": "Tempo profile supports this pick",
                "sources": ["Tempo Profile"],
            }

    # Remove games that already started from results
    picks = [p for p in picks if p.get("validation", {}).get("key_factor") != "In progress"]

    # Filter: only keep picks with confidence >= 7 (dashboard quality threshold)
    pre_filter = len(picks)
    picks = [
        p for p in picks
        if p.get("validation", {}).get("confidence", 0) >= 7
    ]

    # Filter: only keep same-day games (today's date in local time)
    today_str = datetime.now().date().isoformat()
    picks = [
        p for p in picks
        if not p.get("commence_time")
        or p.get("commence_time", "")[:10] == today_str
        or p.get("time", "")[:10] == today_str
    ]

    # Sort by rating tier first, then by confidence descending within a tier.
    rating_order = {"prime": 0, "playable": 1, "neutral": 2, "avoid": 3}
    picks.sort(key=lambda p: (
        rating_order.get(p.get("rating", "neutral"), 2),
        -(p.get("validation", {}).get("confidence", 0))
    ))

    # NOTE: the old "started" count was computed here, but in-progress picks
    # were already removed above, so it was always 0 and never logged — dropped.
    v_count = sum(1 for p in picks if p.get("validation", {}).get("verdict") == "VALIDATED")
    d_count = sum(1 for p in picks if p.get("validation", {}).get("verdict") == "DROPPED")
    logger.info(f"Validated: {v_count} ★, {d_count} ❌ | Showing {len(picks)}/{pre_filter} (7/10+ confidence, today only)")

    return picks


# ── WhatsApp Formatting ───────────────────────────────────────

def format_validation_for_whatsapp(pick):
    """
    Render a pick's validation as an indented WhatsApp status snippet.

    Format: "  <icon> PREGAME: <VERDICT> — <reason> (<confidence>/10)"
    followed by an indented "Sources: ..." line when sources exist.
    Missing fields are simply omitted.
    """
    v = pick.get("validation", {})
    verdict = v.get("verdict", "UNKNOWN")
    reason = v.get("reason", "")
    confidence = v.get("confidence", 0)
    sources = v.get("sources", [])

    icons = {"VALIDATED": "★", "CAUTION": "⚠️", "DROPPED": "❌"}
    icon = icons.get(verdict, "?")

    line = f"  {icon} PREGAME: {verdict}"
    if reason:
        line += f" — {reason}"
    # BUG FIX: confidence was previously appended after the Sources line was
    # concatenated, so "(X/10)" landed on the source list instead of the
    # verdict/reason line. Append it before sources.
    if confidence:
        line += f" ({confidence}/10)"
    if sources:
        line += f"\n    Sources: {', '.join(sources)}"

    return line
