#!/usr/bin/env python3
"""
Pregame Research — Storyline-Driven Parlay Intelligence
========================================================
Scans today's soccer (+ NHL) fixtures, pulls RSS headlines,
and uses Claude Sonnet to build narrative-based pregame reports
with a highlighted Parlay of the Day.

100% free data sources:
  - ESPN public API for today's fixtures (no key required)
  - RSS feeds for team news (ESPN, BBC, Sky, Guardian, Goal.com)
  - Claude Sonnet for storyline synthesis (small daily API cost)

Independent of Luke, independent of The Odds API.

Usage:
  python3 pregame_research.py                     # uses .env for Claude key
  ANTHROPIC_API_KEY=xxx python3 pregame_research.py  # explicit key
"""

import os
import sys
import json
import logging
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from datetime import datetime, date, timedelta, timezone

try:
    import requests
except ImportError:
    print("pip install requests")
    sys.exit(1)

# Load .env if present (best-effort: missing python-dotenv is not an error,
# the key can still arrive via the process environment).
try:
    from dotenv import load_dotenv
    load_dotenv(Path(__file__).resolve().parent / ".env")
except ImportError:
    pass

# Console logging: timestamps only (HH:MM:SS) with a [PREGAME] tag so this
# script's output is distinguishable when run alongside other jobs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [PREGAME] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("pregame-research")

# ── Config ────────────────────────────────────────────────────
# ANTHROPIC_API_KEY takes precedence; CLAUDE_API_KEY is a legacy fallback.
# Empty string means "no key" and triggers the non-AI basic report path.
CLAUDE_API_KEY = os.getenv("ANTHROPIC_API_KEY") or os.getenv("CLAUDE_API_KEY", "")
# Output artifacts live next to this script, wherever it is installed.
DASHBOARD_JSON = Path(__file__).resolve().parent / "dashboard-data.json"
LOGS_DIR = Path(__file__).resolve().parent / "logs"
LOGS_DIR.mkdir(exist_ok=True)

# ── ESPN Public Scoreboard Endpoints (free, no key) ──────────
# These return today's games for each league.
# Keys are internal league slugs (see fetch_headlines(), which splits feeds
# on "nhl" vs everything else); "label" is the human-readable name used in
# reports and in LEAGUE_TIERS lookups below.
ESPN_LEAGUES = {
    "epl":         {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/eng.1/scoreboard",   "label": "EPL"},
    "laliga":      {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/esp.1/scoreboard",   "label": "La Liga"},
    "seriea":      {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/ita.1/scoreboard",   "label": "Serie A"},
    "bundesliga":  {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/ger.1/scoreboard",   "label": "Bundesliga"},
    "ligue1":      {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/fra.1/scoreboard",   "label": "Ligue 1"},
    "liga_portugal":{"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/por.1/scoreboard",  "label": "Liga Portugal"},
    "eredivisie":  {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/ned.1/scoreboard",   "label": "Eredivisie"},
    "mls":         {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/usa.1/scoreboard",   "label": "MLS"},
    "ucl":         {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/uefa.champions/scoreboard", "label": "Champions League"},
    "europa":      {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/uefa.europa/scoreboard",    "label": "Europa League"},
    "conference":  {"url": "https://site.api.espn.com/apis/site/v2/sports/soccer/uefa.europa.conf/scoreboard","label": "Conference League"},
    "nhl":         {"url": "https://site.api.espn.com/apis/site/v2/sports/hockey/nhl/scoreboard",     "label": "NHL"},
}

# ── RSS Feed Sources (from pregame_chatter.py) ───────────────
# Two feed pools; fetch_headlines() pulls "soccer" when any non-NHL fixture
# exists today and "nhl" when an NHL fixture exists, so quiet days cost
# nothing. Feeds are fetched best-effort — a dead feed is skipped silently.
RSS_FEEDS = {
    "soccer": [
        {"name": "ESPN FC", "url": "https://www.espn.com/espn/rss/soccer/news"},
        {"name": "BBC Football", "url": "https://feeds.bbci.co.uk/sport/football/rss.xml"},
        {"name": "Sky Sports Football", "url": "https://www.skysports.com/rss/11095"},
        {"name": "The Guardian Football", "url": "https://www.theguardian.com/football/rss"},
        {"name": "Goal.com", "url": "http://www.goal.com/en/feeds/news?fmt=rss"},
        {"name": "TSN Soccer", "url": "https://www.tsn.ca/rss/soccer"},
    ],
    "nhl": [
        {"name": "ESPN NHL", "url": "https://www.espn.com/espn/rss/nhl/news"},
        {"name": "TSN Hockey", "url": "https://www.tsn.ca/rss/hockey"},
        {"name": "Sportsnet NHL", "url": "https://www.sportsnet.ca/hockey/nhl/feed/"},
    ],
}

# ── Team Name Aliases for RSS Matching ────────────────────────
# Maps ESPN displayName → lowercase substrings that count as a mention.
# Matching (_matches_team) is plain case-insensitive substring search, so
# short aliases are deliberately loose and can produce false positives:
# e.g. "city" matches any headline containing that word, and "rangers"
# appears under both Rangers (Glasgow) and New York Rangers. This is a
# known precision trade-off — headlines are advisory, not load-bearing.
TEAM_ALIASES = {
    # EPL
    "Arsenal": ["arsenal", "gunners"],
    "Liverpool": ["liverpool"],
    "Manchester City": ["man city", "manchester city", "city"],
    "Manchester United": ["man united", "manchester united", "man utd"],
    "Chelsea": ["chelsea", "blues"],
    "Tottenham Hotspur": ["tottenham", "spurs"],
    "Newcastle United": ["newcastle", "magpies"],
    "Aston Villa": ["aston villa", "villa"],
    "Brighton": ["brighton", "brighton and hove", "seagulls"],
    "West Ham United": ["west ham"],
    "Fulham": ["fulham"],
    "Crystal Palace": ["crystal palace", "palace"],
    "Bournemouth": ["bournemouth"],
    "Brentford": ["brentford"],
    "Wolves": ["wolves", "wolverhampton"],
    "Everton": ["everton"],
    "Nottingham Forest": ["nottingham", "forest"],
    "Leicester City": ["leicester"],
    "Ipswich Town": ["ipswich"],
    "Southampton": ["southampton"],
    # Serie A
    "AC Milan": ["milan", "ac milan", "rossoneri"],
    "Inter Milan": ["inter", "inter milan", "nerazzurri"],
    "Juventus": ["juventus", "juve"],
    "Napoli": ["napoli"],
    "AS Roma": ["roma"],
    "Lazio": ["lazio"],
    "Atalanta": ["atalanta"],
    "Fiorentina": ["fiorentina"],
    "Bologna": ["bologna"],
    "Torino": ["torino"],
    # La Liga
    "Real Madrid": ["real madrid"],
    "Barcelona": ["barcelona", "barca", "barça"],
    "Atletico Madrid": ["atletico", "atletico madrid"],
    "Real Sociedad": ["real sociedad", "sociedad"],
    "Athletic Bilbao": ["athletic bilbao", "bilbao"],
    "Villarreal": ["villarreal"],
    "Sevilla": ["sevilla"],
    "Real Betis": ["betis", "real betis"],
    # Bundesliga
    "Bayern Munich": ["bayern", "bayern munich"],
    "Borussia Dortmund": ["dortmund", "bvb"],
    "RB Leipzig": ["leipzig", "rb leipzig"],
    "Bayer Leverkusen": ["leverkusen", "bayer leverkusen"],
    "Stuttgart": ["stuttgart"],
    "Eintracht Frankfurt": ["frankfurt", "eintracht"],
    # Ligue 1
    "Paris Saint-Germain": ["psg", "paris saint-germain"],
    "Olympique Marseille": ["marseille", "om"],
    "Lyon": ["lyon", "olympique lyonnais"],
    "Monaco": ["monaco"],
    "Lille": ["lille"],
    # Portuguese
    "Benfica": ["benfica"],
    "Porto": ["porto"],
    "Sporting CP": ["sporting", "sporting cp", "sporting lisbon"],
    # UCL / Europa regulars
    "Celtic": ["celtic"],
    "Rangers": ["rangers"],
    "Ajax": ["ajax"],
    "PSV": ["psv", "psv eindhoven"],
    "Feyenoord": ["feyenoord"],
    # MLS
    "CF Montreal": ["cf montreal", "cf montréal"],
    "Toronto FC": ["toronto fc", "tfc"],
    "Vancouver Whitecaps": ["whitecaps"],
    "Inter Miami": ["inter miami"],
    "LAFC": ["lafc"],
    "LA Galaxy": ["la galaxy", "galaxy"],
    "Atlanta United": ["atlanta united"],
    "Seattle Sounders": ["sounders"],
    "Columbus Crew": ["columbus crew", "crew"],
    # NHL
    "Montreal Canadiens": ["canadiens", "habs", "montreal"],
    "Ottawa Senators": ["senators", "sens", "ottawa"],
    "Toronto Maple Leafs": ["maple leafs", "leafs"],
    "Boston Bruins": ["bruins", "boston"],
    # NOTE(review): "rangers" collides with the Glasgow Rangers entry above —
    # an NYR headline will also match a Celtic–Rangers fixture and vice versa.
    "New York Rangers": ["rangers", "nyr"],
    "Carolina Hurricanes": ["hurricanes", "carolina"],
    "Florida Panthers": ["panthers", "florida"],
    "Edmonton Oilers": ["oilers", "edmonton"],
    "Colorado Avalanche": ["avalanche", "avs", "colorado"],
    "Dallas Stars": ["stars", "dallas"],
    "Winnipeg Jets": ["jets", "winnipeg"],
    "Vancouver Canucks": ["canucks"],
    "Tampa Bay Lightning": ["lightning", "tampa"],
    "Vegas Golden Knights": ["golden knights", "vegas"],
    "Pittsburgh Penguins": ["penguins", "pens"],
    "Washington Capitals": ["capitals", "caps"],
    "New Jersey Devils": ["devils"],
    "New York Islanders": ["islanders"],
    "Minnesota Wild": ["wild", "minnesota"],
}

# ── Tempo/Tier Strategy Data ─────────────────────────────────
# Leagues: Tier 1 = low-scoring / structured, Tier 2 = mixed, Tier 3 = high-scoring
# Consumed by classify_fixture(): tier 1 → "prime" parlay fit, tier 2 →
# "playable", tier 3 → "avoid". Unlisted leagues default to tier 2.
LEAGUE_TIERS = {
    "EPL": 1, "La Liga": 1, "Serie A": 1,
    "Champions League": 1, "Europa League": 1,
    "Bundesliga": 2, "Ligue 1": 2, "Liga Portugal": 2,
    "Conference League": 2,
    "MLS": 3, "Eredivisie": 3,
    "NHL": None,  # handled separately via team profiles
}

# NHL tempo profiles (subset — controlled teams most relevant for unders)
# Membership tests in classify_fixture() use exact ESPN displayName strings;
# teams in neither list are treated as "neutral". The two lists are disjoint.
NHL_CONTROLLED = [
    "Minnesota Wild", "New Jersey Devils", "Carolina Hurricanes",
    "Nashville Predators", "Los Angeles Kings", "Dallas Stars",
    "New York Islanders", "Columbus Blue Jackets", "Anaheim Ducks",
    "Ottawa Senators", "Boston Bruins", "St. Louis Blues",
]
NHL_CHAOTIC = [
    "Edmonton Oilers", "Colorado Avalanche", "Florida Panthers",
    "Toronto Maple Leafs", "Tampa Bay Lightning", "Pittsburgh Penguins",
    "Vancouver Canucks", "Buffalo Sabres", "Detroit Red Wings",
]


# ── Step 1: Get Today's Fixtures from ESPN ────────────────────

def fetch_fixtures():
    """Fetch today's fixtures from ESPN's public scoreboard API across all leagues.

    Returns:
        A list of fixture dicts with keys: home, away, league, league_key,
        time (formatted Eastern kickoff, may be ""), date_iso, status, venue,
        round, knockout_context. Completed games (final/full-time) are
        skipped. A failure in one league never aborts the others.
    """
    # ESPN expects dates as YYYYMMDD
    espn_date = date.today().strftime("%Y%m%d")

    # Use the real US/Eastern zone so kickoff labels stay correct across DST;
    # the old hard-coded UTC-4 offset was wrong during standard time. Fall
    # back to the fixed offset if tz data is unavailable on this host.
    try:
        from zoneinfo import ZoneInfo
        eastern = ZoneInfo("America/New_York")
    except Exception:
        eastern = timezone(timedelta(hours=-4))

    all_fixtures = []

    for league_key, league_info in ESPN_LEAGUES.items():
        try:
            resp = requests.get(
                league_info["url"],
                params={"dates": espn_date},
                timeout=10,
                headers={"User-Agent": "Clawd-Pregame/1.0"},
            )
            if resp.status_code != 200:
                log.debug(f"{league_key}: ESPN returned {resp.status_code}")
                continue

            events = resp.json().get("events", [])

            for event in events:
                try:
                    competitions = event.get("competitions", [{}])
                    comp = competitions[0] if competitions else {}
                    competitors = comp.get("competitors", [])
                    if len(competitors) < 2:
                        continue

                    # Resolve the two sides by their homeAway flag
                    home_team = None
                    away_team = None
                    for c in competitors:
                        team_info = c.get("team", {})
                        name = team_info.get("displayName", team_info.get("shortDisplayName", ""))
                        if c.get("homeAway") == "home":
                            home_team = name
                        else:
                            away_team = name

                    if not home_team or not away_team:
                        continue

                    # Parse kickoff time (ESPN sends ISO-8601 with a "Z" suffix)
                    game_date = event.get("date", "")
                    time_str = ""
                    if game_date:
                        try:
                            dt = datetime.fromisoformat(game_date.replace("Z", "+00:00"))
                            time_str = dt.astimezone(eastern).strftime("%I:%M %p ET")
                        except ValueError:
                            time_str = ""

                    status = event.get("status", {}).get("type", {}).get("name", "")
                    # Skip games already completed
                    if status in ("STATUS_FINAL", "STATUS_FULL_TIME"):
                        continue

                    # Accumulate knockout/series/aggregate context from notes
                    knockout_context = ""
                    for note in event.get("notes", []):
                        text = note.get("headline", "") or note.get("text", "")
                        if text:
                            knockout_context += text + ". "

                    # Series summary (UCL / Europa / Conference knockout ties)
                    summary = event.get("series", {}).get("summary", "")
                    if summary:
                        knockout_context += f"Series: {summary}. "

                    # Competition round label, when ESPN provides one
                    comp_type = comp.get("type", {})
                    round_name = ""
                    if comp_type:
                        round_name = comp_type.get("text", "") or comp_type.get("abbreviation", "")

                    # ESPN sometimes attaches leg/aggregate info to competitor
                    # "records" — fold those into the knockout context too.
                    for c in competitors:
                        tname = c.get("team", {}).get("displayName", "")
                        for rec in c.get("records", []):
                            rec_name = rec.get("name", "").lower()
                            if "leg" in rec_name or "aggregate" in rec_name:
                                knockout_context += f"{tname}: {rec.get('summary', '')}. "

                    all_fixtures.append({
                        "home": home_team,
                        "away": away_team,
                        "league": league_info["label"],
                        "league_key": league_key,
                        "time": time_str,
                        "date_iso": game_date,
                        "status": status,
                        "venue": comp.get("venue", {}).get("fullName", ""),
                        "round": round_name,
                        "knockout_context": knockout_context.strip(),
                    })

                except (KeyError, IndexError):
                    # Malformed event payload — skip it, keep the rest
                    continue

            if events:
                game_count = len([f for f in all_fixtures if f["league"] == league_info["label"]])
                if game_count:
                    log.info(f"{league_info['label']}: {game_count} games today")

        except Exception as e:
            log.warning(f"{league_key}: fixture fetch failed — {e}")

    log.info(f"Total: {len(all_fixtures)} fixtures across all leagues")
    return all_fixtures


# ── Step 2: Pull RSS Headlines ────────────────────────────────

def _parse_rss(content):
    """Parse RSS XML."""
    items = []
    try:
        root = ET.fromstring(content)
        for item in root.iter("item"):
            entry = {}
            title_el = item.find("title")
            desc_el = item.find("description")
            link_el = item.find("link")
            if title_el is not None and title_el.text:
                entry["title"] = title_el.text.strip()
            if desc_el is not None and desc_el.text:
                clean = re.sub(r"<[^>]+>", "", desc_el.text)
                entry["summary"] = clean.strip()[:200]
            if link_el is not None and link_el.text:
                entry["link"] = link_el.text.strip()
            if "title" in entry:
                items.append(entry)
    except ET.ParseError:
        pass
    return items


def _matches_team(text, team_name):
    """Check if text mentions a team by any alias."""
    if not text:
        return False
    text_lower = text.lower()
    aliases = TEAM_ALIASES.get(team_name, [team_name.lower()])
    for alias in aliases:
        if alias.lower() in text_lower:
            return True
    if team_name.lower() in text_lower:
        return True
    return False


def fetch_headlines(fixtures):
    """Fetch RSS headlines and attach up to 6 relevant ones per fixture.

    Returns {"Away@Home": [headline dicts]} where each headline carries
    title/summary/source plus a relevance flag: "both" (mentions both
    clubs), "home", or "away". Feeds are chosen by sport and fetched
    best-effort — a dead feed is logged at debug level and skipped.
    """
    wants_soccer = any(f["league_key"] != "nhl" for f in fixtures)
    wants_nhl = any(f["league_key"] == "nhl" for f in fixtures)

    feeds = []
    if wants_soccer:
        feeds += RSS_FEEDS["soccer"]
    if wants_nhl:
        feeds += RSS_FEEDS["nhl"]
    if not feeds:
        return {}

    fetched = []
    for feed in feeds:
        try:
            resp = requests.get(feed["url"], timeout=8, headers={
                "User-Agent": "Clawd-Pregame/1.0"
            })
            if resp.status_code == 200:
                items = _parse_rss(resp.text)
                for item in items:
                    item["source"] = feed["name"]
                fetched.append({"name": feed["name"], "items": items})
                log.debug(f"RSS {feed['name']}: {len(items)} items")
        except Exception as e:
            log.debug(f"RSS {feed['name']}: failed — {e}")

    # Match every fetched headline against each fixture's two clubs
    fixture_headlines = {}
    for fixture in fixtures:
        key = f"{fixture['away']}@{fixture['home']}"
        matched = []

        for feed in fetched:
            for item in feed["items"]:
                blob = f"{item.get('title', '')} {item.get('summary', '')}"
                hit_home = _matches_team(blob, fixture["home"])
                hit_away = _matches_team(blob, fixture["away"])
                if not (hit_home or hit_away):
                    continue
                if hit_home and hit_away:
                    relevance = "both"
                elif hit_home:
                    relevance = "home"
                else:
                    relevance = "away"
                matched.append({
                    "title": item.get("title", ""),
                    "summary": item.get("summary", ""),
                    "source": item.get("source", ""),
                    "relevance": relevance,
                })

        # Drop near-duplicates (same first 50 chars of title, case-insensitive)
        seen = set()
        deduped = []
        for entry in matched:
            fingerprint = entry["title"].lower()[:50]
            if fingerprint in seen:
                continue
            seen.add(fingerprint)
            deduped.append(entry)

        # Headlines mentioning both clubs float to the top; cap at 6
        deduped.sort(key=lambda x: (0 if x["relevance"] == "both" else 1))
        fixture_headlines[key] = deduped[:6]

    total = sum(len(v) for v in fixture_headlines.values())
    log.info(f"Matched {total} headlines across {len(fixture_headlines)} matchups")
    return fixture_headlines


# ── Step 3: Classify Tempo / Tier ─────────────────────────────

def classify_fixture(fixture):
    """Classify a fixture for the Predictable Tempo strategy.

    Returns {"tempo", "tier", "parlay_fit", "note"}. NHL games are typed
    via NHL_CONTROLLED/NHL_CHAOTIC team profiles; soccer uses the
    LEAGUE_TIERS table (unlisted leagues default to tier 2).
    """
    league = fixture["league"]

    if league != "NHL":
        tier = LEAGUE_TIERS.get(league, 2)
        if tier == 1:
            return {"tempo": "structured", "tier": 1, "parlay_fit": "prime",
                    "note": f"Tier 1 {league} — prime under territory"}
        if tier == 2:
            return {"tempo": "mixed", "tier": 2, "parlay_fit": "playable",
                    "note": f"Tier 2 {league} — usable with right matchup"}
        return {"tempo": "open", "tier": 3, "parlay_fit": "avoid",
                "note": f"Tier 3 {league} — high-scoring league, risky for unders"}

    # NHL: team-profile driven (the two profile lists are disjoint)
    home = fixture["home"]
    away = fixture["away"]
    home_ctrl = home in NHL_CONTROLLED
    away_ctrl = away in NHL_CONTROLLED

    if home_ctrl and away_ctrl:
        return {"tempo": "controlled", "tier": 1, "parlay_fit": "prime",
                "note": "Both teams play structured, low-event hockey"}
    if home in NHL_CHAOTIC and away in NHL_CHAOTIC:
        return {"tempo": "chaotic", "tier": 3, "parlay_fit": "avoid",
                "note": "Both teams run-and-gun — skip unders"}
    if home_ctrl or away_ctrl:
        anchor = home if home_ctrl else away
        return {"tempo": "mixed", "tier": 2, "parlay_fit": "playable",
                "note": f"{anchor} anchors the pace"}
    return {"tempo": "neutral", "tier": 2, "parlay_fit": "neutral",
            "note": "Neither team strongly typed"}


# ── JSON Repair Helper ─────────────────────────────────────────

def _repair_json(text):
    """Attempt to fix common JSON formatting issues from LLM output."""
    # Strip markdown fences
    text = re.sub(r'^```(?:json)?\s*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\s*```\s*$', '', text, flags=re.MULTILINE)
    text = text.strip()

    # Try parsing as-is first
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Fix trailing commas before } or ]
    fixed = re.sub(r',\s*([}\]])', r'\1', text)
    try:
        return json.loads(fixed)
    except json.JSONDecodeError:
        pass

    # Fix unescaped newlines in strings (common LLM mistake)
    fixed2 = re.sub(r'(?<=": ")(.*?)(?="[,}\]])', lambda m: m.group(0).replace('\n', ' '), fixed, flags=re.DOTALL)
    try:
        return json.loads(fixed2)
    except json.JSONDecodeError:
        pass

    # Last resort: extract the outermost JSON object
    match = re.search(r'\{[\s\S]*\}', text)
    if match:
        extracted = match.group()
        extracted = re.sub(r',\s*([}\]])', r'\1', extracted)
        try:
            return json.loads(extracted)
        except json.JSONDecodeError:
            pass

    return None


# ── Step 4: Claude Sonnet Storyline Synthesis ──────────────────

# Max fixtures per API call; build_pregame_report() chunks the prime/playable
# fixtures into batches of this size to keep each prompt manageable.
BATCH_SIZE = 10  # Max games per API call

def _build_game_context(fixture, headlines):
    """Render one fixture plus its matched headlines as a prompt text block.

    Includes the tempo/tier classification, optional knockout context, and
    up to 4 headlines (summaries truncated to 100 chars). Always starts
    with a blank line so blocks concatenate cleanly in the prompt.
    """
    matchup_key = f"{fixture['away']}@{fixture['home']}"
    tempo = classify_fixture(fixture)
    items = headlines.get(matchup_key, [])

    round_suffix = f" — {fixture['round']}" if fixture.get('round') else ""
    lines = [
        "",
        f"GAME: {fixture['away']} @ {fixture['home']} ({fixture['league']}{round_suffix}, {fixture['time']})",
        f"TEMPO: {tempo['tempo']} (Tier {tempo['tier']}) — {tempo['note']}",
        f"PARLAY FIT: {tempo['parlay_fit']}",
    ]

    if fixture.get("knockout_context"):
        lines.append(f"KNOCKOUT CONTEXT: {fixture['knockout_context']}")

    if items:
        lines.append("HEADLINES:")
        for h in items[:4]:
            entry = f"  [{h['relevance'].upper()}] {h['title']}"
            if h.get("summary"):
                entry += f"\n    {h['summary'][:100]}"
            # Source tag rides on the last line of the entry
            entry += f" — {h['source']}"
            lines.append(entry)
    else:
        lines.append("HEADLINES: No team-specific news found today.")

    return "\n".join(lines)


def _call_claude(prompt, timeout=120):
    """POST *prompt* to the Anthropic messages API, retrying once.

    The timeout grows by 60s on the retry. Retries happen on HTTP 529
    (overloaded) and request timeouts; any other error returns None.
    Returns the JSON object parsed from the model's text, or None when
    the output is unparseable.
    """
    for attempt in range(2):
        t = timeout + (attempt * 60)
        try:
            log.info(f"Calling Claude Sonnet 4.6 (attempt {attempt + 1}, timeout {t}s)...")
            response = requests.post(
                "https://api.anthropic.com/v1/messages",
                headers={
                    "x-api-key": CLAUDE_API_KEY,
                    "anthropic-version": "2023-06-01",
                    "content-type": "application/json",
                },
                json={
                    "model": "claude-sonnet-4-6",
                    "max_tokens": 8000,
                    "messages": [{"role": "user", "content": prompt}],
                },
                timeout=t,
            )

            if response.status_code == 529:
                log.warning("Claude API overloaded (529) — retrying...")
                continue
            if response.status_code != 200:
                log.warning(f"Claude API error {response.status_code}: {response.text[:100]}")
                return None

            content = response.json()["content"][0]["text"].strip()
            parsed = _repair_json(content)
            if parsed:
                return parsed
            log.warning(f"Claude returned unparseable JSON: {content[:200]}")
            return None

        except requests.exceptions.Timeout:
            log.warning(f"Claude timed out after {t}s (attempt {attempt + 1})")
            continue
        except Exception as e:
            log.warning(f"Claude call failed: {e}")
            return None

    return None


def build_pregame_report(fixtures, headlines):
    """Use Claude Sonnet to synthesize fixtures + headlines into a pregame report.

    Args:
        fixtures: fixture dicts from fetch_fixtures().
        headlines: {"Away@Home": [headline dicts]} from fetch_headlines().

    Returns:
        {"parlayOfTheDay": dict or None, "games": [game dicts]} with games
        sorted prime-fit first. Falls back to _build_basic_report() when no
        API key is configured or when an entire batch's Claude call fails.
    """
    if not CLAUDE_API_KEY:
        log.warning("No Claude API key — generating basic report without AI synthesis")
        return _build_basic_report(fixtures, headlines)

    today = date.today().strftime("%A, %B %d, %Y")

    # Pre-filter: only send prime + playable fixtures to Sonnet (skip avoid-tier)
    # NOTE: KNOCKOUT_LEAGUES is defined after this function in the file —
    # safe, since module globals are resolved at call time, not definition time.
    sonnet_fixtures = []
    avoid_fixtures = []
    for f in fixtures:
        tempo = classify_fixture(f)
        is_knockout = f["league"] in KNOCKOUT_LEAGUES
        has_context = bool(f.get("knockout_context"))
        # Knockout without context = avoid. Tier 3 chaotic = avoid.
        if (is_knockout and not has_context) or tempo["parlay_fit"] == "avoid":
            avoid_fixtures.append(f)
        else:
            sonnet_fixtures.append(f)

    log.info(f"Sending {len(sonnet_fixtures)} games to Sonnet (skipping {len(avoid_fixtures)} avoid-tier)")

    # Build basic reports for avoided games
    avoid_report = _build_basic_report(avoid_fixtures, headlines) if avoid_fixtures else {"games": []}

    # Split remaining fixtures into batches
    batches = [sonnet_fixtures[i:i + BATCH_SIZE] for i in range(0, len(sonnet_fixtures), BATCH_SIZE)]
    log.info(f"Processing {len(sonnet_fixtures)} games in {len(batches)} batch(es)")

    all_games = []
    all_parlays = []

    for batch_num, batch in enumerate(batches, 1):
        games_context = [_build_game_context(f, headlines) for f in batch]
        games_text = "\n".join(games_context)

        # Only the sole (or final) batch is asked to build the parlay;
        # earlier batches are told to return null for it.
        is_only_batch = len(batches) == 1
        parlay_instruction = """
Also build a "parlayOfTheDay" combining 2-4 of the STRONGEST legs with VALID ratings.""" if is_only_batch or batch_num == len(batches) else """
Set "parlayOfTheDay" to null for this batch — the parlay will be built after all games are analyzed."""

        # Doubled braces ({{ }}) inside this f-string are literal braces —
        # they render the JSON schema example shown to the model.
        prompt = f"""You are Bill's personal pregame research analyst. Today is {today}.

Bill's approach: Story-first parlay building. Lead with the story. ANY bet type is on the table if the storyline supports it.

═══════════════════════════════════════════════════════════════
GOLDEN RULE: YOU CAN ONLY STATE FACTS FROM THE DATA BELOW.
═══════════════════════════════════════════════════════════════
Bill places REAL BETS based on your output. If you fabricate a single detail — a game result, a player injury, a lineup change, a streak, a debut — Bill loses real money.

YOUR ONLY INFORMATION SOURCES (nothing else exists):
  1. The HEADLINES provided below (if any)
  2. The TEMPO and TIER classification provided below
  3. The KNOCKOUT CONTEXT if provided

YOU HAVE ZERO ACCESS TO:
  - Recent game results (unless a HEADLINE explicitly states one)
  - Current injury lists, lineups, or roster moves
  - Winning/losing streaks or current form
  - Player-specific stats, debuts, or trades
  - Emotional states or locker room mood

If you write ANY claim not directly from a HEADLINE below, you are fabricating.
If NO HEADLINES exist → say "No headline data" and base analysis ONLY on tempo/tier.
═══════════════════════════════════════════════════════════════

{"BATCH " + str(batch_num) + " OF " + str(len(batches)) + " — " if len(batches) > 1 else ""}TODAY'S FIXTURES + HEADLINES:
{games_text}

══════════════════════════════════════════
6-STEP FRAMEWORK (every game):
══════════════════════════════════════════

STEP 1 — READ THE ROOM: What do the HEADLINES tell us? What's the TEMPO/TIER? Venue? Only cite facts from headlines above.
STEP 2 — MATCH NARRATIVE: 2-3 sentences max. How does tempo + headline context shape the game? Do NOT invent details. If headlines are thin, keep it short.
STEP 3 — BEST BET: Classify bet type, rate VALID/MARGINAL/FADE based only on what you actually know.
STEP 4 — CORRELATION CHECK (parlay): Flag shared risk between legs.
STEP 5 — VERDICT (parlay): PLAY/MODIFY/SKIP.
STEP 6 — BETTER BET: For MARGINAL/FADE, suggest replacement.

KNOCKOUT OVERRIDE: If KNOCKOUT CONTEXT exists, it overrides tempo. Team trailing → OVER territory. Team leading → Under/clean sheet. Unknown aggregate → flag it, exclude from parlay.
{parlay_instruction}

OUTPUT — JSON only, no markdown fences:
{{
  "parlayOfTheDay": {{ "title": "...", "legs": [{{"matchup":"...","pick":"...","rating":"VALID","fit_reason":"..."}}], "narrative": "...", "correlation_risk": "...", "verdict": "PLAY", "verdict_reason": "...", "confidence": 7, "risk_note": "...", "modification": null }} or null,
  "games": [{{
    "matchup": "Away @ Home",
    "league": "...",
    "time": "...",
    "headline_basis": ["Quote or paraphrase each headline used", "Empty array if no headlines"],
    "room": "Step 1 — ONLY reference headlines listed in headline_basis + tempo/tier. If none, say 'No headline data — analysis based on tempo and tier only.'",
    "narrative": "Step 2 — 2-3 sentences max. Grounded ONLY in headline_basis + tempo/tier. Short if data is thin.",
    "pick": "...",
    "pick_type": "Under|Over|ML|Draw|BTTS|Clean Sheet|Handicap|Other",
    "rating": "VALID|MARGINAL|FADE",
    "line_reasoning": "...",
    "better_bet": "...or null",
    "parlay_fit": "prime|playable|avoid",
    "confidence": 1-10,
    "key_factor": "..."
  }}]
}}

CONFIDENCE RULES:
- No headlines at all → confidence 4 or lower, parlay_fit cannot be "prime"
- 1-2 thin headlines → confidence 5-6 max
- Rich headline data → confidence up to 9

FINAL CHECK: Before outputting, re-read each game's room/narrative. For every specific claim (result, injury, streak, player detail), find the exact HEADLINE it came from. If you cannot find it, DELETE the claim. Sort by parlay_fit (prime first)."""

        log.info(f"Batch {batch_num}/{len(batches)}: {len(batch)} games")
        result = _call_claude(prompt)

        if result:
            batch_games = result.get("games", [])
            log.info(f"Batch {batch_num}: {len(batch_games)} games analyzed")
            all_games.extend(batch_games)
            if result.get("parlayOfTheDay"):
                all_parlays.append(result["parlayOfTheDay"])
        else:
            # Whole-batch failure — degrade gracefully to the tempo-only report
            log.warning(f"Batch {batch_num} failed — using basic report for these games")
            fallback = _build_basic_report(batch, headlines)
            all_games.extend(fallback.get("games", []))

    # If we have multiple batches and no parlay yet, build one from the best legs
    if not all_parlays and len(all_games) >= 2:
        # Fall back to basic parlay from prime games
        prime_games = [g for g in all_games if g.get("parlay_fit") == "prime" and g.get("rating") == "VALID"]
        if len(prime_games) >= 2:
            legs = [{"matchup": g["matchup"], "pick": g["pick"], "rating": g["rating"], "fit_reason": g["key_factor"]} for g in prime_games[:3]]
            all_parlays.append({
                "title": "Best Legs Parlay",
                "legs": legs,
                "narrative": "Combined from the strongest VALID picks across all batches.",
                "correlation_risk": "Legs analyzed in separate batches — cross-batch correlation not checked.",
                "verdict": "MODIFY",
                "verdict_reason": "Legs look strong individually but correlation wasn't checked across batches.",
                "confidence": 6,
                "risk_note": "Cross-batch parlay — verify legs share a coherent narrative.",
                "modification": None,
            })

    # Pick the best parlay (highest confidence)
    best_parlay = None
    if all_parlays:
        best_parlay = max(all_parlays, key=lambda p: p.get("confidence", 0))

    # Append avoided games at the end (basic report, no Sonnet cost)
    all_games.extend(avoid_report.get("games", []))

    # Sort all games
    fit_order = {"prime": 0, "playable": 1, "neutral": 2, "avoid": 3}
    all_games.sort(key=lambda g: fit_order.get(g.get("parlay_fit", "neutral"), 2))

    log.info(f"Final report: {len(all_games)} games, parlay: {(best_parlay or {}).get('title', 'none')}")
    return {"parlayOfTheDay": best_parlay, "games": all_games}


# Two-legged knockout competitions where aggregate context overrides tempo.
# Referenced by build_pregame_report() above — safe despite being defined
# later in the file, because globals are resolved when the function runs.
KNOCKOUT_LEAGUES = {"Champions League", "Europa League", "Conference League"}

def _knockout_fields(fixture):
    """Report fields for a knockout second leg.

    Tempo reads are unreliable here, so the game is always rated
    FADE / avoid; the message depends on whether first-leg context
    ("knockout_context") is available on the fixture.
    """
    ctx = fixture.get("knockout_context")
    if ctx:
        room = f"KNOCKOUT SECOND LEG. {ctx}"
        narrative = f"Aggregate context drives this game — tempo classification is unreliable. {ctx}"
        pick = "Check aggregate before betting"
        confidence = 3
        key_factor = "Knockout second leg — aggregate matters more than tempo"
        better_bet = "Wait for lineup news and assess aggregate deficit before placing any bet"
    else:
        room = f"KNOCKOUT SECOND LEG ({fixture['league']}). First-leg result unknown."
        # Plain string (was a placeholder-free f-string).
        narrative = "Cannot build a narrative without aggregate context. DO NOT bet blind."
        pick = "Unknown aggregate — verify first leg before betting"
        confidence = 2
        key_factor = "Unknown first-leg result — cannot recommend a bet"
        better_bet = "Find the first-leg score before considering any angle"
    # Key order matches the per-game report schema (affects JSON output order).
    return {
        "room": room,
        "narrative": narrative,
        "parlay_fit": "avoid",
        "pick": pick,
        "pick_type": "Other",
        "rating": "FADE",
        "line_reasoning": "Knockout context overrides league tier classification",
        "better_bet": better_bet,
        "confidence": confidence,
        "key_factor": key_factor,
    }


def _tempo_fields(fixture, tempo, headline_text):
    """Report fields for a normal league game, from the tempo heuristic only.

    Tier 1 = tightest lines / highest confidence; NHL and a few leagues
    get league-specific totals.
    """
    league = fixture["league"]
    if league == "NHL":
        pick = "Under 5.0" if tempo["tier"] == 1 else "Under 5.5" if tempo["tier"] == 2 else "Under 6.0"
    elif tempo["tier"] == 1:
        pick = "Under 2.0" if league in ("Serie A",) else "Under 2.5"
    elif tempo["tier"] == 2:
        pick = "Under 2.5" if league in ("Ligue 1", "Liga Portugal") else "Under 3.0"
    else:
        pick = "Under 3.5"

    return {
        "room": f"{tempo['note']}.{headline_text}" if headline_text else tempo["note"],
        "narrative": f"Based on tempo classification: {tempo['note']}. No AI synthesis available — storyline built from tempo tier only.",
        "parlay_fit": tempo["parlay_fit"],
        "pick": pick,
        "pick_type": "Under",
        "rating": "VALID" if tempo["tier"] == 1 else "MARGINAL",
        "line_reasoning": "Based on league tier and tempo classification (no headline validation)",
        "better_bet": None,
        "confidence": {1: 7, 2: 5, 3: 3}.get(tempo["tier"], 5),
        "key_factor": tempo["note"],
    }


def _build_basic_report(fixtures, headlines):
    """Fallback report without Claude — uses tempo classification only.

    Args:
        fixtures: list of fixture dicts (needs "home", "away", "league",
            "time"; optionally "knockout_context").
        headlines: dict keyed by "away@home" → list of {"title": ...} items.

    Returns:
        {"parlayOfTheDay": dict | None, "games": list} — same schema as the
        Claude-built report, so callers can treat both interchangeably.
    """
    games = []
    prime_legs = []  # parlay candidates: prime-fit, non-knockout games

    for fixture in fixtures:
        key = f"{fixture['away']}@{fixture['home']}"
        tempo = classify_fixture(fixture)
        news = headlines.get(key, [])
        headline_text = f" Headlines: {news[0]['title']}" if news else ""

        is_knockout = fixture["league"] in KNOCKOUT_LEAGUES

        # Knockout second legs override tempo classification.
        if is_knockout:
            fields = _knockout_fields(fixture)
        else:
            fields = _tempo_fields(fixture, tempo, headline_text)

        game = {
            "matchup": f"{fixture['away']} @ {fixture['home']}",
            "league": fixture["league"],
            "time": fixture["time"],
            **fields,
        }
        games.append(game)

        # Only non-knockout prime games qualify as parlay legs.
        if fields["parlay_fit"] == "prime" and not is_knockout:
            prime_legs.append({
                "matchup": f"{fixture['away']} @ {fixture['home']}",
                "pick": fields["pick"],
                "rating": fields["rating"],
                "fit_reason": tempo["note"],
            })

    # Sort by parlay_fit: prime first, avoid last.
    fit_order = {"prime": 0, "playable": 1, "neutral": 2, "avoid": 3}
    games.sort(key=lambda g: fit_order.get(g["parlay_fit"], 2))

    parlay = None
    if len(prime_legs) >= 2:
        parlay = {
            "title": "Tempo Lock Parlay",
            "legs": prime_legs[:3],
            "narrative": "All legs in Tier 1 structured leagues with controlled tempo profiles. No knockout second legs included.",
            "correlation_risk": "Multiple unders share the same risk — one early goal in any game pressures the entire parlay.",
            "verdict": "PLAY",
            "verdict_reason": "All legs are VALID tempo reads from Tier 1 leagues.",
            "confidence": 7,
            "risk_note": "Based on tempo classification only — no headline validation available.",
            "modification": None,
        }

    return {"parlayOfTheDay": parlay, "games": games}


# ── Step 5: Write to Dashboard ────────────────────────────────

def write_to_dashboard(report, fixtures):
    """Merge the pregame report into dashboard-data.json under "pregame".

    Existing top-level keys written by other producers are preserved.

    Args:
        report: dict with "parlayOfTheDay" and "games" keys.
        fixtures: list of fixture dicts — only its length is recorded.

    Never raises: any failure is logged and swallowed so a dashboard
    write error cannot kill the research run.
    """
    try:
        data = {}
        if DASHBOARD_JSON.exists():
            try:
                data = json.loads(DASHBOARD_JSON.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                # A corrupt file would otherwise block every future write;
                # rebuild it instead (other producers' keys are lost, but
                # the dashboard keeps updating).
                log.warning("dashboard-data.json unreadable — rebuilding it")
                data = {}

        data["pregame"] = {
            "parlayOfTheDay": report.get("parlayOfTheDay"),
            "games": report.get("games", []),
            "fixtureCount": len(fixtures),
            "updated": datetime.now().isoformat(),
            "source": "pregame_research.py",
        }

        # Atomic replace: a concurrent dashboard reader never sees a
        # half-written JSON file.
        tmp = DASHBOARD_JSON.with_suffix(".json.tmp")
        tmp.write_text(json.dumps(data, indent=2), encoding="utf-8")
        os.replace(tmp, DASHBOARD_JSON)

        game_count = len(report.get("games", []))
        parlay_title = (report.get("parlayOfTheDay") or {}).get("title", "none")
        log.info(f"Wrote {game_count} game reports + parlay to dashboard-data.json")
        log.info(f"Parlay of the Day: {parlay_title}")

    except Exception as e:
        log.error(f"Failed to write dashboard: {e}")


# ── Main ──────────────────────────────────────────────────────

def main():
    """Run the full pipeline: fixtures → headlines → report → dashboard."""
    banner = "=" * 60
    log.info(banner)
    log.info("Pregame Research — Storyline-Driven Parlay Intel")
    log.info(f"Date: {date.today()}")
    log.info(banner)

    # Step 1: today's fixtures; bail early (with an empty dashboard
    # payload) when there is nothing to research.
    fixtures = fetch_fixtures()
    if not fixtures:
        log.info("No fixtures today — nothing to research")
        write_to_dashboard({"parlayOfTheDay": None, "games": []}, [])
        return

    # Steps 2-4: RSS headlines → Claude synthesis → dashboard write.
    headlines = fetch_headlines(fixtures)
    report = build_pregame_report(fixtures, headlines)
    write_to_dashboard(report, fixtures)

    # Summary: tally parlay-fit buckets in a single pass.
    counts = {"prime": 0, "playable": 0, "avoid": 0}
    for game in report.get("games", []):
        fit = game.get("parlay_fit")
        if fit in counts:
            counts[fit] += 1
    log.info(f"Summary: {counts['prime']} prime · {counts['playable']} playable · {counts['avoid']} avoid")
    log.info("Done!")


# Script entry point — run the full pregame pipeline when invoked directly.
if __name__ == "__main__":
    main()
