"""
Google Drive Health Connect connector.

Searches Bill's Google Drive for Health Connect zip files,
downloads the latest one to a temp directory, and hands it
off to the health_connect parser for ingestion.

Primary method: Google Drive API (no dependency on local sync)
Fallback: filesystem watcher (local Google Drive sync folder)
"""

import io
import logging
import os
import tempfile
from datetime import datetime, timedelta, timezone
from pathlib import Path

logger = logging.getLogger("gdrive-health")


class GDriveHealthConnector:
    """Search for and download Health Connect exports from Google Drive.

    The Drive service is built lazily on first access. When it cannot be
    built, the public methods return an empty result (``[]`` / ``None``)
    instead of raising, so the caller can use another source.
    """

    # Default minimum size (bytes) an export must have to be downloaded by
    # fetch_latest_health_connect; smaller files are treated as corrupted.
    MIN_EXPORT_SIZE_BYTES = 100 * 1024
    # A local copy younger than this many hours is reused instead of
    # being downloaded again.
    CACHE_MAX_AGE_HOURS = 1
    # Suffix of the temporary file a download is streamed into before it
    # is renamed to its final name.
    _PARTIAL_SUFFIX = ".part"

    def __init__(self):
        # None = not attempted yet, False = attempted and failed.
        self._service = None
        self._available = None  # Cached availability check
        self._download_dir = Path(tempfile.gettempdir()) / "clawd-health-downloads"
        self._download_dir.mkdir(parents=True, exist_ok=True)

    @property
    def service(self):
        """Return the lazily built Drive service, or None if it could not be built."""
        if self._service is None:
            try:
                from google_auth import build_drive_service
                self._service = build_drive_service()
            except Exception as e:
                logger.warning(f"Google Drive service unavailable: {e}")
                self._service = False  # Sentinel: tried and failed
        return self._service if self._service is not False else None

    @property
    def available(self) -> bool:
        """Return True when the Drive service could be built (result is cached)."""
        if self._available is None:
            self._available = self.service is not None
            if self._available:
                logger.info("Google Drive connector: ACTIVE")
            else:
                logger.info("Google Drive connector: INACTIVE (falling back to filesystem)")
        return self._available

    @staticmethod
    def _parse_drive_time(value):
        """Parse a Drive ``modifiedTime`` string into a timezone-aware datetime.

        A trailing ``Z`` is rewritten to ``+00:00`` so the value also parses
        on Python versions whose ``fromisoformat`` rejects ``Z``. A value
        without any offset is taken to be UTC.
        """
        parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def _local_path_for(self, filename):
        """Map a Drive file name to a path directly inside the download dir.

        Path separators are flattened to ``_`` so a name such as
        ``../x.zip`` or ``a/b.zip`` can neither escape the download
        directory nor point into a subdirectory that does not exist.
        """
        flattened = str(filename).replace("\\", "_").replace("/", "_")
        if flattened in ("", ".", ".."):
            flattened = "health-connect-export.zip"
        return self._download_dir / flattened

    def search_health_connect_files(self, max_age_hours=48):
        """
        Search Google Drive for Health Connect zip files.

        Args:
            max_age_hours: Files modified longer ago than this are dropped.

        Returns list of dicts: [{id, name, modifiedTime, size}, ...]
        sorted by modification time descending (newest first). ``size`` is
        an int and is 0 when Drive reports none. Returns [] when Drive is
        unavailable, nothing matches, or the search fails.
        """
        if not self.available:
            return []

        # Several query patterns to catch different naming conventions and
        # uploads whose MIME type was not recorded as application/zip.
        queries = [
            "name contains 'Health Connect' and mimeType = 'application/zip'",
            "name contains 'health_connect' and mimeType = 'application/zip'",
            "name contains 'Health Connect' and name contains '.zip'",
            "name contains 'health_connect' and name contains '.zip'",
        ]

        try:
            found = {}
            for query in queries:
                try:
                    response = self.service.files().list(
                        q=f"({query}) and trashed = false",
                        fields="files(id, name, modifiedTime, size, mimeType)",
                        orderBy="modifiedTime desc",
                        pageSize=10,
                    ).execute()

                    # The queries overlap, so de-duplicate by file id.
                    for f in response.get("files", []):
                        found.setdefault(f["id"], f)
                except Exception as e:
                    # One failing pattern must not prevent the others.
                    logger.debug(f"Drive query failed: {query} — {e}")
                    continue

            if not found:
                logger.debug("No Health Connect files found in Google Drive")
                return []

            # Compare timezone-aware datetimes so an offset other than UTC
            # in modifiedTime is still filtered and ordered correctly.
            cutoff = datetime.now(timezone.utc) - timedelta(hours=max_age_hours)
            dated = []
            for f in found.values():
                try:
                    modified = self._parse_drive_time(f["modifiedTime"])
                    entry = {
                        "id": f["id"],
                        "name": f["name"],
                        "modifiedTime": f["modifiedTime"],
                        "size": int(f.get("size") or 0),
                    }
                except (ValueError, KeyError, TypeError, AttributeError):
                    # Malformed metadata: skip this file, keep the rest.
                    continue
                if modified >= cutoff:
                    dated.append((modified, entry))

            # Sort on the parsed time, not the raw string, newest first.
            dated.sort(key=lambda pair: pair[0], reverse=True)
            recent_files = [entry for _, entry in dated]

            if recent_files:
                logger.info(
                    f"Found {len(recent_files)} Health Connect file(s) in Drive "
                    f"(newest: {recent_files[0]['name']})"
                )

            return recent_files

        except Exception as e:
            logger.error(f"Drive search failed: {e}", exc_info=True)
            return []

    def download_file(self, file_id, filename=None):
        """
        Download a file from Google Drive by ID.

        Args:
            file_id: Drive file id.
            filename: Name to store the file under; looked up from Drive
                when omitted. Path separators in it are flattened.

        Returns local file path (str) or None on failure. A local copy
        younger than CACHE_MAX_AGE_HOURS is returned without downloading.
        """
        if not self.available:
            return None

        partial_path = None
        try:
            # Get file metadata if we don't have the name
            if not filename:
                meta = self.service.files().get(
                    fileId=file_id, fields="name"
                ).execute()
                filename = meta["name"]

            local_path = self._local_path_for(filename)

            # Skip if already downloaded (same name, recent)
            if local_path.exists():
                age_hours = (
                    datetime.now().timestamp() - local_path.stat().st_mtime
                ) / 3600
                if age_hours < self.CACHE_MAX_AGE_HOURS:
                    logger.info(f"Using cached download: {local_path}")
                    return str(local_path)

            from googleapiclient.http import MediaIoBaseDownload

            request = self.service.files().get_media(fileId=file_id)

            # Stream chunks straight to disk instead of holding the whole
            # archive in memory. Writing to a temporary name and renaming
            # afterwards means an interrupted download is never mistaken
            # for a complete cached file.
            partial_path = local_path.with_name(local_path.name + self._PARTIAL_SUFFIX)
            with open(partial_path, "wb") as out:
                downloader = MediaIoBaseDownload(out, request)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    if status:
                        logger.debug(f"Download progress: {int(status.progress() * 100)}%")
            partial_path.replace(local_path)

            size_kb = local_path.stat().st_size / 1024
            logger.info(f"Downloaded: {filename} ({size_kb:.0f} KB)")
            return str(local_path)

        except Exception as e:
            logger.error(f"Drive download failed: {e}", exc_info=True)
            if partial_path is not None:
                try:
                    partial_path.unlink()
                except OSError:
                    # Nothing to remove (never created or already renamed).
                    pass
            return None

    def fetch_latest_health_connect(self, max_age_hours=24, min_size_bytes=None):
        """
        Find and download the latest Health Connect file from Drive.

        Args:
            max_age_hours: Passed to search_health_connect_files.
            min_size_bytes: Minimum size the newest file must have;
                defaults to MIN_EXPORT_SIZE_BYTES.

        Returns local file path or None (no recent file, the newest file
        is too small, or the download failed).
        This is the main entry point for the scheduler.
        """
        if min_size_bytes is None:
            min_size_bytes = self.MIN_EXPORT_SIZE_BYTES

        files = self.search_health_connect_files(max_age_hours=max_age_hours)
        if not files:
            return None

        latest = files[0]

        # Skip tiny files (likely corrupted)
        if latest["size"] < min_size_bytes:
            logger.warning(
                f"Skipping small file: {latest['name']} ({latest['size']} bytes)"
            )
            return None

        return self.download_file(latest["id"], latest["name"])

    def cleanup_old_downloads(self, max_age_days=7):
        """Remove downloaded files older than N days.

        Only regular files are removed. A failure on one entry is logged
        and does not stop the remaining entries from being processed.
        """
        try:
            cutoff = datetime.now().timestamp() - (max_age_days * 86400)
            entries = list(self._download_dir.iterdir())
        except Exception as e:
            logger.warning(f"Cleanup failed: {e}")
            return

        for entry in entries:
            try:
                if not entry.is_file() or entry.stat().st_mtime >= cutoff:
                    continue
                entry.unlink()
                logger.debug(f"Cleaned up old download: {entry.name}")
            except OSError as e:
                logger.warning(f"Cleanup failed for {entry.name}: {e}")


# Shared connector instance; stays None until get_connector() is first called.
_connector = None


def get_connector():
    """Return the shared GDriveHealthConnector, creating it on first use."""
    global _connector
    if _connector is not None:
        return _connector
    _connector = GDriveHealthConnector()
    return _connector
