#!/usr/bin/env python3
"""
GitHub Year-in-Review Analytics for Personal Blogging

Generates comprehensive analytics and blog-ready content from your GitHub activity,
including repositories from your personal account and all organizations.

Outputs:
- repos_{year}.json          — Full repository data with monthly activity
- analytics_{year}.json      — Computed insights, trends, and narratives
- timeline_{year}.svg        — Visual heatmap of activity
- blog_drafts_{year}.md      — Ready-to-use blog post templates
- monthly_recap_{year}.md    — Month-by-month narrative summaries
- timeline_data_{year}.json  — Minimal per-repo data for web components
- dashboard_{year}.json      — Summary stats for dashboards
- project_spotlights_{year}/ — Individual project deep-dives
"""

from __future__ import annotations

import argparse
import datetime as dt
import json
import os
import re
import sys
import time
from collections import defaultdict
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Tuple
from pathlib import Path

import requests

GITHUB_API = "https://api.github.com"

# ─────────────────────────────────────────────────────────────────────────────
# Language/Technology Categorization
# ─────────────────────────────────────────────────────────────────────────────

LANGUAGE_CATEGORIES = {
    "web_frontend": ["JavaScript", "TypeScript", "HTML", "CSS", "Vue", "Svelte"],
    "web_backend": ["Python", "Go", "Rust", "Ruby", "Java", "C#", "PHP"],
    "mobile": ["Swift", "Kotlin", "Dart", "Objective-C"],
    "systems": ["C", "C++", "Rust", "Assembly", "Zig"],
    "data_ml": ["Python", "Jupyter Notebook", "R", "Julia"],
    "devops": ["Shell", "Dockerfile", "HCL", "Nix"],
    "embedded": ["C", "C++", "Rust", "MicroPython", "Arduino"],
}
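# Note: LANGUAGE_CATEGORIES is not consumed anywhere in this script yet; it is
# kept as a reference taxonomy for future per-category rollups.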

TOPIC_THEMES = {
    "ai_ml": ["machine-learning", "ai", "artificial-intelligence", "llm", "deep-learning", 
              "neural-network", "transformers", "gpt", "nlp", "computer-vision", "ml"],
    "xr": ["vr", "ar", "xr", "virtual-reality", "augmented-reality", "mixed-reality",
           "unity", "unreal", "oculus", "quest", "hololens", "webxr"],
    "robotics": ["robotics", "ros", "robot", "autonomous", "drone", "uav", "iot", "sensor"],
    "web": ["web", "frontend", "backend", "api", "rest", "graphql", "astro", "react", 
            "nextjs", "svelte", "vue"],
    "gamedev": ["game", "gamedev", "unity", "godot", "unreal", "game-development"],
    "tools": ["cli", "tool", "automation", "productivity", "developer-tools", "devtools"],
    "accessibility": ["accessibility", "a11y", "assistive", "caption", "screen-reader"],
}

PROJECT_TYPE_SIGNALS = {
    "webapp": {"topics": ["web", "frontend", "webapp"], "files": ["index.html", "package.json"]},
    "api": {"topics": ["api", "rest", "graphql", "backend"], "language": ["Go", "Python", "Rust"]},
    "library": {"topics": ["library", "package", "module", "npm", "pypi", "crate"]},
    "cli": {"topics": ["cli", "command-line", "terminal"]},
    "extension": {"topics": ["extension", "plugin", "addon", "vscode"]},
    "ml_project": {"topics": ["machine-learning", "deep-learning", "model"]},
    "hardware": {"topics": ["arduino", "raspberry-pi", "embedded", "iot", "hardware"]},
}
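# Note: classify_repo_type() below consumes only the "topics" and "language"
# signals; the "files" lists are currently unused (checking repository contents
# would cost extra API calls per repo).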


# ─────────────────────────────────────────────────────────────────────────────
# Utility Functions
# ─────────────────────────────────────────────────────────────────────────────

def iso(d: dt.datetime) -> str:
    """Format a datetime as an ISO-8601 UTC timestamp ending in 'Z'."""
    if d.tzinfo is not None:
        # isoformat() on an aware datetime would emit "+00:00"; normalize first
        # so we don't produce a malformed "...+00:00Z"
        d = d.astimezone(dt.timezone.utc).replace(tzinfo=None)
    return d.replace(microsecond=0).isoformat() + "Z"


def month_key(y: int, m: int) -> str:
    return f"{y:04d}-{m:02d}"


def month_name(m: int) -> str:
    return ["", "January", "February", "March", "April", "May", "June",
            "July", "August", "September", "October", "November", "December"][m]


def quarter_name(m: int) -> str:
    return ["", "Q1", "Q1", "Q1", "Q2", "Q2", "Q2", "Q3", "Q3", "Q3", "Q4", "Q4", "Q4"][m]


def gh_headers(token: str) -> Dict[str, str]:
    return {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": "github-timeline-analytics",
    }


def gh_get(url: str, token: str, params: Optional[Dict[str, Any]] = None) -> requests.Response:
    """Single GET against the GitHub REST API; fails fast on any 4xx/5xx.

    Retry/backoff lives only where it matters most: the commit-search path below.
    """
    r = requests.get(url, headers=gh_headers(token), params=params, timeout=60)
    if r.status_code >= 400:
        raise RuntimeError(f"GitHub API error {r.status_code} for {url}: {r.text[:4000]}")
    return r


def paginate(url: str, token: str, params: Optional[Dict[str, Any]] = None, max_pages: int = 10) -> List[Any]:
    """Collect up to max_pages * 100 items from a paginated list endpoint.

    Note: the default cap of 10 pages silently truncates past 1,000 items;
    raise max_pages for accounts with more repositories than that.
    """
    items: List[Any] = []
    page = 1
    while page <= max_pages:
        p = dict(params or {})
        p.update({"per_page": 100, "page": page})
        r = gh_get(url, token, p)
        data = r.json()
        if isinstance(data, dict) and "items" in data:
            data = data["items"]
        if not data:
            break
        if not isinstance(data, list):
            raise RuntimeError(f"Unexpected pagination payload for {url}: {type(data)}")
        items.extend(data)
        if len(data) < 100:
            break
        page += 1
    return items


def get_viewer(token: str) -> Dict[str, Any]:
    return gh_get(f"{GITHUB_API}/user", token).json()


def list_user_repos(token: str) -> List[Dict[str, Any]]:
    return paginate(
        f"{GITHUB_API}/user/repos",
        token,
        params={
            "visibility": "all",
            "affiliation": "owner,collaborator,organization_member",
            "sort": "pushed",
            "direction": "desc",
        },
    )


def list_orgs(token: str) -> List[Dict[str, Any]]:
    return paginate(f"{GITHUB_API}/user/orgs", token)


def list_org_repos(org_login: str, token: str) -> List[Dict[str, Any]]:
    return paginate(
        f"{GITHUB_API}/orgs/{org_login}/repos",
        token,
        params={"type": "all", "sort": "pushed", "direction": "desc"},
    )


def month_range(year: int) -> List[Tuple[dt.datetime, dt.datetime, str]]:
    """Return (start, end, key) per month; end is exclusive, all UTC.

    e.g. month_range(2025)[0] ==
        (datetime(2025, 1, 1, tzinfo=utc), datetime(2025, 2, 1, tzinfo=utc), "2025-01")
    """
    out = []
    for m in range(1, 13):
        start = dt.datetime(year, m, 1, 0, 0, 0, tzinfo=dt.timezone.utc)
        if m == 12:
            end = dt.datetime(year + 1, 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc)
        else:
            end = dt.datetime(year, m + 1, 1, 0, 0, 0, tzinfo=dt.timezone.utc)
        out.append((start, end, month_key(year, m)))
    return out


def count_commits_via_search(full_name: str, token: str, start: dt.datetime, end: dt.datetime, 
                              max_retries: int = 3) -> int:
    """Uses GitHub Search API to count commits in a date range with rate limit handling."""
    q = f"repo:{full_name} committer-date:{start.date().isoformat()}..{(end - dt.timedelta(days=1)).date().isoformat()}"
    url = f"{GITHUB_API}/search/commits"
    headers = gh_headers(token)
    headers["Accept"] = "application/vnd.github.cloak-preview+json, application/vnd.github+json"
    
    for attempt in range(max_retries):
        r = requests.get(url, headers=headers, params={"q": q, "per_page": 1}, timeout=60)
        
        if r.status_code == 422:
            return 0
        
        if r.status_code == 403:
            # Rate limited - check headers for reset time
            reset_time = r.headers.get("X-RateLimit-Reset")
            if reset_time:
                wait_seconds = max(int(reset_time) - int(time.time()), 1)
                wait_seconds = min(wait_seconds, 65)  # Cap at ~1 minute
            else:
                wait_seconds = 30 * (attempt + 1)
            
            if attempt < max_retries - 1:
                print(f"\n    [!] Rate limited. Waiting {wait_seconds}s...", end="", flush=True)
                time.sleep(wait_seconds)
                print(" retrying...", end=" ", flush=True)
                continue
            else:
                # Give up after retries; record 0 for this range instead of crashing
                print(f"\n    [!] Rate limit exceeded; recording 0 for this date range")
                return 0
        
        if r.status_code >= 400:
            if attempt < max_retries - 1:
                time.sleep(5)
                continue
            return 0
        
        # Brief pause between successful requests. The search API allows roughly
        # 30 requests/min, so the 403 retry path above is the real safety net
        # when scanning many repositories back to back.
        time.sleep(0.5)
        
        data = r.json()
        return int(data.get("total_count", 0) or 0)
    
    return 0
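

# An alternative, not wired in by default: the plain commits-list endpoint
# (GET /repos/{owner}/{repo}/commits with since/until) has a far higher rate
# limit than search, at the cost of paging through every commit. A minimal
# sketch under that assumption:
def count_commits_via_list(full_name: str, token: str, start: dt.datetime, end: dt.datetime) -> int:
    """Count commits by listing them directly (fallback when search is throttled)."""
    try:
        commits = paginate(
            f"{GITHUB_API}/repos/{full_name}/commits",
            token,
            params={"since": iso(start), "until": iso(end)},
            max_pages=5,  # caps the count at 500 commits per window
        )
        return len(commits)
    except RuntimeError:
        # Empty repositories return 409 Conflict; treat as no activity
        return 0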


def get_repo_languages(full_name: str, token: str) -> Dict[str, int]:
    """Get language breakdown (bytes) for a repository."""
    try:
        r = gh_get(f"{GITHUB_API}/repos/{full_name}/languages", token)
        return r.json()
    except Exception:
        return {}


def get_repo_contributors_count(full_name: str, token: str) -> int:
    """Get contributor count for a repository via the Link pagination header."""
    try:
        r = gh_get(f"{GITHUB_API}/repos/{full_name}/contributors", token, params={"per_page": 1, "anon": "true"})
        # With per_page=1, the page number tagged rel="last" in the Link header
        # equals the total contributor count
        link_header = r.headers.get("Link", "")
        if 'rel="last"' in link_header:
            match = re.search(r'page=(\d+)>; rel="last"', link_header)
            if match:
                return int(match.group(1))
        return len(r.json())
    except Exception:
        return 1


# ─────────────────────────────────────────────────────────────────────────────
# Classification & Analysis
# ─────────────────────────────────────────────────────────────────────────────

def classify_repo_type(repo: Dict[str, Any]) -> str:
    """Classify repository type from topic, language, and name signals."""
    topics = set(t.lower() for t in (repo.get("topics") or []))
    language = repo.get("language") or ""
    name = repo.get("name", "").lower()

    # Walk the types in declaration order; the first topic or language match
    # wins. Note the "api" language signal is broad: an untagged Go/Python/Rust
    # repo is classified as "api" here.
    for proj_type, signals in PROJECT_TYPE_SIGNALS.items():
        if any(t in topics for t in signals.get("topics", [])):
            return proj_type
        if language in signals.get("language", []):
            return proj_type

    # Fallback heuristics
    if "api" in name or "server" in name or "backend" in name:
        return "api"
    if "cli" in name or "tool" in name:
        return "cli"
    if repo.get("has_pages"):
        return "webapp"
    
    return "project"


def classify_repo_maturity(repo: Dict[str, Any], total_commits: int) -> str:
    """Classify repository maturity/stage."""
    size_kb = int(repo.get("size") or 0)
    forks = int(repo.get("forks_count") or 0)
    issues = int(repo.get("open_issues_count") or 0)
    stars = int(repo.get("stargazers_count") or 0)
    archived = bool(repo.get("archived"))
    
    if archived:
        return "archived"
    
    # Production indicators
    if stars >= 10 or forks >= 5 or (size_kb >= 5000 and total_commits >= 50):
        return "production"
    
    # Active development
    if total_commits >= 20 or (size_kb >= 1000 and issues > 0):
        return "active"
    
    # Prototype/demo stage
    if total_commits >= 5 or size_kb >= 300:
        return "prototype"
    
    # Just started
    if total_commits >= 1:
        return "experiment"
    
    return "dormant"


def detect_themes(repo: Dict[str, Any]) -> List[str]:
    """Detect thematic areas from topics and language."""
    topics = set(t.lower() for t in (repo.get("topics") or []))
    language = repo.get("language") or ""
    detected = []
    
    for theme, keywords in TOPIC_THEMES.items():
        if any(kw in topics for kw in keywords):
            detected.append(theme)
    
    # Language-based inference
    if language in ["Python", "Jupyter Notebook"] and not detected:
        detected.append("data_ml")
    
    return detected


def detect_tech_stack(repo: Dict[str, Any], languages: Dict[str, int]) -> List[str]:
    """Detect technology stack from languages and topics."""
    topics = set(t.lower() for t in (repo.get("topics") or []))
    stack = []
    
    # From languages
    total_bytes = sum(languages.values()) or 1
    for lang, bytes_count in sorted(languages.items(), key=lambda x: x[1], reverse=True):
        if bytes_count / total_bytes > 0.1:  # At least 10% of codebase
            stack.append(lang)
    
    # From topics - specific frameworks/tools
    frameworks = ["react", "vue", "svelte", "astro", "nextjs", "django", "flask", 
                  "fastapi", "express", "nest", "spring", "rails", "unity", "godot"]
    for fw in frameworks:
        if fw in topics:
            stack.append(fw.title())
    
    return stack[:5]  # Top 5


# ─────────────────────────────────────────────────────────────────────────────
# Data Models
# ─────────────────────────────────────────────────────────────────────────────

@dataclass
class RepoActivity:
    full_name: str
    html_url: str
    private: bool
    pushed_at: str
    created_at: str
    updated_at: str
    description: str
    language: str
    languages_breakdown: Dict[str, int]
    topics: List[str]
    
    # Classifications
    project_type: str
    maturity: str
    themes: List[str]
    tech_stack: List[str]
    
    # Activity metrics
    commits_by_month: Dict[str, int]
    total_year_commits: int
    
    # Additional metadata
    owner: str
    name: str
    stargazers_count: int
    forks_count: int
    open_issues_count: int
    has_pages: bool
    contributors_count: int
    
    # Computed fields
    is_new_this_year: bool
    first_active_month: Optional[str]
    peak_month: Optional[str]
    activity_pattern: str  # "steady", "burst", "growing", "declining", "sporadic"
    
    def to_dict(self):
        return asdict(self)


@dataclass
class YearAnalytics:
    year: int
    generated_at: str
    
    # Summary stats
    total_repos: int
    active_repos: int
    new_repos: int
    total_commits: int
    
    # Distributions
    commits_by_month: Dict[str, int]
    commits_by_quarter: Dict[str, int]
    repos_by_type: Dict[str, int]
    repos_by_maturity: Dict[str, int]
    repos_by_theme: Dict[str, int]
    languages_used: Dict[str, int]  # Count of repos per language
    
    # Top lists
    top_repos_by_commits: List[str]
    top_languages: List[str]
    top_themes: List[str]
    
    # Patterns
    most_active_month: str
    least_active_month: str
    busiest_quarter: str
    new_languages_explored: List[str]
    
    # Narratives (for blog content)
    monthly_narratives: Dict[str, str]
    quarterly_summaries: Dict[str, str]
    year_summary: str
    highlights: List[str]
    
    def to_dict(self):
        return asdict(self)


# ─────────────────────────────────────────────────────────────────────────────
# Activity Analysis
# ─────────────────────────────────────────────────────────────────────────────

def analyze_activity_pattern(commits_by_month: Dict[str, int]) -> str:
    """Determine the activity pattern for a repository.

    Assumes commits_by_month is keyed in calendar order (build_activity fills
    it month by month), so a first-half/second-half split of the values works.
    """
    values = list(commits_by_month.values())
    if sum(values) == 0:
        return "dormant"
    
    non_zero = [v for v in values if v > 0]
    if len(non_zero) <= 2:
        return "burst"
    
    # Check for trend
    first_half = sum(values[:6])
    second_half = sum(values[6:])
    
    if second_half > first_half * 1.5:
        return "growing"
    elif first_half > second_half * 1.5:
        return "declining"
    
    # Check for consistency
    if len(non_zero) >= 8:
        return "steady"
    
    return "sporadic"


def build_activity(year: int, repos: List[Dict[str, Any]], token: str, 
                   fetch_languages: bool = True, 
                   offset: int = 0, limit: int = 0,
                   progress_file: Optional[Path] = None,
                   existing_activity: Optional[List[RepoActivity]] = None) -> List[RepoActivity]:
    """Build detailed activity records for all repositories with chunked progress."""
    months = month_range(year)
    out: List[RepoActivity] = list(existing_activity or [])
    processed_names = {r.full_name for r in out}
    
    # Apply offset and limit
    total_repos = len(repos)
    start_idx = offset
    end_idx = total_repos if limit == 0 else min(offset + limit, total_repos)
    repos_to_process = repos[start_idx:end_idx]
    
    print(f"Processing repos {start_idx+1}-{end_idx} of {total_repos}...")
    if out:
        print(f"  (Resuming with {len(out)} already processed)")
    
    for i, repo in enumerate(repos_to_process):
        full_name = repo["full_name"]
        
        # Skip if already processed (for resume)
        if full_name in processed_names:
            print(f"  [{start_idx+i+1}/{total_repos}] {full_name}... [SKIP] (already done)")
            continue
        
        print(f"  [{start_idx+i+1}/{total_repos}] {full_name}...", end=" ", flush=True)
        
        # Count commits by month
        commits: Dict[str, int] = {}
        for start, end, key in months:
            commits[key] = count_commits_via_search(full_name, token, start, end)
        
        total_commits = sum(commits.values())
        
        # Get language breakdown
        languages = {}
        if fetch_languages and total_commits > 0:
            languages = get_repo_languages(full_name, token)
        
        # Get contributor count for active repos
        contributors = 1
        if total_commits >= 10:
            contributors = get_repo_contributors_count(full_name, token)
        
        print(f"[OK] ({total_commits} commits)")
        
        # Parse owner/name
        owner, name = full_name.split("/", 1) if "/" in full_name else ("", full_name)
        
        # Determine if new this year
        created = repo.get("created_at", "")
        is_new = created.startswith(str(year)) if created else False
        
        # Find first active month and peak month (dict order is calendar order,
        # since `commits` was filled month by month above)
        active_months = [(k, v) for k, v in commits.items() if v > 0]
        first_active = active_months[0][0] if active_months else None
        peak = max(commits.items(), key=lambda x: x[1])[0] if any(commits.values()) else None
        
        activity = RepoActivity(
            full_name=full_name,
            html_url=repo.get("html_url", ""),
            private=bool(repo.get("private")),
            pushed_at=repo.get("pushed_at") or "",
            created_at=repo.get("created_at") or "",
            updated_at=repo.get("updated_at") or "",
            description=(repo.get("description") or "").strip(),
            language=repo.get("language") or "",
            languages_breakdown=languages,
            topics=list(repo.get("topics") or []),
            project_type=classify_repo_type(repo),
            maturity=classify_repo_maturity(repo, total_commits),
            themes=detect_themes(repo),
            tech_stack=detect_tech_stack(repo, languages),
            commits_by_month=commits,
            total_year_commits=total_commits,
            owner=owner,
            name=name,
            stargazers_count=int(repo.get("stargazers_count") or 0),
            forks_count=int(repo.get("forks_count") or 0),
            open_issues_count=int(repo.get("open_issues_count") or 0),
            has_pages=bool(repo.get("has_pages")),
            contributors_count=contributors,
            is_new_this_year=is_new,
            first_active_month=first_active,
            peak_month=peak,
            activity_pattern=analyze_activity_pattern(commits),
        )
        out.append(activity)
        
        # Save progress after each repo (for resume capability)
        if progress_file:
            progress_file.write_text(
                json.dumps([r.to_dict() for r in out], indent=2), 
                encoding="utf-8"
            )
    
    # Sort by activity
    out.sort(key=lambda r: (r.total_year_commits, r.pushed_at), reverse=True)
    return out


def load_progress(progress_file: Path) -> List[RepoActivity]:
    """Load previously saved progress from a JSON file."""
    if not progress_file.exists():
        return []
    
    try:
        data = json.loads(progress_file.read_text(encoding="utf-8"))
        activities = []
        for item in data:
            activities.append(RepoActivity(
                full_name=item["full_name"],
                html_url=item["html_url"],
                private=item["private"],
                pushed_at=item["pushed_at"],
                created_at=item["created_at"],
                updated_at=item["updated_at"],
                description=item["description"],
                language=item["language"],
                languages_breakdown=item.get("languages_breakdown", {}),
                topics=item["topics"],
                project_type=item["project_type"],
                maturity=item["maturity"],
                themes=item["themes"],
                tech_stack=item["tech_stack"],
                commits_by_month=item["commits_by_month"],
                total_year_commits=item["total_year_commits"],
                owner=item["owner"],
                name=item["name"],
                stargazers_count=item["stargazers_count"],
                forks_count=item["forks_count"],
                open_issues_count=item["open_issues_count"],
                has_pages=item["has_pages"],
                contributors_count=item["contributors_count"],
                is_new_this_year=item["is_new_this_year"],
                first_active_month=item.get("first_active_month"),
                peak_month=item.get("peak_month"),
                activity_pattern=item["activity_pattern"],
            ))
        return activities
    except Exception as e:
        print(f"  [!] Could not load progress file: {e}")
        return []
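

# A more compact reconstruction is possible via dataclasses.fields(); a sketch,
# assuming the progress file carries every RepoActivity field (the explicit
# version above tolerates a few missing optional keys):
#
#   import dataclasses
#   names = {f.name for f in dataclasses.fields(RepoActivity)}
#   return [RepoActivity(**{k: v for k, v in item.items() if k in names})
#           for item in data]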


# ─────────────────────────────────────────────────────────────────────────────
# Analytics Computation
# ─────────────────────────────────────────────────────────────────────────────

def compute_analytics(year: int, repos: List[RepoActivity]) -> YearAnalytics:
    """Compute year-level analytics from repository data."""
    
    # Basic counts
    active_repos = [r for r in repos if r.total_year_commits > 0]
    new_repos = [r for r in repos if r.is_new_this_year]
    total_commits = sum(r.total_year_commits for r in repos)
    
    # Aggregate commits by month
    commits_by_month: Dict[str, int] = defaultdict(int)
    for r in repos:
        for mk, count in r.commits_by_month.items():
            commits_by_month[mk] += count
    
    # Commits by quarter
    commits_by_quarter: Dict[str, int] = defaultdict(int)
    for mk, count in commits_by_month.items():
        m = int(mk.split("-")[1])
        q = quarter_name(m)
        commits_by_quarter[q] += count
    
    # Distributions
    repos_by_type: Dict[str, int] = defaultdict(int)
    repos_by_maturity: Dict[str, int] = defaultdict(int)
    repos_by_theme: Dict[str, int] = defaultdict(int)
    languages_used: Dict[str, int] = defaultdict(int)
    
    for r in active_repos:
        repos_by_type[r.project_type] += 1
        repos_by_maturity[r.maturity] += 1
        for theme in r.themes:
            repos_by_theme[theme] += 1
        if r.language:
            languages_used[r.language] += 1
    
    # Top lists
    top_repos = [r.full_name for r in sorted(active_repos, 
                 key=lambda x: x.total_year_commits, reverse=True)[:10]]
    top_langs = sorted(languages_used.items(), key=lambda x: x[1], reverse=True)[:5]
    top_themes = sorted(repos_by_theme.items(), key=lambda x: x[1], reverse=True)[:5]
    
    # Find most/least active months
    sorted_months = sorted(commits_by_month.items(), key=lambda x: x[1], reverse=True)
    most_active = sorted_months[0][0] if sorted_months else ""
    least_active = sorted_months[-1][0] if sorted_months else ""
    
    # Busiest quarter
    busiest_q = max(commits_by_quarter.items(), key=lambda x: x[1])[0] if commits_by_quarter else ""
    
    # Generate narratives
    monthly_narratives = generate_monthly_narratives(year, repos, commits_by_month)
    quarterly_summaries = generate_quarterly_summaries(year, repos, commits_by_quarter)
    year_summary = generate_year_summary(year, repos, total_commits, active_repos)
    highlights = generate_highlights(year, repos, active_repos)
    
    return YearAnalytics(
        year=year,
        generated_at=dt.datetime.now(dt.timezone.utc).isoformat(),
        total_repos=len(repos),
        active_repos=len(active_repos),
        new_repos=len(new_repos),
        total_commits=total_commits,
        commits_by_month=dict(commits_by_month),
        commits_by_quarter=dict(commits_by_quarter),
        repos_by_type=dict(repos_by_type),
        repos_by_maturity=dict(repos_by_maturity),
        repos_by_theme=dict(repos_by_theme),
        languages_used=dict(languages_used),
        top_repos_by_commits=top_repos,
        top_languages=[l[0] for l in top_langs],
        top_themes=[t[0] for t in top_themes],
        most_active_month=most_active,
        least_active_month=least_active,
        busiest_quarter=busiest_q,
        new_languages_explored=[],  # Could be computed with historical data
        monthly_narratives=monthly_narratives,
        quarterly_summaries=quarterly_summaries,
        year_summary=year_summary,
        highlights=highlights,
    )


def generate_monthly_narratives(year: int, repos: List[RepoActivity], 
                                 commits_by_month: Dict[str, int]) -> Dict[str, str]:
    """Generate narrative summaries for each month."""
    narratives = {}
    
    for m in range(1, 13):
        mk = month_key(year, m)
        month_commits = commits_by_month.get(mk, 0)
        
        if month_commits == 0:
            narratives[mk] = f"A quiet month with no recorded commits."
            continue
        
        # Find active repos this month
        active_this_month = [
            r for r in repos 
            if r.commits_by_month.get(mk, 0) > 0
        ]
        active_this_month.sort(key=lambda x: x.commits_by_month.get(mk, 0), reverse=True)
        
        # Find new repos started this month
        new_this_month = [
            r for r in repos 
            if r.created_at.startswith(mk)
        ]
        
        # Build narrative
        parts = [f"**{month_commits} commits** across **{len(active_this_month)} repositories**."]
        
        if new_this_month:
            names = ", ".join(r.name for r in new_this_month[:3])
            parts.append(f"Started new projects: {names}.")
        
        if active_this_month:
            top = active_this_month[0]
            parts.append(f"Most active: **{top.name}** ({top.commits_by_month.get(mk, 0)} commits).")
        
        # Theme focus
        themes_this_month = set()
        for r in active_this_month:
            themes_this_month.update(r.themes)
        if themes_this_month:
            parts.append(f"Focus areas: {', '.join(sorted(themes_this_month)[:3])}.")
        
        narratives[mk] = " ".join(parts)
    
    return narratives


def generate_quarterly_summaries(year: int, repos: List[RepoActivity],
                                  commits_by_quarter: Dict[str, int]) -> Dict[str, str]:
    """Generate quarterly summary paragraphs."""
    summaries = {}
    quarter_months = {
        "Q1": [1, 2, 3],
        "Q2": [4, 5, 6],
        "Q3": [7, 8, 9],
        "Q4": [10, 11, 12],
    }
    
    for q, months in quarter_months.items():
        total = commits_by_quarter.get(q, 0)
        
        # Find new repos in this quarter
        new_in_q = [
            r for r in repos
            if any(r.created_at.startswith(month_key(year, m)) for m in months)
        ]
        
        # Active repos in quarter
        active_in_q = [
            r for r in repos
            if any(r.commits_by_month.get(month_key(year, m), 0) > 0 for m in months)
        ]
        
        # Build summary
        parts = [f"{q} {year}: **{total} commits** across **{len(active_in_q)} repositories**."]
        
        if new_in_q:
            parts.append(f"Launched {len(new_in_q)} new project(s).")
        
        # Top project (by commits within this quarter)
        if active_in_q:
            top = max(
                active_in_q,
                key=lambda r: sum(r.commits_by_month.get(month_key(year, m), 0) for m in months),
            )
            parts.append(f"Top focus: **{top.name}**.")
        
        summaries[q] = " ".join(parts)
    
    return summaries


def generate_year_summary(year: int, repos: List[RepoActivity], 
                          total_commits: int, active_repos: List[RepoActivity]) -> str:
    """Generate a comprehensive year summary paragraph."""
    new_count = len([r for r in repos if r.is_new_this_year])
    
    # Primary languages
    lang_counts: Dict[str, int] = defaultdict(int)
    for r in active_repos:
        if r.language:
            lang_counts[r.language] += r.total_year_commits
    top_langs = sorted(lang_counts.items(), key=lambda x: x[1], reverse=True)[:3]
    lang_str = ", ".join(l[0] for l in top_langs)
    
    # Primary themes
    theme_counts: Dict[str, int] = defaultdict(int)
    for r in active_repos:
        for t in r.themes:
            theme_counts[t] += r.total_year_commits
    top_themes = sorted(theme_counts.items(), key=lambda x: x[1], reverse=True)[:3]
    theme_str = ", ".join(t[0].replace("_", " ") for t in top_themes)
    
    summary = f"""In {year}, I contributed **{total_commits} commits** across **{len(active_repos)} active repositories**. 
I started **{new_count} new projects** and worked primarily with {lang_str}. 
My main focus areas were: {theme_str}."""
    
    return summary.strip()


def generate_highlights(year: int, repos: List[RepoActivity], 
                        active_repos: List[RepoActivity]) -> List[str]:
    """Generate notable highlights for the year."""
    highlights = []
    
    # Most active repo
    if active_repos:
        top = active_repos[0]
        highlights.append(f"Most active project: **{top.name}** with {top.total_year_commits} commits")
    
    # New project count
    new_repos = [r for r in repos if r.is_new_this_year]
    if new_repos:
        highlights.append(f"Launched {len(new_repos)} new projects")
    
    # Production-ready projects
    prod = [r for r in active_repos if r.maturity == "production"]
    if prod:
        highlights.append(f"{len(prod)} project(s) reached production maturity")
    
    # Languages explored
    langs = set(r.language for r in active_repos if r.language)
    if len(langs) >= 3:
        highlights.append(f"Worked across {len(langs)} programming languages")
    
    # Steady contributors
    steady = [r for r in active_repos if r.activity_pattern == "steady"]
    if steady:
        highlights.append(f"{len(steady)} project(s) with consistent year-round activity")
    
    # External interest (stars/forks)
    starred = [r for r in active_repos if r.stargazers_count > 0]
    if starred:
        total_stars = sum(r.stargazers_count for r in starred)
        highlights.append(f"{total_stars} total stars across projects")
    
    return highlights


# ─────────────────────────────────────────────────────────────────────────────
# Output Generation
# ─────────────────────────────────────────────────────────────────────────────

def svg_timeline(year: int, items: List[RepoActivity]) -> str:
    """Produces a clean SVG timeline visualization."""
    months = [month_key(year, m) for m in range(1, 13)]
    active_items = [r for r in items if sum(r.commits_by_month.values()) > 0]
    
    if not active_items:
        return '<svg xmlns="http://www.w3.org/2000/svg" width="800" height="200"><text x="400" y="100" text-anchor="middle" font-family="system-ui">No activity found</text></svg>'
    
    # Layout
    row_h = 24
    left_w = 320
    cell_w = 36
    top_h = 80
    padding = 24
    w = padding * 2 + left_w + cell_w * len(months)
    h = padding * 2 + top_h + row_h * len(active_items) + 40  # extra bottom padding
    
    def bucket(c: int) -> int:
        if c <= 0: return 0
        if c == 1: return 1
        if c <= 3: return 2
        if c <= 7: return 3
        if c <= 15: return 4
        return 5
    
    # Modern color palette (teal-based)
    fills = {
        0: "#f8fafc",
        1: "#ccfbf1",
        2: "#5eead4",
        3: "#14b8a6",
        4: "#0d9488",
        5: "#0f766e",
    }
    
    def esc(s: str) -> str:
        return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
    
    month_labels = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]
    
    svg = [f'<svg xmlns="http://www.w3.org/2000/svg" width="{w}" height="{h}" viewBox="0 0 {w} {h}">']
    svg.append('<defs>')
    svg.append('<style>')
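    # Remote @import is ignored by many strict SVG renderers, so the system-ui
    # fallbacks in the classes below still matter.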
    svg.append('@import url("https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&amp;display=swap");')
    svg.append('.title { font-family: Inter, system-ui, sans-serif; font-weight: 700; }')
    svg.append('.subtitle { font-family: Inter, system-ui, sans-serif; font-weight: 400; }')
    svg.append('.label { font-family: Inter, system-ui, sans-serif; font-weight: 500; }')
    svg.append('.meta { font-family: Inter, system-ui, sans-serif; font-weight: 400; }')
    svg.append('</style>')
    svg.append('</defs>')
    
    # Background
    svg.append(f'<rect x="0" y="0" width="{w}" height="{h}" fill="#ffffff"/>')
    
    # Title
    svg.append(f'<text x="{padding}" y="{padding+24}" class="title" font-size="22" fill="#0f172a">GitHub Activity • {year}</text>')
    svg.append(f'<text x="{padding}" y="{padding+46}" class="subtitle" font-size="13" fill="#64748b">Repository commit activity by month</text>')
    
    # Legend
    legend_y = padding + 56
    svg.append(f'<text x="{padding}" y="{legend_y}" class="meta" font-size="11" fill="#94a3b8">Less</text>')
    for i, (_, color) in enumerate(sorted(fills.items())):
        x_leg = padding + 36 + i * 22
        svg.append(f'<rect x="{x_leg}" y="{legend_y-10}" width="18" height="12" rx="2" fill="{color}" stroke="#e2e8f0" stroke-width="0.5"/>')
    svg.append(f'<text x="{padding+175}" y="{legend_y}" class="meta" font-size="11" fill="#94a3b8">More</text>')
    
    # Month headers
    x0 = padding + left_w
    y0 = padding + top_h
    for i, lab in enumerate(month_labels):
        x = x0 + i * cell_w + cell_w / 2
        svg.append(f'<text x="{x:.1f}" y="{padding+top_h-8}" text-anchor="middle" class="label" font-size="11" fill="#475569">{lab}</text>')
    
    # Rows
    for r_i, repo in enumerate(active_items):
        y = y0 + r_i * row_h
        total = sum(repo.commits_by_month.values())
        
        # Repo link and metadata
        display_name = repo.name if len(repo.name) <= 28 else repo.name[:25] + "..."
        svg.append(f'<a href="{esc(repo.html_url)}" target="_blank">')
        svg.append(f'<text x="{padding}" y="{y+16}" class="label" font-size="12" fill="#0284c7">{esc(display_name)}</text>')
        svg.append('</a>')
        
        # Badges
        badge_x = padding + 200
        if repo.is_new_this_year:
            svg.append(f'<rect x="{badge_x}" y="{y+5}" width="32" height="14" rx="3" fill="#dbeafe"/>')
            svg.append(f'<text x="{badge_x+16}" y="{y+14}" text-anchor="middle" class="meta" font-size="9" fill="#1d4ed8">NEW</text>')
            badge_x += 38  # reserve space in case more badges are added later
        
        svg.append(f'<text x="{padding+280}" y="{y+16}" class="meta" font-size="11" fill="#94a3b8">{total}</text>')
        
        # Month cells
        for m_i, mk in enumerate(months):
            c = repo.commits_by_month.get(mk, 0)
            b = bucket(c)
            fill = fills[b]
            x = x0 + m_i * cell_w
            svg.append(f'<rect x="{x+2}" y="{y+3}" width="{cell_w-4}" height="{row_h-6}" rx="3" fill="{fill}" stroke="#e2e8f0" stroke-width="0.5">')
            svg.append(f'<title>{esc(repo.name)} • {mk}: {c} commits</title>')
            svg.append('</rect>')
    
    svg.append("</svg>")
    return "\n".join(svg)


def generate_blog_drafts(year: int, repos: List[RepoActivity], 
                         analytics: YearAnalytics) -> str:
    """Generate ready-to-use blog post templates."""
    lines = [
        f"---",
        f"# Blog Draft Templates for {year}",
        f"# Generated: {dt.datetime.now().isoformat()}",
        f"# Use these as starting points for your blog posts",
        f"---",
        "",
        "# Year in Review: My {year} GitHub Journey",
        "",
        "## Introduction",
        "",
        analytics.year_summary,
        "",
        "## Highlights",
        "",
    ]
    
    for h in analytics.highlights:
        lines.append(f"- {h}")
    
    lines.extend([
        "",
        "## Quarterly Breakdown",
        "",
    ])
    
    for q in ["Q1", "Q2", "Q3", "Q4"]:
        lines.append(f"### {q}")
        lines.append("")
        lines.append(analytics.quarterly_summaries.get(q, ""))
        lines.append("")
    
    lines.extend([
        "## Project Spotlights",
        "",
    ])
    
    # Top 5 projects (the activity list is already sorted by commits, descending)
    top_repos = [r for r in repos if r.total_year_commits > 0][:5]
    for repo in top_repos:
        lines.extend([
            f"### {repo.name}",
            "",
            f"**{repo.total_year_commits} commits** | {repo.project_type} | {repo.maturity}",
            "",
            f"> {repo.description or 'No description'}",
            "",
            f"- **Tech stack:** {', '.join(repo.tech_stack) or repo.language or 'N/A'}",
            f"- **Themes:** {', '.join(repo.themes) or 'General'}",
            f"- **Activity pattern:** {repo.activity_pattern}",
            f"- **Link:** [{repo.full_name}]({repo.html_url})",
            "",
        ])
    
    lines.extend([
        "## Technologies Used",
        "",
    ])
    
    for lang in analytics.top_languages:
        count = analytics.languages_used.get(lang, 0)
        lines.append(f"- **{lang}**: {count} repositories")
    
    lines.extend([
        "",
        "## Looking Ahead",
        "",
        "<!-- Write about your goals for next year -->",
        "",
        "---",
        "",
        "*This post was generated with data from the GitHub Timeline Analytics tool.*",
    ])
    
    return "\n".join(lines)


def generate_monthly_recap(year: int, repos: List[RepoActivity], 
                           analytics: YearAnalytics) -> str:
    """Generate detailed monthly recap document."""
    lines = [
        f"# Monthly Recap: {year}",
        "",
        f"*Generated: {dt.datetime.now().strftime('%B %d, %Y')}*",
        "",
    ]
    
    for m in range(1, 13):
        mk = month_key(year, m)
        mname = month_name(m)
        
        lines.extend([
            f"## {mname} {year}",
            "",
            analytics.monthly_narratives.get(mk, "No data available."),
            "",
        ])
        
        # Active repos this month
        active = [r for r in repos if r.commits_by_month.get(mk, 0) > 0]
        active.sort(key=lambda x: x.commits_by_month.get(mk, 0), reverse=True)
        
        if active:
            lines.append("### Active Repositories")
            lines.append("")
            lines.append("| Repository | Commits | Type | Pattern |")
            lines.append("|------------|---------|------|---------|")
            for r in active[:10]:
                commits = r.commits_by_month.get(mk, 0)
                lines.append(f"| [{r.name}]({r.html_url}) | {commits} | {r.project_type} | {r.activity_pattern} |")
            lines.append("")
        
        # New repos this month
        new_this_month = [r for r in repos if r.created_at.startswith(mk)]
        if new_this_month:
            lines.append("### New Projects")
            lines.append("")
            for r in new_this_month:
                lines.append(f"- **{r.name}**: {r.description or 'No description'}")
            lines.append("")
        
        lines.append("---")
        lines.append("")
    
    return "\n".join(lines)


def generate_project_spotlights(year: int, repos: List[RepoActivity], 
                                 out_dir: Path) -> None:
    """Generate individual spotlight files for top projects."""
    spotlight_dir = out_dir / f"project_spotlights_{year}"
    spotlight_dir.mkdir(parents=True, exist_ok=True)
    
    top_repos = [r for r in repos if r.total_year_commits >= 10][:20]
    
    for repo in top_repos:
        filename = f"{repo.name.lower().replace(' ', '-')}.md"
        
        # Monthly activity chart (text-based)
        months_short = ["J","F","M","A","M","J","J","A","S","O","N","D"]
        activity_chart = ""
        for i, m in enumerate(range(1, 13)):
            mk = month_key(year, m)
            c = repo.commits_by_month.get(mk, 0)
            bar = "█" * min(c // 2, 10) if c > 0 else "░"
            activity_chart += f"{months_short[i]}: {bar} ({c})\n"
        
        content = f"""---
title: "{repo.name}"
description: "{repo.description or 'Project spotlight'}"
year: {year}
commits: {repo.total_year_commits}
type: "{repo.project_type}"
maturity: "{repo.maturity}"
languages: {json.dumps(repo.tech_stack)}
themes: {json.dumps(repo.themes)}
---

# {repo.name}

{repo.description or 'No description available.'}

## Overview

| Metric | Value |
|--------|-------|
| Total Commits ({year}) | {repo.total_year_commits} |
| Project Type | {repo.project_type} |
| Maturity | {repo.maturity} |
| Activity Pattern | {repo.activity_pattern} |
| Primary Language | {repo.language or 'N/A'} |
| Contributors | {repo.contributors_count} |
| Stars | {repo.stargazers_count} |
| Forks | {repo.forks_count} |

## Tech Stack

{', '.join(repo.tech_stack) if repo.tech_stack else repo.language or 'Not specified'}

## Themes

{', '.join(t.replace('_', ' ').title() for t in repo.themes) if repo.themes else 'General development'}

## Activity Timeline

```
{activity_chart}
```

## Links

- **Repository:** [{repo.full_name}]({repo.html_url})
{'- **Live Site:** https://' + repo.owner.lower() + '.github.io/' + repo.name + '/' if repo.has_pages else ''}

## Blog Post Ideas

- [ ] Deep dive into the architecture of {repo.name}
- [ ] Lessons learned building {repo.name}
- [ ] Technical challenges and solutions in {repo.name}
"""
        
        (spotlight_dir / filename).write_text(content, encoding="utf-8")
    
    print(f"   [OK] Generated {len(top_repos)} project spotlight files")


def generate_data_files(year: int, repos: List[RepoActivity], 
                        analytics: YearAnalytics, out_dir: Path) -> None:
    """Generate data files for use in Astro/React components."""
    
    # Minimal repo data for timeline component
    timeline_data = [
        {
            "name": r.name,
            "fullName": r.full_name,
            "url": r.html_url,
            "commits": r.commits_by_month,
            "total": r.total_year_commits,
            "type": r.project_type,
            "isNew": r.is_new_this_year,
            "language": r.language,
        }
        for r in repos if r.total_year_commits > 0
    ]
    
    (out_dir / f"timeline_data_{year}.json").write_text(
        json.dumps(timeline_data, indent=2), encoding="utf-8"
    )
    
    # Summary stats for dashboard
    dashboard_data = {
        "year": year,
        "totalCommits": analytics.total_commits,
        "activeRepos": analytics.active_repos,
        "newRepos": analytics.new_repos,
        "commitsByMonth": analytics.commits_by_month,
        "topRepos": analytics.top_repos_by_commits[:5],
        "topLanguages": analytics.top_languages,
        "topThemes": analytics.top_themes,
        "highlights": analytics.highlights,
    }
    
    (out_dir / f"dashboard_{year}.json").write_text(
        json.dumps(dashboard_data, indent=2), encoding="utf-8"
    )
    
    print(f"   [OK] Generated component data files")


# ─────────────────────────────────────────────────────────────────────────────
# Main
# ─────────────────────────────────────────────────────────────────────────────

def main():
    ap = argparse.ArgumentParser(
        description="Generate comprehensive GitHub activity analytics for blogging",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python github_2025_timeline.py --year 2025 --out out
  python github_2025_timeline.py --limit 10                    # Process first 10 repos only
  python github_2025_timeline.py --limit 20 --offset 10        # Process repos 11-30
  python github_2025_timeline.py --resume                      # Continue from last run
  python github_2025_timeline.py --skip-languages              # Faster (skip language API calls)

Output files:
  repos_{year}.json           Full repository data
  analytics_{year}.json       Computed insights and narratives
  timeline_{year}.svg         Visual activity heatmap
  blog_drafts_{year}.md       Ready-to-use blog templates
  monthly_recap_{year}.md     Month-by-month summaries
  timeline_data_{year}.json   Data for web components
  dashboard_{year}.json       Summary stats for dashboards
  project_spotlights_{year}/  Individual project deep-dives
        """
    )
    ap.add_argument("--year", type=int, default=2025, help="Year to analyze")
    ap.add_argument("--out", type=str, default="out", help="Output directory")
    ap.add_argument("--skip-languages", action="store_true", 
                    help="Skip fetching language breakdowns (faster)")
    ap.add_argument("--limit", type=int, default=0,
                    help="Process only first N repos (0 = all)")
    ap.add_argument("--offset", type=int, default=0,
                    help="Start from repo index N (for resuming)")
    ap.add_argument("--resume", action="store_true",
                    help="Resume from previous partial run (uses progress file)")
    ap.add_argument("--exclude", type=str, nargs="*", default=[],
                    help="Exclude repos matching these names (partial match)")
    args = ap.parse_args()
    
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if not token:
        print("ERROR: Set GITHUB_TOKEN in your environment.", file=sys.stderr)
        print("  PowerShell: $env:GITHUB_TOKEN='ghp_...'", file=sys.stderr)
        print("  Bash: export GITHUB_TOKEN='ghp_...'", file=sys.stderr)
        sys.exit(2)
    
    year = args.year
    out_dir = Path(args.out)
    out_dir.mkdir(parents=True, exist_ok=True)
    
    print(f"\n{'=' * 60}")
    print(f"  GitHub Year-in-Review Analytics")
    print(f"  Analyzing: {year}")
    print(f"{'=' * 60}\n")
    
    # Authenticate
    print("[*] Authenticating with GitHub...")
    viewer = get_viewer(token)
    viewer_login = viewer.get("login")
    print(f"    Authenticated as: {viewer_login}\n")
    
    # Fetch repos
    print("[*] Fetching repositories...")
    user_repos = list_user_repos(token)
    print(f"   Found {len(user_repos)} user repositories")
    
    orgs = list_orgs(token)
    print(f"   Found {len(orgs)} organizations")
    
    org_repos: List[Dict[str, Any]] = []
    for o in orgs:
        login = o.get("login")
        if not login:
            continue
        try:
            repos_list = list_org_repos(login, token)
            org_repos.extend(repos_list)
            print(f"     - {login}: {len(repos_list)} repos")
        except Exception as e:
            print(f"     [!] {login}: Failed ({e})", file=sys.stderr)
    
    # Merge and dedupe
    merged: Dict[str, Dict[str, Any]] = {}
    for r in (user_repos + org_repos):
        merged[r["full_name"]] = r
    repos = sorted(merged.values(), key=lambda r: r.get("pushed_at") or "", reverse=True)
    print(f"\n   Total unique repositories: {len(repos)}")
    
    # Filter out excluded repos
    if args.exclude:
        before_count = len(repos)
        repos = [r for r in repos if not any(excl.lower() in r["full_name"].lower() for excl in args.exclude)]
        excluded_count = before_count - len(repos)
        print(f"   Excluded {excluded_count} repos matching: {', '.join(args.exclude)}")
    print()
    
    # Progress file for resume capability
    progress_file = out_dir / f"_progress_{year}.json"
    
    # Load existing progress if resuming
    existing_activity: List[RepoActivity] = []
    if args.resume and progress_file.exists():
        print(f"[*] Loading previous progress...")
        existing_activity = load_progress(progress_file)
        print(f"    Found {len(existing_activity)} previously processed repos\n")
    
    # Build activity
    print(f"[*] Analyzing {year} activity...")
    if args.limit > 0:
        print(f"    Processing repos {args.offset + 1} to {min(args.offset + args.limit, len(repos))}")
    print(f"    (Progress saved after each repo - use --resume to continue)\n")
    
    activity = build_activity(
        year, repos, token, 
        fetch_languages=not args.skip_languages,
        offset=args.offset,
        limit=args.limit,
        progress_file=progress_file,
        existing_activity=existing_activity
    )
    
    # Compute analytics
    print(f"\n[*] Computing analytics...")
    analytics = compute_analytics(year, activity)
    
    # Generate outputs
    print(f"\n[*] Generating output files...\n")
    
    # Core data files
    (out_dir / f"repos_{year}.json").write_text(
        json.dumps([r.to_dict() for r in activity], indent=2), encoding="utf-8"
    )
    print(f"   [OK] repos_{year}.json")
    
    (out_dir / f"analytics_{year}.json").write_text(
        json.dumps(analytics.to_dict(), indent=2), encoding="utf-8"
    )
    print(f"   [OK] analytics_{year}.json")
    
    # Visual timeline
    (out_dir / f"timeline_{year}.svg").write_text(
        svg_timeline(year, activity), encoding="utf-8"
    )
    print(f"   [OK] timeline_{year}.svg")
    
    # Blog content
    (out_dir / f"blog_drafts_{year}.md").write_text(
        generate_blog_drafts(year, activity, analytics), encoding="utf-8"
    )
    print(f"   [OK] blog_drafts_{year}.md")
    
    (out_dir / f"monthly_recap_{year}.md").write_text(
        generate_monthly_recap(year, activity, analytics), encoding="utf-8"
    )
    print(f"   [OK] monthly_recap_{year}.md")
    
    # Component data
    generate_data_files(year, activity, analytics, out_dir)
    
    # Project spotlights
    generate_project_spotlights(year, activity, out_dir)
    
    # Summary
    print(f"\n{'=' * 60}")
    print(f"  Summary")
    print(f"{'=' * 60}")
    print(f"  Total repositories:     {analytics.total_repos}")
    print(f"  Active in {year}:        {analytics.active_repos}")
    print(f"  New projects:           {analytics.new_repos}")
    print(f"  Total commits:          {analytics.total_commits}")
    print(f"  Most active month:      {analytics.most_active_month}")
    print(f"  Top project:            {analytics.top_repos_by_commits[0] if analytics.top_repos_by_commits else 'N/A'}")
    print(f"  Top languages:          {', '.join(analytics.top_languages[:3])}")
    print(f"  Focus themes:           {', '.join(analytics.top_themes[:3])}")
    print(f"{'=' * 60}")
    print(f"\n  Output directory: {out_dir.absolute()}")
    
    # Show continuation instructions if not all repos were processed
    processed_count = len(activity)
    total_count = len(repos)
    if processed_count < total_count:
        remaining = total_count - processed_count
        print(f"\n  [!] Processed {processed_count} of {total_count} repos ({remaining} remaining)")
        print(f"      To continue: python scripts/github_2025_timeline.py --resume")
    else:
        # Clean up progress file if complete
        if progress_file.exists():
            progress_file.unlink()
        print(f"\n  [OK] All {total_count} repositories processed!")
    print()


if __name__ == "__main__":
    main()
