#!/usr/bin/env python3
"""
Progressive Skills MCP Server

11 tools for managing a SKILL.md-compatible skills library with:
  - 3-level progressive disclosure  (list_skills → peek_skill → load_skill)
  - RPG-style mastery scoring        (5 dimensions, 0-100, Novice→Master)
  - SM-2 spaced repetition decay     (effective_score decays when skills go unreviewed)
  - Experience-weighted scoring      (use_count + update_count suppress effective_score until earned)
  - Usage-based feedback             (report_outcome → quality 0-5 → SM-2 update + usage_log.jsonl)
  - Session reflection loop          (reflect_on_session → create/update → record_assessment)
  - Post-update assessment prompt    (update_skill returns assessment prompt automatically)
  - Master index                     (index.json, auto-rebuilt on every write)
  - Fully spec-compliant SKILL.md    (compatible with OpenCode, Claude, agent hosts)

Config: ~/.config/skills-mcp/config.json
  { "skills_dir": "~/.skills" }
"""

import json
import math
import os
import re
import subprocess
import sys
from datetime import date, datetime, timedelta, timezone
from pathlib import Path


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

def _load_config() -> dict:
    """Load the optional user configuration file.

    Reads ``~/.config/skills-mcp/config.json``.

    Returns:
        The parsed JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or not a JSON object.  Problems are
        reported on stderr instead of raised: this function runs at import
        time, so a broken config should fall back to the defaults rather
        than prevent the module from loading.
    """
    path = Path.home() / ".config" / "skills-mcp" / "config.json"
    try:
        # utf-8-sig also accepts a file saved with a UTF-8 byte-order mark.
        raw = path.read_text(encoding="utf-8-sig")
    except FileNotFoundError:
        return {}
    except (OSError, UnicodeDecodeError) as exc:
        print(f"skills-mcp: cannot read {path}: {exc}", file=sys.stderr)
        return {}

    try:
        config = json.loads(raw)
    except json.JSONDecodeError as exc:
        print(f"skills-mcp: ignoring invalid JSON in {path}: {exc}", file=sys.stderr)
        return {}

    if not isinstance(config, dict):
        # Callers use config.get(...), so anything but an object is unusable.
        print(f"skills-mcp: ignoring {path}: top-level value must be an object", file=sys.stderr)
        return {}
    return config


# Configuration is read once, at import time; each setting below falls back
# to its default when the corresponding key is absent.
_CONFIG = _load_config()
# Skills directory from the "skills_dir" key, with a leading "~" expanded.
SKILLS_DIR = Path(os.path.expanduser(_CONFIG.get("skills_dir", "~/.skills")))

# Optional "ai_review" section of the config file.
_AI_REVIEW_CFG = _CONFIG.get("ai_review", {})
# AI review is opt-in: off unless the config sets "enabled".
_AI_REVIEW_ENABLED: bool = _AI_REVIEW_CFG.get("enabled", False)
# Argument vector of the reviewer command; the prompt is appended as the last argument.
_AI_REVIEW_COMMAND: list[str] = _AI_REVIEW_CFG.get("command", ["gh", "copilot", "-p"])
# Timeout in seconds for the reviewer subprocess; coerced with int().
_AI_REVIEW_TIMEOUT: int = int(_AI_REVIEW_CFG.get("timeout", 120))


# ---------------------------------------------------------------------------
# YAML frontmatter — hand-rolled, zero dependencies
#
# Handles the subset used by SKILL.md:
#   - top-level  key: value  and  key: "value"
#   - one-level nested block (metadata:)
#   - no lists, no multi-line values (not needed by the spec)
# ---------------------------------------------------------------------------

def _strip_quotes(s: str) -> str:
    """Trim whitespace and remove one pair of matching surrounding quotes.

    For double-quoted values, ``\\"`` is additionally unescaped to ``"``.
    ``_serialize_frontmatter`` writes nested values as ``"..."`` with inner
    double quotes escaped that way; without the unescape here, every
    read/write cycle would add another backslash in front of each quote.

    Args:
        s: Raw scalar text taken from the right-hand side of ``key: value``.

    Returns:
        The unquoted value.  Text that is not wrapped in a matching pair of
        ``'`` or ``"`` is returned stripped but otherwise unchanged.
    """
    s = s.strip()
    if len(s) >= 2 and s[0] in ('"', "'") and s[-1] == s[0]:
        inner = s[1:-1]
        if s[0] == '"':
            # Exact inverse of the serializer's  replace('"', '\\"').
            inner = inner.replace('\\"', '"')
        return inner
    return s


def _parse_yaml_block(lines: list[str], base_indent: int = 0) -> dict:
    """Parse a minimal YAML mapping from *lines*.

    Supports ``key: value`` pairs at exactly ``base_indent`` columns and
    nested mappings introduced by a bare ``key:`` line.  Blank lines and
    ``#`` comment lines are skipped; lines indented deeper than
    ``base_indent`` (outside a nested block) and lines without a colon are
    ignored; parsing stops at the first line indented less than
    ``base_indent``.

    The indentation of a nested block is taken from the block itself (the
    smallest indent among its non-comment lines) instead of assuming two
    spaces, so a block indented by four spaces or a tab is parsed rather
    than silently returned as an empty dict.

    Args:
        lines: Frontmatter lines without the ``---`` delimiters.
        base_indent: Column at which keys of this mapping start.

    Returns:
        Dict mapping keys to unquoted string values, nested dicts, or
        ``None`` for a bare ``key:`` that has no indented children.
    """
    result: dict = {}
    i = 0
    while i < len(lines):
        line = lines[i]
        raw = line.lstrip()
        if not raw or raw.startswith("#"):
            i += 1
            continue

        indent = len(line) - len(raw)
        if indent < base_indent:
            break
        if indent > base_indent or ":" not in raw:
            i += 1
            continue

        key, _, tail = raw.partition(":")
        key = key.strip()
        tail = tail.strip()

        if tail:
            result[key] = _strip_quotes(tail)
            i += 1
            continue

        # Bare "key:" — collect the following lines that are indented deeper
        # than this mapping; blank lines inside the block are dropped.
        nested: list[str] = []
        j = i + 1
        while j < len(lines):
            nl = lines[j]
            if not nl.strip():
                j += 1
                continue
            if len(nl) - len(nl.lstrip()) <= base_indent:
                break
            nested.append(nl)
            j += 1

        if nested:
            key_indents = [
                len(nl) - len(nl.lstrip())
                for nl in nested
                if not nl.lstrip().startswith("#")
            ]
            # A block holding only comments has no keys; any indent works.
            child_indent = min(key_indents) if key_indents else base_indent + 2
            result[key] = _parse_yaml_block(nested, child_indent)
        else:
            result[key] = None
        i = j

    return result


def _parse_frontmatter(text: str) -> tuple[dict, str]:
    """Split a document into its frontmatter mapping and its body.

    The document must start with ``---`` and contain a later line that is
    exactly ``---`` (trailing whitespace ignored).  When either condition
    fails the input is returned untouched as ``({}, text)``.  Leading blank
    lines are removed from the returned body.
    """
    if not text.startswith("---"):
        return {}, text

    rows = text.split("\n")
    closing = next(
        (pos for pos, row in enumerate(rows[1:], 1) if row.rstrip() == "---"),
        None,
    )
    if closing is None:
        return {}, text

    header = _parse_yaml_block(rows[1:closing], base_indent=0)
    remainder = "\n".join(rows[closing + 1:])
    return header, remainder.lstrip("\n")


def _serialize_frontmatter(fm: dict) -> str:
    """Render *fm* as a ``---``-delimited frontmatter block.

    Top-level scalars are written as ``key: value``; ``None`` is written as
    a bare ``key:``; a dict value becomes a one-level nested block whose
    entries are double-quoted with inner ``"`` escaped as ``\\"``.

    The format is strictly one line per value, so line breaks inside a value
    are replaced by spaces.  Written verbatim they would spill onto extra
    lines that the parser drops or misreads, and a value containing a line
    that is just ``---`` would end the frontmatter early.

    Args:
        fm: Frontmatter mapping; nesting deeper than one level is not
            supported.

    Returns:
        The frontmatter text, including both ``---`` delimiter lines and
        without a trailing newline.
    """
    def _one_line(value: object) -> str:
        text = "" if value is None else str(value)
        return text.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

    parts = ["---"]
    for key, value in fm.items():
        if isinstance(value, dict):
            parts.append(f"{key}:")
            for k, v in value.items():
                escaped = _one_line(v).replace('"', '\\"')
                parts.append(f'  {k}: "{escaped}"')
        elif value is None:
            parts.append(f"{key}:")
        else:
            parts.append(f"{key}: {_one_line(value)}")
    parts.append("---")
    return "\n".join(parts)


def _now() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")


def _today() -> date:
    """Return the current calendar date in UTC."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.date()


# ---------------------------------------------------------------------------
# SM-2 spaced repetition model
#
# Based on the SM-2 algorithm (SuperMemo 1987).
#
# quality scale (used in report_outcome):
#   5 — perfect recall, effortless
#   4 — correct after a moment's thought
#   3 — correct but required significant effort
#   2 — incorrect, but the correct answer felt obvious once seen
#   1 — incorrect, and the correct answer was difficult to recall
#   0 — complete blackout
#
# Effective score decays linearly from 1.0 → 0.5 as overdue_days grows
# from 0 → interval, then holds at 0.5 (never zeroes out a good skill).
# ---------------------------------------------------------------------------

_SM2_INITIAL_EF = 2.5             # starting ease factor
_SM2_INITIAL_INTERVAL = 1.0       # days
_SM2_DEFAULT_HALFLIFE = 90.0      # days — fallback interval when no SM-2 history (used for linear decay, not an exponential half-life)
_SM2_DECAY_FLOOR = 0.5            # the overdue-decay multiplier never falls below 0.5 (the experience factor below applies on top)

# Experience factor: suppresses effective_score until the skill has been
# applied in practice.  combined_xp = use_count + update_count * 1.5
# exp_factor = 1.0 - _EXP_DISCOUNT * exp(-combined_xp / _EXP_SCALE)
#
# At 0 use/0 updates  →  factor ≈ 0.60  (40% withheld until earned)
# At 4 uses / 2 upd   →  factor ≈ 0.90  (combined_xp = 7.0)
# At 10 uses / 5 upd  →  factor ≈ 0.99  (combined_xp = 17.5)
_EXP_DISCOUNT = 0.40
_EXP_SCALE    = 5.0


def _sm2_update(quality: int, repetitions: int, ef: float, interval: float) -> tuple[int, float, float]:
    """Advance the SM-2 state by one review.

    ``quality`` is clamped to 0..5.  A quality below 3 resets the repetition
    count to 0 and the interval to one day; otherwise the interval follows
    the 1 day → 6 days → ``round(interval * ef)`` progression.  The ease
    factor is adjusted on every call, success or failure, and never drops
    below 1.3.

    Returns:
        ``(new_repetitions, new_ef, new_interval_days)``.
    """
    q = min(5, max(0, quality))
    miss = 5 - q
    adjusted_ef = max(1.3, ef + (0.1 - miss * (0.08 + miss * 0.02)))

    if q < 3:
        # Failed recall — start over with a one-day interval.
        return 0, adjusted_ef, 1.0

    if repetitions == 0:
        next_interval = 1.0
    elif repetitions == 1:
        next_interval = 6.0
    else:
        # Uses the ease factor from before this review's adjustment.
        next_interval = round(interval * ef)
    return repetitions + 1, adjusted_ef, next_interval


def _effective_score(assessed_score: int, meta: dict) -> tuple[float, str, int]:
    """
    Derive the time- and experience-adjusted score for a skill.

    Returns (effective_score, due_date_str, days_until_due).

    The due date comes from ``sm2_due_date`` when it parses and
    ``sm2_interval`` is positive.  Otherwise it is ``last_validated`` (or
    ``updated_at``) plus the default interval.  With no usable date at all
    the assessed score is returned unchanged as ``(score, "", 0)``.

    Once overdue, the score is scaled linearly from 1.0 down to the decay
    floor over one interval.  The experience factor computed from
    ``use_count`` and ``update_count`` is applied whenever a due date exists.
    """
    today = _today()

    try:
        interval = float(meta.get("sm2_interval", "0") or "0")
    except (TypeError, ValueError):
        interval = 0.0

    due = None
    due_text = meta.get("sm2_due_date", "")
    if due_text and interval > 0:
        try:
            due = date.fromisoformat(due_text)
        except ValueError:
            due = None

    if due is None:
        # No SM-2 schedule: count from the last validation (or last update).
        stamp = meta.get("last_validated", "") or meta.get("updated_at", "")
        if stamp:
            try:
                anchor = datetime.fromisoformat(stamp.replace("Z", "+00:00")).date()
                interval = _SM2_DEFAULT_HALFLIFE
                due = anchor + timedelta(days=int(interval))
            except (ValueError, OverflowError):
                pass

    if due is None:
        return float(assessed_score), "", 0

    remaining = (due - today).days
    overdue = max(0, -remaining)

    decay = 1.0
    if overdue > 0 and interval > 0:
        progress = min(1.0, overdue / interval)
        decay = max(_SM2_DECAY_FLOOR, 1.0 - (1.0 - _SM2_DECAY_FLOOR) * progress)

    # A single unparsable counter resets both to zero.
    try:
        uses = int(meta.get("use_count", "0") or "0")
        updates = int(meta.get("update_count", "0") or "0")
    except (TypeError, ValueError):
        uses = updates = 0
    xp = uses + updates * 1.5
    factor = 1.0 - _EXP_DISCOUNT * math.exp(-xp / _EXP_SCALE)

    return round(assessed_score * factor * decay, 1), str(due), remaining


# ---------------------------------------------------------------------------
# Mastery model
# ---------------------------------------------------------------------------

# Mastery bands as (low, high, label); both bounds are inclusive and the
# bands cover the integer scores 0-100.
_LEVELS = [
    (0,  20,  "novice"),
    (21, 40,  "apprentice"),
    (41, 60,  "journeyman"),
    (61, 80,  "expert"),
    (81, 100, "master"),
]

# The five assessment dimensions; record_assessment clamps each to 0-20 and
# sums them into the mastery score.
_DIMENSIONS = ["completeness", "specificity", "examples", "edge_cases", "actionability"]

# Returned as "next_step" by create_skill and update_skill to prompt an assessment.
_ASSESSMENT_TRIGGER = "Call record_assessment to score this skill (completeness, specificity, examples, edge_cases, actionability — each 0–20)."


def _extract_sources(body: str) -> list[str]:
    """Collect the entries listed under a ``## Sources`` heading.

    Scanning starts after a line whose stripped text begins with
    ``## Sources`` and stops at the next line that starts with ``## ``.
    Leading ``-`` / ``*`` bullet markers are removed from each entry.  Every
    non-empty entry that does not start with ``#`` is kept, whether or not
    it looks like a URL or a path.
    """
    found: list[str] = []
    inside = False
    for raw_line in body.split("\n"):
        if raw_line.strip().startswith("## Sources"):
            # Also swallows a repeated Sources heading inside the section.
            inside = True
            continue
        if not inside:
            continue
        if raw_line.startswith("## "):
            break
        entry = raw_line.strip().lstrip("-").lstrip("*").strip()
        if entry and not entry.startswith("#"):
            found.append(entry)
    return found


# ---------------------------------------------------------------------------
# Section slicing — zero-dep fuzzy relevance scoring
#
# Naive stemming covers the most common English suffixes so that a query for
# "mocking" hits sections about "mock", "docker" hits "dockerized", etc.
# Scoring is tf-idf-lite: heading matches outweigh body matches, and term
# density (matches / words) beats raw count to avoid rewarding giant sections.
# ---------------------------------------------------------------------------

# Suffixes removed by _stem().  Order does not matter here: _stem tries the
# candidates longest-first.
_STEM_SUFFIXES = ("ing", "tion", "tions", "ings", "ed", "er", "ers", "s", "es",
                  "ize", "ized", "izes", "ly", "ful", "ment", "ments")


def _stem(word: str) -> str:
    """Lowercase *word* and drop its longest removable suffix.

    A suffix is removable only if at least four characters remain; when no
    suffix qualifies the lowercased word is returned unchanged.
    """
    lowered = word.lower()
    removable = [
        suffix for suffix in _STEM_SUFFIXES
        if lowered.endswith(suffix) and len(lowered) - len(suffix) >= 4
    ]
    if not removable:
        return lowered
    longest = max(removable, key=len)
    return lowered[: len(lowered) - len(longest)]


def _tokenize(text: str) -> list[str]:
    """Split *text* into lowercase word tokens and stem each one.

    A token is a run of ASCII letters, digits, underscores or hyphens.
    """
    lowered = text.lower()
    return [_stem(found.group(0)) for found in re.finditer(r"[a-zA-Z0-9_\-]+", lowered)]


def _parse_sections(body: str) -> list[tuple[str, str]]:
    """Split *body* into ``(heading, content)`` pairs at each ``## `` line.

    Text before the first heading is returned under an empty heading.  A
    chunk with neither a heading nor any lines is omitted.  Headings have
    their leading ``#`` and space characters removed; content is stripped.
    """
    # Each chunk is [heading, lines]; the first one collects the preamble.
    chunks: list[list] = [["", []]]
    for line in body.split("\n"):
        if line.startswith("## "):
            chunks.append([line.lstrip("# ").strip(), []])
        else:
            chunks[-1][1].append(line)

    return [
        (title, "\n".join(rows).strip())
        for title, rows in chunks
        if title or rows
    ]


def _score_section(heading: str, content: str, query_stems: list[str]) -> float:
    """
    Rate how well one section matches the stemmed query terms.

    For each query stem, exact token matches are counted in the heading and
    in the body.  Where there is no exact match, substring matches in
    either direction are counted instead, at half weight for the body.
    Heading hits count 5× a body hit.

    The result blends the raw hit total (60%) with hits per 100 body words
    (40%).  The word count used for the density is capped at 200.
    NOTE(review): that cap limits the penalty for long sections; very short
    sections are not capped — confirm this is the intended direction.

    Returns 0.0 when there are no query stems.
    """
    if not query_stems:
        return 0.0

    title_tokens = _tokenize(heading)
    text_tokens = _tokenize(content)
    word_total = len(text_tokens) or 1

    def _fuzzy(tokens: list[str], stem: str) -> int:
        # Catches compound words such as "dockerized" for "docker".
        return sum(1 for token in tokens if stem in token or token in stem)

    total = 0.0
    for stem in query_stems:
        in_title = title_tokens.count(stem)
        in_text = text_tokens.count(stem)
        if not in_title:
            in_title += _fuzzy(title_tokens, stem)
        if not in_text:
            in_text += _fuzzy(text_tokens, stem) * 0.5
        total += in_title * 5 + in_text

    density = (total / min(word_total, 200)) * 100
    return round(total * 0.6 + density * 0.4, 3)


def _mastery_level(score: int) -> str:
    """Return the label of the band containing *score*; "novice" if none does."""
    matching = (label for low, high, label in _LEVELS if low <= score <= high)
    return next(matching, "novice")


# ---------------------------------------------------------------------------
# SkillsStore — all file I/O lives here
# ---------------------------------------------------------------------------

class SkillsStore:

    def __init__(self, root: Path) -> None:
        self.root = root
        self.root.mkdir(parents=True, exist_ok=True)
        self._index_path = root / "index.json"

    # --- Internal helpers ---

    def _skill_dir(self, name: str) -> Path:
        return self.root / name

    def _skill_path(self, name: str) -> Path:
        return self.root / name / "SKILL.md"

    def _require(self, name: str) -> None:
        if not self._skill_path(name).exists():
            raise FileNotFoundError(f"Skill '{name}' not found")

    def _read(self, name: str) -> tuple[dict, str]:
        self._require(name)
        return _parse_frontmatter(self._skill_path(name).read_text())

    def _write(self, name: str, fm: dict, body: str) -> None:
        """Persist a skill and refresh the master index.

        The document is written to a sibling ``.tmp`` file and then moved
        over SKILL.md with ``os.replace``.  Leading blank lines of *body*
        are dropped; frontmatter and body are separated by one blank line.
        """
        self._skill_dir(name).mkdir(parents=True, exist_ok=True)
        target = self._skill_path(name)
        staging = target.with_suffix(".tmp")
        document = "\n\n".join((_serialize_frontmatter(fm), body.lstrip("\n")))
        staging.write_text(document)
        os.replace(staging, target)
        self._rebuild_index()

    def _names(self) -> list[str]:
        if not self.root.exists():
            return []
        return sorted(
            e.name for e in self.root.iterdir()
            if e.is_dir() and (e / "SKILL.md").exists()
        )

    def _meta_int(self, meta: dict, key: str, default: int = 0) -> int:
        try:
            return int(meta.get(key, default))
        except (TypeError, ValueError):
            return default

    def _rebuild_index(self) -> None:
        """Regenerate index.json from every skill on disk.

        Each skill contributes its description, assessed and effective
        scores, due date, usage counters, tags and source information.  The
        index is written to a ``.tmp`` file and moved into place with
        ``os.replace``.

        Indexing is best-effort: a skill that cannot be read or summarised
        is left out of the index, and the reason is reported on stderr so
        the omission can be diagnosed instead of passing silently.
        """
        skills: dict = {}
        for name in self._names():
            try:
                fm, body = self._read(name)
                meta = fm.get("metadata") or {}
                assessed = self._meta_int(meta, "mastery_score")
                eff, due_date, days_until_due = _effective_score(assessed, meta)
                skills[name] = {
                    "description": fm.get("description", ""),
                    "mastery_score": assessed,
                    "mastery_level": meta.get("mastery_level", "novice"),
                    "effective_score": eff,
                    "effective_level": _mastery_level(int(eff)),
                    "due_date": due_date,
                    "days_until_due": days_until_due,
                    "source_type": meta.get("source_type", ""),
                    "has_sources": bool(_extract_sources(body)),
                    "use_count": self._meta_int(meta, "use_count"),
                    "update_count": self._meta_int(meta, "update_count"),
                    "tags": [t.strip() for t in meta.get("tags", "").split(",") if t.strip()],
                    "updated_at": meta.get("updated_at", ""),
                }
            except Exception as exc:
                # One broken skill must not block the index for the others.
                print(
                    f"skills-mcp: skipping '{name}' while rebuilding index: {exc!r}",
                    file=sys.stderr,
                )
        index = {"version": 2, "updated_at": _now(), "skills": skills}
        tmp = self._index_path.with_suffix(".tmp")
        tmp.write_text(json.dumps(index, indent=2))
        os.replace(tmp, self._index_path)

    def _index(self) -> dict:
        """Return the master index, rebuilding it when missing, corrupt or stale.

        The index stores ``effective_score`` and ``days_until_due``, which
        change as days pass even when no skill is written.  An index built
        on an earlier UTC date is therefore rebuilt before use, so idle
        skills show their decay.  An index that cannot be read or parsed is
        rebuilt as well instead of raising.
        """
        try:
            index = json.loads(self._index_path.read_text())
            # "updated_at" is written by _now() as YYYY-MM-DDT...; compare the date part.
            if str(index.get("updated_at", ""))[:10] == str(_today()):
                return index
        except (OSError, ValueError, AttributeError):
            # Missing file, invalid JSON, or a top-level value that is not an object.
            pass
        self._rebuild_index()
        return json.loads(self._index_path.read_text())

    # --- Tool implementations ---

    def list_skills(self) -> dict:
        idx = self._index()
        skills = [
            {
                "name": name,
                "description": info["description"],
                "mastery_level": info["mastery_level"],
                "mastery_score": info["mastery_score"],
                "effective_score": info.get("effective_score", info["mastery_score"]),
                "effective_level": info.get("effective_level", info["mastery_level"]),
                "days_until_due": info.get("days_until_due", 0),
                "use_count": info["use_count"],
                "update_count": info.get("update_count", 0),
                "tags": info["tags"],
            }
            for name, info in idx.get("skills", {}).items()
        ]
        return {"skills": skills, "count": len(skills)}

    def peek_skill(self, name: str) -> dict:
        fm, body = self._read(name)
        meta = fm.get("metadata") or {}

        # Split body into sections by ## headings
        sections: list[str] = []
        current: list[str] = []
        for line in body.split("\n"):
            if line.startswith("## ") and current:
                sections.append("\n".join(current).strip())
                current = [line]
            else:
                current.append(line)
        if current:
            sections.append("\n".join(current).strip())

        assessed = self._meta_int(meta, "mastery_score")
        eff, due_date, days_until = _effective_score(assessed, meta)

        return {
            "name": fm.get("name", name),
            "description": fm.get("description", ""),
            "compatibility": fm.get("compatibility", ""),
            "mastery_level": meta.get("mastery_level", "novice"),
            "mastery_score": assessed,
            "effective_score": eff,
            "effective_level": _mastery_level(int(eff)),
            "due_date": due_date,
            "days_until_due": days_until,
            "source_type": meta.get("source_type", ""),
            "has_sources": bool(_extract_sources(body)),
            "tags": [t.strip() for t in meta.get("tags", "").split(",") if t.strip()],
            "preview": sections[0] if sections else "",
            "total_sections": len(sections),
        }

    def load_skill(self, name: str) -> dict:
        fm, body = self._read(name)
        meta = fm.setdefault("metadata", {})
        meta["use_count"] = str(self._meta_int(meta, "use_count") + 1)
        meta["last_used"] = _now()
        self._write(name, fm, body)
        return {
            "name": name,
            "content": _serialize_frontmatter(fm) + "\n\n" + body.lstrip("\n"),
            "use_count": int(meta["use_count"]),
        }

    def create_skill(
        self,
        name: str,
        description: str,
        content: str,
        tags: str = "",
        compatibility: str = "opencode",
    ) -> dict:
        if self._skill_path(name).exists():
            raise ValueError(f"Skill '{name}' already exists — use update_skill to modify")
        now = _now()
        fm: dict = {
            "name": name,
            "description": description,
            "compatibility": compatibility,
            "metadata": {
                "mastery_score": "0",
                "mastery_level": "novice",
                "use_count": "0",
                "update_count": "0",
                "tags": tags,
                "created_at": now,
                "updated_at": now,
                "last_used": "",
                "score_completeness": "0",
                "score_specificity": "0",
                "score_examples": "0",
                "score_edge_cases": "0",
                "score_actionability": "0",
                "assessment_notes": "",
                # SM-2 state — initialised on first record_assessment
                "sm2_repetitions": "0",
                "sm2_ef": "",
                "sm2_interval": "",
                "sm2_due_date": "",
                "sm2_last_review": "",
                "last_validated": "",
                "source_type": "",
            },
        }
        self._write(name, fm, content)
        return {
            "created": name,
            "path": str(self._skill_path(name)),
            "next_step": _ASSESSMENT_TRIGGER,
        }

    def update_skill(self, name: str, content: str) -> dict:
        fm, _ = self._read(name)
        meta = fm.setdefault("metadata", {})
        meta["updated_at"] = _now()
        meta["update_count"] = str(self._meta_int(meta, "update_count") + 1)
        self._write(name, fm, content)
        return {
            "updated": name,
            "next_step": _ASSESSMENT_TRIGGER,
        }

    def record_assessment(
        self,
        name: str,
        completeness: int,
        specificity: int,
        examples: int,
        edge_cases: int,
        actionability: int,
        notes: str = "",
        source_type: str = "",
    ) -> dict:
        """Store a five-dimension assessment and restart the SM-2 schedule.

        Each dimension is clamped to 0-20; their sum is the mastery score
        and selects the mastery level.  The SM-2 state is reset to its
        initial values with the next review due tomorrow (UTC).
        ``source_type`` is only overwritten when a non-empty value is given.

        Returns:
            Dict with the name, mastery score and level, effective score,
            due date, the clamped dimensions and the notes.

        Raises:
            FileNotFoundError: If the skill does not exist.
        """
        dims = {
            "completeness":  max(0, min(20, int(completeness))),
            "specificity":   max(0, min(20, int(specificity))),
            "examples":      max(0, min(20, int(examples))),
            "edge_cases":    max(0, min(20, int(edge_cases))),
            "actionability": max(0, min(20, int(actionability))),
        }
        total = sum(dims.values())
        level = _mastery_level(total)

        fm, body = self._read(name)
        meta = fm.get("metadata")
        if not isinstance(meta, dict):
            # A bare `metadata:` parses to None and `metadata: x` to a str;
            # setdefault would hand that back and the assignments below
            # would raise TypeError.  Start from an empty mapping instead.
            meta = fm["metadata"] = {}
        for dim, score in dims.items():
            meta[f"score_{dim}"] = str(score)
        meta["mastery_score"] = str(total)
        meta["mastery_level"] = level
        meta["assessment_notes"] = notes
        if source_type:
            meta["source_type"] = source_type
        now = _now()
        meta["last_validated"] = now
        meta["updated_at"] = now

        # Initialise SM-2 — first interval = 1 day, due tomorrow
        meta["sm2_repetitions"] = "0"
        meta["sm2_ef"] = f"{_SM2_INITIAL_EF:.4f}"
        meta["sm2_interval"] = f"{_SM2_INITIAL_INTERVAL:.1f}"
        meta["sm2_due_date"] = str(_today() + timedelta(days=1))
        meta["sm2_last_review"] = now

        self._write(name, fm, body)
        eff, due_date, _ = _effective_score(total, meta)

        return {
            "name": name,
            "mastery_score": total,
            "mastery_level": level,
            "effective_score": eff,
            "due_date": due_date,
            "dimensions": dims,
            "notes": notes,
        }

    def report_outcome(self, name: str, quality: int, notes: str = "") -> dict:
        """
        Record the outcome of using a skill.

        quality (SM-2 scale):
          5 — perfect, effortless
          4 — correct after brief thought
          3 — correct but required significant effort
          2 — incorrect but answer was obvious once seen
          1 — incorrect, hard to recall
          0 — complete blackout / skill was wrong
        """
        quality = max(0, min(5, int(quality)))
        fm, body = self._read(name)
        meta = fm.setdefault("metadata", {})

        repetitions = self._meta_int(meta, "sm2_repetitions")
        ef = float(meta.get("sm2_ef", "") or str(_SM2_INITIAL_EF))
        interval = float(meta.get("sm2_interval", "") or str(_SM2_INITIAL_INTERVAL))

        new_rep, new_ef, new_interval = _sm2_update(quality, repetitions, ef, interval)
        now = _now()
        due = _today() + timedelta(days=int(new_interval))

        meta["sm2_repetitions"] = str(new_rep)
        meta["sm2_ef"] = f"{new_ef:.4f}"
        meta["sm2_interval"] = f"{new_interval:.1f}"
        meta["sm2_due_date"] = str(due)
        meta["sm2_last_review"] = now

        log_entry = {
            "timestamp": now,
            "quality": quality,
            "notes": notes,
            "interval_before": interval,
            "ef_before": round(ef, 4),
            "new_interval": new_interval,
            "new_ef": round(new_ef, 4),
        }
        log_path = self._skill_dir(name) / "usage_log.jsonl"
        with open(log_path, "a") as f:
            f.write(json.dumps(log_entry) + "\n")

        self._write(name, fm, body)

        assessed = self._meta_int(meta, "mastery_score")
        eff, due_str, days_until = _effective_score(assessed, meta)

        return {
            "name": name,
            "quality": quality,
            "sm2_repetitions": new_rep,
            "sm2_ef": round(new_ef, 4),
            "sm2_interval_days": new_interval,
            "sm2_due_date": str(due),
            "assessed_score": assessed,
            "effective_score": eff,
            "effective_level": _mastery_level(int(eff)),
        }

    def review_stale_skills(self, limit: int = 10) -> dict:
        """
        Return skills that are due for review, sorted by urgency.

        Urgency = days_overdue / sm2_interval — how far past due as a fraction
        of the full interval. A skill 30 days overdue on a 30-day interval is
        more urgent than one 30 days overdue on a 365-day interval.

        The index is rebuilt first so due dates are current.  ``limit`` caps
        the number of skills returned; a negative value is treated as 0
        instead of slicing entries off the end of the list.  ``due_count``
        is the number of skills returned after the cap is applied.
        """
        self._rebuild_index()
        idx = self._index()
        due_skills = []

        for name, info in idx.get("skills", {}).items():
            days_until = info.get("days_until_due", 0)
            if days_until >= 0:  # not overdue
                continue
            fm, _ = self._read(name)
            meta = fm.get("metadata") or {}
            try:
                interval = float(meta.get("sm2_interval", "") or _SM2_DEFAULT_HALFLIFE)
            except (TypeError, ValueError):
                # Unparsable stored interval: fall back to the default.
                interval = _SM2_DEFAULT_HALFLIFE
            urgency = (-days_until) / max(interval, 1.0)
            effective = info.get("effective_score", info["mastery_score"])
            due_skills.append({
                "name": name,
                "description": info["description"],
                "assessed_score": info["mastery_score"],
                "effective_score": effective,
                "score_drop": round(info["mastery_score"] - effective, 1),
                "days_overdue": -days_until,
                "due_date": info.get("due_date", ""),
                "source_type": info.get("source_type", "derived"),
                "has_sources": info.get("has_sources", False),
                "urgency": round(urgency, 3),
            })

        due_skills.sort(key=lambda s: s["urgency"], reverse=True)
        due_skills = due_skills[:max(0, int(limit))]

        review_note = (
            "For each skill: call validate_skill(name) to load content + sources, "
            "fetch/review source material, update if needed, then record_assessment to reset the SM-2 clock."
        )
        return {
            "due_count": len(due_skills),
            "skills": due_skills,
            "review_workflow": review_note,
        }

    def validate_skill(self, name: str) -> dict:
        """
        Load a skill for review, including its extracted source references.

        The caller (agent) is responsible for fetching/reading the sources
        and determining whether the skill content needs updating.
        """
        fm, body = self._read(name)
        meta = fm.get("metadata") or {}
        assessed = self._meta_int(meta, "mastery_score")
        eff, due_date, days_until = _effective_score(assessed, meta)
        sources = _extract_sources(body)
        source_type = meta.get("source_type", "derived")

        if source_type == "derived":
            review_hint = "This skill is derived knowledge (no external source). Validate through use — check usage_log.jsonl for outcome history."
        elif source_type == "codebase":
            review_hint = "This skill is derived from source code. Read the referenced files and compare against skill content."
        else:
            review_hint = "Fetch each source URL and compare against skill content. Update with update_skill if content has drifted."

        return {
            "name": name,
            "assessed_score": assessed,
            "effective_score": eff,
            "days_until_due": days_until,
            "source_type": source_type,
            "sources": sources,
            "review_hint": review_hint,
            "content": _serialize_frontmatter(fm) + "\n\n" + body.lstrip("\n"),
        }

    def skill_slice(self, name: str, query: str, max_sections: int = 2) -> dict:
        """
        Return the most relevant section(s) of a skill for a given query.

        Sits between peek_skill (first section only) and load_skill (full content).
        Uses naive stemming + tf-idf-lite scoring — zero dependencies.
        """
        fm, body = self._read(name)
        meta = fm.get("metadata") or {}
        query_stems = _tokenize(query)

        sections = _parse_sections(body)
        if not sections:
            return {
                "name": name,
                "query": query,
                "sections": [],
                "note": "Skill has no ## sections — use load_skill for full content.",
            }

        scored = [
            (heading, content, _score_section(heading, content, query_stems))
            for heading, content in sections
        ]
        scored.sort(key=lambda x: x[2], reverse=True)

        top = scored[:max(1, max_sections)]
        zero_score = all(s[2] == 0.0 for s in top)

        return {
            "name": name,
            "query": query,
            "sections": [
                {"heading": h, "content": c, "relevance_score": s}
                for h, c, s in top
            ],
            "total_sections": len(sections),
            "sections_returned": len(top),
            "note": (
                "No strong match found — showing top sections by position. Use load_skill for full content."
                if zero_score else
                f"Showing {len(top)} of {len(sections)} sections. Use load_skill for full content."
            ),
        }
        """
        Ask an independent AI process to review the skill for accuracy and completeness.

        Requires ai_review.enabled = true in ~/.config/skills-mcp/config.json:
          {
            "ai_review": {
              "enabled": true,
              "command": ["gh", "copilot", "-p"],
              "timeout": 120
            }
          }

        The reviewer receives only the skill content and sources — it has no memory
        of the session that created the skill, so its assessment is independent.
        """
        if not _AI_REVIEW_ENABLED:
            return {
                "ai_review": "disabled",
                "hint": (
                    "Enable AI review by adding to ~/.config/skills-mcp/config.json: "
                    '{"ai_review": {"enabled": true, "command": ["gh", "copilot", "-p"], "timeout": 120}}'
                ),
            }

        fm, body = self._read(name)
        meta = fm.get("metadata") or {}
        assessed = self._meta_int(meta, "mastery_score")
        sources = _extract_sources(body)
        source_type = meta.get("source_type", "derived")
        content = _serialize_frontmatter(fm) + "\n\n" + body.lstrip("\n")

        source_guidance = {
            "external_url": "The skill claims to document a public API or external tool. Check whether the content is still accurate.",
            "codebase":     "The skill was derived from source code. Check whether it still reflects the codebase accurately.",
            "derived":      "This skill is tribal/derived knowledge with no external reference. Focus on internal consistency, completeness, and whether the patterns described are sound.",
        }.get(source_type, "Assess accuracy and completeness as best you can.")

        sources_block = "\n".join(f"  - {s}" for s in sources) if sources else "  (none listed)"

        prompt = f"""\
You are an independent reviewer assessing a skill document for accuracy, completeness, and continued relevance.
You did NOT write this skill. Evaluate it critically.

Source type: {source_type}
{source_guidance}

Sources listed in the skill:
{sources_block}

---

{content}

---

Please provide:
1. A brief summary of what the skill covers
2. Any inaccuracies, outdated information, or missing important details you can identify
3. An overall quality score from 0-5:
   5 = accurate, complete, immediately usable
   4 = good, minor gaps
   3 = usable but notable gaps or possible staleness
   2 = significant issues, use with caution
   1 = mostly wrong or misleading
   0 = should be deleted or fully rewritten
4. Specific suggestions for improvement (if any)

Be concise. Focus on correctness over style."""

        try:
            result = subprocess.run(
                _AI_REVIEW_COMMAND + [prompt],
                capture_output=True,
                text=True,
                timeout=_AI_REVIEW_TIMEOUT,
                stdin=subprocess.DEVNULL,
            )
            review_text = result.stdout.strip()
            stderr_text = result.stderr.strip()

            if result.returncode != 0 and not review_text:
                return {
                    "ai_review": "error",
                    "returncode": result.returncode,
                    "error": stderr_text or f"Command exited with code {result.returncode}",
                    "command": _AI_REVIEW_COMMAND,
                }

            return {
                "name": name,
                "assessed_score": assessed,
                "source_type": source_type,
                "ai_review": review_text,
                "stderr": stderr_text if stderr_text else None,
                "next_steps": (
                    "Based on the review: call update_skill if content needs changes, "
                    "then record_assessment to re-score, then report_outcome with the "
                    "quality score suggested by the reviewer."
                ),
            }

        except subprocess.TimeoutExpired:
            return {
                "ai_review": "timeout",
                "error": f"Command timed out after {_AI_REVIEW_TIMEOUT}s",
                "command": _AI_REVIEW_COMMAND,
            }
        except FileNotFoundError:
            cmd = _AI_REVIEW_COMMAND[0]
            return {
                "ai_review": "error",
                "error": f"Command not found: {cmd!r} — is it installed and on PATH?",
                "command": _AI_REVIEW_COMMAND,
            }

    def reflect_on_session(self, context_summary: str) -> dict:
        idx = self._index()
        existing = list(idx.get("skills", {}).keys())
        existing_digest = "\n".join(
            f"  - {name}: {info['description']} [{info['mastery_level']}]"
            for name, info in idx.get("skills", {}).items()
        ) or "  (none yet)"

        template = f"""\
## Session Skill Reflection

**Context summary:**
{context_summary}

**Existing skills ({len(existing)}):**
{existing_digest}

---

### Step 1 — Identify new skills
Are there reusable patterns, workflows, or knowledge demonstrated this session
that aren't captured in an existing skill? For each new skill:
- Choose a `name`: lowercase-hyphen slug, max 64 chars
- Write a `description`: 1–2 sentences (max 1024 chars)
- Write the full `content`: Markdown body with ## sections
- List `tags`: comma-separated
- Add a `## Sources` section at the end listing URLs, file paths, or noting
  `source_type: derived` if this is tribal knowledge with no external reference.

### Step 2 — Identify improvements
Are any existing skills incomplete or inaccurate given what was demonstrated?
Name the skill and describe what should change.

### Step 3 — Execute
- Call `create_skill` for each new skill (returns assessment prompt)
- Call `update_skill` for each improvement (returns assessment prompt)
- Call `record_assessment` to score each one (pass `source_type` if known)
"""
        return {"reflection_template": template, "existing_skills": existing}

    def skills_report(self) -> dict:
        """
        Summarise the whole library, ranked by effective score.

        Rebuilds the index first, then orders skills by ``effective_score``
        (falling back to ``mastery_score`` when absent), highest first.

        Returns:
            For an empty library: ``{"skills": [], "total": 0, "summary": ...}``.
            Otherwise a dict with the ranked rows, the total, average assessed
            and effective scores (one decimal), the level of the truncated
            average effective score, names of skills whose ``days_until_due``
            is negative, and skill names grouped by effective level.
        """
        self._rebuild_index()
        catalog = self._index().get("skills", {})

        rows = [{"name": skill, **entry} for skill, entry in catalog.items()]
        rows.sort(
            key=lambda row: row.get("effective_score", row["mastery_score"]),
            reverse=True,
        )

        if not rows:
            return {"skills": [], "total": 0, "summary": "No skills yet."}

        count = len(rows)
        assessed_total = sum(row["mastery_score"] for row in rows)
        avg_assessed = assessed_total / count
        effective_total = sum(
            row.get("effective_score", row["mastery_score"]) for row in rows
        )
        avg_effective = effective_total / count

        # Group names under the effective level, or the assessed level when
        # no effective level is recorded.
        by_level: dict[str, list[str]] = {}
        for row in rows:
            level = row.get("effective_level", row["mastery_level"])
            if level not in by_level:
                by_level[level] = []
            by_level[level].append(row["name"])

        # A missing days_until_due counts as 0, i.e. not overdue.
        overdue = []
        for row in rows:
            if row.get("days_until_due", 0) < 0:
                overdue.append(row["name"])

        report = {
            "skills": rows,
            "total": count,
            "average_assessed_score": round(avg_assessed, 1),
            "average_effective_score": round(avg_effective, 1),
            "average_effective_level": _mastery_level(int(avg_effective)),
            "overdue_skills": overdue,
            "by_effective_level": by_level,
        }
        return report

    def list_skill_files(self, name: str) -> dict:
        """
        List the auxiliary files stored alongside a skill.

        Walks the skill directory recursively and reports every regular file
        as a path relative to that directory, sorted. Any file named
        ``SKILL.md`` is left out, at whatever depth it appears.

        Args:
            name: Skill name, resolved to a directory via ``_skill_dir``.

        Returns:
            ``{"name": name, "files": [...]}``.

        Raises:
            FileNotFoundError: If the skill directory does not exist.
        """
        root = self._skill_dir(name)
        if not root.exists():
            raise FileNotFoundError(f"Skill '{name}' not found")

        found = []
        for entry in root.rglob("*"):
            if not entry.is_file() or entry.name == "SKILL.md":
                continue
            found.append(str(entry.relative_to(root)))
        found.sort()

        return {"name": name, "files": found}

    def read_skill_file(self, name: str, path: str) -> dict:
        """
        Read one auxiliary file from inside a skill's directory.

        Args:
            name: Skill name, resolved to a directory via ``_skill_dir``.
            path: Path of the file relative to the skill directory.

        Returns:
            ``{"name": name, "path": path, "content": <file text>}``.

        Raises:
            ValueError: If ``path`` resolves to a location outside the skill
                directory (``..`` segments, absolute paths, symlinks out).
            FileNotFoundError: If the resolved target is not an existing
                regular file (this includes directories).
        """
        base = self._skill_dir(name).resolve()
        target = (base / path).resolve()
        # Compare path components, not string prefixes: a plain startswith()
        # check would let "../foo-bar/x" through for a skill named "foo".
        if not target.is_relative_to(base):
            raise ValueError("Path traversal not allowed")
        # is_file() rather than exists(), so a directory is reported as
        # "not found" instead of failing later inside read_text().
        if not target.is_file():
            raise FileNotFoundError(f"File '{path}' not found in skill '{name}'")
        return {"name": name, "path": path, "content": target.read_text()}


# ---------------------------------------------------------------------------
# MCP protocol
# ---------------------------------------------------------------------------

# Single shared store instance, constructed at import time from the configured
# SKILLS_DIR. The tool dispatch table (_TOOL_MAP) calls its methods.
_STORE = SkillsStore(SKILLS_DIR)

# Tool catalogue returned verbatim by the "tools/list" method. Each entry has
# a "name" (matched against _TOOL_MAP when a tool is called), a human/LLM
# readable "description", and a JSON-Schema "inputSchema" for its arguments.
TOOLS = [
    {
        "name": "list_skills",
        "description": (
            "Level 1 (brief): List all skills — name, description, mastery level, and use count. "
            "Start here to discover available skills before loading them. "
            "Very token-efficient: ~20 tokens per skill."
        ),
        "inputSchema": {"type": "object", "properties": {}, "required": []},
    },
    {
        "name": "peek_skill",
        "description": (
            "Level 2 (medium): Get a skill's metadata plus its first section only. "
            "Use to decide if the full skill is needed without paying the full token cost."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Skill name (must match directory name)"},
            },
            "required": ["name"],
        },
    },
    {
        "name": "load_skill",
        "description": (
            "Level 3 (full): Load the complete SKILL.md. "
            "Auto-increments use_count and records last_used."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
    {
        "name": "create_skill",
        "description": (
            "Create a new skill. Initialises mastery at Novice (0/100). "
            "Returns an assessment prompt — call record_assessment to score it."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string", "description": "Lowercase hyphen slug, 1–64 chars, matches directory name"},
                "description": {"type": "string", "description": "1–1024 char summary for discovery"},
                "content": {"type": "string", "description": "Full Markdown body (## sections recommended)"},
                "tags": {"type": "string", "description": "Comma-separated tags (optional)"},
                "compatibility": {"type": "string", "description": "Host compatibility tag, default: opencode"},
            },
            "required": ["name", "description", "content"],
        },
    },
    {
        "name": "update_skill",
        "description": (
            "Replace the body of an existing skill, preserving all metadata. "
            "Returns an assessment prompt to trigger the post-update scoring loop."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "content": {"type": "string", "description": "New Markdown body"},
            },
            "required": ["name", "content"],
        },
    },
    {
        "name": "record_assessment",
        "description": (
            "Score a skill on 5 dimensions (each 0–20, total 0–100). "
            "Updates mastery_score, mastery_level, and initialises the SM-2 decay clock.\n\n"
            "Scoring rubric:\n"
            "  completeness   0=missing major areas      20=covers full scope\n"
            "  specificity    0=vague generic guidance   20=concrete unambiguous steps\n"
            "  examples       0=no examples              20=multiple worked examples\n"
            "  edge_cases     0=no error handling        20=failures and exceptions noted\n"
            "  actionability  0=needs clarification      20=followable without questions\n\n"
            "Mastery levels: Novice 0-20 · Apprentice 21-40 · Journeyman 41-60 · Expert 61-80 · Master 81-100\n\n"
            "Also accepts source_type: external_url | codebase | derived"
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "completeness":  {"type": "integer", "description": "0-20: Covers full scope"},
                "specificity":   {"type": "integer", "description": "0-20: Concrete vs vague"},
                "examples":      {"type": "integer", "description": "0-20: Has worked examples"},
                "edge_cases":    {"type": "integer", "description": "0-20: Handles failures/exceptions"},
                "actionability": {"type": "integer", "description": "0-20: Followable without clarification"},
                "notes": {"type": "string", "description": "Optional assessment notes"},
                "source_type": {"type": "string", "description": "external_url | codebase | derived"},
            },
            "required": ["name", "completeness", "specificity", "examples", "edge_cases", "actionability"],
        },
    },
    {
        "name": "report_outcome",
        "description": (
            "Record the outcome of using a skill. Runs the SM-2 algorithm to update the review interval "
            "and due date. Low quality scores shorten the interval and accelerate effective_score decay. "
            "quality scale: 5=perfect · 4=correct · 3=correct with effort · 2=wrong but obvious · 1=wrong · 0=blackout"
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "quality": {"type": "integer", "description": "0-5 SM-2 quality score"},
                "notes": {"type": "string", "description": "Optional notes on what was wrong or missing"},
            },
            "required": ["name", "quality"],
        },
    },
    {
        "name": "review_stale_skills",
        "description": (
            "Return skills that are past their SM-2 due date, sorted by urgency "
            "(days_overdue / interval — relative staleness). "
            "Use this as a review queue: load each skill with validate_skill, check sources, update if needed."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "limit": {"type": "integer", "description": "Max skills to return (default 10)"},
            },
            "required": [],
        },
    },
    {
        "name": "skill_slice",
        "description": (
            "Level 2.5: Return only the most relevant section(s) of a skill for a specific query. "
            "Use when you know which skill you need but want less than the full content. "
            "Sits between peek_skill (first section only) and load_skill (everything). "
            "Uses naive stemming + tf-idf-lite scoring — no semantic search, but handles "
            "common word variations (mocking→mock, commands→command, etc.)."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "query": {"type": "string", "description": "What you're looking for, e.g. 'docker mock pattern' or 'assert exit status'"},
                "max_sections": {"type": "integer", "description": "Max sections to return (default 2)"},
            },
            "required": ["name", "query"],
        },
    },
    {
        "name": "validate_skill",
        "description": (
            "Load a skill for review: returns full content plus extracted source references. "
            "The caller fetches/reads the sources and determines whether content needs updating. "
            "After reviewing, call update_skill if needed, then record_assessment to reset the SM-2 clock."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
    {
        "name": "ai_review_skill",
        "description": (
            "Ask an independent AI process (gh copilot) to review a skill for accuracy and completeness. "
            "The reviewer has no memory of the session that created the skill — providing an unbiased second opinion. "
            "Requires ai_review.enabled=true in ~/.config/skills-mcp/config.json. "
            "Returns the review text and suggested quality score (0-5) to feed into report_outcome."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
    {
        "name": "reflect_on_session",
        "description": (
            "Session reflection loop (Loop A): given a summary of this session, returns a structured "
            "template listing existing skills and prompting identification of new ones or improvements. "
            "Follow the template by calling create_skill or update_skill, then record_assessment."
        ),
        "inputSchema": {
            "type": "object",
            "properties": {
                "context_summary": {
                    "type": "string",
                    "description": "Brief description of what was done or discussed this session",
                },
            },
            "required": ["context_summary"],
        },
    },
    {
        "name": "skills_report",
        # The report is ordered by effective_score, with mastery_score used
        # only when no effective score is present; the text says so.
        "description": (
            "Full report of all skills sorted by effective score descending "
            "(assessed mastery score when no effective score is recorded). "
            "Includes per-level grouping, collection averages, and overdue skills."
        ),
        "inputSchema": {"type": "object", "properties": {}, "required": []},
    },
    {
        "name": "list_skill_files",
        "description": "List auxiliary files in a skill's directory (excludes SKILL.md itself).",
        "inputSchema": {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        },
    },
    {
        "name": "read_skill_file",
        "description": "Read an auxiliary file from a skill directory (e.g. references/guide.md).",
        "inputSchema": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "path": {"type": "string", "description": "Relative path within the skill directory"},
            },
            "required": ["name", "path"],
        },
    },
]

# Dispatch table: tool name -> callable taking the raw "arguments" dict of a
# tools/call request and forwarding it to the matching _STORE method.
_TOOL_MAP = {
    # No-argument tools ignore whatever was sent.
    "list_skills": lambda _args: _STORE.list_skills(),
    "skills_report": lambda _args: _STORE.skills_report(),
    # Tools addressed by skill name only.
    "peek_skill": lambda args: _STORE.peek_skill(args["name"]),
    "load_skill": lambda args: _STORE.load_skill(args["name"]),
    "validate_skill": lambda args: _STORE.validate_skill(args["name"]),
    "ai_review_skill": lambda args: _STORE.ai_review_skill(args["name"]),
    "list_skill_files": lambda args: _STORE.list_skill_files(args["name"]),
    # Tools whose argument dict is forwarded as keyword arguments.
    "create_skill": lambda args: _STORE.create_skill(**args),
    "update_skill": lambda args: _STORE.update_skill(**args),
    "record_assessment": lambda args: _STORE.record_assessment(**args),
    # Tools with optional arguments and explicit defaults.
    "report_outcome": lambda args: _STORE.report_outcome(
        args["name"], args["quality"], args.get("notes", "")
    ),
    "review_stale_skills": lambda args: _STORE.review_stale_skills(
        int(args.get("limit", 10))
    ),
    "skill_slice": lambda args: _STORE.skill_slice(
        args["name"], args["query"], int(args.get("max_sections", 2))
    ),
    # Remaining positional forwards.
    "reflect_on_session": lambda args: _STORE.reflect_on_session(args["context_summary"]),
    "read_skill_file": lambda args: _STORE.read_skill_file(args["name"], args["path"]),
}


# Descriptor for the single prompt this server advertises via "prompts/list".
_SKILLS_PROMPT = dict(
    name="skills_workflow",
    description="Skills library workflow: when to check, update, and reflect on skills",
    arguments=[],
)

# Body of the "skills_workflow" prompt, returned as a user message by
# "prompts/get". The text is sent verbatim.
_SKILLS_PROMPT_TEMPLATE = """\
## Skills Library Workflow

A skills library is available via the `skills` MCP tools. Follow this workflow:

### Before starting work
1. `list_skills` — see what's available (~20 tokens/skill)
2. `peek_skill(name)` — preview relevant candidates
3. `load_skill(name)` — load when you'll actually use it
4. If any skill shows negative `days_until_due`, run `review_stale_skills()` first

### While working
- Skill was **wrong or incomplete**: `update_skill` → `record_assessment` → `report_outcome(quality=1)`
- Skill worked **correctly**: `report_outcome(quality=4)` (normal) or `quality=5` (perfect)

### End of session
`reflect_on_session(summary)` → create/update skills → `record_assessment`
"""


def _handle(req: dict) -> dict | None:
    method = req.get("method")
    req_id = req.get("id")
    params = req.get("params", {})

    def ok(result: dict) -> dict:
        return {"jsonrpc": "2.0", "id": req_id, "result": result}

    def err(code: int, message: str) -> dict:
        return {"jsonrpc": "2.0", "id": req_id, "error": {"code": code, "message": message}}

    if method == "initialize":
        return ok({
            "protocolVersion": "2024-11-05",
            "serverInfo": {"name": "skills-mcp", "version": "1.0.0"},
            "capabilities": {"tools": {}, "prompts": {}},
        })

    if method == "prompts/list":
        return ok({"prompts": [_SKILLS_PROMPT]})

    if method == "prompts/get":
        if params.get("name") != "skills_workflow":
            return err(-32602, f"Unknown prompt: {params.get('name')}")
        return ok({"description": _SKILLS_PROMPT["description"],
                   "messages": [{"role": "user", "content": {"type": "text", "text": _SKILLS_PROMPT_TEMPLATE}}]})

    if method == "notifications/initialized":
        return None

    if method == "tools/list":
        return ok({"tools": TOOLS})

    if method == "tools/call":
        name = params.get("name")
        args = params.get("arguments", {})
        if name not in _TOOL_MAP:
            return err(-32601, f"Unknown tool: {name}")
        try:
            result = _TOOL_MAP[name](args)
            return ok({"content": [{"type": "text", "text": json.dumps(result, indent=2)}], "isError": False})
        except (FileNotFoundError, ValueError) as e:
            return ok({"content": [{"type": "text", "text": str(e)}], "isError": True})
        except Exception as e:
            return ok({"content": [{"type": "text", "text": f"Unexpected error: {e}"}], "isError": True})

    if req_id is not None:
        return err(-32601, f"Method not found: {method}")
    return None


def main() -> None:
    """
    Serve newline-delimited JSON-RPC over stdin/stdout until stdin closes.

    Each non-blank input line is decoded and passed to ``_handle``; any
    response is written to stdout as one JSON line and flushed immediately.
    Lines that are not valid JSON are skipped with a diagnostic on stderr.
    If handling a message raises, the error is logged to stderr and, when the
    message carried an id, an Internal error response is sent so the client
    is not left waiting; the loop keeps serving either way.
    """
    for raw in sys.stdin:
        line = raw.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError as exc:
            # stdout carries protocol messages only, so diagnostics go to stderr.
            print(f"skills-mcp: skipping malformed JSON line: {exc}", file=sys.stderr)
            continue
        try:
            resp = _handle(req)
        except Exception as exc:
            # Top-level boundary: one bad request must not end the session.
            print(f"skills-mcp: unhandled error while handling request: {exc!r}", file=sys.stderr)
            req_id = req.get("id") if isinstance(req, dict) else None
            if req_id is None:
                continue
            resp = {
                "jsonrpc": "2.0",
                "id": req_id,
                "error": {"code": -32603, "message": f"Internal error: {exc}"},
            }
        if resp is not None:
            sys.stdout.write(json.dumps(resp) + "\n")
            sys.stdout.flush()


if __name__ == "__main__":
    main()