"""Read-only virtual filesystem facade for agent flat context documents.

This adapter provides shell-like primitives (`list_context`, `search_context`,
`read_context_file`) over the JSON documents managed by AgentFlatContextStore.
"""

from __future__ import annotations

import json
import re
import os
import fcntl
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import deque
from fnmatch import fnmatch
from pathlib import Path
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple

from loguru import logger

from services.intelligence.agent_flat_context import AgentFlatContextStore


class SmartGrepEngine:
    """Streaming grep engine with regex fallback and contextual snippets.

    Files are read line by line. Every matching line is reported together with
    up to ``context_window`` lines of context on either side, flattened into a
    single truncated snippet.
    """

    # Substrings that mark a matched line as coming from a summary field.
    _HIGH_SIGNAL_MARKERS = ("agent_summary", "high_signal_terms", "quick_facts")

    def __init__(self, context_window: int = 1):
        """Create an engine.

        Args:
            context_window: Number of lines captured before and after each
                match. Negative values are clamped to 0.
        """
        self.context_window = max(0, int(context_window))

    @staticmethod
    def _compile_pattern(pattern: str) -> re.Pattern:
        """Compile *pattern* case-insensitively.

        If *pattern* is not a valid regular expression it is escaped and
        matched as literal text instead.
        """
        try:
            return re.compile(pattern, re.IGNORECASE)
        except re.error:
            return re.compile(re.escape(pattern), re.IGNORECASE)

    @staticmethod
    def _truncate(text: str, limit: int = 180) -> str:
        """Collapse whitespace runs and cap *text* at *limit* characters.

        Text longer than *limit* is cut and suffixed with ``...``.
        """
        text = " ".join(text.split())
        if len(text) <= limit:
            return text
        return text[:limit] + "..."

    def _format_match(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Turn one raw match record into the public result dictionary."""
        snippet = self._truncate(
            " | ".join([*record["before"], record["match_line"], *record["after"]])
        )
        lowered = record["match_line"].lower()
        is_high_signal = any(marker in lowered for marker in self._HIGH_SIGNAL_MARKERS)
        return {
            "path": record["path"],
            "line": record["line"],
            "snippet": snippet,
            "relevance": "High Relevance" if is_high_signal else "Supporting Detail",
            "reason": "matched summary field in stream" if is_high_signal else "matched streamed body line",
            "score": 70 if is_high_signal else 50,
        }

    def stream_file(self, file_path: Path, pattern: str, *, path_label: str) -> List[Dict[str, Any]]:
        """Scan *file_path* for *pattern* and return one result per matching line.

        Args:
            file_path: File to read (decoded as UTF-8, undecodable bytes ignored).
            pattern: Regular expression, or literal text if it does not compile.
            path_label: Value reported in each result's ``path`` field.

        Returns:
            A list of dicts with ``path``, ``line`` (1-based), ``snippet``,
            ``relevance``, ``reason`` and ``score``, in file order.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        regex = self._compile_pattern(pattern)
        window = self.context_window
        matches: List[Dict[str, Any]] = []
        before: deque = deque(maxlen=window)
        # Matches still waiting for trailing context. Every match needs the
        # same number of following lines, so the oldest entry always completes
        # first and the queue never holds more than `window` records. This
        # avoids re-visiting every earlier match on every line.
        pending: deque = deque()

        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            for line_no, raw in enumerate(f, start=1):
                text = raw.rstrip("\n")

                # Feed the current line to matches that still need trailing context.
                for record in pending:
                    record["after"].append(text)
                while pending and len(pending[0]["after"]) >= window:
                    pending.popleft()

                # The search runs on the raw line (newline included), so
                # end-of-line anchors see the same text as the file holds.
                if regex.search(raw):
                    record = {
                        "path": path_label,
                        "line": line_no,
                        "before": list(before),
                        "match_line": text,
                        "after": [],
                    }
                    matches.append(record)
                    if window > 0:
                        pending.append(record)

                before.append(text)

        return [self._format_match(record) for record in matches]


class AgentContextVFS:
    """Read-only adapter that maps virtual paths to flat context documents."""

    VIRTUAL_MAP = {
        "/steps/website": AgentFlatContextStore.STEP2_FILENAME,
        "/steps/research": AgentFlatContextStore.STEP3_FILENAME,
        "/steps/persona": AgentFlatContextStore.STEP4_FILENAME,
        "/steps/integrations": AgentFlatContextStore.STEP5_FILENAME,
    }

    def __init__(self, user_id: str, project_id: Optional[str] = None):
        """Bind the facade to one user's flat context store.

        Args:
            user_id: Identifier passed to AgentFlatContextStore.
            project_id: Optional project identifier; when set, the workspace
                root is a project-specific directory (see `_workspace_root`).
        """
        self.grep_engine = SmartGrepEngine(context_window=1)
        self.store = AgentFlatContextStore(user_id)
        self.user_id, self.project_id = user_id, project_id

    @staticmethod
    def _safe_slug(value: Optional[str], fallback: str) -> str:
        """Reduce *value* to alphanumerics, hyphens and underscores.

        Surrounding whitespace is stripped first. *fallback* is returned when
        *value* is empty or no character survives the filter.
        """
        text = str(value) if value else ""
        kept = [ch for ch in text.strip() if ch.isalnum() or ch in "-_"]
        return "".join(kept) if kept else fallback

    def _manifest_docs(self) -> List[Dict[str, Any]]:
        """Return the manifest's ``documents`` list, or ``[]`` if absent or malformed."""
        manifest = self.store.load_context_manifest()
        if not manifest:
            return []
        documents = manifest.get("documents")
        if isinstance(documents, list):
            return documents
        return []

    def _workspace_root(self) -> Path:
        """Return the workspace directory for this facade.

        Without a project id the store's own workspace directory is used.
        With one, a ``workspace/project_<slug>`` directory is created under
        the ancestor at ``parents[3]`` of this file and restricted to mode
        0o700.
        """
        if not self.project_id:
            return self.store._workspace_dir()

        slug = self._safe_slug(self.project_id, "default_project")
        repo_root = Path(__file__).resolve().parents[3]
        project_dir = repo_root / "workspace" / f"project_{slug}"
        project_dir.mkdir(parents=True, exist_ok=True)
        os.chmod(project_dir, 0o700)
        return project_dir

    def _scratchpad_dir(self) -> Path:
        """Create (if needed) and return ``<workspace>/scratchpad`` with mode 0o700."""
        scratch_dir = self._workspace_root().joinpath("scratchpad")
        scratch_dir.mkdir(parents=True, exist_ok=True)
        os.chmod(scratch_dir, 0o700)
        return scratch_dir

    def _allowlisted_workspace_files(self) -> List[Path]:
        """Collect the files that the streaming search is allowed to read.

        Sources, in order: manifest entries that resolve to an existing file
        under the store's context directory, ``*.txt`` files directly inside
        the workspace root, and the workspace ``README.md``. Duplicates (by
        string path) are dropped, keeping the first occurrence.
        """
        workspace = self._workspace_root()
        context_dir = self.store._context_dir()
        collected: Dict[str, Path] = {}

        def remember(found: Path) -> None:
            # setdefault keeps the first path seen for a given string form.
            collected.setdefault(str(found), found)

        # Manifest-backed onboarding context files.
        for entry in self._manifest_docs():
            rel_path = str(entry.get("path") or "") if isinstance(entry, dict) else ""
            if not rel_path:
                continue
            try:
                resolved = self.store._safe_resolve_under(context_dir, rel_path)
                if resolved.exists() and resolved.is_file():
                    remember(resolved)
            except Exception:
                # Entries that cannot be resolved safely are skipped.
                continue

        # Workspace text artifacts (README, operator notes, etc.).
        for text_file in workspace.glob("*.txt"):
            if text_file.is_file():
                remember(text_file.resolve())

        readme = workspace / "README.md"
        if readme.exists() and readme.is_file():
            remember(readme.resolve())

        return list(collected.values())

    @staticmethod
    def _query_variants(query: str) -> List[str]:
        """Return the lower-cased query followed by its expanded variants.

        Variants are produced by swapping single tokens for known synonyms and
        by replacing hyphens or underscores with spaces. The result is
        de-duplicated in first-seen order; a blank query yields ``[]``.
        """
        base = (query or "").strip().lower()
        if not base:
            return []

        synonyms = {
            "tone": ["brand voice", "writing tone"],
            "voice": ["brand voice", "writing style"],
            "competitor": ["competition", "rival"],
            "seo": ["search", "metadata"],
            "persona": ["audience profile", "target audience"],
        }

        words = base.split()
        candidates = [base]
        for position, word in enumerate(words):
            for replacement in synonyms.get(word, ()):
                swapped = words[:position] + [replacement] + words[position + 1:]
                candidates.append(" ".join(swapped))
        candidates.append(base.replace("-", " "))
        candidates.append(base.replace("_", " "))

        # dict.fromkeys de-duplicates while preserving insertion order.
        ordered = dict.fromkeys(candidate.strip() for candidate in candidates)
        return [candidate for candidate in ordered if candidate]

    @staticmethod
    def _freshness_score(updated_at: Optional[str]) -> float:
        """Map an ISO-8601 timestamp to a freshness weight.

        Timestamps without a UTC offset are treated as UTC. Weights by age:
        up to 1 day 1.0, up to 7 days 0.9, up to 30 days 0.75, up to 90 days
        0.6, older 0.4. A missing or unparseable timestamp scores 0.3.
        """
        if not updated_at:
            return 0.3
        try:
            stamp = datetime.fromisoformat(str(updated_at).replace("Z", "+00:00"))
            if stamp.tzinfo is None:
                stamp = stamp.replace(tzinfo=timezone.utc)
            elapsed = (datetime.now(timezone.utc) - stamp).total_seconds()
            age_days = max(0.0, elapsed / 86400.0)
        except Exception:
            return 0.3

        for max_age_days, weight in ((1, 1.0), (7, 0.9), (30, 0.75), (90, 0.6)):
            if age_days <= max_age_days:
                return weight
        return 0.4

    def _cluster_results(self, results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Merge hits that share a path and reason, keeping the strongest one.

        Each merged entry carries ``hit_count``, the number of hits folded into
        it. The output is ordered by descending score, then by path. Input
        dictionaries are not modified.
        """
        grouped: Dict[Tuple[str, str], Dict[str, Any]] = {}
        for hit in results:
            bucket_key = (str(hit.get("path") or ""), str(hit.get("reason") or ""))
            kept = grouped.get(bucket_key)
            if kept is None:
                grouped[bucket_key] = {**hit, "hit_count": 1}
                continue

            kept["hit_count"] = int(kept.get("hit_count", 1)) + 1
            if int(hit.get("score", 0)) > int(kept.get("score", 0)):
                # Adopt the stronger hit's fields but keep the running count.
                kept.update({name: value for name, value in hit.items() if name != "hit_count"})

        return sorted(
            grouped.values(),
            key=lambda hit: (-int(hit.get("score", 0)), str(hit.get("path") or "")),
        )

    def _resolve_path(self, path: str) -> Tuple[str, Optional[str]]:
        """Classify a requested path.

        Returns:
            ``("virtual_summary", None)`` for ``/env/summary``,
            ``("file", filename)`` for a VIRTUAL_MAP alias or an allowed
            context filename (the manifest itself is excluded), and
            ``("", None)`` for anything else, including paths containing
            ``..`` or backslashes and relative paths with directories.
        """
        rejected: Tuple[str, Optional[str]] = ("", None)

        requested = (path or "").strip()
        if not requested:
            return rejected
        if requested == "/env/summary":
            return "virtual_summary", None
        if requested in self.VIRTUAL_MAP:
            return "file", self.VIRTUAL_MAP[requested]
        if any(marker in requested for marker in ("..", "\\")):
            return rejected

        # Absolute paths are reduced to their final segment.
        leaf = requested.rsplit("/", 1)[-1] if requested.startswith("/") else requested
        if "/" in leaf:
            return rejected

        readable = AgentFlatContextStore.ALLOWED_CONTEXT_FILES - {AgentFlatContextStore.MANIFEST_FILENAME}
        return ("file", leaf) if leaf in readable else rejected

    def list_context(self) -> Dict[str, Any]:
        """List available context files (ls-equivalent).

        Returns a dict with usage hints, the virtual paths, the manifest
        entries sorted by path, and the scratchpad locations. Resolving the
        scratchpad directory creates it if it does not exist.
        """
        entries = sorted(
            (
                {
                    "path": doc.get("path"),
                    "type": doc.get("type"),
                    "updated_at": doc.get("updated_at"),
                    "size_bytes": doc.get("size_bytes", 0),
                }
                for doc in self._manifest_docs()
                if isinstance(doc, dict)
            ),
            key=lambda entry: str(entry.get("path") or ""),
        )
        collaboration = {
            "scratchpad_dir": str(self._scratchpad_dir()),
            "activity_log": "scratchpad/activity_log.jsonl",
        }
        listing = {
            "workspace_hint": "Use this list to see which onboarding steps are complete.",
            "tip": "Use `search_context` to find specific keywords across all steps.",
            "virtual_paths": ["/env/summary", *sorted(self.VIRTUAL_MAP.keys())],
            "files": entries,
            "collaboration": collaboration,
        }
        logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=list_context files={len(entries)}")
        return listing

    @staticmethod
    def _flatten_strings(data: Any, limit: int = 2000) -> str:
        """Flatten nested dicts/lists into one space-separated string.

        Dict keys and scalar values (str, int, float, bool) are emitted in
        traversal order; other value types are skipped.

        Args:
            data: Arbitrary nested structure.
            limit: Maximum number of pieces to emit. Traversal stops as soon
                as the cap is reached, so the output never holds more than
                ``limit`` pieces.

        Returns:
            The collected pieces joined by single spaces.
        """
        pieces: List[str] = []

        def walk(node: Any) -> None:
            if len(pieces) >= limit:
                return
            if isinstance(node, dict):
                for key, value in node.items():
                    # Re-check per entry: keys are appended here, so checking
                    # only on entry to walk() would let a wide dict overrun
                    # the cap.
                    if len(pieces) >= limit:
                        return
                    pieces.append(str(key))
                    walk(value)
            elif isinstance(node, list):
                for item in node:
                    if len(pieces) >= limit:
                        return
                    walk(item)
            elif isinstance(node, (str, int, float, bool)):
                pieces.append(str(node))

        walk(data)
        return " ".join(pieces)

    @staticmethod
    def _extract_search_fields(doc: Dict[str, Any]) -> Tuple[List[str], Dict[str, Any], str]:
        summary = doc.get("agent_summary") if isinstance(doc.get("agent_summary"), dict) else {}
        hints = summary.get("retrieval_hints") if isinstance(summary.get("retrieval_hints"), dict) else {}
        quick_facts = summary.get("quick_facts") if isinstance(summary.get("quick_facts"), dict) else {}
        high_terms = hints.get("high_signal_terms") if isinstance(hints.get("high_signal_terms"), list) else []
        body = AgentContextVFS._flatten_strings(doc.get("data") if isinstance(doc.get("data"), dict) else {})
        return [str(t).lower() for t in high_terms], quick_facts, body.lower()

    def search_context(self, query: str, *, limit: int = 10, path_glob: Optional[str] = None) -> Dict[str, Any]:
        """Smart grep with coarse-to-fine ranking and parallel stream scans.

        Query variants from `_query_variants` are tried in order; the first
        variant that yields any hit wins. Each variant is matched against the
        manifest documents' summaries (``high_signal_terms`` score 100,
        ``quick_facts`` score 80) and then streamed through the allowlisted
        workspace files with the grep engine.

        Args:
            query: Search text. In the stream pass it is used as a
                case-insensitive regular expression (literal if invalid).
            limit: Maximum number of results; values below 1 are treated as 1.
                When more than 10 files match, at most 10 results are shown.
            path_glob: Optional fnmatch pattern applied to manifest paths and
                to the names of streamed files.

        Returns:
            A dict with ``query``, ``attempted_queries``,
            ``matched_files_count``, ``results``, ``notice``,
            ``char_budget_used`` and ``can_answer``. A blank query returns
            only ``query`` and an empty ``results``. Any failure is logged,
            audited, and reported as a "Search failed." payload rather than
            raised.
        """
        normalized = (query or "").strip()
        if not normalized:
            return {"query": query, "results": []}
        self.store._audit_event("vfs_search", normalized, "started")
        try:
            # The inputs below do not depend on the query variant, so they are
            # resolved once instead of once per variant.
            summary_index: List[Tuple[Dict[str, Any], List[str], Dict[str, Any]]] = []
            for item in self._manifest_docs():
                if not isinstance(item, dict):
                    continue
                path = str(item.get("path") or "")
                if not path:
                    continue
                if path_glob and not fnmatch(path, path_glob):
                    continue
                doc = self.store.load_context_document(path) or {}
                high_terms, quick_facts, _ = self._extract_search_fields(doc)
                summary_index.append((item, high_terms, quick_facts))

            stream_targets = [
                candidate
                for candidate in self._allowlisted_workspace_files()
                if not path_glob or fnmatch(candidate.name, path_glob)
            ]

            attempted_queries: List[str] = []
            scored: List[Dict[str, Any]] = []
            for candidate_query in self._query_variants(normalized):
                attempted_queries.append(candidate_query)
                # Pass 1: summary-first ranking (high relevance).
                variant_scored = self._summary_hits(summary_index, candidate_query)
                # Pass 2: parallelized stream scan over allowlisted files.
                variant_scored.extend(self._stream_hits(stream_targets, candidate_query))
                if variant_scored:
                    scored = variant_scored
                    break

            scored = self._cluster_results(scored)

            # Add confidence based on score + freshness + hit density.
            for r in scored:
                base = min(1.0, max(0.0, float(r.get("score", 0)) / 100.0))
                freshness = self._freshness_score(r.get("updated_at"))
                density = min(1.0, 0.2 + (int(r.get("hit_count", 1)) * 0.1))
                r["confidence"] = round((base * 0.6) + (freshness * 0.25) + (density * 0.15), 3)

            scored.sort(key=lambda r: (-int(r.get("score", 0)), str(r.get("path") or "")))
            matched_files = sorted({str(r.get("path") or "") for r in scored if r.get("path")})

            cap = max(1, limit)
            notice = None
            if len(matched_files) > 10:
                # Broad queries are trimmed to 10, but never beyond what the
                # caller asked for.
                cap = min(cap, 10)
                notice = f"Found {len(matched_files)} matches. Showing top {cap}. Use a more specific keyword to narrow down."
            capped_results = scored[:cap]

            # Token/length budgeting (~2000 tokens ~= ~8000 chars).
            budget_chars = 8000
            bounded_results = []
            used = 0
            for r in capped_results:
                snippet = str(r.get("snippet") or "")
                cost = len(snippet) + 120  # account for metadata fields
                # The first result is always kept, even if it alone exceeds the budget.
                if bounded_results and used + cost > budget_chars:
                    break
                bounded_results.append(r)
                used += cost

            result = {
                "query": normalized,
                "attempted_queries": attempted_queries,
                "matched_files_count": len(matched_files),
                "results": bounded_results,
                "notice": notice,
                "char_budget_used": used,
                "can_answer": bool(bounded_results),
            }
            logger.info(
                f"[vfs_audit] user={self.store.safe_user_id} action=search_context query={normalized!r} results={len(result['results'])}"
            )
            self.store._audit_event("vfs_search", normalized, f"success_{len(result['results'])}_hits")
            return result
        except Exception as exc:
            self.store._audit_event("vfs_search", normalized, f"failed_{exc.__class__.__name__}")
            # Record the cause; the caller only sees a generic failure notice.
            logger.warning(
                f"[vfs_audit] user={self.store.safe_user_id} action=search_context query={normalized!r} status=failed error={exc!r}"
            )
            return {"query": normalized, "matched_files_count": 0, "results": [], "notice": "Search failed.", "can_answer": False}

    @staticmethod
    def _summary_hits(
        summary_index: List[Tuple[Dict[str, Any], List[str], Dict[str, Any]]],
        candidate_query: str,
    ) -> List[Dict[str, Any]]:
        """Return summary-level hits for one query variant (pass 1 of search_context)."""
        needle = candidate_query.lower()
        hits: List[Dict[str, Any]] = []
        for item, high_terms, quick_facts in summary_index:
            high_match = any(needle in term for term in high_terms)
            quick_match = (
                any(needle in str(value).lower() for value in quick_facts.values())
                if isinstance(quick_facts, dict)
                else False
            )
            if not (high_match or quick_match):
                continue
            reason = "matched high_signal_terms" if high_match else "matched quick_facts"
            hits.append(
                {
                    "path": str(item.get("path") or ""),
                    "line": None,
                    "snippet": f"{reason}: {candidate_query}"[:100],
                    "type": item.get("type"),
                    "updated_at": item.get("updated_at"),
                    "relevance": "High Relevance",
                    "reason": reason,
                    "score": 100 if high_match else 80,
                }
            )
        return hits

    def _stream_hits(self, targets: List[Path], candidate_query: str) -> List[Dict[str, Any]]:
        """Grep *targets* in parallel for one query variant (pass 2 of search_context)."""
        if not targets:
            return []
        hits: List[Dict[str, Any]] = []
        with ThreadPoolExecutor(max_workers=min(8, len(targets))) as pool:
            future_map = {
                pool.submit(self.grep_engine.stream_file, target, candidate_query, path_label=target.name): target.name
                for target in targets
            }
            for future in as_completed(future_map):
                try:
                    hits.extend(future.result() or [])
                except Exception as exc:
                    # One unreadable file must not fail the whole search.
                    logger.debug(f"[vfs_audit] stream scan skipped file={future_map[future]} error={exc!r}")
        return hits

    @staticmethod
    def _strip_technical_metadata(doc: Dict[str, Any]) -> Dict[str, Any]:
        """Return the agent-facing view of a context document.

        Only ``context_type``, ``updated_at``, the ``journey`` section of
        ``document_context``, ``agent_summary`` and ``data`` are kept;
        sections that are missing or not dicts become ``{}``.
        """

        def dict_or_empty(value: Any) -> Dict[str, Any]:
            return value if isinstance(value, dict) else {}

        document_context = doc.get("document_context")
        if isinstance(document_context, dict):
            journey = document_context.get("journey") or {}
        else:
            journey = {}

        return {
            "context_type": doc.get("context_type"),
            "updated_at": doc.get("updated_at"),
            "journey": journey,
            "agent_summary": dict_or_empty(doc.get("agent_summary")),
            "data": dict_or_empty(doc.get("data")),
        }

    def inspect_file(self, path: str, *, key: Optional[str] = None, small_file_bytes: int = 5 * 1024) -> Dict[str, Any]:
        """Smart reader (cat/head equivalent) with summary-first behavior.

        Args:
            path: Virtual path or context filename (see `_resolve_path`).
            key: Optional top-level key of the document's ``data`` section.
            small_file_bytes: Size threshold up to which the full view is returned.

        Returns:
            One of these payloads, distinguished by ``mode``:
            ``summary`` for ``/env/summary``; ``key`` or ``key_missing`` when
            *key* is given; ``full`` for small documents; otherwise
            ``summary_plus_keys``. Unknown or rejected paths return an
            ``error`` payload.
        """
        kind, resolved = self._resolve_path(path)

        if kind == "virtual_summary":
            summary_payload = {
                "path": "/env/summary",
                "mode": "summary",
                "data": self.store.generate_total_summary(),
            }
            logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=read_context_file path=/env/summary mode=summary")
            return summary_payload

        if not resolved:
            logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=read_context_file path={path!r} status=rejected")
            return {"error": "File not found", "path": path}

        # JSON context doc path
        doc = self.store.load_context_document(resolved)
        if not doc:
            logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=inspect_file path={resolved} status=not_found")
            return {"error": "File not found", "path": path, "resolved": resolved}

        view = self._strip_technical_metadata(doc)
        sections = view.get("data") if isinstance(view.get("data"), dict) else {}
        raw_size = self.store.estimate_size_bytes(view)

        if key and key in sections:
            logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=inspect_file path={resolved} mode=key")
            return {
                "path": resolved,
                "mode": "key",
                "key": key,
                "agent_summary": view.get("agent_summary"),
                "data": sections.get(key),
            }

        if key:
            logger.info(
                f"[vfs_audit] user={self.store.safe_user_id} action=inspect_file path={resolved} mode=key_missing key={key}"
            )
            return {
                "path": resolved,
                "mode": "key_missing",
                "key": key,
                "available_keys": sorted(sections),
                "message": "Requested key not found. Choose one of available_keys.",
            }

        if raw_size <= small_file_bytes:
            logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=inspect_file path={resolved} mode=full")
            return {"path": resolved, "mode": "full", "data": view}

        # Large document: return the summary plus the keys that can be requested.
        overview = {
            "path": resolved,
            "mode": "summary_plus_keys",
            "size_bytes": raw_size,
            "agent_summary": view.get("agent_summary"),
            "keys": sorted(sections),
            "message": "File is large. Re-run with key to inspect a specific section.",
        }
        logger.info(f"[vfs_audit] user={self.store.safe_user_id} action=inspect_file path={resolved} mode=summary_plus_keys")
        return overview

    def read_context_file(self, path: str, *, subkey: Optional[str] = None) -> Dict[str, Any]:
        """Backward-compatible alias for `inspect_file`; *subkey* is passed as its ``key``."""
        payload = self.inspect_file(path, key=subkey)
        return payload

    def write_context_file(self, *_args: Any, **_kwargs: Any) -> None:
        """Reject every write attempt made through the agent-facing VFS.

        Raises:
            OSError: Always, whatever arguments are passed.
        """
        message = "EROFS: read-only file system"
        raise OSError(message)

    # Backward-compat function name requested in design docs.
    inspect = inspect_file

    def write_shared_note(self, note: str, *, agent_id: str = "agent", filename: str = "collaboration.md") -> Dict[str, Any]:
        """Append a shared project note with advisory locking in scratchpad.

        Args:
            note: Note text; trailing whitespace is stripped and a timestamped
                header naming the (slugified) agent is prepended.
            agent_id: Identifier of the writing agent.
            filename: Bare file name inside the scratchpad directory. Names
                with path components, the activity log name and lock sidecar
                names are rejected.

        Returns:
            ``{"ok": True, "file": ..., "bytes_written": ...}`` on success,
            where ``bytes_written`` is the UTF-8 encoded size of the appended
            text, or ``{"ok": False, "error": ...}`` on rejection or failure.
        """
        safe_name = Path(filename).name
        if safe_name != filename or ".." in filename or "/" in filename or "\\" in filename:
            self.store._audit_event("write_shared_note", filename, "rejected_filename")
            return {"ok": False, "error": "Invalid filename"}

        # Notes must not land in files this class manages itself: free-form
        # text would corrupt the JSONL activity log or a lock sidecar.
        is_lock_sidecar = safe_name.startswith(".") and safe_name.endswith(".lock")
        if safe_name == "activity_log.jsonl" or is_lock_sidecar:
            self.store._audit_event("write_shared_note", filename, "rejected_filename")
            return {"ok": False, "error": "Invalid filename"}

        scratch = self._scratchpad_dir()
        target = (scratch / safe_name).resolve()
        # Resolving follows symlinks, so a link pointing outside the
        # scratchpad is rejected here.
        if scratch.resolve() not in target.parents:
            self.store._audit_event("write_shared_note", filename, "rejected_path")
            return {"ok": False, "error": "Unsafe path"}

        lock_path = scratch / f".{safe_name}.lock"
        ts = datetime.now(timezone.utc).isoformat()
        header = f"\n## {ts} | {self._safe_slug(agent_id, 'agent')}\n"
        payload = header + str(note).rstrip() + "\n"

        try:
            # Report real bytes: len(payload) counts characters and
            # under-reports any non-ASCII note.
            byte_count = len(payload.encode("utf-8"))
            with open(lock_path, "w", encoding="utf-8") as lock_file:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
                try:
                    with open(target, "a", encoding="utf-8") as note_file:
                        note_file.write(payload)
                        note_file.flush()
                        os.fsync(note_file.fileno())
                    os.chmod(target, 0o600)
                finally:
                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            self.store._audit_event("write_shared_note", safe_name, "success")
            self.append_activity_log(
                event_type="shared_note_written",
                actor=agent_id,
                details={"file": safe_name, "bytes": byte_count},
            )
            return {"ok": True, "file": safe_name, "bytes_written": byte_count}
        except Exception as exc:
            self.store._audit_event("write_shared_note", safe_name, f"failed_{exc.__class__.__name__}")
            return {"ok": False, "error": str(exc)}

    def append_activity_log(self, *, event_type: str, actor: str, details: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Write append-only project activity log entry in JSONL format.

        The entry is appended to ``scratchpad/activity_log.jsonl`` under an
        exclusive advisory lock, and the log file is set to mode 0o600.

        Args:
            event_type: Event name stored verbatim (as a string).
            actor: Actor identifier; stored in slugified form.
            details: Optional JSON-serializable payload.

        Returns:
            ``{"ok": True}`` on success, otherwise ``{"ok": False, "error": ...}``.
            Failures, including a non-serializable *details* value, are logged
            as warnings and reported in the return value rather than raised.
        """
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "event_type": str(event_type),
            "actor": self._safe_slug(actor, "agent"),
            "project_id": self._safe_slug(self.project_id, "none") if self.project_id else None,
            "details": details or {},
        }
        try:
            # Serialization and directory setup sit inside the try so that
            # they follow the same best-effort contract as the write itself.
            line = json.dumps(entry, ensure_ascii=False) + "\n"
            scratch = self._scratchpad_dir()
            target = (scratch / "activity_log.jsonl").resolve()
            lock_path = scratch / ".activity_log.jsonl.lock"
            with open(lock_path, "w", encoding="utf-8") as lock_file:
                fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
                try:
                    with open(target, "a", encoding="utf-8") as log_file:
                        log_file.write(line)
                        log_file.flush()
                        os.fsync(log_file.fileno())
                    os.chmod(target, 0o600)
                finally:
                    fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
            return {"ok": True}
        except Exception as exc:
            logger.warning(f"Failed to append activity log: {exc}")
            return {"ok": False, "error": str(exc)}


def build_filesystem_header(user_id: str) -> str:
    """Build the compact prompt header that lists a user's context files.

    The header names every manifest path (sorted, or "none" when the manifest
    is empty) and adds fixed priority hints. If anything fails, a warning is
    logged and a short "unavailable" header is returned instead.
    """
    try:
        manifest = AgentFlatContextStore(user_id).load_context_manifest() or {"documents": []}
        documents = manifest.get("documents")
        if not isinstance(documents, list):
            documents = []
        names = sorted(
            str(doc.get("path"))
            for doc in documents
            if isinstance(doc, dict) and doc.get("path")
        )
        listing = ", ".join(names) if names else "none"
        parts = [
            "Workspace Context: You have access to a local flat-file store. ",
            f"Available Files: {listing}. ",
            "Instructions: For style guidelines, prioritize step4_persona_data.json. ",
            "For technical site data, prioritize step2_website_analysis.json.",
        ]
        return "".join(parts)
    except Exception as exc:
        logger.warning(f"Failed to build filesystem header for user {user_id}: {exc}")
        return "Workspace Context: local flat-file store unavailable."