opencode-skill/skills/design-system/scripts/slide_search_core.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Slide Search Core - BM25 search engine for slide design databases
"""

import csv
import re
from pathlib import Path
from math import log
from collections import defaultdict

# ============ CONFIGURATION ============
# Directory holding the CSV databases: the "data" folder two levels up from this file.
DATA_DIR = Path(__file__).parent.parent / "data"
# Default cap on the number of hits returned by search().
MAX_RESULTS = 3

# Per-domain search settings.
#   file        - CSV filename, resolved relative to DATA_DIR
#   search_cols - columns joined into the text that BM25 indexes
#   output_cols - columns copied into each returned result
CSV_CONFIG = {
    "strategy": {
        "file": "slide-strategies.csv",
        "search_cols": [
            "strategy_name",
            "keywords",
            "goal",
            "audience",
            "narrative_arc",
        ],
        "output_cols": [
            "strategy_name",
            "keywords",
            "slide_count",
            "structure",
            "goal",
            "audience",
            "tone",
            "narrative_arc",
            "sources",
        ],
    },
    "layout": {
        "file": "slide-layouts.csv",
        "search_cols": [
            "layout_name",
            "keywords",
            "use_case",
            "recommended_for",
        ],
        "output_cols": [
            "layout_name",
            "keywords",
            "use_case",
            "content_zones",
            "visual_weight",
            "cta_placement",
            "recommended_for",
            "avoid_for",
            "css_structure",
        ],
    },
    "copy": {
        "file": "slide-copy.csv",
        "search_cols": [
            "formula_name",
            "keywords",
            "use_case",
            "emotion_trigger",
            "slide_type",
        ],
        "output_cols": [
            "formula_name",
            "keywords",
            "components",
            "use_case",
            "example_template",
            "emotion_trigger",
            "slide_type",
            "source",
        ],
    },
    "chart": {
        "file": "slide-charts.csv",
        "search_cols": [
            "chart_type",
            "keywords",
            "best_for",
            "when_to_use",
            "slide_context",
        ],
        "output_cols": [
            "chart_type",
            "keywords",
            "best_for",
            "data_type",
            "when_to_use",
            "when_to_avoid",
            "max_categories",
            "slide_context",
            "css_implementation",
            "accessibility_notes",
        ],
    },
}

# Domain names in declaration order (strategy, layout, copy, chart).
AVAILABLE_DOMAINS = list(CSV_CONFIG)


# ============ BM25 IMPLEMENTATION ============
class BM25:
    """BM25 ranking algorithm for text search.

    Call ``fit(documents)`` to build the index, then ``score(query)`` to
    rank every indexed document against a query.

    Args:
        k1: Term-frequency saturation parameter.
        b: Document-length normalisation parameter.
    """

    def __init__(self, k1=1.5, b=0.75):
        self.k1 = k1
        self.b = b
        self.corpus = []        # tokenised documents
        self.doc_lengths = []   # token count per document
        self.avgdl = 0          # average document length in tokens
        self.idf = {}           # token -> inverse document frequency
        self.doc_freqs = defaultdict(int)  # token -> number of documents containing it
        self.N = 0              # number of indexed documents

    def tokenize(self, text):
        """Lowercase, split, remove punctuation, filter short words.

        Tokens of one or two characters are discarded.
        """
        text = re.sub(r'[^\w\s]', ' ', str(text).lower())
        return [w for w in text.split() if len(w) > 2]

    def fit(self, documents):
        """Build the BM25 index from an iterable of documents.

        Any previous index is discarded first, so calling ``fit`` again
        re-indexes from scratch instead of accumulating document
        frequencies from earlier calls.
        """
        self.corpus = [self.tokenize(doc) for doc in documents]
        self.N = len(self.corpus)
        self.doc_lengths = [len(doc) for doc in self.corpus]
        self.avgdl = sum(self.doc_lengths) / self.N if self.N else 0
        self.doc_freqs = defaultdict(int)
        self.idf = {}

        for doc in self.corpus:
            # dict.fromkeys de-duplicates while keeping first-seen order,
            # so each token counts once per document.
            for word in dict.fromkeys(doc):
                self.doc_freqs[word] += 1

        for word, freq in self.doc_freqs.items():
            self.idf[word] = log((self.N - freq + 0.5) / (freq + 0.5) + 1)

    def score(self, query):
        """Score all documents against ``query``.

        Returns:
            A list of ``(document_index, score)`` tuples sorted by score,
            highest first. Ties keep their original document order.
            Documents sharing no indexed token with the query score 0.
        """
        # Tokens that never occurred in the corpus cannot contribute.
        known_tokens = [t for t in self.tokenize(query) if t in self.idf]
        scores = []

        for idx, doc in enumerate(self.corpus):
            total = 0.0
            if known_tokens:
                term_freqs = defaultdict(int)
                for word in doc:
                    term_freqs[word] += 1

                # A known token implies a non-empty corpus, so avgdl > 0 here.
                length_norm = self.k1 * (
                    1 - self.b + self.b * self.doc_lengths[idx] / self.avgdl
                )
                for token in known_tokens:
                    tf = term_freqs.get(token, 0)
                    if not tf:
                        # Absent term adds nothing; skipping it also avoids
                        # a 0/0 division when k1 == 0.
                        continue
                    numerator = tf * (self.k1 + 1)
                    denominator = tf + length_norm
                    total += self.idf[token] * numerator / denominator

            scores.append((idx, total))

        return sorted(scores, key=lambda x: x[1], reverse=True)


# ============ SEARCH FUNCTIONS ============
def _load_csv(filepath):
    """Load CSV and return list of dicts"""
    with open(filepath, 'r', encoding='utf-8') as f:
        return list(csv.DictReader(f))


def _search_csv(filepath, search_cols, output_cols, query, max_results):
    """Rank the rows of one CSV file against ``query`` using BM25.

    Args:
        filepath: ``Path`` of the CSV file to search.
        search_cols: Column names whose text is joined into the indexed document.
        output_cols: Column names copied into each result (columns missing
            from the row are skipped).
        query: Free-text search query.
        max_results: Maximum number of rows to return.

    Returns:
        Up to ``max_results`` dicts for the best-scoring rows, best first.
        Rows with a score of 0 are never returned; a missing file yields ``[]``.
    """
    if not filepath.exists():
        return []

    data = _load_csv(filepath)

    # Build one searchable document per row. DictReader fills the cells of a
    # short row with None; `or ""` stops those being indexed as the text "None".
    documents = [
        " ".join(str(row.get(col) or "") for col in search_cols)
        for row in data
    ]

    # BM25 search
    bm25 = BM25()
    bm25.fit(documents)
    ranked = bm25.score(query)

    # Keep the leading rows that actually matched
    results = []
    for idx, score in ranked[:max_results]:
        if score <= 0:
            # ranked is sorted best-first, so nothing after this can match
            break
        row = data[idx]
        results.append({col: row[col] for col in output_cols if col in row})

    return results


def detect_domain(query):
    """Guess which search domain a query belongs to.

    Each domain has a keyword list; a domain's score is the number of its
    keywords that occur as a substring of the lowercased query. The highest
    scoring domain wins, with ties going to the domain declared first.
    "strategy" is returned when no keyword matches.
    """
    keyword_table = (
        ("strategy", ("pitch", "deck", "investor", "yc", "seed", "series", "demo", "sales", "webinar",
                      "conference", "board", "qbr", "all-hands", "duarte", "kawasaki", "structure")),
        ("layout", ("slide", "layout", "grid", "column", "title", "hero", "section", "cta",
                    "screenshot", "quote", "timeline", "comparison", "pricing", "team")),
        ("copy", ("headline", "copy", "formula", "aida", "pas", "hook", "cta", "benefit",
                  "objection", "proof", "testimonial", "urgency", "scarcity")),
        ("chart", ("chart", "graph", "bar", "line", "pie", "funnel", "metrics", "data",
                   "visualization", "kpi", "trend", "comparison", "heatmap", "gauge")),
    )

    haystack = query.lower()
    winner = "strategy"
    top_hits = 0
    for name, words in keyword_table:
        hits = 0
        for word in words:
            if word in haystack:
                hits += 1
        # Strictly greater, so the earliest domain keeps a tie
        if hits > top_hits:
            winner = name
            top_hits = hits

    return winner


def search(query, domain=None, max_results=MAX_RESULTS):
    """Search one domain's CSV database, auto-detecting the domain if needed.

    Args:
        query: Free-text search query.
        domain: A key of ``CSV_CONFIG``. ``None`` picks the domain with
            ``detect_domain``; an unrecognised name falls back to "strategy".
        max_results: Maximum number of results to return.

    Returns:
        A dict with "domain", "query", "file", "count" and "results", or a
        dict with "error" and "domain" when the domain's CSV file is missing.
        "domain" is always the domain that was actually searched.
    """
    if domain is None:
        domain = detect_domain(query)
    if domain not in CSV_CONFIG:
        # Normalise the name as well as the config, so the reported domain
        # matches the file that is searched.
        domain = "strategy"

    config = CSV_CONFIG[domain]
    filepath = DATA_DIR / config["file"]

    if not filepath.exists():
        return {"error": f"File not found: {filepath}", "domain": domain}

    results = _search_csv(filepath, config["search_cols"], config["output_cols"], query, max_results)

    return {
        "domain": domain,
        "query": query,
        "file": config["file"],
        "count": len(results),
        "results": results
    }


def search_all(query, max_results=2):
    """Run the query against every domain and keep the domains that matched.

    Returns a dict mapping domain name to that domain's ``search`` result;
    domains with no hits (or whose search returned an error) are left out.
    """
    per_domain = (
        (name, search(query, name, max_results)) for name in AVAILABLE_DOMAINS
    )
    return {
        name: outcome
        for name, outcome in per_domain
        if outcome.get("count", 0) > 0
    }


# ============ CONTEXTUAL SEARCH (Premium Slide System) ============

# New CSV configurations for decision system
# Decision tables: CSV filename (relative to DATA_DIR) and the column whose
# value is used as the lookup key for each row.
DECISION_CSV_CONFIG = {
    "layout-logic": {"file": "slide-layout-logic.csv", "key_col": "goal"},
    "typography": {"file": "slide-typography.csv", "key_col": "content_type"},
    "color-logic": {"file": "slide-color-logic.csv", "key_col": "emotion"},
    "backgrounds": {"file": "slide-backgrounds.csv", "key_col": "slide_type"},
}


def _load_decision_csv(csv_type):
    """Read one decision table into a dict keyed by its primary column.

    ``csv_type`` is a key of ``DECISION_CSV_CONFIG``. An unknown type or a
    missing file gives an empty dict. When several rows share a key value,
    the last one read wins.
    """
    entry = DECISION_CSV_CONFIG.get(csv_type)
    if entry:
        csv_path = DATA_DIR / entry["file"]
        if csv_path.exists():
            key_col = entry["key_col"]
            lookup = {}
            for record in _load_csv(csv_path):
                if key_col in record:
                    lookup[record[key_col]] = record
            return lookup

    return {}


def get_layout_for_goal(goal, previous_emotion=None):
    """
    Look up the layout recommendation for a slide goal.

    Reads the "layout-logic" decision table. An unknown goal falls back to
    the "features" row, and to an empty dict if that row is absent too.
    The returned dict is a copy of the row. When the row's "break_pattern"
    is "true" and a previous emotion is supplied, the copy also carries
    "_pattern_break" and "_contrast_with".
    """
    table = _load_decision_csv("layout-logic")

    if goal in table:
        chosen = table[goal]
    else:
        chosen = table.get("features", {})

    recommendation = dict(chosen) if chosen else {}

    # Flag rows that ask for a visual contrast with the previous slide
    wants_break = recommendation.get("break_pattern") == "true"
    if wants_break and previous_emotion:
        recommendation.update(_pattern_break=True, _contrast_with=previous_emotion)

    return recommendation


def get_typography_for_slide(slide_type, has_metrics=False, has_quote=False):
    """
    Look up the typography recommendation for a slide.

    Reads the "typography" decision table. ``has_metrics`` selects the
    "metric-callout" row and takes precedence over ``has_quote``, which
    selects "quote-block". Otherwise ``slide_type`` is mapped to a content
    type, defaulting to "feature-grid". Returns an empty dict when the
    table has no row for the chosen content type.
    """
    table = _load_decision_csv("typography")

    if has_metrics:
        content_type = "metric-callout"
    elif has_quote:
        content_type = "quote-block"
    else:
        # Slide type -> content type used as the table key
        content_type = {
            "hero": "hero-statement",
            "hook": "hero-statement",
            "title": "title-only",
            "problem": "subtitle-heavy",
            "agitation": "metric-callout",
            "solution": "subtitle-heavy",
            "features": "feature-grid",
            "proof": "metric-callout",
            "traction": "data-insight",
            "social": "quote-block",
            "testimonial": "testimonial",
            "pricing": "pricing",
            "team": "team",
            "cta": "cta-action",
            "comparison": "comparison",
            "timeline": "timeline",
        }.get(slide_type, "feature-grid")

    return table.get(content_type, {})


def get_color_for_emotion(emotion):
    """
    Look up the color treatment for an emotional beat.

    Reads the "color-logic" decision table. An unknown emotion falls back
    to the "clarity" row, and to an empty dict if that row is absent too.
    """
    palette = _load_decision_csv("color-logic")
    if emotion in palette:
        return palette[emotion]
    return palette.get("clarity", {})


def get_background_config(slide_type):
    """
    Look up the background image configuration for a slide type.

    Reads the "backgrounds" decision table and returns the matching row,
    or an empty dict when the slide type has no row.
    """
    table = _load_decision_csv("backgrounds")
    try:
        return table[slide_type]
    except KeyError:
        return {}


def should_use_full_bleed(slide_index, total_slides, emotion):
    """
    Decide whether a slide should get a full-bleed background.

    Returns True only when all of these hold:
    1. The emotion is a high-emotion beat: hope, urgency, fear or curiosity.
    2. The deck has at least 3 slides.
    3. slide_index is one of the strategic positions: 1, the one-third
       point, the two-thirds point, or total_slides - 1.

    NOTE(review): the position check is stateless, so nothing here stops two
    neighbouring slides from both qualifying (e.g. 1 and 2 in a 6-slide deck).
    """
    if emotion not in ("hope", "urgency", "fear", "curiosity") or total_slides < 3:
        return False

    one_third = total_slides // 3
    return slide_index in (1, one_third, 2 * one_third, total_slides - 1)


def calculate_pattern_break(slide_index, total_slides, previous_emotion=None, current_emotion=None):
    """
    Determine if this slide should break the visual pattern.
    Used for emotional contrast (Duarte Sparkline technique).

    Args:
        slide_index: Position of the slide in the deck.
        total_slides: Total slides in the deck; decks with fewer than 5
            slides never break the pattern.
        previous_emotion: Emotion of the previous slide, if known.
        current_emotion: Emotion of this slide, if known. When given, an
            emotional break is reported only if it contrasts with
            ``previous_emotion``. When omitted, any previous emotion of
            frustration, hope or fear triggers a break.

    Returns:
        True when the slide sits at the 1/3 or 2/3 point of the deck, or
        when the emotional rule above applies; otherwise False.
    """
    # Short decks have no room for pattern breaks
    if total_slides < 5:
        return False

    # Break at 1/3 and 2/3 points
    third = total_slides // 3
    if slide_index in (third, third * 2):
        return True

    # Previous emotion -> emotions that count as a contrast to it
    contrasting_emotions = {
        "frustration": ["hope", "relief"],
        "hope": ["frustration", "fear"],
        "fear": ["hope", "relief"],
    }

    contrasts = contrasting_emotions.get(previous_emotion)
    if contrasts is None:
        return False

    if current_emotion is None:
        # Without the current emotion the contrast cannot be checked;
        # keep the behaviour callers that omit it already rely on.
        return True

    return current_emotion in contrasts


def search_with_context(query, slide_position=1, total_slides=9, previous_emotion=None):
    """
    Cross-domain search plus deck-aware recommendations for one slide.

    Args:
        query: Search query
        slide_position: Current slide index (1-based)
        total_slides: Total slides in deck
        previous_emotion: Emotion of previous slide (for contrast)

    Returns:
        Dict with "query", "context" (inferred goal, layout, typography,
        color, pattern-break, full-bleed, optional background, and
        animation recommendations) and "base_results" (the search_all hits).

    NOTE(review): when no goal keyword is found in the query, the inferred
    goal is whatever detect_domain returns, i.e. a search-domain name such
    as "strategy" rather than a slide goal. Confirm this is intended.
    """
    # Plain BM25 hits across every domain
    base_results = search_all(query, max_results=2)

    lowered = query.lower()

    # Start from the detected domain, then let the first matching keyword
    # rule (checked in this order) override it.
    goal = detect_domain(lowered)
    goal_rules = (
        (("problem",), "problem"),
        (("solution",), "solution"),
        (("cta", "call to action"), "cta"),
        (("hook", "title"), "hook"),
        (("traction", "metric"), "traction"),
    )
    for needles, mapped_goal in goal_rules:
        if any(needle in lowered for needle in needles):
            goal = mapped_goal
            break

    context = {
        "slide_position": slide_position,
        "total_slides": total_slides,
        "previous_emotion": previous_emotion,
        "inferred_goal": goal,
    }

    # Layout
    layout = get_layout_for_goal(goal, previous_emotion)
    if layout:
        context.update({
            "recommended_layout": layout.get("layout_pattern"),
            "layout_direction": layout.get("direction"),
            "visual_weight": layout.get("visual_weight"),
            "use_background_image": layout.get("use_bg_image") == "true",
        })

    # Typography
    typography = get_typography_for_slide(goal)
    if typography:
        typography_fields = ("primary_size", "secondary_size", "weight_contrast")
        context["typography"] = {
            field: typography.get(field) for field in typography_fields
        }

    # Color, driven by the layout row's emotion ("clarity" when unknown)
    emotion = "clarity"
    if layout:
        emotion = layout.get("emotion", "clarity")
    color = get_color_for_emotion(emotion)
    if color:
        color_fields = ("background", "text_color", "accent_usage", "card_style")
        context["color_treatment"] = {
            field: color.get(field) for field in color_fields
        }

    # Pattern breaking and full-bleed decisions
    context["should_break_pattern"] = calculate_pattern_break(
        slide_position, total_slides, previous_emotion
    )
    context["should_use_full_bleed"] = should_use_full_bleed(
        slide_position, total_slides, emotion
    )

    # Background details only matter when the layout asks for an image
    if context.get("use_background_image"):
        bg_config = get_background_config(goal)
        if bg_config:
            background_fields = ("image_category", "overlay_style", "search_keywords")
            context["background"] = {
                field: bg_config.get(field) for field in background_fields
            }

    # Animation classes per goal; anything else fades up
    animation_map = {
        "hook": "animate-fade-up",
        "problem": "animate-fade-up",
        "agitation": "animate-count animate-stagger",
        "solution": "animate-scale",
        "features": "animate-stagger",
        "traction": "animate-chart animate-count",
        "proof": "animate-stagger-scale",
        "social": "animate-fade-up",
        "cta": "animate-pulse",
    }
    context["animation_class"] = animation_map.get(goal, "animate-fade-up")

    return {
        "query": query,
        "context": context,
        "base_results": base_results,
    }