#!/usr/bin/env python3
|
|
"""
|
|
Content Quality Scorer
|
|
|
|
Calculate overall content quality score (0-100) with Thai language support.
|
|
Analyzes keyword optimization, readability, structure, and brand voice alignment.
|
|
"""
|
|
|
|
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional
|
|
|
|
# Import analyzers
|
|
# Prefer a normal import (works when this script's directory is on sys.path,
# e.g. installed as a package or run from its own folder).
try:
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
    # Fallback for direct invocation from another working directory:
    # put this file's directory on sys.path and retry the same imports.
    import sys
    sys.path.insert(0, os.path.dirname(__file__))
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
|
|
|
|
|
|
class ContentQualityScorer:
    """Score content quality on a 0-100 scale with Thai-language support.

    Four categories contribute up to 25 points each: keyword optimization,
    readability, document structure, and brand-voice alignment.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        """Set up the underlying analyzers.

        brand_voice: optional dict with 'formality' and 'avoid_terms' keys;
        when omitted, brand-voice scoring falls back to a default score.
        """
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword usage (0-25): density band plus critical placements."""
        report = self.keyword_analyzer.analyze(text, keyword)
        density = report['density']
        spots = report['critical_placements']

        # Density band: 1.0-1.5% is ideal (10 pts); a near miss earns half.
        if 1.0 <= density <= 1.5:
            points = 10
        elif 0.5 <= density < 1.0 or 1.5 < density <= 2.0:
            points = 5
        else:
            points = 0

        # 5 points apiece for each critical placement that is satisfied.
        points += sum(
            5
            for flag in ('in_first_100_words', 'in_h1', 'in_conclusion')
            if spots[flag]
        )

        return points

    def score_readability(self, text: str) -> float:
        """Score readability (0-25): sentence length, grade level, paragraphs."""
        report = self.readability_analyzer.analyze(text)
        points = 0

        # Average sentence length: 15-25 words ideal, adjacent bands partial.
        mean_len = report['avg_sentence_length']
        if 15 <= mean_len <= 25:
            points += 10
        elif 10 <= mean_len < 15 or 25 < mean_len <= 30:
            points += 6

        # Thai grade-level label: mid-difficulty full marks, easy near-full.
        grade = report['grade_level']['thai']
        if any(tag in grade for tag in ("ม.10", "ม.12", "ปานกลาง")):
            points += 10
        elif any(tag in grade for tag in ("ม.6", "ม.9", "ง่าย")):
            points += 8

        # Paragraph structure: several reasonably short paragraphs score best.
        para = report['paragraph_structure']
        if para['paragraph_count'] >= 5 and para['avg_length_words'] < 200:
            points += 5
        elif para['paragraph_count'] >= 3:
            points += 3

        return points

    def score_structure(self, text: str) -> float:
        """Score document structure (0-25): markdown headings and word count."""
        points = 0

        # Count markdown headings by level (prefixes are mutually exclusive:
        # '## x' does not start with '# ' because of the space requirement).
        rows = text.split('\n')
        h1_total = sum(row.startswith('# ') for row in rows)
        h2_total = sum(row.startswith('## ') for row in rows)
        h3_total = sum(row.startswith('### ') for row in rows)

        # Exactly one H1 (5 pts).
        if h1_total == 1:
            points += 5

        # H2 sections: 4-7 ideal (10 pts); 2-3 or 8-10 partial (6 pts).
        if 4 <= h2_total <= 7:
            points += 10
        elif 2 <= h2_total < 4 or 7 < h2_total <= 10:
            points += 6

        # At least two H3 subsections (5 pts).
        if h3_total >= 2:
            points += 5

        # Word count: 1500-3000 ideal (5 pts); adjacent bands partial (3 pts).
        total_words = self.keyword_analyzer.count_words(text)
        if 1500 <= total_words <= 3000:
            points += 5
        elif 1000 <= total_words < 1500 or 3000 < total_words <= 4000:
            points += 3

        return points

    def score_brand_voice(self, text: str) -> float:
        """Score brand-voice alignment (0-25)."""
        if not self.brand_voice:
            return 20  # Default score if no brand voice defined

        points = 0

        # Formality: exact match with the target level is 15 pts; a roughly
        # neutral detected tone (score within 20 of the midpoint) is 10 pts.
        detected = self.readability_analyzer.detect_formality(text)
        wanted = self.brand_voice.get('formality', 'ปกติ')
        if wanted == detected['level']:
            points += 15
        elif abs(detected['score'] - 50) < 20:
            points += 10

        # Full marks for avoiding every banned term (vacuously true if none).
        banned = self.brand_voice.get('avoid_terms', [])
        if all(term not in text for term in banned):
            points += 10

        return min(points, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Aggregate category scores into an overall 0-100 quality report."""
        categories = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text),
        }
        total = sum(categories.values())

        # Map the total to a status label plus a suggested next action.
        if total >= 90:
            status, action = "excellent", "Publish immediately"
        elif total >= 80:
            status, action = "good", "Minor tweaks, publishable"
        elif total >= 70:
            status, action = "fair", "Address priority fixes"
        else:
            status, action = "needs_work", "Significant improvements required"

        return {
            'overall_score': round(total, 1),
            'categories': categories,
            'status': status,
            'action': action,
            'publishing_readiness': total >= 70,
            'recommendations': self._generate_recommendations(categories, text, keyword),
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Build Thai-language improvement tips for underperforming categories."""
        tips: List[str] = []

        # Keyword optimization below 20/25: inspect density and H1 placement.
        if scores['keyword_optimization'] < 20:
            keyword_analysis = self.keyword_analyzer.analyze(text, keyword)
            if keyword_analysis['density'] < 1.0:
                tips.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {keyword_analysis['density']}%)")
            if not keyword_analysis['critical_placements']['in_h1']:
                tips.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")

        # Readability below 18/25.
        if scores['readability'] < 18:
            tips.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")

        # Structure below 18/25.
        if scores['structure'] < 18:
            tips.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")

        # Brand voice below 18/25.
        if scores['brand_voice'] < 18:
            tips.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")

        return tips
|
|
|
|
|
|
def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice settings from a project's context folder.

    Looks for ``brand-voice.md`` inside ``context_path`` and does a naive
    keyword scan for the formality level. Returns None when the file is
    absent; otherwise a dict with 'formality' and 'avoid_terms' keys.
    """
    voice_path = os.path.join(context_path, 'brand-voice.md')
    if not os.path.exists(voice_path):
        return None

    with open(voice_path, 'r', encoding='utf-8') as handle:
        raw = handle.read()

    # Simplified parsing: first matching marker wins ('กันเอง' is checked
    # before 'เป็นทางการ'); default is the neutral level.
    formality = 'ปกติ'
    for marker in ('กันเอง', 'เป็นทางการ'):
        if marker in raw:
            formality = marker
            break

    return {
        'formality': formality,
        'avoid_terms': [],
    }
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, score the content, print a report.

    Reads the content from --file or --text (--file takes precedence),
    optionally loads brand-voice context from --context, then prints either
    a JSON payload or a human-readable summary depending on --output.
    """
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )

    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )

    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )

    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )

    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )

    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )

    args = parser.parse_args()

    # Load text (--file wins when both options are supplied)
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        # BUG FIX: `sys` was only imported inside the ImportError fallback at
        # the top of the file, so this branch raised NameError when the normal
        # imports succeeded; `sys` is now imported at module level. The error
        # message is also routed to stderr, where CLI diagnostics belong.
        print("Error: Must provide --text or --file", file=sys.stderr)
        sys.exit(1)

    # Load brand-voice context if a context folder was provided and exists
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)

    # Calculate the overall quality score
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)

    # Emit either machine-readable JSON or a human-readable summary
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f"  • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f"  • Readability: {result['categories']['readability']}/25")
        print(f"  • Structure: {result['categories']['structure']}/25")
        print(f"  • Brand Voice: {result['categories']['brand_voice']}/25")

        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"  • {rec}")

        print()


if __name__ == '__main__':
    main()
|