#!/usr/bin/env python3
"""
Content Quality Scorer

Calculate an overall content quality score (0-100) with Thai language
support. Analyzes keyword optimization, readability, structure, and
brand voice alignment.
"""

import argparse
import json
import os
import sys  # needed at module level: main() uses sys.exit() on the error path
from typing import Dict, List, Optional

# Import the Thai analyzers. If the plain import fails (script invoked from
# another working directory), retry with this script's directory on sys.path.
try:
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
    sys.path.insert(0, os.path.dirname(__file__))
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer


class ContentQualityScorer:
    """Calculate overall content quality score (0-100).

    The total is the sum of four category scores, each worth up to
    25 points: keyword optimization, readability, structure, and
    brand voice alignment.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        """Create a scorer.

        Args:
            brand_voice: Optional brand-voice config. Recognized keys
                (from the code below): 'formality' (target formality
                level string) and 'avoid_terms' (list of banned terms).
        """
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword optimization (0-25 points).

        10 points for keyword density in the ideal 1.0-1.5% band
        (5 for the adjacent bands), plus 5 points for each critical
        placement: first 100 words, H1, and conclusion.
        """
        analysis = self.keyword_analyzer.analyze(text, keyword)
        density = analysis['density']
        placements = analysis['critical_placements']

        score = 0

        # Density score (10 points)
        if 1.0 <= density <= 1.5:
            score += 10
        elif 0.5 <= density < 1.0 or 1.5 < density <= 2.0:
            score += 5

        # Critical placements (15 points)
        if placements['in_first_100_words']:
            score += 5
        if placements['in_h1']:
            score += 5
        if placements['in_conclusion']:
            score += 5

        return score

    def score_readability(self, text: str) -> float:
        """Score readability (0-25 points).

        Combines average sentence length (10 pts), Thai grade level
        (10 pts), and paragraph structure (5 pts) as reported by the
        readability analyzer.
        """
        analysis = self.readability_analyzer.analyze(text)

        score = 0

        # Sentence length (10 points): 15-25 words is ideal.
        avg_len = analysis['avg_sentence_length']
        if 15 <= avg_len <= 25:
            score += 10
        elif 10 <= avg_len < 15 or 25 < avg_len <= 30:
            score += 6

        # Grade level (10 points): matched by substring against the
        # analyzer's Thai grade-level label.
        grade = analysis['grade_level']['thai']
        if "ม.10" in grade or "ม.12" in grade or "ปานกลาง" in grade:
            score += 10
        elif "ม.6" in grade or "ม.9" in grade or "ง่าย" in grade:
            score += 8

        # Paragraph structure (5 points)
        para = analysis['paragraph_structure']
        if para['paragraph_count'] >= 5 and para['avg_length_words'] < 200:
            score += 5
        elif para['paragraph_count'] >= 3:
            score += 3

        return score

    def score_structure(self, text: str) -> float:
        """Score content structure (0-25 points).

        Counts Markdown headings (lines starting with '# ', '## ',
        '### ') and scores heading usage plus total word count.
        """
        score = 0

        # Check for headings
        lines = text.split('\n')
        h1_count = sum(1 for line in lines if line.startswith('# '))
        h2_count = sum(1 for line in lines if line.startswith('## '))
        h3_count = sum(1 for line in lines if line.startswith('### '))

        # H1 (5 points): exactly one H1 expected.
        if h1_count == 1:
            score += 5

        # H2 sections (10 points)
        if 4 <= h2_count <= 7:
            score += 10
        elif 2 <= h2_count < 4 or 7 < h2_count <= 10:
            score += 6

        # H3 subsections (5 points)
        if h3_count >= 2:
            score += 5

        # Word count (5 points): 1500-3000 words is ideal.
        word_count = self.keyword_analyzer.count_words(text)
        if 1500 <= word_count <= 3000:
            score += 5
        elif 1000 <= word_count < 1500 or 3000 < word_count <= 4000:
            score += 3

        return score

    def score_brand_voice(self, text: str) -> float:
        """Score brand voice alignment (0-25 points).

        Returns a flat default of 20 when no brand voice is
        configured. Otherwise: up to 15 points for matching the
        target formality level and 10 points for avoiding all
        banned terms.
        """
        if not self.brand_voice:
            return 20  # Default score if no brand voice defined

        score = 0

        # Check formality level
        formality = self.readability_analyzer.detect_formality(text)
        target_formality = self.brand_voice.get('formality', 'ปกติ')

        if target_formality == formality['level']:
            score += 15
        elif abs(formality['score'] - 50) < 20:
            # Near-neutral formality gets partial credit.
            score += 10

        # Check for banned terms (substring match against the text).
        banned_terms = self.brand_voice.get('avoid_terms', [])
        if not any(term in text for term in banned_terms):
            score += 10

        return min(score, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Calculate overall quality score (0-100).

        Returns a dict with the total score, per-category scores,
        a status/action label, a publishing-readiness flag
        (total >= 70), and a list of recommendations.
        """
        scores = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text)
        }

        total = sum(scores.values())

        # Determine status
        if total >= 90:
            status = "excellent"
            action = "Publish immediately"
        elif total >= 80:
            status = "good"
            action = "Minor tweaks, publishable"
        elif total >= 70:
            status = "fair"
            action = "Address priority fixes"
        else:
            status = "needs_work"
            action = "Significant improvements required"

        # Generate recommendations
        recommendations = self._generate_recommendations(scores, text, keyword)

        return {
            'overall_score': round(total, 1),
            'categories': scores,
            'status': status,
            'action': action,
            'publishing_readiness': total >= 70,
            'recommendations': recommendations
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Generate (Thai-language) recommendations for low-scoring categories."""
        recs = []

        # Keyword optimization: suggest raising density / H1 placement.
        if scores['keyword_optimization'] < 20:
            keyword_analysis = self.keyword_analyzer.analyze(text, keyword)
            if keyword_analysis['density'] < 1.0:
                recs.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {keyword_analysis['density']}%)")
            if not keyword_analysis['critical_placements']['in_h1']:
                recs.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")

        # Readability
        if scores['readability'] < 18:
            recs.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")

        # Structure
        if scores['structure'] < 18:
            recs.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")

        # Brand voice
        if scores['brand_voice'] < 18:
            recs.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")

        return recs


def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice context from a project folder.

    Reads ``brand-voice.md`` inside *context_path* and extracts the
    formality level by simple substring matching. Returns None when
    the file does not exist.
    """
    brand_voice_file = os.path.join(context_path, 'brand-voice.md')

    if not os.path.exists(brand_voice_file):
        return None

    # Parse brand voice (simplified)
    with open(brand_voice_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Extract formality level (simplified parsing); default is neutral.
    formality = 'ปกติ'
    if 'กันเอง' in content:
        formality = 'กันเอง'
    elif 'เป็นทางการ' in content:
        formality = 'เป็นทางการ'

    return {
        'formality': formality,
        'avoid_terms': []
    }


def main():
    """Main entry point: parse CLI args, score the content, print results."""
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )
    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )
    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )
    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )
    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )

    args = parser.parse_args()

    # Load text; --file takes precedence over --text.
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        # BUGFIX: sys is now imported at module level, so this exit no
        # longer raises NameError when the analyzer imports succeed.
        print("Error: Must provide --text or --file")
        sys.exit(1)

    # Load context if provided
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)

    # Calculate score
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)

    # Output
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f"  • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f"  • Readability: {result['categories']['readability']}/25")
        print(f"  • Structure: {result['categories']['structure']}/25")
        print(f"  • Brand Voice: {result['categories']['brand_voice']}/25")

        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"  • {rec}")

        print()


if __name__ == '__main__':
    main()