Files
opencode-skill/skills/seo-analyzers/scripts/content_quality_scorer.py
2026-03-08 23:03:19 +07:00

310 lines
10 KiB
Python

#!/usr/bin/env python3
"""
Content Quality Scorer
Calculate overall content quality score (0-100) with Thai language support.
Analyzes keyword optimization, readability, structure, and brand voice alignment.
"""
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional
# Import analyzers
try:
from thai_keyword_analyzer import ThaiKeywordAnalyzer
from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
import sys
sys.path.insert(0, os.path.dirname(__file__))
from thai_keyword_analyzer import ThaiKeywordAnalyzer
from thai_readability import ThaiReadabilityAnalyzer
class ContentQualityScorer:
    """Calculate overall content quality score (0-100).

    The total is the sum of four 25-point categories: keyword
    optimization, readability, structure, and brand-voice alignment.
    Keyword and readability metrics are delegated to the Thai-language
    analyzer modules.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        # Analyzers are project-local; brand_voice defaults to an empty
        # dict, which makes score_brand_voice fall back to a neutral 20.
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword optimization (0-25 points).

        Density contributes up to 10 points; each of the three critical
        placements (first 100 words, H1, conclusion) adds 5 points.
        """
        report = self.keyword_analyzer.analyze(text, keyword)
        dens = report['density']
        spots = report['critical_placements']

        # Density sweet spot is 1.0-1.5%; the fringes get half credit.
        if 1.0 <= dens <= 1.5:
            points = 10
        elif 0.5 <= dens <= 2.0:
            points = 5
        else:
            points = 0

        # 5 points for every critical placement that is satisfied.
        points += 5 * sum(
            1 for flag in ('in_first_100_words', 'in_h1', 'in_conclusion')
            if spots[flag]
        )
        return points

    def score_readability(self, text: str) -> float:
        """Score readability (0-25 points).

        Sentence length contributes 10, grade level 10, and paragraph
        structure 5 points.
        """
        report = self.readability_analyzer.analyze(text)
        points = 0

        # Average sentence length: 15-25 words is ideal.
        mean_len = report['avg_sentence_length']
        if 15 <= mean_len <= 25:
            points += 10
        elif 10 <= mean_len <= 30:
            points += 6

        # Grade level is matched by substring against the Thai label.
        label = report['grade_level']['thai']
        if any(tag in label for tag in ("ม.10", "ม.12", "ปานกลาง")):
            points += 10
        elif any(tag in label for tag in ("ม.6", "ม.9", "ง่าย")):
            points += 8

        # Paragraph structure: enough paragraphs, none overly long.
        paras = report['paragraph_structure']
        if paras['paragraph_count'] >= 5 and paras['avg_length_words'] < 200:
            points += 5
        elif paras['paragraph_count'] >= 3:
            points += 3
        return points

    def score_structure(self, text: str) -> float:
        """Score content structure (0-25 points).

        A single H1 (5), 4-7 H2 sections (10), at least two H3
        subsections (5), and a 1500-3000 word count (5).
        """
        rows = text.split('\n')
        # Markdown heading prefixes are mutually exclusive per line.
        h1 = sum(row.startswith('# ') for row in rows)
        h2 = sum(row.startswith('## ') for row in rows)
        h3 = sum(row.startswith('### ') for row in rows)

        points = 0
        if h1 == 1:
            points += 5
        if 4 <= h2 <= 7:
            points += 10
        elif 2 <= h2 <= 10:
            points += 6
        if h3 >= 2:
            points += 5

        # Word count sweet spot is 1500-3000; partial credit nearby.
        words = self.keyword_analyzer.count_words(text)
        if 1500 <= words <= 3000:
            points += 5
        elif 1000 <= words <= 4000:
            points += 3
        return points

    def score_brand_voice(self, text: str) -> float:
        """Score brand voice alignment (0-25 points).

        Without a brand-voice definition a neutral 20 is returned.
        Otherwise: formality match (up to 15) plus absence of banned
        terms (10), capped at 25.
        """
        if not self.brand_voice:
            return 20  # Default score if no brand voice defined

        points = 0
        detected = self.readability_analyzer.detect_formality(text)
        wanted = self.brand_voice.get('formality', 'ปกติ')
        if wanted == detected['level']:
            points += 15
        elif abs(detected['score'] - 50) < 20:
            # Near-neutral formality still earns partial credit.
            points += 10

        # Full marks only when no banned term appears in the text.
        blocked = self.brand_voice.get('avoid_terms', [])
        if all(term not in text for term in blocked):
            points += 10
        return min(points, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Calculate the overall quality score (0-100).

        Returns a dict with the total, per-category scores, a status
        label, a suggested action, a publishing-readiness flag (>= 70),
        and a list of Thai-language recommendations.
        """
        scores = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text),
        }
        total = sum(scores.values())

        # Map the total onto a status label and a suggested next action.
        if total >= 90:
            status, action = "excellent", "Publish immediately"
        elif total >= 80:
            status, action = "good", "Minor tweaks, publishable"
        elif total >= 70:
            status, action = "fair", "Address priority fixes"
        else:
            status, action = "needs_work", "Significant improvements required"

        return {
            'overall_score': round(total, 1),
            'categories': scores,
            'status': status,
            'action': action,
            'publishing_readiness': total >= 70,
            'recommendations': self._generate_recommendations(scores, text, keyword),
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Generate Thai-language recommendations for low-scoring categories."""
        advice: List[str] = []

        # Keyword optimization: re-analyze only when the score is weak.
        if scores['keyword_optimization'] < 20:
            report = self.keyword_analyzer.analyze(text, keyword)
            if report['density'] < 1.0:
                advice.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {report['density']}%)")
            if not report['critical_placements']['in_h1']:
                advice.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")
        if scores['readability'] < 18:
            advice.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")
        if scores['structure'] < 18:
            advice.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")
        if scores['brand_voice'] < 18:
            advice.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")
        return advice
def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice settings from a project context folder.

    Returns None when ``brand-voice.md`` is absent; otherwise a dict
    with the detected formality level and an (empty) avoid-terms list.
    """
    voice_path = Path(context_path) / 'brand-voice.md'
    if not voice_path.exists():
        return None

    # Parse brand voice (simplified): substring-based formality detection.
    content = voice_path.read_text(encoding='utf-8')
    if 'กันเอง' in content:
        level = 'กันเอง'
    elif 'เป็นทางการ' in content:
        level = 'เป็นทางการ'
    else:
        level = 'ปกติ'

    return {
        'formality': level,
        'avoid_terms': [],
    }
def main():
    """CLI entry point: parse arguments, score the content, print results.

    Exits with status 1 when neither --text nor --file is supplied.
    """
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )
    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )
    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )
    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )
    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )
    args = parser.parse_args()

    # Load text: --file wins over --text; neither is a usage error.
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        # BUG FIX: previously `sys` was imported only inside the
        # ImportError fallback at the top of the file, so this exit
        # raised NameError whenever the analyzer imports succeeded.
        # `sys` is now imported unconditionally at module level.
        print("Error: Must provide --text or --file")
        sys.exit(1)

    # Load context if provided (brand-voice settings are optional).
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)

    # Calculate score
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)

    # Output: machine-readable JSON or a human-readable summary.
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f"  • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f"  • Readability: {result['categories']['readability']}/25")
        print(f"  • Structure: {result['categories']['structure']}/25")
        print(f"  • Brand Voice: {result['categories']['brand_voice']}/25")
        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"{rec}")
        print()
# Script entry guard: run the CLI only when executed directly.
if __name__ == '__main__':
    main()