#!/usr/bin/env python3
|
|
"""
|
|
Content Quality Scorer
|
|
|
|
Calculate overall content quality score (0-100) with Thai language support.
|
|
Analyzes keyword optimization, readability, structure, and brand voice alignment.
|
|
"""
|
|
|
|
import argparse
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional
|
|
|
|
# Import analyzers
|
|
# Prefer a normal import (works when this script's directory is on sys.path,
# e.g. installed as a package or run from its own folder).
try:
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
    # Fallback for direct invocation from another working directory:
    # put this file's directory on sys.path and retry the same imports.
    import sys
    sys.path.insert(0, os.path.dirname(__file__))
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
|
|
|
|
|
|
class ContentQualityScorer:
    """Score content quality on a 0-100 scale with Thai-language support.

    Four categories contribute up to 25 points each: keyword optimization,
    readability, document structure, and brand-voice alignment.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        """Set up the underlying analyzers.

        brand_voice: optional dict with 'formality' and 'avoid_terms' keys;
        when omitted, brand-voice scoring falls back to a default score.
        """
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword usage (0-25): density band plus critical placements."""
        report = self.keyword_analyzer.analyze(text, keyword)
        density = report['density']
        spots = report['critical_placements']

        # Density band: 1.0-1.5% is ideal (10 pts); a near miss earns half.
        if 1.0 <= density <= 1.5:
            points = 10
        elif 0.5 <= density < 1.0 or 1.5 < density <= 2.0:
            points = 5
        else:
            points = 0

        # 5 points apiece for each critical placement that is satisfied.
        points += sum(
            5
            for flag in ('in_first_100_words', 'in_h1', 'in_conclusion')
            if spots[flag]
        )

        return points

    def score_readability(self, text: str) -> float:
        """Score readability (0-25): sentence length, grade level, paragraphs."""
        report = self.readability_analyzer.analyze(text)
        points = 0

        # Average sentence length: 15-25 words ideal, adjacent bands partial.
        mean_len = report['avg_sentence_length']
        if 15 <= mean_len <= 25:
            points += 10
        elif 10 <= mean_len < 15 or 25 < mean_len <= 30:
            points += 6

        # Thai grade-level label: mid-difficulty full marks, easy near-full.
        grade = report['grade_level']['thai']
        if any(tag in grade for tag in ("ม.10", "ม.12", "ปานกลาง")):
            points += 10
        elif any(tag in grade for tag in ("ม.6", "ม.9", "ง่าย")):
            points += 8

        # Paragraph structure: several reasonably short paragraphs score best.
        para = report['paragraph_structure']
        if para['paragraph_count'] >= 5 and para['avg_length_words'] < 200:
            points += 5
        elif para['paragraph_count'] >= 3:
            points += 3

        return points

    def score_structure(self, text: str) -> float:
        """Score document structure (0-25): markdown headings and word count."""
        points = 0

        # Count markdown headings by level (prefixes are mutually exclusive:
        # '## x' does not start with '# ' because of the space requirement).
        rows = text.split('\n')
        h1_total = sum(row.startswith('# ') for row in rows)
        h2_total = sum(row.startswith('## ') for row in rows)
        h3_total = sum(row.startswith('### ') for row in rows)

        # Exactly one H1 (5 pts).
        if h1_total == 1:
            points += 5

        # H2 sections: 4-7 ideal (10 pts); 2-3 or 8-10 partial (6 pts).
        if 4 <= h2_total <= 7:
            points += 10
        elif 2 <= h2_total < 4 or 7 < h2_total <= 10:
            points += 6

        # At least two H3 subsections (5 pts).
        if h3_total >= 2:
            points += 5

        # Word count: 1500-3000 ideal (5 pts); adjacent bands partial (3 pts).
        total_words = self.keyword_analyzer.count_words(text)
        if 1500 <= total_words <= 3000:
            points += 5
        elif 1000 <= total_words < 1500 or 3000 < total_words <= 4000:
            points += 3

        return points

    def score_brand_voice(self, text: str) -> float:
        """Score brand-voice alignment (0-25)."""
        if not self.brand_voice:
            return 20  # Default score if no brand voice defined

        points = 0

        # Formality: exact match with the target level is 15 pts; a roughly
        # neutral detected tone (score within 20 of the midpoint) is 10 pts.
        detected = self.readability_analyzer.detect_formality(text)
        wanted = self.brand_voice.get('formality', 'ปกติ')
        if wanted == detected['level']:
            points += 15
        elif abs(detected['score'] - 50) < 20:
            points += 10

        # Full marks for avoiding every banned term (vacuously true if none).
        banned = self.brand_voice.get('avoid_terms', [])
        if all(term not in text for term in banned):
            points += 10

        return min(points, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Aggregate category scores into an overall 0-100 quality report."""
        categories = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text),
        }
        total = sum(categories.values())

        # Map the total to a status label plus a suggested next action.
        if total >= 90:
            status, action = "excellent", "Publish immediately"
        elif total >= 80:
            status, action = "good", "Minor tweaks, publishable"
        elif total >= 70:
            status, action = "fair", "Address priority fixes"
        else:
            status, action = "needs_work", "Significant improvements required"

        return {
            'overall_score': round(total, 1),
            'categories': categories,
            'status': status,
            'action': action,
            'publishing_readiness': total >= 70,
            'recommendations': self._generate_recommendations(categories, text, keyword),
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Build Thai-language improvement tips for underperforming categories."""
        tips: List[str] = []

        # Keyword optimization below 20/25: inspect density and H1 placement.
        if scores['keyword_optimization'] < 20:
            keyword_analysis = self.keyword_analyzer.analyze(text, keyword)
            if keyword_analysis['density'] < 1.0:
                tips.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {keyword_analysis['density']}%)")
            if not keyword_analysis['critical_placements']['in_h1']:
                tips.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")

        # Readability below 18/25.
        if scores['readability'] < 18:
            tips.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")

        # Structure below 18/25.
        if scores['structure'] < 18:
            tips.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")

        # Brand voice below 18/25.
        if scores['brand_voice'] < 18:
            tips.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")

        return tips
|
|
|
|
|
|
def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice settings from a project's context folder.

    Looks for ``brand-voice.md`` inside ``context_path`` and does a naive
    keyword scan for the formality level. Returns None when the file is
    absent; otherwise a dict with 'formality' and 'avoid_terms' keys.
    """
    voice_path = os.path.join(context_path, 'brand-voice.md')
    if not os.path.exists(voice_path):
        return None

    with open(voice_path, 'r', encoding='utf-8') as handle:
        raw = handle.read()

    # Simplified parsing: first matching marker wins ('กันเอง' is checked
    # before 'เป็นทางการ'); default is the neutral level.
    formality = 'ปกติ'
    for marker in ('กันเอง', 'เป็นทางการ'):
        if marker in raw:
            formality = marker
            break

    return {
        'formality': formality,
        'avoid_terms': [],
    }
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, score the content, print a report.

    Reads the content from --file or --text (--file takes precedence),
    optionally loads brand-voice context from --context, then prints either
    a JSON payload or a human-readable summary depending on --output.
    """
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )

    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )

    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )

    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )

    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )

    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )

    args = parser.parse_args()

    # Load text (--file wins when both options are supplied)
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        # BUG FIX: `sys` was only imported inside the ImportError fallback at
        # the top of the file, so this branch raised NameError when the normal
        # imports succeeded; `sys` is now imported at module level. The error
        # message is also routed to stderr, where CLI diagnostics belong.
        print("Error: Must provide --text or --file", file=sys.stderr)
        sys.exit(1)

    # Load brand-voice context if a context folder was provided and exists
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)

    # Calculate the overall quality score
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)

    # Emit either machine-readable JSON or a human-readable summary
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f"  • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f"  • Readability: {result['categories']['readability']}/25")
        print(f"  • Structure: {result['categories']['structure']}/25")
        print(f"  • Brand Voice: {result['categories']['brand_voice']}/25")

        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"  • {rec}")

        print()


if __name__ == '__main__':
    main()
|