Auto-sync from website-creator
This commit is contained in:
309
skills/seo-analyzers/scripts/content_quality_scorer.py
Normal file
309
skills/seo-analyzers/scripts/content_quality_scorer.py
Normal file
@@ -0,0 +1,309 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Content Quality Scorer
|
||||
|
||||
Calculate overall content quality score (0-100) with Thai language support.
|
||||
Analyzes keyword optimization, readability, structure, and brand voice alignment.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
# Import analyzers. When this script is executed from a different working
# directory, the sibling modules are not importable directly, so fall back
# to prepending the script's own directory to sys.path and retry.
try:
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
    import sys
    # Put this script's directory first on the search path so the sibling
    # analyzer modules shadow any same-named module elsewhere.
    sys.path.insert(0, os.path.dirname(__file__))
    from thai_keyword_analyzer import ThaiKeywordAnalyzer
    from thai_readability import ThaiReadabilityAnalyzer
|
||||
|
||||
|
||||
class ContentQualityScorer:
    """Score content quality on a 0-100 scale.

    The total is the sum of four equally weighted categories (25 points
    each): keyword optimization, readability, structure, and brand-voice
    alignment. Thai-language analysis is delegated to the imported
    ThaiKeywordAnalyzer / ThaiReadabilityAnalyzer helpers.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        # Analyzer instances are created once and reused for every call.
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        # An empty dict means "no brand-voice guide"; score_brand_voice
        # then falls back to a neutral default score.
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword optimization (0-25 points)."""
        report = self.keyword_analyzer.analyze(text, keyword)
        pts = 0

        # Density band (10 points): 1.0-1.5% is ideal; a near miss
        # (0.5-1.0% or 1.5-2.0%) earns half credit.
        density = report['density']
        if 1.0 <= density <= 1.5:
            pts += 10
        elif 0.5 <= density <= 2.0:
            pts += 5

        # Critical placements: 5 points for each location hit (15 max).
        placed = report['critical_placements']
        pts += 5 * sum(
            bool(placed[slot])
            for slot in ('in_first_100_words', 'in_h1', 'in_conclusion')
        )

        return pts

    def score_readability(self, text: str) -> float:
        """Score readability (0-25 points)."""
        report = self.readability_analyzer.analyze(text)
        pts = 0

        # Average sentence length (10 points): 15-25 words is ideal,
        # 10-15 or 25-30 earns partial credit.
        avg_words = report['avg_sentence_length']
        if 15 <= avg_words <= 25:
            pts += 10
        elif 10 <= avg_words <= 30:
            pts += 6

        # Thai grade-level label (10 points) — matched by substring.
        level = report['grade_level']['thai']
        if any(tag in level for tag in ("ม.10", "ม.12", "ปานกลาง")):
            pts += 10
        elif any(tag in level for tag in ("ม.6", "ม.9", "ง่าย")):
            pts += 8

        # Paragraph structure (5 points): enough paragraphs, none too long.
        paragraphs = report['paragraph_structure']
        if paragraphs['paragraph_count'] >= 5 and paragraphs['avg_length_words'] < 200:
            pts += 5
        elif paragraphs['paragraph_count'] >= 3:
            pts += 3

        return pts

    def score_structure(self, text: str) -> float:
        """Score content structure (0-25 points)."""
        pts = 0

        # Count markdown headings in one pass. '# ' only matches H1
        # because H2/H3 lines have a second '#' before the space.
        counts = {'# ': 0, '## ': 0, '### ': 0}
        for line in text.split('\n'):
            for prefix in counts:
                if line.startswith(prefix):
                    counts[prefix] += 1

        # Exactly one H1 (5 points).
        if counts['# '] == 1:
            pts += 5

        # H2 sections (10 points): 4-7 is ideal, 2-3 or 8-10 partial.
        h2_total = counts['## ']
        if 4 <= h2_total <= 7:
            pts += 10
        elif 2 <= h2_total <= 10:
            pts += 6

        # At least two H3 subsections (5 points).
        if counts['### '] >= 2:
            pts += 5

        # Word count sweet spot (5 points): 1500-3000 ideal,
        # 1000-1500 or 3000-4000 partial.
        total_words = self.keyword_analyzer.count_words(text)
        if 1500 <= total_words <= 3000:
            pts += 5
        elif 1000 <= total_words <= 4000:
            pts += 3

        return pts

    def score_brand_voice(self, text: str) -> float:
        """Score brand voice alignment (0-25 points)."""
        if not self.brand_voice:
            # No brand-voice guide supplied: grant a neutral default.
            return 20

        pts = 0

        # Formality (15 points for an exact match, 10 when the detected
        # formality score sits close to neutral).
        detected = self.readability_analyzer.detect_formality(text)
        wanted = self.brand_voice.get('formality', 'ปกติ')
        if wanted == detected['level']:
            pts += 15
        elif abs(detected['score'] - 50) < 20:
            pts += 10

        # Banned vocabulary (10 points when none of the terms appears).
        banned = self.brand_voice.get('avoid_terms', [])
        if all(term not in text for term in banned):
            pts += 10

        return min(pts, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Calculate the overall quality score (0-100) and report dict."""
        categories = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text),
        }
        total = sum(categories.values())

        # Map the total onto a status label and a suggested next action.
        if total >= 90:
            status, action = "excellent", "Publish immediately"
        elif total >= 80:
            status, action = "good", "Minor tweaks, publishable"
        elif total >= 70:
            status, action = "fair", "Address priority fixes"
        else:
            status, action = "needs_work", "Significant improvements required"

        return {
            'overall_score': round(total, 1),
            'categories': categories,
            'status': status,
            'action': action,
            # 70 is the publishability threshold ("fair" or better).
            'publishing_readiness': total >= 70,
            'recommendations': self._generate_recommendations(categories, text, keyword),
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Build prioritized improvement suggestions from category scores."""
        suggestions = []

        # Keyword shortfall: re-analyze to pinpoint what exactly is missing.
        if scores['keyword_optimization'] < 20:
            report = self.keyword_analyzer.analyze(text, keyword)
            if report['density'] < 1.0:
                suggestions.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {report['density']}%)")
            if not report['critical_placements']['in_h1']:
                suggestions.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")

        if scores['readability'] < 18:
            suggestions.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")

        if scores['structure'] < 18:
            suggestions.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")

        if scores['brand_voice'] < 18:
            suggestions.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")

        return suggestions
|
||||
|
||||
|
||||
def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice context from a project folder.

    Looks for ``brand-voice.md`` inside *context_path* and derives the
    formality level by simple keyword matching in the file body.

    Returns None when the file does not exist; otherwise a dict with
    'formality' and an (empty, placeholder) 'avoid_terms' list.
    """
    guide_path = os.path.join(context_path, 'brand-voice.md')
    if not os.path.exists(guide_path):
        return None

    with open(guide_path, 'r', encoding='utf-8') as fh:
        guide_text = fh.read()

    # Simplified parsing: the first matching keyword wins ('กันเอง' is
    # checked before 'เป็นทางการ'), defaulting to neutral ('ปกติ').
    formality = 'ปกติ'
    for candidate in ('กันเอง', 'เป็นทางการ'):
        if candidate in guide_text:
            formality = candidate
            break

    return {
        'formality': formality,
        'avoid_terms': []
    }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse arguments, score the content, print results.

    Exits with status 1 when neither --text nor --file is supplied.
    """
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )
    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )
    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )
    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )
    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )

    args = parser.parse_args()

    # Load text; --file takes precedence over --text.
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        print("Error: Must provide --text or --file")
        # BUG FIX: the original called sys.exit(1), but `sys` is only
        # imported in the module's ImportError fallback branch, so this
        # line raised NameError whenever the direct imports succeeded.
        # SystemExit is the builtin equivalent (same exit code 1).
        raise SystemExit(1)

    # Load brand-voice context if a valid folder was provided.
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)

    # Calculate the score.
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)

    # Output: machine-readable JSON, or a human-readable report.
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f"  • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f"  • Readability: {result['categories']['readability']}/25")
        print(f"  • Structure: {result['categories']['structure']}/25")
        print(f"  • Brand Voice: {result['categories']['brand_voice']}/25")

        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"  • {rec}")

        print()
|
||||
|
||||
|
||||
# Allow use both as a command-line script and as an importable module.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user