Update skills: add website-creator, mql-developer, ecommerce-astro

Changes:
- Add FAL_KEY and GEMINI_API_KEY to .env.example
- Update picture-it to use ~/.config/opencode/.env (unified creds)
- Remove shodh-memory skill (no longer used)
- Remove alphaear-* skills (deprecated)
- Remove thai-frontend-dev skill (replaced by website-creator)
- Remove theme-factory skill
- Add mql-developer skill (MQL5 trading)
- Add ecommerce-astro skill (Astro e-commerce)
- Add website-creator skill (Next.js + Payload CMS)
- Update install script for new skills
This commit is contained in:
2026-04-16 17:40:27 +07:00
parent 5053ccdba2
commit b26c8199a5
562 changed files with 59030 additions and 37600 deletions

View File

@@ -0,0 +1,47 @@
---
name: seo-analyzers
description: Analyze content quality for Thai language. Use for keyword density checking, readability scoring, content quality rating (0-100), and AI pattern detection in Thai text.
---
# SEO Analyzers
Analyze Thai content quality — keyword density, readability, quality scoring, AI pattern removal.
## Scripts
| Script | Purpose |
|--------|---------|
| `thai_keyword_analyzer.py` | Keyword density analysis |
| `thai_readability.py` | Readability scoring |
| `content_quality_scorer.py` | Overall quality rating (0-100) |
## Usage
```bash
# Keyword density
python3 ~/.hermes/skills/website-creator/seo-analyzers/scripts/thai_keyword_analyzer.py \
--text "บทความภาษาไทย..." --keyword "คำหลัก" --language th
# Readability
python3 ~/.hermes/skills/website-creator/seo-analyzers/scripts/thai_readability.py \
--text "เนื้อหาภาษาไทย..." --language th
# Quality score
python3 ~/.hermes/skills/website-creator/seo-analyzers/scripts/content_quality_scorer.py \
--file ./article.md --keyword "คำหลัก"
```
## Quality Thresholds
| Score | Status | Action |
|-------|--------|--------|
| 90-100 | Excellent | Publish immediately |
| 80-89 | Good | Minor tweaks |
| 70-79 | Fair | Address priority fixes |
| Below 70 | Needs Work | Significant improvements |
## Thai-Specific
- Target keyword density: **1.0-1.5%** (lower than English)
- Readability: Grade levels ม.6-ม.12
- Formality detection from particles (ครับ/ค่ะ vs นะ/จ้ะ)

View File

@@ -0,0 +1,309 @@
#!/usr/bin/env python3
"""
Content Quality Scorer
Calculate overall content quality score (0-100) with Thai language support.
Analyzes keyword optimization, readability, structure, and brand voice alignment.
"""
import argparse
import json
import os
from typing import Dict, List, Optional
from pathlib import Path
# Import analyzers
try:
from thai_keyword_analyzer import ThaiKeywordAnalyzer
from thai_readability import ThaiReadabilityAnalyzer
except ImportError:
import sys
sys.path.insert(0, os.path.dirname(__file__))
from thai_keyword_analyzer import ThaiKeywordAnalyzer
from thai_readability import ThaiReadabilityAnalyzer
class ContentQualityScorer:
    """Calculate an overall content quality score (0-100).

    The total is the sum of four 25-point categories: keyword
    optimization, readability, structure, and brand voice alignment.
    Thresholds mirror the skill docs: >=90 excellent, >=80 good,
    >=70 fair (publishable after fixes), otherwise needs work.
    """

    def __init__(self, brand_voice: Optional[Dict] = None):
        # brand_voice: optional dict with 'formality' and 'avoid_terms'
        # keys (as produced by load_context).  When omitted, brand-voice
        # scoring returns a neutral default instead of penalizing.
        self.keyword_analyzer = ThaiKeywordAnalyzer()
        self.readability_analyzer = ThaiReadabilityAnalyzer()
        self.brand_voice = brand_voice or {}

    def score_keyword_optimization(self, text: str, keyword: str) -> float:
        """Score keyword optimization (0-25 points)."""
        analysis = self.keyword_analyzer.analyze(text, keyword)
        density = analysis['density']
        placements = analysis['critical_placements']
        score = 0
        # Density (10 points): 1.0-1.5% is the Thai-language optimum.
        if 1.0 <= density <= 1.5:
            score += 10
        elif 0.5 <= density < 1.0 or 1.5 < density <= 2.0:
            score += 5
        # Critical placements (5 points each, 15 total).
        if placements['in_first_100_words']:
            score += 5
        if placements['in_h1']:
            score += 5
        if placements['in_conclusion']:
            score += 5
        return score

    def score_readability(self, text: str) -> float:
        """Score readability (0-25 points)."""
        analysis = self.readability_analyzer.analyze(text)
        score = 0
        # Sentence length (10 points): 15-25 words per sentence is ideal.
        avg_len = analysis['avg_sentence_length']
        if 15 <= avg_len <= 25:
            score += 10
        elif 10 <= avg_len < 15 or 25 < avg_len <= 30:
            score += 6
        # Grade level (10 points), matched against the Thai grade labels
        # produced by ThaiReadabilityAnalyzer.estimate_grade_level().
        grade = analysis['grade_level']['thai']
        if "ม.10" in grade or "ม.12" in grade or "ปานกลาง" in grade:
            score += 10
        elif "ม.6" in grade or "ม.9" in grade or "ง่าย" in grade:
            score += 8
        # Paragraph structure (5 points).
        para = analysis['paragraph_structure']
        if para['paragraph_count'] >= 5 and para['avg_length_words'] < 200:
            score += 5
        elif para['paragraph_count'] >= 3:
            score += 3
        return score

    def score_structure(self, text: str) -> float:
        """Score content structure (0-25 points) from markdown headings."""
        score = 0
        lines = text.split('\n')
        h1_count = sum(1 for line in lines if line.startswith('# '))
        h2_count = sum(1 for line in lines if line.startswith('## '))
        h3_count = sum(1 for line in lines if line.startswith('### '))
        # Exactly one H1 (5 points).
        if h1_count == 1:
            score += 5
        # H2 sections (10 points): 4-7 is ideal.
        if 4 <= h2_count <= 7:
            score += 10
        elif 2 <= h2_count < 4 or 7 < h2_count <= 10:
            score += 6
        # H3 subsections (5 points).
        if h3_count >= 2:
            score += 5
        # Word count (5 points): 1500-3000 words is ideal.
        word_count = self.keyword_analyzer.count_words(text)
        if 1500 <= word_count <= 3000:
            score += 5
        elif 1000 <= word_count < 1500 or 3000 < word_count <= 4000:
            score += 3
        return score

    def score_brand_voice(self, text: str) -> float:
        """Score brand voice alignment (0-25 points)."""
        if not self.brand_voice:
            return 20  # Neutral default when no brand voice is defined.
        score = 0
        # Formality match (15 points).  detect_formality() returns labels
        # like 'ปกติ (Normal)' while the configured target is the bare
        # Thai word (e.g. 'ปกติ'), so the original exact-equality
        # comparison could never match -- compare by prefix (bug fix).
        formality = self.readability_analyzer.detect_formality(text)
        target_formality = self.brand_voice.get('formality', 'ปกติ')
        if formality['level'].startswith(target_formality):
            score += 15
        elif abs(formality['score'] - 50) < 20:
            score += 10
        # Banned terms (10 points when none appear in the text).
        banned_terms = self.brand_voice.get('avoid_terms', [])
        if not any(term in text for term in banned_terms):
            score += 10
        return min(score, 25)

    def calculate_overall_score(self, text: str, keyword: str) -> Dict:
        """Calculate the overall quality score (0-100) plus status and advice."""
        scores = {
            'keyword_optimization': self.score_keyword_optimization(text, keyword),
            'readability': self.score_readability(text),
            'structure': self.score_structure(text),
            'brand_voice': self.score_brand_voice(text)
        }
        total = sum(scores.values())
        # Map the total onto a publishing status.
        if total >= 90:
            status = "excellent"
            action = "Publish immediately"
        elif total >= 80:
            status = "good"
            action = "Minor tweaks, publishable"
        elif total >= 70:
            status = "fair"
            action = "Address priority fixes"
        else:
            status = "needs_work"
            action = "Significant improvements required"
        recommendations = self._generate_recommendations(scores, text, keyword)
        return {
            'overall_score': round(total, 1),
            'categories': scores,
            'status': status,
            'action': action,
            'publishing_readiness': total >= 70,
            'recommendations': recommendations
        }

    def _generate_recommendations(self, scores: Dict, text: str, keyword: str) -> List[str]:
        """Generate Thai-language recommendations for low-scoring categories."""
        recs = []
        # Keyword optimization below 20/25: inspect density and H1 placement.
        if scores['keyword_optimization'] < 20:
            keyword_analysis = self.keyword_analyzer.analyze(text, keyword)
            if keyword_analysis['density'] < 1.0:
                recs.append(f"เพิ่มการใช้คำหลัก '{keyword}' (ปัจจุบัน: {keyword_analysis['density']}%)")
            if not keyword_analysis['critical_placements']['in_h1']:
                recs.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")
        if scores['readability'] < 18:
            recs.append("ปรับปรุงการอ่านให้ง่ายขึ้น (ประโยคสั้นลง, ย่อหน้ามากขึ้น)")
        if scores['structure'] < 18:
            recs.append("ปรับปรุงโครงสร้าง (เพิ่ม H2, H3, จัดความยาวเนื้อหา)")
        if scores['brand_voice'] < 18:
            recs.append("ปรับ brand voice ให้ตรงกับคู่มือมากขึ้น")
        return recs
def load_context(context_path: str) -> Optional[Dict]:
    """Load brand-voice settings from a project context folder.

    Reads <context_path>/brand-voice.md and infers the target formality
    level from marker words in the file.  Returns None when the file
    does not exist.
    """
    voice_path = os.path.join(context_path, 'brand-voice.md')
    if not os.path.exists(voice_path):
        return None
    with open(voice_path, 'r', encoding='utf-8') as fh:
        body = fh.read()
    # Simplified parsing: the casual marker wins over the formal one
    # when both appear; otherwise fall back to the neutral level.
    if 'กันเอง' in body:
        level = 'กันเอง'
    elif 'เป็นทางการ' in body:
        level = 'เป็นทางการ'
    else:
        level = 'ปกติ'
    return {
        'formality': level,
        'avoid_terms': []
    }
def main():
    """CLI entry point: score a text or file against a target keyword."""
    # BUG FIX: 'sys' was previously only imported inside the module-level
    # 'except ImportError' fallback, so sys.exit() below raised NameError
    # whenever the analyzer imports succeeded.  Import it locally instead.
    import sys
    parser = argparse.ArgumentParser(
        description='Calculate content quality score (0-100)'
    )
    parser.add_argument(
        '--text', '-t',
        help='Text content to analyze'
    )
    parser.add_argument(
        '--file', '-f',
        help='File path to analyze'
    )
    parser.add_argument(
        '--keyword', '-k',
        required=True,
        help='Target keyword'
    )
    parser.add_argument(
        '--context', '-c',
        help='Path to context folder (optional)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )
    args = parser.parse_args()
    # Load the content: --file takes precedence over --text.
    if args.file:
        with open(args.file, 'r', encoding='utf-8') as f:
            text = f.read()
    elif args.text:
        text = args.text
    else:
        print("Error: Must provide --text or --file")
        sys.exit(1)
    # Load brand-voice context if a context folder was supplied.
    brand_voice = None
    if args.context and os.path.exists(args.context):
        brand_voice = load_context(args.context)
    # Score the content.
    scorer = ContentQualityScorer(brand_voice)
    result = scorer.calculate_overall_score(text, args.keyword)
    # Emit machine-readable JSON or a human-readable report.
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print("\n⭐ Content Quality Score\n")
        print(f"Overall Score: {result['overall_score']}/100")
        print(f"Status: {result['status']}")
        print(f"Action: {result['action']}")
        print(f"\nCategory Scores:")
        print(f" • Keyword Optimization: {result['categories']['keyword_optimization']}/25")
        print(f" • Readability: {result['categories']['readability']}/25")
        print(f" • Structure: {result['categories']['structure']}/25")
        print(f" • Brand Voice: {result['categories']['brand_voice']}/25")
        if result['recommendations']:
            print(f"\n💡 Priority Recommendations:")
            for rec in result['recommendations']:
                print(f"{rec}")
        print()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,11 @@
# SEO Analyzers - Dependencies
# Thai language processing (REQUIRED)
pythainlp>=3.2.0
# Data handling
pandas>=2.1.0
# Utilities
tqdm>=4.66.0
rich>=13.7.0

View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Thai Keyword Analyzer
Analyze keyword density in Thai text with PyThaiNLP integration.
Handles Thai language specifics (no spaces between words).
"""
import argparse
import json
import sys
from typing import Dict, List, Optional
try:
from pythainlp import word_tokenize
from pythainlp.util import normalize
THAI_SUPPORT = True
except ImportError:
THAI_SUPPORT = False
print("Warning: PyThaiNLP not installed. Install with: pip install pythainlp")
class ThaiKeywordAnalyzer:
    """Analyze keyword density and placement in Thai (or English) text.

    Uses PyThaiNLP's newmm tokenizer when available (module-level
    THAI_SUPPORT flag); otherwise falls back to whitespace splitting,
    which is only meaningful for space-delimited languages.
    """

    def __init__(self):
        # Common Thai function words (duplicate 'ที่' from the original
        # list removed; the set deduplicated it anyway).
        self.thai_stopwords = set([
            'และ', 'หรือ', 'แต่', 'ว่า', 'ถ้า', 'หาก', 'ซึ่ง', 'ที่', 'ใน', 'บน',
            'ใต้', 'เหนือ', 'จาก', 'ถึง', 'การ', 'ความ', 'อย่าง', 'เมื่อ',
            'สำหรับ', 'กับ', 'ของ', 'เป็น', 'อยู่', 'คือ', 'ได้', 'ให้', 'ไป', 'มา'
        ])

    def count_words(self, text: str) -> int:
        """Count words; Thai-aware when PyThaiNLP is installed."""
        if not THAI_SUPPORT:
            return len(text.split())
        tokens = word_tokenize(text, engine="newmm")
        return len([t for t in tokens if t.strip() and not t.isspace()])

    def calculate_density(self, text: str, keyword: str) -> float:
        """Calculate keyword density as a percentage of total words."""
        if not THAI_SUPPORT:
            text_words = text.lower().split()
            keyword_count = text.lower().count(keyword.lower())
            return (keyword_count / len(text_words) * 100) if text_words else 0
        # Normalize both sides so visually identical Thai strings match.
        text_norm = normalize(text)
        keyword_norm = normalize(keyword)
        count = text_norm.count(keyword_norm)
        word_count = self.count_words(text)
        return (count / word_count * 100) if word_count > 0 else 0

    def find_positions(self, text: str, keyword: str) -> List[int]:
        """Find all case-insensitive (possibly overlapping) keyword offsets."""
        positions = []
        text_lower = text.lower()
        keyword_lower = keyword.lower()
        start = 0
        while True:
            pos = text_lower.find(keyword_lower, start)
            if pos == -1:
                break
            positions.append(pos)
            start = pos + 1  # Advance by one so overlapping hits count.
        return positions

    def check_critical_placements(self, text: str, keyword: str) -> Dict:
        """Check whether the keyword appears in SEO-critical locations."""
        text_lower = text.lower()
        keyword_lower = keyword.lower()
        # First 200 chars (approximately the first 100 Thai words).
        in_first_100_words = keyword_lower in text_lower[:200]
        # H1: the first line, when it is a markdown heading.
        lines = text.split('\n')
        in_h1 = False
        if lines and lines[0].startswith('#'):
            in_h1 = keyword_lower in lines[0].lower()
        # Conclusion: the last 500 chars.  BUG FIX: the original returned
        # False for any text of 500 chars or less even when the keyword
        # was present there; slicing handles short texts correctly on
        # its own ([-500:] of a shorter string is the whole string).
        in_conclusion = keyword_lower in text_lower[-500:]
        # H2 headings containing the keyword.
        h2_count = sum(1 for line in lines if line.startswith('##') and keyword_lower in line.lower())
        return {
            'in_first_100_words': in_first_100_words,
            'in_h1': in_h1,
            'in_conclusion': in_conclusion,
            'in_h2_count': h2_count
        }

    def detect_stuffing(self, text: str, keyword: str, density: float) -> Dict:
        """Detect keyword-stuffing risk from overall and per-paragraph density."""
        risk_level = "none"
        warnings = []
        if density > 3.0:
            risk_level = "high"
            warnings.append(f"Keyword density {density:.1f}% is very high (over 3%)")
        elif density > 2.5:
            risk_level = "medium"
            warnings.append(f"Keyword density {density:.1f}% is high (over 2.5%)")
        # Clustering: any early paragraph with very high local density
        # escalates to "high" (simplified from the original's no-op
        # conditional, which always assigned "high" anyway).
        paragraphs = text.split('\n\n')
        for i, para in enumerate(paragraphs[:10]):  # First 10 paragraphs only.
            para_density = self.calculate_density(para, keyword)
            if para_density > 5.0:
                risk_level = "high"
                warnings.append(f"Paragraph {i+1} has very high density ({para_density:.1f}%)")
        return {
            'risk_level': risk_level,
            'warnings': warnings,
            'safe': risk_level in ["none", "low"]
        }

    def get_density_status(self, density: float, language: str = 'th') -> str:
        """Classify the density against the per-language target band."""
        if language == 'th':
            # Thai target: 1.0-1.5%.
            if density < 0.5:
                return "too_low"
            elif density < 1.0:
                return "slightly_low"
            elif density <= 1.5:
                return "optimal"
            elif density <= 2.0:
                return "slightly_high"
            else:
                return "too_high"
        else:
            # English target: 1.5-2.0%.
            if density < 1.0:
                return "too_low"
            elif density < 1.5:
                return "slightly_low"
            elif density <= 2.0:
                return "optimal"
            elif density <= 2.5:
                return "slightly_high"
            else:
                return "too_high"

    def get_recommendations(self, density: float, placements: Dict, language: str = 'th') -> List[str]:
        """Generate language-appropriate recommendations."""
        recs = []
        if language == 'th':
            if density < 1.0:
                recs.append("เพิ่มการใช้คำหลักในเนื้อหา (target: 1.0-1.5%)")
            elif density > 2.0:
                recs.append("ลดการใช้คำหลักลง อาจถูกมองว่า keyword stuffing")
            if not placements['in_first_100_words']:
                recs.append("เพิ่มคำหลักในย่อหน้าแรก (100 คำแรก)")
            if not placements['in_h1']:
                recs.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")
            if not placements['in_conclusion']:
                recs.append("เพิ่มคำหลักในบทสรุป")
            if placements['in_h2_count'] < 2:
                recs.append("เพิ่มคำหลักในหัวข้อรอง (H2) อย่างน้อย 2-3 แห่ง")
        else:
            if density < 1.5:
                recs.append("Increase keyword usage (target: 1.5-2.0%)")
            elif density > 2.5:
                recs.append("Reduce keyword usage to avoid stuffing penalty")
            if not placements['in_first_100_words']:
                recs.append("Add keyword in first 100 words")
            if not placements['in_h1']:
                recs.append("Add keyword in H1 headline")
            if not placements['in_conclusion']:
                recs.append("Add keyword in conclusion")
        return recs

    def analyze(self, text: str, keyword: str, language: str = 'th') -> Dict:
        """Run the full keyword analysis and return a report dict."""
        word_count = self.count_words(text)
        density = self.calculate_density(text, keyword)
        positions = self.find_positions(text, keyword)
        placements = self.check_critical_placements(text, keyword)
        stuffing = self.detect_stuffing(text, keyword, density)
        status = self.get_density_status(density, language)
        recommendations = self.get_recommendations(density, placements, language)
        return {
            'word_count': word_count,
            'keyword': keyword,
            'occurrences': len(positions),
            'density': round(density, 2),
            'target_density': '1.0-1.5%' if language == 'th' else '1.5-2.0%',
            'status': status,
            'critical_placements': placements,
            'keyword_stuffing_risk': stuffing['risk_level'],
            'recommendations': recommendations
        }
def main():
    """CLI entry point for the keyword analyzer."""
    arg_parser = argparse.ArgumentParser(
        description='Analyze keyword density in Thai or English text'
    )
    arg_parser.add_argument('--text', '-t', required=True,
                            help='Text content to analyze')
    arg_parser.add_argument('--keyword', '-k', required=True,
                            help='Target keyword')
    arg_parser.add_argument('--language', '-l', choices=['th', 'en'],
                            default='th',
                            help='Content language (default: th)')
    arg_parser.add_argument('--output', '-o', choices=['json', 'text'],
                            default='text',
                            help='Output format (default: text)')
    opts = arg_parser.parse_args()

    # Run the analysis.
    report = ThaiKeywordAnalyzer().analyze(opts.text, opts.keyword, opts.language)

    # Render as JSON or as a human-readable summary.
    if opts.output == 'json':
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return
    placements = report['critical_placements']
    print("\n📊 Keyword Analysis Results\n")
    print(f"Keyword: {report['keyword']}")
    print(f"Word Count: {report['word_count']}")
    print(f"Occurrences: {report['occurrences']}")
    print(f"Density: {report['density']}% (target: {report['target_density']})")
    print(f"Status: {report['status']}")
    print(f"\nCritical Placements:")
    print(f" ✓ First 100 words: {'Yes' if placements['in_first_100_words'] else 'No'}")
    print(f" ✓ H1 Headline: {'Yes' if placements['in_h1'] else 'No'}")
    print(f" ✓ Conclusion: {'Yes' if placements['in_conclusion'] else 'No'}")
    print(f" ✓ H2 Headings: {placements['in_h2_count']} found")
    print(f"\nKeyword Stuffing Risk: {report['keyword_stuffing_risk']}")
    if report['recommendations']:
        print(f"\n💡 Recommendations:")
        for tip in report['recommendations']:
            print(f"{tip}")
    print()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
"""
Thai Readability Analyzer
Analyze Thai text readability with PyThaiNLP integration.
Detects formality level, grade level, and sentence structure.
"""
import argparse
import json
import re
from typing import Dict, List
try:
from pythainlp import word_tokenize, sent_tokenize
THAI_SUPPORT = True
except ImportError:
THAI_SUPPORT = False
print("Warning: PyThaiNLP not installed. Install with: pip install pythainlp")
class ThaiReadabilityAnalyzer:
    """Analyze Thai text readability.

    Estimates formality (from particle usage), grade level, and sentence
    and paragraph structure.  Uses PyThaiNLP when available (module-level
    THAI_SUPPORT flag); otherwise falls back to whitespace/punctuation
    heuristics suited to space-delimited text.
    """

    def __init__(self):
        # Markers of formal register (polite particles, formal pronouns,
        # literary connectives).
        self.formal_particles = [
            'ครับ', 'ค่ะ', 'ข้าพเจ้า', 'กระผม', 'ดิฉัน', 'ท่าน', 'ซึ่ง', 'อัน',
            'ย่อม', 'ย่อมเป็น', 'ประการ', 'ดังกล่าว', 'ดังกล่าวแล้ว', 'ดังนี้'
        ]
        # Markers of casual register (colloquial particles and slang).
        self.informal_particles = [
            'นะ', 'จ้ะ', 'อ่ะ', 'มั้ย', 'เปล่าว่ะ', 'gue', 'mang', 'เว้ย',
            'วะ', 'เหอะ', 'ซิ', 'นู่น', 'นี่', 'นั่น', 'โครต', 'มาก'
        ]

    def count_sentences(self, text: str) -> int:
        """Count sentences; Thai-aware when PyThaiNLP is installed."""
        if not THAI_SUPPORT:
            # Fallback: count terminal punctuation.  BUG FIX: the original
            # list also contained two empty strings (likely garbled Thai
            # characters), and str.count('') returns len(text) + 1, which
            # wildly inflated the sentence count.
            endings = ['.', '!', '?']
            count = sum(text.count(e) for e in endings)
            return max(count, 1)
        sentences = sent_tokenize(text, engine="whitespace")
        return len([s for s in sentences if s.strip()])

    def count_words(self, text: str) -> int:
        """Count words; Thai-aware when PyThaiNLP is installed."""
        if not THAI_SUPPORT:
            return len(text.split())
        tokens = word_tokenize(text, engine="newmm")
        return len([t for t in tokens if t.strip()])

    def calculate_avg_sentence_length(self, text: str) -> float:
        """Average words per sentence (0 when no sentences are found)."""
        if not THAI_SUPPORT:
            sentences = re.split(r'[.!?]', text)
            sentences = [s for s in sentences if s.strip()]
            if not sentences:
                return 0
            words = text.split()
            return len(words) / len(sentences)
        sentences = sent_tokenize(text, engine="whitespace")
        sentences = [s for s in sentences if s.strip()]
        if not sentences:
            return 0
        total_words = sum(
            len(word_tokenize(s, engine="newmm"))
            for s in sentences
        )
        return total_words / len(sentences)

    def detect_formality(self, text: str) -> Dict:
        """Detect formality from the ratio of formal vs informal particles."""
        formal_count = sum(text.count(p) for p in self.formal_particles)
        informal_count = sum(text.count(p) for p in self.informal_particles)
        total = formal_count + informal_count
        if total == 0:
            ratio = 0.5  # No particles at all: assume a neutral register.
        else:
            ratio = formal_count / total
        if ratio > 0.6:
            level = "เป็นทางการ (Formal)"
            score = 80
        elif ratio < 0.4:
            level = "กันเอง (Casual)"
            score = 20
        else:
            level = "ปกติ (Normal)"
            score = 50
        return {
            'level': level,
            'score': score,
            'formal_particle_count': formal_count,
            'informal_particle_count': informal_count,
            'ratio': round(ratio, 2)
        }

    def estimate_grade_level(self, avg_sentence_length: float, formality_score: int) -> Dict:
        """Estimate the Thai school grade level from sentence complexity."""
        # BUG FIX: the original assigned e.g. 'grade_num = 6-9', which is
        # the arithmetic expression 6 - 9 == -3, not a grade range, so
        # the 10-12 band mapped to the wrong US equivalent.
        # _thai_to_us_grade() explicitly expects a range object (or int),
        # so build real ranges here.
        if avg_sentence_length < 15:
            grade_th = "ง่าย (ม.6-ม.9)"
            grade_num = range(6, 10)
        elif avg_sentence_length < 25:
            grade_th = "ปานกลาง (ม.10-ม.12)"
            grade_num = range(10, 13)
        else:
            grade_th = "ยาก (ม.13+)"
            grade_num = 13
        # Tag the label with register when the text is clearly formal/casual.
        if formality_score > 70:
            grade_th += " (ทางการ)"
        elif formality_score < 30:
            grade_th += " (กันเอง)"
        return {
            'thai': grade_th,
            'numeric_range': grade_num,
            'us_equivalent': self._thai_to_us_grade(grade_num)
        }

    def _thai_to_us_grade(self, thai_grade_range) -> str:
        """Convert a Thai grade (range or int) to a rough US equivalent."""
        if isinstance(thai_grade_range, range):
            avg = sum(thai_grade_range) / len(thai_grade_range)
        elif isinstance(thai_grade_range, int):
            avg = thai_grade_range
        else:
            avg = 10
        # Very rough conversion.
        if avg <= 9:
            return "6th-8th grade"
        elif avg <= 12:
            return "9th-12th grade"
        else:
            return "College+"

    def analyze_paragraph_structure(self, text: str) -> Dict:
        """Summarize paragraph counts and lengths (split on blank lines)."""
        paragraphs = [p for p in text.split('\n\n') if p.strip()]
        if not paragraphs:
            return {
                'paragraph_count': 0,
                'avg_length_words': 0,
                'avg_length_sentences': 0
            }
        paragraph_lengths = [
            self.count_words(p)
            for p in paragraphs
        ]
        paragraph_sentences = [
            self.count_sentences(p)
            for p in paragraphs
        ]
        return {
            'paragraph_count': len(paragraphs),
            'avg_length_words': round(sum(paragraph_lengths) / len(paragraphs), 1),
            'avg_length_sentences': round(sum(paragraph_sentences) / len(paragraphs), 1),
            'shortest_paragraph': min(paragraph_lengths),
            'longest_paragraph': max(paragraph_lengths)
        }

    def calculate_readability_score(self, avg_sentence_length: float, formality_score: int,
                                    paragraph_score: float) -> float:
        """
        Calculate the overall readability score (0-100).

        Factors:
        - Sentence length (optimal: 15-25 words) -- up to 40 points
        - Formality (optimal: 40-60 for general content) -- up to 30 points
        - Paragraph structure (0-1 variety score) -- up to 30 points
        """
        # Sentence length score (0-40).
        if 15 <= avg_sentence_length <= 25:
            sentence_score = 40
        elif 10 <= avg_sentence_length < 15 or 25 < avg_sentence_length <= 30:
            sentence_score = 30
        elif avg_sentence_length < 10:
            sentence_score = 20
        else:
            sentence_score = 15
        # Formality score (0-30); optimal 40-60 (normal/formal mix).
        if 40 <= formality_score <= 60:
            formality_points = 30
        elif 30 <= formality_score < 40 or 60 < formality_score <= 70:
            formality_points = 25
        else:
            formality_points = 15
        # Paragraph score (0-30), scaled from the 0-1 variety score.
        paragraph_points = min(30, paragraph_score * 30)
        total = sentence_score + formality_points + paragraph_points
        return round(total, 1)

    def get_recommendations(self, analysis: Dict) -> List[str]:
        """Generate Thai-language recommendations from the analysis dict."""
        recs = []
        avg_len = analysis['avg_sentence_length']
        if avg_len < 15:
            recs.append("ประโยคสั้นเกินไป พิจารณาเพิ่มรายละเอียดบ้าง")
        elif avg_len > 25:
            recs.append("ประโยคยาวเกินไป แบ่งออกเป็น 2-3 ประโยคจะอ่านง่ายขึ้น")
        formality = analysis['formality']['level']
        if "เป็นทางการ" in formality:
            recs.append("ภาษาเป็นทางการเกินไปสำหรับเนื้อหาทั่วไป พิจารณาใช้ภาษาที่เป็นกันเองมากขึ้น")
        elif "กันเอง" in formality:
            recs.append("ภาษาเป็นกันเองมาก ตรวจสอบว่าเหมาะกับกลุ่มเป้าหมายหรือไม่")
        para = analysis['paragraph_structure']
        if para['avg_length_words'] > 200:
            recs.append("บางย่อหน้ายาวเกินไป แบ่งย่อหน้าเพื่อให้อ่านง่ายขึ้น")
        if para['paragraph_count'] < 5:
            recs.append("เพิ่มจำนวนย่อหน้าเพื่อให้อ่านง่ายขึ้น")
        return recs

    def analyze(self, text: str) -> Dict:
        """Run the full readability analysis and return a report dict."""
        avg_sentence_length = self.calculate_avg_sentence_length(text)
        formality = self.detect_formality(text)
        grade_level = self.estimate_grade_level(avg_sentence_length, formality['score'])
        paragraph_structure = self.analyze_paragraph_structure(text)
        # Paragraph score (0-1): reward length variety between paragraphs.
        # (An unused 'lengths' local from the original was removed.)
        para_score = 0.5  # Default when there are no paragraphs.
        if paragraph_structure['paragraph_count'] > 0:
            if paragraph_structure['shortest_paragraph'] != paragraph_structure['longest_paragraph']:
                para_score = 0.8  # Good variety
            else:
                para_score = 0.6  # Uniform lengths
        readability_score = self.calculate_readability_score(
            avg_sentence_length,
            formality['score'],
            para_score
        )
        recommendations = self.get_recommendations({
            'avg_sentence_length': avg_sentence_length,
            'formality': formality,
            'paragraph_structure': paragraph_structure
        })
        return {
            'avg_sentence_length': round(avg_sentence_length, 1),
            'sentence_count': self.count_sentences(text),
            'word_count': self.count_words(text),
            'grade_level': grade_level,
            'formality': formality,
            'paragraph_structure': paragraph_structure,
            'readability_score': readability_score,
            'recommendations': recommendations
        }
def main():
    """CLI entry point for the readability analyzer."""
    arg_parser = argparse.ArgumentParser(
        description='Analyze Thai text readability'
    )
    arg_parser.add_argument('--text', '-t', required=True,
                            help='Text content to analyze')
    arg_parser.add_argument('--output', '-o', choices=['json', 'text'],
                            default='text',
                            help='Output format (default: text)')
    opts = arg_parser.parse_args()

    # Run the analysis.
    report = ThaiReadabilityAnalyzer().analyze(opts.text)

    # Render as JSON or as a human-readable summary.
    if opts.output == 'json':
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return
    grade = report['grade_level']
    tone = report['formality']
    paras = report['paragraph_structure']
    print("\n📖 Thai Readability Analysis\n")
    print(f"Sentence Count: {report['sentence_count']}")
    print(f"Word Count: {report['word_count']}")
    print(f"Avg Sentence Length: {report['avg_sentence_length']} words")
    print(f"\nGrade Level: {grade['thai']}")
    print(f"US Equivalent: {grade['us_equivalent']}")
    print(f"\nFormality: {tone['level']} (score: {tone['score']})")
    print(f" - Formal particles: {tone['formal_particle_count']}")
    print(f" - Informal particles: {tone['informal_particle_count']}")
    print(f"\nParagraph Structure:")
    print(f" - Count: {paras['paragraph_count']}")
    print(f" - Avg length: {paras['avg_length_words']} words")
    print(f"\nReadability Score: {report['readability_score']}/100")
    if report['recommendations']:
        print(f"\n💡 Recommendations:")
        for tip in report['recommendations']:
            print(f"{tip}")
    print()


if __name__ == '__main__':
    main()