Files
opencode-skill/skills/website-creator/seo-analyzers/scripts/thai_readability.py
Kunthawat Greethong b26c8199a5 Update skills: add website-creator, mql-developer, ecommerce-astro
Changes:
- Add FAL_KEY and GEMINI_API_KEY to .env.example
- Update picture-it to use ~/.config/opencode/.env (unified creds)
- Remove shodh-memory skill (no longer used)
- Remove alphaear-* skills (deprecated)
- Remove thai-frontend-dev skill (replaced by website-creator)
- Remove theme-factory skill
- Add mql-developer skill (MQL5 trading)
- Add ecommerce-astro skill (Astro e-commerce)
- Add website-creator skill (Next.js + Payload CMS)
- Update install script for new skills
2026-04-16 17:40:27 +07:00

335 lines
12 KiB
Python

#!/usr/bin/env python3
"""
Thai Readability Analyzer
Analyze Thai text readability with PyThaiNLP integration.
Detects formality level, grade level, and sentence structure.
"""
import argparse
import json
import re
import sys
from typing import Dict, List
# PyThaiNLP is an optional dependency; THAI_SUPPORT records whether its
# tokenizers could be imported so the rest of the module can branch on it.
try:
    from pythainlp import word_tokenize, sent_tokenize
    THAI_SUPPORT = True
except ImportError:
    THAI_SUPPORT = False
    # Warn on stderr, not stdout: stdout may carry the JSON report
    # (--output json) and must stay machine-readable.
    print(
        "Warning: PyThaiNLP not installed. Install with: pip install pythainlp",
        file=sys.stderr,
    )
class ThaiReadabilityAnalyzer:
    """Analyze Thai text readability.

    Combines average sentence length, a particle-based formality estimate
    and paragraph statistics into a 0-100 readability score plus a list of
    recommendations. When the module-level ``THAI_SUPPORT`` flag is false,
    sentence and word counting fall back to punctuation/whitespace splitting.
    """

    def __init__(self):
        """Initialise the particle lists used by ``detect_formality``."""
        self.formal_particles = [
            'ครับ', 'ค่ะ', 'ข้าพเจ้า', 'กระผม', 'ดิฉัน', 'ท่าน', 'ซึ่ง', 'อัน',
            'ย่อม', 'ย่อมเป็น', 'ประการ', 'ดังกล่าว', 'ดังกล่าวแล้ว', 'ดังนี้'
        ]
        self.informal_particles = [
            'นะ', 'จ้ะ', 'อ่ะ', 'มั้ย', 'เปล่าว่ะ', 'gue', 'mang', 'เว้ย',
            'วะ', 'เหอะ', 'ซิ', 'นู่น', 'นี่', 'นั่น', 'โครต', 'มาก'
        ]

    @staticmethod
    def _split_fallback_sentences(text: str) -> List[str]:
        """Split on '.', '!' and '?' and drop blank pieces (no-PyThaiNLP path)."""
        return [piece for piece in re.split(r'[.!?]', text) if piece.strip()]

    @staticmethod
    def _split_thai_sentences(text: str) -> List[str]:
        """Split with PyThaiNLP's ``whitespace`` engine and drop blank pieces."""
        return [s for s in sent_tokenize(text, engine="whitespace") if s.strip()]

    @staticmethod
    def _count_particles(text: str, particles: List[str]) -> int:
        """Count non-overlapping particle occurrences, longest particle first.

        A single alternation pattern is used so that a particle contained in
        a longer one (e.g. 'ดังกล่าว' inside 'ดังกล่าวแล้ว') is not counted a
        second time for the same stretch of text.
        """
        # Empty strings are dropped: an empty alternative would match at
        # every position. Sorting by (-length, text) keeps the pattern
        # deterministic and makes the longest candidate win at each position.
        candidates = sorted({p for p in particles if p}, key=lambda p: (-len(p), p))
        if not candidates:
            return 0
        pattern = re.compile('|'.join(re.escape(p) for p in candidates))
        return sum(1 for _ in pattern.finditer(text))

    def count_sentences(self, text: str) -> int:
        """Count sentences in ``text``.

        Without PyThaiNLP the text is split on '.', '!' and '?' and the
        non-blank pieces are counted; the result is never below 1. With
        PyThaiNLP the non-blank segments from ``sent_tokenize`` are counted
        (which can be 0 for blank input).
        """
        if not THAI_SUPPORT:
            # Uses the same splitter as calculate_avg_sentence_length so the
            # two fallback figures agree with each other.
            return max(len(self._split_fallback_sentences(text)), 1)
        return len(self._split_thai_sentences(text))

    def count_words(self, text: str) -> int:
        """Count words in ``text``, ignoring whitespace-only tokens.

        Without PyThaiNLP this is a plain ``str.split`` count.
        """
        if not THAI_SUPPORT:
            return len(text.split())
        tokens = word_tokenize(text, engine="newmm")
        return len([t for t in tokens if t.strip()])

    def calculate_avg_sentence_length(self, text: str) -> float:
        """Return the average number of words per sentence (0.0 if none)."""
        if not THAI_SUPPORT:
            sentences = self._split_fallback_sentences(text)
            if not sentences:
                return 0.0
            return len(text.split()) / len(sentences)
        sentences = self._split_thai_sentences(text)
        if not sentences:
            return 0.0
        # count_words skips whitespace-only tokens, so the average is built
        # from the same notion of "word" as the reported word count.
        total_words = sum(self.count_words(s) for s in sentences)
        return total_words / len(sentences)

    def detect_formality(self, text: str) -> Dict:
        """Estimate the formality level from formal/informal particle counts.

        Returns a dict with ``level`` (label), ``score`` (80 formal,
        50 normal, 20 casual), both particle counts, and ``ratio`` (the
        formal share of all matched particles, 0.5 when none matched).
        """
        formal_count = self._count_particles(text, self.formal_particles)
        informal_count = self._count_particles(text, self.informal_particles)
        total = formal_count + informal_count
        # No particles at all is treated as neutral.
        ratio = formal_count / total if total else 0.5
        if ratio > 0.6:
            level = "เป็นทางการ (Formal)"
            score = 80
        elif ratio < 0.4:
            level = "กันเอง (Casual)"
            score = 20
        else:
            level = "ปกติ (Normal)"
            score = 50
        return {
            'level': level,
            'score': score,
            'formal_particle_count': formal_count,
            'informal_particle_count': informal_count,
            'ratio': round(ratio, 2)
        }

    def estimate_grade_level(self, avg_sentence_length: float, formality_score: int) -> Dict:
        """Estimate a grade level from the average sentence length.

        Returns a dict with ``thai`` (label, suffixed with a formality note
        when ``formality_score`` is above 70 or below 30), ``numeric_range``
        (the grade span as a JSON-friendly string: "6-9", "10-12" or "13+")
        and ``us_equivalent``.
        """
        # The span is kept as a real range for the conversion and as a string
        # for output; writing it as `6-9` would be integer subtraction.
        if avg_sentence_length < 15:
            grade_th = "ง่าย (ม.6-ม.9)"
            grade_span = range(6, 10)
            grade_label = "6-9"
        elif avg_sentence_length < 25:
            grade_th = "ปานกลาง (ม.10-ม.12)"
            grade_span = range(10, 13)
            grade_label = "10-12"
        else:
            grade_th = "ยาก (ม.13+)"
            grade_span = range(13, 14)
            grade_label = "13+"
        # Adjust the label for formality
        if formality_score > 70:
            grade_th += " (ทางการ)"
        elif formality_score < 30:
            grade_th += " (กันเอง)"
        return {
            'thai': grade_th,
            'numeric_range': grade_label,
            'us_equivalent': self._thai_to_us_grade(grade_span)
        }

    def _thai_to_us_grade(self, thai_grade_range) -> str:
        """Convert a Thai grade (or grade span) to a rough US equivalent.

        Accepts a non-empty ``range``, a number, or a string containing the
        grade numbers (such as "10-12" or "13+"); the mean of the grades is
        used. Anything else is treated as grade 10.
        """
        if isinstance(thai_grade_range, range) and len(thai_grade_range) > 0:
            avg = sum(thai_grade_range) / len(thai_grade_range)
        elif isinstance(thai_grade_range, (int, float)):
            avg = thai_grade_range
        elif isinstance(thai_grade_range, str):
            bounds = [int(n) for n in re.findall(r'\d+', thai_grade_range)]
            avg = sum(bounds) / len(bounds) if bounds else 10
        else:
            avg = 10
        # Very rough conversion
        if avg <= 9:
            return "6th-8th grade"
        elif avg <= 12:
            return "9th-12th grade"
        else:
            return "College+"

    def analyze_paragraph_structure(self, text: str) -> Dict:
        """Summarise paragraphs, which are separated by blank lines.

        Returns the paragraph count and the average length in words and in
        sentences. When at least one paragraph exists, the word counts of
        the shortest and longest paragraphs are included as well.
        """
        paragraphs = [p for p in text.split('\n\n') if p.strip()]
        if not paragraphs:
            return {
                'paragraph_count': 0,
                'avg_length_words': 0,
                'avg_length_sentences': 0
            }
        paragraph_lengths = [self.count_words(p) for p in paragraphs]
        paragraph_sentences = [self.count_sentences(p) for p in paragraphs]
        return {
            'paragraph_count': len(paragraphs),
            'avg_length_words': round(sum(paragraph_lengths) / len(paragraphs), 1),
            'avg_length_sentences': round(sum(paragraph_sentences) / len(paragraphs), 1),
            'shortest_paragraph': min(paragraph_lengths),
            'longest_paragraph': max(paragraph_lengths)
        }

    def calculate_readability_score(self, avg_sentence_length: float, formality_score: int,
                                    paragraph_score: float) -> float:
        """
        Calculate overall readability score (0-100)

        Factors:
        - Sentence length, up to 40 points (optimal: 15-25 words)
        - Formality, up to 30 points (optimal: 40-60 for general content)
        - Paragraph structure, up to 30 points (``paragraph_score`` in 0-1,
          scaled by 30 and clamped to the 0-30 band)
        """
        # Sentence length score (0-40)
        if 15 <= avg_sentence_length <= 25:
            sentence_score = 40
        elif 10 <= avg_sentence_length < 15 or 25 < avg_sentence_length <= 30:
            sentence_score = 30
        elif avg_sentence_length < 10:
            sentence_score = 20
        else:
            sentence_score = 15
        # Formality score (0-30)
        # Optimal: 40-60 (normal/formal mix)
        if 40 <= formality_score <= 60:
            formality_points = 30
        elif 30 <= formality_score < 40 or 60 < formality_score <= 70:
            formality_points = 25
        else:
            formality_points = 15
        # Paragraph score (0-30); clamped on both sides so an out-of-range
        # paragraph_score cannot push the total outside 0-100.
        paragraph_points = max(0, min(30, paragraph_score * 30))
        total = sentence_score + formality_points + paragraph_points
        return round(total, 1)

    def get_recommendations(self, analysis: Dict) -> List[str]:
        """Build the list of recommendation messages.

        ``analysis`` must provide ``avg_sentence_length``, ``formality``
        (with a ``level`` label) and ``paragraph_structure`` (with
        ``avg_length_words`` and ``paragraph_count``).
        """
        recs = []
        avg_len = analysis['avg_sentence_length']
        if avg_len < 15:
            recs.append("ประโยคสั้นเกินไป พิจารณาเพิ่มรายละเอียดบ้าง")
        elif avg_len > 25:
            recs.append("ประโยคยาวเกินไป แบ่งออกเป็น 2-3 ประโยคจะอ่านง่ายขึ้น")
        formality = analysis['formality']['level']
        if "เป็นทางการ" in formality:
            recs.append("ภาษาเป็นทางการเกินไปสำหรับเนื้อหาทั่วไป พิจารณาใช้ภาษาที่เป็นกันเองมากขึ้น")
        elif "กันเอง" in formality:
            recs.append("ภาษาเป็นกันเองมาก ตรวจสอบว่าเหมาะกับกลุ่มเป้าหมายหรือไม่")
        para = analysis['paragraph_structure']
        if para['avg_length_words'] > 200:
            recs.append("บางย่อหน้ายาวเกินไป แบ่งย่อหน้าเพื่อให้อ่านง่ายขึ้น")
        if para['paragraph_count'] < 5:
            recs.append("เพิ่มจำนวนย่อหน้าเพื่อให้อ่านง่ายขึ้น")
        return recs

    def analyze(self, text: str) -> Dict:
        """Run the full readability analysis and return a result dict.

        The result holds the average sentence length, sentence and word
        counts, grade level, formality, paragraph structure, the overall
        readability score and the recommendations.
        """
        avg_sentence_length = self.calculate_avg_sentence_length(text)
        formality = self.detect_formality(text)
        grade_level = self.estimate_grade_level(avg_sentence_length, formality['score'])
        paragraph_structure = self.analyze_paragraph_structure(text)
        # Paragraph score (0-1): 0.5 when there are no paragraphs, otherwise
        # based on whether paragraph lengths vary at all.
        para_score = 0.5
        if paragraph_structure['paragraph_count'] > 0:
            if paragraph_structure['shortest_paragraph'] != paragraph_structure['longest_paragraph']:
                para_score = 0.8  # Good variety
            else:
                para_score = 0.6  # Same length
        readability_score = self.calculate_readability_score(
            avg_sentence_length,
            formality['score'],
            para_score
        )
        recommendations = self.get_recommendations({
            'avg_sentence_length': avg_sentence_length,
            'formality': formality,
            'paragraph_structure': paragraph_structure
        })
        return {
            'avg_sentence_length': round(avg_sentence_length, 1),
            'sentence_count': self.count_sentences(text),
            'word_count': self.count_words(text),
            'grade_level': grade_level,
            'formality': formality,
            'paragraph_structure': paragraph_structure,
            'readability_score': readability_score,
            'recommendations': recommendations
        }
def main():
    """Command-line entry point: analyze ``--text`` and print the report.

    ``--output json`` prints the raw result dict as JSON; the default
    ``text`` format prints a human-readable summary.
    """
    cli = argparse.ArgumentParser(description='Analyze Thai text readability')
    cli.add_argument('--text', '-t', required=True, help='Text content to analyze')
    cli.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)',
    )
    options = cli.parse_args()

    # Run the analysis
    report = ThaiReadabilityAnalyzer().analyze(options.text)

    # Emit the report in the requested format
    if options.output == 'json':
        print(json.dumps(report, indent=2, ensure_ascii=False))
    else:
        grade = report['grade_level']
        tone = report['formality']
        layout = report['paragraph_structure']
        summary = [
            "\n📖 Thai Readability Analysis\n",
            f"Sentence Count: {report['sentence_count']}",
            f"Word Count: {report['word_count']}",
            f"Avg Sentence Length: {report['avg_sentence_length']} words",
            f"\nGrade Level: {grade['thai']}",
            f"US Equivalent: {grade['us_equivalent']}",
            f"\nFormality: {tone['level']} (score: {tone['score']})",
            f" - Formal particles: {tone['formal_particle_count']}",
            f" - Informal particles: {tone['informal_particle_count']}",
            "\nParagraph Structure:",
            f" - Count: {layout['paragraph_count']}",
            f" - Avg length: {layout['avg_length_words']} words",
            f"\nReadability Score: {report['readability_score']}/100",
        ]
        for line in summary:
            print(line)
        tips = report['recommendations']
        if tips:
            print("\n💡 Recommendations:")
            for tip in tips:
                print(f"{tip}")
        # Trailing blank line closes the text report.
        print()


if __name__ == '__main__':
    main()