Auto-sync from website-creator

This commit is contained in:
Kunthawat Greethong
2026-03-08 23:03:19 +07:00
commit 9be686f587
117 changed files with 24737 additions and 0 deletions

View File

@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Thai Keyword Analyzer
Analyze keyword density in Thai text with PyThaiNLP integration.
Handles Thai language specifics (no spaces between words).
"""
import argparse
import json
import sys
from typing import Dict, List, Optional
# Optional dependency: PyThaiNLP provides proper Thai word segmentation.
# Without it the analyzer degrades to whitespace tokenization, which is
# inaccurate for Thai (no spaces between words).
try:
    from pythainlp import word_tokenize
    from pythainlp.util import normalize
    THAI_SUPPORT = True
except ImportError:
    THAI_SUPPORT = False
    # Warn on stderr so machine-readable stdout (e.g. --output json) stays clean.
    print(
        "Warning: PyThaiNLP not installed. Install with: pip install pythainlp",
        file=sys.stderr,
    )
class ThaiKeywordAnalyzer:
    """Analyze keyword density and placement in Thai (or English) text.

    Thai has no spaces between words, so accurate word counts require a
    tokenizer.  When PyThaiNLP is importable, the ``newmm`` engine is used;
    otherwise the analyzer falls back to whitespace splitting, which is
    only meaningful for space-delimited languages such as English.
    """

    def __init__(self):
        # Common Thai function words (conjunctions, prepositions, particles).
        # Exposed for callers that want to filter tokens; not used internally.
        self.thai_stopwords = {
            'และ', 'หรือ', 'แต่', 'ว่า', 'ถ้า', 'หาก', 'ซึ่ง', 'ที่', 'ใน', 'บน',
            'ใต้', 'เหนือ', 'จาก', 'ถึง', 'การ', 'ความ', 'อย่าง', 'เมื่อ',
            'สำหรับ', 'กับ', 'ของ', 'เป็น', 'อยู่', 'คือ', 'ได้', 'ให้', 'ไป', 'มา'
        }
        # Resolve PyThaiNLP per-instance so the class is self-contained and
        # degrades gracefully when the package is missing.
        try:
            from pythainlp import word_tokenize
            from pythainlp.util import normalize
            self._tokenize = word_tokenize
            self._normalize = normalize
        except ImportError:
            self._tokenize = None
            self._normalize = None

    def count_words(self, text: str) -> int:
        """Count words in *text*, Thai-aware when PyThaiNLP is available.

        Falls back to whitespace splitting without PyThaiNLP.
        """
        if self._tokenize is None:
            return len(text.split())
        tokens = self._tokenize(text, engine="newmm")
        # newmm emits whitespace runs as tokens; exclude them from the count.
        return len([t for t in tokens if t.strip() and not t.isspace()])

    def calculate_density(self, text: str, keyword: str) -> float:
        """Return keyword occurrences as a percentage of the word count.

        Substring counting is used (suits Thai, where the keyword may sit
        inside an unsegmented run of characters); the denominator is the
        tokenized word count.  An empty keyword yields 0.0 (str.count('')
        would otherwise match at every offset).
        """
        if not keyword:
            return 0.0
        if self._normalize is None:
            words = text.lower().split()
            occurrences = text.lower().count(keyword.lower())
            return (occurrences / len(words) * 100) if words else 0.0
        # Normalize both sides so visually-identical Thai spellings match.
        occurrences = self._normalize(text).count(self._normalize(keyword))
        word_count = self.count_words(text)
        return (occurrences / word_count * 100) if word_count > 0 else 0.0

    def find_positions(self, text: str, keyword: str) -> List[int]:
        """Return the start index of every case-insensitive keyword match.

        Overlapping matches are included.  An empty keyword returns []
        rather than matching at every offset.
        """
        if not keyword:
            return []
        positions: List[int] = []
        haystack = text.lower()
        needle = keyword.lower()
        pos = haystack.find(needle)
        while pos != -1:
            positions.append(pos)
            pos = haystack.find(needle, pos + 1)
        return positions

    def check_critical_placements(self, text: str, keyword: str) -> Dict:
        """Check whether *keyword* appears in SEO-critical locations.

        Returns a dict with keys 'in_first_100_words', 'in_h1',
        'in_conclusion' and 'in_h2_count'.
        """
        text_lower = text.lower()
        keyword_lower = keyword.lower()
        # First 200 chars approximates the first 100 Thai words.
        in_first_100_words = keyword_lower in text_lower[:200]
        # H1: the first line, when it is a Markdown heading.
        lines = text.split('\n')
        in_h1 = bool(lines) and lines[0].startswith('#') and keyword_lower in lines[0].lower()
        # Conclusion zone: the last 500 chars.  For shorter texts that zone
        # is the whole text (previously short texts always reported False,
        # even when the keyword was present).
        in_conclusion = keyword_lower in text_lower[-500:] if text else False
        # H2 headings containing the keyword.  NOTE(review): startswith('##')
        # also matches deeper levels ('###'), preserved from the original.
        h2_count = sum(
            1 for line in lines
            if line.startswith('##') and keyword_lower in line.lower()
        )
        return {
            'in_first_100_words': in_first_100_words,
            'in_h1': in_h1,
            'in_conclusion': in_conclusion,
            'in_h2_count': h2_count,
        }

    def detect_stuffing(self, text: str, keyword: str, density: float) -> Dict:
        """Assess keyword-stuffing risk from overall and per-paragraph density.

        Returns {'risk_level': 'none'|'medium'|'high', 'warnings': [...],
        'safe': bool}.
        """
        risk_level = "none"
        warnings = []
        if density > 3.0:
            risk_level = "high"
            warnings.append(f"Keyword density {density:.1f}% is very high (over 3%)")
        elif density > 2.5:
            risk_level = "medium"
            warnings.append(f"Keyword density {density:.1f}% is high (over 2.5%)")
        # Clustering check, bounded to the first 10 paragraphs.
        for i, para in enumerate(text.split('\n\n')[:10]):
            para_density = self.calculate_density(para, keyword)
            if para_density > 5.0:
                risk_level = "high"  # clustering always escalates to high
                warnings.append(f"Paragraph {i+1} has very high density ({para_density:.1f}%)")
        return {
            'risk_level': risk_level,
            'warnings': warnings,
            # Only "none" is ever a non-risky level ("low" is never produced).
            'safe': risk_level == "none",
        }

    def get_density_status(self, density: float, language: str = 'th') -> str:
        """Bucket *density* against the per-language target range.

        Thai targets 1.0-1.5%; English targets 1.5-2.0%.
        """
        if language == 'th':
            if density < 0.5:
                return "too_low"
            elif density < 1.0:
                return "slightly_low"
            elif density <= 1.5:
                return "optimal"
            elif density <= 2.0:
                return "slightly_high"
            else:
                return "too_high"
        else:
            if density < 1.0:
                return "too_low"
            elif density < 1.5:
                return "slightly_low"
            elif density <= 2.0:
                return "optimal"
            elif density <= 2.5:
                return "slightly_high"
            else:
                return "too_high"

    def get_recommendations(self, density: float, placements: Dict, language: str = 'th') -> List[str]:
        """Generate actionable recommendations in the content's language.

        *placements* is the dict produced by check_critical_placements.
        """
        recs = []
        if language == 'th':
            if density < 1.0:
                recs.append("เพิ่มการใช้คำหลักในเนื้อหา (target: 1.0-1.5%)")
            elif density > 2.0:
                recs.append("ลดการใช้คำหลักลง อาจถูกมองว่า keyword stuffing")
            if not placements['in_first_100_words']:
                recs.append("เพิ่มคำหลักในย่อหน้าแรก (100 คำแรก)")
            if not placements['in_h1']:
                recs.append("เพิ่มคำหลักในหัวข้อหลัก (H1)")
            if not placements['in_conclusion']:
                recs.append("เพิ่มคำหลักในบทสรุป")
            if placements['in_h2_count'] < 2:
                recs.append("เพิ่มคำหลักในหัวข้อรอง (H2) อย่างน้อย 2-3 แห่ง")
        else:
            if density < 1.5:
                recs.append("Increase keyword usage (target: 1.5-2.0%)")
            elif density > 2.5:
                recs.append("Reduce keyword usage to avoid stuffing penalty")
            if not placements['in_first_100_words']:
                recs.append("Add keyword in first 100 words")
            if not placements['in_h1']:
                recs.append("Add keyword in H1 headline")
            if not placements['in_conclusion']:
                recs.append("Add keyword in conclusion")
        return recs

    def analyze(self, text: str, keyword: str, language: str = 'th') -> Dict:
        """Run the full analysis and return a JSON-serializable report."""
        density = self.calculate_density(text, keyword)
        placements = self.check_critical_placements(text, keyword)
        return {
            'word_count': self.count_words(text),
            'keyword': keyword,
            'occurrences': len(self.find_positions(text, keyword)),
            'density': round(density, 2),
            'target_density': '1.0-1.5%' if language == 'th' else '1.5-2.0%',
            'status': self.get_density_status(density, language),
            'critical_placements': placements,
            'keyword_stuffing_risk': self.detect_stuffing(text, keyword, density)['risk_level'],
            'recommendations': self.get_recommendations(density, placements, language),
        }
def main():
    """CLI entry point: parse arguments, run the analysis, render the report."""
    parser = argparse.ArgumentParser(
        description='Analyze keyword density in Thai or English text'
    )
    parser.add_argument('--text', '-t', required=True,
                        help='Text content to analyze')
    parser.add_argument('--keyword', '-k', required=True,
                        help='Target keyword')
    parser.add_argument('--language', '-l', choices=['th', 'en'], default='th',
                        help='Content language (default: th)')
    parser.add_argument('--output', '-o', choices=['json', 'text'], default='text',
                        help='Output format (default: text)')
    args = parser.parse_args()

    report = ThaiKeywordAnalyzer().analyze(args.text, args.keyword, args.language)

    if args.output == 'json':
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return

    # Human-readable report: assemble the fixed lines, then print them.
    marks = report['critical_placements']

    def flag(value):
        return 'Yes' if value else 'No'

    body = [
        "\n📊 Keyword Analysis Results\n",
        f"Keyword: {report['keyword']}",
        f"Word Count: {report['word_count']}",
        f"Occurrences: {report['occurrences']}",
        f"Density: {report['density']}% (target: {report['target_density']})",
        f"Status: {report['status']}",
        "\nCritical Placements:",
        f"  ✓ First 100 words: {flag(marks['in_first_100_words'])}",
        f"  ✓ H1 Headline: {flag(marks['in_h1'])}",
        f"  ✓ Conclusion: {flag(marks['in_conclusion'])}",
        f"  ✓ H2 Headings: {marks['in_h2_count']} found",
        f"\nKeyword Stuffing Risk: {report['keyword_stuffing_risk']}",
    ]
    for line in body:
        print(line)
    if report['recommendations']:
        print("\n💡 Recommendations:")
        for rec in report['recommendations']:
            print(rec)
    print()


if __name__ == '__main__':
    main()