Files
ALwrity/backend/services/blog_writer/research/keyword_analyzer.py
ajaysi d90d441019 chore: push all remaining changes
- Blog writer enhancements and bug fixes
- Wix integration improvements
- Frontend UI updates
- GSC dashboard docs cleanup
- Image studio assets
- LinkedIn requirements file
- Various dependency updates
2026-06-12 20:32:03 +05:30

101 lines
4.2 KiB
Python

"""
Keyword Analyzer - AI-powered keyword analysis for research content.
Extracts and analyzes keywords from research content using structured AI responses.
"""
from typing import Dict, Any, List
from loguru import logger
import json
class KeywordAnalyzer:
"""Analyzes keywords from research content using AI-powered extraction."""
def analyze(self, content: str, original_keywords: List[str], user_id: str = None) -> Dict[str, Any]:
"""Parse comprehensive keyword analysis from the research content using AI."""
# Use AI to extract and analyze keywords from the rich research content
keyword_prompt = f"""
Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
Research Content:
{content[:8000]}
Extract and analyze:
1. Primary keywords (main topic terms)
2. Secondary keywords (related terms, synonyms)
3. Long-tail opportunities (specific phrases people search for)
4. Search intent (informational, commercial, navigational, transactional)
5. Keyword difficulty assessment (1-10 scale)
6. Content gaps (what competitors are missing)
7. Semantic keywords (related concepts)
8. Trending terms (emerging keywords)
Respond with JSON:
{{
"primary": ["keyword1", "keyword2"],
"secondary": ["related1", "related2"],
"long_tail": ["specific phrase 1", "specific phrase 2"],
"search_intent": "informational|commercial|navigational|transactional",
"difficulty": 7,
"content_gaps": ["gap1", "gap2"],
"semantic_keywords": ["concept1", "concept2"],
"trending_terms": ["trend1", "trend2"],
"analysis_insights": "Brief analysis of keyword landscape"
}}
"""
from services.llm_providers.main_text_generation import llm_text_gen
keyword_schema = {
"type": "object",
"properties": {
"primary": {"type": "array", "items": {"type": "string"}},
"secondary": {"type": "array", "items": {"type": "string"}},
"long_tail": {"type": "array", "items": {"type": "string"}},
"search_intent": {"type": "string"},
"difficulty": {"type": "integer"},
"content_gaps": {"type": "array", "items": {"type": "string"}},
"semantic_keywords": {"type": "array", "items": {"type": "string"}},
"trending_terms": {"type": "array", "items": {"type": "string"}},
"analysis_insights": {"type": "string"}
},
"required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
}
raw = llm_text_gen(
prompt=keyword_prompt,
user_id=user_id
)
# Parse JSON from LLM response (works with both string and dict return types)
import re
if isinstance(raw, str):
cleaned = raw.strip()
if cleaned.startswith('```json'):
cleaned = cleaned[7:]
if cleaned.startswith('```'):
cleaned = cleaned[3:]
if cleaned.endswith('```'):
cleaned = cleaned[:-3]
cleaned = cleaned.strip()
try:
keyword_analysis = json.loads(cleaned)
except json.JSONDecodeError:
json_match = re.search(r'\{.*\}', cleaned, re.DOTALL)
if json_match:
keyword_analysis = json.loads(json_match.group(0))
else:
raise ValueError(f"Keyword analysis returned non-JSON string: {cleaned[:200]}")
elif isinstance(raw, dict):
keyword_analysis = raw
else:
raise ValueError(f"Unexpected LLM response type: {type(raw)}")
if 'error' in keyword_analysis:
raise ValueError(f"Keyword analysis failed: {keyword_analysis.get('error', 'Unknown error')}")
logger.info("✅ AI keyword analysis completed successfully")
return keyword_analysis