Files
ALwrity/backend/services/blog_writer/research/keyword_analyzer.py
ajaysi 928c2f20aa fix: WYSIWYG editor, content generation, and writing assistant bug fixes
- Fix text selection menu not showing: wire contentRef via inputRef on multiline TextField
- Fix blog title not truncating: add min-w-0 for flex item overflow
- Fix outline generation 500: escape curly braces in f-string prompt template
- Fix content generation 'NoneType not callable': replace SessionLocal() with get_session_for_user(), add db param to MediumBlogGenerator, fix signature mismatch in database_task_manager
- Fix writing assistant suggest 500: add auth + user_id to API endpoint and service, replace sync requests with httpx.AsyncClient
- Fix hallucination detector 404: explicitly include router in main.py and app.py
- Fix missing error_data in task failure responses
- Hide CopilotKit web inspector button
- Remove hardcoded fallback suggestions from SmartTypingAssist
- Fix stale closure refs in SmartTypingAssist handleTypingChange
- Add two-column editor layout, stats bar, section hover menu
- Various subscription, billing, and research module improvements
2026-05-14 09:11:51 +05:30

101 lines
4.3 KiB
Python

"""
Keyword Analyzer - AI-powered keyword analysis for research content.
Extracts and analyzes keywords from research content using structured AI responses.
"""
from typing import Dict, Any, List
from loguru import logger
import json
class KeywordAnalyzer:
"""Analyzes keywords from research content using AI-powered extraction."""
def analyze(self, content: str, original_keywords: List[str], user_id: str = None) -> Dict[str, Any]:
"""Parse comprehensive keyword analysis from the research content using AI."""
# Use AI to extract and analyze keywords from the rich research content
keyword_prompt = f"""
Analyze the following research content and extract comprehensive keyword insights for: {', '.join(original_keywords)}
Research Content:
{content[:3000]} # Limit to avoid token limits
Extract and analyze:
1. Primary keywords (main topic terms)
2. Secondary keywords (related terms, synonyms)
3. Long-tail opportunities (specific phrases people search for)
4. Search intent (informational, commercial, navigational, transactional)
5. Keyword difficulty assessment (1-10 scale)
6. Content gaps (what competitors are missing)
7. Semantic keywords (related concepts)
8. Trending terms (emerging keywords)
Respond with JSON:
{{
"primary": ["keyword1", "keyword2"],
"secondary": ["related1", "related2"],
"long_tail": ["specific phrase 1", "specific phrase 2"],
"search_intent": "informational|commercial|navigational|transactional",
"difficulty": 7,
"content_gaps": ["gap1", "gap2"],
"semantic_keywords": ["concept1", "concept2"],
"trending_terms": ["trend1", "trend2"],
"analysis_insights": "Brief analysis of keyword landscape"
}}
"""
from services.llm_providers.main_text_generation import llm_text_gen
keyword_schema = {
"type": "object",
"properties": {
"primary": {"type": "array", "items": {"type": "string"}},
"secondary": {"type": "array", "items": {"type": "string"}},
"long_tail": {"type": "array", "items": {"type": "string"}},
"search_intent": {"type": "string"},
"difficulty": {"type": "integer"},
"content_gaps": {"type": "array", "items": {"type": "string"}},
"semantic_keywords": {"type": "array", "items": {"type": "string"}},
"trending_terms": {"type": "array", "items": {"type": "string"}},
"analysis_insights": {"type": "string"}
},
"required": ["primary", "secondary", "long_tail", "search_intent", "difficulty", "content_gaps", "semantic_keywords", "trending_terms", "analysis_insights"]
}
raw = llm_text_gen(
prompt=keyword_prompt,
user_id=user_id
)
# Parse JSON from LLM response (works with both string and dict return types)
import re
if isinstance(raw, str):
cleaned = raw.strip()
if cleaned.startswith('```json'):
cleaned = cleaned[7:]
if cleaned.startswith('```'):
cleaned = cleaned[3:]
if cleaned.endswith('```'):
cleaned = cleaned[:-3]
cleaned = cleaned.strip()
try:
keyword_analysis = json.loads(cleaned)
except json.JSONDecodeError:
json_match = re.search(r'\{.*\}', cleaned, re.DOTALL)
if json_match:
keyword_analysis = json.loads(json_match.group(0))
else:
raise ValueError(f"Keyword analysis returned non-JSON string: {cleaned[:200]}")
elif isinstance(raw, dict):
keyword_analysis = raw
else:
raise ValueError(f"Unexpected LLM response type: {type(raw)}")
if 'error' in keyword_analysis:
raise ValueError(f"Keyword analysis failed: {keyword_analysis.get('error', 'Unknown error')}")
logger.info("✅ AI keyword analysis completed successfully")
return keyword_analysis