Made changes to Getting started with ALwrity and added a lot of details on API keys
This commit is contained in:
203
lib/personalization/style_analyzer.py
Normal file
203
lib/personalization/style_analyzer.py
Normal file
@@ -0,0 +1,203 @@
|
||||
"""Style analyzer module for analyzing content style using LLM."""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from loguru import logger
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
import json
|
||||
import re
|
||||
|
||||
class StyleAnalyzer:
    """Analyzer for content style using LLM.

    Each public ``analyze_*`` method builds a prompt, passes it to
    ``llm_text_gen`` and parses the reply as JSON. Failures are not raised to
    the caller: they are logged and reported as a dict with an ``"error"``
    key.
    """

    # Maximum number of characters of ``main_content`` embedded in a prompt.
    _MAX_CONTENT_CHARS = 4000

    # Shared instruction placed directly before the expected JSON layout.
    _JSON_ONLY_INSTRUCTION = (
        "IMPORTANT: Respond ONLY with a JSON object in the following format. "
        "Do not include any additional text, explanations, or markdown formatting:"
    )

    # JSON layout requested by analyze_content_style.
    _STYLE_RESPONSE_FORMAT = """{
    "writing_style": {
        "tone": "formal/casual/technical/etc",
        "voice": "active/passive",
        "complexity": "simple/moderate/complex",
        "engagement_level": "low/medium/high"
    },
    "content_characteristics": {
        "sentence_structure": "description",
        "vocabulary_level": "basic/intermediate/advanced",
        "paragraph_organization": "description",
        "content_flow": "description"
    },
    "target_audience": {
        "demographics": ["list"],
        "expertise_level": "beginner/intermediate/advanced",
        "industry_focus": "primary industry",
        "geographic_focus": "primary region"
    },
    "content_type": {
        "primary_type": "blog/article/product/etc",
        "secondary_types": ["list"],
        "purpose": "inform/entertain/persuade/etc",
        "call_to_action": "type and frequency"
    },
    "recommended_settings": {
        "writing_tone": "recommended tone",
        "target_audience": "recommended audience",
        "content_type": "recommended type",
        "creativity_level": "low/medium/high",
        "geographic_location": "recommended location"
    }
}"""

    # JSON layout requested by analyze_style_patterns.
    _PATTERNS_RESPONSE_FORMAT = """{
    "sentence_patterns": {
        "structure": ["list of patterns"],
        "length": "short/medium/long",
        "complexity": "simple/moderate/complex"
    },
    "word_patterns": {
        "vocabulary": ["list of patterns"],
        "frequency": "low/medium/high",
        "diversity": "low/medium/high"
    },
    "rhetorical_devices": {
        "types": ["list of devices"],
        "frequency": "low/medium/high",
        "effectiveness": "low/medium/high"
    }
}"""

    def __init__(self):
        """Initialize the style analyzer."""
        logger.info("[StyleAnalyzer.__init__] Initializing style analyzer")

    @classmethod
    def _build_style_prompt(cls, title: str, description: str, main_content: str) -> str:
        """Build the prompt used by ``analyze_content_style``.

        Args:
            title (str): Title of the content.
            description (str): Description of the content.
            main_content (str): Body text; only the first
                ``_MAX_CONTENT_CHARS`` characters are embedded.

        Returns:
            str: The complete prompt text.
        """
        # Limit content length for API. This note must stay a code comment:
        # it must not leak into the prompt text itself.
        excerpt = main_content[: cls._MAX_CONTENT_CHARS]
        return (
            "Analyze the following content and provide a comprehensive writing style analysis.\n"
            "Focus on identifying the writing style, tone, and characteristics that make this content unique.\n"
            "\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Content: {excerpt}\n"
            "\n"
            f"{cls._JSON_ONLY_INSTRUCTION}\n"
            f"{cls._STYLE_RESPONSE_FORMAT}"
        )

    @classmethod
    def _build_patterns_prompt(cls, main_content: str) -> str:
        """Build the prompt used by ``analyze_style_patterns``.

        Args:
            main_content (str): Body text; only the first
                ``_MAX_CONTENT_CHARS`` characters are embedded.

        Returns:
            str: The complete prompt text.
        """
        excerpt = main_content[: cls._MAX_CONTENT_CHARS]
        return (
            "Analyze the following content for specific writing style patterns.\n"
            "Focus on identifying recurring patterns in sentence structure, word choice, and rhetorical devices.\n"
            "\n"
            f"Content: {excerpt}\n"
            "\n"
            f"{cls._JSON_ONLY_INSTRUCTION}\n"
            f"{cls._PATTERNS_RESPONSE_FORMAT}"
        )

    @staticmethod
    def _extract_json_text(text: str) -> str:
        """Strip Markdown fences and isolate the JSON object in ``text``.

        Args:
            text (str): Raw response text.

        Returns:
            str: The text between the first ``{`` and the last ``}`` when
            the response wraps a JSON object in extra prose; otherwise the
            fence-stripped, whitespace-trimmed text unchanged.
        """
        # Remove markdown code block markers
        stripped = text.replace("```json", "").replace("```", "").strip()

        # A top-level array or an already bare object needs no slicing.
        if stripped.startswith("[") or (
            stripped.startswith("{") and stripped.endswith("}")
        ):
            return stripped

        # Drop any explanation placed before or after the object.
        start = stripped.find("{")
        end = stripped.rfind("}")
        if start != -1 and end > start:
            return stripped[start:end + 1]
        return stripped

    @staticmethod
    def _repair_json_text(json_text: str) -> str:
        """Apply the newline fixes used when the first JSON parse fails.

        Args:
            json_text (str): JSON text that failed to parse.

        Returns:
            str: ``json_text`` with raw newlines (except those directly
            after a ``"`` or a backslash) and literal ``\\n`` sequences
            replaced by single spaces.
        """
        repaired = re.sub(r'([^"\\])\n', r'\1 ', json_text)
        return re.sub(r'\\n', ' ', repaired)

    def _clean_json_response(self, text: str) -> str:
        """
        Clean the LLM response to extract valid JSON.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: Cleaned JSON string, or an empty string when ``text`` is
            not a string.
        """
        if not isinstance(text, str):
            logger.error(
                "[StyleAnalyzer._clean_json_response] Error cleaning response: "
                f"expected str, got {type(text).__name__}"
            )
            return ""

        cleaned_string = self._extract_json_text(text)

        # Log the cleaned JSON for debugging
        logger.debug(f"[StyleAnalyzer._clean_json_response] Cleaned JSON: {cleaned_string}")
        return cleaned_string

    def _parse_json_response(self, analysis_text: str, context: str) -> Dict:
        """Clean ``analysis_text`` and parse it as JSON, retrying once.

        Args:
            analysis_text (str): Raw response text.
            context (str): Name of the calling method, used in log messages.

        Returns:
            Dict: The parsed JSON value.

        Raises:
            ValueError: If cleaning leaves no text to parse.
            json.JSONDecodeError: If parsing fails even after the repair.
        """
        cleaned_json = self._clean_json_response(analysis_text)
        if not cleaned_json:
            raise ValueError("No valid JSON found in response")

        try:
            return json.loads(cleaned_json)
        except json.JSONDecodeError as e:
            # If parsing fails, try to fix common JSON issues
            logger.warning(f"[StyleAnalyzer.{context}] Initial JSON parsing failed: {e}")

        # Try parsing again; a second failure propagates to the caller.
        return json.loads(self._repair_json_text(cleaned_json))

    def _request_analysis(self, prompt: str, context: str, parse_error: str) -> Dict:
        """Send ``prompt`` to the LLM and parse its reply.

        Args:
            prompt (str): Prompt passed to ``llm_text_gen``.
            context (str): Name of the calling method, used in log messages.
            parse_error (str): Value of the ``"error"`` key returned when
                the reply cannot be parsed as JSON.

        Returns:
            Dict: The parsed analysis, or a dict with ``"error"`` and
            ``"raw_response"`` keys when the reply is not valid JSON.
        """
        logger.debug(f"[StyleAnalyzer.{context}] Sending prompt to LLM")
        analysis_text = llm_text_gen(prompt)

        try:
            analysis = self._parse_json_response(analysis_text, context)
        except json.JSONDecodeError as e:
            logger.error(f"[StyleAnalyzer.{context}] Failed to parse JSON response: {e}")
            logger.debug(f"[StyleAnalyzer.{context}] Raw response: {analysis_text}")
            return {
                "error": parse_error,
                "raw_response": analysis_text,
            }

        logger.info(f"[StyleAnalyzer.{context}] Successfully parsed analysis results")
        return analysis

    def analyze_content_style(self, content: Dict) -> Dict:
        """
        Analyze the style of the provided content.

        Args:
            content (Dict): Content to analyze; the ``main_content``,
                ``title`` and ``description`` keys are read, each
                defaulting to an empty string.

        Returns:
            Dict: Analysis results. On a JSON parse failure the dict holds
            ``"error"`` and ``"raw_response"``; on any other failure it
            holds ``"error"`` and ``"success": False``.
        """
        try:
            logger.info("[StyleAnalyzer.analyze_content_style] Starting content style analysis")

            # Prepare content for analysis and construct the prompt
            prompt = self._build_style_prompt(
                title=content.get("title", ""),
                description=content.get("description", ""),
                main_content=content.get("main_content", ""),
            )

            return self._request_analysis(
                prompt,
                "analyze_content_style",
                "Failed to parse analysis results",
            )
        except Exception as e:
            # Boundary handler: callers receive an error dict, not an exception.
            logger.error(f"[StyleAnalyzer.analyze_content_style] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False,
            }

    def analyze_style_patterns(self, content: Dict) -> Dict:
        """
        Analyze specific writing style patterns in the content.

        Args:
            content (Dict): Content to analyze; only the ``main_content``
                key is read, defaulting to an empty string.

        Returns:
            Dict: Pattern analysis results. On a JSON parse failure the
            dict holds ``"error"`` and ``"raw_response"``; on any other
            failure it holds ``"error"`` and ``"success": False``.
        """
        try:
            prompt = self._build_patterns_prompt(content.get("main_content", ""))

            return self._request_analysis(
                prompt,
                "analyze_style_patterns",
                "Failed to parse pattern analysis results",
            )
        except Exception as e:
            # Boundary handler: callers receive an error dict, not an exception.
            logger.error(f"[StyleAnalyzer.analyze_style_patterns] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False,
            }
|
||||
Reference in New Issue
Block a user