203 lines
8.4 KiB
Python
203 lines
8.4 KiB
Python
"""Style analyzer module for analyzing content style using LLM."""
|
|
|
|
from typing import Dict, List, Optional
|
|
from loguru import logger
|
|
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
|
import json
|
|
import re
|
|
|
|
class StyleAnalyzer:
    """Analyze the writing style of content by prompting an LLM for JSON.

    Both public methods build a prompt from a ``content`` dict, send it to
    ``llm_text_gen`` and parse the reply as JSON. Failures are never raised to
    the caller; they are reported as a dict containing an ``"error"`` key and
    ``"success": False``.
    """

    # Maximum number of characters of ``main_content`` embedded in a prompt.
    MAX_CONTENT_CHARS = 4000

    # Instruction shared by both prompts, placed directly above the schema.
    _JSON_ONLY_INSTRUCTION = (
        "IMPORTANT: Respond ONLY with a JSON object in the following format. "
        "Do not include any additional text, explanations, or markdown formatting:"
    )

    # Response skeleton requested by analyze_content_style.
    _STYLE_SCHEMA = """{
    "writing_style": {
        "tone": "formal/casual/technical/etc",
        "voice": "active/passive",
        "complexity": "simple/moderate/complex",
        "engagement_level": "low/medium/high"
    },
    "content_characteristics": {
        "sentence_structure": "description",
        "vocabulary_level": "basic/intermediate/advanced",
        "paragraph_organization": "description",
        "content_flow": "description"
    },
    "target_audience": {
        "demographics": ["list"],
        "expertise_level": "beginner/intermediate/advanced",
        "industry_focus": "primary industry",
        "geographic_focus": "primary region"
    },
    "content_type": {
        "primary_type": "blog/article/product/etc",
        "secondary_types": ["list"],
        "purpose": "inform/entertain/persuade/etc",
        "call_to_action": "type and frequency"
    },
    "recommended_settings": {
        "writing_tone": "recommended tone",
        "target_audience": "recommended audience",
        "content_type": "recommended type",
        "creativity_level": "low/medium/high",
        "geographic_location": "recommended location"
    }
}"""

    # Response skeleton requested by analyze_style_patterns.
    _PATTERN_SCHEMA = """{
    "sentence_patterns": {
        "structure": ["list of patterns"],
        "length": "short/medium/long",
        "complexity": "simple/moderate/complex"
    },
    "word_patterns": {
        "vocabulary": ["list of patterns"],
        "frequency": "low/medium/high",
        "diversity": "low/medium/high"
    },
    "rhetorical_devices": {
        "types": ["list of devices"],
        "frequency": "low/medium/high",
        "effectiveness": "low/medium/high"
    }
}"""

    def __init__(self):
        """Initialize the style analyzer."""
        logger.info("[StyleAnalyzer.__init__] Initializing style analyzer")

    @classmethod
    def _build_style_prompt(cls, title: str, description: str, main_content: str) -> str:
        """Build the prompt used by ``analyze_content_style``.

        Args:
            title (str): Title of the content.
            description (str): Short description of the content.
            main_content (str): Body text; only the first
                ``MAX_CONTENT_CHARS`` characters are included.

        Returns:
            str: The complete prompt text.
        """
        # The length limit is applied here in code; it must not leak into the
        # prompt text itself.
        excerpt = main_content[:cls.MAX_CONTENT_CHARS]
        return (
            "Analyze the following content and provide a comprehensive writing style analysis.\n"
            "Focus on identifying the writing style, tone, and characteristics that make this content unique.\n"
            "\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Content: {excerpt}\n"
            "\n"
            f"{cls._JSON_ONLY_INSTRUCTION}\n"
            f"{cls._STYLE_SCHEMA}"
        )

    @classmethod
    def _build_pattern_prompt(cls, main_content: str) -> str:
        """Build the prompt used by ``analyze_style_patterns``.

        Args:
            main_content (str): Body text; only the first
                ``MAX_CONTENT_CHARS`` characters are included.

        Returns:
            str: The complete prompt text.
        """
        excerpt = main_content[:cls.MAX_CONTENT_CHARS]
        return (
            "Analyze the following content for specific writing style patterns.\n"
            "Focus on identifying recurring patterns in sentence structure, word choice, and rhetorical devices.\n"
            "\n"
            f"Content: {excerpt}\n"
            "\n"
            f"{cls._JSON_ONLY_INSTRUCTION}\n"
            f"{cls._PATTERN_SCHEMA}"
        )

    @staticmethod
    def _repair_and_load(cleaned_json: str):
        """Parse JSON text that failed a plain ``json.loads``.

        Candidates are tried in order, so anything the newline repair alone
        can fix is still parsed exactly as before:

        1. the text with line breaks flattened to spaces,
        2. the span from the first ``{`` to the last ``}`` (drops prose the
           model wrapped around the object),
        3. that span with line breaks flattened.

        Args:
            cleaned_json (str): Response text with markdown fences removed.

        Returns:
            The decoded JSON value.

        Raises:
            json.JSONDecodeError: If no candidate can be parsed; the error
                from the last candidate is raised.
        """
        def flatten(value: str) -> str:
            # Raw line breaks inside string values are invalid JSON; replace
            # them (and literal "\n" sequences) with spaces.
            value = re.sub(r'([^"\\])\n', r'\1 ', value)
            return re.sub(r'\\n', ' ', value)

        candidates = [flatten(cleaned_json)]

        start = cleaned_json.find("{")
        end = cleaned_json.rfind("}")
        if start != -1 and end > start:
            embedded = cleaned_json[start:end + 1]
            if embedded != cleaned_json:
                candidates.extend([embedded, flatten(embedded)])

        last_error = None
        for candidate in candidates:
            try:
                return json.loads(candidate)
            except json.JSONDecodeError as exc:
                last_error = exc
        raise last_error

    def _clean_json_response(self, text: str) -> str:
        """
        Clean the LLM response by removing markdown code block markers.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: Cleaned JSON string, or an empty string when ``text`` is not
                a string (for example ``None``).
        """
        if not isinstance(text, str):
            logger.error(
                "[StyleAnalyzer._clean_json_response] Error cleaning response: "
                f"expected str, got {type(text).__name__}"
            )
            return ""

        # Remove markdown code block markers
        cleaned_string = text.replace("```json", "").replace("```", "").strip()

        # Log the cleaned JSON for debugging
        logger.debug(f"[StyleAnalyzer._clean_json_response] Cleaned JSON: {cleaned_string}")
        return cleaned_string

    def _parse_json_response(self, analysis_text: str, caller: str):
        """Clean and decode an LLM reply, retrying with repairs on failure.

        Args:
            analysis_text (str): Raw reply returned by ``llm_text_gen``.
            caller (str): Name of the calling method, used in log messages.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If nothing is left after cleaning the reply.
            json.JSONDecodeError: If the reply cannot be parsed even after
                the repair attempts.
        """
        cleaned_json = self._clean_json_response(analysis_text)
        if not cleaned_json:
            raise ValueError("No valid JSON found in response")

        try:
            return json.loads(cleaned_json)
        except json.JSONDecodeError as e:
            # If parsing fails, try to fix common JSON issues
            logger.warning(f"[StyleAnalyzer.{caller}] Initial JSON parsing failed: {e}")
            return self._repair_and_load(cleaned_json)

    def analyze_content_style(self, content: Dict) -> Dict:
        """
        Analyze the style of the provided content.

        Args:
            content (Dict): Content to analyze; the keys ``main_content``,
                ``title`` and ``description`` are read, each defaulting to an
                empty string when missing.

        Returns:
            Dict: The decoded analysis on success. On failure, a dict with an
                ``"error"`` message and ``"success": False``; when the reply
                could not be parsed it also carries ``"raw_response"``.
        """
        try:
            logger.info("[StyleAnalyzer.analyze_content_style] Starting content style analysis")

            prompt = self._build_style_prompt(
                content.get("title", ""),
                content.get("description", ""),
                content.get("main_content", ""),
            )

            # Get analysis from LLM
            logger.debug("[StyleAnalyzer.analyze_content_style] Sending prompt to LLM")
            # NOTE(review): llm_text_gen is assumed to return the reply as text - confirm
            analysis_text = llm_text_gen(prompt)

            try:
                analysis = self._parse_json_response(analysis_text, "analyze_content_style")
            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_content_style] Failed to parse JSON response: {e}")
                logger.debug(f"[StyleAnalyzer.analyze_content_style] Raw response: {analysis_text}")
                return {
                    "error": "Failed to parse analysis results",
                    "raw_response": analysis_text,
                    "success": False,
                }

            logger.info("[StyleAnalyzer.analyze_content_style] Successfully parsed analysis results")
            return analysis

        except Exception as e:
            # Boundary handler: callers receive an error dict instead of an exception.
            logger.error(f"[StyleAnalyzer.analyze_content_style] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False,
            }

    def analyze_style_patterns(self, content: Dict) -> Dict:
        """
        Analyze specific writing style patterns in the content.

        Args:
            content (Dict): Content to analyze; only ``main_content`` is
                read, defaulting to an empty string when missing.

        Returns:
            Dict: The decoded pattern analysis on success. On failure, a dict
                with an ``"error"`` message and ``"success": False``; when
                the reply could not be parsed it also carries
                ``"raw_response"``.
        """
        try:
            prompt = self._build_pattern_prompt(content.get("main_content", ""))
            analysis_text = llm_text_gen(prompt)

            try:
                return self._parse_json_response(analysis_text, "analyze_style_patterns")
            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_style_patterns] Failed to parse JSON response: {e}")
                return {
                    "error": "Failed to parse pattern analysis results",
                    "raw_response": analysis_text,
                    "success": False,
                }

        except Exception as e:
            # Boundary handler: callers receive an error dict instead of an exception.
            logger.error(f"[StyleAnalyzer.analyze_style_patterns] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False,
            }