ALwrity Version 0.5.0 (FastAPI + React)
This commit is contained in:
@@ -572,6 +572,7 @@ def render_ai_content_strategy():
|
||||
budget = st.selectbox(
|
||||
"Monthly Content Budget",
|
||||
[
|
||||
"No budget",
|
||||
"Under $1,000",
|
||||
"$1,000 - $5,000",
|
||||
"$5,000 - $10,000",
|
||||
|
||||
@@ -1,135 +0,0 @@
|
||||
###################################################
|
||||
#
|
||||
# The script covers many SEO factors, including keyword presence, title length,
|
||||
# meta description, images, img alt text, headings, internal links, external links,
|
||||
# spelling errors, grammar errors, and readability.
|
||||
#
|
||||
##################################################
|
||||
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from textstat import flesch_reading_ease
|
||||
import spellchecker
|
||||
|
||||
class SEOAnalyzer:
    """Lightweight on-page SEO audit for a single HTML document.

    The analyzer parses the HTML with BeautifulSoup and reports keyword
    density, keyword presence in the title and image alt text, heading
    text, link counts, a Flesch reading-ease score, a spelling-error count
    and a simple additive SEO score with improvement suggestions.

    Args:
        html_content: Raw HTML markup to analyze.
        target_keywords: Iterable of keyword strings to look for. Matching
            is case-insensitive substring matching.
    """

    # Recommended <title> length in characters (inclusive bounds).
    RECOMMENDED_TITLE_LENGTH = (50, 70)

    # href prefixes that mark a link as leaving the current site. The page's
    # own domain is not known here, so every absolute URL counts as external.
    _EXTERNAL_PREFIXES = ('http://', 'https://', '//')

    # href prefixes that are neither internal page links nor external links.
    _NON_PAGE_PREFIXES = ('#', 'mailto:', 'tel:', 'javascript:')

    def __init__(self, html_content, target_keywords):
        self.html_content = html_content
        self.target_keywords = target_keywords

    def analyze_html_content(self):
        """Analyze the HTML and return a dictionary of SEO metrics.

        Returns:
            On success, a dict with the keys 'Keyword Density',
            'Keyword Presence in Title', 'Keyword Presence in Image Alt Text',
            'Headings Text', 'Internal Links', 'External Links',
            'Readability Score', 'Spelling Errors', 'Grammar Errors',
            'SEO Score' and 'Suggestions'.
            On any failure, ``{'error': <message>}`` (no exception is raised).

            'Grammar Errors' is ``None`` when the installed spell checker
            exposes no ``check_grammar`` method.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')
            keywords = list(self.target_keywords)

            # Extract and clean text from HTML
            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
            text_lower = text.lower()
            word_count = len(text.split())

            # Keyword density as a percentage of the word count.
            # An empty document yields 0.0 instead of a ZeroDivisionError.
            keyword_density = {
                keyword: (
                    (text_lower.count(keyword.lower()) / word_count) * 100
                    if word_count else 0.0
                )
                for keyword in keywords
            }

            # Check for the presence of keywords in the title
            title_tag = soup.find('title')
            title_text = title_tag.text.lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text for keyword in keywords
            }

            # Check for the presence of keywords in image alt text
            images = soup.find_all('img')
            img_alt_text = [(img.get('alt') or '').lower() for img in images]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt for alt in img_alt_text)
                for keyword in keywords
            }

            # Collect heading text
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(heading.text.lower() for heading in headings)

            # Classify links. Previously every link without '#' (including
            # external ones) was counted as internal.
            hrefs = [
                (link.get('href') or '').strip().lower()
                for link in soup.find_all('a')
            ]
            external_links = sum(
                1 for href in hrefs if href.startswith(self._EXTERNAL_PREFIXES)
            )
            internal_links = sum(
                1 for href in hrefs
                if href
                and not href.startswith(self._EXTERNAL_PREFIXES)
                and not href.startswith(self._NON_PAGE_PREFIXES)
            )

            # Calculate readability score
            readability_score = flesch_reading_ease(text)

            # Spelling: feed bare words to the checker so that trailing
            # punctuation ("analysis.") is not reported as a misspelling.
            spell = spellchecker.SpellChecker()
            words = re.findall(r"[A-Za-z]+(?:'[A-Za-z]+)?", text)
            spelling_errors = len(spell.unknown(words))

            # Grammar: the original code called spell.check_grammar
            # unconditionally; if the checker object lacks that method the
            # whole analysis failed. Only call it when it actually exists.
            check_grammar = getattr(spell, 'check_grammar', None)
            grammar_errors = (
                len(check_grammar(text)) if callable(check_grammar) else None
            )

            # SEO score: one point per keyword found in the body text
            # (case-insensitive), plus one point for a well-sized title.
            seo_score = sum(
                1 for keyword in keywords if keyword.lower() in text_lower
            )

            # Title length is measured in characters to match the
            # recommended 50-70 range (the word count was compared before).
            title_length = len(title_text.strip())
            min_length, max_length = self.RECOMMENDED_TITLE_LENGTH
            if min_length <= title_length <= max_length:
                seo_score += 1

            # Suggestions are tied to what the analysis actually found.
            suggestions = []
            if any(keyword.lower() not in text_lower for keyword in keywords):
                suggestions.append("Add more relevant keywords to your HTML content.")
            if keywords and not any(keyword_presence_in_title.values()):
                suggestions.append("Make sure your title contains keywords.")
            if keywords and not any(keyword_presence_in_img_alt_text.values()):
                suggestions.append("Add keywords to image alt text.")
            if not headings:
                suggestions.append("Add headings to your HTML content.")
            if internal_links == 0:
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions
            }
        except Exception as e:
            # Callers receive an error payload instead of an exception.
            return {'error': str(e)}
|
||||
|
||||
# Example usage:
|
||||
if __name__ == "__main__":
    # Small self-contained demo: analyze a sample page and print each metric.
    html_content = """
<!DOCTYPE html>
<html>
<head>
<title>SEO Analyzer - Sample Page</title>
<meta name="description" content="This is a sample page for SEO analysis.">
</head>
<body>
<h1>Welcome to the SEO Analyzer</h1>
<p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
<img src="image1.jpg" alt="SEO image">
<img src="image2.jpg" alt="Keywords image">
</body>
</html>
"""

    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords

    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()

    if 'error' in results:
        # analyze_html_content reports failures as {'error': ...}; indexing
        # the metric keys in that case would raise KeyError.
        print(f"SEO analysis failed: {results['error']}")
    else:
        print("SEO Analysis Results:")
        for metric in (
            'Keyword Density',
            'Keyword Presence in Title',
            'Keyword Presence in Image Alt Text',
            'Headings Text',
            'Internal Links',
            'External Links',
            'Readability Score',
            'Spelling Errors',
            'Grammar Errors',
            'SEO Score',
        ):
            print(f"{metric}: {results[metric]}")
        print("Suggestions:")
        for suggestion in results['Suggestions']:
            print(suggestion)
|
||||
|
||||
@@ -1,167 +0,0 @@
|
||||
# Content Calendar & Topic Planning System
|
||||
|
||||
A comprehensive content planning and scheduling system that leverages existing SEO tools and AI capabilities to create optimized content calendars based on content gap analysis.
|
||||
|
||||
## Folder Structure
|
||||
|
||||
```
|
||||
content_calendar/
|
||||
├── README.md
|
||||
├── core/
|
||||
│ ├── __init__.py
|
||||
│ ├── calendar_manager.py # Main calendar management system
|
||||
│ ├── topic_generator.py # AI-powered topic generation
|
||||
│ └── content_predictor.py # Content performance prediction
|
||||
├── integrations/
|
||||
│ ├── __init__.py
|
||||
│ ├── seo_tools.py # Integration with existing SEO tools
|
||||
│ ├── gap_analyzer.py # Content gap analysis integration
|
||||
│ └── platform_adapters.py # Platform-specific content adaptation
|
||||
├── models/
|
||||
│ ├── __init__.py
|
||||
│ ├── calendar.py # Calendar data models
|
||||
│ ├── content.py # Content data models
|
||||
│ └── analytics.py # Analytics data models
|
||||
├── utils/
|
||||
│ ├── __init__.py
|
||||
│ ├── date_utils.py # Date and scheduling utilities
|
||||
│ ├── validation.py # Input validation
|
||||
│ └── error_handling.py # Error handling utilities
|
||||
└── tests/
|
||||
├── __init__.py
|
||||
├── test_calendar.py
|
||||
├── test_topic_generator.py
|
||||
└── test_integrations.py
|
||||
```
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Core Infrastructure
|
||||
|
||||
1. **Basic Calendar Management**
|
||||
- Implement calendar data structures
|
||||
- Create scheduling algorithms
|
||||
- Build date management utilities
|
||||
|
||||
2. **Topic Generation System**
|
||||
- Integrate with existing AI tools
|
||||
- Implement topic generation logic
|
||||
- Add SEO optimization features
|
||||
|
||||
3. **Integration Framework**
|
||||
- Connect with existing SEO tools
|
||||
- Implement content gap analysis integration
|
||||
- Create platform-specific adapters
|
||||
|
||||
### Phase 2: AI & SEO Enhancement
|
||||
|
||||
1. **AI-Powered Features**
|
||||
- Implement topic ideation
|
||||
- Add content structure generation
|
||||
- Create performance prediction models
|
||||
|
||||
2. **SEO Optimization**
|
||||
- Integrate title optimization
|
||||
- Add meta description generation
|
||||
- Implement structured data creation
|
||||
|
||||
3. **Content Performance**
|
||||
- Add performance tracking
|
||||
- Implement analytics collection
|
||||
- Create reporting system
|
||||
|
||||
### Phase 3: UI Development
|
||||
|
||||
1. **Calendar Interface**
|
||||
- Create interactive calendar view
|
||||
- Implement drag-and-drop functionality
|
||||
- Add platform-specific views
|
||||
|
||||
2. **Content Planning Panel**
|
||||
- Build topic suggestion interface
|
||||
- Create SEO metrics display
|
||||
- Implement content gap visualization
|
||||
|
||||
3. **Analytics Dashboard**
|
||||
- Design performance metrics view
|
||||
- Create engagement tracking
|
||||
- Implement progress monitoring
|
||||
|
||||
### Phase 4: Testing & Refinement
|
||||
|
||||
1. **Testing**
|
||||
- Unit testing
|
||||
- Integration testing
|
||||
- User acceptance testing
|
||||
|
||||
2. **Optimization**
|
||||
- Performance optimization
|
||||
- Code refactoring
|
||||
- Bug fixes
|
||||
|
||||
3. **Documentation**
|
||||
- API documentation
|
||||
- User guides
|
||||
- Integration guides
|
||||
|
||||
## Integration with Existing Tools
|
||||
|
||||
### SEO Tools Integration
|
||||
- `content_title_generator.py` - For optimized titles
|
||||
- `meta_desc_generator.py` - For meta descriptions
|
||||
- `seo_structured_data.py` - For structured data
|
||||
- `content_gap_analysis/` - For gap analysis
|
||||
- `webpage_content_analysis.py` - For content analysis
|
||||
|
||||
### AI Capabilities
|
||||
- Leverage existing `llm_text_gen` for:
|
||||
- Topic generation
|
||||
- Content structure
|
||||
- Performance prediction
|
||||
|
||||
## Key Features
|
||||
|
||||
1. **Content Planning**
|
||||
- AI-powered topic generation
|
||||
- SEO-optimized content scheduling
|
||||
- Platform-specific planning
|
||||
|
||||
2. **SEO Integration**
|
||||
- Automated SEO optimization
|
||||
- Performance tracking
|
||||
- Gap analysis integration
|
||||
|
||||
3. **Analytics & Reporting**
|
||||
- Content performance metrics
|
||||
- SEO impact tracking
|
||||
- Platform engagement stats
|
||||
|
||||
## Getting Started
|
||||
|
||||
1. **Prerequisites**
|
||||
- Python 3.8+
|
||||
- Access to existing SEO tools
|
||||
- Required API keys
|
||||
|
||||
2. **Installation**
|
||||
```bash
|
||||
# Add installation steps here
|
||||
```
|
||||
|
||||
3. **Configuration**
|
||||
```python
|
||||
# Add configuration example here
|
||||
```
|
||||
|
||||
4. **Basic Usage**
|
||||
```python
|
||||
# Add usage example here
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Guidelines for contributing to the project.
|
||||
|
||||
## License
|
||||
|
||||
Project license information.
|
||||
@@ -1,754 +0,0 @@
|
||||
from typing import Dict, List, Any, Optional
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Add parent directory to path to import existing tools
|
||||
parent_dir = str(Path(__file__).parent.parent.parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentType, ContentItem, Platform
|
||||
from lib.ai_seo_tools.content_calendar.utils.error_handling import handle_calendar_error
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.ai_seo_tools.content_gap_analysis.main import ContentGapAnalysis
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AIGenerator:
    """AI-powered content generation and enhancement.

    The class bundles two kinds of methods:

    * Template builders (``generate_content``, ``enhance_*``,
      ``generate_series_content``) that assemble a structured dict from
      their arguments without calling a model.
    * Model-backed generators (``generate_headings``,
      ``generate_subheadings``, ``generate_key_points``,
      ``generate_content_flow``, ``generate_ai_suggestions``) that build a
      prompt, send it through ``llm_text_gen`` and normalize the JSON reply.

    Public methods catch exceptions, log them, and return an empty
    dict/list rather than raising.
    """

    def __init__(self):
        self.logger = logging.getLogger('content_calendar.ai_generator')
        self.logger.info("Initializing AIGenerator")
        self._setup_logging()
        self._load_ai_tools()

    def _setup_logging(self):
        """Configure the module-level logger with a stream handler.

        The handler is attached only once: this method runs on every
        instantiation, and adding a fresh handler each time would make
        every log record print once per AIGenerator ever created.
        """
        logger.setLevel(logging.INFO)
        if any(getattr(h, '_ai_generator_handler', False) for h in logger.handlers):
            return

        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        ))
        # Marker used above to recognize the handler installed by this class.
        handler._ai_generator_handler = True
        logger.addHandler(handler)

    def _load_ai_tools(self):
        """Load and initialize AI tools.

        Raises:
            Exception: Re-raises whatever the tool constructors raise, after
                logging it.
        """
        try:
            # Initialize AI tools
            self.gap_analyzer = ContentGapAnalysis()
            self.title_generator = ai_title_generator
            self.meta_generator = metadesc_generator_main
        except Exception as e:
            logger.error(f"Error loading AI tools: {str(e)}")
            raise

    @staticmethod
    def _primary_platform_name(content_item) -> str:
        """Return the name of the item's first platform, or 'Unknown'."""
        return content_item.platforms[0].name if content_item.platforms else 'Unknown'

    def generate_content(self, content_item: ContentItem, target_audience: Dict[str, Any]) -> Dict[str, Any]:
        """Build a base content skeleton for a content item.

        The structure is a fixed template filled in with the item's title;
        no model call is made in this method.

        Args:
            content_item: Item supplying ``title``, ``platforms`` and
                ``content_type``.
            target_audience: Settings dict; ``content_settings.tone`` and
                ``content_settings.length`` are read, defaulting to
                'professional' and 'medium'.

        Returns:
            Dict with 'title', 'content_flow' and 'metadata', or ``{}`` if
            anything fails.
        """
        try:
            self.logger.info(f"Generating content for: {content_item.title}")

            title = content_item.title
            content_settings = target_audience.get('content_settings', {})

            # Generate content based on type and platform
            return {
                'title': title,
                'content_flow': {
                    'introduction': {
                        'summary': f"An engaging introduction about {title}",
                        'key_points': [
                            f"Key point 1 about {title}",
                            f"Key point 2 about {title}",
                            f"Key point 3 about {title}"
                        ]
                    },
                    'main_content': {
                        'sections': [
                            {
                                'title': f"Section 1: Understanding {title}",
                                'content': f"Detailed content about {title}",
                                'subsections': []
                            },
                            {
                                'title': f"Section 2: Best Practices for {title}",
                                'content': "Best practices and recommendations",
                                'subsections': []
                            }
                        ]
                    },
                    'conclusion': {
                        'summary': f"Concluding thoughts about {title}",
                        'call_to_action': "Next steps and actions"
                    }
                },
                'metadata': {
                    'tone': content_settings.get('tone', 'professional'),
                    'length': content_settings.get('length', 'medium'),
                    'platform': self._primary_platform_name(content_item),
                    'content_type': content_item.content_type.name
                }
            }
        except Exception as e:
            self.logger.error(f"Error generating content: {str(e)}", exc_info=True)
            return {}

    def enhance_content(self, content: ContentItem, enhancement_type: str, target_audience: Dict[str, Any]) -> Dict[str, Any]:
        """Build an enhancement record for existing content.

        Args:
            content: Item supplying ``title`` (logged) and ``description``.
            enhancement_type: Label stored in the result metadata.
            target_audience: Stored unchanged in the result metadata.

        Returns:
            Dict with 'content', 'changes' and 'metadata', or ``{}`` on error.
        """
        try:
            self.logger.info(f"Enhancing content: {content.title}")

            # Enhance content based on type
            return {
                'content': f"Enhanced version of {content.description}",
                'changes': [
                    "Improved readability",
                    "Enhanced engagement elements",
                    "Optimized for target audience"
                ],
                'metadata': {
                    'enhancement_type': enhancement_type,
                    'target_audience': target_audience
                }
            }
        except Exception as e:
            self.logger.error(f"Error enhancing content: {str(e)}", exc_info=True)
            return {}

    def enhance_for_platform(self, content: Dict[str, Any], platform: Platform, enhancement_type: str) -> Dict[str, Any]:
        """Build a platform-specific enhancement record.

        Args:
            content: Content dict; its 'content' value is carried over.
            platform: Platform whose ``name`` is recorded.
            enhancement_type: Label stored in the result metadata.

        Returns:
            Dict with 'content', 'changes' and 'metadata', or ``{}`` on error.
        """
        try:
            self.logger.info(f"Enhancing content for platform: {platform.name}")

            # Platform-specific enhancements
            return {
                'content': content.get('content', ''),
                'changes': [
                    f"Optimized for {platform.name}",
                    "Platform-specific formatting",
                    "Enhanced engagement elements"
                ],
                'metadata': {
                    'platform': platform.name,
                    'enhancement_type': enhancement_type
                }
            }
        except Exception as e:
            self.logger.error(f"Error enhancing for platform: {str(e)}", exc_info=True)
            return {}

    def enhance_variant(self, content: Dict[str, Any], variant_type: str, optimization_goals: List[str]) -> Dict[str, Any]:
        """Build an enhancement record for an A/B-test variant.

        Args:
            content: Content dict; its 'content' value is carried over.
            variant_type: Label stored in the result metadata.
            optimization_goals: Goal names, joined into the change summary.

        Returns:
            Dict with 'content', 'changes' and 'metadata', or ``{}`` on error.
        """
        try:
            self.logger.info(f"Enhancing variant: {variant_type}")

            # Variant-specific enhancements
            return {
                'content': content.get('content', ''),
                'changes': [
                    f"Optimized for {', '.join(optimization_goals)}",
                    "Enhanced variant-specific elements",
                    "Improved engagement metrics"
                ],
                'metadata': {
                    'variant_type': variant_type,
                    'optimization_goals': optimization_goals
                }
            }
        except Exception as e:
            self.logger.error(f"Error enhancing variant: {str(e)}", exc_info=True)
            return {}

    def enhance_for_seo(self, content: Dict[str, Any], seo_goals: List[str]) -> Dict[str, Any]:
        """Build an SEO enhancement record.

        Args:
            content: Content dict; its 'content' value is carried over.
            seo_goals: Goal names, joined into the change summary.

        Returns:
            Dict with 'content', 'changes' and 'metadata', or ``{}`` on error.
        """
        try:
            self.logger.info("Enhancing content for SEO")

            # SEO-specific enhancements
            return {
                'content': content.get('content', ''),
                'changes': [
                    f"Optimized for {', '.join(seo_goals)}",
                    "Enhanced keyword placement",
                    "Improved meta information"
                ],
                'metadata': {
                    'seo_goals': seo_goals
                }
            }
        except Exception as e:
            self.logger.error(f"Error enhancing for SEO: {str(e)}", exc_info=True)
            return {}

    def generate_series_content(self, content_item: ContentItem, series_info: Dict[str, Any]) -> Dict[str, Any]:
        """Build a content skeleton for one part of a series.

        Args:
            content_item: Item supplying ``title``, ``platforms`` and
                ``content_type``.
            series_info: Must contain 'part_number', 'total_parts' and
                'topic'. The two counters are compared and incremented, so
                they need to be numeric.

        Returns:
            Dict with 'title', 'content_flow' and 'metadata', or ``{}`` on
            error (including a missing ``series_info`` key).
        """
        try:
            self.logger.info(f"Generating series content: {content_item.title}")

            part = series_info['part_number']
            total = series_info['total_parts']
            topic = series_info['topic']

            # The last part closes the series instead of previewing a next one.
            next_part = f"Preview of part {part + 1}" if part < total else "Series conclusion"

            # Generate series-specific content
            return {
                'title': content_item.title,
                'content_flow': {
                    'introduction': {
                        'summary': f"Part {part} of {total} about {topic}",
                        'key_points': [
                            f"Key point 1 for part {part}",
                            f"Key point 2 for part {part}",
                            f"Key point 3 for part {part}"
                        ]
                    },
                    'main_content': {
                        'sections': [
                            {
                                'title': f"Section 1: Part {part} Overview",
                                'content': f"Detailed content for part {part}",
                                'subsections': []
                            },
                            {
                                'title': f"Section 2: Part {part} Details",
                                'content': "Specific details and information",
                                'subsections': []
                            }
                        ]
                    },
                    'conclusion': {
                        'summary': f"Concluding thoughts for part {part}",
                        'next_part': next_part
                    }
                },
                'metadata': {
                    'series_info': series_info,
                    'platform': self._primary_platform_name(content_item),
                    'content_type': content_item.content_type.name
                }
            }
        except Exception as e:
            self.logger.error(f"Error generating series content: {str(e)}", exc_info=True)
            return {}

    @handle_calendar_error
    def generate_headings(
        self,
        title: str,
        content_type: ContentType,
        context: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate content headings using AI.

        Args:
            title: Content title
            content_type: Type of content
            context: Content context from gap analysis; 'website_url' is
                passed to the gap analyzer (empty string when absent)

        Returns:
            List of generated headings with metadata, or ``[]`` on error
        """
        try:
            # Get content gaps and opportunities
            gaps = self.gap_analyzer.analyze_gaps(context.get('website_url', ''))

            # Generate headings based on content type and gaps
            prompt = self._create_heading_prompt(title, content_type, gaps)
            headings = self._call_ai_model(prompt)

            return self._format_headings(headings)
        except Exception as e:
            logger.error(f"Error generating headings: {str(e)}")
            return []

    @handle_calendar_error
    def generate_subheadings(
        self,
        main_heading: Dict[str, Any],
        content_type: ContentType,
        context: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate subheadings for a main heading.

        Args:
            main_heading: Main heading to generate subheadings for; must
                contain a 'title' key
            content_type: Type of content
            context: Content context

        Returns:
            List of generated subheadings, or ``[]`` on error
        """
        try:
            # Create prompt for subheading generation
            prompt = self._create_subheading_prompt(
                main_heading,
                content_type,
                context
            )

            # Generate subheadings
            subheadings = self._call_ai_model(prompt)

            return self._format_subheadings(subheadings)
        except Exception as e:
            logger.error(f"Error generating subheadings: {str(e)}")
            return []

    @handle_calendar_error
    def generate_key_points(
        self,
        title: str,
        content_type: ContentType,
        context: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Generate key points for content.

        Args:
            title: Content title
            content_type: Type of content
            context: Content context

        Returns:
            List of key points with supporting information, or ``[]`` on error
        """
        try:
            # Generate title and meta description for SEO context
            seo_title = self.title_generator(title)
            meta_desc = self.meta_generator(title)

            # Create prompt for key points
            prompt = self._create_key_points_prompt(
                title,
                content_type,
                {'title': seo_title, 'meta_description': meta_desc},
                context
            )

            # Generate key points
            points = self._call_ai_model(prompt)

            return self._format_key_points(points)
        except Exception as e:
            logger.error(f"Error generating key points: {str(e)}")
            return []

    @handle_calendar_error
    def generate_content_flow(
        self,
        title: str,
        content_type: ContentType,
        outline: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate content flow and structure.

        Args:
            title: Content title
            content_type: Type of content
            outline: Content outline with headings and key points

        Returns:
            Dictionary containing content flow and structure, or ``{}`` on error
        """
        try:
            # Create prompt for content flow
            prompt = self._create_flow_prompt(title, content_type, outline)

            # Generate content flow
            flow = self._call_ai_model(prompt)

            return self._format_content_flow(flow)
        except Exception as e:
            logger.error(f"Error generating content flow: {str(e)}")
            return {}

    def _create_heading_prompt(
        self,
        title: str,
        content_type: ContentType,
        gaps: Dict[str, Any]
    ) -> str:
        """Create prompt for heading generation.

        ``gaps`` is embedded via ``json.dumps`` and so must be JSON-serializable.
        """
        return f"""
Generate main headings for a {content_type.value} titled "{title}".
Consider the following content gaps and opportunities:
{json.dumps(gaps, indent=2)}

For each heading, provide:
1. Title
2. Level (1 for main headings)
3. Key keywords to include
4. Brief summary of what this section should cover

Format the response as a JSON array of heading objects.
"""

    def _create_subheading_prompt(
        self,
        main_heading: Dict[str, Any],
        content_type: ContentType,
        context: Dict[str, Any]
    ) -> str:
        """Create prompt for subheading generation.

        ``context`` is accepted for signature parity but not used in the prompt.
        """
        return f"""
Generate subheadings for the main heading "{main_heading['title']}"
in a {content_type.value}.

Main heading details:
{json.dumps(main_heading, indent=2)}

For each subheading, provide:
1. Title
2. Level (2 for subheadings)
3. Key keywords to include
4. Brief summary of what this subsection should cover

Format the response as a JSON array of subheading objects.
"""

    def _create_key_points_prompt(
        self,
        title: str,
        content_type: ContentType,
        seo_data: Dict[str, Any],
        context: Dict[str, Any]
    ) -> str:
        """Create prompt for key points generation.

        ``context`` is accepted for signature parity but not used in the prompt.
        """
        return f"""
Generate key points for a {content_type.value} titled "{title}".

SEO Requirements:
{json.dumps(seo_data, indent=2)}

For each key point, provide:
1. Main point
2. Importance level (high/medium/low)
3. Supporting evidence or examples
4. Related keywords to include

Format the response as a JSON array of key point objects.
"""

    def _create_flow_prompt(
        self,
        title: str,
        content_type: ContentType,
        outline: Dict[str, Any]
    ) -> str:
        """Create prompt for content flow generation."""
        return f"""
Generate content flow and structure for a {content_type.value} titled "{title}".

Content Outline:
{json.dumps(outline, indent=2)}

Provide:
1. Introduction structure
2. Main sections flow
3. Conclusion approach
4. Transition points between sections
5. Content pacing recommendations

Format the response as a JSON object with these sections.
"""

    def _call_ai_model(self, prompt: str) -> Any:
        """
        Call the AI model with the given prompt.

        Args:
            prompt: The prompt to send to the AI model

        Returns:
            The AI model's response, parsed as JSON. A response that is
            already a dict or list is returned unchanged. ``{}`` is returned
            when the call fails or the response is not valid JSON.
        """
        try:
            # Call the AI model
            response = llm_text_gen(
                prompt=prompt,
                max_tokens=1000,
                temperature=0.7,
                top_p=0.9,
                frequency_penalty=0.5,
                presence_penalty=0.5
            )

            # An already-structured reply needs no parsing; passing it to
            # json.loads would raise TypeError and discard a usable result.
            if isinstance(response, (dict, list)):
                return response

            # Parse the response as JSON
            try:
                return json.loads(response)
            except json.JSONDecodeError as e:
                logger.error(f"Error parsing AI response as JSON: {str(e)}")
                logger.error(f"Raw response: {response}")
                return {}
        except Exception as e:
            logger.error(f"Error calling AI model: {str(e)}")
            return {}

    @staticmethod
    def _dict_items(value: Any) -> List[Dict[str, Any]]:
        """Return the dict elements of ``value`` if it is a list, else ``[]``.

        ``_call_ai_model`` returns ``{}`` on failure and otherwise whatever
        JSON the model produced, so formatters cannot assume a list of dicts.
        """
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, dict)]

    def _format_headings(self, headings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format and validate generated headings.

        Entries that are not dicts are skipped; missing fields get defaults.
        """
        return [
            {
                'title': heading.get('title', ''),
                'level': heading.get('level', 1),
                'keywords': heading.get('keywords', []),
                'summary': heading.get('summary', '')
            }
            for heading in self._dict_items(headings)
        ]

    def _format_subheadings(self, subheadings: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format and validate generated subheadings.

        Entries that are not dicts are skipped; missing fields get defaults.
        """
        return [
            {
                'title': subheading.get('title', ''),
                'level': subheading.get('level', 2),
                'keywords': subheading.get('keywords', []),
                'summary': subheading.get('summary', '')
            }
            for subheading in self._dict_items(subheadings)
        ]

    def _format_key_points(self, points: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format and validate generated key points.

        Entries that are not dicts are skipped. The model's 'evidence' and
        'keywords' fields are exposed as 'supporting_evidence' and
        'related_keywords'.
        """
        return [
            {
                'point': point.get('point', ''),
                'importance': point.get('importance', 'medium'),
                'supporting_evidence': point.get('evidence', []),
                'related_keywords': point.get('keywords', [])
            }
            for point in self._dict_items(points)
        ]

    def _format_content_flow(self, flow: Dict[str, Any]) -> Dict[str, Any]:
        """Format and validate generated content flow.

        A non-dict ``flow`` is treated as empty, so every key falls back to
        its default. The model's 'pacing' field is exposed as 'content_pacing'.
        """
        if not isinstance(flow, dict):
            flow = {}
        return {
            'introduction': flow.get('introduction', {}),
            'main_sections': flow.get('main_sections', []),
            'conclusion': flow.get('conclusion', {}),
            'transitions': flow.get('transitions', []),
            'content_pacing': flow.get('pacing', {})
        }

    def _extract_embedded_json(self, text: str) -> Optional[Any]:
        """Parse the outermost ``{...}`` span of ``text`` as JSON.

        Used when the model wrapped its JSON in extra prose. Returns the
        parsed value, or ``None`` (after logging) when no such span exists
        or it is not valid JSON.
        """
        # Find the first '{' and last '}'
        start = text.find('{')
        end = text.rfind('}') + 1
        if start < 0 or end <= start:
            self.logger.error("Error extracting JSON from response: no JSON object found")
            return None
        try:
            return json.loads(text[start:end])
        except json.JSONDecodeError as e:
            self.logger.error(f"Error extracting JSON from response: {str(e)}")
            return None

    def generate_ai_suggestions(
        self,
        content_type: str,
        topic: str,
        audience: str,
        goals: List[str],
        tone: str,
        length: str,
        model_settings: Dict[str, Any],
        style_preferences: List[str],
        seo_preferences: Dict[str, Any],
        platform_settings: Dict[str, Any],
        platform: str
    ) -> List[Dict[str, Any]]:
        """
        Generate AI content suggestions based on input parameters.

        Builds one prompt from all parameters, sends it through
        ``llm_text_gen`` and formats the 'suggestions' array of the reply.

        Args:
            content_type: Content type label, echoed into each suggestion.
            topic: Topic to generate suggestions for.
            audience: Target audience description.
            goals: Goal names (joined with ', ').
            tone: Desired tone.
            length: Desired length.
            model_settings: May contain 'Creativity Level' and
                'Formality Level' (defaults 'medium' / 'professional').
            style_preferences: Style element names (joined with ', ').
            seo_preferences: May contain 'Keyword Density',
                'Internal Linking', 'External Linking'
                (defaults 2 / True / True).
            platform_settings: Iterated and joined with ', ' (for a dict,
                that is its keys).
            platform: Platform label, echoed into each suggestion.

        Returns:
            List of formatted suggestion dicts. Always a list: ``[]`` when
            the model returns nothing, the reply cannot be parsed, or any
            other error occurs.
        """
        try:
            self.logger.info(f"Generating AI suggestions for topic: {topic}")

            # Create a comprehensive prompt for content generation
            prompt = f"""Generate content suggestions for the following parameters:

Content Type: {content_type}
Topic: {topic}
Target Audience: {audience}
Goals: {', '.join(goals)}
Tone: {tone}
Length: {length}

Style Preferences:
- Creativity Level: {model_settings.get('Creativity Level', 'medium')}
- Formality Level: {model_settings.get('Formality Level', 'professional')}
- Style Elements: {', '.join(style_preferences)}

SEO Preferences:
- Keyword Density: {seo_preferences.get('Keyword Density', 2)}%
- Internal Linking: {'Enabled' if seo_preferences.get('Internal Linking', True) else 'Disabled'}
- External Linking: {'Enabled' if seo_preferences.get('External Linking', True) else 'Disabled'}

Platform Settings:
- Platform: {platform}
- Platform-specific requirements: {', '.join(platform_settings)}

Please generate 3 different content suggestions. Format your response as a valid JSON object with the following structure:
{{
"suggestions": [
{{
"title": "string",
"introduction": "string",
"key_points": ["string"],
"main_sections": [
{{
"title": "string",
"content": "string",
"engagement_elements": ["string"],
"seo_elements": ["string"]
}}
],
"conclusion": "string",
"seo_elements": ["string"],
"platform_optimizations": ["string"],
"engagement_strategies": ["string"],
"content_metrics": {{
"estimated_read_time": "string",
"word_count": "number",
"keyword_density": "number",
"engagement_score": "number"
}}
}}
]
}}

IMPORTANT: Your response must be a valid JSON object. Do not include any text before or after the JSON object."""

            # Generate content using llm_text_gen
            generated_content = llm_text_gen(
                prompt=prompt,
                max_tokens=1000,
                temperature=0.7,
                top_p=0.9,
                frequency_penalty=0.5,
                presence_penalty=0.5
            )

            if not generated_content:
                self.logger.error("No content generated from AI model")
                return []

            # Parse the generated content
            structure_error = "Invalid content structure in AI response"
            if isinstance(generated_content, dict):
                # If generated_content is already a dict, use it directly
                content_data = generated_content
            else:
                try:
                    # Try to parse as JSON string
                    content_data = json.loads(generated_content)
                except json.JSONDecodeError as e:
                    self.logger.error(f"Error parsing generated content: {str(e)}")
                    # Try to extract JSON from the response if it's wrapped
                    # in other text. When nothing can be extracted we return
                    # [] explicitly; this path used to fall through and
                    # return None.
                    content_data = self._extract_embedded_json(generated_content)
                    if content_data is None:
                        return []
                    structure_error = "Invalid content structure in extracted JSON"

            if not isinstance(content_data, dict) or 'suggestions' not in content_data:
                self.logger.error(structure_error)
                return []

            return self._format_suggestions(
                content_data,
                content_type,
                audience,
                goals,
                tone,
                length,
                model_settings,
                seo_preferences,
                platform
            )
        except Exception as e:
            self.logger.error(f"Error generating AI suggestions: {str(e)}", exc_info=True)
            return []

    def _format_suggestions(
        self,
        content_data: Dict[str, Any],
        content_type: str,
        audience: str,
        goals: List[str],
        tone: str,
        length: str,
        model_settings: Dict[str, Any],
        seo_preferences: Dict[str, Any],
        platform: str
    ) -> List[Dict[str, Any]]:
        """Format and process suggestions from content data.

        Only 'title', 'introduction', 'platform_optimizations' and
        'seo_elements' are taken from each model suggestion; the remaining
        fields are derived from the request parameters or are fixed
        placeholder values. 'engagement_score' and 'conversion' are computed
        from the suggestion's position in the list, not measured.

        Missing settings fall back to the same defaults the prompt uses
        ('medium', 'professional', 2, enabled, enabled) instead of raising
        KeyError. Entries that are not dicts are skipped.
        """
        creativity = model_settings.get('Creativity Level', 'medium')
        formality = model_settings.get('Formality Level', 'professional')
        keyword_density = seo_preferences.get('Keyword Density', 2)
        internal_linking = seo_preferences.get('Internal Linking', True)
        external_linking = seo_preferences.get('External Linking', True)

        suggestions = []
        for suggestion in content_data.get('suggestions') or []:
            if not isinstance(suggestion, dict):
                continue
            position = len(suggestions)
            suggestions.append({
                'title': suggestion.get('title', ''),
                'type': content_type,
                'platform': platform,
                'audience': audience,
                'impact': f"High impact for {', '.join(goals)}",
                'preview': suggestion.get('introduction', ''),
                'style_elements': [
                    f"Tone: {tone}",
                    f"Length: {length}",
                    f"Creativity: {creativity}",
                    f"Formality: {formality}"
                ],
                'seo_elements': [
                    f"Keyword Density: {keyword_density}%",
                    "Internal Linking: Enabled" if internal_linking else "Internal Linking: Disabled",
                    "External Linking: Enabled" if external_linking else "External Linking: Disabled"
                ],
                'engagement_score': f"{85 + position*5}%",
                'reach': 'High',
                'conversion': f"{3.5 + position*0.5}%",
                'seo_impact': 'Strong',
                'platform_optimizations': suggestion.get('platform_optimizations', []),
                'variations': [
                    "Alternative headline",
                    "Different content angle",
                    "Alternative format"
                ],
                'seo_recommendations': suggestion.get('seo_elements', []),
                'media_suggestions': [
                    "Featured image",
                    "Supporting graphics",
                    "Social media visuals"
                ]
            })
        return suggestions
|
||||
@@ -1,163 +0,0 @@
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Any, Optional
|
||||
import logging
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from lib.database.models import ContentItem, ContentType, Platform, get_engine, get_session, init_db
|
||||
from ..integrations.seo_tools import SEOToolsIntegration
|
||||
from ..integrations.gap_analyzer import GapAnalyzerIntegration
|
||||
from ..utils.date_utils import calculate_publish_dates
|
||||
from ..utils.error_handling import handle_calendar_error
|
||||
|
||||
# Configure the root logger as soon as this module is imported: records at
# DEBUG and above go to stdout and are appended to content_calendar_debug.log
# (a relative path, so the file is created in the current working directory).
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('content_calendar_debug.log', mode='a')
    ]
)
logger = logging.getLogger(__name__)

# Database handles created once at import time. `session` is the single
# session object used by every CalendarManager method in this module.
engine = get_engine()
init_db(engine)
session = get_session(engine)
|
||||
|
||||
class CalendarManager:
    """
    Main calendar management system that coordinates content planning,
    scheduling, and optimization.

    Every method persists through the module-level ``session``. Whenever a
    write fails the session is rolled back before the error propagates, so a
    failed operation does not leave pending or broken state behind for the
    next caller.
    """
    def __init__(self):
        """Create the logger and the SEO / gap-analysis integrations."""
        self.logger = logging.getLogger('content_calendar.manager')
        self.logger.info("Initializing CalendarManager")
        self.seo_tools = SEOToolsIntegration()
        self.gap_analyzer = GapAnalyzerIntegration()
        self.logger.info("CalendarManager initialized successfully")

    def _commit(self) -> None:
        """Commit the shared session; roll it back and re-raise on failure."""
        try:
            session.commit()
        except Exception:
            session.rollback()
            raise

    @handle_calendar_error
    def create_calendar(
        self,
        start_date: datetime,
        duration: str,  # 'weekly', 'monthly', 'quarterly'
        platforms: List[str],
        website_url: str
    ) -> List[ContentItem]:
        """Build a calendar from a gap analysis of ``website_url`` and store it.

        Args:
            start_date: Passed to ``calculate_publish_dates`` as the start.
            duration: Passed to ``calculate_publish_dates``.
            platforms: Platforms attached to every generated topic.
            website_url: Site handed to the gap analyzer.

        Returns:
            The scheduled items returned by ``calculate_publish_dates``.

        Raises:
            Exception: Any failure is logged, the session is rolled back and
                the original exception is re-raised.
        """
        self.logger.info(f"Creating new calendar for {website_url}")
        self.logger.debug(f"Parameters: start_date={start_date}, duration={duration}, platforms={platforms}")
        try:
            gap_analysis = self.gap_analyzer.analyze_gaps(website_url)
            topics = self._generate_topics(gap_analysis, platforms)
            schedule = calculate_publish_dates(
                topics=topics,
                start_date=start_date,
                duration=duration
            )
            # Add to DB
            for topic in schedule:
                session.add(topic)
            session.commit()
            self.logger.info("Calendar created and content scheduled in DB successfully")
            return schedule
        except Exception as e:
            # Discard anything already added so a later, unrelated commit
            # cannot persist a half-built calendar.
            session.rollback()
            self.logger.error(f"Error creating calendar: {str(e)}", exc_info=True)
            raise

    def _generate_topics(
        self,
        gap_analysis: Dict[str, Any],
        platforms: List[str]
    ) -> List[ContentItem]:
        """Turn every entry of ``gap_analysis['gaps']`` into an optimized topic."""
        return [
            self._optimize_topic(self._generate_topic_from_gap(gap, platforms))
            for gap in gap_analysis['gaps']
        ]

    def _generate_topic_from_gap(
        self,
        gap: Dict[str, Any],
        platforms: List[str]
    ) -> ContentItem:
        """Create a draft ``ContentItem`` for a single content gap."""
        # NOTE(review): _generate_title, _generate_description and
        # _determine_content_type are not defined in this class; unless a
        # subclass or mixin supplies them this raises AttributeError.
        topic_data = {
            'title': self._generate_title(gap),
            'description': self._generate_description(gap),
            'keywords': gap.get('keywords', []),
            'platforms': platforms,
            'content_type': self._determine_content_type(gap, platforms),
            # Placeholder date at creation time.
            'publish_date': datetime.now(),
            'status': 'Draft',
            'author': None,
            'tags': [],
            'notes': None,
            'seo_data': {}
        }
        return ContentItem(**topic_data)

    def _optimize_topic(self, topic: ContentItem) -> ContentItem:
        """Optimize the title and fill ``seo_data`` via the SEO tools; returns ``topic``."""
        topic.title = self.seo_tools.optimize_title(topic.title)
        topic.seo_data['meta_description'] = self.seo_tools.generate_meta_description(topic.description)
        topic.seo_data['structured_data'] = self.seo_tools.generate_structured_data(topic.content_type)
        return topic

    def get_all_content(self) -> List[ContentItem]:
        """Return every stored ``ContentItem``."""
        return session.query(ContentItem).all()

    def remove_content(self, content_id):
        """Delete the item with ``content_id``; do nothing if it does not exist."""
        content = session.query(ContentItem).get(content_id)
        if content:
            session.delete(content)
            self._commit()

    def update_content(self, content_id, **kwargs):
        """Set each keyword argument as an attribute on the item and commit.

        Does nothing if no item with ``content_id`` exists.
        """
        content = session.query(ContentItem).get(content_id)
        if content:
            for key, value in kwargs.items():
                setattr(content, key, value)
            self._commit()

    def get_calendar(self) -> Optional[List[ContentItem]]:
        """
        Get the current calendar.
        """
        self.logger.debug("Getting current calendar")
        return self.get_all_content()

    def update_calendar(self, calendar: List[ContentItem]) -> None:
        """
        Update the current calendar.

        Adds every item of ``calendar`` to the session and commits once. If
        adding or committing fails, the session is rolled back and the error
        is re-raised.
        """
        # The previous implementation first loaded every stored item and
        # threw the result away; that query had no effect and is omitted.
        try:
            for content in calendar:
                session.add(content)
            session.commit()
        except Exception:
            session.rollback()
            raise

    def export_calendar(self) -> Optional[List[Dict[str, Any]]]:
        """Export the current calendar.

        Returns:
            A list with one ``to_dict()`` result per item, or ``None`` when
            the calendar is empty or serialization fails.
        """
        self.logger.info("Exporting calendar")
        calendar = self.get_calendar()
        if not calendar:
            self.logger.warning("No calendar to export")
            return None

        try:
            calendar_data = [content.to_dict() for content in calendar]
            self.logger.info("Calendar exported successfully")
            return calendar_data
        except Exception as e:
            self.logger.error(f"Error exporting calendar: {str(e)}", exc_info=True)
            return None

    def save_calendar_to_json(self):
        """Write the calendar to ``calendar_data.json`` in the working directory.

        Nothing is written when the calendar is empty.
        """
        calendar = self.get_calendar()
        if calendar:
            # Serialize through to_dict() (as export_calendar does). Dumping
            # the ORM objects directly with default=str only stored their
            # string representation, not their data.
            payload = [content.to_dict() for content in calendar]
            with open("calendar_data.json", "w", encoding="utf-8") as f:
                # default=str still covers values such as datetimes inside
                # the dictionaries.
                json.dump(payload, f, indent=2, default=str)

    def load_calendar_from_json(self):
        """Load ``calendar_data.json`` (if present) and pass it to ``update_calendar``."""
        if os.path.exists("calendar_data.json"):
            with open("calendar_data.json", "r", encoding="utf-8") as f:
                data = json.load(f)
            # NOTE(review): `data` holds plain JSON values, not ContentItem
            # instances, so update_calendar presumably rejects it when it
            # calls session.add(). Rebuilding items needs the ContentItem
            # constructor contract, which is not visible here - confirm.
            self.update_calendar(data)
|
||||
@@ -1,151 +0,0 @@
|
||||
from typing import Dict, List, Any, Optional
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
# Add parent directory to path to import existing tools
|
||||
parent_dir = str(Path(__file__).parent.parent.parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentType, ContentItem, Platform
|
||||
from lib.ai_seo_tools.content_calendar.utils.error_handling import handle_calendar_error
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.ai_seo_tools.content_gap_analysis.main import ContentGapAnalysis
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from .ai_generator import AIGenerator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentBriefGenerator:
    """
    Generates comprehensive content briefs using AI-powered analysis.
    """

    def __init__(self):
        """Create the instance logger, configure logging and load the AI tools."""
        self.logger = logging.getLogger('content_calendar.content_brief')
        self.logger.info("Initializing ContentBriefGenerator")
        self._setup_logging()
        self._load_ai_tools()

    def _setup_logging(self):
        """Configure logging for content brief generator.

        Sets the module logger to INFO and attaches one stream handler.
        """
        logger.setLevel(logging.INFO)
        # The module logger is shared by all instances. Without this guard
        # each new generator attached another handler, so every message was
        # printed once per instance created so far.
        if logger.handlers:
            return
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    def _load_ai_tools(self):
        """Load and initialize AI tools.

        Raises:
            Exception: Any initialization error is logged and re-raised.
        """
        try:
            # Initialize AI tools
            self.gap_analyzer = ContentGapAnalysis()
            self.title_generator = ai_title_generator
            self.meta_generator = metadesc_generator_main
            self.ai_generator = AIGenerator()

        except Exception as e:
            logger.error(f"Error loading AI tools: {str(e)}")
            raise

    @handle_calendar_error
    def generate_brief(
        self,
        content_item: ContentItem,
        target_audience: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive content brief.

        Args:
            content_item: Content item to generate brief for
            target_audience: Optional target audience data

        Returns:
            Dictionary containing the content brief

        Raises:
            Exception: Any failure is logged and re-raised.
        """
        try:
            logger.info(f"Generating content brief for: {content_item.title}")

            # Generate content outline
            outline = self._generate_outline(content_item)

            # Generate key points
            key_points = self.ai_generator.generate_key_points(
                title=content_item.title,
                content_type=content_item.content_type,
                context=content_item.context
            )

            # Generate content flow
            content_flow = self.ai_generator.generate_content_flow(
                title=content_item.title,
                content_type=content_item.content_type,
                outline=outline
            )

            # Compile the brief
            brief = {
                'title': content_item.title,
                'content_type': content_item.content_type.value,
                'outline': outline,
                'key_points': key_points,
                'content_flow': content_flow,
                'target_audience': target_audience or {},
                'seo_data': content_item.seo_data,
                'platform_specs': content_item.platform_specs
            }

            logger.info("Content brief generated successfully")
            return brief

        except Exception as e:
            logger.error(f"Error generating content brief: {str(e)}")
            raise

    def _generate_outline(
        self,
        content_item: ContentItem
    ) -> Dict[str, Any]:
        """
        Generate content outline with headings and subheadings.

        Args:
            content_item: Content item to generate outline for

        Returns:
            Dictionary with 'main_headings' and 'subheadings' (keyed by the
            main heading's title). On any error both are returned empty.
        """
        try:
            # Generate main headings
            main_headings = self.ai_generator.generate_headings(
                title=content_item.title,
                content_type=content_item.content_type,
                context=content_item.context
            )

            # Generate subheadings for each main heading
            subheadings = {}
            for heading in main_headings:
                heading_subheadings = self.ai_generator.generate_subheadings(
                    main_heading=heading,
                    content_type=content_item.content_type,
                    context=content_item.context
                )
                subheadings[heading['title']] = heading_subheadings

            return {
                'main_headings': main_headings,
                'subheadings': subheadings
            }

        except Exception as e:
            # Best effort: the brief is still produced, with an empty outline.
            logger.error(f"Error generating outline: {str(e)}")
            return {
                'main_headings': [],
                'subheadings': {}
            }
|
||||
@@ -1,626 +0,0 @@
|
||||
from typing import Dict, List, Any, Optional
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Add parent directory to path to import existing tools
|
||||
parent_dir = str(Path(__file__).parent.parent.parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentItem, ContentType, Platform
|
||||
from ..utils.error_handling import handle_calendar_error
|
||||
from lib.ai_seo_tools.content_gap_analysis.main import ContentGapAnalysis
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from lib.ai_seo_tools.content_calendar.core.content_repurposer import SmartContentRepurposingEngine
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentGenerator:
|
||||
"""
|
||||
Enhanced content generator with smart repurposing capabilities.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Set up logging, load the AI tools and create the repurposing engine."""
    self.logger = logging.getLogger('content_calendar.content_generator')
    self.logger.info("Initializing ContentGenerator")
    self._setup_logging()
    self._load_ai_tools()
    # Initialize the Smart Content Repurposing Engine
    self.repurposing_engine = SmartContentRepurposingEngine()
|
||||
|
||||
def _setup_logging(self):
    """Configure logging for content generator.

    Sets the module logger to INFO and attaches one stream handler.
    """
    logger.setLevel(logging.INFO)
    # The module logger is shared by all instances. Without this guard each
    # new ContentGenerator attached another handler, so every message was
    # printed once per instance created so far.
    if logger.handlers:
        return
    handler = logging.StreamHandler()
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    handler.setFormatter(formatter)
    logger.addHandler(handler)
|
||||
|
||||
def _load_ai_tools(self):
    """Instantiate the gap analyzer and bind the title/meta generator callables.

    Any failure is logged on the module logger and then re-raised.
    """
    try:
        # Initialize AI tools
        gap_analyzer = ContentGapAnalysis()
        self.gap_analyzer = gap_analyzer
        self.title_generator = ai_title_generator
        self.meta_generator = metadesc_generator_main
    except Exception as exc:
        logger.error(f"Error loading AI tools: {str(exc)}")
        raise
|
||||
|
||||
@handle_calendar_error
|
||||
def generate_headings(
    self,
    content_item: ContentItem,
    context: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Produce the normalized main headings for a content item.

    Args:
        content_item: Item whose title and content type seed the generation
        context: Content context from gap analysis

    Returns:
        One dict per heading with 'title', 'level' (default 1), 'keywords'
        and 'summary'; an empty list if anything fails.
    """
    try:
        # Ask the AI helper for raw headings, then normalize each entry.
        raw_headings = self._generate_ai_headings(
            title=content_item.title,
            content_type=content_item.content_type,
            context=context
        )
        return [
            {
                'title': entry['title'],
                'level': entry.get('level', 1),
                'keywords': entry.get('keywords', []),
                'summary': entry.get('summary', '')
            }
            for entry in raw_headings
        ]
    except Exception as e:
        logger.error(f"Error generating headings: {str(e)}")
        return []
|
||||
|
||||
@handle_calendar_error
|
||||
def generate_subheadings(
    self,
    content_item: ContentItem,
    main_headings: List[Dict[str, Any]],
    context: Dict[str, Any]
) -> Dict[str, List[Dict[str, Any]]]:
    """
    Produce normalized subheadings for every main heading.

    Args:
        content_item: Item whose content type is passed to the AI helper
        main_headings: List of main headings
        context: Content context from gap analysis

    Returns:
        Mapping from each main heading's title to its subheading dicts
        ('title', 'level' default 2, 'keywords', 'summary'); an empty dict
        if anything fails.
    """
    try:
        by_heading = {}
        for parent in main_headings:
            generated = self._generate_ai_subheadings(
                main_heading=parent,
                content_type=content_item.content_type,
                context=context
            )
            # Normalize the raw entries before filing them under the parent.
            by_heading[parent['title']] = [
                {
                    'title': child['title'],
                    'level': child.get('level', 2),
                    'keywords': child.get('keywords', []),
                    'summary': child.get('summary', '')
                }
                for child in generated
            ]
        return by_heading
    except Exception as e:
        logger.error(f"Error generating subheadings: {str(e)}")
        return {}
|
||||
|
||||
@handle_calendar_error
|
||||
def generate_key_points(
    self,
    content_item: ContentItem,
    context: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    Produce the normalized key points for a content item.

    Args:
        content_item: Item whose title and content type seed the generation
        context: Content context from gap analysis

    Returns:
        One dict per point with 'point', 'importance' (default 'medium'),
        'supporting_evidence' and 'related_keywords'; an empty list if
        anything fails.
    """
    try:
        raw_points = self._generate_ai_key_points(
            title=content_item.title,
            content_type=content_item.content_type,
            context=context
        )
        # The raw 'evidence'/'keywords' fields are renamed on output.
        return [
            {
                'point': raw['point'],
                'importance': raw.get('importance', 'medium'),
                'supporting_evidence': raw.get('evidence', []),
                'related_keywords': raw.get('keywords', [])
            }
            for raw in raw_points
        ]
    except Exception as e:
        logger.error(f"Error generating key points: {str(e)}")
        return []
|
||||
|
||||
@handle_calendar_error
|
||||
def generate_content_flow(
    self,
    content_item: ContentItem,
    outline: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Build the content flow structure for a content item.

    Args:
        content_item: Item whose title and content type seed the generation
        outline: Content outline with headings and key points

    Returns:
        Dict with 'introduction', 'main_sections', 'conclusion',
        'transitions' and 'content_pacing' (read from the raw 'pacing' key),
        each defaulting to an empty dict/list; an empty dict if anything
        fails.
    """
    # (output key, key in the raw flow, factory for the default value)
    field_map = (
        ('introduction', 'introduction', dict),
        ('main_sections', 'main_sections', list),
        ('conclusion', 'conclusion', dict),
        ('transitions', 'transitions', list),
        ('content_pacing', 'pacing', dict),
    )
    try:
        raw_flow = self._generate_ai_content_flow(
            title=content_item.title,
            content_type=content_item.content_type,
            outline=outline
        )
        return {
            out_key: raw_flow.get(src_key, make_default())
            for out_key, src_key, make_default in field_map
        }
    except Exception as e:
        logger.error(f"Error generating content flow: {str(e)}")
        return {}
|
||||
|
||||
def _generate_ai_headings(
|
||||
self,
|
||||
title: str,
|
||||
content_type: ContentType,
|
||||
context: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Use AI to generate content headings.
|
||||
"""
|
||||
# TODO: Implement AI heading generation
|
||||
# This would use the existing AI tools to generate headings
|
||||
return []
|
||||
|
||||
def _generate_ai_subheadings(
|
||||
self,
|
||||
main_heading: Dict[str, Any],
|
||||
content_type: ContentType,
|
||||
context: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Use AI to generate subheadings.
|
||||
"""
|
||||
# TODO: Implement AI subheading generation
|
||||
return []
|
||||
|
||||
def _generate_ai_key_points(
|
||||
self,
|
||||
title: str,
|
||||
content_type: ContentType,
|
||||
context: Dict[str, Any]
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Use AI to generate key points.
|
||||
"""
|
||||
# TODO: Implement AI key point generation
|
||||
return []
|
||||
|
||||
def _generate_ai_content_flow(
|
||||
self,
|
||||
title: str,
|
||||
content_type: ContentType,
|
||||
outline: Dict[str, Any]
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Use AI to generate content flow.
|
||||
"""
|
||||
# TODO: Implement AI content flow generation
|
||||
return {}
|
||||
|
||||
def generate_variation(self, content: Dict[str, Any], variation_type: str) -> Dict[str, Any]:
    """Build a templated variation of ``content``.

    Args:
        content: Original content; its 'title' and 'metadata' are read
        variation_type: Type of variation to generate ('tone', 'length', 'style', etc.)

    Returns:
        Dictionary with 'title', 'content_flow' and 'metadata' describing
        the variation, or an empty dict if anything fails.
    """
    try:
        self.logger.info(f"Generating {variation_type} variation for content")

        source_title = content.get('title', '')
        label = variation_type.title()
        source_meta = content.get('metadata', {})

        # Generate variation based on type
        introduction = {
            'summary': f"Varied introduction for {source_title}",
            'key_points': [
                f"Varied key point 1 for {variation_type}",
                f"Varied key point 2 for {variation_type}",
                f"Varied key point 3 for {variation_type}"
            ]
        }
        sections = [
            {
                'title': f"Varied Section 1: {label} Approach",
                'content': f"Varied content for {variation_type}",
                'subsections': []
            },
            {
                'title': f"Varied Section 2: {label} Details",
                'content': "Varied details and information",
                'subsections': []
            }
        ]
        conclusion = {
            'summary': f"Varied conclusion for {variation_type}",
            'call_to_action': "Varied call to action"
        }

        return {
            'title': f"{source_title} - {label} Variation",
            'content_flow': {
                'introduction': introduction,
                'main_content': {'sections': sections},
                'conclusion': conclusion
            },
            'metadata': {
                'variation_type': variation_type,
                'original_content': source_title,
                'platform': source_meta.get('platform', 'Unknown'),
                'content_type': source_meta.get('content_type', 'Unknown')
            }
        }

    except Exception as e:
        self.logger.error(f"Error generating variation: {str(e)}")
        return {}
|
||||
|
||||
@handle_calendar_error
|
||||
def repurpose_content_for_platforms(
    self,
    content_item: ContentItem,
    target_platforms: List[Platform],
    strategy: str = 'adaptive'
) -> List[ContentItem]:
    """
    Delegate single-content repurposing to the repurposing engine.

    Args:
        content_item: Original content to repurpose
        target_platforms: List of platforms to create content for
        strategy: Repurposing strategy ('adaptive', 'atomic', 'series')

    Returns:
        Whatever the engine's ``repurpose_single_content`` returns, or an
        empty list if anything fails.
    """
    try:
        self.logger.info(f"Repurposing content '{content_item.title}' for {len(target_platforms)} platforms")

        # Use the repurposing engine to create platform-specific content
        engine = self.repurposing_engine
        pieces = engine.repurpose_single_content(
            content=content_item,
            target_platforms=target_platforms,
            strategy=strategy
        )
    except Exception as e:
        self.logger.error(f"Error repurposing content: {str(e)}")
        return []
    else:
        # Kept outside the try body only for the return; the success log
        # itself must stay guarded because len() may fail.
        try:
            self.logger.info(f"Successfully created {len(pieces)} repurposed content pieces")
        except Exception as e:
            self.logger.error(f"Error repurposing content: {str(e)}")
            return []
        return pieces
|
||||
|
||||
@handle_calendar_error
|
||||
def create_content_series_across_platforms(
    self,
    source_content: ContentItem,
    platforms: List[Platform],
    series_type: str = 'progressive_disclosure'
) -> Dict[str, List[ContentItem]]:
    """
    Delegate creation of a cross-platform content series to the engine.

    Args:
        source_content: Original comprehensive content
        platforms: Target platforms for the series
        series_type: Type of series ('progressive_disclosure', 'platform_native')

    Returns:
        The mapping returned by the engine's ``create_content_series``, or
        an empty dict if anything fails.
    """
    try:
        self.logger.info(f"Creating cross-platform series for '{source_content.title}'")

        # Use the repurposing engine to create a content series
        engine = self.repurposing_engine
        series_content = engine.create_content_series(
            content=source_content,
            platforms=platforms,
            series_type=series_type
        )

        # Count pieces over all platforms for the summary log line.
        total_pieces = sum(map(len, series_content.values()))
        self.logger.info(f"Successfully created series with {total_pieces} pieces across {len(series_content)} platforms")

        return series_content

    except Exception as e:
        self.logger.error(f"Error creating content series: {str(e)}")
        return {}
|
||||
|
||||
@handle_calendar_error
|
||||
def analyze_content_for_repurposing(
    self,
    content_item: ContentItem,
    available_platforms: List[Platform]
) -> Dict[str, Any]:
    """
    Combine the engine's repurposing suggestions with a content analysis.

    Args:
        content_item: Content to analyze
        available_platforms: Available platforms for repurposing

    Returns:
        Dict with 'content_analysis', 'platform_suggestions',
        'strategy_suggestions' and 'estimated_output'; an empty dict if
        anything fails.
    """
    try:
        self.logger.info(f"Analyzing content '{content_item.title}' for repurposing opportunities")

        engine = self.repurposing_engine

        # Get repurposing suggestions from the engine
        suggestions = engine.get_repurposing_suggestions(
            content=content_item,
            available_platforms=available_platforms
        )

        # Add content analysis; the text is the description, else the notes.
        content_text = content_item.description or content_item.notes or ""
        content_atoms = engine.analyze_content_atoms(
            content=content_text,
            title=content_item.title
        )

        if content_text:
            word_count = len(content_text.split())
        else:
            word_count = 0

        content_analysis = {
            'word_count': word_count,
            'content_richness': self._assess_content_richness(content_atoms),
            'repurposing_potential': self._assess_repurposing_potential(content_atoms),
            'content_atoms': content_atoms
        }
        platform_suggestions = suggestions['recommended_platforms']
        strategy_suggestions = suggestions['repurposing_strategies']
        estimated_output = {
            'total_pieces': suggestions['estimated_pieces'],
            'time_savings': f"{suggestions['estimated_pieces'] * 2} hours",
            'content_multiplication': f"{suggestions['estimated_pieces']}x"
        }

        return {
            'content_analysis': content_analysis,
            'platform_suggestions': platform_suggestions,
            'strategy_suggestions': strategy_suggestions,
            'estimated_output': estimated_output
        }

    except Exception as e:
        self.logger.error(f"Error analyzing content for repurposing: {str(e)}")
        return {}
|
||||
|
||||
def _assess_content_richness(self, content_atoms: Dict[str, List[str]]) -> str:
|
||||
"""Assess the richness of content based on extracted atoms."""
|
||||
total_atoms = sum(len(atoms) for atoms in content_atoms.values())
|
||||
|
||||
if total_atoms >= 15:
|
||||
return "High"
|
||||
elif total_atoms >= 8:
|
||||
return "Medium"
|
||||
else:
|
||||
return "Low"
|
||||
|
||||
def _assess_repurposing_potential(self, content_atoms: Dict[str, List[str]]) -> str:
|
||||
"""Assess the repurposing potential based on content atoms."""
|
||||
# Check for diverse content types
|
||||
atom_types_with_content = sum(1 for atoms in content_atoms.values() if atoms)
|
||||
|
||||
if atom_types_with_content >= 4:
|
||||
return "Excellent"
|
||||
elif atom_types_with_content >= 3:
|
||||
return "Good"
|
||||
elif atom_types_with_content >= 2:
|
||||
return "Fair"
|
||||
else:
|
||||
return "Limited"
|
||||
|
||||
@handle_calendar_error
|
||||
def generate_content_with_repurposing_plan(
    self,
    content_item: ContentItem,
    context: Dict[str, Any],
    target_platforms: List[Platform] = None
) -> Dict[str, Any]:
    """
    Generate the content structure and, optionally, a repurposing plan.

    Args:
        content_item: Content item to generate
        context: Content context from gap analysis
        target_platforms: Platforms to include in repurposing plan; when
            empty or None the plan is an empty dict

    Returns:
        Dict with 'content' (outline, content flow, metadata) and
        'repurposing_plan'; an empty dict if anything fails.
    """
    try:
        self.logger.info(f"Generating content with repurposing plan for '{content_item.title}'")

        # Generate the main content structure
        main_headings = self.generate_headings(content_item, context)
        outline = {
            'headings': main_headings,
            'subheadings': self.generate_subheadings(content_item, main_headings, context),
            'key_points': self.generate_key_points(content_item, context)
        }
        content_flow = self.generate_content_flow(content_item, outline)

        # Create repurposing plan if platforms are specified
        repurposing_plan = {}
        if target_platforms:
            # Analyze repurposing potential, then derive the suggestions
            # from that analysis.
            analysis = self.analyze_content_for_repurposing(content_item, target_platforms)
            repurposing_plan['analysis'] = analysis
            repurposing_plan['recommended_strategy'] = self._recommend_repurposing_strategy(analysis)
            repurposing_plan['platform_roadmap'] = self._create_platform_roadmap(content_item, target_platforms)
            repurposing_plan['content_calendar_integration'] = self._suggest_calendar_integration(content_item, target_platforms)

        metadata = {
            'generated_at': str(datetime.now()),
            'content_type': content_item.content_type.name,
            'platforms': [p.name for p in content_item.platforms or []]
        }

        return {
            'content': {
                'outline': outline,
                'content_flow': content_flow,
                'metadata': metadata
            },
            'repurposing_plan': repurposing_plan
        }

    except Exception as e:
        self.logger.error(f"Error generating content with repurposing plan: {str(e)}")
        return {}
|
||||
|
||||
def _recommend_repurposing_strategy(self, analysis: Dict[str, Any]) -> str:
|
||||
"""Recommend the best repurposing strategy based on content analysis."""
|
||||
content_richness = analysis.get('content_analysis', {}).get('content_richness', 'Low')
|
||||
repurposing_potential = analysis.get('content_analysis', {}).get('repurposing_potential', 'Limited')
|
||||
|
||||
if content_richness == 'High' and repurposing_potential in ['Excellent', 'Good']:
|
||||
return 'progressive_disclosure'
|
||||
elif content_richness in ['Medium', 'High']:
|
||||
return 'adaptive'
|
||||
else:
|
||||
return 'atomic'
|
||||
|
||||
def _create_platform_roadmap(
    self,
    content_item: ContentItem,
    target_platforms: List[Platform]
) -> Dict[str, Any]:
    """Build a staggered release roadmap across the target platforms.

    Each platform is scheduled one day after the previous one, starting at
    the item's publish date (or the current time when it has none).

    Args:
        content_item: Source item; only ``publish_date`` is read.
        target_platforms: Platforms in the order they should be released.

    Returns:
        Dictionary with ``'timeline'`` (per-platform release details keyed by
        platform name), ``'platform_sequence'`` (platform names in release
        order) and ``'cross_promotion_opportunities'`` (always empty here).
    """
    first_release = content_item.publish_date or datetime.now()

    timeline = {}
    sequence = []
    for day_offset, platform in enumerate(target_platforms):
        scheduled = first_release + timedelta(days=day_offset)
        timeline[platform.name] = {
            'release_date': scheduled.strftime('%Y-%m-%d'),
            'content_type': self._get_optimal_content_type_for_platform(platform),
            'engagement_strategy': self._get_engagement_strategy_for_platform(platform)
        }
        sequence.append(platform.name)

    return {
        'timeline': timeline,
        'platform_sequence': sequence,
        'cross_promotion_opportunities': []
    }
|
||||
|
||||
def _suggest_calendar_integration(
    self,
    content_item: ContentItem,
    target_platforms: List[Platform]
) -> Dict[str, Any]:
    """Return static guidance for placing repurposed content on the calendar.

    Args:
        content_item: Source item; only its ``id`` is used, to build the
            ``source_<id>`` calendar tag.
        target_platforms: Accepted for signature symmetry; not read here.

    Returns:
        Dictionary with ``'scheduling_recommendations'``, ``'calendar_tags'``
        and ``'performance_tracking'`` entries.
    """
    scheduling = {
        'primary_content': 'Schedule as main content piece',
        'repurposed_content': 'Schedule 1-2 days after primary content',
        'series_content': 'Schedule weekly releases for maximum impact'
    }
    tags = [
        'repurposed_content',
        f'source_{content_item.id}',
        'multi_platform_series'
    ]
    tracking = {
        'metrics_to_track': ['engagement_rate', 'cross_platform_traffic', 'conversion_rate'],
        'comparison_baseline': 'Compare against single-platform content performance'
    }
    return {
        'scheduling_recommendations': scheduling,
        'calendar_tags': tags,
        'performance_tracking': tracking
    }
|
||||
|
||||
def _get_optimal_content_type_for_platform(self, platform: Platform) -> str:
    """Describe the content format best suited to ``platform``.

    Returns:
        A short human-readable description, or ``'Standard post'`` when the
        platform has no dedicated entry.
    """
    fallback = 'Standard post'
    formats_by_platform = {
        Platform.TWITTER: 'Thread or single tweet',
        Platform.LINKEDIN: 'Professional post or article',
        Platform.INSTAGRAM: 'Visual post with caption',
        Platform.FACEBOOK: 'Engaging post with discussion starter',
        Platform.WEBSITE: 'Full blog post or article'
    }
    return formats_by_platform.get(platform, fallback)
|
||||
|
||||
def _get_engagement_strategy_for_platform(self, platform: Platform) -> str:
    """Describe the engagement tactics recommended for ``platform``.

    Returns:
        A short human-readable description, or
        ``'Standard engagement tactics'`` when the platform has no entry.
    """
    fallback = 'Standard engagement tactics'
    tactics_by_platform = {
        Platform.TWITTER: 'Use hashtags, engage in conversations, create polls',
        Platform.LINKEDIN: 'Professional networking, thought leadership, industry discussions',
        Platform.INSTAGRAM: 'Visual storytelling, user-generated content, stories',
        Platform.FACEBOOK: 'Community building, discussions, live interactions',
        Platform.WEBSITE: 'SEO optimization, internal linking, lead magnets'
    }
    return tactics_by_platform.get(platform, fallback)
|
||||
@@ -1,599 +0,0 @@
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import json
|
||||
|
||||
# Add parent directory to path to import existing tools
# The directory four levels above this file is appended to sys.path (only if
# it is not already there) so that the absolute ``lib.`` imports below resolve.
# NOTE(review): presumably that directory is the repository root -- confirm.
parent_dir = str(Path(__file__).parent.parent.parent.parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentItem, ContentType, Platform, SEOData
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from ..utils.error_handling import handle_calendar_error
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentAtomizer:
    """
    Break down content into atomic pieces that can be recombined
    for different platforms and purposes.

    Atoms are grouped under the category names listed in ``ATOM_TYPES``.
    Extraction is attempted with the LLM first and falls back to a small set
    of regular expressions when the LLM is unavailable or returns something
    that cannot be used as a dictionary.
    """

    # Categories requested from the LLM and always present in the fallback result.
    ATOM_TYPES = ("statistics", "quotes", "tips", "examples", "questions", "arguments")

    def __init__(self):
        self.logger = logging.getLogger('content_calendar.atomizer')

    def atomize_content(self, content: str, title: str = "") -> Dict[str, List[str]]:
        """
        Extract key quotes, statistics, tips, and examples from content.

        Args:
            content: The content text to atomize (only the first 3000
                characters are sent to the LLM)
            title: The content title for context

        Returns:
            Dictionary containing different types of content atoms. When the
            LLM call fails, returns nothing, or returns text that is not a
            JSON object, the regex-based fallback result is returned instead.
        """
        try:
            self.logger.info(f"Atomizing content: {title[:50]}...")

            # Use AI to extract content atoms
            prompt = f"""
            Analyze the following content and extract key elements that can be repurposed:

            Title: {title}
            Content: {content[:3000]}...

            Extract and categorize the following elements:
            1. Key Statistics (numbers, percentages, data points)
            2. Quotable Insights (memorable quotes or key insights)
            3. Actionable Tips (practical advice or steps)
            4. Examples/Case Studies (real examples or stories)
            5. Key Questions (thought-provoking questions)
            6. Main Arguments (core points or arguments)

            Format your response as JSON:
            {{
                "statistics": ["stat1", "stat2", ...],
                "quotes": ["quote1", "quote2", ...],
                "tips": ["tip1", "tip2", ...],
                "examples": ["example1", "example2", ...],
                "questions": ["question1", "question2", ...],
                "arguments": ["argument1", "argument2", ...]
            }}
            """

            response = llm_text_gen(
                prompt=prompt,
                system_prompt="You are an expert content analyst. Extract key elements that can be repurposed across different platforms.",
                json_struct={
                    "type": "object",
                    "properties": {
                        atom_type: {"type": "array", "items": {"type": "string"}}
                        for atom_type in self.ATOM_TYPES
                    }
                }
            )

            # Callers iterate the result with .items()/.get(), so anything
            # that is not (or cannot be parsed into) a dict must not escape.
            atoms = self._normalize_atoms(response)
            if atoms:
                return atoms

            # Fallback to basic extraction
            return self._basic_content_extraction(content)

        except Exception as e:
            self.logger.error(f"Error atomizing content: {str(e)}")
            return self._basic_content_extraction(content)

    @staticmethod
    def _normalize_atoms(response: Any) -> Optional[Dict[str, List[str]]]:
        """Coerce an LLM response into an atoms dictionary.

        A non-empty dict is returned unchanged. A string is parsed as JSON
        (after removing a surrounding Markdown code fence, if any). Anything
        else, or text that does not decode to a non-empty JSON object, yields
        ``None`` so the caller can fall back to regex extraction.
        """
        if isinstance(response, str):
            text = response.strip()
            if text.startswith("```"):
                text = re.sub(r'^```(?:json)?\s*|\s*```$', '', text)
            try:
                response = json.loads(text)
            except (TypeError, ValueError):
                return None

        if isinstance(response, dict) and response:
            return response
        return None

    def _basic_content_extraction(self, content: str) -> Dict[str, List[str]]:
        """Fallback method for basic content extraction.

        Uses regular expressions only; the ``quotes``, ``examples`` and
        ``arguments`` categories are always left empty.
        """
        atoms = {atom_type: [] for atom_type in self.ATOM_TYPES}

        # Extract statistics: percentages ("12.5%", "40 percent") and numbers
        # with thousands separators. Comma groups are consumed as a whole so
        # "1,000,000" is not cut down to "1,000".
        stats = re.findall(
            r'\d+(?:,\d+)*(?:\.\d+)?(?:%| percent)|\d+(?:,\d+)+',
            content
        )
        atoms["statistics"] = stats[:5]  # Limit to 5

        # Extract questions
        questions = re.findall(r'[A-Z][^.!?]*\?', content)
        atoms["questions"] = questions[:3]  # Limit to 3

        # Extract sentences that might be tips (containing words like "should", "must", "need to").
        # Word boundaries keep "mustard"/"shoulder" from matching; the match
        # starts right after the previous sentence terminator, so it carries
        # that sentence's trailing whitespace and must be stripped.
        tip_patterns = r'[^.!?]*\b(?:should|must|need to|important to|remember to)\b[^.!?]*[.!?]'
        tips = [
            sentence.strip()
            for sentence in re.findall(tip_patterns, content, re.IGNORECASE)
        ]
        atoms["tips"] = [sentence for sentence in tips if sentence][:5]  # Limit to 5

        return atoms
|
||||
|
||||
class ContentRepurposer:
    """
    Main content repurposing engine that transforms content for different platforms.

    For each target platform the source item is atomized, a platform-specific
    prompt is sent to the LLM, and the generated text is wrapped in a new
    draft ``ContentItem``.
    """

    def __init__(self):
        self.logger = logging.getLogger('content_calendar.repurposer')
        self.atomizer = ContentAtomizer()

        # Platform-specific content specifications
        self.platform_specs = {
            Platform.TWITTER: {
                'max_length': 280,
                'optimal_length': 240,
                'format': 'concise',
                'tone': 'engaging',
                'hashtags': True,
                'mentions': True
            },
            Platform.LINKEDIN: {
                'max_length': 3000,
                'optimal_length': 1500,
                'format': 'professional',
                'tone': 'authoritative',
                'hashtags': True,
                'mentions': False
            },
            Platform.INSTAGRAM: {
                'max_length': 2200,
                'optimal_length': 1000,
                'format': 'visual-focused',
                'tone': 'casual',
                'hashtags': True,
                'mentions': True
            },
            Platform.FACEBOOK: {
                'max_length': 63206,
                'optimal_length': 500,
                'format': 'engaging',
                'tone': 'conversational',
                'hashtags': False,
                'mentions': True
            },
            Platform.WEBSITE: {
                'max_length': None,
                'optimal_length': 2000,
                'format': 'comprehensive',
                'tone': 'informative',
                'hashtags': False,
                'mentions': False
            }
        }

    @handle_calendar_error
    def repurpose_content(
        self,
        source_content: ContentItem,
        target_platforms: List[Platform],
        repurpose_strategy: str = 'adaptive'
    ) -> List[ContentItem]:
        """
        Repurpose content for multiple platforms.

        Args:
            source_content: Original content to repurpose
            target_platforms: List of platforms to create content for
            repurpose_strategy: Strategy for repurposing ('adaptive', 'atomic', 'series')

        Returns:
            List of repurposed content items. Empty when the source has no
            description/notes text or when an unexpected error occurs.
        """
        try:
            self.logger.info(f"Repurposing content '{source_content.title}' for {len(target_platforms)} platforms")

            repurposed_content = []

            # Get content text (assuming it's in description or notes)
            content_text = source_content.description or source_content.notes or ""

            if not content_text:
                self.logger.warning("No content text found for repurposing")
                return []

            # Atomize the content
            atoms = self.atomizer.atomize_content(content_text, source_content.title)

            # Only the first listed platform is treated as the original one.
            original_platform = source_content.platforms[0] if source_content.platforms else None

            # Generate repurposed content for each platform
            for platform in target_platforms:
                if original_platform is not None and platform == original_platform:
                    continue  # Skip the original platform

                repurposed_item = self._create_platform_specific_content(
                    source_content=source_content,
                    target_platform=platform,
                    atoms=atoms,
                    strategy=repurpose_strategy
                )

                if repurposed_item:
                    repurposed_content.append(repurposed_item)

            self.logger.info(f"Successfully repurposed content into {len(repurposed_content)} variations")
            return repurposed_content

        except Exception as e:
            self.logger.error(f"Error repurposing content: {str(e)}")
            return []

    def _create_platform_specific_content(
        self,
        source_content: ContentItem,
        target_platform: Platform,
        atoms: Dict[str, List[str]],
        strategy: str
    ) -> Optional[ContentItem]:
        """Create platform-specific content variation.

        Returns:
            A new draft ``ContentItem`` scheduled one day after the source
            (or one day from now when the source has no publish date), or
            ``None`` when no text could be generated or an error occurred.
        """
        try:
            platform_spec = self.platform_specs.get(target_platform, {})

            # Generate platform-specific content using AI
            repurposed_text = self._generate_platform_content(
                source_content=source_content,
                target_platform=target_platform,
                atoms=atoms,
                platform_spec=platform_spec,
                strategy=strategy
            )

            if not repurposed_text:
                return None

            # Unscheduled or untagged sources must not discard text that was
            # already generated, so missing values get neutral defaults.
            base_date = source_content.publish_date or datetime.now()
            source_tags = list(source_content.tags or [])

            # Create new content item
            repurposed_item = ContentItem(
                title=self._adapt_title_for_platform(source_content.title, target_platform),
                description=repurposed_text,
                content_type=self._determine_content_type_for_platform(target_platform),
                platforms=[target_platform],
                publish_date=base_date + timedelta(days=1),  # Schedule for next day
                status="draft",
                author=source_content.author,
                tags=source_tags + [f"repurposed_from_{source_content.id}"],
                notes=f"Repurposed from: {source_content.title}",
                seo_data=self._adapt_seo_data_for_platform(source_content.seo_data, target_platform)
            )

            return repurposed_item

        except Exception as e:
            self.logger.error(f"Error creating platform-specific content: {str(e)}")
            return None

    def _generate_platform_content(
        self,
        source_content: ContentItem,
        target_platform: Platform,
        atoms: Dict[str, List[str]],
        platform_spec: Dict[str, Any],
        strategy: str
    ) -> str:
        """Generate content optimized for specific platform.

        Returns:
            The LLM output, or an empty string when nothing was generated or
            the call failed.
        """
        try:
            # Prepare content elements
            title = source_content.title
            # Same source-text rule as repurpose_content: notes are used when
            # the description is empty.
            original_content = source_content.description or source_content.notes or ""

            # Create platform-specific prompt
            prompt = self._create_repurposing_prompt(
                title=title,
                original_content=original_content,
                target_platform=target_platform,
                atoms=atoms,
                platform_spec=platform_spec,
                strategy=strategy
            )

            # Generate content using AI
            repurposed_content = llm_text_gen(prompt)

            return repurposed_content or ""

        except Exception as e:
            self.logger.error(f"Error generating platform content: {str(e)}")
            return ""

    def _create_repurposing_prompt(
        self,
        title: str,
        original_content: str,
        target_platform: Platform,
        atoms: Dict[str, List[str]],
        platform_spec: Dict[str, Any],
        strategy: str
    ) -> str:
        """Create AI prompt for content repurposing.

        The prompt carries the title, the first 1500 characters of the
        original content, up to three atoms per category, the platform
        guidelines/specifications and the requested strategy.
        """

        platform_guidelines = {
            Platform.TWITTER: "Create engaging tweets that drive conversation. Use threads for complex topics. Include relevant hashtags.",
            Platform.LINKEDIN: "Write professional content that provides value to business professionals. Focus on insights and actionable advice.",
            Platform.INSTAGRAM: "Create visually-oriented content with engaging captions. Use storytelling and include relevant hashtags.",
            Platform.FACEBOOK: "Write conversational content that encourages engagement. Ask questions and create community discussion.",
            Platform.WEBSITE: "Create comprehensive, SEO-optimized content with clear structure and valuable information."
        }

        atom_lines = []
        for atom_type, atom_list in atoms.items():
            # LLM-produced atoms are not guaranteed to be lists of strings.
            if atom_list and isinstance(atom_list, (list, tuple)):
                joined = ', '.join(str(atom) for atom in atom_list[:3])
                atom_lines.append(f"\n{atom_type.title()}: {joined}")
        atoms_text = "".join(atom_lines)

        prompt = f"""
        Repurpose the following content for {target_platform.name}:

        Original Title: {title}
        Original Content: {original_content[:1500]}...

        Key Content Elements:{atoms_text}

        Platform Guidelines: {platform_guidelines.get(target_platform, '')}

        Repurposing Strategy: {strategy}

        Platform Specifications:
        - Optimal Length: {platform_spec.get('optimal_length', 'flexible')} characters
        - Format: {platform_spec.get('format', 'standard')}
        - Tone: {platform_spec.get('tone', 'professional')}
        - Include Hashtags: {platform_spec.get('hashtags', False)}

        Requirements:
        1. Adapt the content to fit {target_platform.name}'s format and audience
        2. Maintain the core message and value
        3. Optimize for {target_platform.name} engagement
        4. Include platform-appropriate calls to action
        5. Use the extracted content elements effectively
        6. Follow the repurposing strategy named above

        Create compelling, platform-optimized content that will perform well on {target_platform.name}.
        """

        return prompt

    def _adapt_title_for_platform(self, original_title: str, platform: Platform) -> str:
        """Adapt title for specific platform by prepending its emoji prefix."""
        platform_prefixes = {
            Platform.TWITTER: "🧵 ",
            Platform.LINKEDIN: "💼 ",
            Platform.INSTAGRAM: "📸 ",
            Platform.FACEBOOK: "💬 ",
            Platform.WEBSITE: ""
        }

        prefix = platform_prefixes.get(platform, "")
        return f"{prefix}{original_title}"

    def _determine_content_type_for_platform(self, platform: Platform) -> ContentType:
        """Determine appropriate content type for platform.

        The website maps to a blog post; every other platform (including
        unknown ones) maps to social media.
        """
        platform_content_types = {
            Platform.TWITTER: ContentType.SOCIAL_MEDIA,
            Platform.LINKEDIN: ContentType.SOCIAL_MEDIA,
            Platform.INSTAGRAM: ContentType.SOCIAL_MEDIA,
            Platform.FACEBOOK: ContentType.SOCIAL_MEDIA,
            Platform.WEBSITE: ContentType.BLOG_POST
        }

        return platform_content_types.get(platform, ContentType.SOCIAL_MEDIA)

    def _adapt_seo_data_for_platform(self, original_seo: Optional[SEOData], platform: Platform) -> Optional[SEOData]:
        """Adapt SEO data for specific platform.

        Website content keeps the original SEO data. Other platforms get a
        simplified copy (description capped at 160 characters, at most five
        keywords, no structured data). A source without SEO data yields
        ``None`` instead of raising.
        """
        if original_seo is None or platform == Platform.WEBSITE:
            return original_seo

        # For social media platforms, create simplified SEO data
        return SEOData(
            title=original_seo.title,
            meta_description=original_seo.meta_description[:160] if original_seo.meta_description else "",
            keywords=original_seo.keywords[:5] if original_seo.keywords else [],
            structured_data={}
        )
|
||||
|
||||
class ContentSeriesRepurposer:
    """
    Create cross-platform content series with progressive disclosure strategy.
    """

    def __init__(self):
        self.logger = logging.getLogger('content_calendar.series_repurposer')
        self.repurposer = ContentRepurposer()

    def create_cross_platform_series(
        self,
        source_content: ContentItem,
        platforms: List[Platform],
        series_strategy: str = 'progressive_disclosure'
    ) -> Dict[Platform, List[ContentItem]]:
        """
        Create a content series that progressively reveals information
        across different platforms, driving traffic between them.

        Args:
            source_content: Original comprehensive content
            platforms: Target platforms for the series
            series_strategy: Strategy for content distribution
                ('progressive_disclosure', 'platform_native'; any other
                value falls back to plain repurposing)

        Returns:
            Dictionary mapping platforms to their content pieces; empty when
            an unexpected error occurs.
        """
        try:
            self.logger.info(f"Creating cross-platform series for: {source_content.title}")

            series_content = {}

            if series_strategy == 'progressive_disclosure':
                series_content = self._create_progressive_disclosure_series(
                    source_content, platforms
                )
            elif series_strategy == 'platform_native':
                series_content = self._create_platform_native_series(
                    source_content, platforms
                )
            else:
                # Default to simple repurposing
                repurposed = self.repurposer.repurpose_content(
                    source_content, platforms
                )
                for item in repurposed:
                    series_content.setdefault(item.platforms[0], []).append(item)

            return series_content

        except Exception as e:
            self.logger.error(f"Error creating cross-platform series: {str(e)}")
            return {}

    def _create_progressive_disclosure_series(
        self,
        source_content: ContentItem,
        platforms: List[Platform]
    ) -> Dict[Platform, List[ContentItem]]:
        """Create series with progressive information disclosure.

        Each platform receives one piece whose disclosure type depends on the
        platform ("summary" for platforms without a dedicated entry).
        """
        series_content = {}
        if not platforms:
            return series_content

        # Define disclosure strategy
        disclosure_strategy = {
            Platform.TWITTER: "teaser",  # Hook with key stat/question
            Platform.INSTAGRAM: "visual",  # Visual summary with key points
            Platform.LINKEDIN: "insight",  # Professional insight/analysis
            Platform.FACEBOOK: "discussion",  # Community discussion starter
            Platform.WEBSITE: "complete"  # Full detailed content
        }

        # The atoms depend only on the source, so extract them once instead
        # of repeating the (LLM-backed) atomization for every platform.
        atoms = self._atomize_source(source_content)

        for platform in platforms:
            strategy = disclosure_strategy.get(platform, "summary")
            content_piece = self._create_disclosure_content(
                source_content, platform, strategy, atoms=atoms
            )
            if content_piece:
                series_content[platform] = [content_piece]

        return series_content

    def _atomize_source(self, source_content: ContentItem) -> Dict[str, List[str]]:
        """Atomize the source text (description, else notes, else empty)."""
        return self.repurposer.atomizer.atomize_content(
            source_content.description or source_content.notes or "",
            source_content.title
        )

    def _create_disclosure_content(
        self,
        source_content: ContentItem,
        platform: Platform,
        disclosure_type: str,
        atoms: Optional[Dict[str, List[str]]] = None
    ) -> Optional[ContentItem]:
        """Create content piece for specific disclosure strategy.

        Args:
            source_content: Item to derive the piece from
            platform: Platform the piece is written for
            disclosure_type: Passed to the repurposer as its strategy
            atoms: Pre-computed content atoms; extracted from the source
                when omitted

        Returns:
            The generated item, or ``None`` on failure.
        """
        try:
            if atoms is None:
                atoms = self._atomize_source(source_content)

            # This would use the repurposer with specific instructions
            # for the disclosure type
            repurposed = self.repurposer._create_platform_specific_content(
                source_content=source_content,
                target_platform=platform,
                atoms=atoms,
                strategy=disclosure_type
            )

            return repurposed

        except Exception as e:
            self.logger.error(f"Error creating disclosure content: {str(e)}")
            return None

    def _create_platform_native_series(
        self,
        source_content: ContentItem,
        platforms: List[Platform]
    ) -> Dict[Platform, List[ContentItem]]:
        """Create series optimized for each platform's native format.

        Not implemented yet: always returns an empty dictionary.
        """
        # Implementation for platform-native series
        # This would create multiple pieces per platform
        # optimized for that platform's specific characteristics
        return {}
|
||||
|
||||
# Main repurposing interface
|
||||
class SmartContentRepurposingEngine:
    """
    Facade over the repurposer, series repurposer and atomizer.
    """

    def __init__(self):
        self.logger = logging.getLogger('content_calendar.repurposing_engine')
        self.repurposer = ContentRepurposer()
        self.series_repurposer = ContentSeriesRepurposer()
        self.atomizer = ContentAtomizer()

    def repurpose_single_content(
        self,
        content: ContentItem,
        target_platforms: List[Platform],
        strategy: str = 'adaptive'
    ) -> List[ContentItem]:
        """Delegate to ``ContentRepurposer.repurpose_content``."""
        return self.repurposer.repurpose_content(content, target_platforms, strategy)

    def create_content_series(
        self,
        content: ContentItem,
        platforms: List[Platform],
        series_type: str = 'progressive_disclosure'
    ) -> Dict[str, List[ContentItem]]:
        """Delegate to ``ContentSeriesRepurposer.create_cross_platform_series``."""
        return self.series_repurposer.create_cross_platform_series(
            content, platforms, series_type
        )

    def analyze_content_atoms(self, content: str, title: str = "") -> Dict[str, List[str]]:
        """Extract reusable atoms from raw text via the atomizer."""
        return self.atomizer.atomize_content(content, title)

    def get_repurposing_suggestions(
        self,
        content: ContentItem,
        available_platforms: List[Platform]
    ) -> Dict[str, Any]:
        """Suggest platforms and strategies for repurposing ``content``.

        Returns:
            Dictionary with ``'recommended_platforms'``,
            ``'repurposing_strategies'``, ``'content_atoms'`` and
            ``'estimated_pieces'``. On error the same keys are returned with
            empty values.
        """
        try:
            source_text = content.description or content.notes or ""
            atoms = self.atomizer.atomize_content(source_text, content.title)

            # Platform recommendation and piece estimate depend on content type
            recommended = []
            estimated = 0
            if content.content_type == ContentType.BLOG_POST:
                recommended = [
                    Platform.TWITTER, Platform.LINKEDIN, Platform.INSTAGRAM
                ]
                estimated = len(available_platforms) * 2
            elif content.content_type == ContentType.VIDEO:
                recommended = [
                    Platform.TWITTER, Platform.INSTAGRAM, Platform.FACEBOOK
                ]
                estimated = len(available_platforms) * 3

            # A strategy is suggested when its atom category exceeds a threshold
            thresholds = (
                ('statistics', 3, 'data_driven'),
                ('tips', 5, 'tip_series'),
                ('examples', 2, 'case_study_series'),
            )
            strategies = []
            for atom_type, minimum, strategy_name in thresholds:
                if len(atoms.get(atom_type, [])) > minimum:
                    strategies.append(strategy_name)

            return {
                'recommended_platforms': recommended,
                'repurposing_strategies': strategies,
                'content_atoms': atoms,
                'estimated_pieces': estimated
            }

        except Exception as e:
            self.logger.error(f"Error getting repurposing suggestions: {str(e)}")
            return {
                'recommended_platforms': [],
                'repurposing_strategies': [],
                'content_atoms': {},
                'estimated_pieces': 0
            }
|
||||
@@ -1,127 +0,0 @@
|
||||
"""
|
||||
Gap analyzer integration for content calendar.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.main import ContentGapAnalysis
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
# Configure logger for content calendar debugging
# NOTE(review): this runs at import time on loguru's shared logger object.
# Per the loguru documentation, remove() without a handler id removes every
# handler added so far, not only the default one -- confirm this is intended.
logger.remove()  # Remove default handler
logger.add(
    sys.stdout,
    level="DEBUG",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan> | <yellow>{function}</yellow> | {message}",
    # Only records whose logger name contains "content_calendar" are emitted.
    filter=lambda record: "content_calendar" in record["name"].lower()
)
|
||||
|
||||
class GapAnalyzerIntegration:
    """Integrates content gap analysis with content calendar."""

    def __init__(self):
        """Initialize the gap analyzer integration."""
        self.gap_analyzer = ContentGapAnalysis()
        # The topic methods below call self.ai_processor, which was never
        # assigned, so they always failed with AttributeError.
        # NOTE(review): presumably the processor lives on the gap analyzer --
        # confirm the attribute name; when absent the methods degrade to
        # empty results with a warning.
        self.ai_processor = getattr(self.gap_analyzer, 'ai_processor', None)
        logger.debug("GapAnalyzerIntegration initialized for content calendar")

    @staticmethod
    def _debug_json(payload: Any) -> str:
        """Render ``payload`` for debug logs without ever raising.

        Values JSON cannot encode are stringified; if encoding still fails
        (e.g. non-string keys, circular references) ``repr`` is used.
        """
        try:
            return json.dumps(payload, indent=2, default=str)
        except (TypeError, ValueError):
            return repr(payload)

    def analyze_gaps(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze content gaps.

        Args:
            data: Dictionary containing content data

        Returns:
            Dictionary containing gap analysis results. On failure a
            dictionary with ``'error'``, ``'gaps'`` (empty) and
            ``'recommendations'`` (empty) is returned instead.
        """
        try:
            logger.debug(f"Starting gap analysis with data: {self._debug_json(data)}")
            # Run gap analysis
            results = self.gap_analyzer.analyze(data)
            logger.debug(f"Gap analysis completed with results: {self._debug_json(results)}")
            return results

        except Exception as e:
            error_msg = f"Error analyzing content gaps: {str(e)}"
            # loguru has no exc_info flag; extra kwargs make it str.format()
            # the message, which can itself raise on braces in the error
            # text. exception() logs the message plus the active traceback.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'gaps': [],
                'recommendations': []
            }

    def get_topic_suggestions(
        self,
        gap_analysis: Dict[str, Any],
        platform: str,
        count: int = 5
    ) -> List[Dict[str, Any]]:
        """
        Get topic suggestions for a specific platform based on gap analysis.

        Args:
            gap_analysis: Results from gap analysis; its ``'processed_gaps'``
                entry is iterated
            platform: Target platform for content
            count: Number of suggestions requested per gap (the combined
                list is not truncated)

        Returns:
            List of topic suggestions; empty on error or when no AI
            processor is available.
        """
        try:
            logger.debug(f"Generating topic suggestions for platform: {platform}, count: {count}")

            if self.ai_processor is None:
                logger.warning("No AI processor available; cannot generate topic suggestions")
                return []

            suggestions = []

            for gap in gap_analysis.get('processed_gaps', []):
                # Generate platform-specific topics
                platform_topics = self.ai_processor.generate_platform_topics(
                    gap=gap,
                    platform=platform,
                    count=count
                )
                logger.debug(f"Generated topics for gap: {self._debug_json(platform_topics)}")
                suggestions.extend(platform_topics)

            logger.debug(f"Total suggestions generated: {len(suggestions)}")
            return suggestions

        except Exception as e:
            logger.error(f"Error generating topic suggestions: {str(e)}")
            return []

    def analyze_topic_relevance(
        self,
        topic: Dict[str, Any],
        gap_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Analyze how well a topic addresses content gaps.

        Args:
            topic: Topic to analyze
            gap_analysis: Results from gap analysis; its ``'gaps'`` entry is
                passed to the AI processor

        Returns:
            Dictionary containing relevance analysis, or
            ``{'error': ..., 'score': 0}`` on failure.
        """
        try:
            logger.debug(f"Analyzing topic relevance: {self._debug_json(topic)}")

            if self.ai_processor is None:
                logger.warning("No AI processor available; cannot analyze topic relevance")
                return {
                    'error': 'AI processor is not available',
                    'score': 0
                }

            relevance = self.ai_processor.analyze_topic_relevance(
                topic=topic,
                gaps=gap_analysis.get('gaps', [])
            )

            logger.debug(f"Topic relevance analysis completed: {self._debug_json(relevance)}")
            return relevance

        except Exception as e:
            logger.error(f"Error analyzing topic relevance: {str(e)}")
            return {
                'error': str(e),
                'score': 0
            }
|
||||
@@ -1,196 +0,0 @@
|
||||
import logging
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from ..core.calendar_manager import CalendarManager
|
||||
from ..core.content_brief import ContentBriefGenerator
|
||||
from .platform_adapters import UnifiedPlatformAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class IntegrationManager:
|
||||
"""Manages integration between content calendar and platform adapters."""
|
||||
|
||||
def __init__(self):
    """Initialize the integration manager.

    Creates the three collaborators the other methods delegate to: the
    calendar manager, the content brief generator and the unified
    platform adapter.
    """
    self.calendar_manager = CalendarManager()
    self.content_brief_generator = ContentBriefGenerator()
    self.platform_adapter = UnifiedPlatformAdapter()
|
||||
|
||||
def create_cross_platform_calendar(
    self,
    start_date: datetime,
    end_date: datetime,
    platforms: List[str],
    content_types: List[str],
    target_audience: Optional[Dict[str, Any]] = None,
    industry: Optional[str] = None,
    keywords: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Build one base calendar and a platform-adapted copy per platform.

    Returns:
        Dictionary with ``'base_calendar'``, ``'platform_calendars'``
        (keyed by platform) and ``'metadata'`` echoing the request.

    Raises:
        Exception: Any error from calendar creation is logged and re-raised.
    """
    try:
        base = self.calendar_manager.create_calendar(
            start_date=start_date,
            end_date=end_date,
            content_types=content_types,
            target_audience=target_audience,
            industry=industry,
            keywords=keywords
        )

        # One adapted calendar per requested platform
        per_platform = {
            name: self._adapt_calendar_for_platform(calendar=base, platform=name)
            for name in platforms
        }

        request_metadata = {
            'start_date': start_date,
            'end_date': end_date,
            'platforms': platforms,
            'content_types': content_types,
            'industry': industry,
            'keywords': keywords
        }
        return {
            'base_calendar': base,
            'platform_calendars': per_platform,
            'metadata': request_metadata
        }

    except Exception as e:
        logger.error(f"Error creating cross-platform calendar: {str(e)}")
        raise
|
||||
|
||||
def _adapt_calendar_for_platform(
    self,
    calendar: Dict[str, Any],
    platform: str
) -> Dict[str, Any]:
    """Adapt every item of ``calendar`` for ``platform``.

    Items that cannot be adapted are dropped.

    Returns:
        Dictionary with ``'platform'``, ``'content_items'`` and the
        calendar's ``'metadata'``; on error, ``'platform'``, an empty
        ``'content_items'`` list and ``'error'``.
    """
    try:
        metadata = calendar.get('metadata', {})
        candidates = (
            self._adapt_content_item(entry, platform)
            for entry in calendar.get('content_items', [])
        )
        kept = [candidate for candidate in candidates if candidate]
        return {
            'platform': platform,
            'content_items': kept,
            'metadata': metadata
        }

    except Exception as e:
        logger.error(f"Error adapting calendar for platform {platform}: {str(e)}")
        return {
            'platform': platform,
            'content_items': [],
            'error': str(e)
        }
|
||||
|
||||
def _adapt_content_item(
|
||||
self,
|
||||
item: Dict[str, Any],
|
||||
platform: str
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Adapt a content item for a specific platform."""
|
||||
try:
|
||||
# Generate content brief if not exists
|
||||
if 'brief' not in item:
|
||||
item['brief'] = self.content_brief_generator.generate_brief(item)
|
||||
|
||||
# Adapt content for platform
|
||||
adapted_content = self.platform_adapter.adapt_content(
|
||||
content=item,
|
||||
platform=platform
|
||||
)
|
||||
|
||||
if adapted_content:
|
||||
return {
|
||||
'original_item': item,
|
||||
'adapted_content': adapted_content,
|
||||
'platform_specifics': self.platform_adapter.get_platform_specs(platform)
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error adapting content item for platform {platform}: {str(e)}")
|
||||
return None
|
||||
|
||||
def get_platform_suggestions(
    self,
    content: Dict[str, Any],
    platforms: List[str]
) -> Dict[str, Any]:
    """Collect the adapter's suggestions for ``content`` on each platform.

    Args:
        content: Content dict forwarded unchanged to the adapter.
        platforms: Platform identifiers to query, in order.

    Returns:
        Mapping of platform to whatever
        ``self.platform_adapter.get_platform_suggestions`` returned for it;
        platforms with a falsy result are omitted. If any call raises, the
        error is logged and an empty dict is returned.
    """
    try:
        gathered = [
            (
                name,
                self.platform_adapter.get_platform_suggestions(
                    content=content,
                    platform=name
                )
            )
            for name in platforms
        ]
        return {name: tips for name, tips in gathered if tips}
    except Exception as e:
        logger.error(f"Error getting platform suggestions: {str(e)}")
        return {}
|
||||
|
||||
def validate_platform_content(
    self,
    content: Dict[str, Any],
    platform: str
) -> Dict[str, Any]:
    """Check ``content`` against one platform via the platform adapter.

    Args:
        content: Content dict to validate.
        platform: Platform identifier.

    Returns:
        ``{'platform', 'is_valid', 'specifications'}`` where ``is_valid`` is
        the raw result of ``self.platform_adapter.validate_content`` and
        ``specifications`` comes from ``get_platform_specs``. If either call
        raises, the error is logged and
        ``{'platform', 'is_valid': False, 'error'}`` is returned.
    """
    try:
        adapter = self.platform_adapter
        verdict = adapter.validate_content(content=content, platform=platform)
        specs = adapter.get_platform_specs(platform)
    except Exception as e:
        logger.error(f"Error validating platform content: {str(e)}")
        return {
            'platform': platform,
            'is_valid': False,
            'error': str(e)
        }
    return {
        'platform': platform,
        'is_valid': verdict,
        'specifications': specs
    }
|
||||
|
||||
def optimize_cross_platform_content(
    self,
    content: Dict[str, Any],
    platforms: List[str]
) -> Dict[str, Any]:
    """Ask the platform adapter for an optimized variant per platform.

    Args:
        content: Content dict forwarded unchanged to the adapter.
        platforms: Platform identifiers to optimize for, in order.

    Returns:
        Mapping of platform to the result of
        ``self.platform_adapter.optimize_content``; platforms whose result
        is falsy are left out. If any call raises, the error is logged and
        an empty dict is returned.
    """
    try:
        per_platform = {}
        for name in platforms:
            variant = self.platform_adapter.optimize_content(
                content=content,
                platform=name
            )
            if not variant:
                continue
            per_platform[name] = variant
        return per_platform
    except Exception as e:
        logger.error(f"Error optimizing cross-platform content: {str(e)}")
        return {}
|
||||
@@ -1,307 +0,0 @@
|
||||
"""
|
||||
Unified platform adapter for content adaptation across different platforms.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, TypedDict
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.main import ContentGapAnalysis
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
|
||||
|
||||
class ContentItem(TypedDict):
    """Dictionary shape of a single piece of scheduled content."""

    # Identity and body
    id: str
    title: str
    content: str

    # Where the item is meant to go and its current status label
    platforms: List[str]
    status: str

    # Timestamps; published_at may be None
    created_at: datetime
    updated_at: datetime
    published_at: Optional[datetime]

    # Free-form extras; analytics may be None
    metadata: Dict[str, Any]
    analytics: Optional[Dict[str, Any]]
|
||||
|
||||
class UnifiedPlatformAdapter:
    """Generate platform-tailored social content from a generic data dict.

    ``generate_content`` dispatches on the lower-cased platform name to one
    of the ``_handle_*`` methods (instagram, linkedin, twitter, facebook).
    Public methods report failures through an ``'error'`` key in the result
    rather than by raising.
    """

    # Maximum characters per tweet used when building thread suggestions.
    TWEET_CHAR_LIMIT = 280

    # Generic hashtags appended per platform by ``_generate_hashtags``.
    _PLATFORM_HASHTAGS = {
        'instagram': ['#instagood', '#photooftheday'],
        'twitter': ['#trending', '#followme'],
        'linkedin': ['#business', '#professional'],
        'facebook': ['#social', '#community']
    }

    def __init__(self):
        """Initialize the platform adapter and its handler dispatch table."""
        self.platform_handlers = {
            'instagram': self._handle_instagram,
            'linkedin': self._handle_linkedin,
            'twitter': self._handle_twitter,
            'facebook': self._handle_facebook
        }
        logger.info("UnifiedPlatformAdapter initialized")

    def generate_content(self, platform: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate content for a specific platform.

        Args:
            platform: Target platform (matched case-insensitively)
            data: Content data passed to the platform handler

        Returns:
            The handler's result dict, or ``{'error': ..., 'content': None}``
            when the platform is unsupported or the lookup/handler raises.
        """
        try:
            handler = self.platform_handlers.get(platform.lower())
            if not handler:
                raise ValueError(f"Unsupported platform: {platform}")

            return handler(data)

        except Exception as e:
            error_msg = f"Error generating content for {platform}: {str(e)}"
            # loguru formats the message with any extra args/kwargs, so passing
            # ``exc_info=True`` would call ``error_msg.format(...)`` and blow up
            # on braces in the message. ``exception`` records the traceback
            # without formatting.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'content': None
            }

    def get_content_performance(self, content_item: "ContentItem") -> Dict[str, Any]:
        """Return a zero-initialized performance report for ``content_item``.

        ``content_item`` may be a ``ContentItem`` dict or an object exposing
        the same names as attributes. Only the first entry of ``platforms``
        is inspected; a platform named ``website`` (case-insensitive) gets
        additional ``platform_specific`` counters.

        Returns:
            The metrics dict, or ``{'error', 'metrics', 'trends',
            'recommendations'}`` if anything raises.
        """
        try:
            def field(name, default=None):
                # ContentItem is a TypedDict (a plain dict at runtime), so
                # attribute access alone would never find its values.
                if isinstance(content_item, dict):
                    return content_item.get(name, default)
                return getattr(content_item, name, default)

            logger.info(f"Getting performance metrics for content: {field('title', 'Untitled')}")

            # Get platform from content item
            platforms = field('platforms', None)
            if platforms and len(platforms) > 0:
                first = platforms[0]
                platform = first.name if hasattr(first, 'name') else str(first)
            else:
                platform = 'Unknown'

            # Initialize performance metrics
            performance = {
                'engagement_metrics': {
                    'likes': 0,
                    'comments': 0,
                    'shares': 0,
                    'reach': 0
                },
                'seo_metrics': {
                    'impressions': 0,
                    'clicks': 0,
                    'ctr': 0,
                    'position': 0
                },
                'conversion_metrics': {
                    'conversions': 0,
                    'conversion_rate': 0,
                    'revenue': 0
                },
                'platform_specific': {},
                'performance_trends': [],
                'recommendations': []
            }

            # Add platform-specific metrics
            if platform.upper() == 'WEBSITE':
                performance['platform_specific'] = {
                    'bounce_rate': 0,
                    'time_on_page': 0,
                    'page_views': 0
                }

            return performance

        except Exception as e:
            error_msg = f"Error getting content performance: {str(e)}"
            # See generate_content: avoid loguru formatting the message.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'metrics': {},
                'trends': {},
                'recommendations': []
            }

    def _handle_instagram(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the Instagram payload: caption, hashtags, media suggestions."""
        try:
            # NOTE(review): presumably returns the generated text - confirm.
            caption = metadesc_generator_main(data)
            hashtags = self._generate_hashtags(data, 'instagram')

            return {
                'platform': 'instagram',
                'content': {
                    'caption': caption,
                    'hashtags': hashtags,
                    'media_suggestions': self._get_media_suggestions(data)
                }
            }
        except Exception as e:
            logger.error(f"Error generating Instagram content: {str(e)}")
            return {
                'platform': 'instagram',
                'error': str(e)
            }

    def _handle_linkedin(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the LinkedIn payload: post, engagement tips, media suggestions."""
        try:
            post = metadesc_generator_main(data)

            return {
                'platform': 'linkedin',
                'content': {
                    'post': post,
                    'engagement_optimization': self._get_engagement_suggestions(data),
                    'media_suggestions': self._get_media_suggestions(data)
                }
            }
        except Exception as e:
            logger.error(f"Error generating LinkedIn content: {str(e)}")
            return {
                'platform': 'linkedin',
                'error': str(e)
            }

    def _handle_twitter(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the Twitter payload: tweet, hashtags, thread, media suggestions."""
        try:
            tweet = metadesc_generator_main(data)
            hashtags = self._generate_hashtags(data, 'twitter')

            return {
                'platform': 'twitter',
                'content': {
                    'tweet': tweet,
                    'hashtags': hashtags,
                    'thread_structure': self._get_thread_structure(data),
                    'media_suggestions': self._get_media_suggestions(data)
                }
            }
        except Exception as e:
            logger.error(f"Error generating Twitter content: {str(e)}")
            return {
                'platform': 'twitter',
                'error': str(e)
            }

    def _handle_facebook(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the Facebook payload: post, engagement tips, media suggestions."""
        try:
            post = metadesc_generator_main(data)

            return {
                'platform': 'facebook',
                'content': {
                    'post': post,
                    'engagement_optimization': self._get_engagement_suggestions(data),
                    'media_suggestions': self._get_media_suggestions(data)
                }
            }
        except Exception as e:
            logger.error(f"Error generating Facebook content: {str(e)}")
            return {
                'platform': 'facebook',
                'error': str(e)
            }

    def _generate_hashtags(
        self,
        data: Dict[str, Any],
        platform: Optional[str] = None
    ) -> List[str]:
        """Return ``data['keywords']`` followed by the platform's stock hashtags.

        Args:
            data: Content data; ``keywords`` may be missing, ``None``, a single
                string, or any iterable of strings.
            platform: Platform name. When omitted, ``data['platform']`` is used,
                which keeps the previous single-argument call working.

        Returns:
            A new list; ``[]`` if anything raises.
        """
        try:
            keywords = data.get('keywords') or []
            if isinstance(keywords, str):
                keywords = [keywords]

            name = (platform or data.get('platform') or '').lower()
            # Copy the class-level list so callers can mutate the result freely.
            return list(keywords) + list(self._PLATFORM_HASHTAGS.get(name, []))

        except Exception as e:
            logger.error(f"Error generating hashtags: {str(e)}")
            return []

    def _get_media_suggestions(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Suggest media by ``data['type']`` (default ``'post'``).

        ``'blog'`` yields a featured image, ``'social'`` a square post image;
        every other type yields an empty list.
        """
        try:
            content_type = data.get('type', 'post')

            if content_type == 'blog':
                return [{
                    'type': 'featured_image',
                    'description': 'Main blog post image',
                    'dimensions': '1200x630'
                }]
            if content_type == 'social':
                return [{
                    'type': 'post_image',
                    'description': 'Social media post image',
                    'dimensions': '1080x1080'
                }]
            return []

        except Exception as e:
            logger.error(f"Error getting media suggestions: {str(e)}")
            return []

    def _get_engagement_suggestions(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Return static engagement guidance; ``data`` is currently not consulted."""
        return {
            'best_posting_times': ['9:00 AM', '5:00 PM'],
            'engagement_tips': [
                'Ask questions to encourage comments',
                'Use relevant hashtags',
                'Include a clear call-to-action'
            ],
            'content_length': {
                'optimal': '150-200 characters',
                'maximum': '300 characters'
            }
        }

    def _get_thread_structure(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Split ``data['content']`` into tweets of at most ``TWEET_CHAR_LIMIT`` chars.

        The text is split on ``'.'``; each non-empty sentence gets its period
        back and sentences are packed greedily, separated by one space. A single
        sentence longer than the limit is hard-wrapped on whitespace.

        Returns:
            ``[{'content': str, 'type': 'tweet'}, ...]``; ``[]`` for missing or
            empty content or if anything raises.
        """
        try:
            import textwrap

            limit = self.TWEET_CHAR_LIMIT
            content = data.get('content') or ''

            # Dropping empty fragments avoids the stray '.' that the split's
            # trailing empty string used to append after the last sentence.
            sentences = [part.strip() for part in content.split('.')]
            pieces = [sentence + '.' for sentence in sentences if sentence]

            thread = []
            current_tweet = ''
            for piece in pieces:
                chunks = textwrap.wrap(piece, width=limit) if len(piece) > limit else [piece]
                for chunk in chunks:
                    candidate = f"{current_tweet} {chunk}" if current_tweet else chunk
                    if len(candidate) <= limit:
                        current_tweet = candidate
                        continue
                    thread.append({'content': current_tweet, 'type': 'tweet'})
                    current_tweet = chunk

            if current_tweet:
                thread.append({'content': current_tweet, 'type': 'tweet'})

            return thread

        except Exception as e:
            logger.error(f"Error generating thread structure: {str(e)}")
            return []
|
||||
@@ -1,219 +0,0 @@
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from ...meta_desc_generator import generate_blog_metadesc
|
||||
from ...content_title_generator import generate_blog_titles
|
||||
from ...seo_structured_data import generate_json_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SEOOptimizer:
    """Integrates SEO tools with content calendar system.

    Wraps the title, meta-description and structured-data generators so a
    calendar item (a dict with ``title``, ``keywords`` and related keys) can
    be enriched in one call. Methods log and return an empty or ``'error'``
    result instead of raising.
    """

    def __init__(self):
        """Initialize the SEO optimizer."""
        self._setup_logging()

    def _setup_logging(self):
        """Configure the module logger once.

        The stream handler is only attached when the logger has none yet;
        otherwise every new ``SEOOptimizer`` would add another handler and
        each record would be printed once per instance created.
        """
        logger.setLevel(logging.INFO)
        if logger.handlers:
            return

        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
        )
        logger.addHandler(handler)

    @staticmethod
    def _non_empty_lines(text) -> List[str]:
        """Split generator output into stripped lines, dropping blank ones."""
        if not text:
            return []
        return [line.strip() for line in text.split('\n') if line.strip()]

    def optimize_content(
        self,
        content: Dict[str, Any],
        content_type: str = 'article',
        language: str = 'English',
        search_intent: str = 'Informational Intent'
    ) -> Dict[str, Any]:
        """
        Optimize content for SEO using existing tools.

        Args:
            content: Content to optimize; ``title`` and ``keywords`` are read
            content_type: Type of content (article, product, etc.)
            language: Content language
            search_intent: Search intent type

        Returns:
            ``{'original_content', 'seo_optimized'}`` where ``seo_optimized``
            holds the generated title and meta-description candidates, the
            structured data and the echoed settings; ``{'error': ...}`` if
            anything raises.
        """
        try:
            # Extract content details (a missing or None keyword list is empty)
            title = content.get('title', '')
            keywords = content.get('keywords') or []

            # Generate SEO elements
            optimized_title = self._optimize_title(
                title=title,
                keywords=keywords,
                content_type=content_type,
                language=language,
                search_intent=search_intent
            )

            meta_description = self._generate_meta_description(
                keywords=keywords,
                content_type=content_type,
                language=language,
                search_intent=search_intent
            )

            structured_data = self._generate_structured_data(
                content=content,
                content_type=content_type
            )

            return {
                'original_content': content,
                'seo_optimized': {
                    'title': optimized_title,
                    'meta_description': meta_description,
                    'structured_data': structured_data,
                    'keywords': keywords,
                    'content_type': content_type,
                    'language': language,
                    'search_intent': search_intent
                }
            }

        except Exception as e:
            logger.error(f"Error optimizing content: {str(e)}")
            return {
                'error': str(e)
            }

    def _optimize_title(
        self,
        title: str,
        keywords: List[str],
        content_type: str,
        language: str,
        search_intent: str
    ) -> List[str]:
        """Generate SEO-optimized title candidates.

        Returns:
            One entry per non-blank line produced by ``generate_blog_titles``;
            ``[]`` when nothing is produced or the call raises.
        """
        try:
            # Convert keywords list to comma-separated string
            keywords_str = ', '.join(keywords or [])

            # Generate titles using existing tool
            titles = generate_blog_titles(
                input_blog_keywords=keywords_str,
                input_blog_content=title,
                input_title_type=content_type,
                input_title_intent=search_intent,
                input_language=language
            )

            return self._non_empty_lines(titles)

        except Exception as e:
            logger.error(f"Error optimizing title: {str(e)}")
            return []

    def _generate_meta_description(
        self,
        keywords: List[str],
        content_type: str,
        language: str,
        search_intent: str
    ) -> List[str]:
        """Generate SEO-optimized meta description candidates.

        ``content_type`` is accepted for signature parity with the other
        generators but is not forwarded to ``generate_blog_metadesc``.

        Returns:
            One entry per non-blank line produced; ``[]`` when nothing is
            produced or the call raises.
        """
        try:
            # Convert keywords list to comma-separated string
            keywords_str = ', '.join(keywords or [])

            # Generate meta descriptions using existing tool
            descriptions = generate_blog_metadesc(
                keywords=keywords_str,
                tone='Informative',
                search_type=search_intent,
                language=language
            )

            return self._non_empty_lines(descriptions)

        except Exception as e:
            logger.error(f"Error generating meta description: {str(e)}")
            return []

    def _generate_structured_data(
        self,
        content: Dict[str, Any],
        content_type: str
    ) -> Optional[Dict[str, Any]]:
        """Generate structured data for content.

        Returns:
            Whatever ``generate_json_data`` returns, or ``None`` if it raises.
        """
        try:
            # Prepare content details for structured data; the publish date
            # falls back to the current local time when the item has none.
            details = {
                'Headline': content.get('title', ''),
                'Author': content.get('author', ''),
                'Date Published': content.get('publish_date', datetime.now().isoformat()),
                'Keywords': ', '.join(content.get('keywords') or []),
                'Description': content.get('description', ''),
                'Image URL': content.get('image_url', '')
            }

            # Generate structured data using existing tool
            return generate_json_data(
                content_type=content_type,
                details=details,
                url=content.get('url', '')
            )

        except Exception as e:
            logger.error(f"Error generating structured data: {str(e)}")
            return None

    def optimize_calendar_content(
        self,
        calendar: Dict[str, Any],
        content_type: str = 'article',
        language: str = 'English',
        search_intent: str = 'Informational Intent'
    ) -> Dict[str, Any]:
        """
        Optimize all content in calendar for SEO.

        Args:
            calendar: Content calendar to optimize
            content_type: Type of content
            language: Content language
            search_intent: Search intent type

        Returns:
            ``{'metadata', 'content_items'}`` with one ``optimize_content``
            result per calendar item (items that failed carry an ``'error'``
            key); ``{'error': ...}`` if the calendar itself cannot be read.
        """
        try:
            optimized_calendar = {
                'metadata': calendar.get('metadata', {}),
                'content_items': []
            }

            # Optimize each content item
            for item in calendar.get('content_items', []):
                optimized_item = self.optimize_content(
                    content=item,
                    content_type=content_type,
                    language=language,
                    search_intent=search_intent
                )
                if optimized_item:
                    optimized_calendar['content_items'].append(optimized_item)

            return optimized_calendar

        except Exception as e:
            logger.error(f"Error optimizing calendar content: {str(e)}")
            return {
                'error': str(e)
            }
|
||||
@@ -1,143 +0,0 @@
|
||||
"""SEO tools integration for content calendar."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import Dict, Any, List, Optional
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Ensure logs directory exists before the file sink below is opened, so the
# sink never depends on the logging library creating the directory itself.
os.makedirs("logs", exist_ok=True)

# Configure logger
# NOTE: logger.remove() with no arguments drops every handler already
# registered on the shared loguru logger, not only the default one.
logger.remove()  # Remove default handler
logger.add(
    "logs/seo_tools_integration.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
class SEOToolsIntegration:
    """Integration with SEO tools for content calendar.

    Thin facade over the website analyzer, the AI title generator and the
    LLM text generator. ``analyze_content`` and ``optimize_content`` return
    an ``'error'`` key instead of raising.
    """

    def __init__(self):
        """Initialize the SEO tools integration."""
        self.website_analyzer = WebsiteAnalyzer()
        logger.info("SEOToolsIntegration initialized")

    def analyze_content(self, url: str) -> Dict[str, Any]:
        """
        Analyze content for SEO optimization.

        Args:
            url: The URL to analyze

        Returns:
            ``{'seo_score', 'meta_tags', 'content', 'recommendations'}`` taken
            from the analyzer's ``data.analysis.seo_info`` section, or
            ``{'error', 'seo_score': 0, 'recommendations': []}`` when the
            analyzer reports failure or anything raises.
        """
        try:
            # Analyze website
            analysis = self.website_analyzer.analyze_website(url)
            if not analysis.get('success', False):
                return {
                    'error': analysis.get('error', 'Unknown error in analysis'),
                    'seo_score': 0,
                    'recommendations': []
                }

            # Extract SEO information
            seo_info = analysis['data']['analysis']['seo_info']

            return {
                'seo_score': seo_info.get('overall_score', 0),
                'meta_tags': seo_info.get('meta_tags', {}),
                'content': seo_info.get('content', {}),
                'recommendations': seo_info.get('recommendations', [])
            }

        except Exception as e:
            error_msg = f"Error analyzing content: {str(e)}"
            # loguru formats the message with any extra kwargs, so
            # ``exc_info=True`` would call ``error_msg.format(...)`` and raise
            # on braces in the message; ``exception`` attaches the traceback
            # without formatting.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'seo_score': 0,
                'recommendations': []
            }

    def generate_title(self, url: str) -> Dict[str, Any]:
        """
        Generate SEO-optimized title.

        Args:
            url: The URL to analyze

        Returns:
            Whatever ``ai_title_generator(url)`` returns; exceptions from it
            propagate to the caller.
        """
        return ai_title_generator(url)

    def optimize_content(self, content: str, keywords: List[str]) -> Dict[str, Any]:
        """
        Optimize content for SEO.

        Args:
            content: The content to optimize
            keywords: List of target keywords (``None`` is treated as empty)

        Returns:
            ``{'suggestions', 'score'}`` from the model's JSON answer, or
            ``{'error', 'suggestions': [], 'score': 0}`` when the model
            returns nothing, the answer cannot be parsed, or anything raises.
        """
        try:
            import json

            keyword_text = ', '.join(keywords or [])

            # Prepare prompt for content optimization
            prompt = (
                "Optimize the following content for SEO:\n"
                "\n"
                f"Content: {content}\n"
                f"Target Keywords: {keyword_text}\n"
                "\n"
                "Provide optimization suggestions for:\n"
                "1. Keyword usage and placement\n"
                "2. Content structure and readability\n"
                "3. Meta information\n"
                "4. Internal linking opportunities\n"
                "5. Content length and depth\n"
                "\n"
                "Format the response as JSON with 'suggestions' and 'score' keys."
            )

            # Get AI optimization suggestions
            suggestions = llm_text_gen(
                prompt=prompt,
                system_prompt="You are an SEO expert specializing in content optimization.",
                response_format="json_object"
            )

            if not suggestions:
                return {
                    'error': 'Failed to generate optimization suggestions',
                    'suggestions': [],
                    'score': 0
                }

            # NOTE(review): the return type of llm_text_gen is not visible
            # here; accept either an already-parsed dict or a raw JSON string.
            if isinstance(suggestions, str):
                suggestions = json.loads(suggestions)

            return {
                'suggestions': suggestions.get('suggestions', []),
                'score': suggestions.get('score', 0)
            }

        except Exception as e:
            error_msg = f"Error optimizing content: {str(e)}"
            # See analyze_content: avoid loguru formatting the message.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'suggestions': [],
                'score': 0
            }
|
||||
@@ -1,21 +0,0 @@
|
||||
import streamlit as st
|
||||
|
||||
def render_add_content_modal(selected_date, on_add_content, on_generate_with_ai):
    """Render the "+ Add Content" button and, when open, the quick-add form.

    The open/closed state lives in
    ``st.session_state['show_add_content_dialog']``.

    Args:
        selected_date: Default value for the publish-date picker.
        on_add_content: Called as ``(title, platform, content_type,
            publish_date)`` when "Add Content" is submitted.
        on_generate_with_ai: Called as ``(title, platform, content_type)``
            when "Generate with AI" is submitted.
    """
    def _close_dialog():
        # Runs as a widget callback, i.e. before the script reruns, so the
        # dialog is already hidden on the rerun triggered by the click.
        # Flipping the flag after the form was drawn left it on screen until
        # the next interaction.
        st.session_state['show_add_content_dialog'] = False

    if st.button("+ Add Content", key="open_add_content_dialog_bottom"):
        st.session_state['show_add_content_dialog'] = True

    if not st.session_state.get('show_add_content_dialog', False):
        return

    st.markdown("### Add Content")
    with st.form("quick_add_form_dialog_bottom"):
        title = st.text_input("Title")
        platform = st.selectbox("Platform", ["Blog", "Instagram", "Twitter", "LinkedIn", "Facebook"])
        content_type = st.selectbox("Content Type", ["Article", "Social Post", "Video", "Newsletter"])
        publish_date = st.date_input("Publish Date", selected_date)
        col_add, col_ai = st.columns([0.6, 0.4])
        with col_add:
            if st.form_submit_button("Add Content"):
                on_add_content(title, platform, content_type, publish_date)
        with col_ai:
            if st.form_submit_button("Generate with AI"):
                on_generate_with_ai(title, platform, content_type)

    st.button("Close", key="close_add_content_dialog_bottom", on_click=_close_dialog)
|
||||
@@ -1,137 +0,0 @@
|
||||
import streamlit as st
|
||||
|
||||
def _render_ai_suggestion(index, suggestion, on_create_brief, on_schedule, on_refine, on_customize):
    """Render one suggestion: details, metrics, action buttons and extras."""
    col1, col2 = st.columns([2, 1])
    with col1:
        st.subheader(suggestion['title'])
        st.write(f"**Type:** {suggestion['type']}")
        st.write(f"**Platform:** {suggestion['platform']}")
        st.write(f"**Target Audience:** {', '.join(suggestion['audience'])}")
        st.write(f"**Estimated Impact:** {suggestion['impact']}")
        with st.expander("Content Preview"):
            st.write(suggestion.get('preview', 'Preview not available'))
            if suggestion.get('style_elements'):
                st.write("**Style Elements:**")
                for element in suggestion['style_elements']:
                    st.write(f"- {element}")
            if suggestion.get('seo_elements'):
                st.write("**SEO Elements:**")
                for element in suggestion['seo_elements']:
                    st.write(f"- {element}")
    with col2:
        st.subheader("Performance Metrics")
        metrics = {
            "Engagement Score": suggestion.get('engagement_score', '85%'),
            "Reach Potential": suggestion.get('reach', 'High'),
            "Conversion Rate": suggestion.get('conversion', '3.5%'),
            "SEO Impact": suggestion.get('seo_impact', 'Strong')
        }
        for metric, value in metrics.items():
            st.metric(metric, value)
        st.subheader("Actions")
        if st.button("Create Brief", key=f"modal_brief_{index}"):
            on_create_brief(suggestion)
        if st.button("Schedule", key=f"modal_schedule_{index}"):
            on_schedule(suggestion)
        if st.button("Refine", key=f"modal_refine_{index}"):
            on_refine(suggestion)
        if st.button("Customize", key=f"modal_customize_{index}"):
            on_customize(suggestion)
    with st.expander("Additional Options"):
        st.write("**Platform Optimizations**")
        for platform in suggestion.get('platform_optimizations', []):
            st.write(f"- {platform}")
        st.write("**Content Variations**")
        for variation in suggestion.get('variations', []):
            st.write(f"- {variation}")
        st.write("**SEO Recommendations**")
        for seo in suggestion.get('seo_recommendations', []):
            st.write(f"- {seo}")
        if suggestion.get('media_suggestions'):
            st.write("**Media Suggestions**")
            for media in suggestion['media_suggestions']:
                st.write(f"- {media}")


def render_ai_suggestions_modal(generate_ai_suggestions, on_create_brief, on_schedule, on_refine, on_customize):
    """Render the AI suggestion form and the generated suggestions.

    Defaults for the content type and topic are read from
    ``st.session_state['ai_modal_type']`` / ``['ai_modal_topic']``. Generated
    suggestions are kept in ``st.session_state['ai_modal_suggestions']`` so
    they are still rendered on the rerun triggered by clicking one of their
    action buttons. When they were rendered only inside the "Generate
    Suggestions" branch, that rerun discarded them and the action callbacks
    never ran.

    Args:
        generate_ai_suggestions: Called positionally with (content_type,
            topic, audience, goals, tone, length, model_settings,
            style_preferences, seo_preferences, platform_settings); expected
            to return a sequence of suggestion dicts.
        on_create_brief, on_schedule, on_refine, on_customize: Each is called
            with the suggestion dict whose button was clicked.
    """
    st.subheader("AI Content Suggestions")
    default_type = st.session_state.get('ai_modal_type', "Blog Post")
    default_topic = st.session_state.get('ai_modal_topic', "")
    content_types = {
        "Blog Post": "Long-form content for in-depth topics",
        "Social Media Post": "Short, engaging content for social platforms",
        "Video": "Visual content with script and storyboard",
        "Newsletter": "Email content for subscriber engagement"
    }
    content_type = st.selectbox(
        "Content Type",
        list(content_types.keys()),
        format_func=lambda x: f"{x} - {content_types[x]}",
        key="modal_suggestion_type",
        index=list(content_types.keys()).index(default_type) if default_type in content_types else 0
    )
    topic = st.text_input("Enter topic or keyword", value=default_topic, key="modal_suggestion_topic")
    # NOTE(review): nesting of the settings groups under this expander is
    # reconstructed; confirm against the intended layout.
    with st.expander("Advanced Options"):
        audience = st.multiselect(
            "Target Audience",
            ["Professionals", "Students", "Entrepreneurs", "General Public", "Industry Experts"],
            default=["Professionals"]
        )
        goals = st.multiselect(
            "Content Goals",
            ["Increase Engagement", "Generate Leads", "Build Authority", "Drive Traffic", "Educate"],
            default=["Increase Engagement"]
        )
        tone = st.select_slider(
            "Content Tone",
            options=["Professional", "Casual", "Educational", "Entertaining", "Persuasive"],
            value="Professional"
        )
        length = st.radio(
            "Content Length",
            ["Short", "Medium", "Long"],
            horizontal=True
        )
        st.subheader("AI Model Settings")
        model_settings = {
            "Creativity Level": st.slider("Creativity Level", 0.0, 1.0, 0.7, 0.1),
            "Formality Level": st.slider("Formality Level", 0.0, 1.0, 0.5, 0.1),
            "Technical Depth": st.slider("Technical Depth", 0.0, 1.0, 0.5, 0.1)
        }
        st.subheader("Content Style Preferences")
        style_preferences = {
            "Use Examples": st.checkbox("Include Real-world Examples", True),
            "Use Statistics": st.checkbox("Include Statistics and Data", True),
            "Use Quotes": st.checkbox("Include Expert Quotes", False),
            "Use Case Studies": st.checkbox("Include Case Studies", False)
        }
        st.subheader("SEO Preferences")
        seo_preferences = {
            "Keyword Density": st.slider("Keyword Density (%)", 1, 5, 2),
            "Internal Linking": st.checkbox("Suggest Internal Links", True),
            "External Linking": st.checkbox("Suggest External Links", True),
            "Meta Description": st.checkbox("Generate Meta Description", True)
        }
        st.subheader("Platform-specific Settings")
        platform_settings = {
            "Hashtag Usage": st.checkbox("Suggest Hashtags", True),
            "Image Suggestions": st.checkbox("Suggest Images", True),
            "Video Suggestions": st.checkbox("Suggest Videos", False),
            "Interactive Elements": st.checkbox("Suggest Interactive Elements", False)
        }

    if st.button("Generate Suggestions", type="primary", key="modal_generate_btn"):
        with st.spinner("Generating suggestions..."):
            st.session_state['ai_modal_suggestions'] = generate_ai_suggestions(
                content_type,
                topic,
                audience,
                goals,
                tone,
                length,
                model_settings,
                style_preferences,
                seo_preferences,
                platform_settings
            )

    suggestions = st.session_state.get('ai_modal_suggestions')
    if not suggestions:
        return

    suggestion_tabs = st.tabs([f"Suggestion {i+1}" for i in range(len(suggestions))])
    for i, (tab, suggestion) in enumerate(zip(suggestion_tabs, suggestions)):
        with tab:
            _render_ai_suggestion(
                i, suggestion, on_create_brief, on_schedule, on_refine, on_customize
            )
|
||||
@@ -1,51 +0,0 @@
|
||||
import streamlit as st
|
||||
from .components.content_card import render_content_card
|
||||
from .components.badge import render_badge
|
||||
|
||||
def render_calendar_view(calendar_data, icon_map, status_color, on_edit, on_delete, on_generate, get_item_key):
    """Render all scheduled content, grouped into one expander per calendar day.

    Args:
        calendar_data: DataFrame (or None). This function reads the ``date``,
            ``platform``, ``type`` and ``status`` columns; ``date`` must support
            the pandas ``.dt`` accessor.
        icon_map: Mapping from a display label to an icon string.
        status_color: Passed through unchanged to ``render_content_card``.
        on_edit: Callback invoked with the row when the card's edit action fires.
        on_delete: Callback invoked with the row when the card's delete action fires.
        on_generate: Callback invoked with the row when the card's generate action fires.
        get_item_key: Callable mapping a row to the key used for widgets and
            for the ``editing_item_key`` session-state comparison.
    """
    if calendar_data is None or calendar_data.empty:
        st.info("No content scheduled yet. Add content to see it here.")
        return

    # Raw platform identifier -> display label. Hoisted out of the row loop
    # because it never changes; unknown platforms fall back to 'Blog'.
    platform_map = {
        'blog': 'Blog',
        'website': 'Blog',
        'instagram': 'Instagram',
        'twitter': 'Twitter',
        'linkedin': 'LinkedIn',
        'facebook': 'Facebook',
    }

    st.markdown("### All Scheduled Content")
    calendar_data = calendar_data.sort_values(by="date")
    grouped = list(calendar_data.groupby(calendar_data['date'].dt.date))
    for i, (date, group) in enumerate(grouped):
        # Only the first (earliest) day starts expanded.
        with st.expander(f"{date.strftime('%B %d, %Y')}", expanded=(i == 0)):
            for _, row in group.iterrows():
                item_key = get_item_key(row)
                is_editing = st.session_state.get("editing_item_key") == item_key

                # Unwrap enum-like values BEFORE converting to str. The previous
                # code called str() first, which made the ``.value`` check dead
                # code and turned enum members into e.g. "Platform.TWITTER".
                raw_platform = row['platform']
                if hasattr(raw_platform, 'value'):
                    raw_platform = raw_platform.value
                platform_disp = platform_map.get(str(raw_platform).lower(), 'Blog')

                raw_type = row['type']
                if hasattr(raw_type, 'value'):
                    raw_type = raw_type.value
                type_disp = str(raw_type).replace('_', ' ').title()

                status_disp = row['status'].capitalize()
                platform_icon = icon_map.get(platform_disp, '🌐')
                type_icon = icon_map.get(type_disp, '📄')
                render_content_card(
                    row=row,
                    is_editing=is_editing,
                    # Bind the current row as a default so each callback keeps
                    # its own row instead of the loop's last one.
                    on_edit=lambda r=row: on_edit(r),
                    on_delete=lambda r=row: on_delete(r),
                    on_generate=lambda r=row: on_generate(r),
                    icon_map=icon_map,
                    status_color=status_color,
                    platform_disp=platform_disp,
                    type_disp=type_disp,
                    status_disp=status_disp,
                    platform_icon=platform_icon,
                    type_icon=type_icon,
                    item_key=item_key
                )
|
||||
@@ -1,294 +0,0 @@
|
||||
import streamlit as st
|
||||
from typing import Dict, Any, List
|
||||
from lib.database.models import ContentItem
|
||||
import logging
|
||||
from lib.ai_seo_tools.content_calendar.core.content_generator import ContentGenerator
|
||||
from lib.ai_seo_tools.content_calendar.core.calendar_manager import CalendarManager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def render_ab_testing(content_generator: ContentGenerator, calendar_manager: CalendarManager):
    """Render the A/B testing interface.

    Lists the titles of all calendar content, lets the user pick one, configure
    a test, generate variants and, when a running test is recorded in
    ``st.session_state['ab_test_results']``, end and analyze it.

    Args:
        content_generator: Generator handed to ``_generate_ab_test_variants``.
        calendar_manager: Source of the calendar whose content can be tested.
    """
    st.header("A/B Testing")

    # Check if calendar manager is available
    if 'calendar_manager' not in st.session_state:
        st.error("Calendar manager not initialized. Please refresh the page.")
        return

    # Get available content
    try:
        available_content = calendar_manager.get_calendar().get_all_content()
        content_options = [item.title for item in available_content]
    except Exception as e:
        logger.error(f"Error getting content options: {str(e)}")
        st.error("Error loading content. Please try again.")
        return

    if not content_options:
        st.info("""
        ## Welcome to A/B Testing! 🧪

        Test different versions of your content to find what works best. Here's what you can do:

        ### Features:
        - 🔄 **Variant Generation**: Create multiple versions of your content
        - 📊 **Performance Tracking**: Compare metrics across variants
        - 📈 **Statistical Analysis**: Get data-driven insights
        - 🎯 **Winner Selection**: Identify the best performing content

        ### Getting Started:
        1. First, add some content to your calendar
        2. Select the content you want to test
        3. Generate variants with different parameters
        4. Track performance and analyze results

        Ready to get started? Add some content to your calendar first!
        """)
        return

    # Content Selection
    selected_content = st.selectbox(
        "Select content to test",
        options=content_options,
        key="ab_test_content_select"
    )

    if selected_content:
        try:
            content_item = next(
                item for item in available_content
                if item.title == selected_content
            )

            # Read the results store defensively: nothing in this function
            # initialises 'ab_test_results', and attribute access on
            # st.session_state raises when the key is missing.
            ab_test_results = st.session_state.get('ab_test_results', {})

            # Show onboarding info if no test history
            if not ab_test_results.get(content_item.title):
                st.info("""
                ### A/B Testing Guide

                Create and compare different versions of your content:

                - **Headline Variations**: Test different titles and hooks
                - **Content Structure**: Try different content flows
                - **Call-to-Action**: Test various CTAs
                - **Visual Elements**: Compare different media placements

                Click 'Generate Test Variants' to get started!
                """)

            # Test Configuration
            st.markdown("### Create A/B Test")
            col1, col2 = st.columns([2, 1])

            with col1:
                # NOTE(review): this is a second content selector; the item
                # chosen here replaces the one picked above — confirm intended.
                test_content = st.selectbox(
                    "Select content to A/B test",
                    options=content_options,
                    key="ab_test_content_select_unique"
                )

            with col2:
                num_variants = st.slider(
                    "Number of variants",
                    min_value=2,
                    max_value=5,
                    value=2,
                    help="Number of different versions to test"
                )

            if test_content:
                content_item = next(
                    item for item in calendar_manager.get_calendar().get_all_content()
                    if item.title == test_content
                )

                # Test Settings
                # NOTE(review): the values collected here are not used below.
                with st.expander("Test Settings"):
                    col1, col2 = st.columns(2)
                    with col1:
                        test_duration = st.number_input(
                            "Test Duration (days)",
                            min_value=1,
                            max_value=30,
                            value=7
                        )
                        target_metric = st.selectbox(
                            "Primary Metric",
                            options=['Engagement', 'Conversion', 'Reach', 'Click-through'],
                            index=0
                        )
                    with col2:
                        audience_size = st.select_slider(
                            "Audience Size",
                            options=['Small', 'Medium', 'Large'],
                            value='Medium'
                        )
                        confidence_level = st.slider(
                            "Confidence Level",
                            min_value=90,
                            max_value=99,
                            value=95,
                            help="Statistical confidence level for test results"
                        )

                # Generate Variants
                if st.button("Generate Variants"):
                    with st.spinner("Generating variants..."):
                        variants = _generate_ab_test_variants(content_generator, content_item, num_variants)
                        if variants:
                            st.success(f"Generated {len(variants)} variants!")

                            # Display variants in tabs
                            variant_tabs = st.tabs([f"Variant {i+1}" for i in range(len(variants))])
                            for i, tab in enumerate(variant_tabs):
                                with tab:
                                    st.markdown(f"### Variant {i+1}")
                                    st.json(variants[i]['content'])

                                    # Variant metrics
                                    col1, col2, col3 = st.columns(3)
                                    with col1:
                                        st.metric(
                                            "Engagement Score",
                                            f"{variants[i]['metrics']['engagement_score']:.1f}%"
                                        )
                                    with col2:
                                        st.metric(
                                            "Conversion Rate",
                                            f"{variants[i]['metrics']['conversion_rate']:.1f}%"
                                        )
                                    with col3:
                                        st.metric(
                                            "Reach",
                                            f"{variants[i]['metrics']['reach']:,}"
                                        )

                # Results Analysis
                st.markdown("### Analyze Results")
                if test_content in ab_test_results:
                    test_data = ab_test_results[test_content]

                    # Test Status
                    st.info(f"Test Status: {test_data['status']}")
                    st.write(f"Started: {test_data['start_time']}")

                    if test_data['status'] == 'running':
                        if st.button("End Test and Analyze"):
                            with st.spinner("Analyzing results..."):
                                results = _analyze_ab_test_results(content_item)
                                if results:
                                    st.success("Analysis complete!")
                                    _display_test_results(results)

        except Exception as e:
            # Also reached when next() finds no matching title (StopIteration).
            logger.error(f"Error in A/B testing interface: {str(e)}", exc_info=True)
            st.error(f"Error in A/B testing: {str(e)}")
|
||||
|
||||
def _generate_ab_test_variants(
    content_generator,
    content: ContentItem,
    num_variants: int
) -> List[Dict[str, Any]]:
    """Ask the content generator for ``num_variants`` variations of ``content``.

    Returns the truthy variations in generation order, or an empty list when
    anything raises along the way (the error is logged).
    """
    try:
        logger.info(f"Generating {num_variants} variants for content: {content.title}")

        # Payload handed to the generator for every variation request.
        first_platform = content.platforms[0].name if content.platforms else 'Unknown'
        payload = {
            'title': content.title,
            'content': content.description,
            'metadata': {
                'platform': first_platform,
                'content_type': content.content_type.name
            }
        }

        # Lazily request variant_1 .. variant_N, keeping only truthy results.
        generated = (
            content_generator.generate_variation(
                content=payload,
                variation_type=f"variant_{number}"
            )
            for number in range(1, num_variants + 1)
        )
        return [candidate for candidate in generated if candidate]

    except Exception as e:
        logger.error(f"Error generating variants: {str(e)}")
        return []
|
||||
|
||||
def _analyze_ab_test_results(content_item: ContentItem) -> Dict[str, Any]:
    """Analyze the recorded A/B test for ``content_item`` and mark it completed.

    Reads the test stored under ``content_item.title`` in
    ``st.session_state['ab_test_results']``, totals the variant metrics, picks
    the variant with the highest engagement score and writes ``status`` and
    ``results`` back onto the stored test entry.

    Returns:
        The results dictionary, or ``{}`` when no test exists, the test has no
        variants, or any other error occurs (logged and shown via ``st.error``).
    """
    try:
        logger.info(f"Analyzing A/B test results for: {content_item.title}")

        # Defensive read: attribute access on st.session_state raises when the
        # key was never initialised, which hid the intended message below.
        all_results = st.session_state.get('ab_test_results') or {}
        if content_item.title not in all_results:
            raise ValueError("No A/B test results found for this content")

        test_data = all_results[content_item.title]
        variants = test_data['variants']
        if not variants:
            # max() on an empty list would raise a far less helpful error.
            raise ValueError("A/B test has no variants to analyze")

        # Calculate performance metrics
        results = {
            'total_engagement': sum(v['metrics']['engagement_score'] for v in variants),
            'total_conversions': sum(v['metrics']['conversion_rate'] for v in variants),
            'total_reach': sum(v['metrics']['reach'] for v in variants),
            'best_performing_variant': max(variants, key=lambda x: x['metrics']['engagement_score']),
            'recommendations': []
        }

        # Generate recommendations
        # NOTE(review): the UI formats engagement_score as a percentage, so the
        # 0.7 threshold may be on the wrong scale — confirm with the generator.
        for variant in variants:
            if variant['metrics']['engagement_score'] > 0.7:  # High engagement threshold
                results['recommendations'].append({
                    'variant_id': variant['variant_id'],
                    'reason': 'High engagement score',
                    'suggested_actions': ['Scale this variant', 'Apply learnings to other content']
                })

        # Update test status
        test_data['status'] = 'completed'
        test_data['results'] = results

        logger.info("A/B test results analyzed successfully")
        return results

    except Exception as e:
        logger.error(f"Error analyzing A/B test results: {str(e)}", exc_info=True)
        st.error(f"Error analyzing A/B test results: {str(e)}")
        return {}
|
||||
|
||||
def _display_test_results(results: Dict[str, Any]) -> None:
    """Show totals, the winning variant and recommendations for an A/B test."""
    # (label, results key, format spec, suffix) for the three headline metrics.
    headline_metrics = (
        ("Total Engagement", 'total_engagement', ".1f", "%"),
        ("Total Conversions", 'total_conversions', ".1f", "%"),
        ("Total Reach", 'total_reach', ",", ""),
    )

    with st.expander("Overall Performance", expanded=True):
        columns = st.columns(3)
        # Values are looked up inside the loop so each metric is only read
        # when its own column is rendered.
        for column, (label, key, spec, suffix) in zip(columns, headline_metrics):
            with column:
                st.metric(label, f"{results[key]:{spec}}{suffix}")

    with st.expander("Best Performing Variant", expanded=True):
        winner = results['best_performing_variant']
        st.markdown(f"### {winner['variant_id']}")
        st.json(winner['content'])

    with st.expander("Recommendations", expanded=True):
        for recommendation in results['recommendations']:
            st.markdown(f"#### {recommendation['variant_id']}")
            st.write(f"Reason: {recommendation['reason']}")
            st.write("Suggested Actions:")
            for step in recommendation['suggested_actions']:
                st.write(f"- {step}")
|
||||
@@ -1,2 +0,0 @@
|
||||
def render_badge(platform_disp, platform_icon, type_disp, status_disp):
    """Return the HTML badge showing platform, content type and a status chip.

    The platform and status labels are lower-cased to form the CSS class
    suffixes; the visible text keeps the labels exactly as passed in.
    """
    platform_class = "badge-platform-{}".format(platform_disp.lower())
    status_chip = "<span class='chip-status chip-status-{}'>{}</span>".format(
        status_disp.lower(), status_disp
    )
    return (
        "<span class='badge-content-calendar {}'>".format(platform_class)
        + "{} {} | {} | ".format(platform_icon, platform_disp, type_disp)
        + status_chip
        + "</span>"
    )
|
||||
@@ -1,22 +0,0 @@
|
||||
import streamlit as st
|
||||
|
||||
def render_content_card(row, is_editing, on_edit, on_delete, on_generate, icon_map, status_color, platform_disp, type_disp, status_disp, platform_icon, type_icon, item_key):
    """Render one content item as a card with generate / edit / delete buttons.

    Args:
        row: Mapping-like record; only ``row['title']`` is read here.
        is_editing, icon_map, status_color: Accepted for the caller's
            interface; not used by this function.
        on_edit, on_delete, on_generate: Zero-argument callbacks run when the
            matching button is pressed.
        platform_disp, type_disp, status_disp: Display labels for the badge;
            platform and status are lower-cased into CSS class names.
        platform_icon, type_icon: Icon strings shown in the badge / title row.
        item_key: Suffix that makes the three button keys unique.
    """
    # Local import keeps this block self-contained without touching file imports.
    import html

    # The title is content data rendered with unsafe_allow_html=True, so it is
    # escaped to keep '<', '>' and '&' from being interpreted as markup.
    safe_title = html.escape(str(row['title']))

    # NOTE(review): each st.markdown call is emitted as its own element, so
    # these opening/closing <div> pairs may not actually wrap the buttons.
    st.markdown("<div class='card-content-calendar'>", unsafe_allow_html=True)
    st.markdown("<div style='display:flex;align-items:center;justify-content:space-between;gap:8px;'>", unsafe_allow_html=True)
    st.markdown("<div style='display:flex;align-items:center;gap:8px;min-width:0;flex:1;'>"
                f"{type_icon}<span class='content-title'>{safe_title}</span></div>", unsafe_allow_html=True)
    st.markdown("<div style='display:flex;align-items:center;gap:4px;'>", unsafe_allow_html=True)
    col1, col2, col3 = st.columns([1, 1, 1])
    with col1:
        if st.button("⚡", key=f"generate_{item_key}", help="Generate with AI Blog Writer", use_container_width=True):
            on_generate()
    with col2:
        if st.button("✏️", key=f"edit_{item_key}", help="Edit Content", use_container_width=True):
            on_edit()
    with col3:
        if st.button("🗑️", key=f"delete_{item_key}", help="Delete Content", use_container_width=True):
            on_delete()
    st.markdown("</div>", unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)
    st.markdown(f"<div class='content-meta'><span class='badge-content-calendar badge-platform-{platform_disp.lower()}'>{platform_icon} {platform_disp} | {type_disp} | <span class='chip-status chip-status-{status_disp.lower()}'>{status_disp}</span></span></div>", unsafe_allow_html=True)
    st.markdown("</div>", unsafe_allow_html=True)
|
||||
@@ -1,498 +0,0 @@
|
||||
import streamlit as st
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
from lib.ai_seo_tools.content_calendar.core.content_generator import ContentGenerator
|
||||
from lib.ai_seo_tools.content_calendar.core.ai_generator import AIGenerator
|
||||
from lib.ai_seo_tools.content_calendar.integrations.seo_optimizer import SEOOptimizer
|
||||
from lib.database.models import ContentItem, ContentType, Platform, SEOData
|
||||
import logging
|
||||
from lib.database.models import get_engine, get_session, init_db
|
||||
|
||||
logger = logging.getLogger('content_calendar.optimization')
|
||||
|
||||
# Module-level database setup: these statements run once, when this module is
# first imported.
# NOTE(review): `session` is not referenced by the code visible in this module
# section — confirm it is still needed before relying on this import-time setup.
engine = get_engine()
init_db(engine)
session = get_session(engine)
|
||||
|
||||
class OptimizationManager:
    """Keep optimization history, previews and metrics in ``st.session_state``.

    Three dictionaries keyed by content id are stored in the session:
    ``optimization_history``, ``optimization_previews`` and
    ``optimization_metrics``. Each maps a content id to a list of entries.
    """

    def __init__(self):
        # Create any missing session bucket so later lookups never fail.
        for bucket in ('optimization_history', 'optimization_previews', 'optimization_metrics'):
            if bucket not in st.session_state:
                st.session_state[bucket] = {}

    def track_optimization(self, content_id: str, optimization_data: Dict[str, Any]) -> bool:
        """Record an optimization run and its computed metrics.

        ``optimization_data`` is stamped in place with ``timestamp`` and
        ``metrics`` before being appended to the history. Returns ``True`` on
        success and ``False`` (after logging) on any error.
        """
        try:
            history = st.session_state.optimization_history
            history.setdefault(content_id, [])

            optimization_data['timestamp'] = datetime.now()
            optimization_data['metrics'] = self._calculate_optimization_metrics(optimization_data)
            history[content_id].append(optimization_data)

            # Mirror the metrics into their own per-content series.
            metrics_log = st.session_state.optimization_metrics
            metrics_log.setdefault(content_id, [])
            metrics_log[content_id].append(optimization_data['metrics'])
        except Exception as e:
            logger.error(f"Error tracking optimization: {str(e)}")
            return False
        return True

    def _calculate_optimization_metrics(self, optimization_data: Dict[str, Any]) -> Dict[str, Any]:
        """Build the metrics dictionary for one optimization entry.

        Scores default to 0 and are only computed for the inputs present in
        ``optimization_data``. Returns ``{}`` (after logging) on any error.
        """
        try:
            scores = dict.fromkeys(
                ('readability_score', 'seo_score', 'engagement_potential',
                 'keyword_density', 'content_quality'),
                0,
            )

            if 'content' in optimization_data:
                scores['readability_score'] = self._calculate_readability(optimization_data['content'])

            if 'seo_data' in optimization_data:
                seo_input = optimization_data['seo_data']
                scores['seo_score'] = self._calculate_seo_score(seo_input)
                scores['keyword_density'] = self._calculate_keyword_density(seo_input)

            if 'engagement_metrics' in optimization_data:
                scores['engagement_potential'] = self._calculate_engagement_potential(
                    optimization_data['engagement_metrics']
                )

            # Weighted blend: 30% readability, 30% SEO, 40% engagement.
            scores['content_quality'] = (
                scores['readability_score'] * 0.3 +
                scores['seo_score'] * 0.3 +
                scores['engagement_potential'] * 0.4
            )
            return scores
        except Exception as e:
            logger.error(f"Error calculating optimization metrics: {str(e)}")
            return {}

    def _calculate_readability(self, content: str) -> float:
        """Return the readability score (placeholder: always 0.8)."""
        # Placeholder until a real readability calculation is implemented.
        return 0.8

    def _calculate_seo_score(self, seo_data: SEOData) -> float:
        """Return the SEO score (placeholder: always 0.85)."""
        # Placeholder until a real SEO score calculation is implemented.
        return 0.85

    def _calculate_keyword_density(self, seo_data: SEOData) -> float:
        """Return the keyword density (placeholder: always 2.5)."""
        # Placeholder until a real keyword density calculation is implemented.
        return 2.5

    def _calculate_engagement_potential(self, engagement: Dict[str, Any]) -> float:
        """Return the engagement potential (placeholder: always 0.75)."""
        # Placeholder until a real engagement calculation is implemented.
        return 0.75

    def get_optimization_history(self, content_id: str) -> List[Dict[str, Any]]:
        """Return the recorded optimization entries for ``content_id`` (or ``[]``)."""
        history = st.session_state.optimization_history
        return history.get(content_id, [])

    def get_optimization_metrics(self, content_id: str) -> List[Dict[str, Any]]:
        """Return the recorded metrics series for ``content_id`` (or ``[]``)."""
        metrics_log = st.session_state.optimization_metrics
        return metrics_log.get(content_id, [])

    def save_preview(self, content_id: str, preview_data: Dict[str, Any]) -> bool:
        """Append a preview, stamping it in place with ``version`` and ``timestamp``.

        Versions count up from 1 per content id. Returns ``True`` on success
        and ``False`` (after logging) on any error.
        """
        try:
            previews = st.session_state.optimization_previews
            previews.setdefault(content_id, [])

            preview_data['version'] = len(previews[content_id]) + 1
            preview_data['timestamp'] = datetime.now()
            previews[content_id].append(preview_data)
        except Exception as e:
            logger.error(f"Error saving preview: {str(e)}")
            return False
        return True

    def get_preview(self, content_id: str, version: int = None) -> Dict[str, Any]:
        """Return the latest preview, or the one matching ``version``.

        Returns ``{}`` when there are no previews, no version matches, or an
        error occurs (logged).
        """
        try:
            stored = st.session_state.optimization_previews.get(content_id, [])
            if not stored:
                return {}
            if version is None:
                return stored[-1]
            # First preview whose version matches, else an empty dict.
            return next(
                (candidate for candidate in stored if candidate['version'] == version),
                {},
            )
        except Exception as e:
            logger.error(f"Error getting preview: {str(e)}")
            return {}
|
||||
|
||||
def render_content_optimization(
    content_generator: ContentGenerator,
    ai_generator: AIGenerator,
    seo_optimizer: SEOOptimizer
):
    """Render the content planning and content optimization interface.

    The page has two tabs. "Content Planning" generates calendar entries from
    keywords and platforms and lists the scheduled content per platform.
    "Content Optimization" lets the user pick an item and run
    ``content_generator.optimize_content`` on it, recording the run in an
    ``OptimizationManager``.

    Args:
        content_generator: Used for ``optimize_content``.
        ai_generator: Used for ``enhance_content`` when generating ideas.
        seo_optimizer: Accepted for the caller's interface; not used in the
            code of this function.
    """
    st.title("Content Calendar")

    # Initialize optimization manager
    optimization_manager = OptimizationManager()

    # Check if calendar manager is available
    if 'calendar_manager' not in st.session_state:
        st.error("Calendar manager not initialized. Please refresh the page.")
        return

    # Create main tabs
    main_tabs = st.tabs(["Content Planning", "Content Optimization"])

    with main_tabs[0]:
        # Create two columns for the layout
        col1, col2 = st.columns([1, 1])

        with col1:
            st.header("Quick Calendar Generation")
            st.markdown("""
            Generate a content calendar in three simple steps:
            1. Enter your keywords
            2. Select target platforms
            3. Choose time period
            """)

            # Step 1: Keywords Input
            st.subheader("Step 1: Enter Keywords")
            keywords = st.text_area(
                "Enter keywords or topics (one per line)",
                help="Enter the main topics or keywords you want to create content about"
            )

            # Step 2: Platform Selection
            st.subheader("Step 2: Select Target Platforms")
            platform_categories = {
                "Website": ["WEBSITE"],
                "Social Media": ["INSTAGRAM", "FACEBOOK", "TWITTER", "LINKEDIN"],
                "Video": ["YOUTUBE"],
                "Newsletter": ["NEWSLETTER"]
            }

            selected_platforms = []
            for category, platforms in platform_categories.items():
                st.markdown(f"**{category}**")
                for platform in platforms:
                    if st.checkbox(platform.replace("_", " ").title(), key=f"platform_{platform}"):
                        selected_platforms.append(platform)

            # Step 3: Time Period
            # NOTE(review): the selected period is not used below; every entry
            # is created with publish_date=datetime.now().
            st.subheader("Step 3: Choose Time Period")
            time_period = st.selectbox(
                "Select time period",
                ["1 Week", "2 Weeks", "1 Month", "3 Months", "6 Months"],
                help="Choose how far ahead you want to plan your content"
            )

            # Generate Calendar Button
            if st.button("Generate with AI", type="primary"):
                if not keywords or not selected_platforms:
                    st.error("Please enter keywords and select at least one platform.")
                else:
                    with st.spinner("Generating content calendar..."):
                        try:
                            # Generate content ideas based on keywords
                            content_ideas = []
                            for keyword in keywords.split('\n'):
                                if keyword.strip():
                                    # Generate content ideas for each platform
                                    for platform in selected_platforms:
                                        try:
                                            # Create a content item for the AI generator
                                            content_item = ContentItem(
                                                title=keyword.strip(),
                                                description=f"Content about {keyword.strip()}",
                                                content_type=ContentType.BLOG_POST if platform == "WEBSITE" else ContentType.SOCIAL_MEDIA,
                                                platforms=[Platform[platform]],
                                                publish_date=datetime.now(),
                                                seo_data=SEOData(
                                                    title=keyword.strip(),
                                                    meta_description=f"Content about {keyword.strip()}",
                                                    keywords=[keyword.strip()],
                                                    structured_data={}
                                                )
                                            )

                                            # Generate content using AI generator
                                            content_idea = ai_generator.enhance_content(
                                                content=content_item,
                                                enhancement_type='content_generation',
                                                target_audience={
                                                    'content_settings': {
                                                        'tone': 'professional',
                                                        'length': 'medium',
                                                        'engagement_goal': 'awareness',
                                                        'creativity_level': 5
                                                    }
                                                }
                                            )

                                            if content_idea:
                                                content_ideas.append({
                                                    'title': content_idea.get('title', keyword.strip()),
                                                    'introduction': content_idea.get('content', f"Content about {keyword.strip()}"),
                                                    'platform': platform,
                                                    'meta_description': content_idea.get('meta_description', ''),
                                                    'keywords': [keyword.strip()]
                                                })
                                        except Exception as e:
                                            # One failing keyword/platform pair must not abort the rest.
                                            logger.error(f"Error generating content for {keyword} on {platform}: {str(e)}")
                                            continue

                            if content_ideas:
                                # Create calendar entries
                                calendar = st.session_state.calendar_manager.get_calendar()
                                for idea in content_ideas:
                                    try:
                                        # Create content item
                                        content_item = ContentItem(
                                            title=idea['title'],
                                            description=idea['introduction'],
                                            content_type=ContentType.BLOG_POST if idea['platform'] == "WEBSITE" else ContentType.SOCIAL_MEDIA,
                                            platforms=[Platform[idea['platform']]],
                                            publish_date=datetime.now(),
                                            seo_data=SEOData(
                                                title=idea['title'],
                                                meta_description=idea.get('meta_description', ''),
                                                keywords=idea.get('keywords', []),
                                                structured_data={}
                                            )
                                        )
                                        calendar.add_content(content_item)
                                    except Exception as e:
                                        logger.error(f"Error adding content to calendar: {str(e)}")
                                        continue

                                st.success("Content calendar generated successfully!")
                                st.rerun()  # Refresh to show new content
                            else:
                                st.error("Failed to generate any content ideas. Please try different keywords or settings.")
                        except Exception as e:
                            logger.error(f"Error generating content calendar: {str(e)}")
                            st.error("An error occurred while generating the content calendar. Please try again.")

        with col2:
            st.header("Scheduled Content")
            # Get all content from calendar
            calendar = st.session_state.calendar_manager.get_calendar()
            if not calendar:
                st.info("No content scheduled yet. Generate content using the form on the left.")
            else:
                # Group content by platform
                platform_content = {}
                for item in calendar.get_all_content():
                    platform = item.platforms[0].name if item.platforms else "Unknown"
                    platform_content.setdefault(platform, []).append(item)

                if not platform_content:
                    # A calendar object can exist while holding no items;
                    # st.tabs() rejects an empty label list, so stop here.
                    st.info("No content scheduled yet. Generate content using the form on the left.")
                else:
                    # Create tabs for each platform
                    platform_tabs = st.tabs(list(platform_content.keys()))

                    for platform_tab, (platform, content) in zip(platform_tabs, platform_content.items()):
                        with platform_tab:
                            st.write(f"### {platform} Content")

                            # Convert content to DataFrame for better display
                            content_data = [
                                {
                                    'Date': item.publish_date.strftime('%Y-%m-%d'),
                                    'Title': item.title,
                                    'Type': item.content_type.name,
                                    'Status': item.status
                                }
                                for item in content
                            ]

                            if content_data:
                                df = pd.DataFrame(content_data)
                                st.dataframe(df, use_container_width=True)

                                # Add action buttons for each content item
                                for position, item in enumerate(content):
                                    # Titles repeat across platforms (one idea per
                                    # keyword per platform), so the widget keys also
                                    # carry the platform and position to stay unique.
                                    key_suffix = f"{platform}_{position}_{item.title}"
                                    with st.expander(f"Actions for: {item.title}"):
                                        edit_col, optimize_col, delete_col = st.columns(3)
                                        with edit_col:
                                            if st.button("Edit", key=f"edit_{key_suffix}"):
                                                st.session_state.selected_content = item.title
                                        with optimize_col:
                                            if st.button("Optimize", key=f"optimize_{key_suffix}"):
                                                st.session_state.selected_content = item.title
                                                st.session_state.active_tab = "Content Optimization"
                                        with delete_col:
                                            if st.button("Delete", key=f"delete_{key_suffix}"):
                                                calendar.remove_content(item)
                                                st.success(f"Removed {item.title}")
                                                st.rerun()

    with main_tabs[1]:
        st.header("Content Optimization")
        # Get available content
        calendar = st.session_state.calendar_manager.get_calendar()
        if not calendar:
            st.info("No content available for optimization. Use the Content Planning tab to generate content.")
            return

        available_content = calendar.get_all_content()
        content_options = [item.title for item in available_content]

        # Content selection
        selected_content = st.selectbox(
            "Select content to optimize",
            options=content_options,
            key="optimize_content_select"
        )

        if selected_content:
            try:
                content_item = next(
                    item for item in available_content
                    if item.title == selected_content
                )

                # Create tabs for different optimization aspects
                opt_tabs = st.tabs(["Content Optimization", "SEO Optimization", "Preview", "History", "Analytics"])

                with opt_tabs[0]:
                    st.subheader("Content Optimization")

                    # Show onboarding info if no optimization history
                    if not optimization_manager.get_optimization_history(content_item.title):
                        st.info("""
                        ### Content Optimization Guide

                        Use these tools to enhance your content:

                        - **Content Tone**: Adjust the writing style to match your brand voice
                        - **Content Length**: Optimize for your target platform's requirements
                        - **Engagement Goal**: Focus on specific audience actions
                        - **Creativity Level**: Balance between creative and professional content

                        Click 'Generate Optimization' to get started!
                        """)

                    # Advanced Optimization Settings
                    col1, col2 = st.columns(2)
                    with col1:
                        tone = st.select_slider(
                            "Content Tone",
                            options=["Professional", "Casual", "Educational", "Entertaining", "Persuasive"],
                            value="Professional"
                        )
                        length = st.radio(
                            "Content Length",
                            ["Short", "Medium", "Long"],
                            horizontal=True
                        )
                    with col2:
                        engagement_goal = st.selectbox(
                            "Engagement Goal",
                            ["Awareness", "Consideration", "Conversion", "Retention"]
                        )
                        creativity_level = st.slider(
                            "Creativity Level",
                            min_value=1,
                            max_value=10,
                            value=5
                        )

                    if st.button("Generate Optimization", type="primary"):
                        with st.spinner("Optimizing content..."):
                            try:
                                # Generate optimization
                                optimization = content_generator.optimize_content(
                                    content=content_item,
                                    tone=tone,
                                    length=length,
                                    engagement_goal=engagement_goal,
                                    creativity_level=creativity_level
                                )

                                if optimization:
                                    st.success("Content optimized successfully!")

                                    # Show optimization results
                                    st.subheader("Optimization Results")
                                    st.write(optimization.get('content', ''))

                                    # Save optimization history
                                    optimization_manager.track_optimization(
                                        content_item.title,
                                        {
                                            'tone': tone,
                                            'length': length,
                                            'engagement_goal': engagement_goal,
                                            'creativity_level': creativity_level,
                                            'content': optimization.get('content', ''),
                                            'timestamp': datetime.now()
                                        }
                                    )
                                else:
                                    st.error("Failed to optimize content. Please try again.")
                            except Exception as e:
                                logger.error(f"Error optimizing content: {str(e)}")
                                st.error("An error occurred while optimizing content. Please try again.")

                with opt_tabs[1]:
                    st.subheader("SEO Optimization")
                    # SEO optimization content here

                with opt_tabs[2]:
                    st.subheader("Content Preview")
                    # Content preview here

                with opt_tabs[3]:
                    st.subheader("Optimization History")
                    # Optimization history here

                with opt_tabs[4]:
                    st.subheader("Performance Analytics")
                    # Analytics content here

            except Exception as e:
                logger.error(f"Error processing selected content: {str(e)}")
                st.error("Error processing selected content. Please try again.")
|
||||
|
||||
# Remove everything after this point
|
||||
@@ -1,517 +0,0 @@
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
# Add parent directory to path to import existing tools.
# Walks five directory levels up from this file's location and appends that
# directory to sys.path if it is not already listed there.
# NOTE(review): presumably this is the project root that makes the absolute
# `lib.…` imports below resolvable - confirm against the repository layout.
parent_dir = str(Path(__file__).parent.parent.parent.parent.parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentItem, ContentType, Platform, SEOData
|
||||
from lib.ai_seo_tools.content_calendar.core.content_repurposer import SmartContentRepurposingEngine
|
||||
from lib.ai_seo_tools.content_calendar.core.content_generator import ContentGenerator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentRepurposingUI:
    """Streamlit UI component for the Smart Content Repurposing Engine.

    The UI is organised in four tabs: single-content repurposing, cross-platform
    series creation, repurposing analysis and a (mock-data) dashboard. All
    generation work is delegated to ``ContentGenerator``; this class only
    collects input and renders results.
    """

    def __init__(self):
        """Create the repurposing engine, the content generator and the UI logger."""
        self.repurposing_engine = SmartContentRepurposingEngine()
        self.content_generator = ContentGenerator()
        self.logger = logging.getLogger('content_calendar.repurposing_ui')

    def render_repurposing_interface(self):
        """Render the main repurposing interface."""
        st.header("🔄 Smart Content Repurposing Engine")
        st.markdown("Transform your content into multiple platform-optimized pieces with AI-powered repurposing.")

        # Create tabs for different repurposing functions
        tab1, tab2, tab3, tab4 = st.tabs([
            "📝 Single Content Repurposing",
            "📚 Content Series Creation",
            "🔍 Content Analysis",
            "📊 Repurposing Dashboard"
        ])

        with tab1:
            self._render_single_content_repurposing()

        with tab2:
            self._render_content_series_creation()

        with tab3:
            self._render_content_analysis()

        with tab4:
            self._render_repurposing_dashboard()

    def _render_single_content_repurposing(self):
        """Render the single content repurposing interface.

        Collects one source ``ContentItem`` (manual form, file upload or the
        calendar placeholder), the target platforms and a strategy, then asks
        the content generator for repurposed pieces when the button is pressed.
        """
        st.subheader("Repurpose Single Content")
        st.markdown("Transform one piece of content into multiple platform-optimized variations.")

        # Content input section
        col1, col2 = st.columns([2, 1])

        with col1:
            st.markdown("### 📄 Source Content")

            # Content input options
            input_method = st.radio(
                "How would you like to provide content?",
                ["Manual Input", "Upload File", "Select from Calendar"],
                horizontal=True
            )

            source_content = None

            if input_method == "Manual Input":
                source_content = self._render_manual_content_input()
            elif input_method == "Upload File":
                source_content = self._render_file_upload_input()
            else:  # Select from Calendar
                source_content = self._render_calendar_selection()

        with col2:
            st.markdown("### 🎯 Target Platforms")

            # Platform selection
            available_platforms = [
                Platform.TWITTER,
                Platform.LINKEDIN,
                Platform.INSTAGRAM,
                Platform.FACEBOOK,
                Platform.WEBSITE
            ]

            selected_platforms = st.multiselect(
                "Select target platforms:",
                options=available_platforms,
                default=[Platform.TWITTER, Platform.LINKEDIN],
                format_func=lambda x: x.name.title()
            )

            # Repurposing strategy
            strategy = st.selectbox(
                "Repurposing Strategy:",
                ["adaptive", "atomic", "series"],
                help="Adaptive: AI chooses best approach, Atomic: Break into small pieces, Series: Create connected content"
            )

        # Generate repurposed content
        if st.button("🚀 Generate Repurposed Content", type="primary"):
            if source_content and selected_platforms:
                with st.spinner("Repurposing content..."):
                    try:
                        repurposed_content = self.content_generator.repurpose_content_for_platforms(
                            content_item=source_content,
                            target_platforms=selected_platforms,
                            strategy=strategy
                        )

                        if repurposed_content:
                            self._display_repurposed_content(repurposed_content)
                        else:
                            st.error("Failed to generate repurposed content. Please try again.")

                    except Exception as e:
                        # Keep the traceback in the log; the user only sees the message.
                        self.logger.exception("Error during repurposing")
                        st.error(f"Error during repurposing: {str(e)}")
            else:
                st.warning("Please provide source content and select at least one target platform.")

    def _render_content_series_creation(self):
        """Render the content series creation interface.

        The platform/strategy controls and the create button are only shown
        once source content has been provided through the manual input form.
        """
        st.subheader("Create Cross-Platform Content Series")
        st.markdown("Generate a strategic content series that progressively reveals information across platforms.")

        # Source content input
        source_content = self._render_manual_content_input(key_suffix="_series")

        if source_content:
            col1, col2 = st.columns(2)

            with col1:
                st.markdown("### 🌐 Platform Strategy")

                # Platform selection with strategy
                platforms = st.multiselect(
                    "Select platforms for series:",
                    options=[Platform.TWITTER, Platform.LINKEDIN, Platform.INSTAGRAM, Platform.FACEBOOK, Platform.WEBSITE],
                    default=[Platform.TWITTER, Platform.LINKEDIN, Platform.WEBSITE],
                    format_func=lambda x: x.name.title(),
                    key="series_platforms"
                )

                series_type = st.selectbox(
                    "Series Strategy:",
                    ["progressive_disclosure", "platform_native"],
                    help="Progressive: Gradually reveal info across platforms, Native: Optimize for each platform's strengths"
                )

            with col2:
                st.markdown("### 📅 Timeline Preview")

                if platforms:
                    # Show timeline preview
                    timeline_df = self._create_series_timeline_preview(source_content, platforms)
                    st.dataframe(timeline_df, use_container_width=True)

            # Generate series
            if st.button("📚 Create Content Series", type="primary", key="create_series"):
                if platforms:
                    with st.spinner("Creating content series..."):
                        try:
                            series_content = self.content_generator.create_content_series_across_platforms(
                                source_content=source_content,
                                platforms=platforms,
                                series_type=series_type
                            )

                            if series_content:
                                self._display_content_series(series_content)
                            else:
                                st.error("Failed to create content series. Please try again.")

                        except Exception as e:
                            self.logger.exception("Error creating series")
                            st.error(f"Error creating series: {str(e)}")
                else:
                    st.warning("Please select at least one platform for the series.")

    def _render_content_analysis(self):
        """Render the content analysis interface.

        Shown controls depend on content having been provided through the
        manual input form; the analysis itself is delegated to the generator.
        """
        st.subheader("Content Repurposing Analysis")
        st.markdown("Analyze your content's repurposing potential and get AI-powered recommendations.")

        # Content input
        content_to_analyze = self._render_manual_content_input(key_suffix="_analysis")

        if content_to_analyze:
            col1, col2 = st.columns([1, 1])

            with col1:
                available_platforms = st.multiselect(
                    "Available platforms for analysis:",
                    options=[Platform.TWITTER, Platform.LINKEDIN, Platform.INSTAGRAM, Platform.FACEBOOK, Platform.WEBSITE],
                    default=[Platform.TWITTER, Platform.LINKEDIN, Platform.INSTAGRAM, Platform.FACEBOOK, Platform.WEBSITE],
                    format_func=lambda x: x.name.title(),
                    key="analysis_platforms"
                )

            with col2:
                if st.button("🔍 Analyze Content", type="primary"):
                    if available_platforms:
                        with st.spinner("Analyzing content..."):
                            try:
                                analysis = self.content_generator.analyze_content_for_repurposing(
                                    content_item=content_to_analyze,
                                    available_platforms=available_platforms
                                )

                                if analysis:
                                    self._display_content_analysis(analysis)
                                else:
                                    st.error("Failed to analyze content. Please try again.")

                            except Exception as e:
                                self.logger.exception("Error during analysis")
                                st.error(f"Error during analysis: {str(e)}")
                    else:
                        st.warning("Please select at least one platform for analysis.")

    def _render_repurposing_dashboard(self):
        """Render the repurposing dashboard with metrics and insights.

        All figures shown here are hard-coded mock data for demonstration.
        """
        st.subheader("Repurposing Dashboard")
        st.markdown("Track your content repurposing performance and insights.")

        # Mock data for demonstration
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric("Content Pieces Created", "156", "+23")

        with col2:
            st.metric("Time Saved", "312 hours", "+45 hours")

        with col3:
            st.metric("Platform Coverage", "85%", "+12%")

        with col4:
            st.metric("Engagement Boost", "34%", "+8%")

        # Recent repurposing activity
        st.markdown("### 📈 Recent Repurposing Activity")

        # Mock data for recent activity
        recent_activity = pd.DataFrame({
            'Date': ['2024-01-15', '2024-01-14', '2024-01-13', '2024-01-12'],
            'Source Content': ['AI Writing Tips', 'SEO Best Practices', 'Content Strategy Guide', 'Social Media Trends'],
            'Platforms': ['Twitter, LinkedIn', 'LinkedIn, Instagram', 'All Platforms', 'Twitter, Facebook'],
            'Pieces Created': [3, 2, 5, 2],
            'Status': ['Published', 'Scheduled', 'Draft', 'Published']
        })

        st.dataframe(recent_activity, use_container_width=True)

        # Performance insights
        st.markdown("### 💡 Performance Insights")

        insights_col1, insights_col2 = st.columns(2)

        with insights_col1:
            st.info("🎯 **Best Performing Platform**: LinkedIn posts show 45% higher engagement when repurposed from blog content.")

        with insights_col2:
            st.success("📊 **Optimization Tip**: Twitter threads perform 60% better when created from long-form content with statistics.")

    def _render_manual_content_input(self, key_suffix: str = "") -> Optional[ContentItem]:
        """Render the manual content input form and return the current item.

        Args:
            key_suffix: Appended to the form and widget keys so the form can be
                rendered several times on one page without key collisions.

        Returns:
            The ``ContentItem`` built from the most recent valid submission of
            this form, or ``None`` when nothing valid has been submitted yet.

        The item is kept in ``st.session_state`` because the submit button is
        only truthy during the single rerun it triggers; without persistence the
        content would disappear as soon as the user clicks any other button
        (for example the generate/analyze buttons that consume it).
        """
        state_key = f"manual_content_item{key_suffix}"

        with st.form(f"content_input_form{key_suffix}"):
            title = st.text_input("Content Title:", key=f"title{key_suffix}")
            content_type = st.selectbox(
                "Content Type:",
                options=[ContentType.BLOG_POST, ContentType.SOCIAL_MEDIA, ContentType.VIDEO, ContentType.NEWSLETTER],
                format_func=lambda x: x.name.replace('_', ' ').title(),
                key=f"content_type{key_suffix}"
            )

            description = st.text_area(
                "Content Description/Body:",
                height=200,
                help="Paste your content here. This will be analyzed and repurposed.",
                key=f"description{key_suffix}"
            )

            col1, col2 = st.columns(2)
            with col1:
                author = st.text_input("Author:", value="Content Creator", key=f"author{key_suffix}")
            with col2:
                tags = st.text_input("Tags (comma-separated):", key=f"tags{key_suffix}")

            submitted = st.form_submit_button("📝 Use This Content")

        if submitted:
            if title and description:
                # "a, b,, c" -> ["a", "b", "c"]: trim whitespace, drop empties.
                tag_list = [tag.strip() for tag in tags.split(',') if tag.strip()]
                st.session_state[state_key] = ContentItem(
                    title=title,
                    description=description,
                    content_type=content_type,
                    platforms=[],
                    publish_date=datetime.now(),
                    status="draft",
                    author=author,
                    tags=tag_list,
                    notes="",
                    seo_data=SEOData(title=title, meta_description="", keywords=[], structured_data={})
                )
            elif state_key in st.session_state:
                # An incomplete submission yields no content, as before.
                del st.session_state[state_key]

        return st.session_state.get(state_key)

    @staticmethod
    def _extract_uploaded_text(filename: str, raw: bytes) -> str:
        """Return the text of an uploaded file given its name and raw bytes.

        ``.docx`` files are zip archives, so their paragraphs are read from the
        embedded ``word/document.xml``; every other file is decoded as UTF-8.

        Raises:
            UnicodeDecodeError: If a text file is not valid UTF-8.
            zipfile.BadZipFile / KeyError: If a ``.docx`` file is not a valid
                Word archive.
        """
        if Path(filename).suffix.lower() != '.docx':
            return raw.decode("utf-8")

        import io
        import zipfile
        from xml.etree import ElementTree

        # WordprocessingML namespace used by <w:p> (paragraph) and <w:t> (text run).
        ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
        with zipfile.ZipFile(io.BytesIO(raw)) as archive:
            root = ElementTree.fromstring(archive.read("word/document.xml"))

        paragraphs = (
            "".join(node.text or "" for node in paragraph.iter(f"{ns}t"))
            for paragraph in root.iter(f"{ns}p")
        )
        return "\n".join(paragraphs)

    def _render_file_upload_input(self) -> Optional[ContentItem]:
        """Render the file uploader and build a ``ContentItem`` from the file.

        Returns:
            A draft blog-post ``ContentItem`` whose title is derived from the
            file name, or ``None`` when no file is uploaded or it cannot be read
            (in which case an error message is shown).
        """
        uploaded_file = st.file_uploader(
            "Upload content file:",
            type=['txt', 'md', 'docx'],
            help="Upload a text file, markdown file, or Word document"
        )

        if uploaded_file:
            try:
                # Read file content (format chosen by extension, not MIME type)
                content = self._extract_uploaded_text(uploaded_file.name, uploaded_file.read())

                # Extract title from filename; stem keeps dots inside the name
                # ("q1.report.txt" -> "q1.report") instead of cutting at the first dot.
                title = Path(uploaded_file.name).stem.replace('_', ' ').title()

                # Create ContentItem
                content_item = ContentItem(
                    title=title,
                    description=content,
                    content_type=ContentType.BLOG_POST,
                    platforms=[],
                    publish_date=datetime.now(),
                    status="draft",
                    author="Uploaded Content",
                    tags=[],
                    notes=f"Uploaded from file: {uploaded_file.name}",
                    seo_data=SEOData(title=title, meta_description="", keywords=[], structured_data={})
                )

                st.success(f"✅ File uploaded: {uploaded_file.name}")
                return content_item

            except Exception as e:
                self.logger.exception("Error reading uploaded file")
                st.error(f"Error reading file: {str(e)}")

        return None

    def _render_calendar_selection(self) -> Optional[ContentItem]:
        """Render calendar content selection (placeholder; always returns ``None``)."""
        st.info("📅 Calendar integration coming soon! For now, please use manual input or file upload.")
        return None

    def _display_repurposed_content(self, repurposed_content: List[ContentItem]):
        """Display the repurposed content results, one expander per piece.

        Each piece is labelled with its first platform; a piece without any
        platform is labelled "Unknown" instead of raising ``IndexError``.
        """
        st.success(f"✅ Successfully created {len(repurposed_content)} repurposed content pieces!")

        for i, content in enumerate(repurposed_content):
            platform_label = content.platforms[0].name.title() if content.platforms else "Unknown"

            with st.expander(f"📱 {platform_label} - {content.title}"):
                st.markdown(f"**Platform:** {platform_label}")
                st.markdown(f"**Content Type:** {content.content_type.name.replace('_', ' ').title()}")
                st.markdown(f"**Scheduled for:** {content.publish_date.strftime('%Y-%m-%d')}")

                st.markdown("**Content:**")
                st.write(content.description)

                if content.tags:
                    st.markdown(f"**Tags:** {', '.join(content.tags)}")

                # Action buttons (keys include the index so they stay unique)
                col1, col2, col3 = st.columns(3)
                with col1:
                    if st.button("📝 Edit", key=f"edit_{i}"):
                        st.info("Edit functionality coming soon!")
                with col2:
                    if st.button("📅 Schedule", key=f"schedule_{i}"):
                        st.info("Scheduling functionality coming soon!")
                with col3:
                    if st.button("📋 Copy", key=f"copy_{i}"):
                        st.code(content.description)

    def _display_content_series(self, series_content: Dict[str, List[ContentItem]]):
        """Display the content series results grouped by platform key."""
        total_pieces = sum(len(pieces) for pieces in series_content.values())
        st.success(f"✅ Successfully created content series with {total_pieces} pieces across {len(series_content)} platforms!")

        for platform, content_pieces in series_content.items():
            st.markdown(f"### 📱 {platform.title()} Series ({len(content_pieces)} pieces)")

            for i, content in enumerate(content_pieces):
                with st.expander(f"Part {i+1}: {content.title}"):
                    st.markdown(f"**Scheduled for:** {content.publish_date.strftime('%Y-%m-%d')}")
                    st.markdown("**Content:**")
                    st.write(content.description)

                    if content.tags:
                        st.markdown(f"**Tags:** {', '.join(content.tags)}")

    def _display_content_analysis(self, analysis: Dict[str, Any]):
        """Display content analysis results.

        Reads the ``content_analysis``, ``platform_suggestions``,
        ``strategy_suggestions`` and ``estimated_output`` entries of *analysis*;
        every entry is optional and falls back to an empty/default value.
        """
        st.markdown("### 📊 Content Analysis Results")

        # Content metrics
        col1, col2, col3 = st.columns(3)

        content_analysis = analysis.get('content_analysis', {})

        with col1:
            st.metric("Word Count", content_analysis.get('word_count', 0))

        with col2:
            richness = content_analysis.get('content_richness', 'Unknown')
            st.metric("Content Richness", richness)

        with col3:
            potential = content_analysis.get('repurposing_potential', 'Unknown')
            st.metric("Repurposing Potential", potential)

        # Recommendations
        st.markdown("### 💡 Recommendations")

        col1, col2 = st.columns(2)

        with col1:
            st.markdown("**Recommended Platforms:**")
            platforms = analysis.get('platform_suggestions', [])
            for platform in platforms:
                st.write(f"• {platform.name.title()}")

        with col2:
            st.markdown("**Suggested Strategies:**")
            strategies = analysis.get('strategy_suggestions', [])
            for strategy in strategies:
                st.write(f"• {strategy.replace('_', ' ').title()}")

        # Content atoms
        st.markdown("### 🔬 Content Atoms Analysis")

        atoms = content_analysis.get('content_atoms', {})

        for atom_type, atom_list in atoms.items():
            if atom_list:
                with st.expander(f"{atom_type.title()} ({len(atom_list)} found)"):
                    for atom in atom_list:
                        st.write(f"• {atom}")

        # Estimated output
        estimated = analysis.get('estimated_output', {})
        if estimated:
            st.markdown("### 📈 Estimated Output")

            col1, col2, col3 = st.columns(3)

            with col1:
                st.metric("Total Pieces", estimated.get('total_pieces', 0))

            with col2:
                st.metric("Time Savings", estimated.get('time_savings', '0 hours'))

            with col3:
                st.metric("Content Multiplication", estimated.get('content_multiplication', '1x'))

    def _create_series_timeline_preview(self, content: ContentItem, platforms: List[Platform]) -> pd.DataFrame:
        """Create a preview timeline for content series.

        Produces one row per platform, in the given order, with release dates
        one day apart starting today. *content* is currently not used.
        """
        timeline_data = []
        base_date = datetime.now()

        for i, platform in enumerate(platforms):
            release_date = base_date + timedelta(days=i)
            timeline_data.append({
                'Platform': platform.name.title(),
                'Release Date': release_date.strftime('%Y-%m-%d'),
                'Content Type': self._get_platform_content_type(platform),
                'Strategy': self._get_platform_strategy(platform)
            })

        return pd.DataFrame(timeline_data)

    def _get_platform_content_type(self, platform: Platform) -> str:
        """Get content type label for platform ("Standard Post" if unmapped)."""
        types = {
            Platform.TWITTER: "Thread/Tweet",
            Platform.LINKEDIN: "Professional Post",
            Platform.INSTAGRAM: "Visual Post",
            Platform.FACEBOOK: "Engaging Post",
            Platform.WEBSITE: "Blog Article"
        }
        return types.get(platform, "Standard Post")

    def _get_platform_strategy(self, platform: Platform) -> str:
        """Get strategy label for platform ("Standard Approach" if unmapped)."""
        strategies = {
            Platform.TWITTER: "Hook & Engage",
            Platform.LINKEDIN: "Authority Building",
            Platform.INSTAGRAM: "Visual Storytelling",
            Platform.FACEBOOK: "Community Discussion",
            Platform.WEBSITE: "Complete Information"
        }
        return strategies.get(platform, "Standard Approach")
|
||||
|
||||
# Main function to render the UI
|
||||
def render_content_repurposing_ui():
    """Build a ``ContentRepurposingUI`` and render its tabbed interface."""
    ContentRepurposingUI().render_repurposing_interface()
|
||||
|
||||
# For testing: when this module is executed as a script (rather than imported),
# render the repurposing UI directly.
if __name__ == "__main__":
    render_content_repurposing_ui()
|
||||
@@ -1,457 +0,0 @@
|
||||
import streamlit as st
|
||||
from typing import Any, Dict, List, Optional
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
from lib.ai_seo_tools.content_calendar.core.content_generator import ContentGenerator
|
||||
from lib.ai_seo_tools.content_calendar.core.ai_generator import AIGenerator
|
||||
from lib.ai_seo_tools.content_calendar.integrations.seo_optimizer import SEOOptimizer
|
||||
from lib.database.models import ContentItem, ContentType, Platform, SEOData
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger('content_calendar.series')
|
||||
|
||||
class SeriesManager:
    """Create, link, schedule and summarise content series.

    All series live in ``st.session_state.content_series`` (a dict keyed by
    series id); ``series_data`` is a reference to that same dict so code holding
    a manager instance can look series up directly.
    """

    def __init__(self):
        """Ensure the session-state containers exist and bind ``series_data``."""
        if 'content_series' not in st.session_state:
            st.session_state.content_series = {}
        if 'series_relationships' not in st.session_state:
            st.session_state.series_relationships = {}
        if 'series_performance' not in st.session_state:
            st.session_state.series_performance = {}
        # Same object as the session-state store, so series added through
        # create_series() are visible via series_data[series_id].
        self.series_data = st.session_state.content_series

    def create_series(self, series_id: str, topic: str, num_pieces: int, content_type: ContentType,
                      platforms: List[Platform], schedule_strategy: str = 'linear', series_type: str = '',
                      series_flow: str = '', metadata: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
        """Create a new content series with tracking and scheduling.

        Args:
            series_id: Key under which the series is stored.
            topic: Topic of the series.
            num_pieces: Planned number of pieces.
            content_type: Content type shared by the series.
            platforms: Target platforms; each gets an empty distribution list.
            schedule_strategy: ``'linear'``, ``'burst'`` or ``'custom'``.
            series_type: Free-form series type label.
            series_flow: Free-form series flow label.
            metadata: Extra attributes stored with the series. A fresh dict is
                used when omitted so series never share one metadata object.

        Returns:
            The stored series dict, or ``None`` if creation failed (the error
            is logged).
        """
        try:
            series = {
                'id': series_id,
                'topic': topic,
                'num_pieces': num_pieces,
                'content_type': content_type,
                'platforms': platforms,
                'schedule_strategy': schedule_strategy,
                'series_type': series_type,
                'series_flow': series_flow,
                'pieces': [],
                'performance': {},
                'created_at': datetime.now(),
                'status': 'draft',
                'relationships': {},
                'platform_distribution': {p.name: [] for p in platforms},
                'metadata': {} if metadata is None else metadata
            }
            st.session_state.content_series[series_id] = series
            return series
        except Exception as e:
            logger.error(f"Error creating series: {str(e)}")
            return None

    def add_piece(self, series_id: str, piece: Dict[str, Any]) -> bool:
        """Add a content piece to the series with relationship tracking.

        The piece is wrapped in a structured record (id, 1-based
        ``part_number``, title, content, platform, schedule, status,
        previous/next links and zeroed performance counters) and appended to the
        series. The previously last piece is linked forward to the new one.

        Returns:
            ``True`` if the piece was added, ``False`` if the series does not
            exist or an error occurred (the error is logged).
        """
        try:
            if series_id in st.session_state.content_series:
                series = st.session_state.content_series[series_id]
                pieces = series['pieces']
                part_number = len(pieces) + 1
                piece_id = f"piece_{len(pieces)}"

                # Fall back to the series' first platform only when the piece
                # does not name one itself.
                platform = piece.get('platform')
                if platform is None:
                    platform = series['platforms'][0]

                # Create a structured piece object
                structured_piece = {
                    'id': piece_id,
                    'part_number': part_number,
                    'title': piece.get('title', f"Part {part_number}"),
                    'content': piece.get('content', ''),
                    'platform': platform,
                    'scheduled_date': None,
                    'status': 'draft',
                    'relationships': {
                        'previous': None,
                        'next': None
                    },
                    'performance': {
                        'engagement': 0,
                        'reach': 0,
                        'conversion_rate': 0
                    }
                }

                # Track relationships: link both directions between the former
                # last piece and the new one.
                if pieces:
                    previous_piece = pieces[-1]
                    structured_piece['relationships']['previous'] = previous_piece['id']
                    previous_piece['relationships']['next'] = piece_id

                # Add to platform distribution (accept enum members or plain names)
                platform_name = getattr(platform, 'name', str(platform))
                if platform_name in series['platform_distribution']:
                    series['platform_distribution'][platform_name].append(piece_id)

                pieces.append(structured_piece)
                return True
            return False
        except Exception as e:
            logger.error(f"Error adding piece to series: {str(e)}")
            return False

    def get_series_performance(self, series_id: str) -> Dict[str, Any]:
        """Get comprehensive performance analytics for a series.

        Returns:
            A dict with ``overall`` totals/averages, per-``platforms`` figures,
            per-``pieces`` raw figures and per-piece ``trends`` lists; an empty
            dict if the series is unknown or an error occurred.

        Engagement and conversion rate are averaged (overall and per platform),
        while reach is reported as a sum.
        """
        try:
            if series_id in st.session_state.content_series:
                series = st.session_state.content_series[series_id]
                performance = {
                    'overall': {
                        'total_engagement': 0,
                        'total_reach': 0,
                        'conversion_rate': 0,
                        'average_engagement': 0
                    },
                    'platforms': {},
                    'pieces': {},
                    'trends': {
                        'engagement': [],
                        'reach': [],
                        'conversions': []
                    }
                }

                # Calculate overall metrics
                for piece in series['pieces']:
                    piece_performance = piece.get('performance', {})
                    performance['overall']['total_engagement'] += piece_performance.get('engagement', 0)
                    performance['overall']['total_reach'] += piece_performance.get('reach', 0)
                    performance['overall']['conversion_rate'] += piece_performance.get('conversion_rate', 0)

                    # Track piece-specific performance
                    performance['pieces'][piece['id']] = piece_performance

                    # Track trends
                    performance['trends']['engagement'].append(piece_performance.get('engagement', 0))
                    performance['trends']['reach'].append(piece_performance.get('reach', 0))
                    performance['trends']['conversions'].append(piece_performance.get('conversion_rate', 0))

                # Calculate averages (conversion_rate held the running sum until here)
                num_pieces = len(series['pieces'])
                if num_pieces > 0:
                    performance['overall']['average_engagement'] = performance['overall']['total_engagement'] / num_pieces
                    performance['overall']['conversion_rate'] = performance['overall']['conversion_rate'] / num_pieces

                # Calculate platform-specific performance
                for platform in series['platforms']:
                    platform_pieces = series['platform_distribution'].get(platform.name, [])
                    platform_performance = {
                        'engagement': 0,
                        'reach': 0,
                        'conversion_rate': 0
                    }

                    for piece_id in platform_pieces:
                        piece_performance = performance['pieces'].get(piece_id, {})
                        platform_performance['engagement'] += piece_performance.get('engagement', 0)
                        platform_performance['reach'] += piece_performance.get('reach', 0)
                        platform_performance['conversion_rate'] += piece_performance.get('conversion_rate', 0)

                    if platform_pieces:
                        platform_performance['engagement'] /= len(platform_pieces)
                        platform_performance['conversion_rate'] /= len(platform_pieces)

                    performance['platforms'][platform.name] = platform_performance

                return performance
            return {}
        except Exception as e:
            logger.error(f"Error getting series performance: {str(e)}")
            return {}

    def update_series_status(self, series_id: str, status: str) -> bool:
        """Update the status of a series.

        Returns:
            ``True`` if the series exists and was updated, otherwise ``False``.
        """
        try:
            if series_id in st.session_state.content_series:
                st.session_state.content_series[series_id]['status'] = status
                return True
            return False
        except Exception as e:
            logger.error(f"Error updating series status: {str(e)}")
            return False

    def schedule_series(self, series_id: str, start_date: datetime, interval: int = 7, *,
                        interval_days: Optional[int] = None, burst_size: int = 1) -> bool:
        """Schedule the series content with flexible scheduling strategies.

        Args:
            series_id: Series to schedule.
            start_date: Date assigned to the first piece.
            interval: Days between pieces for the ``'linear'`` strategy.
            interval_days: Keyword alias for *interval*; takes precedence when
                given.
            burst_size: For the ``'burst'`` strategy, how many consecutive
                pieces share a date before moving on one day. The default of 1
                puts each piece on its own consecutive day.

        Returns:
            ``True`` if the series exists and was scheduled, ``False`` if it is
            unknown or an error occurred (the error is logged).

        With the ``'custom'`` strategy every piece receives *start_date*;
        individual dates are expected to be set by the UI afterwards.
        """
        try:
            if series_id in st.session_state.content_series:
                series = st.session_state.content_series[series_id]
                strategy = series['schedule_strategy']
                step_days = interval if interval_days is None else interval_days
                pieces_per_burst = max(1, int(burst_size))
                current_date = start_date

                for position, piece in enumerate(series['pieces'], start=1):
                    piece['scheduled_date'] = current_date
                    if strategy == 'linear':
                        current_date += timedelta(days=step_days)
                    elif strategy == 'burst':
                        # Advance only after a full burst has been placed.
                        if position % pieces_per_burst == 0:
                            current_date += timedelta(days=1)
                    # 'custom': scheduling is handled by the UI

                return True
            return False
        except Exception as e:
            logger.error(f"Error scheduling series: {str(e)}")
            return False
|
||||
|
||||
def render_content_series_generator(
|
||||
ai_generator: AIGenerator,
|
||||
content_generator: ContentGenerator,
|
||||
seo_optimizer: SEOOptimizer
|
||||
):
|
||||
"""Render the content series generator interface."""
|
||||
st.header("Content Series Generator")
|
||||
|
||||
# Check if calendar manager is available
|
||||
if 'calendar_manager' not in st.session_state:
|
||||
st.error("Calendar manager not initialized. Please refresh the page.")
|
||||
return
|
||||
|
||||
# Get available content
|
||||
try:
|
||||
available_content = st.session_state.calendar_manager.get_calendar().get_all_content()
|
||||
content_options = [item.title for item in available_content]
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting content options: {str(e)}")
|
||||
st.error("Error loading content. Please try again.")
|
||||
return
|
||||
|
||||
if not content_options:
|
||||
st.info("""
|
||||
## Welcome to Content Series Generator! 📚
|
||||
|
||||
Create and manage content series across multiple platforms. Here's what you can do:
|
||||
|
||||
### Features:
|
||||
- 📝 **Series Creation**: Generate connected content pieces
|
||||
- 🔄 **Cross-Platform Distribution**: Optimize for different platforms
|
||||
- 📊 **Series Analytics**: Track performance across the series
|
||||
- 📅 **Smart Scheduling**: Plan content distribution
|
||||
|
||||
### Getting Started:
|
||||
1. First, add some content to your calendar
|
||||
2. Select a topic for your content series
|
||||
3. Configure series parameters and platforms
|
||||
4. Generate and schedule your series
|
||||
|
||||
Ready to get started? Add some content to your calendar first!
|
||||
""")
|
||||
return
|
||||
|
||||
# Series Configuration
|
||||
st.subheader("Create New Content Series")
|
||||
|
||||
# Show onboarding info if no series exist
|
||||
if not st.session_state.get('content_series', {}):
|
||||
st.info("""
|
||||
### Content Series Guide
|
||||
|
||||
Create engaging content series with these features:
|
||||
|
||||
- **Series Planning**: Define your series structure and goals
|
||||
- **Content Generation**: Create connected content pieces
|
||||
- **Platform Optimization**: Adapt content for each platform
|
||||
- **Performance Tracking**: Monitor series success
|
||||
|
||||
Fill out the form below to create your first series!
|
||||
""")
|
||||
|
||||
# Initialize series manager
|
||||
series_manager = SeriesManager()
|
||||
|
||||
# Series Creation Form
|
||||
with st.form("series_creation_form"):
|
||||
st.subheader("Create New Series")
|
||||
series_topic = st.text_input("Series Topic")
|
||||
num_pieces = st.slider("Number of pieces", 2, 10, 3)
|
||||
content_type = st.selectbox(
|
||||
"Content Type",
|
||||
options=[ct.name for ct in ContentType],
|
||||
key="series_content_type"
|
||||
)
|
||||
|
||||
# Multi-platform selection
|
||||
platforms = st.multiselect(
|
||||
"Target Platforms",
|
||||
options=[p.name for p in Platform],
|
||||
default=['WEBSITE'],
|
||||
key="series_platforms"
|
||||
)
|
||||
|
||||
# Schedule strategy
|
||||
schedule_strategy = st.selectbox(
|
||||
"Schedule Strategy",
|
||||
options=['linear', 'burst', 'custom'],
|
||||
help="Linear: Evenly spaced, Burst: Grouped together, Custom: Manual scheduling"
|
||||
)
|
||||
|
||||
# Series metadata
|
||||
with st.expander("Series Metadata"):
|
||||
target_audience = st.text_area("Target Audience")
|
||||
series_goals = st.multiselect(
|
||||
"Series Goals",
|
||||
options=['Awareness', 'Engagement', 'Conversion', 'Education'],
|
||||
default=['Awareness']
|
||||
)
|
||||
series_tone = st.select_slider(
|
||||
"Series Tone",
|
||||
options=['Professional', 'Casual', 'Friendly', 'Authoritative', 'Conversational'],
|
||||
value='Professional'
|
||||
)
|
||||
|
||||
submitted = st.form_submit_button("Generate Series")
|
||||
|
||||
if submitted and series_topic:
|
||||
with st.spinner("Generating content series..."):
|
||||
try:
|
||||
# Create series
|
||||
series_id = f"series_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
|
||||
|
||||
# Prepare metadata with default values
|
||||
metadata = {
|
||||
'tone': series_tone,
|
||||
'length': 'medium', # Default length
|
||||
'engagement_goal': series_goals[0] if series_goals else 'Awareness',
|
||||
'creativity_level': 'balanced' # Default creativity level
|
||||
}
|
||||
|
||||
series = series_manager.create_series(
|
||||
series_id=series_id,
|
||||
topic=series_topic,
|
||||
num_pieces=num_pieces,
|
||||
content_type=ContentType[content_type],
|
||||
platforms=[Platform[p] for p in platforms],
|
||||
schedule_strategy=schedule_strategy,
|
||||
series_type=series_goals[0] if series_goals else 'Awareness',
|
||||
series_flow='sequential', # Default flow
|
||||
metadata=metadata
|
||||
)
|
||||
|
||||
if series:
|
||||
# Generate series content
|
||||
series_content = content_generator.generate_content(
|
||||
content_type=ContentType[content_type],
|
||||
topic=series_topic,
|
||||
platforms=[Platform[p] for p in platforms],
|
||||
num_pieces=num_pieces,
|
||||
requirements={
|
||||
'tone': series_tone,
|
||||
'length': metadata['length'],
|
||||
'engagement_goal': metadata['engagement_goal'],
|
||||
'creativity_level': metadata['creativity_level'],
|
||||
'series_type': metadata['engagement_goal'],
|
||||
'series_flow': 'sequential',
|
||||
'target_audience': target_audience
|
||||
}
|
||||
)
|
||||
|
||||
if series_content:
|
||||
# Add content pieces to series
|
||||
for piece in series_content:
|
||||
series_manager.add_piece(
|
||||
series_id=series['id'],
|
||||
piece=piece
|
||||
)
|
||||
|
||||
# Schedule series
|
||||
if schedule_strategy == 'linear':
|
||||
start_date = st.date_input("Start Date", datetime.now())
|
||||
interval = st.number_input("Days between pieces", min_value=1, value=7)
|
||||
series_manager.schedule_series(
|
||||
series_id=series['id'],
|
||||
start_date=start_date,
|
||||
interval_days=interval
|
||||
)
|
||||
elif schedule_strategy == 'burst':
|
||||
start_date = st.date_input("Start Date", datetime.now())
|
||||
burst_size = st.number_input("Burst Size", min_value=1, value=1)
|
||||
series_manager.schedule_series(
|
||||
series_id=series['id'],
|
||||
start_date=start_date,
|
||||
interval_days=1,
|
||||
burst_size=burst_size
|
||||
)
|
||||
else: # custom
|
||||
for i, piece in enumerate(series_manager.series_data[series['id']]['pieces']):
|
||||
piece['scheduled_date'] = st.date_input(
|
||||
f"Publish Date for Part {i+1}",
|
||||
datetime.now() + timedelta(days=i*7)
|
||||
)
|
||||
|
||||
if st.button("Save Schedule"):
|
||||
st.success("Series schedule saved!")
|
||||
|
||||
st.success(f"Generated {num_pieces} content pieces for series!")
|
||||
|
||||
# Display series preview
|
||||
with st.expander("Series Preview", expanded=True):
|
||||
for piece in series_manager.series_data[series_id]['pieces']:
|
||||
st.markdown(f"### Part {piece['part_number']}")
|
||||
st.json(piece['content'])
|
||||
|
||||
# Platform-specific previews
|
||||
st.markdown("#### Platform Previews")
|
||||
for platform in platforms:
|
||||
with st.expander(f"{platform} Preview"):
|
||||
st.write(piece['content'].get('platform_previews', {}).get(platform, 'No preview available'))
|
||||
|
||||
# Series performance tracking
|
||||
st.subheader("Series Performance")
|
||||
performance_data = series_manager.get_series_performance(series_id)
|
||||
if performance_data:
|
||||
st.write("### Overall Performance")
|
||||
col1, col2, col3 = st.columns(3)
|
||||
with col1:
|
||||
st.metric("Total Engagement", f"{performance_data['overall']['total_engagement']:.1f}%")
|
||||
with col2:
|
||||
st.metric("Total Reach", f"{performance_data['overall']['total_reach']:,}")
|
||||
with col3:
|
||||
st.metric("Conversion Rate", f"{performance_data['overall']['conversion_rate']:.1f}%")
|
||||
|
||||
# Platform-specific performance
|
||||
st.write("### Platform Performance")
|
||||
for platform in platforms:
|
||||
with st.expander(f"{platform} Performance"):
|
||||
platform_data = performance_data['platforms'].get(platform, {})
|
||||
st.write(f"Engagement: {platform_data.get('engagement', 0):.1f}%")
|
||||
st.write(f"Reach: {platform_data.get('reach', 0):,}")
|
||||
st.write(f"Conversions: {platform_data.get('conversion_rate', 0):.1f}%")
|
||||
|
||||
# Performance trends
|
||||
st.write("### Performance Trends")
|
||||
trend_data = performance_data['trends']
|
||||
st.line_chart(pd.DataFrame({
|
||||
'Engagement': trend_data['engagement'],
|
||||
'Reach': trend_data['reach'],
|
||||
'Conversions': trend_data['conversions']
|
||||
}))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating series: {str(e)}", exc_info=True)
|
||||
st.error(f"Error generating series: {str(e)}")
|
||||
|
||||
# Display existing series
|
||||
if st.session_state.content_series:
|
||||
st.subheader("Existing Series")
|
||||
for series_id, series in st.session_state.content_series.items():
|
||||
with st.expander(f"Series: {series['topic']}"):
|
||||
st.write(f"Status: {series['status']}")
|
||||
st.write(f"Pieces: {len(series['pieces'])}")
|
||||
st.write(f"Created: {series['created_at']}")
|
||||
|
||||
# Series actions
|
||||
if st.button(f"View Details", key=f"view_{series_id}"):
|
||||
st.session_state.selected_series = series_id
|
||||
|
||||
if st.button(f"Delete Series", key=f"delete_{series_id}"):
|
||||
del st.session_state.content_series[series_id]
|
||||
st.rerun()
|
||||
|
||||
def on_series_complete():
    """Mark the series as complete in session state and rerun the script.

    Any ``Exception`` raised while doing so is logged and a generic error
    message is shown to the user.
    """
    try:
        st.session_state['series_complete'] = True
        st.rerun()
    except Exception as exc:
        logger.error(f"Error handling series completion: {exc}")
        st.error("An error occurred while completing the series. Please try again.")
|
||||
@@ -1,81 +0,0 @@
|
||||
import streamlit as st
|
||||
from typing import Dict, Any
|
||||
from lib.database.models import ContentItem
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def render_performance_insights(content_item: ContentItem, platform_adapter) -> None:
    """Show performance metrics, audience insights and content insights.

    Args:
        content_item: The item whose performance is displayed; only its
            ``title`` is read here (for logging).
        platform_adapter: Object providing ``get_content_performance``,
            which is expected to return a mapping (or a falsy value when
            there is nothing to show).

    Any ``Exception`` raised while rendering is logged with a traceback and
    reported through ``st.error``.
    """
    try:
        logger.info(f"Rendering performance insights for: {content_item.title}")

        stats = platform_adapter.get_content_performance(content_item)
        if not stats:
            st.warning("No performance data available for this content")
            return

        # --- Headline metrics: value plus signed change ---------------------
        st.subheader("Performance Metrics")
        engagement_col, reach_col, conversion_col = st.columns(3)

        with engagement_col:
            st.metric(
                "Engagement Rate",
                f"{stats.get('engagement_rate', 0):.1f}%",
                f"{stats.get('engagement_rate_change', 0):+.1f}%",
            )

        with reach_col:
            st.metric(
                "Reach",
                f"{stats.get('reach', 0):,}",
                f"{stats.get('reach_change', 0):+,}",
            )

        with conversion_col:
            st.metric(
                "Conversion Rate",
                f"{stats.get('conversion_rate', 0):.1f}%",
                f"{stats.get('conversion_rate_change', 0):+.1f}%",
            )

        # --- Audience insights: heading is always shown, details only if present
        st.subheader("Audience Insights")
        audience = stats.get('audience_insights', {})
        if audience:
            demographics_col, behavior_col = st.columns(2)

            with demographics_col:
                st.write("Demographics")
                for label, key in (
                    ("Age", 'age_range'),
                    ("Gender", 'gender'),
                    ("Location", 'location'),
                ):
                    st.write(f"- {label}: {audience.get(key, 'N/A')}")

            with behavior_col:
                st.write("Behavior")
                for label, key in (
                    ("Peak Time", 'peak_time'),
                    ("Device", 'device'),
                    ("Platform", 'platform'),
                ):
                    st.write(f"- {label}: {audience.get(key, 'N/A')}")

        # --- Content insights ------------------------------------------------
        st.subheader("Content Insights")
        insights = stats.get('content_insights', {})
        if insights:
            st.write("Top Performing Elements")
            for element, score in insights.get('top_elements', {}).items():
                st.write(f"- {element}: {score}")

            st.write("Improvement Suggestions")
            for suggestion in insights.get('suggestions', []):
                st.write(f"- {suggestion}")

        logger.info(f"Performance insights rendered successfully for: {content_item.title}")

    except Exception as exc:
        logger.error(f"Error rendering performance insights: {exc}", exc_info=True)
        st.error(f"Error rendering performance insights: {exc}")
|
||||
@@ -1,638 +0,0 @@
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
import logging
|
||||
import sys
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from .calendar_view import render_calendar_view
|
||||
from .filters import render_filters
|
||||
from .add_content_modal import render_add_content_modal
|
||||
from .ai_suggestions_modal import render_ai_suggestions_modal
|
||||
from .components.content_optimization import render_content_optimization
|
||||
from .components.ab_testing import render_ab_testing
|
||||
from .components.content_series import render_content_series_generator
|
||||
from .components.performance_insights import render_performance_insights
|
||||
import json
|
||||
from lib.content_scheduler.ui.dashboard import run_dashboard as run_scheduler_dashboard
|
||||
|
||||
# Add parent directory to path to import existing tools
|
||||
parent_dir = str(Path(__file__).parent.parent.parent.parent)
|
||||
if parent_dir not in sys.path:
|
||||
sys.path.append(parent_dir)
|
||||
|
||||
from lib.database.models import ContentItem, ContentType, Platform, get_engine, get_session, init_db
|
||||
from ..core.calendar_manager import CalendarManager
|
||||
from ..core.content_generator import ContentGenerator
|
||||
from ..core.ai_generator import AIGenerator
|
||||
from ..core.content_brief import ContentBriefGenerator
|
||||
from ..integrations.seo_optimizer import SEOOptimizer
|
||||
from lib.integrations.platform_adapters import PlatformAdapter, UnifiedPlatformAdapter
|
||||
|
||||
# Initialize logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Initialize DB/session (do this once at app startup)
|
||||
engine = get_engine()
|
||||
init_db(engine)
|
||||
session = get_session(engine)
|
||||
|
||||
# Import content repurposing UI with error handling
|
||||
def render_smart_repurposing_tab():
    """Render the Smart Content Repurposing tab with error handling.

    The repurposing UI is imported inside the function so that an import
    failure is reported in this tab instead of propagating to the caller.
    Both failure paths are written to the module logger with a traceback,
    because the on-screen hint tells the user to consult the logs.
    """
    try:
        from lib.ai_seo_tools.content_calendar.ui.components.content_repurposing_ui import render_content_repurposing_ui
        render_content_repurposing_ui()
    except ImportError as e:
        logger.error(f"Smart Content Repurposing feature is not available: {str(e)}", exc_info=True)
        st.error(f"Smart Content Repurposing feature is not available: {str(e)}")
        st.info("Please ensure all dependencies are installed correctly.")
    except Exception as e:
        # Previously nothing was logged here even though the user is told to
        # check the logs; record the traceback so that advice is actionable.
        logger.error(f"Error loading Smart Content Repurposing: {str(e)}", exc_info=True)
        st.error(f"Error loading Smart Content Repurposing: {str(e)}")
        st.info("Please check the logs for more details.")
|
||||
|
||||
class ContentCalendarDashboard:
|
||||
"""Interactive dashboard for content calendar management."""
|
||||
def __init__(self):
    """Create the dashboard's helper objects and seed session-state defaults."""
    self.logger = logging.getLogger('content_calendar.dashboard')
    self.logger.info("Initializing ContentCalendarDashboard")

    self.content_brief_generator = ContentBriefGenerator()
    self.content_generator = ContentGenerator()
    self.ai_generator = AIGenerator()
    self.platform_adapter = UnifiedPlatformAdapter()
    self.seo_optimizer = SEOOptimizer()

    # Session-state keys and factories for their initial values. A factory is
    # only called when the key is absent, so keys already present are left
    # untouched.
    session_defaults = (
        ('ab_test_results', dict),
        ('optimization_history', dict),
        ('calendar_data', lambda: None),
        ('selected_content', lambda: None),
        ('view_mode', lambda: 'day'),
        ('selected_date', datetime.now),
    )
    for state_key, make_default in session_defaults:
        if state_key not in st.session_state:
            st.session_state[state_key] = make_default()

    self.logger.info("ContentCalendarDashboard initialized successfully")
|
||||
|
||||
def render(self):
    """Draw the tabbed dashboard.

    Creates seven tabs (planning, optimization, repurposing, A/B testing,
    series, analytics, scheduling) and fills each one. Any ``Exception``
    raised while rendering is logged with a traceback and shown through
    ``st.error``.
    """
    self.logger.info("Starting dashboard render (tabbed UI)")
    try:
        self._inject_custom_css()
        st.title("AI Content Planning")
        st.markdown("""
            Plan, schedule, and manage your content strategy with AI-powered insights. Use the calendar to organize your content and leverage AI tools for optimization.
        """)
        (
            planning_tab,
            optimization_tab,
            repurposing_tab,
            ab_testing_tab,
            series_tab,
            analytics_tab,
            scheduling_tab,
        ) = st.tabs([
            "Content Planning",
            "Content Optimization",
            "🔄 Smart Repurposing",
            "A/B Testing",
            "Content Series",
            "Analytics",
            "Content Scheduling"
        ])

        # ---- Tab 1: calendar, filters, add-content dialog, AI suggestions ----
        with planning_tab:
            icon_map = {
                'Blog': '📝', 'Website': '🌐', 'Instagram': '📸', 'Twitter': '🐦', 'LinkedIn': '💼', 'Facebook': '📘',
                'Article': '📄', 'Social Post': '💬', 'Video': '🎬', 'Newsletter': '✉️'
            }
            status_color = {
                'Draft': '#bdbdbd', 'Scheduled': '#1976d2', 'Published': '#43a047', 'Archived': '#757575'
            }
            calendar_data = self._get_calendar_data()

            def _edit_row(row):
                # Remember which row is being edited, then rerun the script.
                try:
                    st.session_state.editing_content = row
                    st.rerun()
                except Exception as exc:
                    logger.error(f"Error handling edit action: {exc}")
                    st.error("An error occurred while editing content. Please try again.")

            def _delete_row(row):
                try:
                    self._delete_content(row)
                    st.success(f"Successfully deleted content: {row['title']}")
                    st.rerun()
                except Exception as exc:
                    logger.error(f"Error handling delete action: {exc}")
                    st.error("An error occurred while deleting content. Please try again.")

            def _generate_for_row(row):
                # Pre-fill the AI modal from the selected calendar row.
                st.session_state['show_ai_modal'] = True
                st.session_state['ai_modal_topic'] = row['title']
                st.session_state['ai_modal_type'] = str(row['type'])
                st.session_state['ai_modal_platform'] = str(row['platform'])
                st.rerun()

            render_calendar_view(
                calendar_data=calendar_data,
                icon_map=icon_map,
                status_color=status_color,
                on_edit=_edit_row,
                on_delete=_delete_row,
                on_generate=_generate_for_row,
                get_item_key=self._get_item_key
            )
            st.markdown("---")
            render_filters()

            def _add_from_dialog(title, platform, content_type, publish_date):
                self._add_content({
                    'title': title,
                    'platform': platform,
                    'type': content_type,
                    'publish_date': publish_date
                })
                st.session_state['show_add_content_dialog'] = False
                st.success("Content added!")
                st.rerun()

            def _open_ai_from_dialog(title, platform, content_type):
                # Swap the add-content dialog for the AI modal, pre-filled.
                st.session_state['show_add_content_dialog'] = False
                st.session_state['show_ai_modal'] = True
                st.session_state['ai_modal_topic'] = title
                st.session_state['ai_modal_type'] = content_type
                st.session_state['ai_modal_platform'] = platform

            render_add_content_modal(
                selected_date=st.session_state.selected_date,
                on_add_content=_add_from_dialog,
                on_generate_with_ai=_open_ai_from_dialog
            )

            if st.session_state.get('show_ai_modal', False):
                st.markdown("### AI Content Suggestions")
                with st.container():
                    render_ai_suggestions_modal(
                        generate_ai_suggestions=self._generate_ai_suggestions,
                        on_create_brief=self._create_content_brief,
                        on_schedule=self._schedule_content,
                        on_refine=self._refine_suggestion,
                        on_customize=self._customize_suggestion
                    )
                    if st.button("Close"):
                        st.session_state['show_ai_modal'] = False

        # ---- Tab 2 ----
        with optimization_tab:
            render_content_optimization(
                content_generator=self.content_generator,
                ai_generator=self.ai_generator,
                seo_optimizer=self.seo_optimizer
            )

        # ---- Tab 3 ----
        with repurposing_tab:
            render_smart_repurposing_tab()

        # ---- Tab 4 ----
        with ab_testing_tab:
            render_ab_testing(self.content_generator, None)

        # ---- Tab 5 ----
        with series_tab:
            render_content_series_generator(
                self.ai_generator,
                self.content_generator,
                self.seo_optimizer
            )

        # ---- Tab 6: per-item performance plus stored optimization history ----
        with analytics_tab:
            st.header("Analytics")
            st.markdown("### Performance Insights")
            stored_items = session.query(ContentItem).all()
            selected_content = st.selectbox(
                "Select content to analyze",
                options=[item.title for item in stored_items],
                key="analytics_content_select"
            )
            if selected_content:
                # Titles are the selection key; the first matching item is used.
                content_item = next(
                    item for item in stored_items
                    if item.title == selected_content
                )
                render_performance_insights(content_item, self.platform_adapter)
            st.markdown("### Optimization History")
            if selected_content in st.session_state.optimization_history:
                st.json(st.session_state.optimization_history[selected_content])

        # ---- Tab 7 ----
        with scheduling_tab:
            run_scheduler_dashboard()

        self.logger.info("Dashboard render completed successfully (tabbed UI)")
    except Exception as exc:
        self.logger.error(f"Error rendering dashboard: {exc}", exc_info=True)
        st.error(f"An error occurred: {exc}")
|
||||
|
||||
def _inject_custom_css(self):
    """Emit the dashboard's ``<style>`` block (currently only a placeholder comment)."""
    css_block = """
        <style>
        /* Add your custom CSS here if needed */
        </style>
    """
    st.markdown(css_block, unsafe_allow_html=True)
|
||||
|
||||
def _get_calendar_data(self):
    """Load every stored content item into a DataFrame for the calendar view.

    Returns:
        A ``pandas.DataFrame`` with columns ``date``, ``title``,
        ``platform``, ``type`` and ``status``, or ``None`` when there are no
        items or when loading fails (the failure is logged and shown via
        ``st.error``).
    """
    self.logger.info("_get_calendar_data called")
    try:
        rows = [
            {
                'date': item.publish_date,
                'title': item.title,
                # Only the first platform is shown; 'Unknown' when none is set.
                'platform': item.platforms[0] if item.platforms else 'Unknown',
                # Enum-like types expose ``.value``; anything else is stringified.
                'type': item.content_type.value if hasattr(item.content_type, 'value') else str(item.content_type),
                'status': item.status
            }
            for item in session.query(ContentItem).all()
        ]
        return pd.DataFrame(rows) if rows else None
    except Exception as exc:
        self.logger.error(f"Error loading calendar data: {exc}", exc_info=True)
        st.error(f"Error loading calendar data: {exc}")
        return None
|
||||
|
||||
def _add_content(self, content):
    """Persist a new content item built from a UI-level dict.

    Args:
        content: Mapping with keys ``title``, ``platform``, ``type`` and
            ``publish_date``; ``status`` is optional and defaults to
            ``'Draft'``. Unrecognised platform labels fall back to
            ``Platform.WEBSITE`` and unrecognised type labels to
            ``ContentType.BLOG_POST``.

    Raises:
        Exception: Whatever the session raises while adding or committing;
            the session is rolled back before the error is re-raised.
    """
    platform_map = {
        'Blog': Platform.WEBSITE,
        'Instagram': Platform.INSTAGRAM,
        'Twitter': Platform.TWITTER,
        'LinkedIn': Platform.LINKEDIN,
        'Facebook': Platform.FACEBOOK,
    }
    platform_enum = platform_map.get(content['platform'], Platform.WEBSITE)
    content_type_map = {
        'Article': ContentType.BLOG_POST,
        'Social Post': ContentType.SOCIAL_MEDIA,
        'Video': ContentType.VIDEO,
        'Newsletter': ContentType.NEWSLETTER,
    }
    content_type_enum = content_type_map.get(content['type'], ContentType.BLOG_POST)
    new_item = ContentItem(
        title=content['title'],
        description="",
        content_type=content_type_enum,
        platforms=[platform_enum.value],
        publish_date=pd.to_datetime(content['publish_date']),
        status=content.get('status', 'Draft'),
        author=None,
        tags=[],
        notes=None,
        seo_data={}
    )
    try:
        session.add(new_item)
        session.commit()
    except Exception:
        # The session is a module-level object shared by every method here.
        # NOTE(review): assumes a SQLAlchemy Session, which stays unusable
        # after a failed flush/commit until rollback() is called.
        session.rollback()
        raise
|
||||
|
||||
def _delete_content(self, row):
    """Delete the first stored item that matches a calendar row.

    There is no unique ID on the row, so an item matches when its title,
    publish date (date part only), first platform and content type all
    equal the row's ``title``, ``date``, ``platform`` and ``type``. Nothing
    happens when no item matches.
    """
    for item in session.query(ContentItem).all():
        # Checks run in the same order as the fields are listed above and
        # stop at the first mismatch.
        if item.title != row['title']:
            continue
        if str(item.publish_date.date()) != str(row['date'].date()):
            continue

        first_platform = item.platforms[0] if item.platforms else 'Unknown'
        if first_platform != str(row['platform']):
            continue

        if hasattr(item.content_type, 'value'):
            type_label = item.content_type.value
        else:
            type_label = str(item.content_type)
        if type_label != str(row['type']):
            continue

        session.delete(item)
        session.commit()
        break
|
||||
|
||||
def _edit_content(self, row, new_title, new_platform, new_type, new_status):
|
||||
self._delete_content(row)
|
||||
self._add_content({
|
||||
'title': new_title,
|
||||
'platform': new_platform,
|
||||
'type': new_type,
|
||||
'publish_date': row['date'],
|
||||
'status': new_status
|
||||
})
|
||||
|
||||
def _get_item_key(self, row):
|
||||
key_str = f"{row['title']}_{row['date']}_{row['platform']}_{row['type']}"
|
||||
return hashlib.md5(key_str.encode()).hexdigest()
|
||||
|
||||
def _generate_ai_suggestions(self, content_type, topic, audience, goals, tone, length, model_settings, style_preferences, seo_preferences, platform_settings):
    """Generate AI content suggestions and format them for the suggestions modal.

    Args:
        content_type: UI label such as ``'Blog Post'`` or ``'Article'``;
            unknown labels are treated as ``ContentType.BLOG_POST``.
        topic: Topic text; also the fallback title of each suggestion.
        audience: Target audience, copied into each suggestion.
        goals: Iterable of goal strings, joined into the ``impact`` text.
        tone, length: Copied into each suggestion's ``style_elements``.
        model_settings: Mapping read for ``'Creativity Level'`` and
            ``'Formality Level'``.
        style_preferences, platform_settings: Passed through to the AI
            generator unchanged.
        seo_preferences: Mapping read for ``'Keyword Density'``,
            ``'Internal Linking'`` and ``'External Linking'``.

    The target platform is read from ``st.session_state['ai_modal_platform']``
    (default ``'Blog'``); unknown labels map to ``Platform.WEBSITE``.

    Returns:
        A list of suggestion dicts, or ``[]`` when the generator returns
        nothing or an error occurs (errors are logged and shown via
        ``st.error``).
    """
    try:
        self.logger.info(f"Generating AI suggestions for topic: {topic}")

        # Map content type string to ContentType enum
        content_type_map = {
            'Blog Post': ContentType.BLOG_POST,
            'Social Media Post': ContentType.SOCIAL_MEDIA,
            'Video': ContentType.VIDEO,
            'Newsletter': ContentType.NEWSLETTER,
            'Article': ContentType.BLOG_POST,
            'Social Post': ContentType.SOCIAL_MEDIA
        }
        content_type_enum = content_type_map.get(content_type, ContentType.BLOG_POST)

        # Map platform string to Platform enum
        platform_map = {
            'Blog': Platform.WEBSITE,
            'Instagram': Platform.INSTAGRAM,
            'Twitter': Platform.TWITTER,
            'LinkedIn': Platform.LINKEDIN,
            'Facebook': Platform.FACEBOOK,
            'Website': Platform.WEBSITE
        }
        platform = st.session_state.get('ai_modal_platform', 'Blog')
        platform_enum = platform_map.get(platform, Platform.WEBSITE)

        # The previous version built a throw-away ContentItem here using
        # SEOData, a name this module never imports. That raised NameError on
        # every call, so the method always returned []. The object was never
        # used, so it is simply no longer constructed.

        # Use AIGenerator to generate suggestions
        suggestions = self.ai_generator.generate_ai_suggestions(
            content_type=content_type_enum,
            topic=topic,
            audience=audience,
            goals=goals,
            tone=tone,
            length=length,
            model_settings=model_settings,
            style_preferences=style_preferences,
            seo_preferences=seo_preferences,
            platform_settings=platform_settings,
            platform=platform_enum
        )

        if not suggestions:
            self.logger.warning("No suggestions generated")
            return []

        # Format suggestions
        formatted_suggestions = []
        for index, suggestion in enumerate(suggestions):
            formatted_suggestions.append({
                'title': suggestion.get('title', topic),
                'type': content_type,
                'platform': platform,
                'audience': audience,
                'impact': f"High impact for {', '.join(goals)}",
                'preview': suggestion.get('preview', ''),
                'style_elements': [
                    f"Tone: {tone}",
                    f"Length: {length}",
                    f"Creativity: {model_settings.get('Creativity Level', 'balanced')}",
                    f"Formality: {model_settings.get('Formality Level', 'professional')}"
                ],
                'seo_elements': [
                    f"Keyword Density: {seo_preferences.get('Keyword Density', '2')}%",
                    "Internal Linking: Enabled" if seo_preferences.get('Internal Linking', True) else "Internal Linking: Disabled",
                    "External Linking: Enabled" if seo_preferences.get('External Linking', True) else "External Linking: Disabled"
                ],
                # The two scores below are placeholders derived only from the
                # suggestion's position in the list, not from measured data.
                'engagement_score': f"{85 + index*5}%",
                'reach': 'High',
                'conversion': f"{3.5 + index*0.5}%",
                'seo_impact': 'Strong',
                'platform_optimizations': suggestion.get('platform_optimizations', []),
                'variations': suggestion.get('variations', [
                    "Alternative headline",
                    "Different content angle",
                    "Alternative format"
                ]),
                'seo_recommendations': suggestion.get('seo_elements', []),
                'media_suggestions': suggestion.get('media_suggestions', [
                    "Featured image",
                    "Supporting graphics",
                    "Social media visuals"
                ])
            })

        self.logger.info(f"Generated {len(formatted_suggestions)} suggestions successfully")
        return formatted_suggestions

    except Exception as e:
        self.logger.error(f"Error generating AI suggestions: {str(e)}", exc_info=True)
        st.error(f"Error generating suggestions: {str(e)}")
        return []
|
||||
|
||||
def _create_content_brief(self, content_item: ContentItem) -> Dict[str, Any]:
    """Generate a content brief for ``content_item`` and attach SEO data.

    The brief comes from the content brief generator, with the item's
    description used as the audience. When the brief contains a
    ``content_flow`` entry, an ``seo_optimization`` section is added, built
    from the introduction summary and the item's content type.

    Returns:
        The brief as returned by the generator (possibly extended), or
        ``{}`` when an error occurs (logged and shown via ``st.error``).
    """
    try:
        self.logger.info(f"Creating content brief for: {content_item.title}")

        audience_profile = {
            'audience': content_item.description,
            'goals': ['engage', 'inform', 'convert']
        }
        brief = self.content_brief_generator.generate_brief(
            content_item=content_item,
            target_audience=audience_profile
        )

        if brief and 'content_flow' in brief:
            # Both text-based SEO helpers work from the introduction summary.
            intro_summary = brief['content_flow'].get('introduction', {}).get('summary', '')
            optimizer = self.seo_optimizer
            brief['seo_optimization'] = {
                'meta_description': optimizer.generate_meta_description(intro_summary),
                'keywords': optimizer.extract_keywords(intro_summary),
                'structured_data': optimizer.generate_structured_data(content_item.content_type)
            }

        self.logger.info(f"Content brief created successfully for: {content_item.title}")
        return brief

    except Exception as exc:
        self.logger.error(f"Error creating content brief: {exc}", exc_info=True)
        st.error(f"Error creating content brief: {exc}")
        return {}
|
||||
|
||||
def _schedule_content(self, content_item: ContentItem, publish_date: datetime) -> bool:
    """Schedule content for publishing on the specified date.

    Sets ``content_item.publish_date``, adds the item to the calendar
    obtained from the calendar manager and asks the manager to save.

    Returns:
        ``True`` on success; ``False`` when no calendar is available or any
        other error occurs (logged and shown via ``st.error``).
    """
    try:
        self.logger.info(f"Scheduling content: {content_item.title} for {publish_date}")

        # __init__ never assigns ``calendar_manager``, so the previous direct
        # attribute access always raised AttributeError. Create the manager on
        # first use and keep it on the instance.
        # NOTE(review): assumes CalendarManager() needs no constructor
        # arguments — confirm. If it does, the error is caught below and the
        # method returns False exactly as before.
        calendar_manager = getattr(self, 'calendar_manager', None)
        if calendar_manager is None:
            calendar_manager = CalendarManager()
            self.calendar_manager = calendar_manager

        # Get the calendar
        calendar = calendar_manager.get_calendar()
        if not calendar:
            raise ValueError("No calendar found")

        # Update the publish date
        content_item.publish_date = publish_date

        # Add to calendar
        calendar.add_content(content_item)

        # Save changes
        calendar_manager.save_calendar_to_json()

        self.logger.info(f"Content scheduled successfully: {content_item.title}")
        return True

    except Exception as e:
        self.logger.error(f"Error scheduling content: {str(e)}", exc_info=True)
        st.error(f"Error scheduling content: {str(e)}")
        return False
|
||||
|
||||
def _build_suggestion_item(self, suggestion: Dict[str, Any], keywords) -> ContentItem:
    """Build a transient draft ``ContentItem`` from a suggestion dict.

    ``suggestion['type']`` and ``suggestion['platform']`` hold UI labels
    (e.g. ``'Article'``, ``'Blog'``). They are resolved through the same
    label maps used when suggestions are generated, then fall back to an
    enum-name lookup, which raises ``KeyError`` for unknown labels.
    """
    type_by_label = {
        'Blog Post': ContentType.BLOG_POST,
        'Social Media Post': ContentType.SOCIAL_MEDIA,
        'Video': ContentType.VIDEO,
        'Newsletter': ContentType.NEWSLETTER,
        'Article': ContentType.BLOG_POST,
        'Social Post': ContentType.SOCIAL_MEDIA
    }
    platform_by_label = {
        'Blog': Platform.WEBSITE,
        'Instagram': Platform.INSTAGRAM,
        'Twitter': Platform.TWITTER,
        'LinkedIn': Platform.LINKEDIN,
        'Facebook': Platform.FACEBOOK,
        'Website': Platform.WEBSITE
    }

    type_label = suggestion['type']
    content_type = type_by_label.get(type_label)
    if content_type is None:
        content_type = ContentType[type_label.upper().replace(' ', '_')]

    platform_label = suggestion['platform']
    platform = platform_by_label.get(platform_label)
    if platform is None:
        platform = Platform[platform_label.upper()]

    return ContentItem(
        title=suggestion['title'],
        description="",
        content_type=content_type,
        platforms=[platform],
        publish_date=datetime.now(),
        # SEOData is not imported in this module (using it raised NameError),
        # so the same fields are passed as a plain dict.
        # NOTE(review): assumes ContentItem.seo_data accepts a dict, as it
        # does where _add_content passes seo_data={} — confirm.
        seo_data={
            'title': suggestion['title'],
            'meta_description': "",
            'keywords': keywords,
            'structured_data': {}
        },
        status='Draft'
    )


def _refine_suggestion(self, suggestion: Dict[str, Any], feedback: Dict[str, Any]) -> Dict[str, Any]:
    """Refine an AI-generated suggestion based on user feedback.

    ``feedback`` may contain ``tone``, ``length``, ``keywords`` and
    ``goals``. Matching entries in ``suggestion['style_elements']`` /
    ``suggestion['seo_elements']`` are replaced, a new brief is generated
    and, when one is returned, its introduction summary becomes the
    suggestion's ``preview``.

    Returns:
        The same ``suggestion`` dict, updated in place. On error the dict is
        returned with whatever changes were applied before the failure
        (the error is logged and shown via ``st.error``).
    """
    try:
        self.logger.info("Refining AI suggestion based on feedback")

        # Update suggestion based on feedback; the new entry goes first and
        # any previous entry of the same kind is dropped.
        if 'tone' in feedback:
            suggestion['style_elements'] = [
                f"Tone: {feedback['tone']}",
                *[elem for elem in suggestion['style_elements'] if not elem.startswith('Tone:')]
            ]

        if 'length' in feedback:
            suggestion['style_elements'] = [
                f"Length: {feedback['length']}",
                *[elem for elem in suggestion['style_elements'] if not elem.startswith('Length:')]
            ]

        if 'keywords' in feedback:
            suggestion['seo_elements'] = [
                f"Keywords: {', '.join(feedback['keywords'])}",
                *[elem for elem in suggestion['seo_elements'] if not elem.startswith('Keywords:')]
            ]

        # Regenerate content with refined parameters
        draft_item = self._build_suggestion_item(suggestion, feedback.get('keywords', []))
        refined_content = self.content_brief_generator.generate_brief(
            content_item=draft_item,
            target_audience={
                'audience': suggestion['audience'],
                'goals': feedback.get('goals', ['engage', 'inform']),
                'preferences': {
                    'tone': feedback.get('tone', 'professional'),
                    'length': feedback.get('length', 'medium')
                }
            }
        )

        if refined_content:
            suggestion['preview'] = refined_content.get('content_flow', {}).get('introduction', {}).get('summary', '')

        self.logger.info("Suggestion refined successfully")
        return suggestion

    except Exception as e:
        self.logger.error(f"Error refining suggestion: {str(e)}", exc_info=True)
        st.error(f"Error refining suggestion: {str(e)}")
        return suggestion


def _customize_suggestion(self, suggestion: Dict[str, Any], customizations: Dict[str, Any]) -> Dict[str, Any]:
    """Customize an AI-generated suggestion with specific requirements.

    ``customizations`` may contain ``title``, ``platform``, ``style`` (a
    mapping with ``tone``, ``length``, ``creativity``, ``formality``),
    ``seo`` (a mapping with ``keyword_density``, ``internal_linking``,
    ``external_linking``, ``keywords``) and ``goals``. After applying them,
    a new brief is generated and, when one is returned, its introduction
    summary becomes the suggestion's ``preview``.

    Returns:
        The same ``suggestion`` dict, updated in place. On error the dict is
        returned with whatever changes were applied before the failure
        (the error is logged and shown via ``st.error``).
    """
    try:
        self.logger.info("Customizing AI suggestion")

        # Apply customizations
        if 'title' in customizations:
            suggestion['title'] = customizations['title']

        if 'platform' in customizations:
            suggestion['platform'] = customizations['platform']

        if 'style' in customizations:
            style = customizations['style']
            suggestion['style_elements'] = [
                f"Tone: {style.get('tone', 'professional')}",
                f"Length: {style.get('length', 'medium')}",
                f"Creativity: {style.get('creativity', 'balanced')}",
                f"Formality: {style.get('formality', 'professional')}"
            ]

        if 'seo' in customizations:
            seo = customizations['seo']
            suggestion['seo_elements'] = [
                f"Keyword Density: {seo.get('keyword_density', '2')}%",
                "Internal Linking: Enabled" if seo.get('internal_linking', True) else "Internal Linking: Disabled",
                "External Linking: Enabled" if seo.get('external_linking', True) else "External Linking: Disabled"
            ]

        # Regenerate content with customizations
        draft_item = self._build_suggestion_item(
            suggestion,
            customizations.get('seo', {}).get('keywords', [])
        )
        customized_content = self.content_brief_generator.generate_brief(
            content_item=draft_item,
            target_audience={
                'audience': suggestion['audience'],
                'goals': customizations.get('goals', ['engage', 'inform']),
                'preferences': customizations.get('style', {})
            }
        )

        if customized_content:
            suggestion['preview'] = customized_content.get('content_flow', {}).get('introduction', {}).get('summary', '')

        self.logger.info("Suggestion customized successfully")
        return suggestion

    except Exception as e:
        self.logger.error(f"Error customizing suggestion: {str(e)}", exc_info=True)
        st.error(f"Error customizing suggestion: {str(e)}")
        return suggestion
|
||||
|
||||
def _optimize_content_for_platform(self, content_item: ContentItem, platform: Platform) -> Dict[str, Any]:
    """Produce a platform-tailored version of a content item.

    Looks up the platform's requirements through ``self.platform_adapter``,
    asks ``self.content_generator`` for an optimized version, merges in any
    additions from ``self.ai_generator`` and records the run in
    ``st.session_state.optimization_history`` under the item's title.

    Args:
        content_item: The content to optimize.
        platform: The platform the content is being optimized for.

    Returns:
        The optimized content mapping. Any failure is logged, shown through
        ``st.error`` and reported to the caller as an empty dict.
    """
    try:
        self.logger.info(f"Optimizing content for {platform.name}: {content_item.title}")

        # Step 1: platform constraints drive the base optimization.
        requirements = self.platform_adapter.get_platform_requirements(platform)
        optimized = self.content_generator.optimize_for_platform(
            content=content_item,
            platform=platform,
            requirements=requirements,
        )
        if not optimized:
            raise ValueError(f"Failed to optimize content for {platform.name}")

        # Step 2: AI enhancements are optional and overlay the base result.
        enhancements = self.ai_generator.enhance_for_platform(
            content=optimized,
            platform=platform,
            enhancement_type='platform_specific',
        )
        if enhancements:
            optimized.update(enhancements)

        # Step 3: append to the per-title history.
        # NOTE(review): assumes optimization_history was initialised in
        # session state elsewhere - confirm.
        history = st.session_state.optimization_history
        if content_item.title not in history:
            history[content_item.title] = []
        history[content_item.title].append({
            'platform': platform.name,
            'timestamp': datetime.now(),
            'changes': optimized.get('changes', []),
        })

        self.logger.info(f"Content optimized successfully for {platform.name}")
        return optimized

    except Exception as e:
        message = f"Error optimizing content: {str(e)}"
        self.logger.error(message, exc_info=True)
        st.error(message)
        return {}
|
||||
|
||||
# Script entry point: build the dashboard and render it immediately.
if __name__ == "__main__":
    ContentCalendarDashboard().render()
|
||||
@@ -1,30 +0,0 @@
|
||||
import streamlit as st
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
def render_filters():
    """Render the collapsible "Filters" panel and persist every selection.

    The left column holds the start/end date pickers and the right column
    holds three multiselects (platforms, content types, statuses). Each
    widget is seeded from ``st.session_state`` when a value is stored there,
    and the chosen value is written back under the same ``filter_*`` key.
    """
    state = st.session_state

    # (label, options, session key, default selection), rendered in order.
    multiselect_specs = (
        ("Platforms",
         ["Blog", "Instagram", "Twitter", "LinkedIn", "Facebook"],
         'filter_platforms',
         ["Blog"]),
        ("Content Types",
         ["Article", "Social Post", "Video", "Newsletter"],
         'filter_content_types',
         ["Article"]),
        ("Status",
         ["Draft", "Scheduled", "Published", "Archived"],
         'filter_statuses',
         ["Draft", "Scheduled"]),
    )

    with st.expander("Filters", expanded=False):
        date_column, choice_column = st.columns(2)

        with date_column:
            # Defaults: today, and thirty days from now.
            chosen_start = st.date_input(
                "Start Date",
                state.get('filter_start_date', datetime.now()),
            )
            chosen_end = st.date_input(
                "End Date",
                state.get('filter_end_date', datetime.now() + timedelta(days=30)),
            )
            state['filter_start_date'] = chosen_start
            state['filter_end_date'] = chosen_end

        with choice_column:
            for label, options, key, fallback in multiselect_specs:
                state[key] = st.multiselect(
                    label,
                    options,
                    default=state.get(key, fallback),
                )
|
||||
@@ -1,198 +0,0 @@
|
||||
import calendar
import random
from datetime import datetime, time, timedelta
from typing import Dict, List, Any
|
||||
|
||||
def calculate_publish_dates(
    topics: List[Dict[str, Any]],
    start_date: datetime,
    duration: str
) -> Dict[str, List[Dict[str, Any]]]:
    """Spread content topics over the weekdays of a publishing window.

    Args:
        topics: Content topics to place on the calendar.
        start_date: First day of the publishing window.
        duration: Window length: 'weekly', 'monthly' or 'quarterly'.

    Returns:
        Mapping of 'YYYY-MM-DD' date strings to the topics scheduled on them.

    Raises:
        ValueError: If ``duration`` is not one of the supported values.
    """
    # Window -> weekday slots -> per-type spacing -> final schedule.
    publishing_days = _get_dates_in_range(
        start_date,
        _calculate_end_date(start_date, duration),
    )
    cadence = _calculate_posting_frequency(len(topics), len(publishing_days))
    return _schedule_content(topics, publishing_days, cadence)
|
||||
|
||||
def _calculate_end_date(start_date: datetime, duration: str) -> datetime:
|
||||
"""Calculate end date based on duration."""
|
||||
if duration == 'weekly':
|
||||
return start_date + timedelta(days=7)
|
||||
elif duration == 'monthly':
|
||||
# Add one month
|
||||
if start_date.month == 12:
|
||||
return datetime(start_date.year + 1, 1, start_date.day)
|
||||
return datetime(start_date.year, start_date.month + 1, start_date.day)
|
||||
elif duration == 'quarterly':
|
||||
# Add three months
|
||||
new_month = start_date.month + 3
|
||||
new_year = start_date.year
|
||||
if new_month > 12:
|
||||
new_month -= 12
|
||||
new_year += 1
|
||||
return datetime(new_year, new_month, start_date.day)
|
||||
else:
|
||||
raise ValueError(f"Invalid duration: {duration}")
|
||||
|
||||
def _get_dates_in_range(
|
||||
start_date: datetime,
|
||||
end_date: datetime
|
||||
) -> List[datetime]:
|
||||
"""Get all dates in the given range."""
|
||||
dates = []
|
||||
current_date = start_date
|
||||
|
||||
while current_date <= end_date:
|
||||
# Skip weekends
|
||||
if current_date.weekday() < 5: # 0-4 are weekdays
|
||||
dates.append(current_date)
|
||||
current_date += timedelta(days=1)
|
||||
|
||||
return dates
|
||||
|
||||
def _calculate_posting_frequency(
|
||||
num_topics: int,
|
||||
num_dates: int
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
Calculate optimal posting frequency based on number of topics and dates.
|
||||
|
||||
Returns:
|
||||
Dictionary with posting frequency for each content type
|
||||
"""
|
||||
# Calculate base frequency
|
||||
base_frequency = num_dates / num_topics
|
||||
|
||||
# Adjust for content types
|
||||
return {
|
||||
'blog_post': max(1, int(base_frequency * 0.4)), # 40% of content
|
||||
'social_media': max(1, int(base_frequency * 0.3)), # 30% of content
|
||||
'video': max(1, int(base_frequency * 0.2)), # 20% of content
|
||||
'newsletter': max(1, int(base_frequency * 0.1)) # 10% of content
|
||||
}
|
||||
|
||||
def _schedule_content(
    topics: List[Dict[str, Any]],
    dates: List[datetime],
    frequency: Dict[str, int]
) -> Dict[str, List[Dict[str, Any]]]:
    """Assign topics to dates, one content type at a time.

    A single cursor walks through ``dates``. After each placement it moves
    forward by the spacing configured for the topic's content type
    (1 when the type has no entry in ``frequency``), and it returns to the
    first date once it runs past the end.

    Args:
        topics: Content topics to schedule.
        dates: Dates available for scheduling.
        frequency: Cursor step per content type.

    Returns:
        Mapping of 'YYYY-MM-DD' strings to the topics placed on that date.
        With no dates available, nothing is scheduled and the mapping is
        empty.
    """
    planned = {}
    slot_count = len(dates)
    cursor = 0

    for kind, kind_topics in _group_topics_by_type(topics).items():
        step = frequency.get(kind, 1)

        for entry in kind_topics:
            if cursor < slot_count:
                day_key = dates[cursor].strftime('%Y-%m-%d')
                planned.setdefault(day_key, []).append(entry)
                cursor += step

            # Wrap around so later topics share dates with earlier ones.
            if cursor >= slot_count:
                cursor = 0

    return planned
|
||||
|
||||
def _group_topics_by_type(
|
||||
topics: List[Dict[str, Any]]
|
||||
) -> Dict[str, List[Dict[str, Any]]]:
|
||||
"""Group topics by their content type."""
|
||||
grouped = {}
|
||||
|
||||
for topic in topics:
|
||||
content_type = topic.get('content_type', 'blog_post')
|
||||
if content_type not in grouped:
|
||||
grouped[content_type] = []
|
||||
grouped[content_type].append(topic)
|
||||
|
||||
return grouped
|
||||
|
||||
def get_optimal_posting_time(
    content_type: str,
    platform: str
) -> time:
    """Look up the default posting time for a content type on a platform.

    Args:
        content_type: Type of content ('blog_post', 'social_media',
            'video' or 'newsletter').
        platform: Target platform for that content type (e.g. 'twitter').

    Returns:
        The configured time of day, or 09:00 when the content type or the
        platform has no entry in the table.
    """
    # These values must be built with the ``time`` class. This module
    # imports the ``datetime`` *class*, so ``datetime.time(9, 0)`` resolves
    # to the instance method and raises TypeError on every call.
    # Default optimal times (can be customized based on platform analytics).
    optimal_times = {
        'blog_post': {
            'website': time(9, 0),     # 9 AM
            'medium': time(10, 0),     # 10 AM
        },
        'social_media': {
            'facebook': time(15, 0),   # 3 PM
            'twitter': time(12, 0),    # 12 PM
            'linkedin': time(8, 0),    # 8 AM
            'instagram': time(19, 0),  # 7 PM
        },
        'video': {
            'youtube': time(14, 0),    # 2 PM
        },
        'newsletter': {
            'email': time(6, 0),       # 6 AM
        },
    }

    # Unknown content type or platform falls back to 9 AM.
    return optimal_times.get(content_type, {}).get(platform, time(9, 0))
|
||||
@@ -1,154 +0,0 @@
|
||||
import functools
|
||||
import logging
|
||||
from typing import Any, Callable, TypeVar, cast
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
def handle_calendar_error(func: Callable[..., T]) -> Callable[..., T]:
    """Decorator that logs failures of a calendar operation and normalises them.

    Args:
        func: Function to decorate.

    Returns:
        A wrapper with the same signature as ``func``.

    Raises:
        ValueError: Re-raised unchanged after logging.
        CalendarError: Errors that already are ``CalendarError`` (or a
            subclass such as ``ValidationError``) are re-raised unchanged,
            so callers can still catch the specific type. Any other
            exception is wrapped in a new ``CalendarError`` whose
            ``__cause__`` is the original exception.
    """
    @functools.wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> T:
        try:
            return func(*args, **kwargs)
        except ValueError as e:
            logger.error(f"Invalid input in {func.__name__}: {str(e)}")
            raise
        except CalendarError as e:
            # Already a domain error: wrapping it again would hide the
            # subclass and stack "Calendar operation failed:" prefixes.
            logger.error(f"Error in {func.__name__}: {str(e)}")
            raise
        except Exception as e:
            logger.error(f"Error in {func.__name__}: {str(e)}")
            # Chain explicitly so the original traceback is kept.
            raise CalendarError(f"Calendar operation failed: {str(e)}") from e
    return cast(Callable[..., T], wrapper)
|
||||
|
||||
class CalendarError(Exception):
    """Root of the calendar exception hierarchy."""


class ContentError(CalendarError):
    """Raised for problems with a piece of content."""


class SchedulingError(CalendarError):
    """Raised for problems while scheduling content."""


class ValidationError(CalendarError):
    """Raised when calendar input fails validation."""
|
||||
|
||||
def validate_date_range(
    start_date: datetime,
    end_date: datetime
) -> None:
    """Check that two datetimes form a usable calendar range.

    Args:
        start_date: Beginning of the range.
        end_date: End of the range.

    Raises:
        ValidationError: If either bound is not a ``datetime``, if the start
            lies after the end, or if the range spans more than 365 days.
    """
    # Type checks come first so the comparisons below cannot fail oddly.
    for value, label in ((start_date, "Start"), (end_date, "End")):
        if not isinstance(value, datetime):
            raise ValidationError(f"{label} date must be a datetime object")

    if end_date < start_date:
        raise ValidationError("Start date must be before end date")

    span = end_date - start_date
    if span.days > 365:
        raise ValidationError("Calendar duration cannot exceed one year")
|
||||
|
||||
def validate_content_item(content: dict) -> None:
    """Check that a content item carries the fields the calendar needs.

    Args:
        content: Content item to validate.

    Raises:
        ValidationError: If 'title', 'description', 'content_type' or
            'platforms' is missing (the first missing field is reported),
            or if 'platforms' is not a non-empty list.
    """
    required_fields = ('title', 'description', 'content_type', 'platforms')

    # Report only the first missing field, in declaration order.
    absent = next((name for name in required_fields if name not in content), None)
    if absent is not None:
        raise ValidationError(f"Missing required field: {absent}")

    platforms = content['platforms']
    if not isinstance(platforms, list):
        raise ValidationError("Platforms must be a list")
    if not platforms:
        raise ValidationError("At least one platform must be specified")
|
||||
|
||||
def validate_calendar_duration(duration: str) -> None:
    """Check that ``duration`` names a supported calendar length.

    Args:
        duration: Duration to validate ('weekly', 'monthly', 'quarterly').

    Raises:
        ValidationError: If ``duration`` is not one of the supported
            values. The message lists the accepted options.
    """
    allowed = ('weekly', 'monthly', 'quarterly')
    if duration in allowed:
        return

    options = ', '.join(allowed)
    raise ValidationError(f"Invalid duration: {duration}. Must be one of: {options}")
|
||||
|
||||
def log_calendar_operation(
    operation: str,
    details: dict
) -> None:
    """Record a calendar operation in the module log.

    The operation name is logged at INFO level and the details at DEBUG
    level.

    Args:
        operation: Name of the operation.
        details: Operation details to log.
    """
    headline = f"Calendar operation: {operation}"
    payload = f"Operation details: {details}"
    logger.info(headline)
    logger.debug(payload)
|
||||
|
||||
def handle_api_error(
    error: Exception,
    operation: str
) -> None:
    """Log an API failure and re-raise it as a ``CalendarError``.

    This function never returns normally.

    Args:
        error: The error that occurred.
        operation: The operation that failed.

    Raises:
        CalendarError: Always. When ``error`` is an exception it is attached
            as ``__cause__`` so the original traceback is kept even when
            this helper is called outside an ``except`` block.
    """
    logger.error(f"API error in {operation}: {str(error)}")
    failure = CalendarError(f"API operation failed: {str(error)}")
    if isinstance(error, BaseException):
        raise failure from error
    # ``raise ... from`` rejects a non-exception cause, so raise plainly.
    raise failure
|
||||
|
||||
def handle_integration_error(
    error: Exception,
    integration: str
) -> None:
    """Log an integration failure and re-raise it as a ``CalendarError``.

    This function never returns normally.

    Args:
        error: The error that occurred.
        integration: The integration that failed.

    Raises:
        CalendarError: Always. When ``error`` is an exception it is attached
            as ``__cause__`` so the original traceback is kept even when
            this helper is called outside an ``except`` block.
    """
    logger.error(f"Integration error with {integration}: {str(error)}")
    failure = CalendarError(f"Integration failed: {str(error)}")
    if isinstance(error, BaseException):
        raise failure from error
    # ``raise ... from`` rejects a non-exception cause, so raise plainly.
    raise failure
|
||||
@@ -1,182 +0,0 @@
|
||||
# Content Gap Analysis Tool
|
||||
|
||||
A comprehensive AI-powered tool for analyzing content gaps and generating strategic content recommendations.
|
||||
|
||||
## Overview
|
||||
|
||||
The Content Gap Analysis tool combines multiple SEO tools to provide a complete analysis of your content strategy, identify opportunities, and generate actionable recommendations. It leverages existing AI SEO tools and adds new capabilities for comprehensive content analysis.
|
||||
|
||||
## Workflow Design
|
||||
|
||||
### 1. Website Analysis
|
||||
**Input:** Website URL
|
||||
**Tools Integration:**
|
||||
- `analyze_onpage_seo()`: Analyze content quality and structure
|
||||
- `url_seo_checker()`: Check technical SEO aspects
|
||||
- `google_pagespeed_insights()`: Assess page performance
|
||||
|
||||
**Analysis Components:**
|
||||
- Content structure mapping
|
||||
- Topic categorization
|
||||
- Content depth assessment
|
||||
- Performance metrics
|
||||
|
||||
### 2. Competitor Analysis
|
||||
**Input:** Competitor URLs
|
||||
**Tools Integration:**
|
||||
- `url_seo_checker()`: Analyze competitor URLs
|
||||
- `analyze_onpage_seo()`: Compare content quality
|
||||
- `ai_title_generator()`: Analyze title patterns
|
||||
|
||||
**Analysis Components:**
|
||||
- Content strategy comparison
|
||||
- Topic coverage gaps
|
||||
- Content format analysis
|
||||
- Title pattern analysis
|
||||
|
||||
### 3. Keyword Research
|
||||
**Input:** Industry/Niche
|
||||
**Tools Integration:**
|
||||
- `ai_title_generator()`: Generate keyword-based titles
|
||||
- `metadesc_generator_main()`: Analyze meta descriptions for keyword usage
|
||||
- `ai_structured_data()`: Check structured data implementation
|
||||
|
||||
**Analysis Components:**
|
||||
- Keyword opportunity identification
|
||||
- Search intent analysis
|
||||
- Content format suggestions
|
||||
- Topic clustering
|
||||
|
||||
### 4. AI-Powered Recommendations
|
||||
**Tools Integration:**
|
||||
- `ai_title_generator()`: Generate content titles
|
||||
- `metadesc_generator_main()`: Create content summaries
|
||||
- `ai_structured_data()`: Suggest structured data implementation
|
||||
|
||||
**Output Components:**
|
||||
- Content topic suggestions
|
||||
- Format recommendations
|
||||
- Priority scoring
|
||||
- Implementation timeline
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Core Infrastructure
|
||||
1. Create base classes and interfaces
|
||||
2. Implement data collection modules
|
||||
3. Set up AI model integration
|
||||
4. Develop data storage system
|
||||
|
||||
### Phase 2: Tool Integration
|
||||
1. Integrate existing SEO tools
|
||||
2. Create unified API for tool interaction
|
||||
3. Implement data sharing between tools
|
||||
4. Develop result aggregation system
|
||||
|
||||
### Phase 3: Analysis Engine
|
||||
1. Implement content structure analysis
|
||||
2. Develop competitor analysis algorithms
|
||||
3. Create keyword research system
|
||||
4. Build recommendation engine
|
||||
|
||||
### Phase 4: UI/UX Development
|
||||
1. Create step-by-step workflow interface
|
||||
2. Implement progress tracking
|
||||
3. Develop visualization components
|
||||
4. Add export functionality
|
||||
|
||||
## Technical Requirements
|
||||
|
||||
### Dependencies
|
||||
- Existing SEO tools from `lib/ai_seo_tools/`
|
||||
- AI models for content analysis
|
||||
- Web scraping capabilities
|
||||
- Data storage system
|
||||
|
||||
### File Structure
|
||||
```
|
||||
content_gap_analysis/
|
||||
├── __init__.py
|
||||
├── main.py
|
||||
├── website_analyzer.py
|
||||
├── competitor_analyzer.py
|
||||
├── keyword_researcher.py
|
||||
├── recommendation_engine.py
|
||||
├── utils/
|
||||
│ ├── __init__.py
|
||||
│ ├── data_collector.py
|
||||
│ ├── content_parser.py
|
||||
│ └── ai_processor.py
|
||||
└── tests/
|
||||
├── __init__.py
|
||||
├── test_website_analyzer.py
|
||||
├── test_competitor_analyzer.py
|
||||
└── test_keyword_researcher.py
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
### Existing Tools
|
||||
1. **On-Page SEO Analyzer**
|
||||
- Function: `analyze_onpage_seo()`
|
||||
- Purpose: Content quality assessment
|
||||
- Integration: Content structure analysis
|
||||
|
||||
2. **URL SEO Checker**
|
||||
- Function: `url_seo_checker()`
|
||||
- Purpose: Technical optimization
|
||||
- Integration: URL structure analysis
|
||||
|
||||
3. **Blog Title Generator**
|
||||
- Function: `ai_title_generator()`
|
||||
- Purpose: Content ideas
|
||||
- Integration: Keyword analysis
|
||||
|
||||
4. **Meta Description Generator**
|
||||
- Function: `metadesc_generator_main()`
|
||||
- Purpose: Content summaries
|
||||
- Integration: Content optimization
|
||||
|
||||
5. **Structured Data Generator**
|
||||
- Function: `ai_structured_data()`
|
||||
- Purpose: Rich snippets
|
||||
- Integration: Content enhancement
|
||||
|
||||
### New Components
|
||||
1. **Content Structure Analyzer**
|
||||
- Purpose: Map website content structure
|
||||
- Output: Content hierarchy and relationships
|
||||
|
||||
2. **Competitor Content Analyzer**
|
||||
- Purpose: Analyze competitor content strategy
|
||||
- Output: Content gaps and opportunities
|
||||
|
||||
3. **Keyword Opportunity Finder**
|
||||
- Purpose: Identify keyword gaps
|
||||
- Output: Keyword recommendations
|
||||
|
||||
4. **AI Recommendation Engine**
|
||||
- Purpose: Generate content recommendations
|
||||
- Output: Actionable content strategy
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Advanced Analytics**
|
||||
- Content performance tracking
|
||||
- ROI analysis
|
||||
- Trend prediction
|
||||
|
||||
2. **Automation Features**
|
||||
- Automated content planning
|
||||
- Schedule generation
|
||||
- Priority scoring
|
||||
|
||||
3. **Integration Expansion**
|
||||
- CMS integration
|
||||
- Analytics platform connection
|
||||
- Social media analysis
|
||||
|
||||
4. **AI Improvements**
|
||||
- Advanced topic modeling
|
||||
- Sentiment analysis
|
||||
- Content quality scoring
|
||||
@@ -1,36 +0,0 @@
|
||||
"""
|
||||
Content Gap Analysis Tool for Alwrity.
|
||||
"""
|
||||
|
||||
from .ui import ContentGapAnalysisUI
|
||||
from .main import ContentGapAnalysis
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .website_analyzer import WebsiteAnalyzer
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor
|
||||
|
||||
__all__ = [
|
||||
'ContentGapAnalysisUI',
|
||||
'ContentGapAnalysis',
|
||||
'KeywordResearcher',
|
||||
'CompetitorAnalyzer',
|
||||
'WebsiteAnalyzer',
|
||||
'RecommendationEngine',
|
||||
'AIProcessor'
|
||||
]
|
||||
|
||||
def run_content_gap_analysis():
    """Configure the Streamlit page and run the Content Gap Analysis UI."""
    # This module never imported streamlit, so the bare ``st`` reference
    # raised NameError. Import it here to keep the package import light.
    import streamlit as st

    # Configure the page before building the UI.
    # NOTE(review): Streamlit has required set_page_config to be the first
    # Streamlit command on a page; whether ContentGapAnalysisUI() issues
    # Streamlit commands is not visible here, so do this first to be safe.
    st.set_page_config(
        page_title="Content Gap Analysis",
        page_icon="📊",
        layout="wide"
    )

    ui = ContentGapAnalysisUI()
    ui.run()
|
||||
@@ -1,711 +0,0 @@
|
||||
"""
|
||||
Competitor analyzer for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from collections import Counter, defaultdict
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Configure logger
|
||||
# Replace loguru's default handler with two sinks: a rotating DEBUG file
# log and an INFO console log.
# NOTE(review): logger.remove() runs at import time on the shared loguru
# logger, so it also drops handlers installed by other modules - confirm
# this is intended.
logger.remove()

# File sink: full detail, rotated at 50 MB, kept for 10 days.
logger.add(
    "logs/competitor_analyzer.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)

# Console sink: colourised, INFO and above.
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)

# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
class CompetitorAnalyzer:
|
||||
"""Analyzes competitor content and market position."""
|
||||
|
||||
def __init__(self):
    """Create the analyzer's collaborators and describe its stages."""
    self.website_analyzer = WebsiteAnalyzer()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Human-readable steps of the single 'competitor_analysis' stage.
    competitor_steps = [
        'Initializing competitor analysis',
        'Analyzing competitor content',
        'Evaluating market position',
        'Identifying content gaps',
        'Generating competitive insights',
    ]
    self.stages = {
        'competitor_analysis': {
            'name': 'Competitor Analysis',
            'steps': competitor_steps,
        },
    }

    logger.info("CompetitorAnalyzer initialized")
|
||||
|
||||
def analyze(self, competitor_urls: List[str], industry: str) -> Dict[str, Any]:
    """Analyze competitor websites and summarise the market with the LLM.

    Each URL is crawled through ``self.website_analyzer``; only crawls that
    report success are kept. The collected analyses are then sent to
    ``llm_text_gen`` with a request for a JSON summary.

    Args:
        competitor_urls: List of competitor URLs to analyze.
        industry: Industry category, used in the LLM prompt.

    Returns:
        Dictionary with 'competitors', 'market_position', 'content_gaps'
        and 'advantages'. On failure the same keys are returned empty,
        plus an 'error' message.
    """
    try:
        results = {
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }

        # Analyze each competitor; failed crawls are silently left out.
        for url in competitor_urls:
            competitor_analysis = self.website_analyzer.analyze_website(url)
            if competitor_analysis.get('success', False):
                results['competitors'].append({
                    'url': url,
                    'analysis': competitor_analysis['data']
                })

        # Generate market position analysis using AI.
        prompt = "\n".join([
            f"Analyze the market position of competitors in the {industry} industry:",
            "",
            "Competitor Analyses:",
            json.dumps(results['competitors'], indent=2),
            "",
            "Provide:",
            "1. Market position analysis",
            "2. Content gaps",
            "3. Competitive advantages",
            "",
            "Format the response as JSON with 'market_position', 'content_gaps', and 'advantages' keys.",
        ])

        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in competitive analysis.",
            response_format="json_object"
        )

        # NOTE(review): llm_text_gen's return type is not visible here. If
        # it returns the JSON as text, decode it rather than failing on
        # ``.get``; an undecodable reply keeps the crawl results.
        if isinstance(analysis, str):
            try:
                analysis = json.loads(analysis)
            except json.JSONDecodeError:
                logger.warning("AI competitor analysis was not valid JSON; returning crawl results only")
                analysis = None

        if isinstance(analysis, dict):
            results['market_position'] = analysis.get('market_position', {})
            results['content_gaps'] = analysis.get('content_gaps', [])
            results['advantages'] = analysis.get('advantages', [])

        return results

    except Exception as e:
        error_msg = f"Error analyzing competitors: {str(e)}"
        # loguru has no ``exc_info`` option: extra kwargs make it run
        # str.format on the message, which raises when the message contains
        # braces. logger.exception logs the text as-is with the traceback.
        logger.exception(error_msg)
        return {
            'error': error_msg,
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }
|
||||
|
||||
def _analyze_competitor_content(self, competitor_urls: List[str]) -> Dict[str, Any]:
|
||||
"""Analyze competitor content."""
|
||||
try:
|
||||
content_analysis = {}
|
||||
|
||||
for url in competitor_urls:
|
||||
# Get AI analysis for each competitor
|
||||
analysis = self.ai_processor.analyze_content({
|
||||
'url': url,
|
||||
'content': {} # Content will be fetched by AI processor
|
||||
})
|
||||
|
||||
content_analysis[url] = {
|
||||
'content_metrics': analysis.get('content_metrics', {}),
|
||||
'content_evolution': analysis.get('content_evolution', {}),
|
||||
'topic_trends': analysis.get('topic_trends', {}),
|
||||
'performance_trends': analysis.get('performance_trends', {})
|
||||
}
|
||||
|
||||
return content_analysis
|
||||
except Exception as e:
|
||||
st.error(f"Error analyzing competitor content: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _evaluate_market_position(self, content_analysis: Dict[str, Any], industry: str) -> Dict[str, Any]:
|
||||
"""Evaluate market position."""
|
||||
try:
|
||||
market_position = {
|
||||
'industry_rank': 0,
|
||||
'content_quality_rank': 0,
|
||||
'market_share': 0,
|
||||
'competitive_advantages': [],
|
||||
'competitive_disadvantages': []
|
||||
}
|
||||
|
||||
# Calculate industry rank based on content quality
|
||||
content_quality_scores = [
|
||||
analysis.get('content_metrics', {}).get('quality_score', 0)
|
||||
for analysis in content_analysis.values()
|
||||
]
|
||||
|
||||
if content_quality_scores:
|
||||
market_position['content_quality_rank'] = sum(content_quality_scores) / len(content_quality_scores)
|
||||
|
||||
# Identify competitive advantages and disadvantages
|
||||
for url, analysis in content_analysis.items():
|
||||
quality_score = analysis.get('content_metrics', {}).get('quality_score', 0)
|
||||
|
||||
if quality_score > market_position['content_quality_rank']:
|
||||
market_position['competitive_advantages'].append({
|
||||
'url': url,
|
||||
'advantage': 'Higher content quality',
|
||||
'score': quality_score
|
||||
})
|
||||
elif quality_score < market_position['content_quality_rank']:
|
||||
market_position['competitive_disadvantages'].append({
|
||||
'url': url,
|
||||
'disadvantage': 'Lower content quality',
|
||||
'score': quality_score
|
||||
})
|
||||
|
||||
return market_position
|
||||
except Exception as e:
|
||||
st.error(f"Error evaluating market position: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _identify_content_gaps(self, content_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Identify content gaps."""
|
||||
try:
|
||||
content_gaps = []
|
||||
|
||||
# Analyze content coverage
|
||||
all_topics = set()
|
||||
for analysis in content_analysis.values():
|
||||
topics = analysis.get('topic_trends', {}).get('topics', [])
|
||||
all_topics.update(topics)
|
||||
|
||||
# Identify missing topics for each competitor
|
||||
for url, analysis in content_analysis.items():
|
||||
covered_topics = set(analysis.get('topic_trends', {}).get('topics', []))
|
||||
missing_topics = all_topics - covered_topics
|
||||
|
||||
if missing_topics:
|
||||
content_gaps.append({
|
||||
'url': url,
|
||||
'missing_topics': list(missing_topics),
|
||||
'gap_type': 'topic_coverage'
|
||||
})
|
||||
|
||||
return content_gaps
|
||||
except Exception as e:
|
||||
st.error(f"Error identifying content gaps: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_competitive_insights(self, content_analysis: Dict[str, Any], market_position: Dict[str, Any], content_gaps: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Generate competitive insights."""
|
||||
try:
|
||||
insights = []
|
||||
|
||||
# Market position insights
|
||||
if market_position.get('content_quality_rank', 0) > 80:
|
||||
insights.append("Strong market position with high content quality")
|
||||
elif market_position.get('content_quality_rank', 0) > 60:
|
||||
insights.append("Moderate market position with room for improvement")
|
||||
else:
|
||||
insights.append("Weak market position requiring significant improvement")
|
||||
|
||||
# Content gap insights
|
||||
if content_gaps:
|
||||
insights.append(f"Identified {len(content_gaps)} content gaps across competitors")
|
||||
|
||||
# Competitive advantage insights
|
||||
if market_position.get('competitive_advantages'):
|
||||
insights.append(f"Found {len(market_position['competitive_advantages'])} competitive advantages")
|
||||
|
||||
return insights
|
||||
except Exception as e:
|
||||
st.error(f"Error generating competitive insights: {str(e)}")
|
||||
return []
|
||||
|
||||
def _run_seo_analysis(self, url: str) -> dict:
|
||||
"""
|
||||
Run SEO analysis on competitor website.
|
||||
|
||||
Args:
|
||||
url (str): The URL to analyze
|
||||
|
||||
Returns:
|
||||
dict: SEO analysis results
|
||||
"""
|
||||
# Run website analysis using the new analyzer
|
||||
analysis = self.website_analyzer.analyze_website(url)
|
||||
|
||||
if not analysis.get('success', False):
|
||||
return {
|
||||
'error': analysis.get('error', 'Unknown error in SEO analysis'),
|
||||
'onpage_seo': {},
|
||||
'url_seo': {}
|
||||
}
|
||||
|
||||
# Extract SEO information from the analysis
|
||||
seo_info = analysis['data']['analysis']['seo_info']
|
||||
basic_info = analysis['data']['analysis']['basic_info']
|
||||
|
||||
return {
|
||||
'onpage_seo': {
|
||||
'meta_tags': seo_info.get('meta_tags', {}),
|
||||
'content': seo_info.get('content', {}),
|
||||
'recommendations': seo_info.get('recommendations', [])
|
||||
},
|
||||
'url_seo': {
|
||||
'title': basic_info.get('title', ''),
|
||||
'meta_description': basic_info.get('meta_description', ''),
|
||||
'has_robots_txt': bool(basic_info.get('robots_txt')),
|
||||
'has_sitemap': bool(basic_info.get('sitemap'))
|
||||
}
|
||||
}
|
||||
|
||||
def _analyze_title_patterns(self, url: str) -> dict:
    """Analyze a competitor's title patterns via the title generator.

    Args:
        url (str): The URL to analyze

    Returns:
        dict: 'patterns' and 'suggestions' taken from the title generator's
        result (an empty dict / list when absent).
    """
    # NOTE(review): ai_title_generator is not among this module's visible
    # imports, so this call looks like it raises NameError - confirm where
    # it should be imported from.
    generated = ai_title_generator(url)

    patterns = generated.get('patterns', {})
    suggestions = generated.get('suggestions', [])
    return {'patterns': patterns, 'suggestions': suggestions}
|
||||
|
||||
def _compare_competitors(self, results: dict) -> dict:
|
||||
"""
|
||||
Compare results across all competitors.
|
||||
|
||||
Args:
|
||||
results (dict): Analysis results for all competitors
|
||||
|
||||
Returns:
|
||||
dict: Comparative analysis results
|
||||
"""
|
||||
comparison = {
|
||||
'content_comparison': self._compare_content(results),
|
||||
'seo_comparison': self._compare_seo(results),
|
||||
'title_comparison': self._compare_titles(results),
|
||||
'performance_metrics': self._compare_performance(results),
|
||||
'content_gaps': self._identify_content_gaps(results)
|
||||
}
|
||||
|
||||
# Add AI-enhanced insights
|
||||
comparison['ai_insights'] = self.ai_processor.analyze_competitor_comparison(comparison)
|
||||
|
||||
return comparison
|
||||
|
||||
def _compare_content(self, results: dict) -> dict:
|
||||
"""Compare content structure across competitors."""
|
||||
content_comparison = {
|
||||
'topic_distribution': self._analyze_topic_distribution(results),
|
||||
'content_depth': self._analyze_content_depth(results),
|
||||
'content_formats': self._analyze_content_formats(results),
|
||||
'content_quality': self._analyze_content_quality(results)
|
||||
}
|
||||
|
||||
return content_comparison
|
||||
|
||||
def _analyze_topic_distribution(self, results: dict) -> dict:
|
||||
"""Analyze topic distribution across competitors."""
|
||||
all_topics = []
|
||||
topic_frequency = Counter()
|
||||
|
||||
for url, data in results.items():
|
||||
topics = data['content_structure'].get('topics', [])
|
||||
all_topics.extend([t['topic'] for t in topics])
|
||||
topic_frequency.update([t['topic'] for t in topics])
|
||||
|
||||
return {
|
||||
'common_topics': [topic for topic, count in topic_frequency.most_common(10)],
|
||||
'unique_topics': list(set(all_topics)),
|
||||
'topic_frequency': dict(topic_frequency.most_common()),
|
||||
'topic_coverage': len(set(all_topics)) / len(all_topics) if all_topics else 0
|
||||
}
|
||||
|
||||
def _analyze_content_depth(self, results: dict) -> dict:
|
||||
"""Analyze content depth across competitors."""
|
||||
depth_metrics = {
|
||||
'word_counts': {},
|
||||
'section_counts': {},
|
||||
'heading_distribution': defaultdict(list),
|
||||
'content_hierarchy': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
content_structure = data['content_structure']
|
||||
|
||||
# Word count analysis
|
||||
depth_metrics['word_counts'][url] = content_structure.get('text_statistics', {}).get('word_count', 0)
|
||||
|
||||
# Section analysis
|
||||
depth_metrics['section_counts'][url] = len(content_structure.get('sections', []))
|
||||
|
||||
# Heading distribution
|
||||
for level, count in content_structure.get('hierarchy', {}).get('heading_distribution', {}).items():
|
||||
depth_metrics['heading_distribution'][level].append(count)
|
||||
|
||||
# Content hierarchy
|
||||
depth_metrics['content_hierarchy'][url] = content_structure.get('hierarchy', {})
|
||||
|
||||
return depth_metrics
|
||||
|
||||
def _analyze_content_formats(self, results: dict) -> dict:
|
||||
"""Analyze content formats across competitors."""
|
||||
format_analysis = {
|
||||
'format_types': defaultdict(int),
|
||||
'format_distribution': defaultdict(list),
|
||||
'format_effectiveness': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
sections = data['content_structure'].get('sections', [])
|
||||
|
||||
for section in sections:
|
||||
format_type = section.get('type', 'unknown')
|
||||
format_analysis['format_types'][format_type] += 1
|
||||
format_analysis['format_distribution'][format_type].append({
|
||||
'url': url,
|
||||
'heading': section.get('heading', ''),
|
||||
'word_count': section.get('word_count', 0)
|
||||
})
|
||||
|
||||
return format_analysis
|
||||
|
||||
def _analyze_content_quality(self, results: dict) -> dict:
|
||||
"""Analyze content quality across competitors."""
|
||||
quality_metrics = {
|
||||
'readability_scores': {},
|
||||
'content_structure_scores': {},
|
||||
'engagement_metrics': {},
|
||||
'overall_quality': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
content_structure = data['content_structure']
|
||||
|
||||
# Readability analysis
|
||||
readability = content_structure.get('readability', {})
|
||||
quality_metrics['readability_scores'][url] = {
|
||||
'flesch_score': readability.get('flesch_score', 0),
|
||||
'avg_sentence_length': readability.get('avg_sentence_length', 0),
|
||||
'avg_word_length': readability.get('avg_word_length', 0)
|
||||
}
|
||||
|
||||
# Structure analysis
|
||||
hierarchy = content_structure.get('hierarchy', {})
|
||||
quality_metrics['content_structure_scores'][url] = {
|
||||
'has_proper_hierarchy': hierarchy.get('has_proper_hierarchy', False),
|
||||
'heading_distribution': hierarchy.get('heading_distribution', {}),
|
||||
'max_depth': hierarchy.get('max_depth', 0)
|
||||
}
|
||||
|
||||
return quality_metrics
|
||||
|
||||
def _compare_seo(self, results: dict) -> dict:
|
||||
"""Compare SEO metrics across competitors."""
|
||||
seo_comparison = {
|
||||
'onpage_metrics': defaultdict(list),
|
||||
'technical_metrics': defaultdict(list),
|
||||
'content_metrics': defaultdict(list),
|
||||
'overall_seo_score': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
seo_info = data.get('website_analysis', {}).get('analysis', {}).get('seo_info', {})
|
||||
|
||||
# On-page SEO metrics
|
||||
meta_tags = seo_info.get('meta_tags', {})
|
||||
seo_comparison['onpage_metrics']['title_score'].append(
|
||||
100 if meta_tags.get('title', {}).get('status') == 'good' else 50
|
||||
)
|
||||
seo_comparison['onpage_metrics']['description_score'].append(
|
||||
100 if meta_tags.get('description', {}).get('status') == 'good' else 50
|
||||
)
|
||||
seo_comparison['onpage_metrics']['keywords_score'].append(
|
||||
100 if meta_tags.get('keywords', {}).get('status') == 'good' else 50
|
||||
)
|
||||
|
||||
# Technical SEO metrics
|
||||
technical = data.get('website_analysis', {}).get('analysis', {}).get('basic_info', {})
|
||||
seo_comparison['technical_metrics']['has_robots_txt'].append(
|
||||
100 if technical.get('robots_txt') else 0
|
||||
)
|
||||
seo_comparison['technical_metrics']['has_sitemap'].append(
|
||||
100 if technical.get('sitemap') else 0
|
||||
)
|
||||
|
||||
# Content SEO metrics
|
||||
content = seo_info.get('content', {})
|
||||
seo_comparison['content_metrics']['readability_score'].append(
|
||||
content.get('readability_score', 0)
|
||||
)
|
||||
seo_comparison['content_metrics']['content_quality_score'].append(
|
||||
content.get('content_quality_score', 0)
|
||||
)
|
||||
|
||||
# Overall SEO score
|
||||
seo_comparison['overall_seo_score'][url] = seo_info.get('overall_score', 0)
|
||||
|
||||
return seo_comparison
|
||||
|
||||
def _compare_titles(self, results: dict) -> dict:
|
||||
"""Compare title patterns across competitors."""
|
||||
title_comparison = {
|
||||
'pattern_distribution': defaultdict(int),
|
||||
'length_distribution': defaultdict(list),
|
||||
'keyword_usage': defaultdict(int),
|
||||
'format_preferences': defaultdict(int)
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
title_patterns = data['title_patterns']
|
||||
|
||||
# Pattern analysis
|
||||
for pattern in title_patterns.get('patterns', {}):
|
||||
title_comparison['pattern_distribution'][pattern] += 1
|
||||
|
||||
# Length analysis
|
||||
for suggestion in title_patterns.get('suggestions', []):
|
||||
title_comparison['length_distribution'][len(suggestion)].append(suggestion)
|
||||
|
||||
# Keyword analysis
|
||||
for suggestion in title_patterns.get('suggestions', []):
|
||||
words = suggestion.lower().split()
|
||||
for word in words:
|
||||
if len(word) > 3: # Filter out short words
|
||||
title_comparison['keyword_usage'][word] += 1
|
||||
|
||||
return title_comparison
|
||||
|
||||
def _compare_performance(self, results: dict) -> dict:
|
||||
"""Compare performance metrics across competitors."""
|
||||
performance_metrics = {
|
||||
'content_effectiveness': {},
|
||||
'engagement_metrics': {},
|
||||
'technical_performance': {},
|
||||
'overall_performance': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
# Content effectiveness
|
||||
content_structure = data['content_structure']
|
||||
performance_metrics['content_effectiveness'][url] = {
|
||||
'content_depth': content_structure.get('text_statistics', {}).get('word_count', 0),
|
||||
'content_quality': content_structure.get('readability', {}).get('flesch_score', 0),
|
||||
'content_structure': content_structure.get('hierarchy', {}).get('has_proper_hierarchy', False)
|
||||
}
|
||||
|
||||
# Technical performance
|
||||
seo_analysis = data['seo_analysis']
|
||||
performance_metrics['technical_performance'][url] = {
|
||||
'onpage_score': sum(1 for v in seo_analysis.get('onpage_seo', {}).values() if v),
|
||||
'technical_score': sum(1 for v in seo_analysis.get('url_seo', {}).values() if v)
|
||||
}
|
||||
|
||||
return performance_metrics
|
||||
|
||||
def _find_missing_topics(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Find topics that are missing or underrepresented."""
|
||||
all_topics = set()
|
||||
topic_coverage = defaultdict(int)
|
||||
|
||||
# Collect all topics and their coverage
|
||||
for url, data in results.items():
|
||||
topics = data['content_structure'].get('topics', [])
|
||||
for topic in topics:
|
||||
all_topics.add(topic['topic'])
|
||||
topic_coverage[topic['topic']] += 1
|
||||
|
||||
# Identify missing or underrepresented topics
|
||||
missing_topics = []
|
||||
total_competitors = len(results)
|
||||
|
||||
for topic in all_topics:
|
||||
coverage = topic_coverage[topic] / total_competitors
|
||||
if coverage < 0.5: # Topic covered by less than 50% of competitors
|
||||
missing_topics.append({
|
||||
'topic': topic,
|
||||
'coverage': coverage,
|
||||
'opportunity_score': 1 - coverage
|
||||
})
|
||||
|
||||
return sorted(missing_topics, key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
def _identify_opportunities(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Identify content opportunities based on analysis."""
|
||||
opportunities = []
|
||||
|
||||
# Analyze content depth opportunities
|
||||
depth_metrics = self._analyze_content_depth(results)
|
||||
avg_word_count = sum(depth_metrics['word_counts'].values()) / len(depth_metrics['word_counts'])
|
||||
|
||||
for url, word_count in depth_metrics['word_counts'].items():
|
||||
if word_count < avg_word_count * 0.7: # Content depth significantly below average
|
||||
opportunities.append({
|
||||
'type': 'content_depth',
|
||||
'url': url,
|
||||
'current_value': word_count,
|
||||
'target_value': avg_word_count,
|
||||
'opportunity_score': (avg_word_count - word_count) / avg_word_count
|
||||
})
|
||||
|
||||
# Analyze format opportunities
|
||||
format_analysis = self._analyze_content_formats(results)
|
||||
for format_type, distribution in format_analysis['format_distribution'].items():
|
||||
if len(distribution) < len(results) * 0.3: # Format used by less than 30% of competitors
|
||||
opportunities.append({
|
||||
'type': 'content_format',
|
||||
'format': format_type,
|
||||
'current_coverage': len(distribution) / len(results),
|
||||
'opportunity_score': 1 - (len(distribution) / len(results))
|
||||
})
|
||||
|
||||
return sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
def _analyze_format_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in content formats."""
|
||||
format_gaps = []
|
||||
format_analysis = self._analyze_content_formats(results)
|
||||
|
||||
# Identify underutilized formats
|
||||
for format_type, count in format_analysis['format_types'].items():
|
||||
if count < len(results) * 0.3: # Format used by less than 30% of competitors
|
||||
format_gaps.append({
|
||||
'format': format_type,
|
||||
'current_usage': count,
|
||||
'potential_impact': 'high' if count < len(results) * 0.2 else 'medium',
|
||||
'suggested_implementation': self._generate_format_suggestions(format_type)
|
||||
})
|
||||
|
||||
return format_gaps
|
||||
|
||||
def _analyze_quality_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in content quality."""
|
||||
quality_gaps = []
|
||||
quality_metrics = self._analyze_content_quality(results)
|
||||
|
||||
# Analyze readability gaps
|
||||
readability_scores = quality_metrics['readability_scores']
|
||||
avg_flesch = sum(score['flesch_score'] for score in readability_scores.values()) / len(readability_scores)
|
||||
|
||||
for url, scores in readability_scores.items():
|
||||
if scores['flesch_score'] < avg_flesch * 0.8: # Readability significantly below average
|
||||
quality_gaps.append({
|
||||
'type': 'readability',
|
||||
'url': url,
|
||||
'current_score': scores['flesch_score'],
|
||||
'target_score': avg_flesch,
|
||||
'improvement_needed': avg_flesch - scores['flesch_score']
|
||||
})
|
||||
|
||||
return quality_gaps
|
||||
|
||||
def _analyze_seo_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in SEO implementation."""
|
||||
seo_gaps = []
|
||||
seo_comparison = self._compare_seo(results)
|
||||
|
||||
# Analyze on-page SEO gaps
|
||||
for metric, values in seo_comparison['onpage_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'onpage_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
# Analyze technical SEO gaps
|
||||
for metric, values in seo_comparison['technical_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'technical_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
# Analyze content SEO gaps
|
||||
for metric, values in seo_comparison['content_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'content_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
return seo_gaps
|
||||
|
||||
def _generate_format_suggestions(self, format_type: str) -> List[str]:
|
||||
"""Generate suggestions for implementing specific content formats."""
|
||||
format_suggestions = {
|
||||
'article': [
|
||||
'Create in-depth articles with comprehensive coverage',
|
||||
'Include expert quotes and statistics',
|
||||
'Add visual elements and infographics'
|
||||
],
|
||||
'blog_post': [
|
||||
'Write engaging blog posts with personal insights',
|
||||
'Include call-to-actions',
|
||||
'Add social sharing buttons'
|
||||
],
|
||||
'how-to': [
|
||||
'Create step-by-step guides',
|
||||
'Include screenshots or videos',
|
||||
'Add troubleshooting sections'
|
||||
],
|
||||
'case_study': [
|
||||
'Present real-world examples',
|
||||
'Include metrics and results',
|
||||
'Add client testimonials'
|
||||
]
|
||||
}
|
||||
|
||||
return format_suggestions.get(format_type, [
|
||||
'Research successful examples',
|
||||
'Analyze competitor implementation',
|
||||
'Create unique value proposition'
|
||||
])
|
||||
@@ -1,674 +0,0 @@
|
||||
"""
|
||||
Enhanced Content Gap Analysis with Advertools Integration and AI Insights.
|
||||
|
||||
This module provides comprehensive content gap analysis using:
|
||||
- adv.serp_goog: Competitor SERP analysis
|
||||
- adv.kw_generate: Keyword research expansion
|
||||
- adv.crawl: Deep competitor content analysis
|
||||
- adv.word_frequency: Content theme identification
|
||||
- llm_text_gen: AI-powered insights and recommendations
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import advertools as adv
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
import os
|
||||
from datetime import datetime
|
||||
import asyncio
|
||||
import json
|
||||
from collections import Counter, defaultdict
|
||||
from loguru import logger
|
||||
|
||||
# Import existing modules
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from .utils.ai_processor import AIProcessor, ProgressTracker
|
||||
|
||||
class EnhancedContentGapAnalyzer:
|
||||
"""Enhanced content gap analyzer with advertools and AI integration."""
|
||||
|
||||
def __init__(self):
    """Create the analyzer's collaborators and a scratch directory."""
    # Collaborators used by the analysis phases
    self.website_analyzer = WebsiteAnalyzer()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Crawl output files are written under this temporary directory
    self.temp_dir = tempfile.mkdtemp()

    logger.info("EnhancedContentGapAnalyzer initialized")
|
||||
|
||||
def analyze_comprehensive_gap(self, target_url: str, competitor_urls: List[str],
                              target_keywords: List[str], industry: str = "general") -> Dict[str, Any]:
    """
    Perform comprehensive content gap analysis.

    Runs five phases in order (SERP analysis, keyword expansion, competitor
    crawl, content theme analysis, AI insights) and reports progress
    through Streamlit.

    Args:
        target_url: Your website URL
        competitor_urls: List of competitor URLs (only the first 5 are used)
        target_keywords: List of primary keywords to analyze
        industry: Industry category for context

    Returns:
        Comprehensive analysis results, or ``{'error': message}`` when any
        phase raises.
    """
    try:
        st.info("🚀 Starting Enhanced Content Gap Analysis...")

        # Apply the competitor cap once so every phase sees the same list
        competitors = competitor_urls[:5]

        # Initialize results structure
        results = {
            'analysis_timestamp': datetime.utcnow().isoformat(),
            'target_url': target_url,
            'competitor_urls': competitors,
            'target_keywords': target_keywords,
            'industry': industry,
            'serp_analysis': {},
            'keyword_expansion': {},
            'competitor_content': {},
            'content_themes': {},
            'gap_analysis': {},
            'ai_insights': {},
            'recommendations': []
        }

        # Phase 1: SERP Analysis using adv.serp_goog
        with st.expander("🔍 SERP Analysis Progress", expanded=True):
            serp_results = self._analyze_serp_landscape(target_keywords, competitors)
            results['serp_analysis'] = serp_results
            st.success(f"✅ Analyzed {len(target_keywords)} keywords across SERPs")

        # Phase 2: Keyword Expansion using adv.kw_generate
        with st.expander("🎯 Keyword Research Expansion", expanded=True):
            expanded_keywords = self._expand_keyword_research(target_keywords, industry)
            results['keyword_expansion'] = expanded_keywords
            st.success(f"✅ Generated {len(expanded_keywords.get('expanded_keywords', []))} additional keywords")

        # Phase 3: Deep Competitor Analysis using adv.crawl
        with st.expander("🕷️ Deep Competitor Content Analysis", expanded=True):
            competitor_content = self._analyze_competitor_content_deep(competitors)
            results['competitor_content'] = competitor_content
            # Report the sites that actually produced crawl data, not the
            # number of URLs that were passed in
            crawled_sites = len(competitor_content.get('crawl_results', {}))
            st.success(f"✅ Crawled and analyzed {crawled_sites} competitor websites")

        # Phase 4: Content Theme Analysis using adv.word_frequency
        with st.expander("📊 Content Theme & Gap Identification", expanded=True):
            content_themes = self._analyze_content_themes(results['competitor_content'])
            results['content_themes'] = content_themes
            st.success("✅ Identified content themes and topic clusters")

        # Phase 5: AI-Powered Gap Analysis and Insights
        with st.expander("🤖 AI-Powered Insights Generation", expanded=True):
            ai_insights = self._generate_ai_insights(results)
            results['ai_insights'] = ai_insights
            # The AI response is only queried for recommendations when it
            # is a mapping; other response shapes yield no recommendations
            if isinstance(ai_insights, dict):
                results['recommendations'] = ai_insights.get('recommendations', [])
            st.success("✅ Generated AI-powered insights and recommendations")

        return results

    except Exception as e:
        error_msg = f"Error in comprehensive gap analysis: {str(e)}"
        # loguru has no ``exc_info`` option; ``exception`` attaches the
        # traceback and, with no extra arguments, does not format the message
        logger.exception(error_msg)
        st.error(error_msg)
        return {'error': error_msg}
|
||||
|
||||
def _analyze_serp_landscape(self, keywords: List[str], competitor_urls: List[str]) -> Dict[str, Any]:
    """
    Build a placeholder SERP landscape for the given keywords.

    No search API is called: for each of the first 10 keywords the
    competitor domains are listed as the top domains and the first five
    competitors are assigned positions starting at 3. Returns an empty
    dict if the analysis as a whole fails.
    """
    try:
        st.info("🔍 Analyzing SERP landscape for competitor positions...")

        rankings = {}
        opportunities = []
        landscape = {
            'keyword_rankings': rankings,
            'competitor_presence': {},
            'serp_features': {},
            'ranking_opportunities': opportunities
        }

        # Note: adv.serp_goog requires API key setup
        # For demo purposes, SERP data is simulated
        for term in keywords[:10]:  # Limit to prevent API overuse
            try:
                # In production, use: serp_data = adv.serp_goog(q=keyword, cx='your_cx', key='your_key')
                domains = [urlparse(link).netloc for link in competitor_urls]
                positions = {}
                for offset, link in enumerate(competitor_urls[:5]):
                    positions[urlparse(link).netloc] = f"Position {offset+3}"

                rankings[term] = {
                    'top_10_domains': domains,
                    'serp_features': ['featured_snippet', 'people_also_ask', 'related_searches'],
                    'competitor_positions': positions
                }

                st.write(f"• Analyzed keyword: '{term}'")

            except Exception as e:
                st.warning(f"Could not analyze SERP for '{term}': {str(e)}")
                continue

        # Count in how many keyword result sets each domain appears
        presence = Counter()
        for entry in rankings.values():
            presence.update(entry.get('top_10_domains', []))

        landscape['competitor_presence'] = dict(presence.most_common(10))

        # A keyword is an opportunity when the reference domain (the first
        # competitor URL) has no recorded position for it
        for term, entry in rankings.items():
            reference_domain = urlparse(competitor_urls[0] if competitor_urls else "").netloc
            if reference_domain in entry.get('competitor_positions', {}):
                continue
            opportunities.append({
                'keyword': term,
                'opportunity': 'Not ranking in top 10',
                'serp_features': entry.get('serp_features', [])
            })

        return landscape

    except Exception as e:
        st.error(f"Error in SERP analysis: {str(e)}")
        return {}
|
||||
|
||||
def _expand_keyword_research(self, seed_keywords: List[str], industry: str) -> Dict[str, Any]:
    """
    Expand keyword research using adv.kw_generate.

    Args:
        seed_keywords: Seed keywords; only the first 5 are expanded.
        industry: Industry term mixed into the phrase variations.

    Returns:
        Dict with ``seed_keywords``, ``expanded_keywords`` (unique, sorted),
        ``keyword_categories`` (keywords grouped by search intent),
        ``search_intent_analysis`` (left empty) and
        ``long_tail_opportunities`` (up to 20 keywords of 3+ words).
        An empty dict if the expansion as a whole fails.
    """
    def keyword_strings(generated) -> List[str]:
        """Extract plain keyword strings from a kw_generate result."""
        # kw_generate returns a DataFrame; iterating a DataFrame yields its
        # column labels, so the 'Keyword' column must be read explicitly.
        if isinstance(generated, pd.DataFrame):
            if 'Keyword' not in generated.columns:
                return []
            raw_values = generated['Keyword'].dropna().astype(str).tolist()
        else:
            raw_values = [str(value) for value in generated]

        # Modified-broad keywords carry '+' prefixes; normalise whitespace too
        cleaned = (' '.join(value.replace('+', ' ').split()) for value in raw_values)
        return [value for value in cleaned if value]

    try:
        st.info("🎯 Expanding keyword research...")

        expanded_results = {
            'seed_keywords': seed_keywords,
            'expanded_keywords': [],
            'keyword_categories': {},
            'search_intent_analysis': {},
            'long_tail_opportunities': []
        }

        # Use adv.kw_generate for keyword expansion
        all_expanded = []

        for seed_keyword in seed_keywords[:5]:  # Limit to prevent overload
            try:
                # Generate keyword variations using advertools
                broad_keywords = adv.kw_generate(
                    products=[seed_keyword],
                    words=["best", "top", "how to", "guide", "tips", "vs", "review", "comparison"],
                    max_len=4
                )

                # Add phrase match keywords
                phrase_keywords = adv.kw_generate(
                    products=[seed_keyword],
                    words=[industry, "strategy", "analysis", "optimization", "techniques"],
                    max_len=3
                )

                all_expanded.extend(keyword_strings(broad_keywords))
                all_expanded.extend(keyword_strings(phrase_keywords))

                st.write(f"• Generated variations for: '{seed_keyword}'")

            except Exception as e:
                st.warning(f"Could not expand keyword '{seed_keyword}': {str(e)}")
                continue

        # Remove duplicates; sorting keeps the result (and the long-tail
        # selection below) deterministic between runs
        expanded_results['expanded_keywords'] = sorted(set(all_expanded))

        # Categorize keywords by intent
        intent_categories = {
            'informational': [],
            'commercial': [],
            'navigational': [],
            'transactional': []
        }

        # NOTE(review): markers are matched as substrings, so e.g. 'laptop'
        # matches 'top' - confirm whether word-level matching is wanted.
        intent_markers = (
            ('informational', ('how', 'what', 'why', 'guide', 'tips')),
            ('commercial', ('best', 'top', 'review', 'comparison')),
            ('transactional', ('buy', 'purchase', 'price', 'cost')),
        )

        for keyword in expanded_results['expanded_keywords']:
            keyword_lower = keyword.lower()
            intent = next(
                (name for name, markers in intent_markers
                 if any(marker in keyword_lower for marker in markers)),
                'navigational'  # default bucket when no marker matches
            )
            intent_categories[intent].append(keyword)

        expanded_results['keyword_categories'] = intent_categories

        # Identify long-tail opportunities
        long_tail = [kw for kw in expanded_results['expanded_keywords'] if len(kw.split()) >= 3]
        expanded_results['long_tail_opportunities'] = long_tail[:20]  # First 20 long-tail

        return expanded_results

    except Exception as e:
        st.error(f"Error in keyword expansion: {str(e)}")
        return {}
|
||||
|
||||
def _analyze_competitor_content_deep(self, competitor_urls: List[str]) -> Dict[str, Any]:
    """
    Deep competitor content analysis using adv.crawl.

    Crawls the first 3 competitor URLs (2 levels deep, at most 50 pages
    each) into a per-domain file under ``self.temp_dir`` and summarises the
    crawl output.

    Args:
        competitor_urls: Competitor URLs to crawl.

    Returns:
        Dict with ``crawl_results`` and ``content_structure`` keyed by
        domain, plus ``page_analysis`` and ``technical_insights`` (left
        empty). A competitor whose crawl fails is skipped with a warning;
        an empty dict is returned if the analysis as a whole fails.
    """
    try:
        st.info("🕷️ Performing deep competitor content analysis...")

        competitor_analysis = {
            'crawl_results': {},
            'content_structure': {},
            'page_analysis': {},
            'technical_insights': {}
        }

        for i, url in enumerate(competitor_urls[:3]):  # Limit to 3 for performance
            try:
                domain = urlparse(url).netloc
                st.write(f"🔍 Analyzing competitor {i+1}: {domain}")

                # Create temporary file for crawl results
                crawl_file = os.path.join(self.temp_dir, f"crawl_{domain.replace('.', '_')}.jl")

                # The temp directory outlives a single run; drop output left
                # by an earlier crawl so old pages are not mixed into (and
                # counted with) this run's data
                if os.path.exists(crawl_file):
                    os.remove(crawl_file)

                # Use adv.crawl for comprehensive analysis
                # Note: This is a simplified crawl - in production, customize settings
                adv.crawl(
                    url_list=[url],
                    output_file=crawl_file,
                    follow_links=True,
                    custom_settings={
                        'DEPTH_LIMIT': 2,  # Crawl 2 levels deep
                        'CLOSESPIDER_PAGECOUNT': 50,  # Limit pages
                        'DOWNLOAD_DELAY': 1,  # Be respectful
                    }
                )

                # Read and analyze crawl results
                if os.path.exists(crawl_file):
                    crawl_df = pd.read_json(crawl_file, lines=True)
                    has_status = 'status' in crawl_df.columns
                    has_size = 'size' in crawl_df.columns

                    competitor_analysis['crawl_results'][domain] = {
                        'total_pages': len(crawl_df),
                        # Guarded like 'size': not every crawl output has it
                        'status_codes': crawl_df['status'].value_counts().to_dict() if has_status else {},
                        'page_types': self._categorize_pages(crawl_df),
                        'content_length_stats': {
                            'mean': crawl_df['size'].mean() if has_size else 0,
                            'median': crawl_df['size'].median() if has_size else 0
                        }
                    }

                    # Analyze content structure
                    competitor_analysis['content_structure'][domain] = self._analyze_content_structure(crawl_df)

                    st.success(f"✅ Crawled {len(crawl_df)} pages from {domain}")
                else:
                    st.warning(f"⚠️ No crawl data available for {domain}")

            except Exception as e:
                st.warning(f"Could not crawl {url}: {str(e)}")
                continue

        return competitor_analysis

    except Exception as e:
        st.error(f"Error in deep competitor analysis: {str(e)}")
        return {}
|
||||
|
||||
def _analyze_content_themes(self, competitor_content: Dict[str, Any]) -> Dict[str, Any]:
    """
    Derive dominant two-word themes with adv.word_frequency.

    The analysed text is currently simulated: one fixed sample sentence
    (with the domain inserted) per crawled domain, not the crawled page
    content. Returns an empty dict if the analysis fails.
    """
    try:
        st.info("📊 Analyzing content themes and topics...")

        themes = {
            'dominant_themes': {},
            'content_clusters': {},
            'topic_gaps': [],
            'content_opportunities': []
        }

        # In a real implementation the text would come from the crawled
        # pages; for now each domain contributes a simulated sample.
        samples = []
        for domain, _crawl_data in competitor_content.get('crawl_results', {}).items():
            try:
                samples.append(
                    f"content marketing seo optimization digital strategy {domain} website analysis competitor research keyword targeting"
                )
            except Exception:
                continue

        corpus = "".join(" " + sample for sample in samples)

        if not corpus.strip():
            return themes

        # Use adv.word_frequency for theme analysis
        frequencies = adv.word_frequency(
            text_list=[corpus],
            phrase_len=2,  # Analyze 2-word phrases
            rm_words=['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']
        )

        if not frequencies.empty:
            leading = frequencies.head(20)
            themes['dominant_themes'] = leading.to_dict('records')

            # Categorize themes into clusters
            themes['content_clusters'] = self._cluster_themes(leading)

            st.success("✅ Identified dominant content themes")

        return themes

    except Exception as e:
        st.error(f"Error in content theme analysis: {str(e)}")
        return {}
|
||||
|
||||
def _generate_ai_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Generate AI-powered insights using llm_text_gen.

    Args:
        analysis_results: Results collected by the earlier analysis phases.

    Returns:
        The model's response as a dict. A JSON string response is parsed;
        text that is not a JSON object is wrapped as
        ``{'raw_response': text}``. An empty dict when nothing usable was
        generated or an error occurred.
    """
    try:
        st.info("🤖 Generating AI-powered insights...")

        # The theme analysis initialises 'dominant_themes' as an empty dict,
        # which cannot be sliced; only a list of theme records is usable.
        themes = analysis_results.get('content_themes', {}).get('dominant_themes', [])
        if not isinstance(themes, list):
            themes = []

        # Prepare analysis summary for AI
        analysis_summary = {
            'target_url': analysis_results.get('target_url', ''),
            'industry': analysis_results.get('industry', ''),
            'serp_opportunities': len(analysis_results.get('serp_analysis', {}).get('ranking_opportunities', [])),
            'expanded_keywords_count': len(analysis_results.get('keyword_expansion', {}).get('expanded_keywords', [])),
            'competitors_analyzed': len(analysis_results.get('competitor_urls', [])),
            'dominant_themes': themes[:10]
        }

        # Generate comprehensive AI insights
        prompt = f"""
        As an expert SEO content strategist, analyze this comprehensive content gap analysis data and provide actionable insights:

        TARGET ANALYSIS:
        - Website: {analysis_summary['target_url']}
        - Industry: {analysis_summary['industry']}
        - SERP Opportunities: {analysis_summary['serp_opportunities']} keywords not ranking
        - Keyword Expansion: {analysis_summary['expanded_keywords_count']} additional keywords identified
        - Competitors Analyzed: {analysis_summary['competitors_analyzed']} websites

        DOMINANT CONTENT THEMES:
        {json.dumps(analysis_summary['dominant_themes'], indent=2)}

        PROVIDE:
        1. Strategic Content Gap Analysis
        2. Priority Content Recommendations (top 5)
        3. Keyword Strategy Insights
        4. Competitive Positioning Advice
        5. Content Format Recommendations
        6. Technical SEO Opportunities
        7. Implementation Timeline (30/60/90 days)

        Format as JSON with clear, actionable recommendations.
        """

        ai_response = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an expert SEO content strategist with 15+ years of experience in content gap analysis and competitive intelligence.",
            response_format="json_object"
        )

        # Callers expect a dict; normalise a textual response
        if isinstance(ai_response, str) and ai_response.strip():
            try:
                parsed = json.loads(ai_response)
            except json.JSONDecodeError:
                parsed = None
            # Keep text that is not a JSON object instead of discarding it
            ai_response = parsed if isinstance(parsed, dict) else {'raw_response': ai_response}

        if isinstance(ai_response, dict) and ai_response:
            st.success("✅ Generated comprehensive AI insights")
            return ai_response

        st.warning("⚠️ Could not generate AI insights")
        return {}

    except Exception as e:
        st.error(f"Error generating AI insights: {str(e)}")
        return {}
|
||||
|
||||
def _categorize_pages(self, crawl_df: pd.DataFrame) -> Dict[str, int]:
|
||||
"""Categorize crawled pages by type."""
|
||||
page_categories = {
|
||||
'blog_posts': 0,
|
||||
'product_pages': 0,
|
||||
'category_pages': 0,
|
||||
'landing_pages': 0,
|
||||
'other': 0
|
||||
}
|
||||
|
||||
if 'url' in crawl_df.columns:
|
||||
for url in crawl_df['url']:
|
||||
url_lower = url.lower()
|
||||
if any(indicator in url_lower for indicator in ['/blog/', '/post/', '/article/', '/news/']):
|
||||
page_categories['blog_posts'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/product/', '/item/', '/shop/']):
|
||||
page_categories['product_pages'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/category/', '/collection/', '/browse/']):
|
||||
page_categories['category_pages'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/landing/', '/promo/', '/campaign/']):
|
||||
page_categories['landing_pages'] += 1
|
||||
else:
|
||||
page_categories['other'] += 1
|
||||
|
||||
return page_categories
|
||||
|
||||
def _analyze_content_structure(self, crawl_df: pd.DataFrame) -> Dict[str, Any]:
|
||||
"""Analyze content structure from crawl data."""
|
||||
structure_analysis = {
|
||||
'avg_title_length': 0,
|
||||
'avg_meta_desc_length': 0,
|
||||
'h1_usage': 0,
|
||||
'internal_links_avg': 0,
|
||||
'external_links_avg': 0
|
||||
}
|
||||
|
||||
# Analyze available columns
|
||||
if 'title' in crawl_df.columns:
|
||||
structure_analysis['avg_title_length'] = crawl_df['title'].str.len().mean()
|
||||
|
||||
if 'meta_desc' in crawl_df.columns:
|
||||
structure_analysis['avg_meta_desc_length'] = crawl_df['meta_desc'].str.len().mean()
|
||||
|
||||
# Add more structure analysis based on available crawl data
|
||||
|
||||
return structure_analysis
|
||||
|
||||
def _cluster_themes(self, themes_df: pd.DataFrame) -> Dict[str, List[str]]:
|
||||
"""Cluster themes into topic groups."""
|
||||
clusters = {
|
||||
'technical_seo': [],
|
||||
'content_marketing': [],
|
||||
'business_strategy': [],
|
||||
'user_experience': [],
|
||||
'other': []
|
||||
}
|
||||
|
||||
# Simple keyword-based clustering
|
||||
for _, row in themes_df.iterrows():
|
||||
word = row.get('word', '') if 'word' in row else str(row.get(0, ''))
|
||||
word_lower = word.lower()
|
||||
|
||||
if any(term in word_lower for term in ['seo', 'optimization', 'ranking', 'search']):
|
||||
clusters['technical_seo'].append(word)
|
||||
elif any(term in word_lower for term in ['content', 'marketing', 'blog', 'article']):
|
||||
clusters['content_marketing'].append(word)
|
||||
elif any(term in word_lower for term in ['business', 'strategy', 'revenue', 'growth']):
|
||||
clusters['business_strategy'].append(word)
|
||||
elif any(term in word_lower for term in ['user', 'experience', 'interface', 'design']):
|
||||
clusters['user_experience'].append(word)
|
||||
else:
|
||||
clusters['other'].append(word)
|
||||
|
||||
return clusters
|
||||
|
||||
def render_analysis_dashboard(self, results: Dict[str, Any]):
    """Display the analysis dashboard: headline metrics followed by detail tabs.

    Shows an error and stops when ``results`` is empty or carries an
    ``'error'`` key. Otherwise renders four count metrics and five tabs,
    each tab delegating to the matching ``_render_*`` method.

    Args:
        results: Analysis output. The keys read here are
            ``target_keywords``, ``competitor_urls``, ``keyword_expansion``,
            ``serp_analysis``, ``competitor_content``, ``content_themes``
            and ``ai_insights``.
    """
    if not results or 'error' in results:
        st.error("❌ Analysis failed or no results available")
        return

    st.markdown("## 🎯 Enhanced Content Gap Analysis Results")

    # Headline counters, one per column; values are looked up lazily so
    # each lookup happens while its column is the active container.
    headline_metrics = (
        ("Keywords Analyzed",
         lambda: results.get('target_keywords', [])),
        ("Competitors Crawled",
         lambda: results.get('competitor_urls', [])),
        ("Expanded Keywords",
         lambda: results.get('keyword_expansion', {}).get('expanded_keywords', [])),
        ("SERP Opportunities",
         lambda: results.get('serp_analysis', {}).get('ranking_opportunities', [])),
    )
    for column, (label, fetch_items) in zip(st.columns(4), headline_metrics):
        with column:
            st.metric(label, len(fetch_items()))

    # Detail tabs: (tab title, renderer, key of the results section).
    tab_specs = (
        ("🔍 SERP Analysis", self._render_serp_analysis, 'serp_analysis'),
        ("🎯 Keyword Research", self._render_keyword_analysis, 'keyword_expansion'),
        ("🕷️ Competitor Analysis", self._render_competitor_analysis, 'competitor_content'),
        ("📊 Content Themes", self._render_content_themes, 'content_themes'),
        ("🤖 AI Insights", self._render_ai_insights, 'ai_insights'),
    )
    tabs = st.tabs([title for title, _, _ in tab_specs])
    for tab, (_, render_section, section_key) in zip(tabs, tab_specs):
        with tab:
            render_section(results.get(section_key, {}))
|
||||
|
||||
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
    """Show the SERP section: competitor presence chart and opportunity table.

    Args:
        serp_data: SERP analysis section of the results. Reads
            ``competitor_presence`` (a mapping rendered as a bar chart) and
            ``ranking_opportunities`` (rendered as a table); either part is
            omitted when its value is missing or empty.
    """
    st.subheader("🔍 SERP Landscape Analysis")

    if not serp_data:
        st.info("No SERP analysis data available")
        return

    # Competitor presence, charted with the domain as the index.
    presence = serp_data.get('competitor_presence')
    if presence:
        st.subheader("🏆 Competitor SERP Presence")
        presence_rows = list(presence.items())
        presence_table = pd.DataFrame(presence_rows, columns=['Domain', 'Keywords Ranking'])
        st.bar_chart(presence_table.set_index('Domain'))

    # Ranking opportunities, shown as a plain table.
    opportunities = serp_data.get('ranking_opportunities')
    if opportunities:
        st.subheader("🎯 Ranking Opportunities")
        st.dataframe(pd.DataFrame(opportunities), use_container_width=True)
|
||||
|
||||
def _render_keyword_analysis(self, keyword_data: Dict[str, Any]):
    """Show the keyword section: keywords per search intent and long-tail table.

    Args:
        keyword_data: Keyword expansion section of the results. Reads
            ``keyword_categories`` (intent -> keywords) and
            ``long_tail_opportunities``; either part is omitted when its
            value is missing or empty.
    """
    st.subheader("🎯 Keyword Research Expansion")

    if not keyword_data:
        st.info("No keyword expansion data available")
        return

    # One expander per non-empty intent bucket.
    categories = keyword_data.get('keyword_categories')
    if categories:
        st.subheader("📂 Keywords by Search Intent")
        for intent, keywords in categories.items():
            if not keywords:
                continue
            with st.expander(f"{intent.title()} Keywords ({len(keywords)})"):
                # Only the first 20 keywords of each bucket are listed.
                for keyword in keywords[:20]:
                    st.write(f"• {keyword}")

    # Long-tail keywords as a single-column table.
    long_tail = keyword_data.get('long_tail_opportunities')
    if long_tail:
        st.subheader("🎣 Long-tail Opportunities")
        long_tail_table = pd.DataFrame(long_tail, columns=['Long-tail Keyword'])
        st.dataframe(long_tail_table, use_container_width=True)
|
||||
|
||||
def _render_competitor_analysis(self, competitor_data: Dict[str, Any]):
    """Show a per-domain summary table of the competitor crawl.

    Args:
        competitor_data: Competitor section of the results. Reads
            ``crawl_results`` (domain -> crawl data); from each entry the
            ``total_pages`` value and the ``mean`` of
            ``content_length_stats`` are displayed.
    """
    st.subheader("🕷️ Deep Competitor Analysis")

    crawl_results = competitor_data.get('crawl_results')
    if not crawl_results:
        st.info("No competitor crawl data available")
        return

    st.subheader("📊 Crawl Results Summary")

    # One table row per crawled domain.
    summary_rows = [
        {
            'Domain': domain,
            'Pages Crawled': crawl.get('total_pages', 0),
            'Avg Content Length': round(crawl.get('content_length_stats', {}).get('mean', 0)),
        }
        for domain, crawl in crawl_results.items()
    ]

    if summary_rows:
        st.dataframe(pd.DataFrame(summary_rows), use_container_width=True)
|
||||
|
||||
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Show the theme section: dominant-theme table and topic clusters.

    Args:
        theme_data: Content theme section of the results. Reads
            ``dominant_themes`` (rendered as a table) and
            ``content_clusters`` (cluster name -> themes); either part is
            omitted when its value is missing or empty.
    """
    st.subheader("📊 Content Theme Analysis")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes as a table.
    dominant = theme_data.get('dominant_themes')
    if dominant:
        st.subheader("🎯 Dominant Content Themes")
        st.dataframe(pd.DataFrame(dominant), use_container_width=True)

    # One expander per non-empty cluster.
    clusters = theme_data.get('content_clusters')
    if clusters:
        st.subheader("🗂️ Content Topic Clusters")
        for cluster, themes in clusters.items():
            if not themes:
                continue
            heading = f"{cluster.replace('_', ' ').title()} ({len(themes)} themes)"
            with st.expander(heading):
                # Only the first 10 themes of each cluster are listed.
                for theme in themes[:10]:
                    st.write(f"• {theme}")
|
||||
|
||||
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Show the AI section: top recommendations and implementation timeline.

    Args:
        ai_data: AI insights section of the results. Reads
            ``recommendations`` (first five are shown) and
            ``implementation_timeline`` (period -> tasks); either part is
            omitted when its value is missing or empty.
    """
    st.subheader("🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    # Numbered list of at most five recommendations.
    recommendations = ai_data.get('recommendations')
    if recommendations:
        st.subheader("🎯 Priority Recommendations")
        for position, recommendation in enumerate(recommendations[:5], start=1):
            st.markdown(f"**{position}. {recommendation}**")

    # One expander per timeline period.
    timeline = ai_data.get('implementation_timeline')
    if timeline:
        st.subheader("📅 Implementation Timeline")
        for period, tasks in timeline.items():
            with st.expander(f"{period} Plan"):
                for task in tasks:
                    st.write(f"• {task}")
|
||||
@@ -1,787 +0,0 @@
|
||||
"""
|
||||
Enhanced UI for Content Gap Analysis with Advertools Integration.
|
||||
|
||||
This module provides a comprehensive Streamlit interface for content gap analysis
|
||||
using the EnhancedContentGapAnalyzer with advertools and AI insights.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
from typing import Dict, Any, List
|
||||
import json
|
||||
from datetime import datetime
|
||||
import io
|
||||
import base64
|
||||
|
||||
from .enhanced_analyzer import EnhancedContentGapAnalyzer
|
||||
from lib.alwrity_ui.dashboard_styles import apply_dashboard_style, render_dashboard_header
|
||||
|
||||
class EnhancedContentGapAnalysisUI:
|
||||
"""Enhanced UI for content gap analysis."""
|
||||
|
||||
def __init__(self):
    """Create the analyzer backing this UI and apply the dashboard styling."""
    # The analyzer is created first; styling is applied afterwards.
    analyzer = EnhancedContentGapAnalyzer()
    self.analyzer = analyzer

    apply_dashboard_style()
|
||||
|
||||
def render(self):
    """Render the page: header, input form and, when available, stored results.

    Results are read from ``st.session_state.gap_analysis_results``; the
    results dashboard is shown below a divider only when that entry exists
    and is truthy.
    """
    render_dashboard_header(
        "🎯 Enhanced Content Gap Analysis",
        "Discover content opportunities with AI-powered insights using advertools, SERP analysis, competitor crawling, and strategic recommendations."
    )

    with st.container():
        # Input form first ...
        self._render_analysis_form()

        # ... then the dashboard for results kept from an earlier run.
        stored_results = st.session_state.get('gap_analysis_results')
        if stored_results:
            st.markdown("---")
            self._render_results_dashboard(stored_results)
|
||||
|
||||
def _render_analysis_form(self):
    """Render the analysis input form and start the analysis on submit.

    Collects the target URL, industry, competitor URLs, keywords and three
    option checkboxes. On submit the inputs are validated (an error is
    shown and nothing is run when validation fails) and then handed to
    ``_run_enhanced_analysis``.

    Validation rules:
        * the target URL, after trimming, must start with ``http://`` or
          ``https://``;
        * at least one keyword must be given;
        * at least one competitor line must start with ``http://`` or
          ``https://``; only the first five such URLs are used, as the
          form label promises.
    """
    max_competitors = 5
    url_schemes = ('http://', 'https://')

    st.markdown("## 🚀 Setup Your Content Gap Analysis")

    with st.form("enhanced_gap_analysis_form"):
        # Target website and industry, side by side.
        col1, col2 = st.columns([2, 1])

        with col1:
            target_url = st.text_input(
                "🎯 Your Website URL",
                placeholder="https://yourwebsite.com",
                help="Enter your website URL to analyze"
            )

        with col2:
            industry = st.selectbox(
                "🏭 Industry",
                options=[
                    "general", "technology", "healthcare", "finance",
                    "ecommerce", "education", "real estate", "travel",
                    "food", "fitness", "marketing", "consulting"
                ],
                help="Select your industry for better analysis context"
            )

        # Competitor URLs
        st.markdown("### 🏆 Competitor Analysis")
        competitor_urls_text = st.text_area(
            "Competitor URLs (one per line, max 5)",
            placeholder="https://competitor1.com\nhttps://competitor2.com\nhttps://competitor3.com",
            height=120,
            help="Enter up to 5 competitor URLs for comprehensive analysis"
        )

        # Target keywords
        st.markdown("### 🎯 Keyword Focus")
        target_keywords_text = st.text_input(
            "Primary Keywords (comma-separated)",
            placeholder="seo, content marketing, digital marketing",
            help="Enter your main keywords to analyze and expand"
        )

        # Analysis options
        st.markdown("### ⚙️ Analysis Options")

        col1, col2, col3 = st.columns(3)

        with col1:
            enable_serp = st.checkbox(
                "🔍 SERP Analysis",
                value=True,
                help="Analyze competitor positions in search results"
            )

        with col2:
            enable_crawling = st.checkbox(
                "🕷️ Deep Crawling",
                value=True,
                help="Perform comprehensive competitor content crawling"
            )

        with col3:
            enable_ai_insights = st.checkbox(
                "🤖 AI Insights",
                value=True,
                help="Generate AI-powered strategic recommendations"
            )

        submitted = st.form_submit_button(
            "🚀 Start Enhanced Analysis",
            use_container_width=True,
            type="primary"
        )

        if not submitted:
            return

        # Trim first so a pasted URL with surrounding whitespace is accepted.
        target_url = (target_url or '').strip()
        if not target_url.startswith(url_schemes):
            st.error("❌ Please enter a valid target URL starting with http:// or https://")
            return

        if not target_keywords_text.strip():
            st.error("❌ Please enter at least one target keyword")
            return

        # Keep only lines that look like absolute http(s) URLs.
        competitor_urls = [
            line.strip() for line in competitor_urls_text.split('\n')
            if line.strip().startswith(url_schemes)
        ]

        if not competitor_urls:
            st.error("❌ Please enter at least one valid competitor URL")
            return

        # Enforce the limit advertised in the field label and help text.
        if len(competitor_urls) > max_competitors:
            st.warning(
                f"⚠️ Only the first {max_competitors} competitor URLs will be analyzed"
            )
            competitor_urls = competitor_urls[:max_competitors]

        target_keywords = [
            keyword.strip() for keyword in target_keywords_text.split(',')
            if keyword.strip()
        ]

        self._run_enhanced_analysis(
            target_url=target_url,
            competitor_urls=competitor_urls,
            target_keywords=target_keywords,
            industry=industry,
            options={
                'enable_serp': enable_serp,
                'enable_crawling': enable_crawling,
                'enable_ai_insights': enable_ai_insights
            }
        )
|
||||
|
||||
def _run_enhanced_analysis(self, target_url: str, competitor_urls: List[str],
                           target_keywords: List[str], industry: str, options: Dict[str, bool]):
    """Run the analysis, store the outcome in session state and rerun the app.

    The analyzer result is saved to
    ``st.session_state.gap_analysis_results`` whether or not it carries an
    ``'error'`` key, then ``st.rerun()`` is called. Any exception raised
    along the way is reported with ``st.error``.

    Args:
        target_url: URL of the site being analysed.
        competitor_urls: Competitor site URLs.
        target_keywords: Seed keywords.
        industry: Industry label chosen in the form.
        options: Option flags collected by the form.
            NOTE(review): these flags are not forwarded to the analyzer
            here, so they currently have no effect — confirm whether
            ``analyze_comprehensive_gap`` can accept them.
    """
    try:
        with st.spinner("🔄 Running Enhanced Content Gap Analysis..."):
            # Progress indicators shown while the analysis runs.
            progress_bar = st.progress(0)
            status_text = st.empty()

            try:
                progress_bar.progress(10)
                status_text.text("🚀 Initializing analysis...")

                results = self.analyzer.analyze_comprehensive_gap(
                    target_url=target_url,
                    competitor_urls=competitor_urls,
                    target_keywords=target_keywords,
                    industry=industry
                )

                progress_bar.progress(100)
                status_text.text("✅ Analysis complete!")
            finally:
                # Always remove the indicators, so a failed run does not
                # leave a stale progress bar and status line on screen.
                progress_bar.empty()
                status_text.empty()

            # Store results in session state for the dashboard to pick up.
            st.session_state.gap_analysis_results = results

            if 'error' in results:
                st.error(f"❌ Analysis failed: {results['error']}")
            else:
                st.success("🎉 Enhanced Content Gap Analysis completed successfully!")
                st.balloons()

            # Rerun to show results
            st.rerun()

    except Exception as e:
        st.error(f"❌ Error running analysis: {str(e)}")
|
||||
|
||||
def _render_results_dashboard(self, results: Dict[str, Any]):
    """Render the results area: metrics overview, detail tabs and export options.

    Args:
        results: Analysis output. When it carries an ``'error'`` key only
            that error is shown.
    """
    if 'error' in results:
        st.error(f"❌ Analysis Error: {results['error']}")
        return

    st.markdown("## 📊 Enhanced Content Gap Analysis Results")

    # Sections are rendered top to bottom in this order.
    sections = (
        self._render_metrics_overview,
        self._render_detailed_analysis,
        self._render_export_options,
    )
    for render_section in sections:
        render_section(results)
|
||||
|
||||
def _render_metrics_overview(self, results: Dict[str, Any]):
    """Render five headline count metrics and the analysis timestamp.

    Args:
        results: Analysis output. Reads ``target_keywords``,
            ``competitor_urls``, ``keyword_expansion.expanded_keywords``,
            ``serp_analysis.ranking_opportunities``, ``recommendations``
            and ``analysis_timestamp``.

    The timestamp may be an ISO-8601 string (a trailing ``Z`` is accepted)
    or a ``datetime``. Offset-aware values are converted to UTC before
    formatting; a value that cannot be parsed is shown verbatim instead of
    breaking the dashboard.
    """
    from datetime import timezone

    st.markdown("### 📈 Analysis Overview")

    # (label, lazy value lookup, help text) for each metric column.
    metric_specs = (
        ("🎯 Keywords Analyzed",
         lambda: results.get('target_keywords', []),
         "Number of primary keywords analyzed"),
        ("🏆 Competitors Crawled",
         lambda: results.get('competitor_urls', []),
         "Number of competitor websites analyzed"),
        ("🔍 Keywords Discovered",
         lambda: results.get('keyword_expansion', {}).get('expanded_keywords', []),
         "Additional keywords discovered through expansion"),
        ("🚀 SERP Opportunities",
         lambda: results.get('serp_analysis', {}).get('ranking_opportunities', []),
         "Keywords with ranking opportunities identified"),
        ("💡 AI Recommendations",
         lambda: results.get('recommendations', []),
         "AI-generated strategic recommendations"),
    )
    for column, (label, fetch_items, help_text) in zip(st.columns(5), metric_specs):
        with column:
            st.metric(label, len(fetch_items()), help=help_text)

    # Analysis timestamp
    raw_timestamp = results.get('analysis_timestamp')
    if not raw_timestamp:
        return

    try:
        if isinstance(raw_timestamp, datetime):
            timestamp = raw_timestamp
        else:
            # ``fromisoformat`` rejects a trailing "Z" on older Python
            # versions, so it is rewritten as an explicit UTC offset.
            timestamp = datetime.fromisoformat(str(raw_timestamp).replace('Z', '+00:00'))
    except ValueError:
        st.caption(f"📅 Analysis completed: {raw_timestamp}")
        return

    if timestamp.tzinfo is not None:
        # Make the "UTC" label below true for offset-aware timestamps.
        timestamp = timestamp.astimezone(timezone.utc)
    st.caption(f"📅 Analysis completed: {timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')}")
|
||||
|
||||
def _render_detailed_analysis(self, results: Dict[str, Any]):
    """Render the six detail tabs, each delegating to its ``_render_*`` method.

    Args:
        results: Analysis output. Each of the first five tabs receives one
            section of it (an empty dict when the section is missing); the
            action-plan tab receives the whole mapping.
    """
    # (tab title, renderer, results key); ``None`` means "pass everything".
    sections = (
        ("🔍 SERP Analysis", self._render_serp_analysis, 'serp_analysis'),
        ("🎯 Keyword Research", self._render_keyword_research, 'keyword_expansion'),
        ("🕷️ Competitor Intelligence", self._render_competitor_intelligence, 'competitor_content'),
        ("📊 Content Themes", self._render_content_themes, 'content_themes'),
        ("🤖 AI Strategic Insights", self._render_ai_insights, 'ai_insights'),
        ("📋 Action Plan", self._render_action_plan, None),
    )

    tabs = st.tabs([title for title, _, _ in sections])

    for tab, (_, render_section, section_key) in zip(tabs, sections):
        with tab:
            if section_key is None:
                render_section(results)
            else:
                render_section(results.get(section_key, {}))
|
||||
|
||||
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
    """Render SERP results: competitor presence, opportunities and SERP features.

    Args:
        serp_data: SERP analysis section of the results. Reads
            ``competitor_presence`` (domain -> keyword count),
            ``ranking_opportunities`` and ``keyword_rankings`` (keyword ->
            data with an optional ``serp_features`` list).

    The "top performing" list is ordered by keyword count, highest first.
    When ``ranking_opportunities`` is present but empty, a success message
    is shown instead of the table.
    """
    st.markdown("### 🔍 Search Engine Results Analysis")

    if not serp_data:
        st.info("No SERP analysis data available")
        return

    # Competitor SERP presence
    presence_data = serp_data.get('competitor_presence')
    if presence_data:
        st.markdown("#### 🏆 Competitor SERP Dominance")

        presence_df = pd.DataFrame(
            list(presence_data.items()),
            columns=['Domain', 'Keywords Ranking']
        )
        st.bar_chart(presence_df.set_index('Domain'))

        # Sort explicitly: dict order is not guaranteed to be by count, so
        # taking the first three items would not reliably be the top three.
        top_performers = sorted(
            presence_data.items(), key=lambda item: item[1], reverse=True
        )[:3]

        st.markdown("**🥇 Top Performing Competitors:**")
        for domain, count in top_performers:
            st.write(f"• **{domain}**: Ranking for {count} keywords")

    # Ranking opportunities. Testing for the key (not its truthiness)
    # keeps the "already ranking well" branch reachable for an empty list.
    if 'ranking_opportunities' in serp_data:
        st.markdown("#### 🚀 Ranking Opportunities")

        opportunities = serp_data['ranking_opportunities']

        if opportunities:
            opp_df = pd.DataFrame(opportunities)
            st.dataframe(opp_df, use_container_width=True)

            st.info(f"💡 Found {len(opportunities)} keywords where you're not ranking in top 10!")
        else:
            st.success("🎉 You're already ranking well for your target keywords!")

    # SERP features analysis
    keyword_rankings = serp_data.get('keyword_rankings')
    if keyword_rankings:
        st.markdown("#### 🎯 SERP Features Opportunities")

        all_features = [
            feature
            for keyword_data in keyword_rankings.values()
            for feature in keyword_data.get('serp_features', [])
        ]

        if all_features:
            feature_counts = pd.Series(all_features).value_counts()
            st.bar_chart(feature_counts)

            st.markdown("**🎯 Focus on these SERP features:**")
            for feature, count in feature_counts.head(3).items():
                st.write(f"• **{str(feature).replace('_', ' ').title()}**: Appears in {count} keyword searches")
|
||||
|
||||
def _render_keyword_research(self, keyword_data: Dict[str, Any]):
    """Render keyword research: seed/expanded counts, intent split, long-tail list.

    Args:
        keyword_data: Keyword expansion section of the results. Reads
            ``seed_keywords``, ``expanded_keywords``,
            ``keyword_categories`` (intent -> keywords) and
            ``long_tail_opportunities``.

    When ``long_tail_opportunities`` is present but empty, a warning is
    shown instead of the keyword list.
    """
    st.markdown("### 🎯 Advanced Keyword Research")

    if not keyword_data:
        st.info("No keyword expansion data available")
        return

    # Seed vs expanded keywords
    seed_keywords = keyword_data.get('seed_keywords', [])
    expanded_keywords = keyword_data.get('expanded_keywords', [])

    col1, col2 = st.columns(2)

    with col1:
        st.metric("🌱 Seed Keywords", len(seed_keywords))
        for kw in seed_keywords:
            st.write(f"• {kw}")

    with col2:
        st.metric("🔍 Expanded Keywords", len(expanded_keywords))
        # Guard against division by zero when no seed keywords exist.
        expansion_factor = len(expanded_keywords) / len(seed_keywords) if seed_keywords else 0
        st.write(f"**Expansion Factor:** {expansion_factor:.1f}x")

    # Search intent categorization
    categories = keyword_data.get('keyword_categories')
    if categories:
        st.markdown("#### 🧠 Search Intent Analysis")

        # Intent distribution chart over the non-empty buckets.
        intent_counts = {
            intent: len(keywords)
            for intent, keywords in categories.items() if keywords
        }

        if intent_counts:
            intent_df = pd.DataFrame(
                list(intent_counts.items()),
                columns=['Search Intent', 'Keywords']
            )
            st.bar_chart(intent_df.set_index('Search Intent'))

        # Detailed breakdown, one expander per non-empty bucket.
        for intent, keywords in categories.items():
            if not keywords:
                continue
            with st.expander(f"📂 {intent.title()} Keywords ({len(keywords)})"):
                for kw in keywords[:20]:  # Show first 20
                    st.write(f"• {kw}")

    # Long-tail opportunities. Testing for the key (not its truthiness)
    # keeps the "none identified" warning reachable for an empty list.
    if 'long_tail_opportunities' in keyword_data:
        st.markdown("#### 🎣 Long-tail Keyword Opportunities")

        long_tail = keyword_data['long_tail_opportunities']

        if long_tail:
            st.info(f"🎯 Found {len(long_tail)} long-tail opportunities with lower competition!")

            with st.expander("View Long-tail Keywords"):
                for i, kw in enumerate(long_tail, 1):
                    st.write(f"{i}. {kw}")
        else:
            st.warning("No long-tail opportunities identified")
|
||||
|
||||
def _render_competitor_intelligence(self, competitor_data: Dict[str, Any]):
    """Render competitor crawl insights: summary table, page types, structure.

    Args:
        competitor_data: Competitor section of the results. Reads
            ``crawl_results`` (domain -> crawl data with ``total_pages``,
            ``content_length_stats``, ``status_codes`` and ``page_types``)
            and ``content_structure`` (domain -> structure metrics).

    The success rate is the share of pages with status 200; it is reported
    as 0.0% when no pages were crawled.
    """
    st.markdown("### 🕷️ Competitive Intelligence")

    crawl_results = competitor_data.get('crawl_results')
    if not crawl_results:
        st.info("No competitor crawl data available")
        return

    st.markdown("#### 📊 Competitor Content Overview")

    # Create summary table
    summary_data = []
    for domain, data in crawl_results.items():
        total_pages = data.get('total_pages', 0) or 0
        status_codes = data.get('status_codes', {})
        # Status keys may be strings if the data went through JSON.
        ok_pages = status_codes.get(200, status_codes.get('200', 0))
        # Avoid ZeroDivisionError for domains with no crawled pages.
        success_rate = ok_pages / total_pages * 100 if total_pages else 0.0

        summary_data.append({
            'Competitor': domain,
            'Pages Crawled': total_pages,
            'Avg Content Length': f"{data.get('content_length_stats', {}).get('mean', 0):,.0f} chars",
            'Success Rate': f"{success_rate:.1f}%"
        })

    if summary_data:
        summary_df = pd.DataFrame(summary_data)
        st.dataframe(summary_df, use_container_width=True)

    # Page type analysis
    st.markdown("#### 📄 Content Type Distribution")

    for domain, data in crawl_results.items():
        page_types = data.get('page_types', {})
        if not page_types:
            continue

        with st.expander(f"📊 {domain} Content Types"):
            types_df = pd.DataFrame(
                list(page_types.items()),
                columns=['Page Type', 'Count']
            )

            if not types_df.empty:
                st.bar_chart(types_df.set_index('Page Type'))

            # Key insights
            total_typed_pages = sum(page_types.values())
            if total_typed_pages > 0:
                blog_ratio = page_types.get('blog_posts', 0) / total_typed_pages * 100
                product_ratio = page_types.get('product_pages', 0) / total_typed_pages * 100

                st.write("**Content Strategy Insights:**")
                st.write(f"• Blog content: {blog_ratio:.1f}% of pages")
                st.write(f"• Product focus: {product_ratio:.1f}% of pages")

    # Content structure insights
    structure_data = competitor_data.get('content_structure')
    if structure_data:
        st.markdown("#### 🏗️ Content Structure Analysis")

        for domain, structure in structure_data.items():
            with st.expander(f"🔍 {domain} Structure Analysis"):
                col1, col2 = st.columns(2)

                with col1:
                    st.metric("Avg Title Length", f"{structure.get('avg_title_length', 0):.0f} chars")
                    st.metric("H1 Usage", f"{structure.get('h1_usage', 0):.1f}%")

                with col2:
                    st.metric("Avg Meta Desc Length", f"{structure.get('avg_meta_desc_length', 0):.0f} chars")
                    st.metric("Internal Links", f"{structure.get('internal_links_avg', 0):.1f} avg")
|
||||
|
||||
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Render theme analysis: dominant themes, topic clusters and opportunities.

    Args:
        theme_data: Content theme section of the results. Reads
            ``dominant_themes``, ``content_clusters`` (cluster name ->
            themes) and ``content_opportunities``.

    Dominant theme entries are expected to be mappings with ``word`` (or
    ``text``) and ``freq`` (or ``frequency``); any other entry is shown as
    its string form with a frequency of 0. When ``content_opportunities``
    is present but empty, an info message is shown.
    """
    st.markdown("### 📊 Content Theme Intelligence")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes
    themes = theme_data.get('dominant_themes')
    if themes:
        st.markdown("#### 🎯 Dominant Content Themes")

        themes_df = pd.DataFrame(themes)
        st.dataframe(themes_df, use_container_width=True)

        # Top themes highlight
        st.markdown("**🔥 Top Content Themes:**")
        for i, theme in enumerate(themes[:5], 1):
            if isinstance(theme, dict):
                word = theme.get('word', theme.get('text', 'Unknown'))
                freq = theme.get('freq', theme.get('frequency', 0))
            else:
                # Tolerate plain values instead of failing on ``.get``.
                word, freq = str(theme), 0
            st.write(f"{i}. **{word}** (appears {freq} times)")

    # Content clusters
    clusters = theme_data.get('content_clusters')
    if clusters:
        st.markdown("#### 🗂️ Topic Cluster Analysis")

        # Cluster distribution over the non-empty clusters.
        cluster_counts = {
            name: len(cluster_themes)
            for name, cluster_themes in clusters.items() if cluster_themes
        }

        if cluster_counts:
            cluster_df = pd.DataFrame(
                list(cluster_counts.items()),
                columns=['Topic Cluster', 'Theme Count']
            )
            st.bar_chart(cluster_df.set_index('Topic Cluster'))

        # Detailed cluster view
        for cluster_name, cluster_themes in clusters.items():
            if not cluster_themes:
                continue
            with st.expander(f"📂 {cluster_name.replace('_', ' ').title()} ({len(cluster_themes)} themes)"):
                for theme in cluster_themes[:15]:  # Show first 15
                    st.write(f"• {theme}")

    # Content gaps and opportunities. Testing for the key (not its
    # truthiness) keeps the "none identified" message reachable.
    if 'content_opportunities' in theme_data:
        st.markdown("#### 🎯 Content Gap Opportunities")

        opportunities = theme_data['content_opportunities']

        if opportunities:
            for opp in opportunities:
                st.write(f"🎯 **{opp}**")
        else:
            st.info("No specific content opportunities identified in theme analysis")
|
||||
|
||||
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Render the AI insight sections that are present in ``ai_data``.

    Args:
        ai_data: AI insights section of the results. Reads
            ``recommendations`` (first five shown, one expander each),
            ``competitive_positioning`` and ``content_strategy`` (rendered
            as markdown), ``implementation_timeline`` (period -> tasks) and
            ``technical_opportunities``. Missing or empty sections are
            skipped.
    """
    st.markdown("### 🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    # Up to five recommendations, each in its own expander.
    recommendations = ai_data.get('recommendations')
    if recommendations:
        st.markdown("#### 🎯 Priority Strategic Recommendations")
        for position, recommendation in enumerate(recommendations[:5], start=1):
            with st.expander(f"🎯 Recommendation {position}"):
                st.markdown(recommendation)

    # Free-text sections rendered as markdown under a heading.
    positioning = ai_data.get('competitive_positioning')
    if positioning:
        st.markdown("#### 🏆 Competitive Positioning Insights")
        st.markdown(positioning)

    strategy = ai_data.get('content_strategy')
    if strategy:
        st.markdown("#### 📝 Content Strategy Recommendations")
        st.markdown(strategy)

    # Timeline: one expander per period, tasks as bullets.
    timeline = ai_data.get('implementation_timeline')
    if timeline:
        st.markdown("#### 📅 Implementation Roadmap")
        for period, tasks in timeline.items():
            with st.expander(f"📅 {period.replace('_', ' ').title()} Plan"):
                for task in tasks:
                    st.write(f"• {task}")

    # Technical SEO opportunities as a flat list.
    technical = ai_data.get('technical_opportunities')
    if technical:
        st.markdown("#### ⚙️ Technical SEO Opportunities")
        for opportunity in technical:
            st.write(f"⚙️ {opportunity}")
|
||||
|
||||
def _render_action_plan(self, results: Dict[str, Any]):
    """Render the action plan: quick wins, mid/long-term items and metrics.

    Quick wins are derived from ``results`` (SERP opportunities, long-tail
    keywords and the first dominant theme); the remaining lists are fixed
    text.

    Args:
        results: Analysis output. Reads
            ``serp_analysis.ranking_opportunities``,
            ``keyword_expansion.long_tail_opportunities`` and
            ``content_themes.dominant_themes``.
    """

    def _write_numbered(entries):
        """Write *entries* as a list numbered from 1."""
        for position, entry in enumerate(entries, start=1):
            st.write(f"{position}. {entry}")

    st.markdown("### 📋 Your Content Gap Action Plan")

    # --- Quick wins, built from whatever the analysis found -------------
    st.markdown("#### 🚀 Quick Wins (Week 1-2)")

    quick_wins = []

    serp_opportunities = results.get('serp_analysis', {}).get('ranking_opportunities', [])
    if serp_opportunities:
        quick_wins.append(f"🎯 Target {len(serp_opportunities)} keywords where you're not ranking")

    long_tail = results.get('keyword_expansion', {}).get('long_tail_opportunities', [])
    if long_tail:
        quick_wins.append(f"🎣 Create content for {min(5, len(long_tail))} high-potential long-tail keywords")

    themes = results.get('content_themes', {}).get('dominant_themes', [])
    if themes:
        # ``themes`` is non-empty here, so the first entry always exists.
        top_theme = themes[0].get('word', 'top theme')
        quick_wins.append(f"📊 Optimize existing content around '{top_theme}' theme")

    _write_numbered(quick_wins)

    # --- Medium-term strategy -------------------------------------------
    st.markdown("#### 📈 Medium-term Strategy (Month 1-3)")
    _write_numbered([
        "🕷️ Conduct regular competitor content audits",
        "🎯 Develop content calendar based on keyword gaps",
        "📊 Implement content theme clusters",
        "🤖 Set up automated SERP monitoring",
    ])

    # --- Long-term vision -----------------------------------------------
    st.markdown("#### 🎯 Long-term Vision (Quarter 2+)")
    _write_numbered([
        "🏆 Establish thought leadership in identified content gaps",
        "🌐 Build comprehensive content hub around dominant themes",
        "📈 Scale content production based on proven gaps",
        "🤝 Develop strategic partnerships for content collaboration",
    ])

    # --- Success metrics (bulleted, not numbered) -----------------------
    st.markdown("#### 📊 Success Metrics to Track")
    success_metrics = (
        "🎯 Keyword ranking improvements for target terms",
        "📈 Organic traffic growth from new content",
        "🔍 SERP feature acquisitions (featured snippets, etc.)",
        "🏆 Competitive ranking gains in content themes",
        "📊 Content engagement metrics and user behavior",
    )
    for metric in success_metrics:
        st.write(f"• {metric}")
|
||||
|
||||
def _render_export_options(self, results: Dict[str, Any]):
    """Render download controls for the analysis results.

    Three columns are shown: the full results as JSON, the expanded
    keywords as CSV and a plain-text summary report.

    The download buttons are rendered directly rather than nested under
    ``if st.button(...)``. A plain button only returns ``True`` for the one
    rerun triggered by its click, so a download button nested below it
    disappears again on the following rerun and forces a two-click flow.

    Args:
        results: Analysis results to export.
    """
    st.markdown("---")
    st.markdown("### 📥 Export Analysis Results")

    # One timestamp per render so the three file names agree with each other.
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    json_col, csv_col, summary_col = st.columns(3)

    with json_col:
        # default=str keeps values json cannot encode natively exportable.
        st.download_button(
            label="⬇️ Download JSON Report",
            data=json.dumps(results, indent=2, default=str),
            file_name=f"content_gap_analysis_{stamp}.json",
            mime="application/json",
            use_container_width=True
        )

    with csv_col:
        # The section may be missing or explicitly None; both mean "no keywords".
        expansion = results.get('keyword_expansion') or {}
        expanded_keywords = expansion.get('expanded_keywords') or []

        if expanded_keywords:
            keywords_df = pd.DataFrame(expanded_keywords, columns=['Keyword'])
            st.download_button(
                label="⬇️ Download Keywords CSV",
                data=keywords_df.to_csv(index=False),
                file_name=f"discovered_keywords_{stamp}.csv",
                mime="text/csv",
                use_container_width=True
            )
        else:
            st.warning("No keywords available for export")

    with summary_col:
        st.download_button(
            label="⬇️ Download Summary Report",
            data=self._generate_summary_report(results),
            file_name=f"content_gap_summary_{stamp}.txt",
            mime="text/plain",
            use_container_width=True
        )
|
||||
|
||||
def _generate_summary_report(self, results: Dict[str, Any]) -> str:
|
||||
"""Generate a text summary report."""
|
||||
|
||||
target_url = results.get('target_url', 'Unknown')
|
||||
timestamp = results.get('analysis_timestamp', datetime.now().isoformat())
|
||||
|
||||
summary = f"""
|
||||
ENHANCED CONTENT GAP ANALYSIS REPORT
|
||||
=====================================
|
||||
|
||||
Target Website: {target_url}
|
||||
Analysis Date: {timestamp}
|
||||
Industry: {results.get('industry', 'General')}
|
||||
|
||||
EXECUTIVE SUMMARY
|
||||
-----------------
|
||||
Keywords Analyzed: {len(results.get('target_keywords', []))}
|
||||
Competitors Analyzed: {len(results.get('competitor_urls', []))}
|
||||
Keywords Discovered: {len(results.get('keyword_expansion', {}).get('expanded_keywords', []))}
|
||||
SERP Opportunities: {len(results.get('serp_analysis', {}).get('ranking_opportunities', []))}
|
||||
|
||||
RANKING OPPORTUNITIES
|
||||
---------------------
|
||||
"""
|
||||
|
||||
# Add ranking opportunities
|
||||
opportunities = results.get('serp_analysis', {}).get('ranking_opportunities', [])
|
||||
for i, opp in enumerate(opportunities[:10], 1):
|
||||
summary += f"{i}. {opp.get('keyword', 'Unknown keyword')}\n"
|
||||
|
||||
# Add top keywords discovered
|
||||
summary += "\nTOP DISCOVERED KEYWORDS\n-----------------------\n"
|
||||
expanded_keywords = results.get('keyword_expansion', {}).get('expanded_keywords', [])
|
||||
for i, kw in enumerate(expanded_keywords[:20], 1):
|
||||
summary += f"{i}. {kw}\n"
|
||||
|
||||
# Add AI recommendations
|
||||
recommendations = results.get('ai_insights', {}).get('recommendations', [])
|
||||
if recommendations:
|
||||
summary += "\nAI STRATEGIC RECOMMENDATIONS\n----------------------------\n"
|
||||
for i, rec in enumerate(recommendations[:5], 1):
|
||||
summary += f"{i}. {rec}\n"
|
||||
|
||||
summary += f"\n\nReport generated by ALwrity Enhanced Content Gap Analysis\nTimestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
|
||||
return summary
|
||||
|
||||
# Render function for integration with main dashboard
|
||||
def render_enhanced_content_gap_analysis():
    """Dashboard entry point: build the enhanced content gap UI and render it."""
    EnhancedContentGapAnalysisUI().render()
|
||||
@@ -1,649 +0,0 @@
|
||||
"""
|
||||
Keyword researcher for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
|
||||
|
||||
# Logging setup for this module.
# logger.remove() with no argument drops every handler registered so far
# (loguru's default one included) before the two sinks below are attached.
logger.remove()

# Rotating DEBUG log file.
logger.add(
    "logs/keyword_researcher.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days"
)

# Colourised INFO output on stdout.
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO"
)

# Make sure the directory used by the file sink is present.
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
class KeywordResearcher:
|
||||
"""Researches and analyzes keywords for content strategy."""
|
||||
|
||||
def __init__(self):
    """Create the AI processor and progress tracker and record the stage layout."""
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Step labels of the single 'keyword_analysis' stage.
    keyword_steps = [
        'Initializing keyword research',
        'Analyzing keyword trends',
        'Evaluating search intent',
        'Identifying opportunities',
        'Generating keyword insights',
    ]
    self.stages = {
        'keyword_analysis': {'name': 'Keyword Analysis', 'steps': keyword_steps},
    }
|
||||
|
||||
def analyze(self, industry: str, url: str) -> Dict[str, Any]:
    """
    Run the keyword-analysis stage end to end.

    The private steps run in order (trends, search intent, opportunities,
    insights) and the progress tracker is advanced after each one. Any
    exception is reported through the tracker and the Streamlit UI, and an
    empty result carrying the error text is returned instead.

    Args:
        industry: Industry category
        url: Target website URL (accepted but not read by this method)

    Returns:
        Dictionary with 'trend_analysis', 'intent_analysis',
        'opportunities' and 'insights'; on failure it also has 'error'.
    """
    try:
        self.progress.start_stage('keyword_analysis')
        self.progress.next_step()

        # Step 1: keyword trends
        trends = self._analyze_keyword_trends(industry)
        self.progress.next_step()

        # Step 2: search intent, derived from the trends
        intents = self._evaluate_search_intent(trends)
        self.progress.next_step()

        # Step 3: opportunities
        found = self._identify_opportunities(trends, intents)
        self.progress.next_step()

        # Step 4: human-readable insights
        notes = self._generate_keyword_insights(trends, intents, found)
        self.progress.next_step()

        self.progress.complete_stage()

        return dict(
            trend_analysis=trends,
            intent_analysis=intents,
            opportunities=found,
            insights=notes,
        )

    except Exception as e:
        if self.progress.current_stage:
            stage_name = self.progress.stages[self.progress.current_stage]['name']
            self.progress.update_progress(0, f"Error in {stage_name}: {str(e)}")
        st.error(f"Error analyzing keywords: {str(e)}")
        return dict(
            error=str(e),
            trend_analysis={},
            intent_analysis={},
            opportunities=[],
            insights=[],
        )
|
||||
|
||||
def _analyze_keyword_trends(self, industry: str) -> Dict[str, Any]:
|
||||
"""Analyze keyword trends."""
|
||||
try:
|
||||
# Get AI analysis for keyword trends
|
||||
analysis = self.ai_processor.analyze_keywords({
|
||||
'industry': industry,
|
||||
'keywords': {} # Keywords will be fetched by AI processor
|
||||
})
|
||||
|
||||
return {
|
||||
'trends': analysis.get('keyword_trends', {}),
|
||||
'search_intent': analysis.get('search_intent', {}),
|
||||
'keyword_insights': analysis.get('keyword_insights', {})
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error analyzing keyword trends: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _evaluate_search_intent(self, trend_analysis: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Evaluate search intent."""
|
||||
try:
|
||||
intent_analysis = {
|
||||
'informational': [],
|
||||
'transactional': [],
|
||||
'navigational': [],
|
||||
'commercial': []
|
||||
}
|
||||
|
||||
# Categorize keywords by intent
|
||||
for keyword, data in trend_analysis.get('trends', {}).items():
|
||||
intent = data.get('intent', 'informational')
|
||||
if intent in intent_analysis:
|
||||
intent_analysis[intent].append({
|
||||
'keyword': keyword,
|
||||
'volume': data.get('volume', 0),
|
||||
'difficulty': data.get('difficulty', 0)
|
||||
})
|
||||
|
||||
return intent_analysis
|
||||
except Exception as e:
|
||||
st.error(f"Error evaluating search intent: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _identify_opportunities(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Identify keyword opportunities."""
|
||||
try:
|
||||
opportunities = []
|
||||
|
||||
# Analyze each intent category
|
||||
for intent, keywords in intent_analysis.items():
|
||||
for keyword_data in keywords:
|
||||
# Calculate opportunity score
|
||||
volume = keyword_data.get('volume', 0)
|
||||
difficulty = keyword_data.get('difficulty', 0)
|
||||
opportunity_score = volume * (1 - difficulty/100)
|
||||
|
||||
if opportunity_score > 50: # Threshold for good opportunities
|
||||
opportunities.append({
|
||||
'keyword': keyword_data['keyword'],
|
||||
'intent': intent,
|
||||
'volume': volume,
|
||||
'difficulty': difficulty,
|
||||
'opportunity_score': opportunity_score
|
||||
})
|
||||
|
||||
# Sort by opportunity score
|
||||
opportunities.sort(key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
return opportunities
|
||||
except Exception as e:
|
||||
st.error(f"Error identifying opportunities: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_keyword_insights(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any], opportunities: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Generate keyword insights."""
|
||||
try:
|
||||
insights = []
|
||||
|
||||
# Trend insights
|
||||
if trend_analysis.get('trends'):
|
||||
insights.append(f"Analyzed {len(trend_analysis['trends'])} keywords for trends")
|
||||
|
||||
# Intent insights
|
||||
for intent, keywords in intent_analysis.items():
|
||||
if keywords:
|
||||
insights.append(f"Found {len(keywords)} {intent} keywords")
|
||||
|
||||
# Opportunity insights
|
||||
if opportunities:
|
||||
insights.append(f"Identified {len(opportunities)} high-potential keyword opportunities")
|
||||
|
||||
return insights
|
||||
except Exception as e:
|
||||
st.error(f"Error generating keyword insights: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_titles(self, industry: str) -> dict:
    """
    Delegate title generation to ai_title_generator.

    Args:
        industry (str): Industry, passed through unchanged

    Returns:
        The value returned by ai_title_generator(industry)
    """
    generated = ai_title_generator(industry)
    return generated
|
||||
|
||||
def _analyze_meta_descriptions(self, industry: str) -> dict:
    """
    Delegate meta description analysis to metadesc_generator_main.

    Args:
        industry (str): Industry, passed through unchanged

    Returns:
        The value returned by metadesc_generator_main(industry)
    """
    analysis = metadesc_generator_main(industry)
    return analysis
|
||||
|
||||
def _analyze_structured_data(self, industry: str) -> dict:
    """
    Delegate structured data analysis to ai_structured_data.

    Args:
        industry (str): Industry, passed through unchanged

    Returns:
        The value returned by ai_structured_data(industry)
    """
    analysis = ai_structured_data(industry)
    return analysis
|
||||
|
||||
def _extract_keywords(self, titles: dict, meta_analysis: dict) -> list:
    """
    Ask the LLM to extract keywords (with metrics) from titles and meta descriptions.

    Args:
        titles (dict): Generated titles
        meta_analysis (dict): Meta description analysis

    Returns:
        The llm_text_gen response for the structured request, or an empty
        list if the call raises.
    """
    def text():
        return {"type": "string"}

    def number():
        return {"type": "number"}

    # JSON schema handed to llm_text_gen alongside the prompt.
    schema = {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "keyword": text(),
                        "search_volume": number(),
                        "difficulty": number(),
                        "relevance_score": number(),
                        "content_type": text()
                    }
                }
            },
            "summary": {
                "type": "object",
                "properties": {
                    "total_keywords": number(),
                    "high_opportunity_keywords": number(),
                    "recommended_focus_areas": {"type": "array", "items": text()}
                }
            }
        }
    }

    prompt = f"""
    As an SEO expert, analyze the following content and extract relevant keywords with their metrics:

    Titles: {titles}
    Meta Descriptions: {meta_analysis}

    Please provide a JSON response with the following structure:
    {{
        "keywords": [
            {{
                "keyword": "string",
                "search_volume": "number",
                "difficulty": "number",
                "relevance_score": "number",
                "content_type": "string"
            }}
        ],
        "summary": {{
            "total_keywords": "number",
            "high_opportunity_keywords": "number",
            "recommended_focus_areas": ["string"]
        }}
    }}

    Focus on:
    1. Primary keywords and their variations
    2. Long-tail keywords
    3. Industry-specific terminology
    4. Search volume and difficulty metrics
    5. Content type recommendations
    """

    try:
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error extracting keywords: {e}")
        return []
|
||||
|
||||
def _analyze_search_intent(self, ai_insights: dict) -> dict:
    """
    Ask the LLM to classify search intent for the given insights.

    Args:
        ai_insights (dict): AI-processed insights

    Returns:
        The llm_text_gen response for the structured request; if the call
        raises, a dict with empty 'informational', 'transactional' and
        'navigational' lists.
    """
    def string_list():
        return {"type": "array", "items": {"type": "string"}}

    def intent_bucket():
        # All three intent lists share this item layout.
        return {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "keyword": {"type": "string"},
                    "intent_type": {"type": "string"},
                    "content_suggestions": string_list()
                }
            }
        }

    schema = {
        "type": "object",
        "properties": {
            "informational": intent_bucket(),
            "transactional": intent_bucket(),
            "navigational": intent_bucket(),
            "summary": {
                "type": "object",
                "properties": {
                    "dominant_intent": {"type": "string"},
                    "content_strategy_recommendations": string_list()
                }
            }
        }
    }

    prompt = f"""
    As an SEO expert, analyze the following content insights and determine the search intent:

    Content Insights: {ai_insights}

    Please provide a JSON response with the following structure:
    {{
        "informational": [
            {{
                "keyword": "string",
                "intent_type": "string",
                "content_suggestions": ["string"]
            }}
        ],
        "transactional": [
            {{
                "keyword": "string",
                "intent_type": "string",
                "content_suggestions": ["string"]
            }}
        ],
        "navigational": [
            {{
                "keyword": "string",
                "intent_type": "string",
                "content_suggestions": ["string"]
            }}
        ],
        "summary": {{
            "dominant_intent": "string",
            "content_strategy_recommendations": ["string"]
        }}
    }}

    Focus on:
    1. Identifying primary search intent for each keyword
    2. Suggesting appropriate content types
    3. Providing content strategy recommendations
    4. Analyzing user behavior patterns
    """

    try:
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error analyzing search intent: {e}")
        return {name: [] for name in ('informational', 'transactional', 'navigational')}
|
||||
|
||||
def _suggest_content_formats(self, ai_insights: dict) -> list:
    """
    Ask the LLM to recommend content formats for the given insights.

    Args:
        ai_insights (dict): AI-processed insights

    Returns:
        The llm_text_gen response for the structured request, or an empty
        list if the call raises.
    """
    def text():
        return {"type": "string"}

    def string_list():
        return {"type": "array", "items": {"type": "string"}}

    schema = {
        "type": "object",
        "properties": {
            "content_formats": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "format": text(),
                        "description": text(),
                        "use_cases": string_list(),
                        "recommended_topics": string_list(),
                        "estimated_impact": text()
                    }
                }
            },
            "format_strategy": {
                "type": "object",
                "properties": {
                    "primary_formats": string_list(),
                    "secondary_formats": string_list(),
                    "implementation_priority": string_list()
                }
            }
        }
    }

    prompt = f"""
    As a content strategy expert, analyze the following insights and suggest appropriate content formats:

    AI Insights: {ai_insights}

    Please provide a JSON response with the following structure:
    {{
        "content_formats": [
            {{
                "format": "string",
                "description": "string",
                "use_cases": ["string"],
                "recommended_topics": ["string"],
                "estimated_impact": "string"
            }}
        ],
        "format_strategy": {{
            "primary_formats": ["string"],
            "secondary_formats": ["string"],
            "implementation_priority": ["string"]
        }}
    }}

    Focus on:
    1. Identifying the most effective content formats
    2. Matching formats to user intent
    3. Suggesting specific use cases
    4. Providing implementation guidance
    """

    try:
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error suggesting content formats: {e}")
        return []
|
||||
|
||||
def _create_topic_clusters(self, ai_insights: dict) -> dict:
    """
    Ask the LLM to organise the given insights into topic clusters.

    Args:
        ai_insights (dict): AI-processed insights

    Returns:
        The llm_text_gen response for the structured request; if the call
        raises, {'clusters': [], 'relationships': {}}.
    """
    def text():
        return {"type": "string"}

    def string_list():
        return {"type": "array", "items": {"type": "string"}}

    cluster_item = {
        "type": "object",
        "properties": {
            "cluster_name": text(),
            "main_topics": string_list(),
            "subtopics": string_list(),
            "related_keywords": string_list(),
            "content_opportunities": string_list()
        }
    }
    connection_item = {
        "type": "object",
        "properties": {
            "source": text(),
            "target": text(),
            "relationship_type": text(),
            "strength": {"type": "number"}
        }
    }
    schema = {
        "type": "object",
        "properties": {
            "clusters": {"type": "array", "items": cluster_item},
            "relationships": {
                "type": "object",
                "properties": {
                    "cluster_connections": {"type": "array", "items": connection_item},
                    "content_hierarchy": {
                        "type": "object",
                        "properties": {
                            "primary_topics": string_list(),
                            "secondary_topics": string_list(),
                            "tertiary_topics": string_list()
                        }
                    }
                }
            }
        }
    }

    prompt = f"""
    As a content organization expert, analyze the following insights and create topic clusters:

    AI Insights: {ai_insights}

    Please provide a JSON response with the following structure:
    {{
        "clusters": [
            {{
                "cluster_name": "string",
                "main_topics": ["string"],
                "subtopics": ["string"],
                "related_keywords": ["string"],
                "content_opportunities": ["string"]
            }}
        ],
        "relationships": {{
            "cluster_connections": [
                {{
                    "source": "string",
                    "target": "string",
                    "relationship_type": "string",
                    "strength": "number"
                }}
            ],
            "content_hierarchy": {{
                "primary_topics": ["string"],
                "secondary_topics": ["string"],
                "tertiary_topics": ["string"]
            }}
        }}
    }}

    Focus on:
    1. Identifying main topic clusters
    2. Organizing subtopics and related keywords
    3. Mapping relationships between clusters
    4. Suggesting content opportunities
    """

    try:
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error creating topic clusters: {e}")
        return dict(clusters=[], relationships={})
|
||||
@@ -1,361 +0,0 @@
|
||||
"""
|
||||
Main module for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor, ProgressTracker
|
||||
from .utils.storage import ContentGapAnalysisStorage
|
||||
from datetime import datetime
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from .utils.content_parser import ContentParser
|
||||
|
||||
# Logging setup for this module.
# logger.remove() with no argument drops every handler registered so far
# (loguru's default one included) before the two sinks below are attached.
logger.remove()

# Rotating DEBUG log file.
logger.add(
    "logs/content_gap_analysis.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days"
)

# Colourised INFO output on stdout.
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO"
)

# Make sure the directory used by the file sink is present.
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
class ContentGapAnalysis:
|
||||
"""Main class for content gap analysis."""
|
||||
|
||||
def __init__(self, db_session=None):
    """
    Build the analysis components and describe the workflow phases.

    Args:
        db_session: Optional database session; when falsy, no storage
            backend is created and self.storage is None.
    """
    self.website_analyzer = WebsiteAnalyzer()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.keyword_researcher = KeywordResearcher()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()
    self.storage = None
    if db_session:
        self.storage = ContentGapAnalysisStorage(db_session)

    def phase(title, *steps):
        # One phase entry: display name plus its ordered step labels.
        return {'name': title, 'steps': list(steps)}

    self.phases = {
        'website_analysis': phase(
            'Website Analysis',
            'Initializing website analysis',
            'Analyzing website content',
            'Evaluating SEO elements',
            'Generating website insights',
        ),
        'competitor_analysis': phase(
            'Competitor Analysis',
            'Initializing competitor analysis',
            'Analyzing competitor content',
            'Comparing market position',
            'Generating competitive insights',
        ),
        'keyword_analysis': phase(
            'Keyword Analysis',
            'Initializing keyword research',
            'Analyzing keyword trends',
            'Evaluating search intent',
            'Generating keyword insights',
        ),
        'recommendation_generation': phase(
            'Recommendation Generation',
            'Initializing recommendation engine',
            'Analyzing content gaps',
            'Generating recommendations',
            'Creating implementation plan',
        ),
    }

    logger.info("ContentGapAnalysis initialized")
|
||||
|
||||
def analyze(self, url: str, industry: str, competitor_urls: Optional[List[str]] = None, user_id: Optional[int] = None) -> Dict[str, Any]:
    """
    Run the complete content gap analysis workflow.

    Phases: website analysis, competitor analysis (only when
    competitor_urls is non-empty), keyword analysis and recommendation
    generation. The progress tracker is advanced around each phase.

    Args:
        url: Target website URL
        industry: Industry category
        competitor_urls: Optional list of competitor URLs
        user_id: Optional user ID for storing results; ``None`` means
            "do not store" (``0`` is treated as a real ID)

    Returns:
        Dictionary with 'website', 'keywords', 'recommendations',
        'duration' (seconds), 'competitors' when competitors were analysed
        and 'analysis_id' when the results were stored. On failure a dict
        with 'error' and empty sections is returned instead.
    """
    # Local import: the module only imports datetime, and a monotonic clock
    # is the right tool for measuring elapsed time.
    import time

    try:
        results = {}
        started = time.monotonic()

        # Phase 1: Website Analysis
        self.progress.start_stage('website_analysis')
        self.progress.next_step()

        website_analysis = self.website_analyzer.analyze(url)
        results['website'] = website_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 2: Competitor Analysis (optional)
        # Bound up front so the recommendation call never touches an unset name.
        competitor_analysis = None
        if competitor_urls:
            self.progress.start_stage('competitor_analysis')
            self.progress.next_step()

            competitor_analysis = self.competitor_analyzer.analyze(competitor_urls, industry)
            results['competitors'] = competitor_analysis

            self.progress.next_step()
            self.progress.complete_stage()

        # Phase 3: Keyword Analysis
        self.progress.start_stage('keyword_analysis')
        self.progress.next_step()

        keyword_analysis = self.keyword_researcher.analyze(industry, url)
        results['keywords'] = keyword_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 4: Recommendation Generation
        self.progress.start_stage('recommendation_generation')
        self.progress.next_step()

        recommendations = self.recommendation_engine.generate_recommendations(
            website_analysis,
            competitor_analysis,
            keyword_analysis
        )
        results['recommendations'] = recommendations

        self.progress.next_step()
        self.progress.complete_stage()

        # Elapsed time in seconds
        results['duration'] = time.monotonic() - started

        # Store results when a user ID was given and storage is available.
        # "is not None" so that a user ID of 0 is not mistaken for "no user".
        if user_id is not None and self.storage:
            analysis_id = self.storage.save_analysis(user_id, url, industry, results)
            if analysis_id:
                results['analysis_id'] = analysis_id

        return results

    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error in {self.progress.stages[self.progress.current_stage]['name']}: {str(e)}")
        st.error(f"Error in content gap analysis: {str(e)}")
        return {
            'error': str(e),
            'website': {},
            'competitors': [],
            'keywords': {},
            'recommendations': []
        }
|
||||
|
||||
def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
    """
    Fetch stored analysis results from the storage backend.

    Args:
        analysis_id: Analysis ID

    Returns:
        The storage backend's answer for the ID, or None (after showing
        an error) when no storage is configured
    """
    backend = self.storage
    if backend:
        return backend.get_analysis(analysis_id)
    st.error("Storage not initialized")
    return None
|
||||
|
||||
def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
    """
    List the analyses the storage backend holds for a user.

    Args:
        user_id: User ID

    Returns:
        The storage backend's answer for the user, or [] (after showing
        an error) when no storage is configured
    """
    backend = self.storage
    if backend:
        return backend.get_user_analyses(user_id)
    st.error("Storage not initialized")
    return []
|
||||
|
||||
def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
    """
    Forward a recommendation status change to the storage backend.

    Args:
        recommendation_id: Recommendation ID
        status: New status

    Returns:
        The storage backend's result, or False (after showing an error)
        when no storage is configured
    """
    backend = self.storage
    if backend:
        return backend.update_recommendation_status(recommendation_id, status)
    st.error("Storage not initialized")
    return False
|
||||
|
||||
def delete_analysis(self, analysis_id: int) -> bool:
    """
    Forward the deletion of an analysis to the storage backend.

    Args:
        analysis_id: Analysis ID

    Returns:
        The storage backend's result, or False (after showing an error)
        when no storage is configured
    """
    backend = self.storage
    if backend:
        return backend.delete_analysis(analysis_id)
    st.error("Storage not initialized")
    return False
|
||||
|
||||
def get_analysis_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Condense analysis results into a summary.

    Each section of results is handed to its _summarize_* helper; the
    'ai_insights' section is copied through unchanged.

    Args:
        results: Dictionary containing analysis results

    Returns:
        Dictionary with 'website_metrics', 'competitor_insights',
        'keyword_opportunities', 'recommendation_highlights' and
        'ai_insights'; on failure the same keys with empty dicts plus 'error'
    """
    try:
        self.progress.start_stage('summary_generation')
        self.progress.next_step()

        website = self._summarize_website_metrics(results.get('website', {}))
        competitors = self._summarize_competitor_insights(results.get('competitors', {}))
        keywords = self._summarize_keyword_opportunities(results.get('keywords', {}))
        highlights = self._summarize_recommendations(results.get('recommendations', {}))

        summary = dict(
            website_metrics=website,
            competitor_insights=competitors,
            keyword_opportunities=keywords,
            recommendation_highlights=highlights,
            ai_insights=results.get('ai_insights', {}),
        )

        self.progress.complete_stage()
        return summary

    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error generating summary: {str(e)}")
        st.error(f"Error generating analysis summary: {str(e)}")
        return dict(
            error=str(e),
            website_metrics={},
            competitor_insights={},
            keyword_opportunities={},
            recommendation_highlights={},
            ai_insights={},
        )
|
||||
|
||||
def export_results(self, results: Dict[str, Any], format: str = 'json') -> str:
    """
    Export analysis results in the specified format.

    Args:
        results: Dictionary containing analysis results
        format: Export format ('json' or 'csv'), matched case-insensitively

    Returns:
        String containing exported results. If the export fails (including
        an unsupported format) the failure is reported via the progress
        tracker and ``st.error`` and the error message is returned instead.
    """
    try:
        self.progress.start_stage('export')
        self.progress.next_step()

        export_format = format.lower()
        if export_format == 'json':
            import json
            exported = json.dumps(results, indent=2)
        elif export_format == 'csv':
            import pandas as pd
            try:
                # Dict-of-dicts / dict-of-lists results keep their original layout.
                frame = pd.DataFrame(results)
            except ValueError:
                # All-scalar or mixed dict/scalar results cannot be turned into
                # a frame directly; flatten them into a single row instead of
                # failing the export.
                frame = pd.json_normalize(results)
            exported = frame.to_csv(index=False)
        else:
            raise ValueError(f"Unsupported export format: {format}")

        self.progress.complete_stage()
        return exported

    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error exporting results: {str(e)}")
        st.error(f"Error exporting results: {str(e)}")
        return str(e)
|
||||
|
||||
def _summarize_website_metrics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of website metrics."""
|
||||
try:
|
||||
return {
|
||||
'content_score': website_data.get('content_score', 0),
|
||||
'seo_score': website_data.get('seo_score', 0),
|
||||
'structure_score': website_data.get('structure_score', 0),
|
||||
'key_insights': website_data.get('insights', [])[:5] # Top 5 insights
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing website metrics: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_competitor_insights(self, competitor_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of competitor insights."""
|
||||
try:
|
||||
return {
|
||||
'market_position': competitor_data.get('market_position', {}),
|
||||
'content_gaps': competitor_data.get('content_gaps', [])[:5], # Top 5 gaps
|
||||
'competitive_advantages': competitor_data.get('advantages', [])[:5] # Top 5 advantages
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing competitor insights: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_keyword_opportunities(self, keyword_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of keyword opportunities."""
|
||||
try:
|
||||
return {
|
||||
'top_keywords': keyword_data.get('top_keywords', [])[:10], # Top 10 keywords
|
||||
'search_intent': keyword_data.get('search_intent', {}),
|
||||
'opportunities': keyword_data.get('opportunities', [])[:5] # Top 5 opportunities
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing keyword opportunities: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_recommendations(self, recommendation_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of recommendations."""
|
||||
try:
|
||||
return {
|
||||
'priority_recommendations': recommendation_data.get('priority_recommendations', [])[:5], # Top 5 recommendations
|
||||
'implementation_timeline': recommendation_data.get('timeline', {}),
|
||||
'expected_impact': recommendation_data.get('impact', {})
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing recommendations: {str(e)}")
|
||||
return {}
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Navigation component for Content Gap Analysis tool.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
|
||||
def show_content_gap_analysis_nav():
    """Render the Content Gap Analysis sidebar and return the chosen analysis type.

    Returns:
        The option selected in the "Select Analysis Type" radio group.
    """
    sidebar = st.sidebar

    sidebar.title("Content Gap Analysis")
    sidebar.markdown("""
    Analyze your content strategy, identify gaps, and get AI-powered recommendations.
    """)

    # Navigation options
    analysis_types = ["Website Analysis", "Competitor Analysis", "Keyword Research", "Recommendations"]
    selected = sidebar.radio("Select Analysis Type", analysis_types)

    # Tool description
    sidebar.markdown("""
    ### Features
    - Website content analysis
    - Competitor content comparison
    - Keyword research and trends
    - AI-powered recommendations
    - Content gap identification
    - Implementation timeline
    """)

    # Help section
    with sidebar.expander("How to Use"):
        st.markdown("""
        1. Start with Website Analysis
        2. Add competitor URLs
        3. Research keywords
        4. Get recommendations
        5. Export results
        """)

    return selected
|
||||
@@ -1,440 +0,0 @@
|
||||
"""
|
||||
Recommendation engine for content gap analysis.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Configure logger
# Make sure the log directory exists before the file sink that writes into it
# is registered, so the setup does not rely on the sink creating it.
os.makedirs("logs", exist_ok=True)

logger.remove()  # Remove default handler

# File sink: DEBUG and above, rotated at 50 MB and kept for 10 days.
logger.add(
    "logs/recommendation_engine.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)

# Console sink: INFO and above, colourised.
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
class RecommendationEngine:
    """
    Generates content recommendations based on analysis results.

    Website, competitor and keyword analysis dictionaries are turned into a
    flat list of "gaps"; each gap is scored, gaps scoring above 50 become
    opportunities, opportunities become recommendations with implementation
    steps, and recommendations are grouped into phases of an implementation
    plan.
    """

    # Starting priority for each gap type; unknown types start at 0.
    _BASE_PRIORITY = {
        'content_quality': 70,
        'seo': 80,
        'competitor': 60,
        'keyword': 50,
    }

    # Canned implementation steps per gap type.
    _IMPLEMENTATION_STEPS = {
        'content_quality': (
            'Review current content structure',
            'Improve readability and formatting',
            'Enhance content organization',
            'Update content based on best practices',
        ),
        'seo': (
            'Audit current SEO implementation',
            'Optimize meta tags and descriptions',
            'Improve content structure for SEO',
            'Implement technical SEO improvements',
        ),
        'competitor': (
            'Research competitor content',
            'Identify unique value proposition',
            'Create content for missing topics',
            'Optimize content for target keywords',
        ),
        'keyword': (
            'Research keyword intent',
            'Create content strategy',
            'Develop content for target keyword',
            'Optimize content for search',
        ),
    }

    def __init__(self):
        """Initialize the recommendation engine with required components."""
        self.ai_processor = AIProcessor()
        self.progress = ProgressTracker()

        # Define analysis stages
        self.stages = {
            'recommendation_generation': {
                'name': 'Recommendation Generation',
                'steps': [
                    'Initializing recommendation engine',
                    'Analyzing content gaps',
                    'Evaluating opportunities',
                    'Generating recommendations',
                    'Creating implementation plan'
                ]
            }
        }

    def _stage_display_name(self, stage) -> str:
        """Return a human-readable name for *stage* without ever raising.

        The progress tracker's own ``stages`` mapping is consulted first, then
        this engine's ``stages``; if neither knows the stage, its key is used.
        """
        registries = (
            getattr(self.progress, 'stages', None),
            getattr(self, 'stages', None),
        )
        for registry in registries:
            if isinstance(registry, dict):
                info = registry.get(stage)
                if isinstance(info, dict) and 'name' in info:
                    return info['name']
        return str(stage)

    def generate_recommendations(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate content recommendations.

        Args:
            website_analysis: Website analysis results
            competitor_analysis: Optional competitor analysis results
            keyword_analysis: Keyword analysis results

        Returns:
            Dictionary with 'content_gaps', 'opportunities', 'recommendations'
            and 'implementation_plan'. On failure the same keys hold empty
            values and an extra 'error' key carries the exception message.
        """
        try:
            self.progress.start_stage('recommendation_generation')
            self.progress.next_step()

            # Analyze content gaps
            content_gaps = self._analyze_content_gaps(website_analysis, competitor_analysis, keyword_analysis)
            self.progress.next_step()

            # Evaluate opportunities
            opportunities = self._evaluate_opportunities(content_gaps, keyword_analysis)
            self.progress.next_step()

            # Generate recommendations
            recommendations = self._generate_recommendations(content_gaps, opportunities)
            self.progress.next_step()

            # Create implementation plan
            implementation_plan = self._create_implementation_plan(recommendations)
            self.progress.next_step()

            self.progress.complete_stage()

            return {
                'content_gaps': content_gaps,
                'opportunities': opportunities,
                'recommendations': recommendations,
                'implementation_plan': implementation_plan
            }

        except Exception as e:
            stage = self.progress.current_stage
            if stage:
                # Resolve the name defensively: a failed lookup here would
                # raise inside the handler and hide the original error.
                stage_name = self._stage_display_name(stage)
                self.progress.update_progress(0, f"Error in {stage_name}: {str(e)}")
            st.error(f"Error generating recommendations: {str(e)}")
            return {
                'error': str(e),
                'content_gaps': [],
                'opportunities': [],
                'recommendations': [],
                'implementation_plan': {}
            }

    def _analyze_content_gaps(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Collect website, competitor (when provided) and keyword gaps into one list."""
        try:
            content_gaps = []

            # Analyze website content gaps
            content_gaps.extend(self._analyze_website_gaps(website_analysis))

            # Analyze competitor gaps if available
            if competitor_analysis:
                content_gaps.extend(self._analyze_competitor_gaps(competitor_analysis))

            # Analyze keyword gaps
            content_gaps.extend(self._analyze_keyword_gaps(keyword_analysis))

            return content_gaps
        except Exception as e:
            st.error(f"Error analyzing content gaps: {str(e)}")
            return []

    def _analyze_website_gaps(self, website_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Flag readability and SEO scores below 70 as gaps.

        A missing score defaults to 0 and is therefore reported as a gap.
        """
        try:
            gaps = []

            # Check content quality
            quality_metrics = website_analysis.get('quality_metrics', {})
            readability = quality_metrics.get('readability_score', 0)
            if readability < 70:
                gaps.append({
                    'type': 'content_quality',
                    'issue': 'Low readability score',
                    'score': readability,
                    'recommendation': 'Improve content readability'
                })

            # Check SEO elements
            seo_metrics = website_analysis.get('seo_metrics', {})
            seo_score = seo_metrics.get('seo_score', 0)
            if seo_score < 70:
                gaps.append({
                    'type': 'seo',
                    'issue': 'Low SEO score',
                    'score': seo_score,
                    'recommendation': 'Enhance SEO optimization'
                })

            return gaps
        except Exception as e:
            st.error(f"Error analyzing website gaps: {str(e)}")
            return []

    def _analyze_competitor_gaps(self, competitor_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn each entry of ``competitor_analysis['content_gaps']`` into a gap.

        Each entry is read through ``.get('missing_topics', [])``, so entries
        are expected to be dicts; anything else ends up in the error path.
        """
        try:
            return [
                {
                    'type': 'competitor',
                    'issue': f"Missing topic: {', '.join(gap.get('missing_topics', []))}",
                    'recommendation': 'Create content for missing topics'
                }
                for gap in competitor_analysis.get('content_gaps', [])
            ]
        except Exception as e:
            st.error(f"Error analyzing competitor gaps: {str(e)}")
            return []

    def _analyze_keyword_gaps(self, keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn each entry of ``keyword_analysis['opportunities']`` into a keyword gap."""
        try:
            gaps = []

            # Check keyword opportunities
            for opportunity in keyword_analysis.get('opportunities', []):
                keyword = opportunity.get('keyword')
                gaps.append({
                    'type': 'keyword',
                    'issue': f"Keyword opportunity: {keyword}",
                    # Carried verbatim so scoring never has to parse 'issue'.
                    'keyword': keyword,
                    'volume': opportunity.get('volume', 0),
                    'difficulty': opportunity.get('difficulty', 0),
                    'recommendation': f"Target keyword: {keyword}"
                })

            return gaps
        except Exception as e:
            st.error(f"Error analyzing keyword gaps: {str(e)}")
            return []

    def _evaluate_opportunities(self, content_gaps: List[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Keep gaps whose priority score exceeds 50, sorted by score descending."""
        try:
            opportunities = []

            # Evaluate each gap
            for gap in content_gaps:
                priority_score = self._calculate_priority_score(gap, keyword_analysis)

                if priority_score > 50:  # Threshold for good opportunities
                    opportunities.append({
                        'type': gap.get('type'),
                        'issue': gap.get('issue'),
                        'recommendation': gap.get('recommendation'),
                        'priority_score': priority_score
                    })

            # Sort by priority score
            opportunities.sort(key=lambda x: x['priority_score'], reverse=True)

            return opportunities
        except Exception as e:
            st.error(f"Error evaluating opportunities: {str(e)}")
            return []

    def _calculate_priority_score(self, gap: Dict[str, Any], keyword_analysis: Dict[str, Any]) -> float:
        """Score a gap between 0 and 100.

        The score starts from a per-type base value. Keyword gaps are then
        adjusted by +0.1 x volume and -0.2 x difficulty taken from
        ``keyword_analysis['trend_analysis']['trends'][keyword]`` when that
        entry exists.
        """
        try:
            gap_type = gap.get('type')
            base_score = self._BASE_PRIORITY.get(gap_type, 0)

            # Adjust score based on keyword data
            if gap_type == 'keyword':
                keyword = gap.get('keyword')
                if keyword is None:
                    # Gaps without a 'keyword' field only carry the
                    # "<label>: <keyword>" text; split once so keywords that
                    # themselves contain ': ' are not truncated.
                    issue = gap.get('issue', '')
                    _, separator, remainder = issue.partition(': ')
                    keyword = remainder if separator else issue

                keyword_data = keyword_analysis.get('trend_analysis', {}).get('trends', {}).get(keyword, {})
                if keyword_data:
                    base_score += keyword_data.get('volume', 0) * 0.1
                    base_score -= keyword_data.get('difficulty', 0) * 0.2

            return min(100, max(0, base_score))
        except Exception as e:
            st.error(f"Error calculating priority score: {str(e)}")
            return 0

    def _generate_recommendations(self, content_gaps: List[Dict[str, Any]], opportunities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Build one recommendation per opportunity.

        ``content_gaps`` is accepted for interface compatibility but is not
        used; recommendations are derived from ``opportunities`` only.
        """
        try:
            return [
                {
                    'type': opportunity.get('type'),
                    'issue': opportunity.get('issue'),
                    'recommendation': opportunity.get('recommendation'),
                    'priority': opportunity.get('priority_score', 0),
                    'implementation_steps': self._generate_implementation_steps(opportunity)
                }
                for opportunity in opportunities
            ]
        except Exception as e:
            st.error(f"Error generating recommendations: {str(e)}")
            return []

    def _generate_implementation_steps(self, opportunity: Dict[str, Any]) -> List[str]:
        """Return the canned implementation steps for the opportunity's type.

        Unknown types yield an empty list. A fresh list is returned on every
        call so callers may modify it.
        """
        try:
            return list(self._IMPLEMENTATION_STEPS.get(opportunity.get('type'), ()))
        except Exception as e:
            st.error(f"Error generating implementation steps: {str(e)}")
            return []

    def _create_implementation_plan(self, recommendations: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Group recommendations into phases by type.

        Only phases that have at least one recommendation are included;
        recommendations with an unrecognised type are left out. The top-level
        'timeline', 'resources' and 'success_metrics' entries are returned
        empty.
        """
        try:
            plan = {
                'phases': [],
                'timeline': {},
                'resources': {},
                'success_metrics': {}
            }

            # Create phases based on recommendation types
            phases = {
                'content_quality': 'Content Enhancement',
                'seo': 'SEO Optimization',
                'competitor': 'Competitive Content',
                'keyword': 'Keyword Targeting'
            }

            # Group recommendations by phase
            for phase_name in phases.values():
                phase_recommendations = [
                    rec for rec in recommendations
                    if phases.get(rec.get('type')) == phase_name
                ]

                if phase_recommendations:
                    plan['phases'].append({
                        'name': phase_name,
                        'recommendations': phase_recommendations,
                        'duration': '2-4 weeks',
                        'resources': ['Content team', 'SEO team'],
                        'success_metrics': [
                            'Content quality score',
                            'SEO performance',
                            'User engagement'
                        ]
                    })

            return plan
        except Exception as e:
            st.error(f"Error creating implementation plan: {str(e)}")
            return {}

    def _generate_content_topics(self, ai_insights: dict) -> list:
        """
        Generate content topic suggestions.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            list: Content topic suggestions (currently always empty)
        """
        # TODO: Implement content topic generation
        return []

    def _suggest_content_formats(self, ai_insights: dict) -> list:
        """
        Suggest content formats based on analysis.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            list: Content format suggestions (currently always empty)
        """
        # TODO: Implement content format suggestions
        return []

    def _calculate_priority_scores(self, ai_insights: dict) -> dict:
        """
        Calculate priority scores for recommendations.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: Priority scores for each recommendation (currently always empty)
        """
        # TODO: Implement priority scoring
        return {}

    def _create_timeline(self, ai_insights: dict) -> dict:
        """
        Create implementation timeline for recommendations.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: Timeline with empty 'short_term', 'medium_term' and
            'long_term' lists (not implemented yet)
        """
        # TODO: Implement timeline creation
        return {
            'short_term': [],
            'medium_term': [],
            'long_term': []
        }

    def _generate_specific_suggestions(self, recommendations: dict, analysis_results: dict) -> dict:
        """
        Generate specific content suggestions using existing tools.

        Args:
            recommendations (dict): General recommendations; its
                'content_topics' entry (if any) lists the topics to expand
            analysis_results (dict): Analysis results (currently unused)

        Returns:
            dict: Maps each topic to its 'titles', 'meta_descriptions' and
            'structured_data'. The last two are None when the corresponding
            generator is not available in this module.
        """
        suggestions = {}

        # NOTE(review): metadesc_generator_main and ai_structured_data are not
        # imported by this module, so calling them directly raised NameError.
        # They are looked up lazily; import them at module level to enable
        # these two outputs.
        meta_generator = globals().get('metadesc_generator_main')
        structured_generator = globals().get('ai_structured_data')

        # Generate titles for suggested topics
        for topic in recommendations.get('content_topics', []):
            suggestions[topic] = {
                'titles': ai_title_generator(topic),
                'meta_descriptions': meta_generator(topic) if callable(meta_generator) else None,
                'structured_data': structured_generator(topic) if callable(structured_generator) else None
            }

        return suggestions
|
||||
@@ -1,769 +0,0 @@
|
||||
"""
|
||||
Streamlit UI for Content Gap Analysis workflow.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
import json
|
||||
from datetime import datetime
|
||||
from .main import ContentGapAnalysis
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .website_analyzer import WebsiteAnalyzer
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor
|
||||
from .navigation import show_content_gap_analysis_nav
|
||||
from typing import Dict, Any
|
||||
import logging
|
||||
|
||||
# Configure logging
# basicConfig sets up the root logger at DEBUG level when this module is
# imported; the module-level logger below is named after this module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentGapAnalysisUI:
|
||||
"""Streamlit UI for Content Gap Analysis workflow."""
|
||||
|
||||
def __init__(self):
    """Seed the Streamlit session defaults and create the analysis components."""
    # Session keys used for progress tracking; existing values are kept.
    session_defaults = (
        ('current_step', 1),
        ('analysis_results', {}),
    )
    for state_key, initial_value in session_defaults:
        if state_key not in st.session_state:
            st.session_state[state_key] = initial_value

    # Analysis components used by the individual workflow steps.
    self.analyzer = ContentGapAnalysis()
    self.keyword_researcher = KeywordResearcher()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.website_analyzer = WebsiteAnalyzer()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
|
||||
|
||||
def run(self):
    """Run the Streamlit interface.

    Renders the sidebar navigation, the page header and the progress bar, then
    shows the workflow step stored in ``st.session_state.current_step``
    (1-4; any other value shows the export view). Errors are logged and shown
    in the UI instead of propagating.
    """
    try:
        # Show navigation
        nav_option = show_content_gap_analysis_nav()

        # Main content area
        st.title("Content Gap Analysis")
        st.markdown("""
        This tool helps you identify content gaps and opportunities by analyzing your website,
        competitors, and market trends. Follow the steps below to get started.
        """)

        # The sidebar radio always reports a selection, so comparing it on
        # every run would pin the view to that selection and ignore the
        # in-page "proceed" buttons. It therefore only moves the workflow
        # when the user actually changes it.
        nav_steps = {
            "Website Analysis": 1,
            "Competitor Analysis": 2,
            "Keyword Research": 3,
            "Recommendations": 4
        }
        previous_nav = st.session_state.get('content_gap_last_nav')
        if previous_nav is not None and nav_option != previous_nav:
            st.session_state.current_step = nav_steps.get(nav_option, st.session_state.current_step)
        st.session_state.content_gap_last_nav = nav_option

        # Progress tracking
        self._show_progress()

        # Main workflow steps
        current_step = st.session_state.current_step
        if current_step == 1:
            self._website_analysis_step()
        elif current_step == 2:
            self._competitor_analysis_step()
        elif current_step == 3:
            self._keyword_research_step()
        elif current_step == 4:
            self._recommendations_step()
        else:
            self._export_results()
    except Exception as e:
        logger.error(f"Error in run method: {str(e)}", exc_info=True)
        st.error(f"An error occurred: {str(e)}")
|
||||
|
||||
def _show_progress(self):
    """Show the overall progress bar and one status cell per workflow step."""
    steps = [
        "Website Analysis",
        "Competitor Analysis",
        "Keyword Research",
        "Recommendations",
        "Export Results"
    ]

    active_step = st.session_state.current_step
    st.progress(active_step / len(steps))

    # One column per step: finished steps are ticked, the active one is
    # highlighted and later ones are shown as pending.
    columns = st.columns(len(steps))
    for position, (column, label) in enumerate(zip(columns, steps), start=1):
        with column:
            if position < active_step:
                st.success(f"✓ {label}")
            elif position == active_step:
                st.info(f"→ {label}")
            else:
                st.text(f"○ {label}")
|
||||
|
||||
def _website_analysis_step(self):
    """Website analysis step UI.

    If website results are already stored in the session they are shown
    together with buttons to start over or move to step 2. Otherwise a form
    collects the URL and industry; on submission the website analyzer and the
    AI processor are run, their results are merged, stored under
    ``analysis_results['website']`` and displayed. Failures are logged and
    shown in the UI.
    """
    try:
        st.header("Step 1: Website Analysis")

        # Display previous results if they exist
        if 'website' in st.session_state.analysis_results:
            st.info("Previous analysis results found. You can analyze a new website or proceed to the next step.")
            self._display_website_analysis(st.session_state.analysis_results['website'])

            col1, col2 = st.columns(2)
            with col1:
                if st.button("Analyze New Website"):
                    st.session_state.analysis_results.pop('website', None)
                    st.rerun()
            with col2:
                if st.button("Proceed to Competitor Analysis"):
                    st.session_state.current_step = 2
                    st.rerun()
            return

        # Create form for new analysis
        with st.form("website_analysis_form"):
            website_url = st.text_input("Enter your website URL")
            industry = st.text_input("Enter your industry/niche")

            submitted = st.form_submit_button("Analyze Website")

        # Handle form submission outside the form
        if not submitted:
            return
        if not (website_url and industry):
            # Tell the user why nothing happened instead of ignoring the click.
            st.warning("Please enter both a website URL and your industry/niche.")
            return

        # Start every run from a clean slate; state left over from an earlier
        # analysis would otherwise be displayed as the initial status.
        st.session_state.analysis_progress = {
            'status': 'initializing',
            'current_step': 'Starting Analysis',
            'progress': 0,
            'details': 'Initializing analysis...'
        }

        # Create progress container
        progress_container = st.empty()
        status_container = st.empty()
        details_container = st.empty()

        def update_progress_display():
            """Redraw the progress bar, status line and details placeholders."""
            progress = st.session_state.analysis_progress

            # Update progress bar (the stored value is a percentage).
            # NOTE(review): the analyzer's get_progress() result replaces this
            # state below and its exact keys are not visible here, so missing
            # entries fall back to neutral defaults.
            with progress_container:
                fraction = progress.get('progress', 0) / 100
                st.progress(min(max(fraction, 0.0), 1.0))

            # Update status
            with status_container:
                status = progress.get('status')
                step_label = progress.get('current_step', '')
                if status == 'error':
                    st.error(f"Error: {step_label}")
                elif status == 'completed':
                    st.success(f"✓ {step_label}")
                else:
                    st.info(f"→ {step_label}")

            # Update details
            with details_container:
                st.write(progress.get('details', ''))

        # Initial progress display
        update_progress_display()

        try:
            # Get basic analysis
            results = self.website_analyzer.analyze(website_url)

            # Update progress from analyzer
            st.session_state.analysis_progress = self.website_analyzer.progress.get_progress()
            update_progress_display()

            # Reject unusable results before they are enriched or stored, so a
            # failed run is never recorded as a finished analysis.
            if not isinstance(results, dict):
                st.error("Invalid analysis results format")
                return
            if 'error' in results:
                st.error(f"Error in website analysis: {results['error']}")
                return

            # Get AI-enhanced analysis
            st.session_state.analysis_progress.update({
                'current_step': 'AI Analysis',
                'progress': 95,
                'details': 'Performing AI-enhanced analysis...'
            })
            update_progress_display()

            ai_analysis = self.ai_processor.analyze_content({
                'url': website_url,
                'industry': industry,
                'content': results
            })

            # Combine results
            results.update(ai_analysis)

            # Store results in session state
            st.session_state.analysis_results['website'] = results

            # Update final progress
            st.session_state.analysis_progress.update({
                'status': 'completed',
                'current_step': 'Analysis Complete',
                'progress': 100,
                'details': 'Analysis completed successfully!'
            })
            update_progress_display()

            # Display results
            self._display_website_analysis(results)

        except Exception as e:
            logger.error(f"Error during website analysis: {str(e)}", exc_info=True)
            st.session_state.analysis_progress.update({
                'status': 'error',
                'current_step': 'Analysis Failed',
                'details': f"Error during website analysis: {str(e)}"
            })
            update_progress_display()
            st.error(f"Error during website analysis: {str(e)}")
            return

    except Exception as e:
        logger.error(f"Error in website analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in website analysis: {str(e)}")
|
||||
|
||||
def _display_website_analysis(self, results: Dict[str, Any]):
    """Display website analysis results.

    Renders, in order: content metrics, SEO metrics (including meta tags),
    technical information, performance metrics and insights. Sections whose
    data is missing or empty show only their heading. Non-dict results or
    results carrying an 'error' entry produce an error message instead.

    Args:
        results: Website analysis results; the 'content_metrics',
            'seo_metrics', 'technical_info', 'performance' and 'insights'
            entries are read.
    """
    def status_chart(labels, flags, title):
        """Build a bar chart of on/off features with fixed colours."""
        # An explicit map ties green to True and red to False; a plain colour
        # sequence is assigned by order of appearance, which painted a
        # leading True value red.
        return px.bar(
            {'Feature': labels, 'Status': flags},
            x='Feature',
            y='Status',
            title=title,
            color='Status',
            color_discrete_map={True: 'green', False: 'red'}
        )

    try:
        if not isinstance(results, dict):
            st.error("Invalid analysis results format")
            return

        if 'error' in results:
            st.error(f"Error in analysis: {results['error']}")
            return

        # Content Metrics
        st.subheader("Content Metrics")
        content_metrics = results.get('content_metrics', {})

        if content_metrics:
            # Basic metrics in columns
            basic_metrics = (
                ("Word Count", 'word_count'),
                ("Headings", 'heading_count'),
                ("Images", 'image_count'),
                ("Links", 'link_count'),
            )
            for column, (label, key) in zip(st.columns(4), basic_metrics):
                with column:
                    st.metric(label, f"{content_metrics.get(key, 0):,}")

            # Content Structure Visualization
            st.write("Content Structure")
            heading_data = {
                'Type': ['H1', 'H2', 'H3', 'Paragraphs'],
                'Count': [
                    content_metrics.get('h1_count', 0),
                    content_metrics.get('h2_count', 0),
                    content_metrics.get('h3_count', 0),
                    content_metrics.get('paragraph_count', 0)
                ]
            }
            fig = px.bar(
                heading_data,
                x='Type',
                y='Count',
                title="Content Structure Distribution",
                color='Type',
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            st.plotly_chart(fig, use_container_width=True)

            # Content Features
            st.write("Content Features")
            fig = status_chart(
                ['Meta Description', 'Robots.txt', 'Sitemap'],
                [
                    content_metrics.get('has_meta_description', False),
                    content_metrics.get('has_robots_txt', False),
                    content_metrics.get('has_sitemap', False)
                ],
                "Content Features Status"
            )
            st.plotly_chart(fig, use_container_width=True)

        # SEO Metrics
        st.subheader("SEO Metrics")
        seo_metrics = results.get('seo_metrics', {})

        if seo_metrics:
            overall_score = seo_metrics.get('overall_score', 0)
            seo_content = seo_metrics.get('content', {})
            content_quality = seo_content.get('content_quality_score', 0)
            readability = seo_content.get('readability_score', 0)
            keyword_density = seo_content.get('keyword_density', 0)

            # Basic metrics in columns
            seo_values = (
                ("Overall Score", overall_score),
                ("Content Quality", content_quality),
                ("Readability", readability),
                ("Keyword Density", keyword_density),
            )
            for column, (label, value) in zip(st.columns(4), seo_values):
                with column:
                    st.metric(label, f"{value:.1f}%")

            # SEO Scores Radar Chart
            seo_scores = {
                'Metric': ['Overall', 'Content Quality', 'Readability', 'Keyword Density'],
                'Score': [overall_score, content_quality, readability, keyword_density]
            }
            fig = px.line_polar(
                seo_scores,
                r='Score',
                theta='Metric',
                line_close=True,
                title="SEO Performance Overview"
            )
            fig.update_traces(fill='toself')
            st.plotly_chart(fig, use_container_width=True)

            # Meta Tags Analysis
            st.write("Meta Tags Analysis")
            meta_tags = seo_metrics.get('meta_tags', {})
            if meta_tags:
                # (heading, key in meta_tags, whether length/score are shown)
                tag_sections = (
                    ("Title Tag", 'title', True),
                    ("Meta Description", 'description', True),
                    ("Meta Keywords", 'keywords', False),
                )
                for heading, tag_key, has_length_and_score in tag_sections:
                    tag = meta_tags.get(tag_key, {})
                    st.write(heading)
                    st.write(f"Status: {'✅' if tag.get('status') == 'good' else '❌'}")
                    st.write(f"Value: {tag.get('value', 'N/A')}")
                    if has_length_and_score:
                        st.write(f"Length: {tag.get('length', 0)} characters")
                        st.write(f"Score: {tag.get('score', 0)}%")
                    if tag.get('recommendation'):
                        st.warning(tag.get('recommendation'))

        # Technical Metrics
        st.subheader("Technical Metrics")
        technical_info = results.get('technical_info', {})

        if technical_info:
            col1, col2 = st.columns(2)
            with col1:
                st.write("Basic Information")
                server_info = technical_info.get('server_info', {})
                st.metric("Status Code", technical_info.get('status_code', 'N/A'))
                st.metric("Server", server_info.get('server', 'N/A'))
                st.metric("Content Type", server_info.get('content_type', 'N/A'))
            with col2:
                st.write("Security Information")
                security_info = technical_info.get('security_info', {})
                fig = status_chart(
                    ['SSL', 'HSTS', 'XSS Protection'],
                    [
                        security_info.get('ssl', False),
                        security_info.get('hsts', False),
                        security_info.get('xss_protection', False)
                    ],
                    "Security Features Status"
                )
                st.plotly_chart(fig, use_container_width=True)

        # Performance Metrics
        st.subheader("Performance Metrics")
        performance = results.get('performance', {})

        if performance:
            # Basic metrics in columns
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Load Time", f"{performance.get('load_time', 0):.2f}s")
            with col2:
                st.metric("Page Size", f"{performance.get('page_size', 0):.1f} KB")
            with col3:
                st.metric("Status Code", performance.get('status_code', 'N/A'))
            with col4:
                st.metric("Response Time", f"{performance.get('response_time', 0):.2f}s")

        # Insights and Recommendations
        st.subheader("Insights and Recommendations")
        insights = results.get('insights', [])
        if insights:
            for insight in insights:
                st.info(f"• {insight}")
        else:
            st.info("No specific insights available")

    except Exception as e:
        logger.error(f"Error displaying website analysis: {str(e)}", exc_info=True)
        st.error(f"Error displaying website analysis: {str(e)}")
|
||||
|
||||
def _competitor_analysis_step(self):
    """Render wizard step 2: collect competitor URLs and analyze them.

    A form offers a text area for competitor URLs, one per line. When the
    form is submitted with non-empty input, the non-blank lines are passed
    to ``self.competitor_analyzer.analyze``; the output is merged with the
    result of ``self.ai_processor.analyze_competitors``, saved under
    ``st.session_state.analysis_results['competitors']`` and displayed.
    ``st.session_state.current_step`` is then set to 3 and the script is
    rerun.

    Any ``Exception`` raised on the way is logged with its traceback and
    reported through ``st.error`` rather than propagated.
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation - confirm the submit handling belongs inside the
    # form block.
    try:
        st.header("Step 2: Competitor Analysis")

        with st.form("competitor_analysis_form"):
            raw_input = st.text_area(
                "Enter competitor URLs (one per line)",
                help="Enter the URLs of your main competitors",
            )
            was_submitted = st.form_submit_button("Analyze Competitors")

            # Stay idle until the form is submitted with some text in it.
            if not (was_submitted and raw_input):
                return

            with st.spinner("Analyzing competitors..."):
                # One URL per line; blank and whitespace-only lines are dropped.
                stripped_lines = (line.strip() for line in raw_input.split('\n'))
                competitor_urls = [url for url in stripped_lines if url]

                results = self.competitor_analyzer.analyze(competitor_urls)

                # Get AI-enhanced competitor analysis
                ai_payload = {
                    'competitors': competitor_urls,
                    'analysis': results,
                }
                ai_analysis = self.ai_processor.analyze_competitors(ai_payload)

                # Merge the AI output into the base results and persist them.
                results.update(ai_analysis)
                st.session_state.analysis_results['competitors'] = results

                self._display_competitor_analysis(results)

                # Advance the wizard to the next step.
                st.session_state.current_step = 3
                st.rerun()
    except Exception as e:
        logger.error(f"Error in competitor analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in competitor analysis: {str(e)}")
|
||||
|
||||
def _display_competitor_analysis(self, results: dict):
    """Render the competitor analysis results.

    Args:
        results: Mapping read with ``.get`` for the keys ``'comparison'``
            (rows for a bar chart with ``competitor``, ``score`` and
            ``metric`` columns), ``'competitor_trends'`` (name -> mapping
            with ``'timeline'`` and ``'scores'``), ``'content_gaps'``
            (iterable of messages) and ``'competitive_insights'``
            (category -> iterable of points). Missing keys render nothing
            beyond the section headings.
    """
    st.subheader("Competitor Analysis Results")

    # Competitor comparison
    st.subheader("Competitor Comparison")
    comparison_frame = pd.DataFrame(results.get('comparison', []))
    if not comparison_frame.empty:
        comparison_chart = px.bar(
            comparison_frame,
            x='competitor',
            y='score',
            color='metric',
            title="Competitor Comparison",
        )
        st.plotly_chart(comparison_chart)

    # AI-enhanced competitor analysis
    st.subheader("AI-Enhanced Competitor Analysis")

    # One line per competitor, plotted only when trend data is present.
    competitor_trends = results.get('competitor_trends', {})
    if competitor_trends:
        trend_chart = go.Figure()
        for name, series in competitor_trends.items():
            trace = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=name,
                mode='lines+markers',
            )
            trend_chart.add_trace(trace)
        trend_chart.update_layout(
            title="Competitor Performance Trends",
            xaxis_title="Timeline",
            yaxis_title="Score",
        )
        st.plotly_chart(trend_chart)

    # Content gaps
    st.subheader("Content Gaps")
    for gap_message in results.get('content_gaps', []):
        st.info(f"• {gap_message}")

    # AI-generated competitive insights, one expander per category.
    st.subheader("Competitive Insights")
    insight_groups = results.get('competitive_insights', {})
    if insight_groups:
        for category, points in insight_groups.items():
            with st.expander(f"{category.title()} Analysis"):
                for entry in points:
                    st.success(f"• {entry}")
|
||||
|
||||
def _keyword_research_step(self):
    """Render wizard step 3: keyword research for an industry/niche.

    The form's text input is pre-filled from
    ``st.session_state.analysis_results['website']['industry']`` when that
    value exists. On submission with a non-empty industry the value is
    passed to ``self.keyword_researcher.research``; the output is merged
    with the result of ``self.ai_processor.analyze_keywords``, saved under
    ``st.session_state.analysis_results['keywords']`` and displayed.
    ``st.session_state.current_step`` is then set to 4 and the script is
    rerun.

    Any ``Exception`` raised on the way is logged with its traceback and
    reported through ``st.error`` rather than propagated.
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation - confirm the submit handling belongs inside the
    # form block.
    try:
        st.header("Step 3: Keyword Research")

        with st.form("keyword_research_form"):
            website_results = st.session_state.analysis_results.get('website', {})
            industry = st.text_input(
                "Enter your industry/niche",
                value=website_results.get('industry', ''),
            )
            was_submitted = st.form_submit_button("Research Keywords")

            # Stay idle until the form is submitted with an industry filled in.
            if not (was_submitted and industry):
                return

            with st.spinner("Researching keywords..."):
                results = self.keyword_researcher.research(industry)

                # Get AI-enhanced keyword analysis
                ai_payload = {
                    'industry': industry,
                    'keywords': results,
                }
                ai_analysis = self.ai_processor.analyze_keywords(ai_payload)

                # Merge the AI output into the base results and persist them.
                results.update(ai_analysis)
                st.session_state.analysis_results['keywords'] = results

                self._display_keyword_research(results)

                # Advance the wizard to the next step.
                st.session_state.current_step = 4
                st.rerun()
    except Exception as e:
        logger.error(f"Error in keyword research step: {str(e)}", exc_info=True)
        st.error(f"Error in keyword research: {str(e)}")
|
||||
|
||||
def _display_keyword_research(self, results: dict):
    """Render the keyword research results.

    Args:
        results: Mapping read with ``.get`` for the keys ``'keywords'``
            (rows for a scatter plot using the ``search_volume``,
            ``difficulty``, ``relevance_score`` and ``keyword`` columns),
            ``'keyword_trends'`` (keyword -> mapping with ``'timeline'``
            and ``'scores'``), ``'search_intent'`` (mapping whose
            ``'summary'`` feeds a pie chart with ``count`` and ``intent``
            columns), ``'content_formats'`` (iterable of suggestions) and
            ``'keyword_insights'`` (category -> iterable of points).
    """
    st.subheader("Keyword Research Results")

    # Keyword metrics
    st.subheader("Keyword Metrics")
    keyword_frame = pd.DataFrame(results.get('keywords', []))
    if not keyword_frame.empty:
        opportunity_chart = px.scatter(
            keyword_frame,
            x='search_volume',
            y='difficulty',
            size='relevance_score',
            hover_data=['keyword'],
            title="Keyword Opportunities",
        )
        st.plotly_chart(opportunity_chart)

    # AI-enhanced keyword analysis
    st.subheader("AI-Enhanced Keyword Analysis")

    # One line per keyword, plotted only when trend data is present.
    keyword_trends = results.get('keyword_trends', {})
    if keyword_trends:
        trend_chart = go.Figure()
        for term, series in keyword_trends.items():
            trace = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=term,
                mode='lines+markers',
            )
            trend_chart.add_trace(trace)
        trend_chart.update_layout(
            title="Keyword Trend Analysis",
            xaxis_title="Timeline",
            yaxis_title="Trend Score",
        )
        st.plotly_chart(trend_chart)

    # Search intent distribution
    st.subheader("Search Intent Distribution")
    intent_summary = results.get('search_intent', {}).get('summary', {})
    intent_frame = pd.DataFrame(intent_summary)
    if not intent_frame.empty:
        intent_chart = px.pie(
            intent_frame,
            values='count',
            names='intent',
            title="Search Intent Distribution",
        )
        st.plotly_chart(intent_chart)

    # Content format suggestions
    st.subheader("Content Format Suggestions")
    for suggestion in results.get('content_formats', []):
        st.info(f"• {suggestion}")

    # AI-generated keyword insights, one expander per category.
    st.subheader("Keyword Insights")
    insight_groups = results.get('keyword_insights', {})
    if insight_groups:
        for category, points in insight_groups.items():
            with st.expander(f"{category.title()} Insights"):
                for entry in points:
                    st.success(f"• {entry}")
|
||||
|
||||
def _recommendations_step(self):
    """Render wizard step 4: generate and show content recommendations.

    Passes everything collected so far
    (``st.session_state.analysis_results``) to
    ``self.recommendation_engine.generate_recommendations``, merges the
    output with the result of ``self.ai_processor.analyze_recommendations``,
    saves it under ``st.session_state.analysis_results['recommendations']``
    and displays it. ``st.session_state.current_step`` is then set to 5
    and the script is rerun.

    Any ``Exception`` raised on the way is logged with its traceback and
    reported through ``st.error`` rather than propagated.
    """
    # NOTE(review): block nesting was reconstructed from a listing that had
    # lost its indentation - confirm how much of the step runs inside the
    # spinner.
    try:
        st.header("Step 4: Content Recommendations")

        with st.spinner("Generating recommendations..."):
            collected = st.session_state.analysis_results
            results = self.recommendation_engine.generate_recommendations(collected)

            # Get AI-enhanced recommendations
            ai_payload = {
                'recommendations': results,
                'analysis': st.session_state.analysis_results,
            }
            ai_recommendations = self.ai_processor.analyze_recommendations(ai_payload)

            # Merge the AI output into the base results and persist them.
            results.update(ai_recommendations)
            st.session_state.analysis_results['recommendations'] = results

            self._display_recommendations(results)

            # Advance the wizard to the export step.
            st.session_state.current_step = 5
            st.rerun()
    except Exception as e:
        logger.error(f"Error in recommendations step: {str(e)}", exc_info=True)
        st.error(f"Error in recommendations: {str(e)}")
|
||||
|
||||
def _display_recommendations(self, results: dict):
    """Render the content recommendations.

    Args:
        results: Mapping read with ``.get`` for the keys ``'priorities'``
            (iterable of messages), ``'impact_analysis'`` (metric ->
            mapping with ``'categories'`` and ``'scores'``, drawn as
            grouped bars), ``'timeline'`` (iterable of messages),
            ``'impact'`` (label -> value, shown as metrics) and
            ``'strategic_insights'`` (category -> iterable of points).
    """
    st.subheader("Content Recommendations")

    # Priority recommendations
    st.subheader("Priority Recommendations")
    for priority_item in results.get('priorities', []):
        st.success(f"• {priority_item}")

    # AI-enhanced recommendations
    st.subheader("AI-Enhanced Recommendations")

    # One bar series per metric, plotted only when impact data is present.
    impact_analysis = results.get('impact_analysis', {})
    if impact_analysis:
        impact_chart = go.Figure()
        for metric_name, series in impact_analysis.items():
            bars = go.Bar(
                name=metric_name,
                x=series.get('categories', []),
                y=series.get('scores', []),
            )
            impact_chart.add_trace(bars)
        impact_chart.update_layout(
            title="Recommendation Impact Analysis",
            xaxis_title="Categories",
            yaxis_title="Impact Score",
            barmode='group',
        )
        st.plotly_chart(impact_chart)

    # Implementation timeline
    st.subheader("Implementation Timeline")
    for milestone in results.get('timeline', []):
        st.info(f"• {milestone}")

    # Expected impact
    st.subheader("Expected Impact")
    expected_impact = results.get('impact', {})
    for label, amount in expected_impact.items():
        st.metric(label, amount)

    # AI-generated strategic insights, one expander per category.
    st.subheader("Strategic Insights")
    insight_groups = results.get('strategic_insights', {})
    if insight_groups:
        for category, points in insight_groups.items():
            with st.expander(f"{category.title()} Strategy"):
                for entry in points:
                    st.success(f"• {entry}")
|
||||
|
||||
def _export_results(self):
    """Render wizard step 5: choose an export format and export.

    A radio group offers JSON, CSV and PDF. Pressing the export button
    calls ``self._export_json`` or ``self._export_csv`` for the first two
    choices; any other choice shows a "coming soon" notice.
    """
    st.header("Step 5: Export Results")

    # Export options
    chosen_format = st.radio(
        "Choose export format",
        ["JSON", "CSV", "PDF"],
    )

    # Nothing happens until the user presses the button.
    if not st.button("Export Results"):
        return

    exporters = {
        "JSON": self._export_json,
        "CSV": self._export_csv,
    }
    exporter = exporters.get(chosen_format)
    if exporter is None:
        st.info("PDF export coming soon!")
    else:
        exporter()
|
||||
|
||||
def _export_json(self):
    """Offer the stored analysis results as a JSON download.

    Serializes ``st.session_state.analysis_results`` with two-space
    indentation and renders a download button whose file name carries the
    current local time as ``YYYYmmdd_HHMMSS``.
    """
    payload = json.dumps(st.session_state.analysis_results, indent=2)
    filename = f"content_gap_analysis_{datetime.now():%Y%m%d_%H%M%S}.json"

    st.download_button(
        "Download JSON",
        data=payload,
        file_name=filename,
        mime="application/json",
    )
|
||||
|
||||
def _export_csv(self):
    """Offer the stored analysis results as a CSV download.

    Flattens ``st.session_state.analysis_results`` into rows: a section
    whose value is a dict becomes one row, and a section whose value is a
    list contributes one row per dict item (non-dict items are ignored).
    Every row gets a ``section`` column naming the section it came from.
    Sections holding any other kind of value are skipped.

    The rows are built from shallow copies so that exporting never writes
    a ``section`` key into the results kept in session state; those
    results are reused by the JSON export and by later analysis steps.

    When no rows can be produced an informational message is shown
    instead of a download button.
    """
    results = st.session_state.analysis_results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Convert results to CSV rows without touching the stored dicts.
    csv_rows = []
    for section, data in results.items():
        if isinstance(data, list):
            csv_rows.extend(
                {**item, 'section': section}
                for item in data
                if isinstance(item, dict)
            )
        elif isinstance(data, dict):
            csv_rows.append({**data, 'section': section})

    if not csv_rows:
        # Previously the click had no visible effect in this case.
        st.info("No results available to export as CSV.")
        return

    frame = pd.DataFrame(csv_rows)
    filename = f"content_gap_analysis_{timestamp}.csv"

    st.download_button(
        "Download CSV",
        data=frame.to_csv(index=False),
        file_name=filename,
        mime="text/csv",
    )
|
||||
|
||||
def main():
    """Create the content gap analysis UI and run it."""
    ContentGapAnalysisUI().run()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,249 +0,0 @@
|
||||
# Content Gap Analysis Utils
|
||||
|
||||
This directory contains utility modules that power the Content Gap Analysis tool. These modules provide core functionality for data collection, processing, analysis, and storage.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
utils/
|
||||
├── README.md
|
||||
├── ai_processor.py # AI-powered content analysis and processing
|
||||
├── content_parser.py # Content structure parsing and analysis
|
||||
├── data_collector.py # Website data collection and processing
|
||||
└── storage.py # Analysis results storage and retrieval
|
||||
```
|
||||
|
||||
## Module Descriptions
|
||||
|
||||
### 1. AI Processor (`ai_processor.py`)
|
||||
|
||||
The AI Processor module enhances content analysis using AI techniques. It provides intelligent analysis of website content, competitor data, and keyword research.
|
||||
|
||||
#### Key Features:
|
||||
- Content quality assessment
|
||||
- Topic analysis and clustering
|
||||
- Performance metrics analysis
|
||||
- Strategic recommendations generation
|
||||
- Progress tracking for analysis tasks
|
||||
|
||||
#### Main Components:
|
||||
- `AIProcessor`: Main class for AI-powered analysis
|
||||
- `ProgressTracker`: Tracks analysis progress and status
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.ai_processor import AIProcessor
|
||||
|
||||
processor = AIProcessor()
|
||||
analysis = processor.analyze_content({
|
||||
'url': 'https://example.com',
|
||||
'industry': 'technology',
|
||||
'content': content_data
|
||||
})
|
||||
```
|
||||
|
||||
### 2. Content Parser (`content_parser.py`)
|
||||
|
||||
The Content Parser module handles the parsing and analysis of website content structure. It provides detailed insights into content organization and quality.
|
||||
|
||||
#### Key Features:
|
||||
- Content structure analysis
|
||||
- Text statistics calculation
|
||||
- Topic extraction
|
||||
- Readability analysis
|
||||
- Content hierarchy analysis
|
||||
|
||||
#### Main Components:
|
||||
- `ContentParser`: Main class for content parsing and analysis
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.content_parser import ContentParser
|
||||
|
||||
parser = ContentParser()
|
||||
structure = parser.parse_structure({
|
||||
'main_content': content,
|
||||
'html': html_content,
|
||||
'headings': headings_data
|
||||
})
|
||||
```
|
||||
|
||||
### 3. Data Collector (`data_collector.py`)
|
||||
|
||||
The Data Collector module is responsible for gathering website data for analysis. It handles web scraping and data extraction.
|
||||
|
||||
#### Key Features:
|
||||
- Website content collection
|
||||
- Metadata extraction
|
||||
- Heading structure analysis
|
||||
- Link and image extraction
|
||||
- Error handling and retry logic
|
||||
|
||||
#### Main Components:
|
||||
- `DataCollector`: Main class for data collection
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.data_collector import DataCollector
|
||||
|
||||
collector = DataCollector()
|
||||
data = collector.collect('https://example.com')
|
||||
```
|
||||
|
||||
### 4. Storage (`storage.py`)
|
||||
|
||||
The Storage module manages the persistence and retrieval of analysis results. It provides a robust database interface for storing and accessing analysis data.
|
||||
|
||||
#### Key Features:
|
||||
- Analysis results storage
|
||||
- Historical data management
|
||||
- Recommendation tracking
|
||||
- User-specific analysis storage
|
||||
- Error handling and rollback support
|
||||
|
||||
#### Main Components:
|
||||
- `ContentGapAnalysisStorage`: Main class for storage operations
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.storage import ContentGapAnalysisStorage
|
||||
|
||||
storage = ContentGapAnalysisStorage(db_session)
|
||||
analysis_id = storage.save_analysis(
|
||||
user_id=1,
|
||||
website_url='https://example.com',
|
||||
industry='technology',
|
||||
results=analysis_results
|
||||
)
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
### 1. Website Analysis Integration
|
||||
```python
|
||||
from utils.data_collector import DataCollector
|
||||
from utils.content_parser import ContentParser
|
||||
from utils.ai_processor import AIProcessor
|
||||
|
||||
# Collect data
|
||||
collector = DataCollector()
|
||||
data = collector.collect(url)
|
||||
|
||||
# Parse content
|
||||
parser = ContentParser()
|
||||
structure = parser.parse_structure(data)
|
||||
|
||||
# Process with AI
|
||||
processor = AIProcessor()
|
||||
analysis = processor.analyze_content({
|
||||
'url': url,
|
||||
'content': structure
|
||||
})
|
||||
```
|
||||
|
||||
### 2. Storage Integration
|
||||
```python
|
||||
from utils.storage import ContentGapAnalysisStorage
|
||||
|
||||
# Store analysis results
|
||||
storage = ContentGapAnalysisStorage(db_session)
|
||||
analysis_id = storage.save_analysis(
|
||||
user_id=user_id,
|
||||
website_url=url,
|
||||
industry=industry,
|
||||
results=analysis_results
|
||||
)
|
||||
|
||||
# Retrieve analysis
|
||||
results = storage.get_analysis(analysis_id)
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
All modules implement comprehensive error handling:
|
||||
|
||||
1. **Data Collection Errors**
|
||||
- Network timeouts
|
||||
- Invalid URLs
|
||||
- Access restrictions
|
||||
- Parsing errors
|
||||
|
||||
2. **Processing Errors**
|
||||
- Invalid data formats
|
||||
- AI processing failures
|
||||
- Resource limitations
|
||||
- Analysis timeouts
|
||||
|
||||
3. **Storage Errors**
|
||||
- Database connection issues
|
||||
- Transaction failures
|
||||
- Data validation errors
|
||||
- Concurrent access conflicts
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Data Collection**
|
||||
- Implement rate limiting
|
||||
- Use proper user agents
|
||||
- Handle redirects
|
||||
- Validate input data
|
||||
|
||||
2. **Content Processing**
|
||||
- Clean and normalize data
|
||||
- Handle encoding issues
|
||||
- Implement fallback strategies
|
||||
- Cache processed results
|
||||
|
||||
3. **Storage Management**
|
||||
- Use transactions
|
||||
- Implement data validation
|
||||
- Handle concurrent access
|
||||
- Maintain data integrity
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Performance Optimizations**
|
||||
- Implement parallel processing
|
||||
- Add caching layer
|
||||
- Optimize database queries
|
||||
- Enhance error recovery
|
||||
|
||||
2. **Feature Additions**
|
||||
- Content performance tracking
|
||||
- Automated content planning
|
||||
- Enhanced competitive intelligence
|
||||
- Advanced topic clustering
|
||||
|
||||
3. **Integration Improvements**
|
||||
- API endpoints
|
||||
- Export capabilities
|
||||
- Data visualization
|
||||
- Progress tracking
|
||||
|
||||
4. **UI/UX Enhancements**
|
||||
- Interactive visualizations
|
||||
- Real-time progress updates
|
||||
- Export interfaces
|
||||
- Customization options
|
||||
|
||||
## Contributing
|
||||
|
||||
When contributing to these utility modules:
|
||||
|
||||
1. Follow the existing code structure
|
||||
2. Add comprehensive error handling
|
||||
3. Include unit tests
|
||||
4. Update documentation
|
||||
5. Follow the PEP 8 style guide
|
||||
|
||||
## Dependencies
|
||||
|
||||
- BeautifulSoup4: HTML parsing
|
||||
- NLTK: Natural language processing
|
||||
- SQLAlchemy: Database operations
|
||||
- Streamlit: UI components
|
||||
- Requests: HTTP requests
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the LICENSE file for details.
|
||||
@@ -1,13 +0,0 @@
|
||||
"""
|
||||
Utility modules for content gap analysis.
|
||||
"""
|
||||
|
||||
from .data_collector import DataCollector
|
||||
from .content_parser import ContentParser
|
||||
from .ai_processor import AIProcessor
|
||||
|
||||
__all__ = [
|
||||
'DataCollector',
|
||||
'ContentParser',
|
||||
'AIProcessor'
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,236 +0,0 @@
|
||||
"""
|
||||
Content parser utility for analyzing website content structure.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import nltk
|
||||
from nltk.tokenize import sent_tokenize, word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
from collections import Counter
|
||||
|
||||
class ContentParser:
    """Parser for analyzing website content structure.

    Combines NLTK tokenization with BeautifulSoup parsing to derive text
    statistics, content sections, frequent terms, readability figures and
    heading-hierarchy metrics from collected page data.
    """

    def __init__(self):
        """Initialize the parser.

        Looks up the NLTK ``punkt`` tokenizer and ``stopwords`` corpus and
        downloads whichever is missing, then caches the English stop words
        as a set in ``self.stop_words``.
        """
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('stopwords')

        self.stop_words = set(stopwords.words('english'))

    def parse_structure(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse and analyze the structure of website content.

        Args:
            content: Dictionary containing website content. The keys read
                are ``'main_content'`` (text), ``'html'`` (markup),
                ``'headings'`` and ``'metadata'``; each is optional.

        Returns:
            Dictionary with the keys ``text_statistics``, ``sections``,
            ``topics``, ``readability``, ``hierarchy`` and ``metadata``.
            If any analysis step raises, the same keys are returned with
            empty values plus an ``error`` key holding the message.
        """
        try:
            # Parse main content
            main_content = content.get('main_content', '')
            soup = BeautifulSoup(content.get('html', ''), 'html.parser')

            return {
                'text_statistics': self._analyze_text(main_content),
                'sections': self._extract_sections(soup),
                'topics': self._extract_topics(main_content),
                'readability': self._analyze_readability(main_content),
                'hierarchy': self._analyze_hierarchy(content.get('headings', [])),
                'metadata': content.get('metadata', {})
            }

        except Exception as e:
            # Best effort by design: callers always get the full key set.
            return {
                'error': str(e),
                'text_statistics': {},
                'sections': [],
                'topics': [],
                'readability': {},
                'hierarchy': {},
                'metadata': {}
            }

    def _analyze_text(self, text: str) -> Dict[str, Any]:
        """Analyze text statistics.

        ``word_count``, ``unique_words`` and ``average_sentence_length``
        are computed over alphanumeric tokens with stop words removed;
        ``stop_words`` counts the tokens that are stop words.
        """
        sentences = sent_tokenize(text)
        # Tokenize once and reuse the tokens for both word lists.
        tokens = word_tokenize(text.lower())
        words = [w for w in tokens if w.isalnum() and w not in self.stop_words]

        return {
            'word_count': len(words),
            'sentence_count': len(sentences),
            'average_sentence_length': len(words) / max(len(sentences), 1),
            'unique_words': len(set(words)),
            'stop_words': sum(1 for w in tokens if w in self.stop_words),
            'characters': len(text),
            'paragraphs': len(text.split('\n\n')),
            'sentences': sentences
        }

    def _extract_sections(self, soup: BeautifulSoup) -> List[Dict[str, Any]]:
        """Extract content sections.

        A section is any ``article``, ``section`` or ``div`` element whose
        class matches ``content|main|article|section``. Nested matches are
        each reported as their own section.
        """
        sections = []

        # Find main content containers
        containers = soup.find_all(['article', 'section', 'div'], class_=re.compile(r'content|main|article|section'))

        for container in containers:
            # Get section heading
            heading = container.find(['h1', 'h2', 'h3'])
            heading_text = heading.get_text().strip() if heading else 'Untitled Section'

            # Get section content
            content = container.get_text().strip()

            # Section type: the class list when present, else the tag name.
            section_type = container.name
            if container.get('class'):
                section_type = ' '.join(container.get('class'))

            sections.append({
                'heading': heading_text,
                'content': content,
                'type': section_type,
                'word_count': len(word_tokenize(content)),
                'position': self._get_element_position(container)
            })

        return sections

    def _extract_topics(self, text: str) -> List[Dict[str, Any]]:
        """Extract main topics from content.

        Returns up to ten of the most frequent alphanumeric, non-stop-word
        tokens, each with its count and its share of all such tokens in
        percent.
        """
        # Tokenize and clean text
        words = word_tokenize(text.lower())
        words = [w for w in words if w.isalnum() and w not in self.stop_words]

        # Get word frequencies
        word_freq = Counter(words)

        # The loop body only runs when words is non-empty, so the division
        # below cannot hit zero.
        return [
            {
                'topic': word,
                'frequency': freq,
                'percentage': freq / len(words) * 100
            }
            for word, freq in word_freq.most_common(10)
        ]

    def _analyze_readability(self, text: str) -> Dict[str, float]:
        """Analyze text readability.

        Returns the Flesch Reading Ease score clamped to 0-100, the
        average sentence length in words, the average word length in
        characters and the average syllables per word. Syllables come from
        the heuristic in ``_count_syllables``.
        """
        sentences = sent_tokenize(text)
        words = word_tokenize(text.lower())
        words = [w for w in words if w.isalnum()]

        # max(..., 1) keeps every ratio defined for empty text.
        word_total = max(len(words), 1)
        avg_sentence_length = len(words) / max(len(sentences), 1)
        avg_word_length = sum(len(w) for w in words) / word_total

        # Flesch Reading Ease:
        # 206.835 - 1.015(total words/total sentences) - 84.6(total syllables/total words)
        syllables = sum(self._count_syllables(w) for w in words)
        syllables_per_word = syllables / word_total
        flesch_score = 206.835 - 1.015 * avg_sentence_length - 84.6 * syllables_per_word

        return {
            'flesch_score': max(0, min(100, flesch_score)),
            'avg_sentence_length': avg_sentence_length,
            'avg_word_length': avg_word_length,
            'syllables_per_word': syllables_per_word
        }

    def _analyze_hierarchy(self, headings: Any) -> Dict[str, Any]:
        """Analyze content hierarchy.

        Args:
            headings: Either a list of dicts that each carry a ``'level'``
                key such as ``'h2'``, or a mapping from level name to a
                list of headings at that level (the shape produced by
                ``DataCollector._extract_headings``). Levels with no
                headings in the mapping are ignored. Level names are
                expected to be a letter followed by the depth digit.

        Returns:
            Dictionary with ``total_headings``, ``max_depth``,
            ``heading_distribution`` (level -> count) and
            ``has_proper_hierarchy``.
        """
        # Group headings by level, normalizing both accepted input shapes.
        heading_levels: Dict[str, List[Any]] = {}
        if isinstance(headings, dict):
            for level, entries in headings.items():
                if entries:
                    heading_levels[level] = [
                        {'level': level, 'text': entry} for entry in entries
                    ]
        else:
            for heading in headings or []:
                heading_levels.setdefault(heading['level'], []).append(heading)

        # Calculate hierarchy metrics
        total_headings = sum(len(group) for group in heading_levels.values())
        max_depth = max((int(level[1]) for level in heading_levels), default=0)

        return {
            'total_headings': total_headings,
            'max_depth': max_depth,
            'heading_distribution': {level: len(group) for level, group in heading_levels.items()},
            'has_proper_hierarchy': self._check_proper_hierarchy(heading_levels)
        }

    def _check_proper_hierarchy(self, heading_levels: Dict[str, List[Dict[str, Any]]]) -> bool:
        """Check if headings follow proper hierarchy.

        Proper means: exactly one ``h1`` exists and the set of levels in
        use has no gaps (for example ``h1`` and ``h3`` without ``h2``).
        """
        if not heading_levels:
            return False

        # There must be exactly one h1.
        if 'h1' not in heading_levels:
            return False
        if len(heading_levels['h1']) > 1:
            return False

        # Check if levels are sequential
        levels = sorted(int(level[1]) for level in heading_levels.keys())
        return all(levels[i] - levels[i-1] <= 1 for i in range(1, len(levels)))

    def _count_syllables(self, word: str) -> int:
        """Count syllables in a word.

        Heuristic: every run of consecutive vowels (``aeiouy``) counts as
        one syllable, a trailing ``e`` removes one, and any non-empty word
        has at least one syllable. An empty string yields 0.
        """
        word = word.lower()
        if not word:
            # Guard: the first-letter check below would raise IndexError.
            return 0

        vowels = 'aeiouy'
        count = 1 if word[0] in vowels else 0
        # A vowel that follows a non-vowel starts a new vowel run.
        for previous, current in zip(word, word[1:]):
            if current in vowels and previous not in vowels:
                count += 1
        if word.endswith('e'):
            count -= 1
        if count == 0:
            count += 1
        return count

    def _get_element_position(self, element) -> Dict[str, int]:
        """Get element position in the document.

        Returns the element's ``sourceline`` as ``top`` and ``sourcepos``
        as ``left``; both fall back to 0 when the element does not expose
        those attributes.
        """
        try:
            return {
                'top': element.sourceline,
                'left': element.sourcepos
            }
        except AttributeError:
            return {
                'top': 0,
                'left': 0
            }
|
||||
@@ -1,112 +0,0 @@
|
||||
"""
|
||||
Data collector utility for content gap analysis.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Dict, Any
|
||||
|
||||
class DataCollector:
    """
    Collects and processes website data for analysis.

    Fetches a page over HTTP and extracts its title, meta description,
    headings, visible text, links and images.
    """

    def __init__(self, timeout: float = 10.0):
        """Initialize the data collector.

        Args:
            timeout: Seconds to wait for the server before the request is
                abandoned. Passed to ``requests.get`` so that an
                unresponsive host cannot block the caller indefinitely.
        """
        self.timeout = timeout
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    def collect(self, url: str) -> Dict[str, Any]:
        """
        Collect website data for analysis.

        Args:
            url (str): The URL to collect data from

        Returns:
            dict: Collected website data with the keys ``url``, ``title``,
            ``meta_description``, ``headings``, ``content``, ``links`` and
            ``images``. If fetching or parsing raises, a dict with only
            ``error`` (the message) and ``url`` is returned instead.
        """
        try:
            # Fetch webpage content; the timeout bounds how long we wait.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()

            # Parse HTML content
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract relevant data. Note that _extract_content removes the
            # script and style elements from the shared soup.
            data = {
                'url': url,
                'title': self._extract_title(soup),
                'meta_description': self._extract_meta_description(soup),
                'headings': self._extract_headings(soup),
                'content': self._extract_content(soup),
                'links': self._extract_links(soup),
                'images': self._extract_images(soup)
            }

            return data

        except Exception as e:
            # Best effort by design: failures are reported, not raised.
            return {
                'error': str(e),
                'url': url
            }

    def _extract_title(self, soup: BeautifulSoup) -> str:
        """Extract page title, or an empty string when there is none."""
        title = soup.find('title')
        return title.text if title else ''

    def _extract_meta_description(self, soup: BeautifulSoup) -> str:
        """Extract meta description, or an empty string when there is none."""
        meta = soup.find('meta', attrs={'name': 'description'})
        return meta.get('content', '') if meta else ''

    def _extract_headings(self, soup: BeautifulSoup) -> Dict[str, list]:
        """Extract all headings.

        Returns a mapping with the keys ``h1`` to ``h6``, each holding the
        stripped text of every heading at that level (possibly empty).
        """
        headings = {}
        for i in range(1, 7):
            tags = soup.find_all(f'h{i}')
            headings[f'h{i}'] = [tag.text.strip() for tag in tags]
        return headings

    def _extract_content(self, soup: BeautifulSoup) -> str:
        """Extract main content.

        Removes ``script`` and ``style`` elements from ``soup`` in place,
        then returns the remaining text with each line stripped and the
        non-empty pieces joined by single spaces.
        """
        # Remove script and style elements
        for script in soup(['script', 'style']):
            script.decompose()

        # Get text content
        text = soup.get_text()

        # Clean up text
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        text = ' '.join(chunk for chunk in chunks if chunk)

        return text

    def _extract_links(self, soup: BeautifulSoup) -> list:
        """Extract all links.

        Returns one ``{'url', 'text'}`` dict per anchor that has a
        non-empty ``href``.
        """
        links = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href:
                links.append({
                    'url': href,
                    'text': link.text.strip()
                })
        return links

    def _extract_images(self, soup: BeautifulSoup) -> list:
        """Extract all images.

        Returns one ``{'src', 'alt', 'title'}`` dict per ``img`` element;
        missing attributes become empty strings.
        """
        images = []
        for img in soup.find_all('img'):
            images.append({
                'src': img.get('src', ''),
                'alt': img.get('alt', ''),
                'title': img.get('title', '')
            })
        return images
|
||||
@@ -1,237 +0,0 @@
|
||||
"""
|
||||
SEO analyzer utility for content gap analysis.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, urljoin
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
from ....utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
|
||||
def analyze_onpage_seo(url: str) -> Dict[str, Any]:
    """
    Analyze on-page SEO elements of a website.

    Runs ``WebsiteAnalyzer().analyze_website(url)`` and reshapes its
    output into a flat summary.

    Args:
        url: The URL to analyze

    Returns:
        Dictionary containing SEO analysis results. On success the keys
        are ``meta_tags``, ``content``, ``meta_title``,
        ``meta_description``, ``has_robots_txt``, ``has_sitemap``,
        ``mobile_friendly`` and ``load_time``. When the analyzer reports
        failure or anything raises, the dictionary instead holds ``error``
        together with empty/false/zero values for ``meta_title``,
        ``meta_description``, ``has_robots_txt``, ``has_sitemap``,
        ``mobile_friendly`` and ``load_time``.
    """
    def _failure(message: Any) -> Dict[str, Any]:
        # Single definition of the result returned for every failure path.
        return {
            'error': message,
            'meta_title': '',
            'meta_description': '',
            'has_robots_txt': False,
            'has_sitemap': False,
            'mobile_friendly': False,
            'load_time': 0
        }

    try:
        # Use the combined website analyzer
        report = WebsiteAnalyzer().analyze_website(url)

        if not report.get('success', False):
            return _failure(report.get('error', 'Unknown error in SEO analysis'))

        # Extract relevant information from the analysis
        details = report['data']['analysis']
        seo_info = details['seo_info']
        basic_info = details['basic_info']
        performance = details['performance']

        summary = {
            'meta_tags': seo_info.get('meta_tags', {}),
            'content': seo_info.get('content', {}),
            'meta_title': basic_info.get('title', ''),
            'meta_description': basic_info.get('meta_description', ''),
            'has_robots_txt': bool(basic_info.get('robots_txt')),
            'has_sitemap': bool(basic_info.get('sitemap')),
            'mobile_friendly': True,  # This would need to be implemented separately
            'load_time': performance.get('load_time', 0)
        }
        return summary
    except Exception as e:
        return _failure(str(e))
|
||||
|
||||
def _analyze_meta_tags(soup: BeautifulSoup) -> Dict[str, Any]:
    """Collect title, description, keywords, Open Graph and Twitter Card data.

    Args:
        soup: Parsed document to inspect.

    Returns:
        Dictionary with ``title``, ``description`` and ``keywords`` entries
        (each present only when the corresponding tag exists) plus ``og_tags``
        and ``twitter_tags`` mappings, which are always present.
    """
    meta_tags: Dict[str, Any] = {}

    # Title tag
    title_tag = soup.find('title')
    if title_tag:
        # ``.string`` is None for an empty <title> or one containing nested
        # markup, so read the text through get_text() to avoid AttributeError.
        meta_tags['title'] = title_tag.get_text().strip()

    # Meta description
    meta_desc = soup.find('meta', {'name': 'description'})
    if meta_desc:
        meta_tags['description'] = meta_desc.get('content', '').strip()

    # Meta keywords
    meta_keywords = soup.find('meta', {'name': 'keywords'})
    if meta_keywords:
        meta_tags['keywords'] = meta_keywords.get('content', '').strip()

    # Open Graph tags
    meta_tags['og_tags'] = {
        tag['property']: tag.get('content', '')
        for tag in soup.find_all('meta', property=re.compile(r'^og:'))
    }

    # Twitter Card tags. The HTML ``name`` attribute has to be matched through
    # ``attrs``: passing ``name=`` as a keyword collides with find_all's first
    # positional parameter (the tag name) and raises TypeError.
    meta_tags['twitter_tags'] = {
        tag['name']: tag.get('content', '')
        for tag in soup.find_all('meta', attrs={'name': re.compile(r'^twitter:')})
    }

    return meta_tags
|
||||
|
||||
def _analyze_headings(soup: BeautifulSoup) -> Dict[str, Any]:
    """Return the stripped text of every heading, grouped by level.

    Args:
        soup: Parsed document to inspect.

    Returns:
        Dictionary keyed ``'h1'`` through ``'h6'``; each value is the list of
        heading texts found for that level (empty when there are none).
    """
    levels = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return {
        level: [node.get_text().strip() for node in soup.find_all(level)]
        for level in levels
    }
|
||||
|
||||
def _analyze_content(soup: BeautifulSoup) -> Dict[str, Any]:
    """Measure the main content area of the page.

    The content area is the first match among ``<main>``, ``<article>`` and a
    ``<div>`` whose class matches ``content|main|article``.

    Args:
        soup: Parsed document to inspect.

    Returns:
        Dictionary with ``word_count``, ``paragraph_count`` and the raw
        ``content`` text; zeros and an empty string when no area is found.
    """
    # Try the candidate containers in priority order
    container = soup.find('main')
    if not container:
        container = soup.find('article')
    if not container:
        container = soup.find('div', class_=re.compile(r'content|main|article'))

    if not container:
        return {
            'word_count': 0,
            'paragraph_count': 0,
            'content': ''
        }

    text = container.get_text()
    return {
        'word_count': len(text.split()),
        'paragraph_count': len(container.find_all('p')),
        'content': text
    }
|
||||
|
||||
def _analyze_links(soup: BeautifulSoup, base_url: str) -> Dict[str, Any]:
    """Split the page's anchors into internal and external links.

    Args:
        soup: Parsed document to inspect.
        base_url: URL of the page; used to resolve relative hrefs and to
            decide which host counts as internal.

    Returns:
        Dictionary with ``internal``, ``external`` and ``broken`` lists. Each
        link entry holds ``url``, ``text`` and ``title``. ``broken`` is never
        filled by this function.
    """
    buckets = {
        'internal': [],
        'external': [],
        'broken': []
    }
    site_host = urlparse(base_url).netloc

    for anchor in soup.find_all('a', href=True):
        target = anchor['href']

        # Anything that is not already absolute http(s) is resolved against the page URL
        if not target.startswith(('http://', 'https://')):
            target = urljoin(base_url, target)

        # Same host as the page -> internal, everything else -> external
        bucket = 'internal' if urlparse(target).netloc == site_host else 'external'
        buckets[bucket].append({
            'url': target,
            'text': anchor.get_text().strip(),
            'title': anchor.get('title', '')
        })

    return buckets
|
||||
|
||||
def _analyze_images(soup: BeautifulSoup) -> Dict[str, Any]:
    """Describe every ``<img>`` on the page and count attribute coverage.

    Args:
        soup: Parsed document to inspect.

    Returns:
        Dictionary with the ``total`` image count, the number of images
        ``with_alt``, ``with_title`` and ``with_dimensions`` (both width and
        height set), and the per-image records under ``images``.
    """
    records = []
    for node in soup.find_all('img'):
        alt = node.get('alt', '')
        title = node.get('title', '')
        width = node.get('width', '')
        height = node.get('height', '')
        records.append({
            'src': node.get('src', ''),
            'alt': alt,
            'title': title,
            'width': width,
            'height': height,
            'has_alt': bool(alt),
            'has_title': bool(title),
            'has_dimensions': bool(width and height)
        })

    def count_flag(flag: str) -> int:
        # Number of records whose boolean flag is set
        return len([record for record in records if record[flag]])

    return {
        'total': len(records),
        'with_alt': count_flag('has_alt'),
        'with_title': count_flag('has_title'),
        'with_dimensions': count_flag('has_dimensions'),
        'images': records
    }
|
||||
|
||||
def _check_technical_elements(soup: BeautifulSoup, url: str) -> Dict[str, Any]:
    """Check technical SEO elements of a page and its site.

    Args:
        soup: Parsed document to inspect.
        url: URL of the page; its scheme and host are used to probe
            ``/robots.txt`` and ``/sitemap.xml``.

    Returns:
        Dictionary with ``has_robots_txt``, ``has_sitemap``,
        ``mobile_friendly`` (a viewport meta tag is present),
        ``has_canonical``, ``has_language`` and the ``language`` value of the
        ``<html lang>`` attribute (empty string when absent).
    """
    parsed = urlparse(url)
    site_root = f"{parsed.scheme}://{parsed.netloc}"

    # Check robots.txt and sitemap
    has_robots_txt = _url_responds_ok(f"{site_root}/robots.txt")
    has_sitemap = _url_responds_ok(f"{site_root}/sitemap.xml")

    # Check mobile friendliness (only the presence of a viewport meta tag)
    has_viewport = bool(soup.find('meta', {'name': 'viewport'}))

    # Check canonical URL
    has_canonical = bool(soup.find('link', {'rel': 'canonical'}))

    # Check language; fragments without an <html> element have no language
    html_tag = soup.find('html')
    html_lang = html_tag.get('lang', '') if html_tag else ''

    return {
        'has_robots_txt': has_robots_txt,
        'has_sitemap': has_sitemap,
        'mobile_friendly': has_viewport,
        'has_canonical': has_canonical,
        'has_language': bool(html_lang),
        'language': html_lang
    }


def _url_responds_ok(resource_url: str, timeout: int = 5) -> bool:
    """Return True when a GET of ``resource_url`` answers with HTTP 200."""
    try:
        return requests.get(resource_url, timeout=timeout).status_code == 200
    except requests.RequestException:
        # Best-effort probe: an unreachable or invalid URL counts as "absent",
        # while KeyboardInterrupt / SystemExit are no longer swallowed.
        return False
|
||||
@@ -1,270 +0,0 @@
|
||||
"""
|
||||
Storage module for content gap analysis results.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
import streamlit as st
|
||||
|
||||
class ContentGapAnalysisStorage:
    """Handles storage and retrieval of content gap analysis results.

    Database errors are reported with ``st.error`` and converted into a
    sentinel return value (``None``, ``[]`` or ``False``) instead of being
    raised.

    NOTE(review): the ORM models used below (ContentGapAnalysis,
    WebsiteAnalysis, CompetitorAnalysis, KeywordAnalysis,
    ContentRecommendation, AnalysisHistory) are not imported in the part of
    the module visible here - confirm they are brought into scope.
    """

    def __init__(self, db_session: Session):
        """Initialize the storage handler.

        Args:
            db_session: Session used for every query and write.
        """
        self.db = db_session

    def save_analysis(self, user_id: int, website_url: str, industry: str, results: Dict[str, Any]) -> Optional[int]:
        """
        Save content gap analysis results.

        One main record is created, followed by the website, competitor,
        keyword, recommendation and history rows that reference it. Everything
        is committed together.

        Args:
            user_id: User ID
            website_url: Target website URL
            industry: Industry category
            results: Analysis results dictionary

        Returns:
            Analysis ID if successful, None when a database error occurred

        Raises:
            Exception: Any non-database error (for example a malformed
                ``results`` structure) is re-raised after the session has
                been rolled back.
        """
        try:
            # Create main analysis record
            analysis = ContentGapAnalysis(
                user_id=user_id,
                website_url=website_url,
                industry=industry,
                status='completed',
                metadata={'version': '1.0'}
            )
            self.db.add(analysis)
            self.db.flush()  # Get the ID without committing

            # Save website analysis
            website = results.get('website', {})
            self.db.add(WebsiteAnalysis(
                content_gap_analysis_id=analysis.id,
                content_score=website.get('content_score', 0),
                seo_score=website.get('seo_score', 0),
                structure_score=website.get('structure_score', 0),
                content_metrics=website.get('content_metrics', {}),
                seo_metrics=website.get('seo_metrics', {}),
                technical_metrics=website.get('technical_metrics', {}),
                ai_insights=website.get('ai_insights', {})
            ))

            # Save competitor analysis if available
            if 'competitors' in results:
                for competitor in results['competitors']:
                    self.db.add(CompetitorAnalysis(
                        content_gap_analysis_id=analysis.id,
                        competitor_url=competitor.get('url'),
                        market_position=competitor.get('market_position', {}),
                        content_gaps=competitor.get('content_gaps', []),
                        competitive_advantages=competitor.get('competitive_advantages', []),
                        trend_analysis=competitor.get('trend_analysis', {})
                    ))

            # Save keyword analysis
            keywords = results.get('keywords', {})
            self.db.add(KeywordAnalysis(
                content_gap_analysis_id=analysis.id,
                top_keywords=keywords.get('top_keywords', []),
                search_intent=keywords.get('search_intent', {}),
                opportunities=keywords.get('opportunities', []),
                trend_analysis=keywords.get('trend_analysis', {})
            ))

            # Save recommendations
            for recommendation in results.get('recommendations', []):
                self.db.add(ContentRecommendation(
                    content_gap_analysis_id=analysis.id,
                    recommendation_type=recommendation.get('type'),
                    priority_score=recommendation.get('priority_score', 0),
                    recommendation=recommendation.get('recommendation', ''),
                    implementation_steps=recommendation.get('implementation_steps', []),
                    expected_impact=recommendation.get('expected_impact', {}),
                    status='pending'
                ))

            # Save analysis history
            self.db.add(AnalysisHistory(
                content_gap_analysis_id=analysis.id,
                status='completed',
                metrics={'duration': results.get('duration', 0)}
            ))

            # Commit all changes
            self.db.commit()
            return analysis.id

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error saving analysis results: {str(e)}")
            return None
        except Exception:
            # The main record has already been flushed and other rows may be
            # pending; discard them so a later commit on this session cannot
            # persist a half-written analysis. The error itself still
            # propagates, exactly as it did before.
            self.db.rollback()
            raise

    def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
        """
        Retrieve content gap analysis results.

        Args:
            analysis_id: Analysis ID

        Returns:
            Dictionary containing analysis results if found, None when the
            analysis does not exist or a database error occurred. The
            ``website`` and ``keywords`` entries are empty dictionaries when
            no matching row exists.
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return None

            # Get website analysis
            website_analysis = self.db.query(WebsiteAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).first()

            # Get competitor analysis
            competitor_analyses = self.db.query(CompetitorAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).all()

            # Get keyword analysis
            keyword_analysis = self.db.query(KeywordAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).first()

            # Get recommendations
            recommendations = self.db.query(ContentRecommendation).filter_by(
                content_gap_analysis_id=analysis_id
            ).all()

            # Get analysis history, newest run first
            history = self.db.query(AnalysisHistory).filter_by(
                content_gap_analysis_id=analysis_id
            ).order_by(AnalysisHistory.run_date.desc()).all()

            return {
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status,
                'website': {
                    'content_score': website_analysis.content_score,
                    'seo_score': website_analysis.seo_score,
                    'structure_score': website_analysis.structure_score,
                    'content_metrics': website_analysis.content_metrics,
                    'seo_metrics': website_analysis.seo_metrics,
                    'technical_metrics': website_analysis.technical_metrics,
                    'ai_insights': website_analysis.ai_insights
                } if website_analysis else {},
                'competitors': [{
                    'url': ca.competitor_url,
                    'market_position': ca.market_position,
                    'content_gaps': ca.content_gaps,
                    'competitive_advantages': ca.competitive_advantages,
                    'trend_analysis': ca.trend_analysis
                } for ca in competitor_analyses],
                'keywords': {
                    'top_keywords': keyword_analysis.top_keywords,
                    'search_intent': keyword_analysis.search_intent,
                    'opportunities': keyword_analysis.opportunities,
                    'trend_analysis': keyword_analysis.trend_analysis
                } if keyword_analysis else {},
                'recommendations': [{
                    'type': r.recommendation_type,
                    'priority_score': r.priority_score,
                    'recommendation': r.recommendation,
                    'implementation_steps': r.implementation_steps,
                    'expected_impact': r.expected_impact,
                    'status': r.status
                } for r in recommendations],
                'history': [{
                    'run_date': h.run_date,
                    'status': h.status,
                    'metrics': h.metrics,
                    'error_log': h.error_log
                } for h in history]
            }

        except SQLAlchemyError as e:
            st.error(f"Error retrieving analysis results: {str(e)}")
            return None

    def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
        """
        Get all analyses for a user, newest first.

        Args:
            user_id: User ID

        Returns:
            List of analysis summaries; empty when a database error occurred
        """
        try:
            analyses = self.db.query(ContentGapAnalysis).filter_by(
                user_id=user_id
            ).order_by(ContentGapAnalysis.analysis_date.desc()).all()

            return [{
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status
            } for analysis in analyses]

        except SQLAlchemyError as e:
            st.error(f"Error retrieving user analyses: {str(e)}")
            return []

    def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
        """
        Update the status of a recommendation.

        Args:
            recommendation_id: Recommendation ID
            status: New status

        Returns:
            True if successful, False when the recommendation does not exist
            or a database error occurred
        """
        try:
            recommendation = self.db.query(ContentRecommendation).get(recommendation_id)
            if not recommendation:
                return False

            recommendation.status = status
            recommendation.updated_at = datetime.utcnow()
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error updating recommendation status: {str(e)}")
            return False

    def delete_analysis(self, analysis_id: int) -> bool:
        """
        Delete an analysis record.

        NOTE(review): removal of the related rows presumably relies on cascade
        rules declared on the models - confirm.

        Args:
            analysis_id: Analysis ID

        Returns:
            True if successful, False when the analysis does not exist or a
            database error occurred
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return False

            self.db.delete(analysis)
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error deleting analysis: {str(e)}")
            return False
|
||||
@@ -1,291 +0,0 @@
|
||||
"""Website analyzer module for content gap analysis."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import Dict, Any, List, Optional
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer as BaseWebsiteAnalyzer
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Ensure logs directory exists before the file sink below is attached, so the
# sink never depends on the logging library creating the directory itself.
os.makedirs("logs", exist_ok=True)

# Configure logger: one rotating DEBUG file sink plus an INFO console sink
logger.remove()  # Remove default handler
logger.add(
    "logs/content_gap_website_analyzer.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
class WebsiteAnalyzer(BaseWebsiteAnalyzer):
    """Extended website analyzer for content gap analysis.

    Every public method returns a dictionary and reports failures through an
    ``error`` entry instead of raising.
    """

    def __init__(self):
        """Initialize the website analyzer."""
        super().__init__()
        logger.info("ContentGapWebsiteAnalyzer initialized")

    def analyze_content_gaps(self, url: str, competitor_urls: List[str]) -> Dict[str, Any]:
        """
        Analyze content gaps between the target website and competitors.

        Competitors whose analysis does not succeed are left out of the
        comparison.

        Args:
            url: The target URL to analyze
            competitor_urls: List of competitor URLs to compare against

        Returns:
            Dictionary with ``gaps`` and ``recommendations`` lists; on failure
            both are empty and an ``error`` entry describes the problem
        """
        try:
            # Analyze target website
            target_analysis = self.analyze_website(url)
            if not target_analysis.get('success', False):
                return {
                    'error': target_analysis.get('error', 'Unknown error in target analysis'),
                    'gaps': [],
                    'recommendations': []
                }

            # Analyze competitor websites
            competitor_analyses = []
            for competitor_url in competitor_urls:
                competitor_result = self.analyze_website(competitor_url)
                if competitor_result.get('success', False):
                    competitor_analyses.append(competitor_result['data'])

            # Generate content gap analysis using AI
            prompt = f"""Analyze content gaps between the target website and competitors:

Target Website:
{json.dumps(target_analysis['data'], indent=2)}

Competitor Websites:
{json.dumps(competitor_analyses, indent=2)}

Identify:
1. Missing content topics
2. Content depth differences
3. Keyword gaps
4. Content structure improvements
5. Content quality recommendations

Format the response as JSON with 'gaps' and 'recommendations' keys."""

            # Get AI analysis
            gap_report = llm_text_gen(
                prompt=prompt,
                system_prompt="You are an SEO expert specializing in content gap analysis.",
                response_format="json_object"
            )

            if not gap_report:
                return {
                    'error': 'Failed to generate content gap analysis',
                    'gaps': [],
                    'recommendations': []
                }

            # llm_text_gen's return type is not visible from this module;
            # accept JSON text as well as an already-parsed mapping.
            if isinstance(gap_report, str):
                gap_report = json.loads(gap_report)

            return {
                'gaps': gap_report.get('gaps', []),
                'recommendations': gap_report.get('recommendations', [])
            }

        except Exception as e:
            error_msg = f"Error analyzing content gaps: {str(e)}"
            # loguru has no ``exc_info`` flag: a stray keyword only makes it
            # run str.format on the message. logger.exception() logs at ERROR
            # level and attaches the active traceback.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'gaps': [],
                'recommendations': []
            }

    def analyze(self, url: str) -> Dict[str, Any]:
        """
        Analyze a website for content gaps and SEO opportunities.

        Args:
            url: The URL to analyze

        Returns:
            On success, a dictionary with ``success``, ``data`` (SEO metrics,
            performance metrics and the raw website analysis) and
            ``progress``. On failure, a dictionary with ``error``,
            ``error_details`` and ``progress``.
        """
        try:
            # Initialize progress tracking
            progress = {
                'status': 'in_progress',
                'current_stage': 'content_analysis',
                'current_step': 'Initializing analysis',
                'progress': 0,
                'details': 'Starting website analysis...'
            }
            self.progress.update(progress)

            # Get base website analysis
            logger.info("Starting base website analysis")
            website_analysis = self.analyze_website(url)

            if not website_analysis.get('success', False):
                error_msg = website_analysis.get('error', 'Unknown error in website analysis')
                logger.error(f"Error in website analysis: {error_msg}")
                progress['status'] = 'error'
                progress['details'] = error_msg
                self.progress.update(progress)
                return {
                    'error': error_msg,
                    'error_details': website_analysis.get('error_details', {}),
                    'progress': progress
                }

            # Extract SEO metrics from the analysis
            seo_metrics = self._extract_seo_metrics(website_analysis['data'])

            # Extract performance metrics
            performance_metrics = self._extract_performance_metrics(website_analysis['data'])

            # Update progress
            progress['status'] = 'completed'
            progress['progress'] = 100
            progress['details'] = 'Analysis completed successfully'
            self.progress.update(progress)

            return {
                'success': True,
                'data': {
                    'seo_metrics': seo_metrics,
                    'performance_metrics': performance_metrics,
                    'website_analysis': website_analysis['data']
                },
                'progress': progress
            }

        except Exception as e:
            import traceback

            error_msg = f"Error in content gap analysis: {str(e)}"
            logger.exception(error_msg)
            progress['status'] = 'error'
            progress['details'] = error_msg
            self.progress.update(progress)
            return {
                'error': error_msg,
                'error_details': {
                    'type': type(e).__name__,
                    # str() of a traceback object is only its repr; format the
                    # active exception so the entry is actually readable.
                    'traceback': traceback.format_exc()
                },
                'progress': progress
            }

    def _extract_seo_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract SEO-related metrics from website analysis.

        Missing sections fall back to zeros / empty dictionaries; an empty
        dictionary is returned when extraction raises.
        """
        try:
            seo_info = website_analysis.get('analysis', {}).get('seo_info', {})
            meta_tags = seo_info.get('meta_tags', {})
            content = seo_info.get('content', {})
            return {
                'overall_score': seo_info.get('overall_score', 0),
                'meta_tags': {
                    'title': meta_tags.get('title', {}),
                    'description': meta_tags.get('description', {}),
                    'keywords': meta_tags.get('keywords', {})
                },
                'content': {
                    'word_count': content.get('word_count', 0),
                    'readability_score': content.get('readability_score', 0),
                    'content_quality_score': content.get('content_quality_score', 0)
                }
            }
        except Exception as e:
            logger.exception(f"Error extracting SEO metrics: {str(e)}")
            return {}

    def _extract_performance_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract performance metrics from website analysis.

        Missing values fall back to zero; an empty dictionary is returned when
        extraction raises.
        """
        try:
            performance_info = website_analysis.get('analysis', {}).get('performance', {})
            return {
                'load_time': performance_info.get('load_time', 0),
                'page_size': performance_info.get('page_size', 0),
                'resource_count': performance_info.get('resource_count', 0),
                'performance_score': performance_info.get('performance_score', 0)
            }
        except Exception as e:
            logger.exception(f"Error extracting performance metrics: {str(e)}")
            return {}

    def _extract_content_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract content-related metrics from website analysis.

        Unlike the other extractors, a missing ``analysis`` or
        ``content_info`` section is treated as an error: it is logged and an
        empty dictionary is returned.
        """
        try:
            content_info = website_analysis['analysis']['content_info']
            return {
                'word_count': content_info.get('word_count', 0),
                'heading_count': content_info.get('heading_count', 0),
                'image_count': content_info.get('image_count', 0),
                'link_count': content_info.get('link_count', 0),
                'has_meta_description': content_info.get('has_meta_description', False),
                'has_robots_txt': content_info.get('has_robots_txt', False),
                'has_sitemap': content_info.get('has_sitemap', False)
            }
        except Exception as e:
            logger.exception(f"Error extracting content metrics: {str(e)}")
            return {}

    def _extract_technical_info(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract technical information from website analysis.

        Missing values fall back to empty strings / dictionaries; an empty
        dictionary is returned when extraction raises.
        """
        try:
            basic_info = website_analysis.get('analysis', {}).get('basic_info', {})
            return {
                'title': basic_info.get('title', ''),
                'meta_description': basic_info.get('meta_description', ''),
                'headers': basic_info.get('headers', {}),
                'robots_txt': basic_info.get('robots_txt', ''),
                'sitemap': basic_info.get('sitemap', ''),
                'server_info': basic_info.get('server_info', {}),
                'security_info': basic_info.get('security_info', {})
            }
        except Exception as e:
            logger.exception(f"Error extracting technical info: {str(e)}")
            return {}

    def _generate_insights(self, content_metrics: Dict[str, Any], seo_metrics: Dict[str, Any]) -> List[str]:
        """Generate content insights based on analysis results.

        Args:
            content_metrics: Metrics in the shape produced by
                ``_extract_content_metrics``; its keys are required.
            seo_metrics: Metrics containing an optional ``overall_score``.

        Returns:
            List of human-readable findings; empty when a required key is
            missing or anything else raises.
        """
        try:
            insights = []

            # Content insights
            if content_metrics['word_count'] < 300:
                insights.append("Content length is below recommended minimum (300 words)")
            elif content_metrics['word_count'] > 2000:
                insights.append("Content length is above recommended maximum (2000 words)")

            if content_metrics['heading_count'] < 2:
                insights.append("Content structure could be improved with more headings")

            if content_metrics['image_count'] == 0:
                insights.append("Consider adding images to improve content engagement")

            # SEO insights
            overall_score = seo_metrics.get('overall_score', 0)
            if overall_score < 60:
                insights.append("SEO optimization needs significant improvement")
            elif overall_score < 80:
                insights.append("SEO optimization has room for improvement")

            if not content_metrics['has_meta_description']:
                insights.append("Missing meta description - important for SEO")

            if not content_metrics['has_robots_txt']:
                insights.append("Missing robots.txt - important for search engine crawling")

            if not content_metrics['has_sitemap']:
                insights.append("Missing sitemap.xml - important for search engine indexing")

            return insights
        except Exception as e:
            logger.exception(f"Error generating insights: {str(e)}")
            return []
|
||||
@@ -1,160 +0,0 @@
|
||||
"""Content title generator module."""
|
||||
|
||||
import os
|
||||
import json
|
||||
import streamlit as st
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
from loguru import logger
|
||||
from typing import Dict, Any, List, Optional
|
||||
import asyncio
|
||||
import sys
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
|
||||
# Ensure logs directory exists before the file sink below is attached, so the
# sink never depends on the logging library creating the directory itself.
os.makedirs("logs", exist_ok=True)

# Configure logger: one rotating DEBUG file sink plus an INFO console sink
logger.remove()  # Remove default handler
logger.add(
    "logs/content_title_generator.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
def ai_title_generator(url: str) -> Dict[str, Any]:
    """
    Generate SEO-optimized titles using AI.

    The page is analyzed first; its word count, heading structure and meta
    tags are then placed in a prompt that asks the model for five titles.

    Args:
        url: The URL to analyze

    Returns:
        Dictionary with ``patterns`` and ``suggestions``; on failure both are
        empty and an ``error`` entry describes the problem
    """
    try:
        # Analyze website
        analysis = WebsiteAnalyzer().analyze_website(url)
        if not analysis.get('success', False):
            return {
                'error': analysis.get('error', 'Unknown error in analysis'),
                'patterns': {},
                'suggestions': []
            }

        # Extract content and meta information
        content_info = analysis['data']['analysis']['content_info']
        seo_info = analysis['data']['analysis']['seo_info']
        meta_tags = seo_info.get('meta_tags', {})

        # Generate title suggestions using AI
        prompt = f"""Based on the following website content and SEO analysis, generate 5 SEO-optimized title suggestions:

Content Analysis:
- Word Count: {content_info.get('word_count', 0)}
- Heading Structure: {content_info.get('heading_structure', {})}

SEO Analysis:
- Meta Title: {meta_tags.get('title', {}).get('value', '')}
- Meta Description: {meta_tags.get('description', {}).get('value', '')}
- Keywords: {meta_tags.get('keywords', {}).get('value', '')}

Generate 5 title suggestions that are:
1. SEO-optimized
2. Engaging and click-worthy
3. Between 50-60 characters
4. Include relevant keywords
5. Follow best practices for title optimization

Format the response as JSON with 'suggestions' and 'patterns' keys."""

        # Get AI suggestions
        suggestions = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in title optimization.",
            response_format="json_object"
        )

        if not suggestions:
            return {
                'error': 'Failed to generate title suggestions',
                'patterns': {},
                'suggestions': []
            }

        # llm_text_gen's return type is not visible from this module; accept
        # JSON text as well as an already-parsed mapping.
        if isinstance(suggestions, str):
            suggestions = json.loads(suggestions)

        return {
            'patterns': suggestions.get('patterns', {}),
            'suggestions': suggestions.get('suggestions', [])
        }

    except Exception as e:
        error_msg = f"Error generating title suggestions: {str(e)}"
        # loguru has no ``exc_info`` flag: a stray keyword only makes it run
        # str.format on the message. logger.exception() logs at ERROR level
        # and attaches the active traceback.
        logger.exception(error_msg)
        return {
            'error': error_msg,
            'patterns': {},
            'suggestions': []
        }
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(min=1, max=4))
def generate_blog_titles(input_blog_keywords, input_blog_content, input_title_type, input_title_intent, input_language):
    """Generate SEO optimized blog titles using AI.

    Args:
        input_blog_keywords: Main keywords of the blog; may be empty when
            content is given.
        input_blog_content: Blog content; may be empty when keywords are given.
        input_title_type: Blog type the titles should be optimized for.
        input_title_intent: Web search intent the titles should be optimized for.
        input_language: Language the titles should be written in.

    Returns:
        The LLM response, or None when neither keywords nor content were
        supplied or the LLM call failed (an error is shown in the UI).
    """
    if not input_blog_keywords and not input_blog_content:
        # Previously no prompt was built in this case, so the call failed with
        # UnboundLocalError and was pointlessly retried by the decorator.
        st.error("Provide blog keywords or blog content to generate titles.")
        return None

    if input_blog_content and input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with main 'blog keywords' and 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog keywords and content.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords and content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog keywords: '{input_blog_keywords}'
Blog content: '{input_blog_content}'
"""
    elif input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with the main 'keywords' of a blog.
Your task is to write 5 SEO optimized blog titles from the given blog keywords.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog keywords: '{input_blog_keywords}'
"""
    else:
        prompt = f"""As a SEO expert, I will provide you with the 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog content.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog content: '{input_blog_content}'
"""

    # NOTE(review): because the exception is handled here, the @retry
    # decorator never sees an LLM failure and does not retry it - confirm
    # whether that is intended.
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        st.error(f"Exit: Failed to get response from LLM: {err}")
        return None
|
||||
@@ -1,115 +0,0 @@
|
||||
from typing import List, Dict, Union
|
||||
#from nltk import tokenize, stem, pos_tag
|
||||
from textblob import TextBlob
|
||||
import enchant
|
||||
|
||||
class TextPreprocessor:
    """Turn raw text into a single string of space-separated word stems."""

    def preprocess_text(self, text: str) -> str:
        """Tokenize *text*, stem each token, and join the stems with spaces.

        NOTE(review): `tokenize` and `stem` are not imported in the visible
        part of this file (the nltk import line is commented out) — confirm
        where they are expected to come from.
        """
        words = tokenize.word_tokenize(text)
        porter = stem.PorterStemmer()
        return ' '.join(map(porter.stem, words))
|
||||
|
||||
class SEOAnalyzer:
    """Compute simple text metrics and combine them into one SEO figure.

    Every metric returns ``0.0`` for empty or whitespace-only text instead of
    raising ``ZeroDivisionError``.

    NOTE(review): the methods use `tokenize` and `pos_tag`, which are not
    imported in the visible part of this file (the nltk import line is
    commented out) — confirm where they are expected to come from.
    """

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """Return the unweighted mean of the three metric scores.

        Args:
            text: The text to score.
            keywords: Keywords whose occurrences feed the density metric.

        Returns:
            ``(keyword_density + readability_score + semantic_score) / 3``.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)
        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total keyword occurrences divided by the token count.

        Matching is a case-insensitive substring count. Empty keywords are
        ignored because ``str.count("")`` would count every position.

        Args:
            text: The text to search.
            keywords: Keywords to count.

        Returns:
            The density, or ``0.0`` when the text has no tokens.
        """
        if not text or not text.strip():
            return 0.0

        word_count = len(tokenize.word_tokenize(text))
        if word_count == 0:
            return 0.0

        # Lower-case once rather than once per keyword.
        lowered = text.lower()
        occurrences = sum(lowered.count(keyword.lower()) for keyword in keywords if keyword)
        return occurrences / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the reciprocal of the average number of words per sentence.

        Args:
            text: The text to score.

        Returns:
            ``1 / (words / sentences)``, or ``0.0`` when there are no
            sentences or no words.
        """
        if not text or not text.strip():
            return 0.0

        sentences = tokenize.sent_tokenize(text)
        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
        sentence_count = len(sentences)
        if sentence_count == 0 or word_count == 0:
            return 0.0

        average_words_per_sentence = word_count / sentence_count
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of tokens whose POS tag starts with 'N' or 'V'.

        Args:
            text: The text to tag.

        Returns:
            ``(nouns + verbs) / tokens``, or ``0.0`` when there are no tokens.
        """
        if not text or not text.strip():
            return 0.0

        tagged_text = pos_tag(tokenize.word_tokenize(text))
        if not tagged_text:
            return 0.0

        # A tag cannot start with both letters, so one pass counts nouns + verbs.
        noun_and_verb_count = sum(1 for _, pos in tagged_text if pos.startswith(('N', 'V')))
        return noun_and_verb_count / len(tagged_text)
|
||||
|
||||
class SpellChecker:
    """Report tokens that the en_US enchant dictionary does not accept."""

    def check_spelling(self, text: str) -> List[str]:
        """Return the tokens of *text* rejected by the dictionary, in order.

        NOTE(review): `tokenize` is not imported in the visible part of this
        file — confirm where it is expected to come from.
        """
        dictionary = enchant.Dict("en_US")

        rejected = []
        for word in tokenize.word_tokenize(text):
            if dictionary.check(word):
                continue
            rejected.append(word)
        return rejected
|
||||
|
||||
class SEOAnalysisModule:
    """Run preprocessing, SEO scoring and spell checking on one text."""

    def __init__(self):
        """Create one instance of each collaborating component."""
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """Analyze *text* and return all metrics in one dictionary.

        Args:
            text: The raw text to analyze.
            keywords: Keywords passed to the keyword-density metric.

        Returns:
            A dict with the keys ``seo_percentage``, ``keyword_density``,
            ``readability_score``, ``semantic_score`` and ``spelling_errors``.
        """
        # The numeric metrics are computed on the stemmed text.
        # NOTE(review): keywords are matched unstemmed against stemmed text,
        # so inflected keywords may never match — confirm intent.
        preprocessed_text = self.text_preprocessor.preprocess_text(text)

        seo_percentage = self.seo_analyzer.calculate_seo_percentage(preprocessed_text, keywords)
        keyword_density = self.seo_analyzer.calculate_keyword_density(preprocessed_text, keywords)
        readability_score = self.seo_analyzer.calculate_readability_score(preprocessed_text)
        semantic_score = self.seo_analyzer.perform_semantic_analysis(preprocessed_text)

        # Spell-check the ORIGINAL text. preprocess_text() replaces every word
        # with its Porter stem, and stems are often not dictionary words, so
        # checking the stemmed text reports correctly spelled words as errors.
        spelling_errors = self.spell_checker.check_spelling(text)

        return {
            'seo_percentage': seo_percentage,
            'keyword_density': keyword_density,
            'readability_score': readability_score,
            'semantic_score': semantic_score,
            'spelling_errors': spelling_errors
        }
|
||||
340
lib/ai_seo_tools/seo_analyzer_api.py
Normal file
340
lib/ai_seo_tools/seo_analyzer_api.py
Normal file
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
FastAPI endpoint for the Comprehensive SEO Analyzer
|
||||
Provides data for the React SEO Dashboard
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from .comprehensive_seo_analyzer import ComprehensiveSEOAnalyzer, SEOAnalysisResult
|
||||
|
||||
# Application object; the route decorators further down register their
# handlers on it, and the __main__ guard at the bottom serves it with uvicorn.
app = FastAPI(
    title="Comprehensive SEO Analyzer API",
    description="API for analyzing website SEO performance with actionable insights",
    version="1.0.0"
)

# Initialize the analyzer
# One module-level instance, used by every endpoint in this file.
seo_analyzer = ComprehensiveSEOAnalyzer()
|
||||
|
||||
class SEOAnalysisRequest(BaseModel):
    # Request body of the /analyze-seo and /analyze-seo-enhanced endpoints.
    # Page to analyze; the handlers convert it with str() before use.
    url: HttpUrl
    # Optional keywords, passed unchanged to seo_analyzer.analyze_url().
    target_keywords: Optional[List[str]] = None
|
||||
|
||||
class SEOAnalysisResponse(BaseModel):
    # Response body of the /analyze-seo and /analyze-seo-enhanced endpoints.
    # Every field except `success` and `message` is copied from the
    # same-named attribute of the analysis result.
    url: str
    timestamp: datetime
    overall_score: int
    health_status: str
    critical_issues: List[str]
    warnings: List[str]
    recommendations: List[str]
    # Per-category details; the metrics endpoint reads sections such as
    # 'url_structure' and 'meta_data' from the same result data.
    data: Dict[str, Any]
    # The handlers in this file always set this to True; failures are raised
    # as HTTP 500 errors instead.
    success: bool
    # Human-readable completion message built by the handler.
    message: str
|
||||
|
||||
@app.post("/analyze-seo", response_model=SEOAnalysisResponse)
async def analyze_seo(request: SEOAnalysisRequest):
    """
    Analyze a URL for comprehensive SEO performance

    Args:
        request: SEOAnalysisRequest containing URL and optional target keywords

    Returns:
        SEOAnalysisResponse with detailed analysis results
    """
    try:
        # The analyzer takes a plain string, not a pydantic URL object.
        analysis = seo_analyzer.analyze_url(str(request.url), request.target_keywords)

        # Build the response model directly from the analysis attributes.
        return SEOAnalysisResponse(
            url=analysis.url,
            timestamp=analysis.timestamp,
            overall_score=analysis.overall_score,
            health_status=analysis.health_status,
            critical_issues=analysis.critical_issues,
            warnings=analysis.warnings,
            recommendations=analysis.recommendations,
            data=analysis.data,
            success=True,
            message=f"SEO analysis completed successfully for {analysis.url}",
        )
    except Exception as exc:
        # Any failure is reported to the client as HTTP 500.
        raise HTTPException(status_code=500, detail=f"Error analyzing SEO: {str(exc)}")
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Health check endpoint"""
    # Static payload plus the current server time.
    return dict(
        status="healthy",
        timestamp=datetime.now(),
        service="Comprehensive SEO Analyzer API",
    )
|
||||
|
||||
@app.get("/analysis-summary/{url:path}")
async def get_analysis_summary(url: str):
    """
    Get a quick summary of SEO analysis for a URL

    Args:
        url: The URL to analyze

    Returns:
        Summary of SEO analysis
    """
    try:
        # Ensure URL has protocol
        target = url if url.startswith(('http://', 'https://')) else f"https://{url}"

        report = seo_analyzer.analyze_url(target)

        # Counts plus the first three issues and recommendations.
        return {
            "url": report.url,
            "overall_score": report.overall_score,
            "health_status": report.health_status,
            "critical_issues_count": len(report.critical_issues),
            "warnings_count": len(report.warnings),
            "recommendations_count": len(report.recommendations),
            "top_issues": report.critical_issues[:3],
            "top_recommendations": report.recommendations[:3],
            "analysis_timestamp": report.timestamp.isoformat(),
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error getting analysis summary: {str(exc)}")
|
||||
|
||||
@app.get("/seo-metrics/{url:path}")
async def get_seo_metrics(url: str):
    """
    Get detailed SEO metrics for dashboard display

    Args:
        url: The URL to analyze

    Returns:
        Detailed SEO metrics for React dashboard
    """
    # (metric name in the response, section key in the analysis data)
    score_fields = (
        ("url_structure_score", 'url_structure'),
        ("meta_data_score", 'meta_data'),
        ("content_score", 'content_analysis'),
        ("technical_score", 'technical_seo'),
        ("performance_score", 'performance'),
        ("accessibility_score", 'accessibility'),
        ("user_experience_score", 'user_experience'),
        ("security_score", 'security_headers'),
    )
    # Sections returned in full under "detailed_analysis".
    detail_sections = (
        'url_structure',
        'meta_data',
        'content_analysis',
        'technical_seo',
        'performance',
        'accessibility',
        'user_experience',
        'security_headers',
        'keyword_analysis',
    )

    try:
        # Ensure URL has protocol
        target = url if url.startswith(('http://', 'https://')) else f"https://{url}"

        report = seo_analyzer.analyze_url(target)

        # Headline figures first, then one score per category (0 when the
        # section or its score is missing).
        metrics = {
            "overall_score": report.overall_score,
            "health_status": report.health_status,
        }
        for metric_name, section in score_fields:
            metrics[metric_name] = report.data.get(section, {}).get('score', 0)

        return {
            "metrics": metrics,
            "critical_issues": report.critical_issues,
            "warnings": report.warnings,
            "recommendations": report.recommendations,
            "detailed_analysis": {
                section: report.data.get(section, {}) for section in detail_sections
            },
            "timestamp": report.timestamp.isoformat(),
            "url": report.url,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error getting SEO metrics: {str(exc)}")
|
||||
|
||||
@app.post("/batch-analyze")
async def batch_analyze(urls: List[str]):
    """
    Analyze multiple URLs in batch

    Args:
        urls: List of URLs to analyze

    Returns:
        Batch analysis results
    """
    try:
        outcomes = []

        for target in urls:
            try:
                # Ensure URL has protocol
                if not target.startswith(('http://', 'https://')):
                    target = f"https://{target}"

                report = seo_analyzer.analyze_url(target)

                entry = {
                    "url": report.url,
                    "overall_score": report.overall_score,
                    "health_status": report.health_status,
                    "critical_issues_count": len(report.critical_issues),
                    "warnings_count": len(report.warnings),
                    "success": True,
                }
            except Exception as exc:
                # One failing URL must not abort the batch; record it instead.
                entry = {
                    "url": target,
                    "overall_score": 0,
                    "health_status": "error",
                    "critical_issues_count": 0,
                    "warnings_count": 0,
                    "success": False,
                    "error": str(exc),
                }
            outcomes.append(entry)

        succeeded = sum(1 for entry in outcomes if entry['success'])
        failed = sum(1 for entry in outcomes if not entry['success'])

        return {
            "total_urls": len(urls),
            "successful_analyses": succeeded,
            "failed_analyses": failed,
            "results": outcomes,
        }
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error in batch analysis: {str(exc)}")
|
||||
|
||||
# Enhanced prompts for better results
# Severity/status label -> prefix text. enhance_analysis_result() reads the
# 'critical_issue', 'warning' and 'recommendation' entries.
ENHANCED_PROMPTS = dict(
    critical_issue="🚨 CRITICAL: This issue is severely impacting your SEO performance and must be fixed immediately.",
    warning="⚠️ WARNING: This could be improved to boost your search rankings.",
    recommendation="💡 RECOMMENDATION: Implement this to improve your SEO score.",
    excellent="🎉 EXCELLENT: Your SEO is performing very well in this area!",
    good="✅ GOOD: Your SEO is performing well, with room for minor improvements.",
    needs_improvement="🔧 NEEDS IMPROVEMENT: Several areas need attention to boost your SEO.",
    poor="❌ POOR: Significant improvements needed across multiple areas.",
)
|
||||
|
||||
def enhance_analysis_result(result: SEOAnalysisResult) -> SEOAnalysisResult:
    """
    Return a copy of *result* whose critical issues, warnings and
    recommendations are each prefixed with the matching ENHANCED_PROMPTS text.
    All other fields are carried over unchanged.
    """
    def _with_prefix(prompt_key, messages):
        # "<prompt text> <original message>" for every message, in order.
        return [f"{ENHANCED_PROMPTS[prompt_key]} {message}" for message in messages]

    prefixed_issues = _with_prefix('critical_issue', result.critical_issues)
    prefixed_warnings = _with_prefix('warning', result.warnings)
    prefixed_recommendations = _with_prefix('recommendation', result.recommendations)

    return SEOAnalysisResult(
        url=result.url,
        timestamp=result.timestamp,
        overall_score=result.overall_score,
        health_status=result.health_status,
        critical_issues=prefixed_issues,
        warnings=prefixed_warnings,
        recommendations=prefixed_recommendations,
        data=result.data,
    )
|
||||
|
||||
@app.post("/analyze-seo-enhanced", response_model=SEOAnalysisResponse)
async def analyze_seo_enhanced(request: SEOAnalysisRequest):
    """
    Analyze a URL with enhanced, user-friendly prompts

    Args:
        request: SEOAnalysisRequest containing URL and optional target keywords

    Returns:
        SEOAnalysisResponse with enhanced, user-friendly analysis results
    """
    try:
        # Analyze first, then prefix the messages with the enhanced prompts.
        analysis = seo_analyzer.analyze_url(str(request.url), request.target_keywords)
        enhanced = enhance_analysis_result(analysis)

        return SEOAnalysisResponse(
            url=enhanced.url,
            timestamp=enhanced.timestamp,
            overall_score=enhanced.overall_score,
            health_status=enhanced.health_status,
            critical_issues=enhanced.critical_issues,
            warnings=enhanced.warnings,
            recommendations=enhanced.recommendations,
            data=enhanced.data,
            success=True,
            message=f"Enhanced SEO analysis completed successfully for {enhanced.url}",
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Error analyzing SEO: {str(exc)}")
|
||||
|
||||
# Script entry point: serve the app with uvicorn when this file is run directly.
if __name__ == "__main__":
    # Imported here, so uvicorn is only needed when running as a script.
    import uvicorn
    # NOTE(review): host "0.0.0.0" listens on all interfaces — confirm this is
    # intended outside local development.
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
@@ -1,98 +0,0 @@
|
||||
import streamlit as st
|
||||
import openai
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
def scrape_url_content(url):
    """
    Scrapes the content from the provided URL.

    Only the text of ``<p>`` elements is collected. Errors are reported with
    ``st.error`` and turned into an empty string rather than raised.

    Args:
        url (str): The URL to scrape content from.

    Returns:
        str: The paragraph text joined by single spaces, or "" on any error.
    """
    # FIXME: Use firecrawl metadata option for this.
    try:
        # Without a timeout, requests can wait indefinitely on an
        # unresponsive host and hang the app. A timeout is raised as a
        # RequestException, which is handled below.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return ' '.join(p.text for p in soup.find_all('p'))
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching the URL content: {e}")
        return ""
    except Exception as e:
        st.error(f"Error parsing the HTML content: {e}")
        return ""
|
||||
|
||||
def generate_twitter_tags(topic, scraped_content=""):
    """
    Generates a list of relevant Twitter hashtags based on the topic and optional scraped content.

    Args:
        topic (str): The main topic or key phrase.
        scraped_content (str): Optional scraped content to add more context.
            Only its first 700 characters are included in the prompt.

    Returns:
        str: The text returned by ``llm_text_gen``, or None if the call fails
        (the failure is shown with ``st.error``).
    """
    prompt = f"Generate a list of highly relevant and trending Twitter hashtags based on the topic '{topic}'"

    if scraped_content:
        # Limit content to keep prompt manageable.
        prompt += f" and the following content: {scraped_content[:700]}..."

    prompt += " Make sure the hashtags are popular and relevant to the topic. Follow Latest best practices for twitter tags."

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # The message used to say "Open Graph tags"; this function generates
        # Twitter tags.
        st.error(f"Failed to generate Twitter tags: {err}")
        return None
|
||||
|
||||
|
||||
def display_app():
    """
    Render the Twitter tag generator page: two text inputs, an optional URL
    scrape, and a button that generates and shows the tags.
    """
    st.title("AI Twitter Tag Generator")

    st.write(
        "Generate trending and highly relevant Twitter tags with minimal input. "
        "Optionally, provide a URL to make the tags even more targeted."
    )

    # User Inputs
    topic = st.text_input(
        "Enter the topic or key phrase for Twitter tags",
        placeholder="e.g., AI in marketing"
    )
    url = st.text_input(
        "Optional: Enter a URL to scrape for more targeted tags",
        placeholder="e.g., https://example.com/article"
    )

    # Nothing can be generated without a topic.
    if not topic:
        st.info("Please enter a topic or key phrase to generate Twitter tags.")
        return

    scraped_content = ""
    if url:
        with st.spinner("Scraping content from the provided URL..."):
            scraped_content = scrape_url_content(url)
            if not scraped_content:
                st.info("No content could be extracted from the provided URL.")

    if not st.button("Generate Twitter Tags"):
        return

    with st.spinner("Generating Twitter tags..."):
        tags = generate_twitter_tags(topic, scraped_content)
        if tags:
            st.success("Twitter tags generated successfully!")
            st.write(tags)
|
||||
@@ -1,116 +0,0 @@
|
||||
"""Webpage content analysis tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import ConversationChain
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Streamlit page script: fetch a URL, show its title, description and word
# statistics, then ask the LLM conversation chain three questions about the
# page text and list the ten most frequent words.
st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
st.write("""
Welcome! This tool leverages the power of AI to analyze your web page's content. It goes beyond just keywords -
we'll use cutting-edge technology to uncover valuable insights and unlock new ways to boost your website!
""")

# --- User Input ---

url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
st.write(" ")

# --- AI Model Setup ---

llm = OpenAI(temperature=0.7)
conversation_chain = ConversationChain(llm=llm)

# --- Analyze Button & Processing ---

if st.button("Analyze with AI!"):
    with st.spinner('Analyzing your content...'):
        url = url_input.strip()
        # NOTE(review): `language` is not used anywhere in the visible script.
        language = language_input.lower()

        if not url.startswith("http"):
            st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
            st.stop()

        try:
            # Validate URL
            parsed_url = urlparse(url)
            if not parsed_url.scheme:
                url = "https://" + url

            # Fetch webpage content. The timeout stops an unresponsive host
            # from hanging the page; it is raised as a RequestException.
            response = requests.get(url, timeout=10)
            response.raise_for_status()

            # Parse HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract content. `.string` is None for an empty <title> or one
            # with nested markup, so fall back to the placeholder then too.
            title = (soup.title.string if soup.title else None) or "No title found"
            meta_description = soup.find('meta', {'name': 'description'})
            # A description tag without a `content` attribute used to raise
            # KeyError and abort the whole analysis.
            description = (meta_description.get('content') if meta_description else None) or "No description found"

            # Display results
            st.subheader("Page Analysis")
            st.metric("Title", title)
            st.metric("Description", description)

            # Content statistics
            text_content = soup.get_text()
            words = text_content.split()
            st.metric("Word Count", len(words))
            st.metric("Unique Words", len(set(words)))

            # Frequency analysis (same as before)
            freq = nltk.FreqDist(words)
            keywords = freq.most_common(10)
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))

            # --- AI-Powered Insights ---
            # NOTE(review): the full page text is sent in each prompt with no
            # length limit — confirm this fits the model's context window.
            st.subheader("AI Insights:")
            st.write(" ")

            st.markdown("**Main Theme:**")
            ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
            st.markdown(f" {ai_theme}")

            st.write(" ")

            st.markdown("**Suggested Keywords:**")
            ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
            st.markdown(f" {ai_keywords}")

            st.write(" ")

            st.markdown("**Content Improvement:**")
            ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
            st.markdown(f" {ai_improvement}")

            # --- Display Frequency Results ---
            st.write(" ")

            st.subheader("Top Keywords:")
            st.write(" ")
            st.dataframe(df_keywords)

            st.subheader("What's the Value of This AI Analysis?")
            st.write(" ")

            st.markdown("""
* **Uncover Hidden Insights:** AI can analyze your content in much more nuanced ways, helping you spot connections and trends you might have missed.
* **Go Beyond Keywords:** AI can provide in-depth insights into your content's main themes, tone, and even suggest relevant topics to explore further.
* **AI as a Partner:** Think of this AI as your content strategist, offering guidance and actionable steps to make your content even better.

Ready to leverage the power of AI to optimize your content? Start putting the suggestions and insights you just received into practice. See what difference AI can make in your writing! 🚀
""")
        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
        except Exception as e:
            st.error(f"An error occurred: {e}")
|
||||
@@ -1,377 +0,0 @@
|
||||
import streamlit as st
|
||||
import advertools as adv
|
||||
import pandas as pd
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
from datetime import datetime
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
# Title and introduction
|
||||
def show_title_and_intro():
    """Render the page title and the introductory paragraph."""
    intro_text = """
Welcome to the URL SEO Checkup! This tool is like a doctor for your website links.
Just paste your URL, and we'll check if it's healthy and ready to climb the search engine ladder.
"""
    st.title("🌟 URL SEO Checkup: Your Link's Health Report 🌟")
    st.write(intro_text)
|
||||
|
||||
|
||||
# Basic HTTPS Check
|
||||
def check_https(url):
    """Show a success note when *url* starts with 'https://', else a warning and fix hint."""
    st.subheader("The Basics - Are We Looking Good?")
    st.write("---")

    uses_https = url.startswith("https://")
    if not uses_https:
        st.warning("🚧 Heads Up! Your URL doesn't use 'https://'. This is a red flag for Google.")
        st.info("🔧 **How to fix:** Contact your hosting provider or website developer to install an SSL certificate. This will secure your site with HTTPS.")
        return

    st.success("✨ You're using HTTPS! This adds extra security, and Google rewards that with better rankings. Keep it up! ✨")
|
||||
|
||||
|
||||
# URL Length Check
|
||||
def check_url_length(path):
    """Show a success note when *path* is at most 50 characters, else a warning and fix hint."""
    st.subheader("The Length Test - Keep it Short and Sweet!")
    st.write("---")

    too_long = len(path) > 50
    if too_long:
        st.warning("🧭 Tip: Try shortening your URL. Shorter URLs are easier to remember and better for SEO.")
        st.info("🔧 **How to fix:** Consider removing unnecessary words or folders in the URL. Aim for concise, descriptive URLs that are easy for users to read.")
        return

    st.success("🏆 Great! Your URL is short and user-friendly. Google loves short URLs! 🏆")
|
||||
|
||||
|
||||
# Hyphen Check
|
||||
def check_hyphens(path):
    """Show a success note when *path* contains a hyphen, else a warning and fix hint."""
    st.subheader("The Hyphen Check - Use Hyphens for Clear Separation!")
    st.write("---")

    has_hyphen = path.find("-") != -1
    if not has_hyphen:
        st.warning("❓ Did you know? Using hyphens between words (like 'shoes-for-sale') helps Google understand your URL better!")
        st.info("🔧 **How to fix:** Update your URL to use hyphens (-) instead of spaces or underscores (_). For example, 'shoes-for-sale' instead of 'shoes_for_sale'.")
        return

    st.success("😎 You're on the right track! Using hyphens makes your URL more readable for both users and Google. 😎")
|
||||
|
||||
|
||||
# File Extension Check
|
||||
def check_file_extension(path):
    """Report whether the last segment of *path* ends in a file extension.

    Args:
        path: The path component of the URL being checked.
    """
    st.subheader("File Extension Check - Showing Your Files With Pride!")
    st.write("---")

    # Only the last path segment can carry a file extension. Testing for a dot
    # anywhere in the path wrongly accepted paths such as "/v1.2/page".
    last_segment = path.rstrip("/").rsplit("/", 1)[-1]
    _, extension = os.path.splitext(last_segment)

    if extension:
        st.success("🥳 File Extension Check: Your URL includes a file extension like '.html', which helps Google categorize your page. Nice job! 🥳")
    else:
        st.warning("🤔 Your URL seems to be missing a file extension like '.html' or '.php'.")
        st.info("🔧 **How to fix:** While file extensions are not always required, adding them to static pages (like .html or .php) can improve clarity for search engines.")
|
||||
|
||||
|
||||
# Keyword Insights
|
||||
def show_keyword_insights(netloc, path):
    """Show a short keyword explainer plus the given domain and path."""
    st.subheader("Bonus Insight - Let's Talk Keywords")
    st.write("---")

    st.info("Keywords are the words people use to search for information online. Your goal is to help Google understand what your page is about by using the right keywords in your URL!")

    # Markdown block that echoes the URL parts back to the user.
    keyword_summary = f"""
**Your Domain:** {netloc}
**Your URL Path:** {path}

**Suggestion:** Consider adding a primary keyword to your URL if it aligns with your page content. But don't overdo it – too many keywords can hurt your SEO. Keep it natural!
"""
    st.markdown(keyword_summary)
|
||||
|
||||
|
||||
# Enhanced HTTP Headers Analysis using advertools
|
||||
def analyze_http_headers(url):
    """Analyze HTTP headers using advertools for comprehensive SEO insights.

    Crawls the headers of *url* into a temporary JSON-lines file, loads the
    file into a DataFrame, and renders four tabs (security, SEO, performance,
    technical details) plus a CSV download button. Any exception is shown
    with ``st.error`` instead of being raised.

    Args:
        url: The URL whose response headers are analyzed.
    """
    st.subheader("🔍 Advanced HTTP Headers Analysis")
    st.write("---")

    try:
        with st.spinner("Analyzing HTTP headers..."):
            # Create a temporary file for output
            with tempfile.NamedTemporaryFile(mode='w', suffix='.jl', delete=False) as tmp_file:
                temp_filename = tmp_file.name

            try:
                # Use advertools to crawl headers
                adv.crawl_headers([url], temp_filename)

                # Read the results
                headers_df = pd.read_json(temp_filename, lines=True)
            finally:
                # Clean up temp file even when the crawl or the parse fails.
                # The file was created with delete=False, so nothing else
                # removes it.
                os.unlink(temp_filename)

        if not headers_df.empty:
            # Display key SEO-relevant headers
            st.success("✅ Successfully analyzed HTTP headers!")

            # Create tabs for different header categories
            tab1, tab2, tab3, tab4 = st.tabs(["🔒 Security", "📈 SEO Headers", "⚡ Performance", "📊 Technical Details"])

            with tab1:
                st.write("### Security Headers Analysis")
                security_headers = {
                    'resp_headers_X-Frame-Options': 'X-Frame-Options',
                    'resp_headers_X-Content-Type-Options': 'X-Content-Type-Options',
                    'resp_headers_X-XSS-Protection': 'X-XSS-Protection',
                    'resp_headers_Strict-Transport-Security': 'Strict-Transport-Security',
                    'resp_headers_Content-Security-Policy': 'Content-Security-Policy',
                    'resp_headers_Referrer-Policy': 'Referrer-Policy'
                }

                for header_key, header_name in security_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.success(f"✅ **{header_name}**: Present")
                        with st.expander(f"View {header_name} Details"):
                            st.code(headers_df[header_key].iloc[0])
                    else:
                        st.warning(f"⚠️ **{header_name}**: Missing")
                        st.info(f"💡 **Recommendation**: Add {header_name} header for better security")

            with tab2:
                st.write("### SEO-Related Headers")
                seo_headers = {
                    'resp_headers_Content-Type': 'Content-Type',
                    'resp_headers_Content-Language': 'Content-Language',
                    'resp_headers_Cache-Control': 'Cache-Control',
                    'resp_headers_Expires': 'Expires',
                    'resp_headers_Last-Modified': 'Last-Modified',
                    'resp_headers_ETag': 'ETag'
                }

                for header_key, header_name in seo_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.success(f"✅ **{header_name}**: {headers_df[header_key].iloc[0]}")
                    else:
                        st.info(f"ℹ️ **{header_name}**: Not set or not detected")

                # Special handling for content-type
                if 'resp_headers_Content-Type' in headers_df.columns:
                    # Compare case-insensitively: servers commonly send
                    # "charset=UTF-8", which the original check missed.
                    content_type = str(headers_df['resp_headers_Content-Type'].iloc[0]).lower()
                    if 'text/html' in content_type:
                        st.success("🎯 **Content-Type**: Properly set for HTML content")
                    if 'charset=utf-8' in content_type:
                        st.success("🌍 **Character Encoding**: UTF-8 detected - Great for international SEO!")

            with tab3:
                st.write("### Performance Headers")
                perf_headers = {
                    'resp_headers_Server': 'Server',
                    'resp_headers_X-Powered-By': 'X-Powered-By',
                    'resp_headers_Connection': 'Connection',
                    'resp_headers_Transfer-Encoding': 'Transfer-Encoding',
                    'resp_headers_Content-Encoding': 'Content-Encoding',
                    'resp_headers_Content-Length': 'Content-Length'
                }

                for header_key, header_name in perf_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.info(f"📊 **{header_name}**: {headers_df[header_key].iloc[0]}")

                # Check for compression
                if 'resp_headers_Content-Encoding' in headers_df.columns:
                    encoding = headers_df['resp_headers_Content-Encoding'].iloc[0]
                    if 'gzip' in str(encoding) or 'br' in str(encoding):
                        st.success("🚀 **Compression**: Enabled - Great for page speed!")
                    else:
                        st.warning("⚠️ **Compression**: Consider enabling GZIP or Brotli compression")
                else:
                    st.warning("⚠️ **Compression**: Not detected - Consider enabling compression")

                # Check status code
                if 'status' in headers_df.columns:
                    status = headers_df['status'].iloc[0]
                    if status == 200:
                        st.success(f"✅ **HTTP Status**: {status} OK")
                    else:
                        st.warning(f"⚠️ **HTTP Status**: {status}")

            with tab4:
                st.write("### Complete Headers Analysis")

                # Show response headers only (more relevant for SEO)
                response_headers = {col: col.replace('resp_headers_', '') for col in headers_df.columns if col.startswith('resp_headers_')}
                if response_headers:
                    st.write("**Response Headers:**")
                    for col, display_name in response_headers.items():
                        if not pd.isna(headers_df[col].iloc[0]):
                            st.write(f"**{display_name}**: `{headers_df[col].iloc[0]}`")

                # Show crawl metadata
                st.write("**Crawl Information:**")
                metadata_cols = ['url', 'status', 'crawl_time', 'download_latency']
                for col in metadata_cols:
                    if col in headers_df.columns:
                        st.write(f"**{col.replace('_', ' ').title()}**: `{headers_df[col].iloc[0]}`")

                # Download option
                csv = headers_df.to_csv(index=False)
                st.download_button(
                    label="📥 Download Complete Headers Data as CSV",
                    data=csv,
                    file_name=f"headers_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv"
                )

        else:
            st.error("❌ Could not retrieve headers data")

    except Exception as e:
        st.error(f"❌ Error analyzing headers: {str(e)}")
        st.info("💡 **Tip**: Make sure the URL is accessible and try again")
|
||||
|
||||
|
||||
# Enhanced robots.txt and sitemap detection
|
||||
def check_robots_and_sitemap(url):
    """Report whether the site behind *url* serves a robots.txt and an XML sitemap.

    The site root is derived from the scheme and network location of *url*.
    ``/robots.txt`` is requested first; on HTTP 200 the first 1000 characters
    of the response are shown in an expander. Then three conventional sitemap
    paths are tried in order and the first one answering HTTP 200 is reported.

    All results are rendered through Streamlit; nothing is returned.

    Args:
        url: Absolute URL of any page on the site to inspect.
    """
    st.subheader("🤖 Robots.txt & Sitemap Detection")
    st.write("---")

    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    # Check robots.txt
    robots_url = f"{base_url}/robots.txt"
    try:
        response = requests.get(robots_url, timeout=10)
    except requests.exceptions.RequestException:
        # Only network/HTTP-layer failures are treated as "could not check";
        # anything else (including Streamlit's own control-flow exceptions)
        # is allowed to propagate instead of being silently swallowed.
        st.error("❌ Could not check robots.txt")
    else:
        if response.status_code == 200:
            st.success(f"✅ **Robots.txt found**: {robots_url}")
            with st.expander("View robots.txt content"):
                st.code(response.text[:1000])  # Show first 1000 characters
        else:
            st.warning(f"⚠️ **Robots.txt not found**: Consider creating one at {robots_url}")

    # Check common sitemap locations
    sitemap_locations = [
        f"{base_url}/sitemap.xml",
        f"{base_url}/sitemap_index.xml",
        f"{base_url}/sitemaps.xml",
    ]

    sitemap_found = False
    for sitemap_url in sitemap_locations:
        try:
            response = requests.get(sitemap_url, timeout=10)
        except requests.exceptions.RequestException:
            # Best effort: an unreachable candidate just means "try the next one".
            continue
        if response.status_code == 200:
            st.success(f"✅ **Sitemap found**: {sitemap_url}")
            sitemap_found = True
            break

    if not sitemap_found:
        st.warning("⚠️ **Sitemap not found**: Consider creating an XML sitemap")
        st.info("💡 **Recommendation**: Submit your sitemap to Google Search Console")
|
||||
|
||||
|
||||
# Enhanced URL structure analysis
|
||||
def enhanced_url_analysis(url):
    """Render a two-column Streamlit report on the structure of *url*.

    The left column lists the parsed components (protocol, domain, path, and
    the query/fragment when non-empty). The right column shows three checks:
    total URL length (<= 60 success, <= 100 warning, otherwise error), path
    depth in non-empty segments (<= 3 success, otherwise warning), and whether
    the URL contains any character outside the allowed set defined below.

    Nothing is returned; all output goes through ``st``.
    """
    st.subheader("🔗 Enhanced URL Structure Analysis")
    st.write("---")

    parts = urlparse(url)

    # Two side-by-side panels: raw components on the left, checks on the right.
    components_panel, seo_panel = st.columns(2)

    with components_panel:
        st.write("**URL Components:**")
        st.info(f"**Protocol**: {parts.scheme}")
        st.info(f"**Domain**: {parts.netloc}")
        st.info(f"**Path**: {parts.path}")
        # Query string and fragment are optional; show them only when present.
        if parts.query:
            st.info(f"**Query**: {parts.query}")
        if parts.fragment:
            st.info(f"**Fragment**: {parts.fragment}")

    with seo_panel:
        st.write("**SEO Analysis:**")

        # Length check, tested from the longest band down.
        url_length = len(url)
        if url_length > 100:
            st.error(f"❌ **URL Length**: {url_length} characters (Too long)")
        elif url_length > 60:
            st.warning(f"⚠️ **URL Length**: {url_length} characters (Good, but could be shorter)")
        else:
            st.success(f"✅ **URL Length**: {url_length} characters (Excellent)")

        # Depth = number of non-empty path segments between slashes.
        depth = sum(1 for segment in parts.path.split('/') if segment)
        if depth > 3:
            st.warning(f"⚠️ **URL Depth**: {depth} levels (Consider flattening)")
        else:
            st.success(f"✅ **URL Depth**: {depth} levels (Good)")

        # Distinct characters of the URL that fall outside the allowed set.
        allowed_chars = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~:/?#[]@!$&\'()*+,;=')
        special_chars = {ch for ch in url if ch not in allowed_chars}
        if special_chars:
            st.warning(f"⚠️ **Special Characters**: Found {len(special_chars)} special characters")
        else:
            st.success("✅ **Special Characters**: Clean URL structure")
|
||||
|
||||
|
||||
# Enhanced main function to run the analysis
|
||||
def run_analysis(url):
    """Run every URL check in sequence and finish with a fixed summary.

    Order of work: the HTTPS check on the full URL, three path-level checks
    (length, hyphens, file extension), the enhanced structure / HTTP header /
    robots-and-sitemap analyses on the full URL, the keyword insights for the
    domain and path, and finally a static list of recommendations.

    Args:
        url: The URL to analyze; it is parsed here to obtain domain and path.
    """
    parts = urlparse(url)
    domain, url_path = parts.netloc, parts.path

    # Original checks: one on the whole URL, three on the path only.
    check_https(url)
    check_url_length(url_path)
    check_hyphens(url_path)
    check_file_extension(url_path)

    # Enhanced analyses, each working from the complete URL.
    enhanced_url_analysis(url)
    analyze_http_headers(url)
    check_robots_and_sitemap(url)

    # Keyword insights need the domain and the path separately.
    show_keyword_insights(domain, url_path)

    # Closing summary.
    st.subheader("📋 Analysis Summary & Recommendations")
    st.write("---")
    st.success("🎉 **Analysis Complete!** Review the findings above and implement the recommendations for better SEO performance.")

    st.write("**Key Recommendations:**")
    key_recommendations = (
        "✅ Ensure HTTPS is enabled for security and SEO benefits",
        "🔗 Keep URLs short, descriptive, and user-friendly",
        "🔒 Implement security headers to protect your site",
        "🤖 Create and maintain robots.txt and XML sitemaps",
        "⚡ Enable compression and optimize HTTP headers for performance",
        "📊 Monitor your URL structure and avoid excessive depth",
    )
    for recommendation in key_recommendations:
        st.write(recommendation)
|
||||
|
||||
|
||||
# Display the app
|
||||
def url_seo_checker():
    """Streamlit entry point: collect a URL and run the full analysis on demand.

    Shows the title/intro, a URL text box pre-filled with an example, and an
    analyze button. When the button is pressed, the input is stripped and
    must begin with ``http://`` or ``https://``; otherwise an error is shown
    and the script run is stopped. A valid URL is passed to ``run_analysis``.
    """
    show_title_and_intro()

    raw_url = st.text_input("Paste your URL here:", "https://www.example.com/")
    st.write(" ")  # vertical spacing

    # Nothing more to do until the user asks for an analysis.
    if not st.button("Let's Analyze!"):
        return

    with st.spinner('Checking your link...'):
        url = raw_url.strip()

        has_web_scheme = url.startswith("http://") or url.startswith("https://")
        if not has_web_scheme:
            st.error("Oops! It seems like your URL needs 'http://' or 'https://' at the beginning. Please add it!")
            st.stop()

        run_analysis(url)
|
||||
@@ -1,113 +0,0 @@
|
||||
"""Word cloud generation tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Top-level Streamlit script: fetch a page from an authorized domain, count its
# most frequent words and two-word phrases (stop words removed), and show both
# tables followed by a short explanation of how to use them.

# Seconds to wait for the page; without a limit a stalled server hangs the app.
_FETCH_TIMEOUT_SECONDS = 10

# Explanatory text shown below the result tables.
_ANALYSIS_VALUE_MARKDOWN = """
* **See What Resonates:** Discover the most popular words and phrases used on your website. This can reveal themes and topics that your audience is interested in.
* **Find Keywords for SEO:** The analysis helps identify relevant keywords you could use for your website content and marketing efforts.
* **Improve Your Content:** You can understand how people might search for similar content and ensure you're providing the right keywords.
* **Stand Out:** Compare your results to other websites or competitors to understand how you can differentiate your content.

Ready to dive deeper into your content's vocabulary? Start by making some of the keywords you just discovered the stars of your next blog post or social media message. You might be surprised at the impact! 🚀
"""

st.title("🔎 Web Content Analyzer: Uncover Your Words' Power! 🔎")
st.write("""
Welcome! This tool helps you understand the words that drive your website content. Just paste in your web page's
URL, and we'll give you insights you can use to improve your content and reach more people!
""")

url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
num_results_input = st.slider("How many top words/phrases should we show?", min_value=10, max_value=150, value=50)
st.write(" ")

# Only pages on these hosts may be fetched.
# NOTE(review): the pre-filled example URL uses "www.example.com", which is not
# in this list and is therefore rejected — confirm whether that is intended.
authorized_domains = ["example.com", "another-example.com"]

if st.button("Analyze Your Content!"):
    with st.spinner('Analyzing your content...'):
        url = url_input.strip()
        language = language_input.lower()
        num_results = num_results_input

        # Require a real web scheme; a bare "http" prefix would also accept
        # strings such as "httpx:..." that are not fetchable URLs.
        if not url.startswith(("http://", "https://")):
            st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
            st.stop()

        from urllib.parse import urlparse
        parsed_url = urlparse(url)
        # Compare the host only: `hostname` is lower-cased and excludes any
        # port or user-info, so "Example.com" and "example.com:443" match.
        requested_host = parsed_url.hostname or ""
        if requested_host not in authorized_domains:
            st.error("The domain of the provided URL is not authorized. Please use an authorized domain.")
            st.stop()

        try:
            response = requests.get(url, timeout=_FETCH_TIMEOUT_SECONDS)
            response.raise_for_status()  # Check for errors

            soup = BeautifulSoup(response.content, 'html.parser')
            # Some documents have no <body>; fall back to all text in the page
            # instead of failing on a missing element.
            body = soup.find('body')
            body_txt = body.text if body is not None else soup.get_text()

            words = [w.lower() for w in word_tokenize(body_txt)]
            # A set makes the per-word stop-word test constant time.
            stopw = set(nltk.corpus.stopwords.words(language))

            final_words = [w for w in words if w not in stopw and w.isalpha()]

            # Frequency analysis
            freq = nltk.FreqDist(final_words)
            keywords = freq.most_common(num_results)

            bigrams = ngrams(final_words, 2)
            freq_bigrams = nltk.FreqDist(bigrams)
            bigrams_freq = freq_bigrams.most_common(num_results)

            # Create DataFrames for Display
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
            df_bigrams = pd.DataFrame(bigrams_freq, columns=("Bigram", "Frequency"))

            st.subheader("Top Keywords and Phrases:")
            st.write(" ")
            st.dataframe(df_keywords)

            st.write(" ")

            st.subheader("Top Two-Word Phrases:")
            st.write(" ")
            st.dataframe(df_bigrams)

            st.write(" ")
            st.subheader("What's the Value of This Analysis?")
            st.write(" ")

            st.markdown(_ANALYSIS_VALUE_MARKDOWN)

        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
        except LookupError as e:
            # NLTK raises LookupError when a required resource (tokenizer
            # models, stop-word corpus) has not been downloaded.
            st.error(f"Oops! The language data needed for this analysis is not available. Error: {e}")
|
||||
|
||||
def generate_wordcloud(text):
    """Render a word cloud and simple word counts for *text* in Streamlit.

    If *text* is empty or contains only whitespace, a warning is shown and
    nothing else is rendered. Otherwise an 800x400 word cloud on a white
    background is drawn, followed by two metrics: the number of
    whitespace-separated words and the number of distinct words.

    Args:
        text: The text to visualize.

    Returns:
        None. All output is written through ``st``.
    """
    # Whitespace-only input has no words to plot, so treat it like empty input
    # rather than letting word-cloud generation fail.
    if not text or not text.strip():
        st.warning("Please enter some text to generate a word cloud.")
        return

    # Create and generate a word cloud image
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # Display the word cloud
    st.subheader("Word Cloud Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates until it is closed; release it
        # so figures do not pile up across repeated calls.
        plt.close(fig)

    # Add some statistics
    st.subheader("Text Statistics")
    words = text.split()
    unique_words = set(words)
    st.metric("Total Words", len(words))
    st.metric("Unique Words", len(unique_words))
|
||||
Reference in New Issue
Block a user