ALwrity Version 0.5.0 (FastAPI + React)
This commit is contained in:
@@ -572,6 +572,7 @@ def render_ai_content_strategy():
|
||||
budget = st.selectbox(
|
||||
"Monthly Content Budget",
|
||||
[
|
||||
"No budget",
|
||||
"Under $1,000",
|
||||
"$1,000 - $5,000",
|
||||
"$5,000 - $10,000",
|
||||
|
||||
@@ -1,135 +0,0 @@
|
||||
###################################################
|
||||
#
|
||||
# The script covers many SEO factors, including keyword presence, title length,
|
||||
# meta description, images, img alt text, headings, internal links, external links,
|
||||
# spelling errors, grammar errors, and readability.
|
||||
#
|
||||
##################################################
|
||||
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from textstat import flesch_reading_ease
|
||||
import spellchecker
|
||||
|
||||
class SEOAnalyzer:
    """Compute simple on-page SEO metrics for an HTML document.

    The analyzer looks at keyword density, keyword presence in the title and
    image alt text, headings, link counts, readability and spelling, and
    folds a few of those into a small integer score.
    """

    # Recommended <title> length, measured in characters.
    RECOMMENDED_TITLE_LENGTH = (50, 70)

    def __init__(self, html_content, target_keywords):
        """Store the raw HTML and the keywords to look for.

        Args:
            html_content: HTML markup to analyze.
            target_keywords: Iterable of keyword strings.
        """
        self.html_content = html_content
        self.target_keywords = target_keywords

    @staticmethod
    def _keyword_density(text, keywords):
        """Return ``{keyword: percent}`` of case-insensitive occurrences per word.

        An empty text yields 0.0 for every keyword instead of dividing by zero.
        """
        word_count = len(text.split())
        if not word_count:
            return {keyword: 0.0 for keyword in keywords}
        lowered = text.lower()
        return {
            keyword: (lowered.count(keyword.lower()) / word_count) * 100
            for keyword in keywords
        }

    @staticmethod
    def _count_links(hrefs):
        """Return ``(internal, external)`` counts for a list of href values.

        Absolute ``http(s)://`` and protocol-relative ``//`` targets count as
        external because the page's own host is not known here. Empty hrefs,
        fragment-only anchors and ``mailto:``/``tel:``/``javascript:`` targets
        are ignored. Everything else (relative paths) counts as internal.
        """
        external_prefixes = ('http://', 'https://', '//')
        ignored_prefixes = ('#', 'mailto:', 'tel:', 'javascript:')
        internal = external = 0
        for href in hrefs:
            target = (href or '').strip().lower()
            if not target or target.startswith(ignored_prefixes):
                continue
            if target.startswith(external_prefixes):
                external += 1
            else:
                internal += 1
        return internal, external

    def analyze_html_content(self):
        """Analyze ``self.html_content`` and return a dict of SEO metrics.

        Returns:
            A dict with the keys 'Keyword Density', 'Keyword Presence in Title',
            'Keyword Presence in Image Alt Text', 'Headings Text',
            'Internal Links', 'External Links', 'Readability Score',
            'Spelling Errors', 'Grammar Errors', 'SEO Score' and 'Suggestions'.
            'Grammar Errors' is None when the spell checker exposes no grammar
            check. On any failure the result is ``{'error': <message>}``.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')

            # Extract visible text and collapse runs of whitespace.
            text = re.sub(r'\s+', ' ', ' '.join(soup.stripped_strings))
            text_lower = text.lower()

            keyword_density = self._keyword_density(text, self.target_keywords)

            # Keyword presence in the <title>.
            title_tag = soup.find('title')
            title_text = title_tag.text.strip().lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text
                for keyword in self.target_keywords
            }

            # Keyword presence in image alt text.
            img_alt_text = [img.get('alt', '').lower() for img in soup.find_all('img')]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt_text for alt_text in img_alt_text)
                for keyword in self.target_keywords
            }

            # Headings, flattened into one lowercase string.
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(heading.text.lower() for heading in headings)

            # Link counts.
            hrefs = [link.get('href', '') for link in soup.find_all('a')]
            internal_links, external_links = self._count_links(hrefs)

            readability_score = flesch_reading_ease(text)

            # Spelling. The spell checker is only asked for a grammar check if
            # it actually provides one; otherwise the metric is reported as
            # unavailable rather than failing the whole analysis.
            spell = spellchecker.SpellChecker()
            spelling_errors = len(spell.unknown(text.split()))
            check_grammar = getattr(spell, 'check_grammar', None)
            grammar_errors = len(check_grammar(text)) if callable(check_grammar) else None

            # SEO score: one point per keyword found (case-insensitive) ...
            seo_score = sum(
                1 for keyword in self.target_keywords if keyword.lower() in text_lower
            )

            # ... plus one point when the title length (characters) is in range.
            shortest, longest = self.RECOMMENDED_TITLE_LENGTH
            if shortest <= len(title_text) <= longest:
                seo_score += 1

            suggestions = []
            if seo_score < 5:
                suggestions.append("Add more relevant keywords to your HTML content.")
                suggestions.append("Make sure your title contains keywords.")
                suggestions.append("Add keywords to image alt text.")
                suggestions.append("Add headings to your HTML content.")
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions
            }
        except Exception as e:
            # Public boundary: callers receive an error dict instead of an exception.
            return {'error': str(e)}
|
||||
|
||||
# Example usage:
|
||||
if __name__ == "__main__":
    # Small self-contained sample document for a manual smoke run.
    html_content = """
<!DOCTYPE html>
<html>
<head>
<title>SEO Analyzer - Sample Page</title>
<meta name="description" content="This is a sample page for SEO analysis.">
</head>
<body>
<h1>Welcome to the SEO Analyzer</h1>
<p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
<img src="image1.jpg" alt="SEO image">
<img src="image2.jpg" alt="Keywords image">
</body>
</html>
"""

    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords

    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()

    if 'error' in results:
        # analyze_html_content() reports failures as {'error': ...}; indexing
        # the metric keys in that case would raise KeyError.
        print(f"SEO analysis failed: {results['error']}")
    else:
        print("SEO Analysis Results:")
        print(f"Keyword Density: {results['Keyword Density']}")
        print(f"Keyword Presence in Title: {results['Keyword Presence in Title']}")
        print(f"Keyword Presence in Image Alt Text: {results['Keyword Presence in Image Alt Text']}")
        print(f"Headings Text: {results['Headings Text']}")
        print(f"Internal Links: {results['Internal Links']}")
        print(f"External Links: {results['External Links']}")
        print(f"Readability Score: {results['Readability Score']}")
        print(f"Spelling Errors: {results['Spelling Errors']}")
        print(f"Grammar Errors: {results['Grammar Errors']}")
        print(f"SEO Score: {results['SEO Score']}")
        print("Suggestions:")
        for suggestion in results['Suggestions']:
            print(suggestion)
|
||||
|
||||
@@ -1,182 +0,0 @@
|
||||
# Content Gap Analysis Tool
|
||||
|
||||
A comprehensive AI-powered tool for analyzing content gaps and generating strategic content recommendations.
|
||||
|
||||
## Overview
|
||||
|
||||
The Content Gap Analysis tool combines multiple SEO tools to provide a complete analysis of your content strategy, identify opportunities, and generate actionable recommendations. It leverages existing AI SEO tools and adds new capabilities for comprehensive content analysis.
|
||||
|
||||
## Workflow Design
|
||||
|
||||
### 1. Website Analysis
|
||||
**Input:** Website URL
|
||||
**Tools Integration:**
|
||||
- `analyze_onpage_seo()`: Analyze content quality and structure
|
||||
- `url_seo_checker()`: Check technical SEO aspects
|
||||
- `google_pagespeed_insights()`: Assess page performance
|
||||
|
||||
**Analysis Components:**
|
||||
- Content structure mapping
|
||||
- Topic categorization
|
||||
- Content depth assessment
|
||||
- Performance metrics
|
||||
|
||||
### 2. Competitor Analysis
|
||||
**Input:** Competitor URLs
|
||||
**Tools Integration:**
|
||||
- `url_seo_checker()`: Analyze competitor URLs
|
||||
- `analyze_onpage_seo()`: Compare content quality
|
||||
- `ai_title_generator()`: Analyze title patterns
|
||||
|
||||
**Analysis Components:**
|
||||
- Content strategy comparison
|
||||
- Topic coverage gaps
|
||||
- Content format analysis
|
||||
- Title pattern analysis
|
||||
|
||||
### 3. Keyword Research
|
||||
**Input:** Industry/Niche
|
||||
**Tools Integration:**
|
||||
- `ai_title_generator()`: Generate keyword-based titles
|
||||
- `metadesc_generator_main()`: Analyze meta descriptions for keyword usage
|
||||
- `ai_structured_data()`: Check structured data implementation
|
||||
|
||||
**Analysis Components:**
|
||||
- Keyword opportunity identification
|
||||
- Search intent analysis
|
||||
- Content format suggestions
|
||||
- Topic clustering
|
||||
|
||||
### 4. AI-Powered Recommendations
|
||||
**Tools Integration:**
|
||||
- `ai_title_generator()`: Generate content titles
|
||||
- `metadesc_generator_main()`: Create content summaries
|
||||
- `ai_structured_data()`: Suggest structured data implementation
|
||||
|
||||
**Output Components:**
|
||||
- Content topic suggestions
|
||||
- Format recommendations
|
||||
- Priority scoring
|
||||
- Implementation timeline
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Phase 1: Core Infrastructure
|
||||
1. Create base classes and interfaces
|
||||
2. Implement data collection modules
|
||||
3. Set up AI model integration
|
||||
4. Develop data storage system
|
||||
|
||||
### Phase 2: Tool Integration
|
||||
1. Integrate existing SEO tools
|
||||
2. Create unified API for tool interaction
|
||||
3. Implement data sharing between tools
|
||||
4. Develop result aggregation system
|
||||
|
||||
### Phase 3: Analysis Engine
|
||||
1. Implement content structure analysis
|
||||
2. Develop competitor analysis algorithms
|
||||
3. Create keyword research system
|
||||
4. Build recommendation engine
|
||||
|
||||
### Phase 4: UI/UX Development
|
||||
1. Create step-by-step workflow interface
|
||||
2. Implement progress tracking
|
||||
3. Develop visualization components
|
||||
4. Add export functionality
|
||||
|
||||
## Technical Requirements
|
||||
|
||||
### Dependencies
|
||||
- Existing SEO tools from `lib/ai_seo_tools/`
|
||||
- AI models for content analysis
|
||||
- Web scraping capabilities
|
||||
- Data storage system
|
||||
|
||||
### File Structure
|
||||
```
|
||||
content_gap_analysis/
|
||||
├── __init__.py
|
||||
├── main.py
|
||||
├── website_analyzer.py
|
||||
├── competitor_analyzer.py
|
||||
├── keyword_researcher.py
|
||||
├── recommendation_engine.py
|
||||
├── utils/
|
||||
│ ├── __init__.py
|
||||
│ ├── data_collector.py
|
||||
│ ├── content_parser.py
|
||||
│ └── ai_processor.py
|
||||
└── tests/
|
||||
├── __init__.py
|
||||
├── test_website_analyzer.py
|
||||
├── test_competitor_analyzer.py
|
||||
└── test_keyword_researcher.py
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
### Existing Tools
|
||||
1. **On-Page SEO Analyzer**
|
||||
- Function: `analyze_onpage_seo()`
|
||||
- Purpose: Content quality assessment
|
||||
- Integration: Content structure analysis
|
||||
|
||||
2. **URL SEO Checker**
|
||||
- Function: `url_seo_checker()`
|
||||
- Purpose: Technical optimization
|
||||
- Integration: URL structure analysis
|
||||
|
||||
3. **Blog Title Generator**
|
||||
- Function: `ai_title_generator()`
|
||||
- Purpose: Content ideas
|
||||
- Integration: Keyword analysis
|
||||
|
||||
4. **Meta Description Generator**
|
||||
- Function: `metadesc_generator_main()`
|
||||
- Purpose: Content summaries
|
||||
- Integration: Content optimization
|
||||
|
||||
5. **Structured Data Generator**
|
||||
- Function: `ai_structured_data()`
|
||||
- Purpose: Rich snippets
|
||||
- Integration: Content enhancement
|
||||
|
||||
### New Components
|
||||
1. **Content Structure Analyzer**
|
||||
- Purpose: Map website content structure
|
||||
- Output: Content hierarchy and relationships
|
||||
|
||||
2. **Competitor Content Analyzer**
|
||||
- Purpose: Analyze competitor content strategy
|
||||
- Output: Content gaps and opportunities
|
||||
|
||||
3. **Keyword Opportunity Finder**
|
||||
- Purpose: Identify keyword gaps
|
||||
- Output: Keyword recommendations
|
||||
|
||||
4. **AI Recommendation Engine**
|
||||
- Purpose: Generate content recommendations
|
||||
- Output: Actionable content strategy
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Advanced Analytics**
|
||||
- Content performance tracking
|
||||
- ROI analysis
|
||||
- Trend prediction
|
||||
|
||||
2. **Automation Features**
|
||||
- Automated content planning
|
||||
- Schedule generation
|
||||
- Priority scoring
|
||||
|
||||
3. **Integration Expansion**
|
||||
- CMS integration
|
||||
- Analytics platform connection
|
||||
- Social media analysis
|
||||
|
||||
4. **AI Improvements**
|
||||
- Advanced topic modeling
|
||||
- Sentiment analysis
|
||||
- Content quality scoring
|
||||
@@ -1,36 +0,0 @@
|
||||
"""
|
||||
Content Gap Analysis Tool for Alwrity.
|
||||
"""
|
||||
|
||||
from .ui import ContentGapAnalysisUI
|
||||
from .main import ContentGapAnalysis
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .website_analyzer import WebsiteAnalyzer
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor
|
||||
|
||||
__all__ = [
|
||||
'ContentGapAnalysisUI',
|
||||
'ContentGapAnalysis',
|
||||
'KeywordResearcher',
|
||||
'CompetitorAnalyzer',
|
||||
'WebsiteAnalyzer',
|
||||
'RecommendationEngine',
|
||||
'AIProcessor'
|
||||
]
|
||||
|
||||
def run_content_gap_analysis():
    """Configure the Streamlit page and run the Content Gap Analysis UI."""
    # Imported here because this module has no top-level streamlit import;
    # without it the `st` reference below raises NameError.
    import streamlit as st

    # Page configuration is applied before the UI object is constructed so
    # that it precedes any Streamlit call the UI may make.
    st.set_page_config(
        page_title="Content Gap Analysis",
        page_icon="📊",
        layout="wide"
    )

    ui = ContentGapAnalysisUI()
    ui.run()
|
||||
@@ -1,711 +0,0 @@
|
||||
"""
|
||||
Competitor analyzer for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from collections import Counter, defaultdict
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Logging setup: one rotating file sink (DEBUG) and one console sink (INFO).
_FILE_LOG_FORMAT = "{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
_CONSOLE_LOG_FORMAT = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"

# Make sure the target directory for the file sink is present.
os.makedirs("logs", exist_ok=True)

# Drop the default handler, then register the two sinks.
logger.remove()
logger.add(
    "logs/competitor_analyzer.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format=_FILE_LOG_FORMAT,
)
logger.add(sys.stdout, level="INFO", format=_CONSOLE_LOG_FORMAT)
|
||||
|
||||
class CompetitorAnalyzer:
|
||||
"""Analyzes competitor content and market position."""
|
||||
|
||||
def __init__(self):
    """Create the collaborator objects and the stage catalogue."""
    # Ordered step labels of the single 'competitor_analysis' stage.
    analysis_steps = [
        'Initializing competitor analysis',
        'Analyzing competitor content',
        'Evaluating market position',
        'Identifying content gaps',
        'Generating competitive insights',
    ]

    self.website_analyzer = WebsiteAnalyzer()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    self.stages = {
        'competitor_analysis': {
            'name': 'Competitor Analysis',
            'steps': analysis_steps,
        },
    }

    logger.info("CompetitorAnalyzer initialized")
|
||||
|
||||
def analyze(self, competitor_urls: List[str], industry: str) -> Dict[str, Any]:
    """
    Analyze competitor websites.

    Each URL is passed to ``self.website_analyzer.analyze_website``; results
    flagged as successful are collected and sent to the LLM, whose answer
    fills the 'market_position', 'content_gaps' and 'advantages' fields.

    Args:
        competitor_urls: List of competitor URLs to analyze
        industry: Industry category

    Returns:
        Dictionary with the keys 'competitors', 'market_position',
        'content_gaps' and 'advantages'. On failure the same keys are
        returned empty together with an 'error' message.
    """
    try:
        results = {
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }

        # Analyze each competitor; unsuccessful analyses are left out.
        for url in competitor_urls:
            competitor_analysis = self.website_analyzer.analyze_website(url)
            if competitor_analysis.get('success', False):
                results['competitors'].append({
                    'url': url,
                    'analysis': competitor_analysis['data']
                })

        # default=str: the dump only feeds prompt text, so values that are not
        # JSON-native are stringified instead of aborting the whole analysis.
        competitors_json = json.dumps(results['competitors'], indent=2, default=str)
        prompt = "\n".join([
            f"Analyze the market position of competitors in the {industry} industry:",
            "",
            "Competitor Analyses:",
            competitors_json,
            "",
            "Provide:",
            "1. Market position analysis",
            "2. Content gaps",
            "3. Competitive advantages",
            "",
            "Format the response as JSON with 'market_position', 'content_gaps', and 'advantages' keys.",
        ])

        # Get AI analysis
        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in competitive analysis.",
            response_format="json_object"
        )

        # NOTE(review): the return type of llm_text_gen is not visible here;
        # both a parsed dict and a raw JSON string are accepted.
        if isinstance(analysis, str):
            try:
                analysis = json.loads(analysis)
            except json.JSONDecodeError:
                logger.warning("AI analysis response was not valid JSON; ignoring it")
                analysis = None

        if isinstance(analysis, dict):
            results['market_position'] = analysis.get('market_position', {})
            results['content_gaps'] = analysis.get('content_gaps', [])
            results['advantages'] = analysis.get('advantages', [])

        return results

    except Exception as e:
        error_msg = f"Error analyzing competitors: {str(e)}"
        # opt(exception=True) attaches the traceback. The message is passed
        # without extra arguments so loguru does not run str.format on it,
        # which would fail on braces inside the exception text.
        logger.opt(exception=True).error(error_msg)
        return {
            'error': error_msg,
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }
|
||||
|
||||
def _analyze_competitor_content(self, competitor_urls: List[str]) -> Dict[str, Any]:
|
||||
"""Analyze competitor content."""
|
||||
try:
|
||||
content_analysis = {}
|
||||
|
||||
for url in competitor_urls:
|
||||
# Get AI analysis for each competitor
|
||||
analysis = self.ai_processor.analyze_content({
|
||||
'url': url,
|
||||
'content': {} # Content will be fetched by AI processor
|
||||
})
|
||||
|
||||
content_analysis[url] = {
|
||||
'content_metrics': analysis.get('content_metrics', {}),
|
||||
'content_evolution': analysis.get('content_evolution', {}),
|
||||
'topic_trends': analysis.get('topic_trends', {}),
|
||||
'performance_trends': analysis.get('performance_trends', {})
|
||||
}
|
||||
|
||||
return content_analysis
|
||||
except Exception as e:
|
||||
st.error(f"Error analyzing competitor content: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _evaluate_market_position(self, content_analysis: Dict[str, Any], industry: str) -> Dict[str, Any]:
|
||||
"""Evaluate market position."""
|
||||
try:
|
||||
market_position = {
|
||||
'industry_rank': 0,
|
||||
'content_quality_rank': 0,
|
||||
'market_share': 0,
|
||||
'competitive_advantages': [],
|
||||
'competitive_disadvantages': []
|
||||
}
|
||||
|
||||
# Calculate industry rank based on content quality
|
||||
content_quality_scores = [
|
||||
analysis.get('content_metrics', {}).get('quality_score', 0)
|
||||
for analysis in content_analysis.values()
|
||||
]
|
||||
|
||||
if content_quality_scores:
|
||||
market_position['content_quality_rank'] = sum(content_quality_scores) / len(content_quality_scores)
|
||||
|
||||
# Identify competitive advantages and disadvantages
|
||||
for url, analysis in content_analysis.items():
|
||||
quality_score = analysis.get('content_metrics', {}).get('quality_score', 0)
|
||||
|
||||
if quality_score > market_position['content_quality_rank']:
|
||||
market_position['competitive_advantages'].append({
|
||||
'url': url,
|
||||
'advantage': 'Higher content quality',
|
||||
'score': quality_score
|
||||
})
|
||||
elif quality_score < market_position['content_quality_rank']:
|
||||
market_position['competitive_disadvantages'].append({
|
||||
'url': url,
|
||||
'disadvantage': 'Lower content quality',
|
||||
'score': quality_score
|
||||
})
|
||||
|
||||
return market_position
|
||||
except Exception as e:
|
||||
st.error(f"Error evaluating market position: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _identify_content_gaps(self, content_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Identify content gaps."""
|
||||
try:
|
||||
content_gaps = []
|
||||
|
||||
# Analyze content coverage
|
||||
all_topics = set()
|
||||
for analysis in content_analysis.values():
|
||||
topics = analysis.get('topic_trends', {}).get('topics', [])
|
||||
all_topics.update(topics)
|
||||
|
||||
# Identify missing topics for each competitor
|
||||
for url, analysis in content_analysis.items():
|
||||
covered_topics = set(analysis.get('topic_trends', {}).get('topics', []))
|
||||
missing_topics = all_topics - covered_topics
|
||||
|
||||
if missing_topics:
|
||||
content_gaps.append({
|
||||
'url': url,
|
||||
'missing_topics': list(missing_topics),
|
||||
'gap_type': 'topic_coverage'
|
||||
})
|
||||
|
||||
return content_gaps
|
||||
except Exception as e:
|
||||
st.error(f"Error identifying content gaps: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_competitive_insights(self, content_analysis: Dict[str, Any], market_position: Dict[str, Any], content_gaps: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Generate competitive insights."""
|
||||
try:
|
||||
insights = []
|
||||
|
||||
# Market position insights
|
||||
if market_position.get('content_quality_rank', 0) > 80:
|
||||
insights.append("Strong market position with high content quality")
|
||||
elif market_position.get('content_quality_rank', 0) > 60:
|
||||
insights.append("Moderate market position with room for improvement")
|
||||
else:
|
||||
insights.append("Weak market position requiring significant improvement")
|
||||
|
||||
# Content gap insights
|
||||
if content_gaps:
|
||||
insights.append(f"Identified {len(content_gaps)} content gaps across competitors")
|
||||
|
||||
# Competitive advantage insights
|
||||
if market_position.get('competitive_advantages'):
|
||||
insights.append(f"Found {len(market_position['competitive_advantages'])} competitive advantages")
|
||||
|
||||
return insights
|
||||
except Exception as e:
|
||||
st.error(f"Error generating competitive insights: {str(e)}")
|
||||
return []
|
||||
|
||||
def _run_seo_analysis(self, url: str) -> dict:
|
||||
"""
|
||||
Run SEO analysis on competitor website.
|
||||
|
||||
Args:
|
||||
url (str): The URL to analyze
|
||||
|
||||
Returns:
|
||||
dict: SEO analysis results
|
||||
"""
|
||||
# Run website analysis using the new analyzer
|
||||
analysis = self.website_analyzer.analyze_website(url)
|
||||
|
||||
if not analysis.get('success', False):
|
||||
return {
|
||||
'error': analysis.get('error', 'Unknown error in SEO analysis'),
|
||||
'onpage_seo': {},
|
||||
'url_seo': {}
|
||||
}
|
||||
|
||||
# Extract SEO information from the analysis
|
||||
seo_info = analysis['data']['analysis']['seo_info']
|
||||
basic_info = analysis['data']['analysis']['basic_info']
|
||||
|
||||
return {
|
||||
'onpage_seo': {
|
||||
'meta_tags': seo_info.get('meta_tags', {}),
|
||||
'content': seo_info.get('content', {}),
|
||||
'recommendations': seo_info.get('recommendations', [])
|
||||
},
|
||||
'url_seo': {
|
||||
'title': basic_info.get('title', ''),
|
||||
'meta_description': basic_info.get('meta_description', ''),
|
||||
'has_robots_txt': bool(basic_info.get('robots_txt')),
|
||||
'has_sitemap': bool(basic_info.get('sitemap'))
|
||||
}
|
||||
}
|
||||
|
||||
def _analyze_title_patterns(self, url: str) -> dict:
    """
    Collect title patterns and suggestions from the title generator.

    Args:
        url (str): The URL to analyze

    Returns:
        dict: 'patterns' (default ``{}``) and 'suggestions' (default ``[]``)
        taken from the generator's result.
    """
    # NOTE(review): ai_title_generator is not among the imports visible at
    # the top of this module — confirm it is in scope before relying on this.
    generated = ai_title_generator(url)

    summary = {}
    summary['patterns'] = generated.get('patterns', {})
    summary['suggestions'] = generated.get('suggestions', [])
    return summary
|
||||
|
||||
def _compare_competitors(self, results: dict) -> dict:
|
||||
"""
|
||||
Compare results across all competitors.
|
||||
|
||||
Args:
|
||||
results (dict): Analysis results for all competitors
|
||||
|
||||
Returns:
|
||||
dict: Comparative analysis results
|
||||
"""
|
||||
comparison = {
|
||||
'content_comparison': self._compare_content(results),
|
||||
'seo_comparison': self._compare_seo(results),
|
||||
'title_comparison': self._compare_titles(results),
|
||||
'performance_metrics': self._compare_performance(results),
|
||||
'content_gaps': self._identify_content_gaps(results)
|
||||
}
|
||||
|
||||
# Add AI-enhanced insights
|
||||
comparison['ai_insights'] = self.ai_processor.analyze_competitor_comparison(comparison)
|
||||
|
||||
return comparison
|
||||
|
||||
def _compare_content(self, results: dict) -> dict:
|
||||
"""Compare content structure across competitors."""
|
||||
content_comparison = {
|
||||
'topic_distribution': self._analyze_topic_distribution(results),
|
||||
'content_depth': self._analyze_content_depth(results),
|
||||
'content_formats': self._analyze_content_formats(results),
|
||||
'content_quality': self._analyze_content_quality(results)
|
||||
}
|
||||
|
||||
return content_comparison
|
||||
|
||||
def _analyze_topic_distribution(self, results: dict) -> dict:
|
||||
"""Analyze topic distribution across competitors."""
|
||||
all_topics = []
|
||||
topic_frequency = Counter()
|
||||
|
||||
for url, data in results.items():
|
||||
topics = data['content_structure'].get('topics', [])
|
||||
all_topics.extend([t['topic'] for t in topics])
|
||||
topic_frequency.update([t['topic'] for t in topics])
|
||||
|
||||
return {
|
||||
'common_topics': [topic for topic, count in topic_frequency.most_common(10)],
|
||||
'unique_topics': list(set(all_topics)),
|
||||
'topic_frequency': dict(topic_frequency.most_common()),
|
||||
'topic_coverage': len(set(all_topics)) / len(all_topics) if all_topics else 0
|
||||
}
|
||||
|
||||
def _analyze_content_depth(self, results: dict) -> dict:
|
||||
"""Analyze content depth across competitors."""
|
||||
depth_metrics = {
|
||||
'word_counts': {},
|
||||
'section_counts': {},
|
||||
'heading_distribution': defaultdict(list),
|
||||
'content_hierarchy': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
content_structure = data['content_structure']
|
||||
|
||||
# Word count analysis
|
||||
depth_metrics['word_counts'][url] = content_structure.get('text_statistics', {}).get('word_count', 0)
|
||||
|
||||
# Section analysis
|
||||
depth_metrics['section_counts'][url] = len(content_structure.get('sections', []))
|
||||
|
||||
# Heading distribution
|
||||
for level, count in content_structure.get('hierarchy', {}).get('heading_distribution', {}).items():
|
||||
depth_metrics['heading_distribution'][level].append(count)
|
||||
|
||||
# Content hierarchy
|
||||
depth_metrics['content_hierarchy'][url] = content_structure.get('hierarchy', {})
|
||||
|
||||
return depth_metrics
|
||||
|
||||
def _analyze_content_formats(self, results: dict) -> dict:
|
||||
"""Analyze content formats across competitors."""
|
||||
format_analysis = {
|
||||
'format_types': defaultdict(int),
|
||||
'format_distribution': defaultdict(list),
|
||||
'format_effectiveness': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
sections = data['content_structure'].get('sections', [])
|
||||
|
||||
for section in sections:
|
||||
format_type = section.get('type', 'unknown')
|
||||
format_analysis['format_types'][format_type] += 1
|
||||
format_analysis['format_distribution'][format_type].append({
|
||||
'url': url,
|
||||
'heading': section.get('heading', ''),
|
||||
'word_count': section.get('word_count', 0)
|
||||
})
|
||||
|
||||
return format_analysis
|
||||
|
||||
def _analyze_content_quality(self, results: dict) -> dict:
|
||||
"""Analyze content quality across competitors."""
|
||||
quality_metrics = {
|
||||
'readability_scores': {},
|
||||
'content_structure_scores': {},
|
||||
'engagement_metrics': {},
|
||||
'overall_quality': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
content_structure = data['content_structure']
|
||||
|
||||
# Readability analysis
|
||||
readability = content_structure.get('readability', {})
|
||||
quality_metrics['readability_scores'][url] = {
|
||||
'flesch_score': readability.get('flesch_score', 0),
|
||||
'avg_sentence_length': readability.get('avg_sentence_length', 0),
|
||||
'avg_word_length': readability.get('avg_word_length', 0)
|
||||
}
|
||||
|
||||
# Structure analysis
|
||||
hierarchy = content_structure.get('hierarchy', {})
|
||||
quality_metrics['content_structure_scores'][url] = {
|
||||
'has_proper_hierarchy': hierarchy.get('has_proper_hierarchy', False),
|
||||
'heading_distribution': hierarchy.get('heading_distribution', {}),
|
||||
'max_depth': hierarchy.get('max_depth', 0)
|
||||
}
|
||||
|
||||
return quality_metrics
|
||||
|
||||
def _compare_seo(self, results: dict) -> dict:
|
||||
"""Compare SEO metrics across competitors."""
|
||||
seo_comparison = {
|
||||
'onpage_metrics': defaultdict(list),
|
||||
'technical_metrics': defaultdict(list),
|
||||
'content_metrics': defaultdict(list),
|
||||
'overall_seo_score': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
seo_info = data.get('website_analysis', {}).get('analysis', {}).get('seo_info', {})
|
||||
|
||||
# On-page SEO metrics
|
||||
meta_tags = seo_info.get('meta_tags', {})
|
||||
seo_comparison['onpage_metrics']['title_score'].append(
|
||||
100 if meta_tags.get('title', {}).get('status') == 'good' else 50
|
||||
)
|
||||
seo_comparison['onpage_metrics']['description_score'].append(
|
||||
100 if meta_tags.get('description', {}).get('status') == 'good' else 50
|
||||
)
|
||||
seo_comparison['onpage_metrics']['keywords_score'].append(
|
||||
100 if meta_tags.get('keywords', {}).get('status') == 'good' else 50
|
||||
)
|
||||
|
||||
# Technical SEO metrics
|
||||
technical = data.get('website_analysis', {}).get('analysis', {}).get('basic_info', {})
|
||||
seo_comparison['technical_metrics']['has_robots_txt'].append(
|
||||
100 if technical.get('robots_txt') else 0
|
||||
)
|
||||
seo_comparison['technical_metrics']['has_sitemap'].append(
|
||||
100 if technical.get('sitemap') else 0
|
||||
)
|
||||
|
||||
# Content SEO metrics
|
||||
content = seo_info.get('content', {})
|
||||
seo_comparison['content_metrics']['readability_score'].append(
|
||||
content.get('readability_score', 0)
|
||||
)
|
||||
seo_comparison['content_metrics']['content_quality_score'].append(
|
||||
content.get('content_quality_score', 0)
|
||||
)
|
||||
|
||||
# Overall SEO score
|
||||
seo_comparison['overall_seo_score'][url] = seo_info.get('overall_score', 0)
|
||||
|
||||
return seo_comparison
|
||||
|
||||
def _compare_titles(self, results: dict) -> dict:
|
||||
"""Compare title patterns across competitors."""
|
||||
title_comparison = {
|
||||
'pattern_distribution': defaultdict(int),
|
||||
'length_distribution': defaultdict(list),
|
||||
'keyword_usage': defaultdict(int),
|
||||
'format_preferences': defaultdict(int)
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
title_patterns = data['title_patterns']
|
||||
|
||||
# Pattern analysis
|
||||
for pattern in title_patterns.get('patterns', {}):
|
||||
title_comparison['pattern_distribution'][pattern] += 1
|
||||
|
||||
# Length analysis
|
||||
for suggestion in title_patterns.get('suggestions', []):
|
||||
title_comparison['length_distribution'][len(suggestion)].append(suggestion)
|
||||
|
||||
# Keyword analysis
|
||||
for suggestion in title_patterns.get('suggestions', []):
|
||||
words = suggestion.lower().split()
|
||||
for word in words:
|
||||
if len(word) > 3: # Filter out short words
|
||||
title_comparison['keyword_usage'][word] += 1
|
||||
|
||||
return title_comparison
|
||||
|
||||
def _compare_performance(self, results: dict) -> dict:
|
||||
"""Compare performance metrics across competitors."""
|
||||
performance_metrics = {
|
||||
'content_effectiveness': {},
|
||||
'engagement_metrics': {},
|
||||
'technical_performance': {},
|
||||
'overall_performance': {}
|
||||
}
|
||||
|
||||
for url, data in results.items():
|
||||
# Content effectiveness
|
||||
content_structure = data['content_structure']
|
||||
performance_metrics['content_effectiveness'][url] = {
|
||||
'content_depth': content_structure.get('text_statistics', {}).get('word_count', 0),
|
||||
'content_quality': content_structure.get('readability', {}).get('flesch_score', 0),
|
||||
'content_structure': content_structure.get('hierarchy', {}).get('has_proper_hierarchy', False)
|
||||
}
|
||||
|
||||
# Technical performance
|
||||
seo_analysis = data['seo_analysis']
|
||||
performance_metrics['technical_performance'][url] = {
|
||||
'onpage_score': sum(1 for v in seo_analysis.get('onpage_seo', {}).values() if v),
|
||||
'technical_score': sum(1 for v in seo_analysis.get('url_seo', {}).values() if v)
|
||||
}
|
||||
|
||||
return performance_metrics
|
||||
|
||||
def _find_missing_topics(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Find topics that are missing or underrepresented."""
|
||||
all_topics = set()
|
||||
topic_coverage = defaultdict(int)
|
||||
|
||||
# Collect all topics and their coverage
|
||||
for url, data in results.items():
|
||||
topics = data['content_structure'].get('topics', [])
|
||||
for topic in topics:
|
||||
all_topics.add(topic['topic'])
|
||||
topic_coverage[topic['topic']] += 1
|
||||
|
||||
# Identify missing or underrepresented topics
|
||||
missing_topics = []
|
||||
total_competitors = len(results)
|
||||
|
||||
for topic in all_topics:
|
||||
coverage = topic_coverage[topic] / total_competitors
|
||||
if coverage < 0.5: # Topic covered by less than 50% of competitors
|
||||
missing_topics.append({
|
||||
'topic': topic,
|
||||
'coverage': coverage,
|
||||
'opportunity_score': 1 - coverage
|
||||
})
|
||||
|
||||
return sorted(missing_topics, key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
def _identify_opportunities(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Identify content opportunities based on analysis."""
|
||||
opportunities = []
|
||||
|
||||
# Analyze content depth opportunities
|
||||
depth_metrics = self._analyze_content_depth(results)
|
||||
avg_word_count = sum(depth_metrics['word_counts'].values()) / len(depth_metrics['word_counts'])
|
||||
|
||||
for url, word_count in depth_metrics['word_counts'].items():
|
||||
if word_count < avg_word_count * 0.7: # Content depth significantly below average
|
||||
opportunities.append({
|
||||
'type': 'content_depth',
|
||||
'url': url,
|
||||
'current_value': word_count,
|
||||
'target_value': avg_word_count,
|
||||
'opportunity_score': (avg_word_count - word_count) / avg_word_count
|
||||
})
|
||||
|
||||
# Analyze format opportunities
|
||||
format_analysis = self._analyze_content_formats(results)
|
||||
for format_type, distribution in format_analysis['format_distribution'].items():
|
||||
if len(distribution) < len(results) * 0.3: # Format used by less than 30% of competitors
|
||||
opportunities.append({
|
||||
'type': 'content_format',
|
||||
'format': format_type,
|
||||
'current_coverage': len(distribution) / len(results),
|
||||
'opportunity_score': 1 - (len(distribution) / len(results))
|
||||
})
|
||||
|
||||
return sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
def _analyze_format_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in content formats."""
|
||||
format_gaps = []
|
||||
format_analysis = self._analyze_content_formats(results)
|
||||
|
||||
# Identify underutilized formats
|
||||
for format_type, count in format_analysis['format_types'].items():
|
||||
if count < len(results) * 0.3: # Format used by less than 30% of competitors
|
||||
format_gaps.append({
|
||||
'format': format_type,
|
||||
'current_usage': count,
|
||||
'potential_impact': 'high' if count < len(results) * 0.2 else 'medium',
|
||||
'suggested_implementation': self._generate_format_suggestions(format_type)
|
||||
})
|
||||
|
||||
return format_gaps
|
||||
|
||||
def _analyze_quality_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in content quality."""
|
||||
quality_gaps = []
|
||||
quality_metrics = self._analyze_content_quality(results)
|
||||
|
||||
# Analyze readability gaps
|
||||
readability_scores = quality_metrics['readability_scores']
|
||||
avg_flesch = sum(score['flesch_score'] for score in readability_scores.values()) / len(readability_scores)
|
||||
|
||||
for url, scores in readability_scores.items():
|
||||
if scores['flesch_score'] < avg_flesch * 0.8: # Readability significantly below average
|
||||
quality_gaps.append({
|
||||
'type': 'readability',
|
||||
'url': url,
|
||||
'current_score': scores['flesch_score'],
|
||||
'target_score': avg_flesch,
|
||||
'improvement_needed': avg_flesch - scores['flesch_score']
|
||||
})
|
||||
|
||||
return quality_gaps
|
||||
|
||||
def _analyze_seo_gaps(self, results: dict) -> List[Dict[str, Any]]:
|
||||
"""Analyze gaps in SEO implementation."""
|
||||
seo_gaps = []
|
||||
seo_comparison = self._compare_seo(results)
|
||||
|
||||
# Analyze on-page SEO gaps
|
||||
for metric, values in seo_comparison['onpage_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'onpage_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
# Analyze technical SEO gaps
|
||||
for metric, values in seo_comparison['technical_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'technical_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
# Analyze content SEO gaps
|
||||
for metric, values in seo_comparison['content_metrics'].items():
|
||||
avg_value = sum(values) / len(values)
|
||||
for url, value in zip(results.keys(), values):
|
||||
if value < avg_value * 0.7: # Significantly below average
|
||||
seo_gaps.append({
|
||||
'type': 'content_seo',
|
||||
'metric': metric,
|
||||
'url': url,
|
||||
'current_value': value,
|
||||
'target_value': avg_value,
|
||||
'improvement_needed': avg_value - value
|
||||
})
|
||||
|
||||
return seo_gaps
|
||||
|
||||
def _generate_format_suggestions(self, format_type: str) -> List[str]:
|
||||
"""Generate suggestions for implementing specific content formats."""
|
||||
format_suggestions = {
|
||||
'article': [
|
||||
'Create in-depth articles with comprehensive coverage',
|
||||
'Include expert quotes and statistics',
|
||||
'Add visual elements and infographics'
|
||||
],
|
||||
'blog_post': [
|
||||
'Write engaging blog posts with personal insights',
|
||||
'Include call-to-actions',
|
||||
'Add social sharing buttons'
|
||||
],
|
||||
'how-to': [
|
||||
'Create step-by-step guides',
|
||||
'Include screenshots or videos',
|
||||
'Add troubleshooting sections'
|
||||
],
|
||||
'case_study': [
|
||||
'Present real-world examples',
|
||||
'Include metrics and results',
|
||||
'Add client testimonials'
|
||||
]
|
||||
}
|
||||
|
||||
return format_suggestions.get(format_type, [
|
||||
'Research successful examples',
|
||||
'Analyze competitor implementation',
|
||||
'Create unique value proposition'
|
||||
])
|
||||
@@ -1,674 +0,0 @@
|
||||
"""
|
||||
Enhanced Content Gap Analysis with Advertools Integration and AI Insights.
|
||||
|
||||
This module provides comprehensive content gap analysis using:
|
||||
- adv.serp_goog: Competitor SERP analysis
|
||||
- adv.kw_generate: Keyword research expansion
|
||||
- adv.crawl: Deep competitor content analysis
|
||||
- adv.word_frequency: Content theme identification
|
||||
- llm_text_gen: AI-powered insights and recommendations
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import advertools as adv
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
import tempfile
|
||||
import os
|
||||
from datetime import datetime
|
||||
import asyncio
|
||||
import json
|
||||
from collections import Counter, defaultdict
|
||||
from loguru import logger
|
||||
|
||||
# Import existing modules
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from .utils.ai_processor import AIProcessor, ProgressTracker
|
||||
|
||||
class EnhancedContentGapAnalyzer:
|
||||
"""Enhanced content gap analyzer with advertools and AI integration."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the enhanced analyzer."""
|
||||
self.website_analyzer = WebsiteAnalyzer()
|
||||
self.ai_processor = AIProcessor()
|
||||
self.progress = ProgressTracker()
|
||||
|
||||
# Temporary directories for crawl data
|
||||
self.temp_dir = tempfile.mkdtemp()
|
||||
|
||||
logger.info("EnhancedContentGapAnalyzer initialized")
|
||||
|
||||
def analyze_comprehensive_gap(self, target_url: str, competitor_urls: List[str],
|
||||
target_keywords: List[str], industry: str = "general") -> Dict[str, Any]:
|
||||
"""
|
||||
Perform comprehensive content gap analysis.
|
||||
|
||||
Args:
|
||||
target_url: Your website URL
|
||||
competitor_urls: List of competitor URLs (max 5 for performance)
|
||||
target_keywords: List of primary keywords to analyze
|
||||
industry: Industry category for context
|
||||
|
||||
Returns:
|
||||
Comprehensive analysis results
|
||||
"""
|
||||
try:
|
||||
st.info("🚀 Starting Enhanced Content Gap Analysis...")
|
||||
|
||||
# Initialize results structure
|
||||
results = {
|
||||
'analysis_timestamp': datetime.utcnow().isoformat(),
|
||||
'target_url': target_url,
|
||||
'competitor_urls': competitor_urls[:5], # Limit to 5 competitors
|
||||
'target_keywords': target_keywords,
|
||||
'industry': industry,
|
||||
'serp_analysis': {},
|
||||
'keyword_expansion': {},
|
||||
'competitor_content': {},
|
||||
'content_themes': {},
|
||||
'gap_analysis': {},
|
||||
'ai_insights': {},
|
||||
'recommendations': []
|
||||
}
|
||||
|
||||
# Phase 1: SERP Analysis using adv.serp_goog
|
||||
with st.expander("🔍 SERP Analysis Progress", expanded=True):
|
||||
serp_results = self._analyze_serp_landscape(target_keywords, competitor_urls)
|
||||
results['serp_analysis'] = serp_results
|
||||
st.success(f"✅ Analyzed {len(target_keywords)} keywords across SERPs")
|
||||
|
||||
# Phase 2: Keyword Expansion using adv.kw_generate
|
||||
with st.expander("🎯 Keyword Research Expansion", expanded=True):
|
||||
expanded_keywords = self._expand_keyword_research(target_keywords, industry)
|
||||
results['keyword_expansion'] = expanded_keywords
|
||||
st.success(f"✅ Generated {len(expanded_keywords.get('expanded_keywords', []))} additional keywords")
|
||||
|
||||
# Phase 3: Deep Competitor Analysis using adv.crawl
|
||||
with st.expander("🕷️ Deep Competitor Content Analysis", expanded=True):
|
||||
competitor_content = self._analyze_competitor_content_deep(competitor_urls)
|
||||
results['competitor_content'] = competitor_content
|
||||
st.success(f"✅ Crawled and analyzed {len(competitor_urls)} competitor websites")
|
||||
|
||||
# Phase 4: Content Theme Analysis using adv.word_frequency
|
||||
with st.expander("📊 Content Theme & Gap Identification", expanded=True):
|
||||
content_themes = self._analyze_content_themes(results['competitor_content'])
|
||||
results['content_themes'] = content_themes
|
||||
st.success("✅ Identified content themes and topic clusters")
|
||||
|
||||
# Phase 5: AI-Powered Gap Analysis and Insights
|
||||
with st.expander("🤖 AI-Powered Insights Generation", expanded=True):
|
||||
ai_insights = self._generate_ai_insights(results)
|
||||
results['ai_insights'] = ai_insights
|
||||
results['recommendations'] = ai_insights.get('recommendations', [])
|
||||
st.success("✅ Generated AI-powered insights and recommendations")
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in comprehensive gap analysis: {str(e)}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
st.error(error_msg)
|
||||
return {'error': error_msg}
|
||||
|
||||
def _analyze_serp_landscape(self, keywords: List[str], competitor_urls: List[str]) -> Dict[str, Any]:
|
||||
"""Analyze SERP landscape using adv.serp_goog."""
|
||||
try:
|
||||
st.info("🔍 Analyzing SERP landscape for competitor positions...")
|
||||
|
||||
serp_results = {
|
||||
'keyword_rankings': {},
|
||||
'competitor_presence': {},
|
||||
'serp_features': {},
|
||||
'ranking_opportunities': []
|
||||
}
|
||||
|
||||
# Note: adv.serp_goog requires API key setup
|
||||
# For demo purposes, we'll simulate SERP analysis
|
||||
for keyword in keywords[:10]: # Limit to prevent API overuse
|
||||
try:
|
||||
# In production, use: serp_data = adv.serp_goog(q=keyword, cx='your_cx', key='your_key')
|
||||
# For now, we'll create structured placeholder data
|
||||
serp_results['keyword_rankings'][keyword] = {
|
||||
'top_10_domains': [urlparse(url).netloc for url in competitor_urls],
|
||||
'serp_features': ['featured_snippet', 'people_also_ask', 'related_searches'],
|
||||
'competitor_positions': {
|
||||
urlparse(url).netloc: f"Position {i+3}" for i, url in enumerate(competitor_urls[:5])
|
||||
}
|
||||
}
|
||||
|
||||
st.write(f"• Analyzed keyword: '{keyword}'")
|
||||
|
||||
except Exception as e:
|
||||
st.warning(f"Could not analyze SERP for '{keyword}': {str(e)}")
|
||||
continue
|
||||
|
||||
# Analyze competitor SERP presence
|
||||
domain_counts = Counter()
|
||||
for keyword_data in serp_results['keyword_rankings'].values():
|
||||
for domain in keyword_data.get('top_10_domains', []):
|
||||
domain_counts[domain] += 1
|
||||
|
||||
serp_results['competitor_presence'] = dict(domain_counts.most_common(10))
|
||||
|
||||
# Identify ranking opportunities
|
||||
for keyword, data in serp_results['keyword_rankings'].items():
|
||||
target_domain = urlparse(competitor_urls[0] if competitor_urls else "").netloc
|
||||
if target_domain not in data.get('competitor_positions', {}):
|
||||
serp_results['ranking_opportunities'].append({
|
||||
'keyword': keyword,
|
||||
'opportunity': 'Not ranking in top 10',
|
||||
'serp_features': data.get('serp_features', [])
|
||||
})
|
||||
|
||||
return serp_results
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error in SERP analysis: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _expand_keyword_research(self, seed_keywords: List[str], industry: str) -> Dict[str, Any]:
|
||||
"""Expand keyword research using adv.kw_generate."""
|
||||
try:
|
||||
st.info("🎯 Expanding keyword research...")
|
||||
|
||||
expanded_results = {
|
||||
'seed_keywords': seed_keywords,
|
||||
'expanded_keywords': [],
|
||||
'keyword_categories': {},
|
||||
'search_intent_analysis': {},
|
||||
'long_tail_opportunities': []
|
||||
}
|
||||
|
||||
# Use adv.kw_generate for keyword expansion
|
||||
all_expanded = []
|
||||
|
||||
for seed_keyword in seed_keywords[:5]: # Limit to prevent overload
|
||||
try:
|
||||
# Generate keyword variations using advertools
|
||||
broad_keywords = adv.kw_generate(
|
||||
products=[seed_keyword],
|
||||
words=["best", "top", "how to", "guide", "tips", "vs", "review", "comparison"],
|
||||
max_len=4
|
||||
)
|
||||
|
||||
# Add phrase match keywords
|
||||
phrase_keywords = adv.kw_generate(
|
||||
products=[seed_keyword],
|
||||
words=[industry, "strategy", "analysis", "optimization", "techniques"],
|
||||
max_len=3
|
||||
)
|
||||
|
||||
all_expanded.extend(broad_keywords)
|
||||
all_expanded.extend(phrase_keywords)
|
||||
|
||||
st.write(f"• Generated variations for: '{seed_keyword}'")
|
||||
|
||||
except Exception as e:
|
||||
st.warning(f"Could not expand keyword '{seed_keyword}': {str(e)}")
|
||||
continue
|
||||
|
||||
# Remove duplicates and clean
|
||||
expanded_results['expanded_keywords'] = list(set(all_expanded))
|
||||
|
||||
# Categorize keywords by intent
|
||||
intent_categories = {
|
||||
'informational': [],
|
||||
'commercial': [],
|
||||
'navigational': [],
|
||||
'transactional': []
|
||||
}
|
||||
|
||||
for keyword in expanded_results['expanded_keywords']:
|
||||
keyword_lower = keyword.lower()
|
||||
if any(word in keyword_lower for word in ['how', 'what', 'why', 'guide', 'tips']):
|
||||
intent_categories['informational'].append(keyword)
|
||||
elif any(word in keyword_lower for word in ['best', 'top', 'review', 'comparison']):
|
||||
intent_categories['commercial'].append(keyword)
|
||||
elif any(word in keyword_lower for word in ['buy', 'purchase', 'price', 'cost']):
|
||||
intent_categories['transactional'].append(keyword)
|
||||
else:
|
||||
intent_categories['navigational'].append(keyword)
|
||||
|
||||
expanded_results['keyword_categories'] = intent_categories
|
||||
|
||||
# Identify long-tail opportunities
|
||||
long_tail = [kw for kw in expanded_results['expanded_keywords'] if len(kw.split()) >= 3]
|
||||
expanded_results['long_tail_opportunities'] = long_tail[:20] # Top 20 long-tail
|
||||
|
||||
return expanded_results
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error in keyword expansion: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _analyze_competitor_content_deep(self, competitor_urls: List[str]) -> Dict[str, Any]:
|
||||
"""Deep competitor content analysis using adv.crawl."""
|
||||
try:
|
||||
st.info("🕷️ Performing deep competitor content analysis...")
|
||||
|
||||
competitor_analysis = {
|
||||
'crawl_results': {},
|
||||
'content_structure': {},
|
||||
'page_analysis': {},
|
||||
'technical_insights': {}
|
||||
}
|
||||
|
||||
for i, url in enumerate(competitor_urls[:3]): # Limit to 3 for performance
|
||||
try:
|
||||
domain = urlparse(url).netloc
|
||||
st.write(f"🔍 Analyzing competitor {i+1}: {domain}")
|
||||
|
||||
# Create temporary file for crawl results
|
||||
crawl_file = os.path.join(self.temp_dir, f"crawl_{domain.replace('.', '_')}.jl")
|
||||
|
||||
# Use adv.crawl for comprehensive analysis
|
||||
# Note: This is a simplified crawl - in production, customize settings
|
||||
adv.crawl(
|
||||
url_list=[url],
|
||||
output_file=crawl_file,
|
||||
follow_links=True,
|
||||
custom_settings={
|
||||
'DEPTH_LIMIT': 2, # Crawl 2 levels deep
|
||||
'CLOSESPIDER_PAGECOUNT': 50, # Limit pages
|
||||
'DOWNLOAD_DELAY': 1, # Be respectful
|
||||
}
|
||||
)
|
||||
|
||||
# Read and analyze crawl results
|
||||
if os.path.exists(crawl_file):
|
||||
crawl_df = pd.read_json(crawl_file, lines=True)
|
||||
|
||||
competitor_analysis['crawl_results'][domain] = {
|
||||
'total_pages': len(crawl_df),
|
||||
'status_codes': crawl_df['status'].value_counts().to_dict(),
|
||||
'page_types': self._categorize_pages(crawl_df),
|
||||
'content_length_stats': {
|
||||
'mean': crawl_df['size'].mean() if 'size' in crawl_df.columns else 0,
|
||||
'median': crawl_df['size'].median() if 'size' in crawl_df.columns else 0
|
||||
}
|
||||
}
|
||||
|
||||
# Analyze content structure
|
||||
competitor_analysis['content_structure'][domain] = self._analyze_content_structure(crawl_df)
|
||||
|
||||
st.success(f"✅ Crawled {len(crawl_df)} pages from {domain}")
|
||||
else:
|
||||
st.warning(f"⚠️ No crawl data available for {domain}")
|
||||
|
||||
except Exception as e:
|
||||
st.warning(f"Could not crawl {url}: {str(e)}")
|
||||
continue
|
||||
|
||||
return competitor_analysis
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error in deep competitor analysis: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _analyze_content_themes(self, competitor_content: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Analyze content themes using adv.word_frequency."""
|
||||
try:
|
||||
st.info("📊 Analyzing content themes and topics...")
|
||||
|
||||
theme_analysis = {
|
||||
'dominant_themes': {},
|
||||
'content_clusters': {},
|
||||
'topic_gaps': [],
|
||||
'content_opportunities': []
|
||||
}
|
||||
|
||||
all_content_text = ""
|
||||
|
||||
# Extract content from crawl results
|
||||
for domain, crawl_data in competitor_content.get('crawl_results', {}).items():
|
||||
try:
|
||||
# In a real implementation, you'd extract text content from crawled pages
|
||||
# For now, we'll simulate content analysis
|
||||
|
||||
# Simulate word frequency analysis using domain and page data
|
||||
sample_content = f"content marketing seo optimization digital strategy {domain} website analysis competitor research keyword targeting"
|
||||
all_content_text += " " + sample_content
|
||||
|
||||
except Exception as e:
|
||||
continue
|
||||
|
||||
if all_content_text.strip():
|
||||
# Use adv.word_frequency for theme analysis
|
||||
word_freq = adv.word_frequency(
|
||||
text_list=[all_content_text],
|
||||
phrase_len=2, # Analyze 2-word phrases
|
||||
rm_words=['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']
|
||||
)
|
||||
|
||||
# Process word frequency results
|
||||
if not word_freq.empty:
|
||||
top_themes = word_freq.head(20)
|
||||
theme_analysis['dominant_themes'] = top_themes.to_dict('records')
|
||||
|
||||
# Categorize themes into clusters
|
||||
theme_analysis['content_clusters'] = self._cluster_themes(top_themes)
|
||||
|
||||
st.success("✅ Identified dominant content themes")
|
||||
|
||||
return theme_analysis
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error in content theme analysis: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _generate_ai_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate AI-powered insights using llm_text_gen."""
|
||||
try:
|
||||
st.info("🤖 Generating AI-powered insights...")
|
||||
|
||||
# Prepare analysis summary for AI
|
||||
analysis_summary = {
|
||||
'target_url': analysis_results.get('target_url', ''),
|
||||
'industry': analysis_results.get('industry', ''),
|
||||
'serp_opportunities': len(analysis_results.get('serp_analysis', {}).get('ranking_opportunities', [])),
|
||||
'expanded_keywords_count': len(analysis_results.get('keyword_expansion', {}).get('expanded_keywords', [])),
|
||||
'competitors_analyzed': len(analysis_results.get('competitor_urls', [])),
|
||||
'dominant_themes': analysis_results.get('content_themes', {}).get('dominant_themes', [])[:10]
|
||||
}
|
||||
|
||||
# Generate comprehensive AI insights
|
||||
prompt = f"""
|
||||
As an expert SEO content strategist, analyze this comprehensive content gap analysis data and provide actionable insights:
|
||||
|
||||
TARGET ANALYSIS:
|
||||
- Website: {analysis_summary['target_url']}
|
||||
- Industry: {analysis_summary['industry']}
|
||||
- SERP Opportunities: {analysis_summary['serp_opportunities']} keywords not ranking
|
||||
- Keyword Expansion: {analysis_summary['expanded_keywords_count']} additional keywords identified
|
||||
- Competitors Analyzed: {analysis_summary['competitors_analyzed']} websites
|
||||
|
||||
DOMINANT CONTENT THEMES:
|
||||
{json.dumps(analysis_summary['dominant_themes'], indent=2)}
|
||||
|
||||
PROVIDE:
|
||||
1. Strategic Content Gap Analysis
|
||||
2. Priority Content Recommendations (top 5)
|
||||
3. Keyword Strategy Insights
|
||||
4. Competitive Positioning Advice
|
||||
5. Content Format Recommendations
|
||||
6. Technical SEO Opportunities
|
||||
7. Implementation Timeline (30/60/90 days)
|
||||
|
||||
Format as JSON with clear, actionable recommendations.
|
||||
"""
|
||||
|
||||
ai_response = llm_text_gen(
|
||||
prompt=prompt,
|
||||
system_prompt="You are an expert SEO content strategist with 15+ years of experience in content gap analysis and competitive intelligence.",
|
||||
response_format="json_object"
|
||||
)
|
||||
|
||||
if ai_response:
|
||||
st.success("✅ Generated comprehensive AI insights")
|
||||
return ai_response
|
||||
else:
|
||||
st.warning("⚠️ Could not generate AI insights")
|
||||
return {}
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error generating AI insights: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _categorize_pages(self, crawl_df: pd.DataFrame) -> Dict[str, int]:
|
||||
"""Categorize crawled pages by type."""
|
||||
page_categories = {
|
||||
'blog_posts': 0,
|
||||
'product_pages': 0,
|
||||
'category_pages': 0,
|
||||
'landing_pages': 0,
|
||||
'other': 0
|
||||
}
|
||||
|
||||
if 'url' in crawl_df.columns:
|
||||
for url in crawl_df['url']:
|
||||
url_lower = url.lower()
|
||||
if any(indicator in url_lower for indicator in ['/blog/', '/post/', '/article/', '/news/']):
|
||||
page_categories['blog_posts'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/product/', '/item/', '/shop/']):
|
||||
page_categories['product_pages'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/category/', '/collection/', '/browse/']):
|
||||
page_categories['category_pages'] += 1
|
||||
elif any(indicator in url_lower for indicator in ['/landing/', '/promo/', '/campaign/']):
|
||||
page_categories['landing_pages'] += 1
|
||||
else:
|
||||
page_categories['other'] += 1
|
||||
|
||||
return page_categories
|
||||
|
||||
def _analyze_content_structure(self, crawl_df: pd.DataFrame) -> Dict[str, Any]:
|
||||
"""Analyze content structure from crawl data."""
|
||||
structure_analysis = {
|
||||
'avg_title_length': 0,
|
||||
'avg_meta_desc_length': 0,
|
||||
'h1_usage': 0,
|
||||
'internal_links_avg': 0,
|
||||
'external_links_avg': 0
|
||||
}
|
||||
|
||||
# Analyze available columns
|
||||
if 'title' in crawl_df.columns:
|
||||
structure_analysis['avg_title_length'] = crawl_df['title'].str.len().mean()
|
||||
|
||||
if 'meta_desc' in crawl_df.columns:
|
||||
structure_analysis['avg_meta_desc_length'] = crawl_df['meta_desc'].str.len().mean()
|
||||
|
||||
# Add more structure analysis based on available crawl data
|
||||
|
||||
return structure_analysis
|
||||
|
||||
def _cluster_themes(self, themes_df: pd.DataFrame) -> Dict[str, List[str]]:
|
||||
"""Cluster themes into topic groups."""
|
||||
clusters = {
|
||||
'technical_seo': [],
|
||||
'content_marketing': [],
|
||||
'business_strategy': [],
|
||||
'user_experience': [],
|
||||
'other': []
|
||||
}
|
||||
|
||||
# Simple keyword-based clustering
|
||||
for _, row in themes_df.iterrows():
|
||||
word = row.get('word', '') if 'word' in row else str(row.get(0, ''))
|
||||
word_lower = word.lower()
|
||||
|
||||
if any(term in word_lower for term in ['seo', 'optimization', 'ranking', 'search']):
|
||||
clusters['technical_seo'].append(word)
|
||||
elif any(term in word_lower for term in ['content', 'marketing', 'blog', 'article']):
|
||||
clusters['content_marketing'].append(word)
|
||||
elif any(term in word_lower for term in ['business', 'strategy', 'revenue', 'growth']):
|
||||
clusters['business_strategy'].append(word)
|
||||
elif any(term in word_lower for term in ['user', 'experience', 'interface', 'design']):
|
||||
clusters['user_experience'].append(word)
|
||||
else:
|
||||
clusters['other'].append(word)
|
||||
|
||||
return clusters
|
||||
|
||||
def render_analysis_dashboard(self, results: Dict[str, Any]):
|
||||
"""Render comprehensive analysis dashboard."""
|
||||
if not results or 'error' in results:
|
||||
st.error("❌ Analysis failed or no results available")
|
||||
return
|
||||
|
||||
st.markdown("## 🎯 Enhanced Content Gap Analysis Results")
|
||||
|
||||
# Overview metrics
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
|
||||
with col1:
|
||||
st.metric(
|
||||
"Keywords Analyzed",
|
||||
len(results.get('target_keywords', []))
|
||||
)
|
||||
|
||||
with col2:
|
||||
st.metric(
|
||||
"Competitors Crawled",
|
||||
len(results.get('competitor_urls', []))
|
||||
)
|
||||
|
||||
with col3:
|
||||
st.metric(
|
||||
"Expanded Keywords",
|
||||
len(results.get('keyword_expansion', {}).get('expanded_keywords', []))
|
||||
)
|
||||
|
||||
with col4:
|
||||
st.metric(
|
||||
"SERP Opportunities",
|
||||
len(results.get('serp_analysis', {}).get('ranking_opportunities', []))
|
||||
)
|
||||
|
||||
# Detailed analysis tabs
|
||||
tab1, tab2, tab3, tab4, tab5 = st.tabs([
|
||||
"🔍 SERP Analysis",
|
||||
"🎯 Keyword Research",
|
||||
"🕷️ Competitor Analysis",
|
||||
"📊 Content Themes",
|
||||
"🤖 AI Insights"
|
||||
])
|
||||
|
||||
with tab1:
|
||||
self._render_serp_analysis(results.get('serp_analysis', {}))
|
||||
|
||||
with tab2:
|
||||
self._render_keyword_analysis(results.get('keyword_expansion', {}))
|
||||
|
||||
with tab3:
|
||||
self._render_competitor_analysis(results.get('competitor_content', {}))
|
||||
|
||||
with tab4:
|
||||
self._render_content_themes(results.get('content_themes', {}))
|
||||
|
||||
with tab5:
|
||||
self._render_ai_insights(results.get('ai_insights', {}))
|
||||
|
||||
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
|
||||
"""Render SERP analysis results."""
|
||||
st.subheader("🔍 SERP Landscape Analysis")
|
||||
|
||||
if not serp_data:
|
||||
st.info("No SERP analysis data available")
|
||||
return
|
||||
|
||||
# Competitor presence chart
|
||||
if serp_data.get('competitor_presence'):
|
||||
st.subheader("🏆 Competitor SERP Presence")
|
||||
presence_df = pd.DataFrame(
|
||||
list(serp_data['competitor_presence'].items()),
|
||||
columns=['Domain', 'Keywords Ranking']
|
||||
)
|
||||
st.bar_chart(presence_df.set_index('Domain'))
|
||||
|
||||
# Ranking opportunities
|
||||
if serp_data.get('ranking_opportunities'):
|
||||
st.subheader("🎯 Ranking Opportunities")
|
||||
opportunities_df = pd.DataFrame(serp_data['ranking_opportunities'])
|
||||
st.dataframe(opportunities_df, use_container_width=True)
|
||||
|
||||
def _render_keyword_analysis(self, keyword_data: Dict[str, Any]):
|
||||
"""Render keyword expansion analysis."""
|
||||
st.subheader("🎯 Keyword Research Expansion")
|
||||
|
||||
if not keyword_data:
|
||||
st.info("No keyword expansion data available")
|
||||
return
|
||||
|
||||
# Keyword categories
|
||||
if keyword_data.get('keyword_categories'):
|
||||
st.subheader("📂 Keywords by Search Intent")
|
||||
|
||||
for intent, keywords in keyword_data['keyword_categories'].items():
|
||||
if keywords:
|
||||
with st.expander(f"{intent.title()} Keywords ({len(keywords)})"):
|
||||
for kw in keywords[:20]: # Show first 20
|
||||
st.write(f"• {kw}")
|
||||
|
||||
# Long-tail opportunities
|
||||
if keyword_data.get('long_tail_opportunities'):
|
||||
st.subheader("🎣 Long-tail Opportunities")
|
||||
long_tail_df = pd.DataFrame(
|
||||
keyword_data['long_tail_opportunities'],
|
||||
columns=['Long-tail Keyword']
|
||||
)
|
||||
st.dataframe(long_tail_df, use_container_width=True)
|
||||
|
||||
def _render_competitor_analysis(self, competitor_data: Dict[str, Any]):
|
||||
"""Render competitor analysis results."""
|
||||
st.subheader("🕷️ Deep Competitor Analysis")
|
||||
|
||||
if not competitor_data.get('crawl_results'):
|
||||
st.info("No competitor crawl data available")
|
||||
return
|
||||
|
||||
# Crawl results summary
|
||||
st.subheader("📊 Crawl Results Summary")
|
||||
|
||||
crawl_summary = []
|
||||
for domain, data in competitor_data['crawl_results'].items():
|
||||
crawl_summary.append({
|
||||
'Domain': domain,
|
||||
'Pages Crawled': data.get('total_pages', 0),
|
||||
'Avg Content Length': round(data.get('content_length_stats', {}).get('mean', 0))
|
||||
})
|
||||
|
||||
if crawl_summary:
|
||||
summary_df = pd.DataFrame(crawl_summary)
|
||||
st.dataframe(summary_df, use_container_width=True)
|
||||
|
||||
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Show dominant content themes and the themes grouped per cluster.

    Args:
        theme_data: Theme analysis results. Reads the optional keys
            ``dominant_themes`` (tabular data) and ``content_clusters``
            (mapping of cluster name to a sequence of themes).
    """
    st.subheader("📊 Content Theme Analysis")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes are rendered as a table, exactly as supplied.
    dominant = theme_data.get('dominant_themes')
    if dominant:
        st.subheader("🎯 Dominant Content Themes")
        st.dataframe(pd.DataFrame(dominant), use_container_width=True)

    # Each non-empty cluster gets an expander listing its first 10 themes.
    clusters = theme_data.get('content_clusters')
    if clusters:
        st.subheader("🗂️ Content Topic Clusters")

        for cluster, themes in clusters.items():
            if not themes:
                continue
            with st.expander(f"{cluster.replace('_', ' ').title()} ({len(themes)} themes)"):
                for theme in themes[:10]:
                    st.write(f"• {theme}")
|
||||
|
||||
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Show AI recommendations and the implementation timeline.

    Args:
        ai_data: AI insight results. Reads the optional keys
            ``recommendations`` (sequence) and ``implementation_timeline``
            (mapping of period to a sequence of tasks).
    """
    st.subheader("🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    # Only the first five recommendations are shown, numbered from 1.
    recommendations = ai_data.get('recommendations')
    if recommendations:
        st.subheader("🎯 Priority Recommendations")

        for position, rec in enumerate(recommendations[:5], start=1):
            st.markdown(f"**{position}. {rec}**")

    # One expander per timeline period, listing its tasks.
    timeline = ai_data.get('implementation_timeline')
    if timeline:
        st.subheader("📅 Implementation Timeline")

        for period, tasks in timeline.items():
            with st.expander(f"{period} Plan"):
                for task in tasks:
                    st.write(f"• {task}")
|
||||
@@ -1,787 +0,0 @@
|
||||
"""
|
||||
Enhanced UI for Content Gap Analysis with Advertools Integration.
|
||||
|
||||
This module provides a comprehensive Streamlit interface for content gap analysis
|
||||
using the EnhancedContentGapAnalyzer with advertools and AI insights.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
from typing import Dict, Any, List
|
||||
import json
|
||||
from datetime import datetime
|
||||
import io
|
||||
import base64
|
||||
|
||||
from .enhanced_analyzer import EnhancedContentGapAnalyzer
|
||||
from lib.alwrity_ui.dashboard_styles import apply_dashboard_style, render_dashboard_header
|
||||
|
||||
class EnhancedContentGapAnalysisUI:
|
||||
"""Enhanced UI for content gap analysis."""
|
||||
|
||||
def __init__(self):
    """Create the analyzer backend and apply the shared dashboard styling."""
    # Analyzer that performs the actual gap analysis for this UI.
    self.analyzer = EnhancedContentGapAnalyzer()

    # Styling is applied as a side effect of constructing the UI object.
    apply_dashboard_style()
|
||||
|
||||
def render(self):
    """Draw the page: header, input form and, when present, stored results."""
    render_dashboard_header(
        "🎯 Enhanced Content Gap Analysis",
        "Discover content opportunities with AI-powered insights using advertools, SERP analysis, competitor crawling, and strategic recommendations."
    )

    with st.container():
        # The form also launches the analysis when it is submitted.
        self._render_analysis_form()

        # Results are kept in session state; draw them only when the key
        # exists and holds a truthy value.
        has_results = 'gap_analysis_results' in st.session_state
        if has_results and st.session_state.gap_analysis_results:
            st.markdown("---")
            self._render_results_dashboard(st.session_state.gap_analysis_results)
|
||||
|
||||
def _render_analysis_form(self):
    """Render the analysis input form and launch the analysis on submit.

    The form collects the target URL, industry, competitor URLs, seed
    keywords and three analysis toggles. On submit the inputs are validated
    (an error is shown and the method returns on the first failure) and then
    passed to ``_run_enhanced_analysis``.

    Competitor URLs beyond the advertised limit of five are dropped with a
    warning, and surrounding whitespace on the target URL is ignored.
    """
    max_competitors = 5  # limit advertised in the competitor field's label/help
    url_schemes = ('http://', 'https://')

    st.markdown("## 🚀 Setup Your Content Gap Analysis")

    with st.form("enhanced_gap_analysis_form"):
        # Target website and industry side by side.
        col1, col2 = st.columns([2, 1])

        with col1:
            target_url = st.text_input(
                "🎯 Your Website URL",
                placeholder="https://yourwebsite.com",
                help="Enter your website URL to analyze"
            )

        with col2:
            industry = st.selectbox(
                "🏭 Industry",
                options=[
                    "general", "technology", "healthcare", "finance",
                    "ecommerce", "education", "real estate", "travel",
                    "food", "fitness", "marketing", "consulting"
                ],
                help="Select your industry for better analysis context"
            )

        # Competitor URLs
        st.markdown("### 🏆 Competitor Analysis")
        competitor_urls_text = st.text_area(
            "Competitor URLs (one per line, max 5)",
            placeholder="https://competitor1.com\nhttps://competitor2.com\nhttps://competitor3.com",
            height=120,
            help="Enter up to 5 competitor URLs for comprehensive analysis"
        )

        # Target keywords
        st.markdown("### 🎯 Keyword Focus")
        target_keywords_text = st.text_input(
            "Primary Keywords (comma-separated)",
            placeholder="seo, content marketing, digital marketing",
            help="Enter your main keywords to analyze and expand"
        )

        # Analysis options
        st.markdown("### ⚙️ Analysis Options")

        col1, col2, col3 = st.columns(3)

        with col1:
            enable_serp = st.checkbox(
                "🔍 SERP Analysis",
                value=True,
                help="Analyze competitor positions in search results"
            )

        with col2:
            enable_crawling = st.checkbox(
                "🕷️ Deep Crawling",
                value=True,
                help="Perform comprehensive competitor content crawling"
            )

        with col3:
            enable_ai_insights = st.checkbox(
                "🤖 AI Insights",
                value=True,
                help="Generate AI-powered strategic recommendations"
            )

        # Submit button
        submitted = st.form_submit_button(
            "🚀 Start Enhanced Analysis",
            use_container_width=True,
            type="primary"
        )

        if submitted:
            # Pasted URLs often carry stray whitespace; strip before validating.
            target_url = (target_url or "").strip()
            if not target_url.startswith(url_schemes):
                st.error("❌ Please enter a valid target URL starting with http:// or https://")
                return

            if not target_keywords_text.strip():
                st.error("❌ Please enter at least one target keyword")
                return

            # Keep only non-empty lines that look like absolute http(s) URLs.
            stripped_lines = (line.strip() for line in competitor_urls_text.split('\n'))
            competitor_urls = [url for url in stripped_lines if url.startswith(url_schemes)]

            if not competitor_urls:
                st.error("❌ Please enter at least one valid competitor URL")
                return

            # Enforce the limit the form promises instead of silently
            # analysing an arbitrary number of sites.
            if len(competitor_urls) > max_competitors:
                st.warning(f"⚠️ Only the first {max_competitors} competitor URLs will be analyzed")
                competitor_urls = competitor_urls[:max_competitors]

            target_keywords = [
                kw.strip() for kw in target_keywords_text.split(',')
                if kw.strip()
            ]

            # Run analysis
            self._run_enhanced_analysis(
                target_url=target_url,
                competitor_urls=competitor_urls,
                target_keywords=target_keywords,
                industry=industry,
                options={
                    'enable_serp': enable_serp,
                    'enable_crawling': enable_crawling,
                    'enable_ai_insights': enable_ai_insights
                }
            )
|
||||
|
||||
def _run_enhanced_analysis(self, target_url: str, competitor_urls: List[str],
                           target_keywords: List[str], industry: str, options: Dict[str, bool]):
    """Run the gap analysis, store the outcome in session state and report it.

    Args:
        target_url: Website being analysed.
        competitor_urls: Competitor sites to compare against.
        target_keywords: Seed keywords for the analysis.
        industry: Industry label forwarded to the analyzer.
        options: Analysis toggles collected by the form.
            NOTE(review): this argument is not read here and is not forwarded
            to the analyzer call; the toggles currently have no effect.

    Any exception raised while running is reported with ``st.error`` rather
    than propagated. The temporary progress widgets are removed even when
    the analyzer raises.
    """
    try:
        with st.spinner("🔄 Running Enhanced Content Gap Analysis..."):
            # Temporary progress widgets for the duration of the run.
            progress_bar = st.progress(0)
            status_text = st.empty()

            try:
                progress_bar.progress(10)
                status_text.text("🚀 Initializing analysis...")

                results = self.analyzer.analyze_comprehensive_gap(
                    target_url=target_url,
                    competitor_urls=competitor_urls,
                    target_keywords=target_keywords,
                    industry=industry
                )

                progress_bar.progress(100)
                status_text.text("✅ Analysis complete!")
            finally:
                # Always clear the indicators so a failed run does not leave
                # a stale progress bar behind.
                progress_bar.empty()
                status_text.empty()

            # Results (including error payloads) are kept for later renders.
            st.session_state.gap_analysis_results = results

            if 'error' in results:
                st.error(f"❌ Analysis failed: {results['error']}")
            else:
                st.success("🎉 Enhanced Content Gap Analysis completed successfully!")
                st.balloons()

                # Rerun to show results
                st.rerun()

    except Exception as e:
        st.error(f"❌ Error running analysis: {str(e)}")
|
||||
|
||||
def _render_results_dashboard(self, results: Dict[str, Any]):
    """Draw the full results view, or only an error when the analysis failed.

    Args:
        results: Analysis results; when it contains an ``error`` key only
            that error is shown.
    """
    if 'error' in results:
        st.error(f"❌ Analysis Error: {results['error']}")
        return

    st.markdown("## 📊 Enhanced Content Gap Analysis Results")

    # Sections are drawn top to bottom: metrics, detail tabs, export controls.
    section_renderers = (
        self._render_metrics_overview,
        self._render_detailed_analysis,
        self._render_export_options,
    )
    for render_section in section_renderers:
        render_section(results)
|
||||
|
||||
def _render_metrics_overview(self, results: Dict[str, Any]):
    """Render the five headline metrics and the analysis timestamp.

    Args:
        results: Analysis results. Reads ``target_keywords``,
            ``competitor_urls``, ``keyword_expansion.expanded_keywords``,
            ``serp_analysis.ranking_opportunities``, the recommendations
            (see below) and ``analysis_timestamp``.

    The recommendation count uses a top-level ``recommendations`` entry when
    present and otherwise falls back to ``ai_insights.recommendations``,
    which is the location the other renderers in this class read.
    """
    st.markdown("### 📈 Analysis Overview")

    recommendations = (
        results.get('recommendations')
        or results.get('ai_insights', {}).get('recommendations', [])
    )

    # (label, value, help text) for each metric, in display order.
    metrics = [
        (
            "🎯 Keywords Analyzed",
            len(results.get('target_keywords', [])),
            "Number of primary keywords analyzed",
        ),
        (
            "🏆 Competitors Crawled",
            len(results.get('competitor_urls', [])),
            "Number of competitor websites analyzed",
        ),
        (
            "🔍 Keywords Discovered",
            len(results.get('keyword_expansion', {}).get('expanded_keywords', [])),
            "Additional keywords discovered through expansion",
        ),
        (
            "🚀 SERP Opportunities",
            len(results.get('serp_analysis', {}).get('ranking_opportunities', [])),
            "Keywords with ranking opportunities identified",
        ),
        (
            "💡 AI Recommendations",
            len(recommendations),
            "AI-generated strategic recommendations",
        ),
    ]

    for column, (label, value, help_text) in zip(st.columns(len(metrics)), metrics):
        with column:
            st.metric(label, value, help=help_text)

    # Analysis timestamp
    raw_timestamp = results.get('analysis_timestamp')
    if raw_timestamp:
        try:
            # fromisoformat() on older Python versions rejects a trailing
            # 'Z', so it is rewritten as an explicit UTC offset first.
            timestamp = datetime.fromisoformat(str(raw_timestamp).replace('Z', '+00:00'))
        except ValueError:
            # An unparseable timestamp must not take the dashboard down;
            # show the raw value instead.
            st.caption(f"📅 Analysis completed: {raw_timestamp}")
        else:
            st.caption(f"📅 Analysis completed: {timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')}")
|
||||
|
||||
def _render_detailed_analysis(self, results: Dict[str, Any]):
    """Lay out the six detail tabs and fill each with its section renderer.

    Args:
        results: Analysis results; each of the first five tabs receives one
            sub-section (an empty dict when missing), the last tab receives
            the whole mapping.
    """
    serp_tab, keyword_tab, competitor_tab, themes_tab, insights_tab, plan_tab = st.tabs([
        "🔍 SERP Analysis",
        "🎯 Keyword Research",
        "🕷️ Competitor Intelligence",
        "📊 Content Themes",
        "🤖 AI Strategic Insights",
        "📋 Action Plan"
    ])

    # (tab, renderer, key of the results sub-section handed to the renderer)
    keyed_sections = (
        (serp_tab, self._render_serp_analysis, 'serp_analysis'),
        (keyword_tab, self._render_keyword_research, 'keyword_expansion'),
        (competitor_tab, self._render_competitor_intelligence, 'competitor_content'),
        (themes_tab, self._render_content_themes, 'content_themes'),
        (insights_tab, self._render_ai_insights, 'ai_insights'),
    )
    for tab, renderer, section_key in keyed_sections:
        with tab:
            renderer(results.get(section_key, {}))

    # The action plan draws on several sections, so it gets everything.
    with plan_tab:
        self._render_action_plan(results)
|
||||
|
||||
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
    """Render SERP analysis: competitor presence, opportunities and features.

    Args:
        serp_data: SERP analysis results. Reads the optional keys
            ``competitor_presence`` (mapping of domain to keyword count),
            ``ranking_opportunities`` (tabular data) and
            ``keyword_rankings`` (mapping whose values may carry a
            ``serp_features`` sequence).

    The "top performers" list is ordered by keyword count (highest first)
    rather than by the mapping's insertion order. When the
    ``ranking_opportunities`` key is present but empty, the "already ranking
    well" message is shown.
    """
    st.markdown("### 🔍 Search Engine Results Analysis")

    if not serp_data:
        st.info("No SERP analysis data available")
        return

    # Competitor SERP presence
    presence_data = serp_data.get('competitor_presence')
    if presence_data:
        st.markdown("#### 🏆 Competitor SERP Dominance")

        presence_df = pd.DataFrame(
            list(presence_data.items()),
            columns=['Domain', 'Keywords Ranking']
        )
        st.bar_chart(presence_df.set_index('Domain'))

        # sorted() is stable, so domains with equal counts keep their
        # original relative order.
        top_performers = sorted(
            presence_data.items(), key=lambda item: item[1], reverse=True
        )[:3]

        st.markdown("**🥇 Top Performing Competitors:**")
        for domain, count in top_performers:
            st.write(f"• **{domain}**: Ranking for {count} keywords")

    # Ranking opportunities: test for the key rather than its truthiness so
    # that an empty result reaches the success message below.
    if 'ranking_opportunities' in serp_data:
        st.markdown("#### 🚀 Ranking Opportunities")

        opportunities = serp_data['ranking_opportunities']

        if opportunities:
            st.dataframe(pd.DataFrame(opportunities), use_container_width=True)
            st.info(f"💡 Found {len(opportunities)} keywords where you're not ranking in top 10!")
        else:
            st.success("🎉 You're already ranking well for your target keywords!")

    # SERP features analysis
    keyword_rankings = serp_data.get('keyword_rankings')
    if keyword_rankings:
        st.markdown("#### 🎯 SERP Features Opportunities")

        # Flatten the per-keyword feature lists into one list for counting.
        all_features = [
            feature
            for keyword_data in keyword_rankings.values()
            for feature in keyword_data.get('serp_features', [])
        ]

        if all_features:
            feature_counts = pd.Series(all_features).value_counts()
            st.bar_chart(feature_counts)

            st.markdown("**🎯 Focus on these SERP features:**")
            for feature, count in feature_counts.head(3).items():
                st.write(f"• **{feature.replace('_', ' ').title()}**: Appears in {count} keyword searches")
|
||||
|
||||
def _render_keyword_research(self, keyword_data: Dict[str, Any]):
    """Render keyword research: seed/expanded counts, intents and long-tail.

    Args:
        keyword_data: Keyword expansion results. Reads ``seed_keywords``,
            ``expanded_keywords``, ``keyword_categories`` (mapping of intent
            to keywords) and ``long_tail_opportunities``.

    When the ``long_tail_opportunities`` key is present but empty, the
    "no long-tail opportunities" warning is shown.
    """
    st.markdown("### 🎯 Advanced Keyword Research")

    if not keyword_data:
        st.info("No keyword expansion data available")
        return

    # Seed vs expanded keywords
    seed_keywords = keyword_data.get('seed_keywords', [])
    expanded_keywords = keyword_data.get('expanded_keywords', [])

    col1, col2 = st.columns(2)

    with col1:
        st.metric("🌱 Seed Keywords", len(seed_keywords))
        for kw in seed_keywords:
            st.write(f"• {kw}")

    with col2:
        st.metric("🔍 Expanded Keywords", len(expanded_keywords))
        # Guard against division by zero when no seed keywords were given.
        expansion_factor = len(expanded_keywords) / len(seed_keywords) if seed_keywords else 0
        st.write(f"**Expansion Factor:** {expansion_factor:.1f}x")

    # Search intent categorization
    categories = keyword_data.get('keyword_categories')
    if categories:
        st.markdown("#### 🧠 Search Intent Analysis")

        # Chart only the intents that actually have keywords.
        intent_counts = {intent: len(keywords) for intent, keywords in categories.items() if keywords}

        if intent_counts:
            intent_df = pd.DataFrame(
                list(intent_counts.items()),
                columns=['Search Intent', 'Keywords']
            )
            st.bar_chart(intent_df.set_index('Search Intent'))

        # Detailed breakdown, first 20 keywords per intent.
        for intent, keywords in categories.items():
            if not keywords:
                continue
            with st.expander(f"📂 {intent.title()} Keywords ({len(keywords)})"):
                for kw in keywords[:20]:
                    st.write(f"• {kw}")

    # Long-tail opportunities: test for the key rather than its truthiness
    # so that an empty result reaches the warning below.
    if 'long_tail_opportunities' in keyword_data:
        st.markdown("#### 🎣 Long-tail Keyword Opportunities")

        long_tail = keyword_data['long_tail_opportunities']

        if long_tail:
            st.info(f"🎯 Found {len(long_tail)} long-tail opportunities with lower competition!")

            with st.expander("View Long-tail Keywords"):
                for i, kw in enumerate(long_tail, 1):
                    st.write(f"{i}. {kw}")
        else:
            st.warning("No long-tail opportunities identified")
|
||||
|
||||
def _render_competitor_intelligence(self, competitor_data: Dict[str, Any]):
    """Render competitor crawl summary, page-type mix and structure metrics.

    Args:
        competitor_data: Competitor analysis results. Reads
            ``crawl_results`` (mapping of domain to crawl statistics) and
            the optional ``content_structure`` (mapping of domain to
            structure metrics).

    A domain whose ``total_pages`` is 0 is reported with a 0.0% success rate
    instead of raising ``ZeroDivisionError``.
    """
    st.markdown("### 🕷️ Competitive Intelligence")

    crawl_results = competitor_data.get('crawl_results')
    if not crawl_results:
        st.info("No competitor crawl data available")
        return

    st.markdown("#### 📊 Competitor Content Overview")

    # Create summary table
    summary_data = []
    for domain, data in crawl_results.items():
        mean_length = data.get('content_length_stats', {}).get('mean', 0)
        ok_responses = data.get('status_codes', {}).get(200, 0)
        # A missing page count keeps the original divisor of 1; an explicit
        # 0 would divide by zero, so it is reported as 0%.
        divisor = data.get('total_pages', 1)
        success_rate = ok_responses / divisor * 100 if divisor else 0.0

        summary_data.append({
            'Competitor': domain,
            'Pages Crawled': data.get('total_pages', 0),
            'Avg Content Length': f"{mean_length:,.0f} chars",
            'Success Rate': f"{success_rate:.1f}%"
        })

    if summary_data:
        st.dataframe(pd.DataFrame(summary_data), use_container_width=True)

    # Page type analysis
    st.markdown("#### 📄 Content Type Distribution")

    for domain, data in crawl_results.items():
        page_types = data.get('page_types', {})
        if not page_types:
            continue

        with st.expander(f"📊 {domain} Content Types"):
            types_df = pd.DataFrame(
                list(page_types.items()),
                columns=['Page Type', 'Count']
            )

            if not types_df.empty:
                st.bar_chart(types_df.set_index('Page Type'))

            # Key insights: share of blog and product pages.
            total_pages = sum(page_types.values())
            if total_pages > 0:
                blog_ratio = page_types.get('blog_posts', 0) / total_pages * 100
                product_ratio = page_types.get('product_pages', 0) / total_pages * 100

                st.write("**Content Strategy Insights:**")
                st.write(f"• Blog content: {blog_ratio:.1f}% of pages")
                st.write(f"• Product focus: {product_ratio:.1f}% of pages")

    # Content structure insights
    structure_data = competitor_data.get('content_structure')
    if structure_data:
        st.markdown("#### 🏗️ Content Structure Analysis")

        for domain, structure in structure_data.items():
            with st.expander(f"🔍 {domain} Structure Analysis"):
                col1, col2 = st.columns(2)

                with col1:
                    st.metric("Avg Title Length", f"{structure.get('avg_title_length', 0):.0f} chars")
                    st.metric("H1 Usage", f"{structure.get('h1_usage', 0):.1f}%")

                with col2:
                    st.metric("Avg Meta Desc Length", f"{structure.get('avg_meta_desc_length', 0):.0f} chars")
                    st.metric("Internal Links", f"{structure.get('internal_links_avg', 0):.1f} avg")
|
||||
|
||||
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Render dominant themes, topic clusters and content opportunities.

    Args:
        theme_data: Theme analysis results. Reads the optional keys
            ``dominant_themes`` (sequence of mappings with ``word``/``text``
            and ``freq``/``frequency`` entries), ``content_clusters``
            (mapping of cluster name to themes) and
            ``content_opportunities`` (sequence).

    When the ``content_opportunities`` key is present but empty, the
    "no specific content opportunities" note is shown.
    """
    st.markdown("### 📊 Content Theme Intelligence")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes
    themes = theme_data.get('dominant_themes')
    if themes:
        st.markdown("#### 🎯 Dominant Content Themes")
        st.dataframe(pd.DataFrame(themes), use_container_width=True)

        # Top themes highlight; both key spellings are accepted.
        st.markdown("**🔥 Top Content Themes:**")
        for i, theme in enumerate(themes[:5], 1):
            word = theme.get('word', theme.get('text', 'Unknown'))
            freq = theme.get('freq', theme.get('frequency', 0))
            st.write(f"{i}. **{word}** (appears {freq} times)")

    # Content clusters
    clusters = theme_data.get('content_clusters')
    if clusters:
        st.markdown("#### 🗂️ Topic Cluster Analysis")

        # Cluster distribution, non-empty clusters only.
        cluster_counts = {name: len(members) for name, members in clusters.items() if members}

        if cluster_counts:
            cluster_df = pd.DataFrame(
                list(cluster_counts.items()),
                columns=['Topic Cluster', 'Theme Count']
            )
            st.bar_chart(cluster_df.set_index('Topic Cluster'))

        # Detailed cluster view, first 15 themes per cluster.
        for cluster_name, members in clusters.items():
            if not members:
                continue
            with st.expander(f"📂 {cluster_name.replace('_', ' ').title()} ({len(members)} themes)"):
                for theme in members[:15]:
                    st.write(f"• {theme}")

    # Content gaps and opportunities: test for the key rather than its
    # truthiness so that an empty result reaches the note below.
    if 'content_opportunities' in theme_data:
        st.markdown("#### 🎯 Content Gap Opportunities")

        opportunities = theme_data['content_opportunities']

        if opportunities:
            for opp in opportunities:
                st.write(f"🎯 **{opp}**")
        else:
            st.info("No specific content opportunities identified in theme analysis")
|
||||
|
||||
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Show the AI insight sections that are present in ``ai_data``.

    Args:
        ai_data: AI insight results. Optional keys read, in display order:
            ``recommendations``, ``competitive_positioning``,
            ``content_strategy``, ``implementation_timeline`` and
            ``technical_opportunities``.
    """
    st.markdown("### 🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    # First five recommendations, one expander each.
    recommendations = ai_data.get('recommendations')
    if recommendations:
        st.markdown("#### 🎯 Priority Strategic Recommendations")

        for number, rec in enumerate(recommendations[:5], start=1):
            with st.expander(f"🎯 Recommendation {number}"):
                st.markdown(rec)

    # Two free-text sections rendered as markdown under their own heading.
    markdown_sections = (
        ('competitive_positioning', "#### 🏆 Competitive Positioning Insights"),
        ('content_strategy', "#### 📝 Content Strategy Recommendations"),
    )
    for section_key, heading in markdown_sections:
        if ai_data.get(section_key):
            st.markdown(heading)
            st.markdown(ai_data[section_key])

    # Roadmap: one expander per period with its task list.
    timeline = ai_data.get('implementation_timeline')
    if timeline:
        st.markdown("#### 📅 Implementation Roadmap")

        for period, tasks in timeline.items():
            with st.expander(f"📅 {period.replace('_', ' ').title()} Plan"):
                for task in tasks:
                    st.write(f"• {task}")

    tech_opps = ai_data.get('technical_opportunities')
    if tech_opps:
        st.markdown("#### ⚙️ Technical SEO Opportunities")

        for opp in tech_opps:
            st.write(f"⚙️ {opp}")
|
||||
|
||||
def _render_action_plan(self, results: Dict[str, Any]):
    """Render the action plan: data-driven quick wins plus fixed guidance.

    Args:
        results: Analysis results. Quick wins are derived from
            ``serp_analysis.ranking_opportunities``,
            ``keyword_expansion.long_tail_opportunities`` and
            ``content_themes.dominant_themes``. The remaining sections are
            static text.

    The top theme name is read from ``word`` and then ``text`` (the same
    keys the theme renderer accepts); a non-mapping theme entry is shown as
    its string form instead of raising ``AttributeError``.
    """
    st.markdown("### 📋 Your Content Gap Action Plan")

    # Quick wins section
    st.markdown("#### 🚀 Quick Wins (Week 1-2)")

    quick_wins = []

    # SERP opportunities
    serp_opportunities = results.get('serp_analysis', {}).get('ranking_opportunities', [])
    if serp_opportunities:
        quick_wins.append(f"🎯 Target {len(serp_opportunities)} keywords where you're not ranking")

    # Long-tail keywords (at most five are suggested)
    long_tail = results.get('keyword_expansion', {}).get('long_tail_opportunities', [])
    if long_tail:
        quick_wins.append(f"🎣 Create content for {min(5, len(long_tail))} high-potential long-tail keywords")

    # Content themes
    themes = results.get('content_themes', {}).get('dominant_themes', [])
    if themes:
        first_theme = themes[0]
        if isinstance(first_theme, dict):
            top_theme = first_theme.get('word', first_theme.get('text', 'top theme'))
        else:
            top_theme = str(first_theme)
        quick_wins.append(f"📊 Optimize existing content around '{top_theme}' theme")

    for i, win in enumerate(quick_wins, 1):
        st.write(f"{i}. {win}")

    # Medium-term strategy
    st.markdown("#### 📈 Medium-term Strategy (Month 1-3)")

    medium_term = [
        "🕷️ Conduct regular competitor content audits",
        "🎯 Develop content calendar based on keyword gaps",
        "📊 Implement content theme clusters",
        "🤖 Set up automated SERP monitoring"
    ]

    for i, strategy in enumerate(medium_term, 1):
        st.write(f"{i}. {strategy}")

    # Long-term vision
    st.markdown("#### 🎯 Long-term Vision (Quarter 2+)")

    long_term = [
        "🏆 Establish thought leadership in identified content gaps",
        "🌐 Build comprehensive content hub around dominant themes",
        "📈 Scale content production based on proven gaps",
        "🤝 Develop strategic partnerships for content collaboration"
    ]

    for i, vision in enumerate(long_term, 1):
        st.write(f"{i}. {vision}")

    # Success metrics
    st.markdown("#### 📊 Success Metrics to Track")

    metrics = [
        "🎯 Keyword ranking improvements for target terms",
        "📈 Organic traffic growth from new content",
        "🔍 SERP feature acquisitions (featured snippets, etc.)",
        "🏆 Competitive ranking gains in content themes",
        "📊 Content engagement metrics and user behavior"
    ]

    for metric in metrics:
        st.write(f"• {metric}")
|
||||
|
||||
def _render_export_options(self, results: Dict[str, Any]):
    """Offer JSON, keyword-CSV and text-summary exports of the results.

    Args:
        results: Analysis results to export.

    Each export is a two-step control: a button that prepares the payload,
    followed by a download button for it.
    NOTE(review): the download button is created only inside the branch
    taken when its outer button reports a click — confirm this nesting is
    the intended interaction.
    """
    st.markdown("---")
    st.markdown("### 📥 Export Analysis Results")

    json_col, csv_col, report_col = st.columns(3)

    # Full results as JSON; values json cannot encode are stringified.
    with json_col:
        if st.button("📄 Export as JSON", use_container_width=True):
            st.download_button(
                label="⬇️ Download JSON Report",
                data=json.dumps(results, indent=2, default=str),
                file_name=f"content_gap_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True
            )

    # Discovered keywords as a one-column CSV.
    with csv_col:
        if st.button("📊 Export Keywords CSV", use_container_width=True):
            discovered = results.get('keyword_expansion', {}).get('expanded_keywords', [])

            if not discovered:
                st.warning("No keywords available for export")
            else:
                st.download_button(
                    label="⬇️ Download Keywords CSV",
                    data=pd.DataFrame(discovered, columns=['Keyword']).to_csv(index=False),
                    file_name=f"discovered_keywords_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv",
                    use_container_width=True
                )

    # Plain-text summary report.
    with report_col:
        if st.button("📋 Generate Summary Report", use_container_width=True):
            st.download_button(
                label="⬇️ Download Summary Report",
                data=self._generate_summary_report(results),
                file_name=f"content_gap_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain",
                use_container_width=True
            )
|
||||
|
||||
def _generate_summary_report(self, results: Dict[str, Any]) -> str:
|
||||
"""Generate a text summary report."""
|
||||
|
||||
target_url = results.get('target_url', 'Unknown')
|
||||
timestamp = results.get('analysis_timestamp', datetime.now().isoformat())
|
||||
|
||||
summary = f"""
|
||||
ENHANCED CONTENT GAP ANALYSIS REPORT
|
||||
=====================================
|
||||
|
||||
Target Website: {target_url}
|
||||
Analysis Date: {timestamp}
|
||||
Industry: {results.get('industry', 'General')}
|
||||
|
||||
EXECUTIVE SUMMARY
|
||||
-----------------
|
||||
Keywords Analyzed: {len(results.get('target_keywords', []))}
|
||||
Competitors Analyzed: {len(results.get('competitor_urls', []))}
|
||||
Keywords Discovered: {len(results.get('keyword_expansion', {}).get('expanded_keywords', []))}
|
||||
SERP Opportunities: {len(results.get('serp_analysis', {}).get('ranking_opportunities', []))}
|
||||
|
||||
RANKING OPPORTUNITIES
|
||||
---------------------
|
||||
"""
|
||||
|
||||
# Add ranking opportunities
|
||||
opportunities = results.get('serp_analysis', {}).get('ranking_opportunities', [])
|
||||
for i, opp in enumerate(opportunities[:10], 1):
|
||||
summary += f"{i}. {opp.get('keyword', 'Unknown keyword')}\n"
|
||||
|
||||
# Add top keywords discovered
|
||||
summary += "\nTOP DISCOVERED KEYWORDS\n-----------------------\n"
|
||||
expanded_keywords = results.get('keyword_expansion', {}).get('expanded_keywords', [])
|
||||
for i, kw in enumerate(expanded_keywords[:20], 1):
|
||||
summary += f"{i}. {kw}\n"
|
||||
|
||||
# Add AI recommendations
|
||||
recommendations = results.get('ai_insights', {}).get('recommendations', [])
|
||||
if recommendations:
|
||||
summary += "\nAI STRATEGIC RECOMMENDATIONS\n----------------------------\n"
|
||||
for i, rec in enumerate(recommendations[:5], 1):
|
||||
summary += f"{i}. {rec}\n"
|
||||
|
||||
summary += f"\n\nReport generated by ALwrity Enhanced Content Gap Analysis\nTimestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
|
||||
|
||||
return summary
|
||||
|
||||
# Render function for integration with main dashboard
|
||||
def render_enhanced_content_gap_analysis():
    """Build an ``EnhancedContentGapAnalysisUI`` and draw it.

    Entry point for embedding the content gap analysis page in the main
    dashboard.
    """
    EnhancedContentGapAnalysisUI().render()
|
||||
@@ -1,649 +0,0 @@
|
||||
"""
|
||||
Keyword researcher for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
|
||||
|
||||
# Configure logger
# The log directory is created before the file sink is registered so the
# sink never depends on the directory being created for it.
os.makedirs("logs", exist_ok=True)

# NOTE(review): remove() without arguments drops every handler currently
# registered on the shared loguru logger, not only the default one — confirm
# other modules do not rely on handlers they added earlier.
logger.remove()  # Remove default handler

# Rotating file sink with full DEBUG detail.
logger.add(
    "logs/keyword_researcher.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)

# Colourised console sink at INFO level.
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
class KeywordResearcher:
|
||||
"""Researches and analyzes keywords for content strategy."""
|
||||
|
||||
def __init__(self):
    """Create the AI processor, the progress tracker and the stage table."""
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Ordered step descriptions of the single 'keyword_analysis' stage.
    keyword_steps = [
        'Initializing keyword research',
        'Analyzing keyword trends',
        'Evaluating search intent',
        'Identifying opportunities',
        'Generating keyword insights'
    ]

    # Stage definitions keyed by stage id.
    self.stages = {
        'keyword_analysis': {
            'name': 'Keyword Analysis',
            'steps': keyword_steps
        }
    }
|
||||
|
||||
def analyze(self, industry: str, url: str) -> Dict[str, Any]:
    """
    Run the keyword analysis pipeline for an industry.

    Four steps run in order - trend analysis, search-intent grouping,
    opportunity scoring and insight generation - and the progress tracker is
    advanced after each of them.

    Args:
        industry: Industry category handed to the trend analysis.
        url: Target website URL. Kept for interface compatibility; nothing in
            this method reads it.

    Returns:
        Dictionary with the keys 'trend_analysis', 'intent_analysis',
        'opportunities' and 'insights'. When a step raises, the error is shown
        through Streamlit and the same keys are returned empty together with
        an 'error' entry holding the exception text.
    """
    try:
        self.progress.start_stage('keyword_analysis')
        self.progress.next_step()

        # Analyze keyword trends
        trend_analysis = self._analyze_keyword_trends(industry)
        self.progress.next_step()

        # Evaluate search intent
        intent_analysis = self._evaluate_search_intent(trend_analysis)
        self.progress.next_step()

        # Identify opportunities
        opportunities = self._identify_opportunities(trend_analysis, intent_analysis)
        self.progress.next_step()

        # Generate insights
        insights = self._generate_keyword_insights(trend_analysis, intent_analysis, opportunities)
        self.progress.next_step()

        self.progress.complete_stage()

        return {
            'trend_analysis': trend_analysis,
            'intent_analysis': intent_analysis,
            'opportunities': opportunities,
            'insights': insights
        }

    except Exception as e:
        stage = self.progress.current_stage
        if stage:
            # Looking the display name up must not raise from inside the
            # handler, otherwise the original error would be masked; fall
            # back to the raw stage key when the tracker has no entry for it.
            try:
                stage_name = self.progress.stages[stage]['name']
            except (KeyError, TypeError, AttributeError):
                stage_name = stage
            self.progress.update_progress(0, f"Error in {stage_name}: {str(e)}")
        st.error(f"Error analyzing keywords: {str(e)}")
        return {
            'error': str(e),
            'trend_analysis': {},
            'intent_analysis': {},
            'opportunities': [],
            'insights': []
        }
|
||||
|
||||
def _analyze_keyword_trends(self, industry: str) -> Dict[str, Any]:
|
||||
"""Analyze keyword trends."""
|
||||
try:
|
||||
# Get AI analysis for keyword trends
|
||||
analysis = self.ai_processor.analyze_keywords({
|
||||
'industry': industry,
|
||||
'keywords': {} # Keywords will be fetched by AI processor
|
||||
})
|
||||
|
||||
return {
|
||||
'trends': analysis.get('keyword_trends', {}),
|
||||
'search_intent': analysis.get('search_intent', {}),
|
||||
'keyword_insights': analysis.get('keyword_insights', {})
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error analyzing keyword trends: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _evaluate_search_intent(self, trend_analysis: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Evaluate search intent."""
|
||||
try:
|
||||
intent_analysis = {
|
||||
'informational': [],
|
||||
'transactional': [],
|
||||
'navigational': [],
|
||||
'commercial': []
|
||||
}
|
||||
|
||||
# Categorize keywords by intent
|
||||
for keyword, data in trend_analysis.get('trends', {}).items():
|
||||
intent = data.get('intent', 'informational')
|
||||
if intent in intent_analysis:
|
||||
intent_analysis[intent].append({
|
||||
'keyword': keyword,
|
||||
'volume': data.get('volume', 0),
|
||||
'difficulty': data.get('difficulty', 0)
|
||||
})
|
||||
|
||||
return intent_analysis
|
||||
except Exception as e:
|
||||
st.error(f"Error evaluating search intent: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _identify_opportunities(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Identify keyword opportunities."""
|
||||
try:
|
||||
opportunities = []
|
||||
|
||||
# Analyze each intent category
|
||||
for intent, keywords in intent_analysis.items():
|
||||
for keyword_data in keywords:
|
||||
# Calculate opportunity score
|
||||
volume = keyword_data.get('volume', 0)
|
||||
difficulty = keyword_data.get('difficulty', 0)
|
||||
opportunity_score = volume * (1 - difficulty/100)
|
||||
|
||||
if opportunity_score > 50: # Threshold for good opportunities
|
||||
opportunities.append({
|
||||
'keyword': keyword_data['keyword'],
|
||||
'intent': intent,
|
||||
'volume': volume,
|
||||
'difficulty': difficulty,
|
||||
'opportunity_score': opportunity_score
|
||||
})
|
||||
|
||||
# Sort by opportunity score
|
||||
opportunities.sort(key=lambda x: x['opportunity_score'], reverse=True)
|
||||
|
||||
return opportunities
|
||||
except Exception as e:
|
||||
st.error(f"Error identifying opportunities: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_keyword_insights(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any], opportunities: List[Dict[str, Any]]) -> List[str]:
|
||||
"""Generate keyword insights."""
|
||||
try:
|
||||
insights = []
|
||||
|
||||
# Trend insights
|
||||
if trend_analysis.get('trends'):
|
||||
insights.append(f"Analyzed {len(trend_analysis['trends'])} keywords for trends")
|
||||
|
||||
# Intent insights
|
||||
for intent, keywords in intent_analysis.items():
|
||||
if keywords:
|
||||
insights.append(f"Found {len(keywords)} {intent} keywords")
|
||||
|
||||
# Opportunity insights
|
||||
if opportunities:
|
||||
insights.append(f"Identified {len(opportunities)} high-potential keyword opportunities")
|
||||
|
||||
return insights
|
||||
except Exception as e:
|
||||
st.error(f"Error generating keyword insights: {str(e)}")
|
||||
return []
|
||||
|
||||
def _generate_titles(self, industry: str) -> dict:
    """
    Delegate title generation to ``ai_title_generator``.

    Args:
        industry (str): Value passed unchanged to the title generator.

    Returns:
        dict: Whatever ``ai_title_generator`` returns for ``industry``.
    """
    titles = ai_title_generator(industry)
    return titles
|
||||
|
||||
def _analyze_meta_descriptions(self, industry: str) -> dict:
    """
    Delegate meta description analysis to ``metadesc_generator_main``.

    Args:
        industry (str): Value passed unchanged to the generator.

    Returns:
        dict: Whatever ``metadesc_generator_main`` returns for ``industry``.
    """
    meta_result = metadesc_generator_main(industry)
    return meta_result
|
||||
|
||||
def _analyze_structured_data(self, industry: str) -> dict:
    """
    Delegate structured data analysis to ``ai_structured_data``.

    Args:
        industry (str): Value passed unchanged to the analyser.

    Returns:
        dict: Whatever ``ai_structured_data`` returns for ``industry``.
    """
    structured = ai_structured_data(industry)
    return structured
|
||||
|
||||
def _extract_keywords(self, titles: dict, meta_analysis: dict) -> dict:
    """
    Ask the LLM to extract keywords from titles and meta descriptions.

    Args:
        titles (dict): Generated titles, interpolated into the prompt.
        meta_analysis (dict): Meta description analysis, interpolated into
            the prompt.

    Returns:
        dict: The response of ``llm_text_gen`` for the requested JSON
        structure (top-level keys 'keywords' and 'summary'). When the call
        fails the error is shown through Streamlit and an empty dict is
        returned, so the failure value has the same type as the requested
        object instead of being a list.
    """
    # The prompt text is kept flush-left so no indentation leaks into it.
    prompt = f"""
As an SEO expert, analyze the following content and extract relevant keywords with their metrics:

Titles: {titles}
Meta Descriptions: {meta_analysis}

Please provide a JSON response with the following structure:
{{
"keywords": [
{{
"keyword": "string",
"search_volume": "number",
"difficulty": "number",
"relevance_score": "number",
"content_type": "string"
}}
],
"summary": {{
"total_keywords": "number",
"high_opportunity_keywords": "number",
"recommended_focus_areas": ["string"]
}}
}}

Focus on:
1. Primary keywords and their variations
2. Long-tail keywords
3. Industry-specific terminology
4. Search volume and difficulty metrics
5. Content type recommendations
"""

    # JSON schema handed to the LLM helper; mirrors the structure above.
    keyword_schema = {
        "type": "object",
        "properties": {
            "keyword": {"type": "string"},
            "search_volume": {"type": "number"},
            "difficulty": {"type": "number"},
            "relevance_score": {"type": "number"},
            "content_type": {"type": "string"}
        }
    }
    summary_schema = {
        "type": "object",
        "properties": {
            "total_keywords": {"type": "number"},
            "high_opportunity_keywords": {"type": "number"},
            "recommended_focus_areas": {
                "type": "array",
                "items": {"type": "string"}
            }
        }
    }

    try:
        response = llm_text_gen(prompt, json_struct={
            "type": "object",
            "properties": {
                "keywords": {"type": "array", "items": keyword_schema},
                "summary": summary_schema
            }
        })
        return response
    except Exception as e:
        st.error(f"Error extracting keywords: {e}")
        # Empty mapping: still falsy and empty when iterated, but supports
        # the key lookups that the success value supports.
        return {}
|
||||
|
||||
def _analyze_search_intent(self, ai_insights: dict) -> dict:
    """
    Ask the LLM to classify search intent from AI insights.

    Args:
        ai_insights (dict): AI-processed insights, interpolated into the
            prompt.

    Returns:
        dict: The response of ``llm_text_gen`` for the requested JSON
        structure (keys 'informational', 'transactional', 'navigational'
        and 'summary'). On failure the error is shown through Streamlit and
        a dict with the three intent keys mapped to empty lists is returned.
    """
    # The prompt text is kept flush-left so no indentation leaks into it.
    prompt = f"""
As an SEO expert, analyze the following content insights and determine the search intent:

Content Insights: {ai_insights}

Please provide a JSON response with the following structure:
{{
"informational": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"transactional": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"navigational": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"summary": {{
"dominant_intent": "string",
"content_strategy_recommendations": ["string"]
}}
}}

Focus on:
1. Identifying primary search intent for each keyword
2. Suggesting appropriate content types
3. Providing content strategy recommendations
4. Analyzing user behavior patterns
"""

    def intent_entries():
        # Schema shared by the three intent lists; a fresh dict per call so
        # the three entries stay independent objects.
        return {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "keyword": {"type": "string"},
                    "intent_type": {"type": "string"},
                    "content_suggestions": {
                        "type": "array",
                        "items": {"type": "string"}
                    }
                }
            }
        }

    try:
        schema = {
            "type": "object",
            "properties": {
                "informational": intent_entries(),
                "transactional": intent_entries(),
                "navigational": intent_entries(),
                "summary": {
                    "type": "object",
                    "properties": {
                        "dominant_intent": {"type": "string"},
                        "content_strategy_recommendations": {
                            "type": "array",
                            "items": {"type": "string"}
                        }
                    }
                }
            }
        }
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error analyzing search intent: {e}")
        return {
            'informational': [],
            'transactional': [],
            'navigational': []
        }
|
||||
|
||||
def _suggest_content_formats(self, ai_insights: dict) -> dict:
    """
    Ask the LLM to suggest content formats for the given insights.

    Args:
        ai_insights (dict): AI-processed insights, interpolated into the
            prompt.

    Returns:
        dict: The response of ``llm_text_gen`` for the requested JSON
        structure (top-level keys 'content_formats' and 'format_strategy').
        When the call fails the error is shown through Streamlit and an
        empty dict is returned, so the failure value has the same type as
        the requested object instead of being a list.
    """
    # The prompt text is kept flush-left so no indentation leaks into it.
    prompt = f"""
As a content strategy expert, analyze the following insights and suggest appropriate content formats:

AI Insights: {ai_insights}

Please provide a JSON response with the following structure:
{{
"content_formats": [
{{
"format": "string",
"description": "string",
"use_cases": ["string"],
"recommended_topics": ["string"],
"estimated_impact": "string"
}}
],
"format_strategy": {{
"primary_formats": ["string"],
"secondary_formats": ["string"],
"implementation_priority": ["string"]
}}
}}

Focus on:
1. Identifying the most effective content formats
2. Matching formats to user intent
3. Suggesting specific use cases
4. Providing implementation guidance
"""

    def string_list():
        # Schema fragment for "array of strings"; fresh dict on every call.
        return {"type": "array", "items": {"type": "string"}}

    # JSON schema handed to the LLM helper; mirrors the structure above.
    format_schema = {
        "type": "object",
        "properties": {
            "format": {"type": "string"},
            "description": {"type": "string"},
            "use_cases": string_list(),
            "recommended_topics": string_list(),
            "estimated_impact": {"type": "string"}
        }
    }
    strategy_schema = {
        "type": "object",
        "properties": {
            "primary_formats": string_list(),
            "secondary_formats": string_list(),
            "implementation_priority": string_list()
        }
    }

    try:
        response = llm_text_gen(prompt, json_struct={
            "type": "object",
            "properties": {
                "content_formats": {"type": "array", "items": format_schema},
                "format_strategy": strategy_schema
            }
        })
        return response
    except Exception as e:
        st.error(f"Error suggesting content formats: {e}")
        # Empty mapping: still falsy and empty when iterated, but supports
        # the key lookups that the success value supports.
        return {}
|
||||
|
||||
def _create_topic_clusters(self, ai_insights: dict) -> dict:
    """
    Ask the LLM to organise the given insights into topic clusters.

    Args:
        ai_insights (dict): AI-processed insights, interpolated into the
            prompt.

    Returns:
        dict: The response of ``llm_text_gen`` for the requested JSON
        structure (top-level keys 'clusters' and 'relationships'). On
        failure the error is shown through Streamlit and
        ``{'clusters': [], 'relationships': {}}`` is returned.
    """
    # The prompt text is kept flush-left so no indentation leaks into it.
    prompt = f"""
As a content organization expert, analyze the following insights and create topic clusters:

AI Insights: {ai_insights}

Please provide a JSON response with the following structure:
{{
"clusters": [
{{
"cluster_name": "string",
"main_topics": ["string"],
"subtopics": ["string"],
"related_keywords": ["string"],
"content_opportunities": ["string"]
}}
],
"relationships": {{
"cluster_connections": [
{{
"source": "string",
"target": "string",
"relationship_type": "string",
"strength": "number"
}}
],
"content_hierarchy": {{
"primary_topics": ["string"],
"secondary_topics": ["string"],
"tertiary_topics": ["string"]
}}
}}
}}

Focus on:
1. Identifying main topic clusters
2. Organizing subtopics and related keywords
3. Mapping relationships between clusters
4. Suggesting content opportunities
"""

    def string_list():
        # Schema fragment for "array of strings"; fresh dict on every call.
        return {"type": "array", "items": {"type": "string"}}

    try:
        cluster_schema = {
            "type": "object",
            "properties": {
                "cluster_name": {"type": "string"},
                "main_topics": string_list(),
                "subtopics": string_list(),
                "related_keywords": string_list(),
                "content_opportunities": string_list()
            }
        }
        connection_schema = {
            "type": "object",
            "properties": {
                "source": {"type": "string"},
                "target": {"type": "string"},
                "relationship_type": {"type": "string"},
                "strength": {"type": "number"}
            }
        }
        hierarchy_schema = {
            "type": "object",
            "properties": {
                "primary_topics": string_list(),
                "secondary_topics": string_list(),
                "tertiary_topics": string_list()
            }
        }
        schema = {
            "type": "object",
            "properties": {
                "clusters": {"type": "array", "items": cluster_schema},
                "relationships": {
                    "type": "object",
                    "properties": {
                        "cluster_connections": {"type": "array", "items": connection_schema},
                        "content_hierarchy": hierarchy_schema
                    }
                }
            }
        }
        return llm_text_gen(prompt, json_struct=schema)
    except Exception as e:
        st.error(f"Error creating topic clusters: {e}")
        return {
            'clusters': [],
            'relationships': {}
        }
|
||||
@@ -1,361 +0,0 @@
|
||||
"""
|
||||
Main module for content gap analysis.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor, ProgressTracker
|
||||
from .utils.storage import ContentGapAnalysisStorage
|
||||
from datetime import datetime
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from .utils.content_parser import ContentParser
|
||||
|
||||
# Logging setup for this module (loguru).
# NOTE(review): remove() without a handler id detaches every handler that is
# currently registered on the shared logger, not only loguru's default one.
logger.remove()

# File sink: DEBUG and above, rotation at 50 MB, retention of 10 days.
logger.add(
    "logs/content_gap_analysis.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)

# Console sink: INFO and above, coloured through loguru markup tags.
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)

# Create the log directory when it is missing; an existing one is not an error.
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
class ContentGapAnalysis:
|
||||
"""Main class for content gap analysis."""
|
||||
|
||||
def __init__(self, db_session=None):
    """
    Create the analysis components and declare the workflow phases.

    Args:
        db_session: Optional session handed to ``ContentGapAnalysisStorage``;
            when it is falsy no storage is created and ``self.storage`` is None.
    """
    self.website_analyzer = WebsiteAnalyzer()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.keyword_researcher = KeywordResearcher()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()
    if db_session:
        self.storage = ContentGapAnalysisStorage(db_session)
    else:
        self.storage = None

    # (phase key, display name, ordered step labels)
    phase_table = (
        ('website_analysis', 'Website Analysis', [
            'Initializing website analysis',
            'Analyzing website content',
            'Evaluating SEO elements',
            'Generating website insights',
        ]),
        ('competitor_analysis', 'Competitor Analysis', [
            'Initializing competitor analysis',
            'Analyzing competitor content',
            'Comparing market position',
            'Generating competitive insights',
        ]),
        ('keyword_analysis', 'Keyword Analysis', [
            'Initializing keyword research',
            'Analyzing keyword trends',
            'Evaluating search intent',
            'Generating keyword insights',
        ]),
        ('recommendation_generation', 'Recommendation Generation', [
            'Initializing recommendation engine',
            'Analyzing content gaps',
            'Generating recommendations',
            'Creating implementation plan',
        ]),
    )
    self.phases = {
        key: {'name': label, 'steps': steps}
        for key, label, steps in phase_table
    }

    logger.info("ContentGapAnalysis initialized")
|
||||
|
||||
def analyze(self, url: str, industry: str, competitor_urls: Optional[List[str]] = None, user_id: Optional[int] = None) -> Dict[str, Any]:
    """
    Run the complete content gap analysis workflow.

    Phases run in order: website analysis, competitor analysis (only when
    ``competitor_urls`` is non-empty), keyword analysis and recommendation
    generation. The progress tracker is advanced around each phase.

    Args:
        url: Target website URL
        industry: Industry category
        competitor_urls: Optional list of competitor URLs
        user_id: Optional user ID for storing results. Any value other than
            None (including 0) triggers storage when a storage backend exists.

    Returns:
        Dictionary with 'website', 'keywords', 'recommendations' and
        'duration' (seconds), plus 'competitors' when competitors were
        analysed and 'analysis_id' when the results were stored. When a
        phase raises, the error is shown through Streamlit and a dictionary
        with an 'error' entry and empty result fields is returned.
    """
    # Function-scope import: a monotonic clock measures elapsed time without
    # being affected by wall-clock adjustments.
    import time

    try:
        results = {}
        started = time.monotonic()

        # Phase 1: Website Analysis
        self.progress.start_stage('website_analysis')
        self.progress.next_step()

        website_analysis = self.website_analyzer.analyze(url)
        results['website'] = website_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 2: Competitor Analysis
        competitor_analysis = None
        if competitor_urls:
            self.progress.start_stage('competitor_analysis')
            self.progress.next_step()

            competitor_analysis = self.competitor_analyzer.analyze(competitor_urls, industry)
            results['competitors'] = competitor_analysis

            self.progress.next_step()
            self.progress.complete_stage()

        # Phase 3: Keyword Analysis
        self.progress.start_stage('keyword_analysis')
        self.progress.next_step()

        keyword_analysis = self.keyword_researcher.analyze(industry, url)
        results['keywords'] = keyword_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 4: Recommendation Generation
        self.progress.start_stage('recommendation_generation')
        self.progress.next_step()

        recommendations = self.recommendation_engine.generate_recommendations(
            website_analysis,
            competitor_analysis,
            keyword_analysis
        )
        results['recommendations'] = recommendations

        self.progress.next_step()
        self.progress.complete_stage()

        # Calculate analysis duration
        results['duration'] = time.monotonic() - started

        # Store results if a user id is provided and storage is available.
        # Compared against None so that user id 0 is not treated as "absent".
        if user_id is not None and self.storage:
            analysis_id = self.storage.save_analysis(user_id, url, industry, results)
            if analysis_id:
                results['analysis_id'] = analysis_id

        return results

    except Exception as e:
        stage = self.progress.current_stage
        if stage:
            # Looking the display name up must not raise from inside the
            # handler, otherwise the original error would be masked; fall
            # back to the raw stage key when the tracker has no entry for it.
            try:
                stage_name = self.progress.stages[stage]['name']
            except (KeyError, TypeError, AttributeError):
                stage_name = stage
            self.progress.update_progress(0, f"Error in {stage_name}: {str(e)}")
        st.error(f"Error in content gap analysis: {str(e)}")
        return {
            'error': str(e),
            'website': {},
            'competitors': [],
            'keywords': {},
            'recommendations': []
        }
|
||||
|
||||
def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
    """
    Fetch a stored analysis through the storage backend.

    Args:
        analysis_id: Analysis ID

    Returns:
        Whatever the storage backend returns for ``analysis_id``, or None
        (after a Streamlit error message) when no storage is configured.
    """
    storage = self.storage
    if storage:
        return storage.get_analysis(analysis_id)

    st.error("Storage not initialized")
    return None
|
||||
|
||||
def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
    """
    List the analyses stored for a user.

    Args:
        user_id: User ID

    Returns:
        Whatever the storage backend returns for ``user_id``, or an empty
        list (after a Streamlit error message) when no storage is configured.
    """
    storage = self.storage
    if storage:
        return storage.get_user_analyses(user_id)

    st.error("Storage not initialized")
    return []
|
||||
|
||||
def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
    """
    Change the status of a stored recommendation.

    Args:
        recommendation_id: Recommendation ID
        status: New status

    Returns:
        The storage backend's result for the update, or False (after a
        Streamlit error message) when no storage is configured.
    """
    storage = self.storage
    if storage:
        return storage.update_recommendation_status(recommendation_id, status)

    st.error("Storage not initialized")
    return False
|
||||
|
||||
def delete_analysis(self, analysis_id: int) -> bool:
    """
    Delete a stored analysis through the storage backend.

    Args:
        analysis_id: Analysis ID

    Returns:
        The storage backend's result for the deletion, or False (after a
        Streamlit error message) when no storage is configured.
    """
    storage = self.storage
    if storage:
        return storage.delete_analysis(analysis_id)

    st.error("Storage not initialized")
    return False
|
||||
|
||||
def get_analysis_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Condense analysis results into a summary dictionary.

    Each section of ``results`` ('website', 'competitors', 'keywords',
    'recommendations') is passed to its ``_summarize_*`` helper, with an
    empty dict standing in for a missing section; 'ai_insights' is copied
    through unchanged.

    Args:
        results: Dictionary containing analysis results

    Returns:
        Dictionary with 'website_metrics', 'competitor_insights',
        'keyword_opportunities', 'recommendation_highlights' and
        'ai_insights'. On failure the error is shown through Streamlit and
        the same keys are returned empty together with an 'error' entry.
    """
    try:
        self.progress.start_stage('summary_generation')
        self.progress.next_step()

        website_metrics = self._summarize_website_metrics(results.get('website', {}))
        competitor_insights = self._summarize_competitor_insights(results.get('competitors', {}))
        keyword_opportunities = self._summarize_keyword_opportunities(results.get('keywords', {}))
        recommendation_highlights = self._summarize_recommendations(results.get('recommendations', {}))
        ai_insights = results.get('ai_insights', {})

        self.progress.complete_stage()
        return {
            'website_metrics': website_metrics,
            'competitor_insights': competitor_insights,
            'keyword_opportunities': keyword_opportunities,
            'recommendation_highlights': recommendation_highlights,
            'ai_insights': ai_insights,
        }

    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error generating summary: {str(e)}")
        st.error(f"Error generating analysis summary: {str(e)}")

        failure = {'error': str(e)}
        for key in ('website_metrics', 'competitor_insights', 'keyword_opportunities',
                    'recommendation_highlights', 'ai_insights'):
            failure[key] = {}
        return failure
|
||||
|
||||
def export_results(self, results: Dict[str, Any], format: str = 'json') -> str:
    """
    Export analysis results in the specified format.

    Args:
        results: Dictionary containing analysis results
        format: Export format ('json' or 'csv'), matched case-insensitively

    Returns:
        String containing the exported results. JSON output is indented by
        two spaces. CSV output is a single data row whose columns are the
        flattened result keys, nested keys joined with dots
        (e.g. ``website.seo_score``).

        NOTE(review): on any failure, including an unsupported format, the
        error is shown through Streamlit and the exception text itself is
        returned, so callers cannot tell it apart from exported content.
    """
    try:
        self.progress.start_stage('export')
        self.progress.next_step()

        export_format = format.lower()
        if export_format == 'json':
            import json
            exported = json.dumps(results, indent=2)
        elif export_format == 'csv':
            import pandas as pd
            # Flatten the nested result dictionary into one labelled row.
            # Feeding the nested dict straight into DataFrame() puts the
            # inner keys into the index, which index=False then drops, and
            # it raises when list values differ in length.
            df = pd.json_normalize(results)
            exported = df.to_csv(index=False)
        else:
            raise ValueError(f"Unsupported export format: {format}")

        self.progress.complete_stage()
        return exported

    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error exporting results: {str(e)}")
        st.error(f"Error exporting results: {str(e)}")
        return str(e)
|
||||
|
||||
def _summarize_website_metrics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of website metrics."""
|
||||
try:
|
||||
return {
|
||||
'content_score': website_data.get('content_score', 0),
|
||||
'seo_score': website_data.get('seo_score', 0),
|
||||
'structure_score': website_data.get('structure_score', 0),
|
||||
'key_insights': website_data.get('insights', [])[:5] # Top 5 insights
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing website metrics: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_competitor_insights(self, competitor_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of competitor insights."""
|
||||
try:
|
||||
return {
|
||||
'market_position': competitor_data.get('market_position', {}),
|
||||
'content_gaps': competitor_data.get('content_gaps', [])[:5], # Top 5 gaps
|
||||
'competitive_advantages': competitor_data.get('advantages', [])[:5] # Top 5 advantages
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing competitor insights: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_keyword_opportunities(self, keyword_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of keyword opportunities."""
|
||||
try:
|
||||
return {
|
||||
'top_keywords': keyword_data.get('top_keywords', [])[:10], # Top 10 keywords
|
||||
'search_intent': keyword_data.get('search_intent', {}),
|
||||
'opportunities': keyword_data.get('opportunities', [])[:5] # Top 5 opportunities
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing keyword opportunities: {str(e)}")
|
||||
return {}
|
||||
|
||||
def _summarize_recommendations(self, recommendation_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Generate summary of recommendations."""
|
||||
try:
|
||||
return {
|
||||
'priority_recommendations': recommendation_data.get('priority_recommendations', [])[:5], # Top 5 recommendations
|
||||
'implementation_timeline': recommendation_data.get('timeline', {}),
|
||||
'expected_impact': recommendation_data.get('impact', {})
|
||||
}
|
||||
except Exception as e:
|
||||
st.error(f"Error summarizing recommendations: {str(e)}")
|
||||
return {}
|
||||
@@ -1,41 +0,0 @@
|
||||
"""
|
||||
Navigation component for Content Gap Analysis tool.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
|
||||
def show_content_gap_analysis_nav():
    """
    Render the sidebar of the Content Gap Analysis tool.

    Draws the title, a short description, the analysis-type radio selector,
    the feature list and a "How to Use" expander.

    Returns:
        The option currently selected in the "Select Analysis Type" radio.
    """
    sidebar = st.sidebar

    sidebar.title("Content Gap Analysis")
    sidebar.markdown("""
Analyze your content strategy, identify gaps, and get AI-powered recommendations.
""")

    # Navigation options
    analysis_types = [
        "Website Analysis",
        "Competitor Analysis",
        "Keyword Research",
        "Recommendations",
    ]
    nav_option = sidebar.radio("Select Analysis Type", analysis_types)

    # Tool description
    sidebar.markdown("""
### Features
- Website content analysis
- Competitor content comparison
- Keyword research and trends
- AI-powered recommendations
- Content gap identification
- Implementation timeline
""")

    # Help section
    with sidebar.expander("How to Use"):
        st.markdown("""
1. Start with Website Analysis
2. Add competitor URLs
3. Research keywords
4. Get recommendations
5. Export results
""")

    return nav_option
|
||||
@@ -1,440 +0,0 @@
|
||||
"""
|
||||
Recommendation engine for content gap analysis.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any, List, Optional
|
||||
from loguru import logger
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
|
||||
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Logging setup for this module (loguru).
# NOTE(review): remove() without a handler id detaches every handler that is
# currently registered on the shared logger, not only loguru's default one.
logger.remove()

# File sink: DEBUG and above, rotation at 50 MB, retention of 10 days.
logger.add(
    "logs/recommendation_engine.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)

# Console sink: INFO and above, coloured through loguru markup tags.
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)

# Create the log directory when it is missing; an existing one is not an error.
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
class RecommendationEngine:
|
||||
"""
|
||||
Generates content recommendations based on analysis results.
|
||||
"""
|
||||
|
||||
def __init__(self):
    """Create the AI processor, the progress tracker and the stage catalogue."""
    # Ordered step labels of the only stage this engine declares.
    generation_steps = [
        'Initializing recommendation engine',
        'Analyzing content gaps',
        'Evaluating opportunities',
        'Generating recommendations',
        'Creating implementation plan',
    ]

    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Stage key -> display name and step labels.
    self.stages = {
        'recommendation_generation': {
            'name': 'Recommendation Generation',
            'steps': generation_steps,
        },
    }
|
||||
|
||||
def generate_recommendations(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> Dict[str, Any]:
    """
    Run the recommendation pipeline end to end, advancing the progress tracker.

    Args:
        website_analysis: Website analysis results
        competitor_analysis: Competitor analysis results, or None to skip them
        keyword_analysis: Keyword analysis results

    Returns:
        Dictionary with 'content_gaps', 'opportunities', 'recommendations'
        and 'implementation_plan'. If any step raises, the error is shown
        via st.error and the same keys are returned empty together with an
        'error' message.
    """
    tracker = self.progress
    try:
        tracker.start_stage('recommendation_generation')
        tracker.next_step()

        # Step: gather gaps from all analyses.
        gaps = self._analyze_content_gaps(website_analysis, competitor_analysis, keyword_analysis)
        tracker.next_step()

        # Step: keep and rank the gaps worth acting on.
        ranked = self._evaluate_opportunities(gaps, keyword_analysis)
        tracker.next_step()

        # Step: turn opportunities into recommendations.
        advice = self._generate_recommendations(gaps, ranked)
        tracker.next_step()

        # Step: organise recommendations into a plan.
        plan = self._create_implementation_plan(advice)
        tracker.next_step()

        tracker.complete_stage()

        return {
            'content_gaps': gaps,
            'opportunities': ranked,
            'recommendations': advice,
            'implementation_plan': plan,
        }

    except Exception as e:
        message = str(e)
        # Only report against a stage if the tracker says one is active.
        if tracker.current_stage:
            stage_name = tracker.stages[tracker.current_stage]['name']
            tracker.update_progress(0, f"Error in {stage_name}: {message}")
        st.error(f"Error generating recommendations: {message}")
        return {
            'error': message,
            'content_gaps': [],
            'opportunities': [],
            'recommendations': [],
            'implementation_plan': {},
        }
|
||||
|
||||
def _analyze_content_gaps(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Merge website, competitor and keyword gaps into one list, in that order.

    Competitor gaps are included only when a truthy competitor analysis is
    supplied. Any exception is shown via st.error and yields an empty list.
    """
    try:
        # Website findings always lead the list.
        combined = list(self._analyze_website_gaps(website_analysis))

        # Competitor data is optional.
        if competitor_analysis:
            combined += self._analyze_competitor_gaps(competitor_analysis)

        # Keyword findings close the list.
        combined += self._analyze_keyword_gaps(keyword_analysis)
        return combined
    except Exception as e:
        st.error(f"Error analyzing content gaps: {str(e)}")
        return []
|
||||
|
||||
def _analyze_website_gaps(self, website_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Flag website scores that fall below 70.

    Reads 'readability_score' from the 'quality_metrics' section and
    'seo_score' from the 'seo_metrics' section; a missing section or score
    counts as 0. Any exception is shown via st.error and yields an empty list.
    """
    try:
        # (gap type, metrics section, score key, issue text, advice), in report order.
        checks = (
            ('content_quality', 'quality_metrics', 'readability_score',
             'Low readability score', 'Improve content readability'),
            ('seo', 'seo_metrics', 'seo_score',
             'Low SEO score', 'Enhance SEO optimization'),
        )

        found = []
        for gap_type, section, score_key, issue, advice in checks:
            score = website_analysis.get(section, {}).get(score_key, 0)
            if score < 70:
                found.append({
                    'type': gap_type,
                    'issue': issue,
                    'score': score,
                    'recommendation': advice,
                })
        return found
    except Exception as e:
        st.error(f"Error analyzing website gaps: {str(e)}")
        return []
|
||||
|
||||
def _analyze_competitor_gaps(self, competitor_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Turn each entry of competitor_analysis['content_gaps'] into a gap record.

    Each entry's 'missing_topics' are joined with ', ' into the issue text.
    Any exception is shown via st.error and yields an empty list.
    """
    try:
        return [
            {
                'type': 'competitor',
                'issue': f"Missing topic: {', '.join(entry.get('missing_topics', []))}",
                'recommendation': 'Create content for missing topics',
            }
            for entry in competitor_analysis.get('content_gaps', [])
        ]
    except Exception as e:
        st.error(f"Error analyzing competitor gaps: {str(e)}")
        return []
|
||||
|
||||
def _analyze_keyword_gaps(self, keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Turn each entry of keyword_analysis['opportunities'] into a gap record.

    'volume' and 'difficulty' are copied over (default 0) and the keyword is
    embedded in both the issue and the recommendation text. Any exception is
    shown via st.error and yields an empty list.
    """
    try:
        records = []
        for entry in keyword_analysis.get('opportunities', []):
            term = entry.get('keyword')
            records.append({
                'type': 'keyword',
                'issue': f"Keyword opportunity: {term}",
                'volume': entry.get('volume', 0),
                'difficulty': entry.get('difficulty', 0),
                'recommendation': f"Target keyword: {term}",
            })
        return records
    except Exception as e:
        st.error(f"Error analyzing keyword gaps: {str(e)}")
        return []
|
||||
|
||||
def _evaluate_opportunities(self, content_gaps: List[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Score every gap and return those above 50, highest score first.

    Each kept gap is reduced to its 'type', 'issue' and 'recommendation'
    plus the computed 'priority_score'. Any exception is shown via st.error
    and yields an empty list.
    """
    try:
        shortlisted = []
        for gap in content_gaps:
            score = self._calculate_priority_score(gap, keyword_analysis)

            # Gaps at or below the threshold of 50 are not worth pursuing.
            if not (score > 50):
                continue

            shortlisted.append({
                'type': gap.get('type'),
                'issue': gap.get('issue'),
                'recommendation': gap.get('recommendation'),
                'priority_score': score,
            })

        # Stable descending sort: ties keep their input order.
        return sorted(shortlisted, key=lambda item: item['priority_score'], reverse=True)
    except Exception as e:
        st.error(f"Error evaluating opportunities: {str(e)}")
        return []
|
||||
|
||||
def _calculate_priority_score(self, gap: Dict[str, Any], keyword_analysis: Dict[str, Any]) -> float:
    """Calculate a 0-100 priority score for a gap.

    The score starts from a fixed base per gap type (unknown types start at
    0). For 'keyword' gaps the keyword is recovered from the gap's issue
    text and looked up in keyword_analysis['trend_analysis']['trends']; when
    an entry exists, 10% of its 'volume' is added and 20% of its
    'difficulty' is subtracted.

    Args:
        gap: Gap record with at least 'type' and, for keyword gaps, 'issue'
        keyword_analysis: Keyword analysis results

    Returns:
        The score clamped to the range 0..100, or 0 if an exception occurs
        (the error is shown via st.error).
    """
    try:
        # Base score per gap type.
        base_scores = {
            'content_quality': 70,
            'seo': 80,
            'competitor': 60,
            'keyword': 50,
        }
        gap_type = gap.get('type')
        base_score = base_scores.get(gap_type, 0)

        # Adjust score based on keyword data
        if gap_type == 'keyword':
            # The issue text is "<label>: <keyword>". Split on the first
            # separator only, so a keyword that itself contains ": " is
            # kept whole instead of being cut down to its last fragment.
            keyword = gap.get('issue', '').split(': ', 1)[-1]
            keyword_data = keyword_analysis.get('trend_analysis', {}).get('trends', {}).get(keyword, {})
            if keyword_data:
                base_score += keyword_data.get('volume', 0) * 0.1
                base_score -= keyword_data.get('difficulty', 0) * 0.2

        return min(100, max(0, base_score))
    except Exception as e:
        st.error(f"Error calculating priority score: {str(e)}")
        return 0
|
||||
|
||||
def _generate_recommendations(self, content_gaps: List[Dict[str, Any]], opportunities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Expand every opportunity into a recommendation with implementation steps.

    content_gaps is accepted but not read here. Any exception is shown via
    st.error and yields an empty list.
    """
    try:
        return [
            {
                'type': item.get('type'),
                'issue': item.get('issue'),
                'recommendation': item.get('recommendation'),
                'priority': item.get('priority_score', 0),
                'implementation_steps': self._generate_implementation_steps(item),
            }
            for item in opportunities
        ]
    except Exception as e:
        st.error(f"Error generating recommendations: {str(e)}")
        return []
|
||||
|
||||
def _generate_implementation_steps(self, opportunity: Dict[str, Any]) -> List[str]:
    """Return the four-step playbook for the opportunity's type.

    An unrecognised type yields an empty list. Any exception is shown via
    st.error and also yields an empty list.
    """
    try:
        # Opportunity type -> ordered steps; rebuilt per call so callers own the list.
        playbooks = {
            'content_quality': [
                'Review current content structure',
                'Improve readability and formatting',
                'Enhance content organization',
                'Update content based on best practices',
            ],
            'seo': [
                'Audit current SEO implementation',
                'Optimize meta tags and descriptions',
                'Improve content structure for SEO',
                'Implement technical SEO improvements',
            ],
            'competitor': [
                'Research competitor content',
                'Identify unique value proposition',
                'Create content for missing topics',
                'Optimize content for target keywords',
            ],
            'keyword': [
                'Research keyword intent',
                'Create content strategy',
                'Develop content for target keyword',
                'Optimize content for search',
            ],
        }

        kind = opportunity.get('type')
        for known_kind, steps in playbooks.items():
            if kind == known_kind:
                return steps
        return []
    except Exception as e:
        st.error(f"Error generating implementation steps: {str(e)}")
        return []
|
||||
|
||||
def _create_implementation_plan(self, recommendations: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Group recommendations into named phases by their type.

    Returns a plan with 'phases', 'timeline', 'resources' and
    'success_metrics'; only 'phases' is populated, and a phase appears only
    when at least one recommendation maps to it. Every phase carries the
    same fixed duration, resources and success metrics. Any exception is
    shown via st.error and yields an empty dict.
    """
    try:
        # Recommendation type -> phase title; dict order fixes the phase order.
        phase_titles = {
            'content_quality': 'Content Enhancement',
            'seo': 'SEO Optimization',
            'competitor': 'Competitive Content',
            'keyword': 'Keyword Targeting',
        }

        # Resolve each recommendation's phase title once (None when the type is unknown).
        labelled = [(phase_titles.get(rec.get('type')), rec) for rec in recommendations]

        phase_entries = []
        for title in phase_titles.values():
            members = [rec for label, rec in labelled if label == title]
            if not members:
                continue
            phase_entries.append({
                'name': title,
                'recommendations': members,
                'duration': '2-4 weeks',
                'resources': ['Content team', 'SEO team'],
                'success_metrics': [
                    'Content quality score',
                    'SEO performance',
                    'User engagement',
                ],
            })

        return {
            'phases': phase_entries,
            'timeline': {},
            'resources': {},
            'success_metrics': {},
        }
    except Exception as e:
        st.error(f"Error creating implementation plan: {str(e)}")
        return {}
|
||||
|
||||
def _generate_content_topics(self, ai_insights: dict) -> list:
    """
    Placeholder for content topic suggestions.

    Args:
        ai_insights (dict): AI-processed insights (currently unused)

    Returns:
        list: Always an empty list until topic generation is implemented
    """
    # TODO: Implement content topic generation
    topics = []
    return topics
|
||||
|
||||
def _suggest_content_formats(self, ai_insights: dict) -> list:
    """
    Placeholder for content format suggestions.

    Args:
        ai_insights (dict): AI-processed insights (currently unused)

    Returns:
        list: Always an empty list until format suggestions are implemented
    """
    # TODO: Implement content format suggestions
    formats = []
    return formats
|
||||
|
||||
def _calculate_priority_scores(self, ai_insights: dict) -> dict:
    """
    Placeholder for per-recommendation priority scoring.

    Args:
        ai_insights (dict): AI-processed insights (currently unused)

    Returns:
        dict: Always an empty dict until priority scoring is implemented
    """
    # TODO: Implement priority scoring
    scores = {}
    return scores
|
||||
|
||||
def _create_timeline(self, ai_insights: dict) -> dict:
    """
    Placeholder for the implementation timeline.

    Args:
        ai_insights (dict): AI-processed insights (currently unused)

    Returns:
        dict: 'short_term', 'medium_term' and 'long_term', each mapped to
        its own empty list until timeline creation is implemented
    """
    # TODO: Implement timeline creation
    horizons = ('short_term', 'medium_term', 'long_term')
    return {horizon: [] for horizon in horizons}
|
||||
|
||||
def _generate_specific_suggestions(self, recommendations: dict, analysis_results: dict) -> dict:
    """
    Generate specific content suggestions using existing tools.

    Args:
        recommendations (dict): General recommendations; topics are read
            from its 'content_topics' entry
        analysis_results (dict): Analysis results (currently unused)

    Returns:
        dict: Maps each topic to its 'titles', 'meta_descriptions' and
        'structured_data'. Empty when no topics are supplied.
    """
    suggestions = {}

    # A missing or empty 'content_topics' entry means there is nothing to
    # expand, not a failure.
    topics = recommendations.get('content_topics') or []

    # Generate titles for suggested topics
    for topic in topics:
        suggestions[topic] = {
            'titles': ai_title_generator(topic),
            # NOTE(review): metadesc_generator_main and ai_structured_data are
            # not among this module's imports, so these two calls raise
            # NameError as soon as a topic is processed. Import them from
            # their defining modules.
            'meta_descriptions': metadesc_generator_main(topic),
            'structured_data': ai_structured_data(topic)
        }

    return suggestions
|
||||
@@ -1,769 +0,0 @@
|
||||
"""
|
||||
Streamlit UI for Content Gap Analysis workflow.
|
||||
"""
|
||||
|
||||
import streamlit as st
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import plotly.graph_objects as go
|
||||
import json
|
||||
from datetime import datetime
|
||||
from .main import ContentGapAnalysis
|
||||
from .keyword_researcher import KeywordResearcher
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .website_analyzer import WebsiteAnalyzer
|
||||
from .recommendation_engine import RecommendationEngine
|
||||
from .utils.ai_processor import AIProcessor
|
||||
from .navigation import show_content_gap_analysis_nav
|
||||
from typing import Dict, Any
|
||||
import logging
|
||||
|
||||
# Configure logging
# Runs at import time: requests DEBUG-level configuration of the root logger,
# then creates the module-level logger used by the UI class in this module.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
|
||||
|
||||
class ContentGapAnalysisUI:
|
||||
"""Streamlit UI for Content Gap Analysis workflow."""
|
||||
|
||||
def __init__(self):
    """Seed the session state and build the analysis components."""
    # Session keys are only set when absent, so values already stored in
    # st.session_state are left untouched.
    session_defaults = (
        ('current_step', 1),
        ('analysis_results', {}),
    )
    for key, initial in session_defaults:
        if key not in st.session_state:
            st.session_state[key] = initial

    # One instance of every component the workflow steps call into.
    self.analyzer = ContentGapAnalysis()
    self.keyword_researcher = KeywordResearcher()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.website_analyzer = WebsiteAnalyzer()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
|
||||
|
||||
def run(self):
    """Render the page: navigation, intro text, progress bar and the active step.

    A step is rendered when the navigation choice names it or when
    st.session_state.current_step equals its number. Steps are tested in
    workflow order and the first match wins; with no match the export view
    is rendered. Any exception is logged and shown via st.error.
    """
    try:
        # Navigation choice returned by the nav helper.
        nav_option = show_content_gap_analysis_nav()

        st.title("Content Gap Analysis")
        st.markdown("""
        This tool helps you identify content gaps and opportunities by analyzing your website,
        competitors, and market trends. Follow the steps below to get started.
        """)

        self._show_progress()

        # (navigation label, handler method name) in workflow order; the
        # 1-based position is the step number.
        workflow = (
            ("Website Analysis", "_website_analysis_step"),
            ("Competitor Analysis", "_competitor_analysis_step"),
            ("Keyword Research", "_keyword_research_step"),
            ("Recommendations", "_recommendations_step"),
        )
        for step_number, (label, handler_name) in enumerate(workflow, start=1):
            if nav_option == label or st.session_state.current_step == step_number:
                getattr(self, handler_name)()
                break
        else:
            self._export_results()
    except Exception as e:
        logger.error(f"Error in run method: {str(e)}", exc_info=True)
        st.error(f"An error occurred: {str(e)}")
|
||||
|
||||
def _show_progress(self):
    """Draw the overall progress bar and one status cell per workflow step.

    Steps before the current one are shown as done, the current one is
    highlighted, and later ones are shown as pending.
    """
    step_labels = [
        "Website Analysis",
        "Competitor Analysis",
        "Keyword Research",
        "Recommendations",
        "Export Results",
    ]
    current = st.session_state.current_step

    # Fraction of the workflow reached so far.
    st.progress(current / len(step_labels))

    columns = st.columns(len(step_labels))
    for position, (column, label) in enumerate(zip(columns, step_labels), start=1):
        with column:
            if position < current:
                st.success(f"✓ {label}")
            elif position == current:
                st.info(f"→ {label}")
            else:
                st.text(f"○ {label}")
|
||||
|
||||
def _website_analysis_step(self):
    """Website analysis step UI.

    When website results are already stored in the session they are
    displayed, together with buttons to discard them or move on to step 2.
    Otherwise a form collects the URL and industry, the website analyzer
    and the AI processor are run, and the merged result is stored under
    st.session_state.analysis_results['website'] and displayed. Progress is
    mirrored in st.session_state.analysis_progress. Errors are logged and
    shown via st.error.
    """
    try:
        st.header("Step 1: Website Analysis")

        # Display previous results if they exist
        if 'website' in st.session_state.analysis_results:
            st.info("Previous analysis results found. You can analyze a new website or proceed to the next step.")
            self._display_website_analysis(st.session_state.analysis_results['website'])

            col1, col2 = st.columns(2)
            with col1:
                if st.button("Analyze New Website"):
                    st.session_state.analysis_results.pop('website', None)
                    st.rerun()
            with col2:
                if st.button("Proceed to Competitor Analysis"):
                    st.session_state.current_step = 2
                    st.rerun()
            return

        # Create form for new analysis
        with st.form("website_analysis_form"):
            website_url = st.text_input("Enter your website URL")
            industry = st.text_input("Enter your industry/niche")

            submitted = st.form_submit_button("Analyze Website")

        # Handle form submission outside the form
        if not submitted:
            return
        if not (website_url and industry):
            # Tell the user why nothing happened instead of silently ignoring the click.
            st.warning("Please enter both a website URL and an industry/niche.")
            return

        # Always start from a fresh progress record. A record left over from
        # an earlier run would otherwise be displayed as this run's initial
        # state.
        st.session_state.analysis_progress = {
            'status': 'initializing',
            'current_step': 'Starting Analysis',
            'progress': 0,
            'details': 'Initializing analysis...'
        }

        # Placeholders that are overwritten on every progress update
        progress_container = st.empty()
        status_container = st.empty()
        details_container = st.empty()

        def update_progress_display():
            """Redraw the bar, status line and details from the session progress record."""
            progress = st.session_state.analysis_progress

            # Update progress bar ('progress' is divided by 100 to get the bar fraction)
            with progress_container:
                st.progress(progress['progress'] / 100)

            # Update status
            with status_container:
                if progress['status'] == 'error':
                    st.error(f"Error: {progress['current_step']}")
                elif progress['status'] == 'completed':
                    st.success(f"✓ {progress['current_step']}")
                else:
                    st.info(f"→ {progress['current_step']}")

            # Update details
            with details_container:
                st.write(progress['details'])

        # Initial progress display
        update_progress_display()

        try:
            # Get basic analysis
            results = self.website_analyzer.analyze(website_url)

            # Update progress from analyzer
            st.session_state.analysis_progress = self.website_analyzer.progress.get_progress()
            update_progress_display()

            # A payload that is not a dict cannot be merged or displayed.
            # Treat it as a failure instead of storing it and reporting success.
            if not isinstance(results, dict):
                st.session_state.analysis_progress.update({
                    'status': 'error',
                    'current_step': 'Analysis Failed',
                    'details': 'Invalid analysis results format'
                })
                update_progress_display()
                st.error("Invalid analysis results format")
                return

            if 'error' in results:
                st.error(f"Error in website analysis: {results['error']}")
                return

            # Get AI-enhanced analysis
            st.session_state.analysis_progress.update({
                'current_step': 'AI Analysis',
                'progress': 95,
                'details': 'Performing AI-enhanced analysis...'
            })
            update_progress_display()

            ai_analysis = self.ai_processor.analyze_content({
                'url': website_url,
                'industry': industry,
                'content': results
            })

            # Combine results
            results.update(ai_analysis)
            # Keep the industry with the results: the keyword research step
            # pre-fills its input from analysis_results['website']['industry'].
            results.setdefault('industry', industry)

            # Store results in session state
            st.session_state.analysis_results['website'] = results

            # Update final progress
            st.session_state.analysis_progress.update({
                'status': 'completed',
                'current_step': 'Analysis Complete',
                'progress': 100,
                'details': 'Analysis completed successfully!'
            })
            update_progress_display()

            # Display results
            self._display_website_analysis(results)

        except Exception as e:
            logger.error(f"Error during website analysis: {str(e)}", exc_info=True)
            st.session_state.analysis_progress.update({
                'status': 'error',
                'current_step': 'Analysis Failed',
                'details': f"Error during website analysis: {str(e)}"
            })
            update_progress_display()
            st.error(f"Error during website analysis: {str(e)}")
            return

    except Exception as e:
        logger.error(f"Error in website analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in website analysis: {str(e)}")
|
||||
|
||||
def _display_website_analysis(self, results: Dict[str, Any]):
    """Render the website analysis report.

    Sections, in order: content metrics, SEO metrics (including meta tags),
    technical metrics, performance metrics and insights. A section's body
    is rendered only when its data is present. A non-dict payload or one
    carrying 'error' is reported via st.error and nothing else is drawn.
    Any exception is logged and shown via st.error.
    """

    def metric_row(entries):
        """Show (label, value) pairs as one row of equally sized metric tiles."""
        for column, (label, value) in zip(st.columns(len(entries)), entries):
            with column:
                st.metric(label, value)

    def status_chart(title, names, flags):
        """Draw a bar chart of per-feature status flags."""
        figure = px.bar(
            {'Feature': names, 'Status': flags},
            x='Feature',
            y='Status',
            title=title,
            color='Status',
            color_discrete_sequence=['red', 'green']
        )
        st.plotly_chart(figure, use_container_width=True)

    def meta_tag_section(heading, tag, with_length_and_score=True):
        """Write status, value, optional length/score and any recommendation for one meta tag."""
        st.write(heading)
        st.write(f"Status: {'✅' if tag.get('status') == 'good' else '❌'}")
        st.write(f"Value: {tag.get('value', 'N/A')}")
        if with_length_and_score:
            st.write(f"Length: {tag.get('length', 0)} characters")
            st.write(f"Score: {tag.get('score', 0)}%")
        if tag.get('recommendation'):
            st.warning(tag.get('recommendation'))

    try:
        if not isinstance(results, dict):
            st.error("Invalid analysis results format")
            return

        if 'error' in results:
            st.error(f"Error in analysis: {results['error']}")
            return

        # ---- Content metrics ----
        st.subheader("Content Metrics")
        content_metrics = results.get('content_metrics', {})

        if content_metrics:
            metric_row([
                ("Word Count", f"{content_metrics.get('word_count', 0):,}"),
                ("Headings", f"{content_metrics.get('heading_count', 0):,}"),
                ("Images", f"{content_metrics.get('image_count', 0):,}"),
                ("Links", f"{content_metrics.get('link_count', 0):,}"),
            ])

            # Heading / paragraph distribution
            st.write("Content Structure")
            structure_fig = px.bar(
                {
                    'Type': ['H1', 'H2', 'H3', 'Paragraphs'],
                    'Count': [
                        content_metrics.get('h1_count', 0),
                        content_metrics.get('h2_count', 0),
                        content_metrics.get('h3_count', 0),
                        content_metrics.get('paragraph_count', 0),
                    ],
                },
                x='Type',
                y='Count',
                title="Content Structure Distribution",
                color='Type',
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            st.plotly_chart(structure_fig, use_container_width=True)

            # Presence of site-level features
            st.write("Content Features")
            status_chart(
                "Content Features Status",
                ['Meta Description', 'Robots.txt', 'Sitemap'],
                [
                    content_metrics.get('has_meta_description', False),
                    content_metrics.get('has_robots_txt', False),
                    content_metrics.get('has_sitemap', False),
                ],
            )

        # ---- SEO metrics ----
        st.subheader("SEO Metrics")
        seo_metrics = results.get('seo_metrics', {})

        if seo_metrics:
            overall = seo_metrics.get('overall_score', 0)
            content_scores = seo_metrics.get('content', {})
            content_quality = content_scores.get('content_quality_score', 0)
            readability = content_scores.get('readability_score', 0)
            keyword_density = content_scores.get('keyword_density', 0)

            metric_row([
                ("Overall Score", f"{overall:.1f}%"),
                ("Content Quality", f"{content_quality:.1f}%"),
                ("Readability", f"{readability:.1f}%"),
                ("Keyword Density", f"{keyword_density:.1f}%"),
            ])

            # Radar overview of the same four scores
            radar = px.line_polar(
                {
                    'Metric': ['Overall', 'Content Quality', 'Readability', 'Keyword Density'],
                    'Score': [overall, content_quality, readability, keyword_density],
                },
                r='Score',
                theta='Metric',
                line_close=True,
                title="SEO Performance Overview"
            )
            radar.update_traces(fill='toself')
            st.plotly_chart(radar, use_container_width=True)

            # Meta tags: title, description, keywords
            st.write("Meta Tags Analysis")
            meta_tags = seo_metrics.get('meta_tags', {})
            if meta_tags:
                meta_tag_section("Title Tag", meta_tags.get('title', {}))
                meta_tag_section("Meta Description", meta_tags.get('description', {}))
                # The keywords tag is reported without length and score.
                meta_tag_section("Meta Keywords", meta_tags.get('keywords', {}), with_length_and_score=False)

        # ---- Technical metrics ----
        st.subheader("Technical Metrics")
        technical_info = results.get('technical_info', {})

        if technical_info:
            basics_col, security_col = st.columns(2)
            with basics_col:
                st.write("Basic Information")
                st.metric("Status Code", technical_info.get('status_code', 'N/A'))
                st.metric("Server", technical_info.get('server_info', {}).get('server', 'N/A'))
                st.metric("Content Type", technical_info.get('server_info', {}).get('content_type', 'N/A'))
            with security_col:
                st.write("Security Information")
                security_info = technical_info.get('security_info', {})
                status_chart(
                    "Security Features Status",
                    ['SSL', 'HSTS', 'XSS Protection'],
                    [
                        security_info.get('ssl', False),
                        security_info.get('hsts', False),
                        security_info.get('xss_protection', False),
                    ],
                )

        # ---- Performance metrics ----
        st.subheader("Performance Metrics")
        performance = results.get('performance', {})

        if performance:
            metric_row([
                ("Load Time", f"{performance.get('load_time', 0):.2f}s"),
                ("Page Size", f"{performance.get('page_size', 0):.1f} KB"),
                ("Status Code", performance.get('status_code', 'N/A')),
                ("Response Time", f"{performance.get('response_time', 0):.2f}s"),
            ])

        # ---- Insights ----
        st.subheader("Insights and Recommendations")
        insights = results.get('insights', [])
        if not insights:
            st.info("No specific insights available")
        else:
            for insight in insights:
                st.info(f"• {insight}")

    except Exception as e:
        logger.error(f"Error displaying website analysis: {str(e)}", exc_info=True)
        st.error(f"Error displaying website analysis: {str(e)}")
|
||||
|
||||
def _competitor_analysis_step(self):
    """Competitor analysis step UI.

    Collects competitor URLs (one per line), runs the competitor analyzer
    and the AI processor, stores the merged result under
    st.session_state.analysis_results['competitors'], displays it, then
    sets the current step to 3 and reruns. Any exception is logged and
    shown via st.error.
    """
    try:
        st.header("Step 2: Competitor Analysis")

        with st.form("competitor_analysis_form"):
            competitors = st.text_area(
                "Enter competitor URLs (one per line)",
                help="Enter the URLs of your main competitors"
            )
            submitted = st.form_submit_button("Analyze Competitors")

        # Nothing to do until the form is submitted with some text.
        if not (submitted and competitors):
            return

        with st.spinner("Analyzing competitors..."):
            # One URL per non-blank line, surrounding whitespace removed.
            competitor_urls = [
                candidate
                for candidate in map(str.strip, competitors.split('\n'))
                if candidate
            ]
            results = self.competitor_analyzer.analyze(competitor_urls)

            # AI-enhanced pass over the raw analysis
            ai_payload = {
                'competitors': competitor_urls,
                'analysis': results
            }
            ai_analysis = self.ai_processor.analyze_competitors(ai_payload)

            # Merge, persist and show
            results.update(ai_analysis)
            st.session_state.analysis_results['competitors'] = results
            self._display_competitor_analysis(results)

            # Advance the workflow
            st.session_state.current_step = 3
            st.rerun()
    except Exception as e:
        logger.error(f"Error in competitor analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in competitor analysis: {str(e)}")
|
||||
|
||||
def _display_competitor_analysis(self, results: dict):
    """Render competitor comparison, trend lines, content gaps and insights.

    Reads 'comparison', 'competitor_trends', 'content_gaps' and
    'competitive_insights' from results. Each chart or list is drawn only
    when its data is present.
    """
    st.subheader("Competitor Analysis Results")

    # Score comparison per competitor, coloured by metric
    st.subheader("Competitor Comparison")
    comparison = pd.DataFrame(results.get('comparison', []))
    if not comparison.empty:
        comparison_fig = px.bar(
            comparison,
            x='competitor',
            y='score',
            color='metric',
            title="Competitor Comparison"
        )
        st.plotly_chart(comparison_fig)

    st.subheader("AI-Enhanced Competitor Analysis")

    # One line per competitor over its timeline
    trend_data = results.get('competitor_trends', {})
    if trend_data:
        trend_fig = go.Figure()
        for competitor_name, series in trend_data.items():
            line = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=competitor_name,
                mode='lines+markers'
            )
            trend_fig.add_trace(line)
        trend_fig.update_layout(
            title="Competitor Performance Trends",
            xaxis_title="Timeline",
            yaxis_title="Score"
        )
        st.plotly_chart(trend_fig)

    # Gaps are listed as they are stored
    st.subheader("Content Gaps")
    for gap in results.get('content_gaps', []):
        st.info(f"• {gap}")

    # One expander per insight category
    st.subheader("Competitive Insights")
    insights = results.get('competitive_insights', {})
    if insights:
        for category, points in insights.items():
            with st.expander(f"{category.title()} Analysis"):
                for point in points:
                    st.success(f"• {point}")
|
||||
|
||||
def _keyword_research_step(self):
|
||||
"""Keyword research step UI (wizard step 3).

Renders a form asking for the industry/niche, pre-filled from the
``industry`` value stored under ``analysis_results['website']``. When the
form is submitted with a non-empty industry, the keyword researcher is
run, its result is merged with the AI processor's keyword analysis, the
merged dict is stored under ``analysis_results['keywords']`` and rendered,
and the wizard advances to step 4.

Any exception raised along the way is logged with its traceback and shown
to the user through ``st.error``.
"""
|
||||
try:
|
||||
st.header("Step 3: Keyword Research")
|
||||
|
||||
with st.form("keyword_research_form"):
|
||||
industry = st.text_input(
|
||||
"Enter your industry/niche",
|
||||
# Default to the industry captured by the website step, if any.
value=st.session_state.analysis_results.get('website', {}).get('industry', '')
|
||||
)
|
||||
|
||||
submitted = st.form_submit_button("Research Keywords")
|
||||
|
||||
# An empty industry is ignored even when the form was submitted.
if submitted and industry:
|
||||
with st.spinner("Researching keywords..."):
|
||||
results = self.keyword_researcher.research(industry)
|
||||
|
||||
# Get AI-enhanced keyword analysis
|
||||
ai_analysis = self.ai_processor.analyze_keywords({
|
||||
'industry': industry,
|
||||
'keywords': results
|
||||
})
|
||||
|
||||
# Combine results (AI keys overwrite researcher keys of the same name)
|
||||
results.update(ai_analysis)
|
||||
st.session_state.analysis_results['keywords'] = results
|
||||
|
||||
# Display results
|
||||
self._display_keyword_research(results)
|
||||
|
||||
# Move to next step
# NOTE(review): st.rerun() follows the display call directly, so the
# rendered results are presumably replaced on the rerun - confirm intended.
|
||||
st.session_state.current_step = 4
|
||||
st.rerun()
|
||||
except Exception as e:
|
||||
logger.error(f"Error in keyword research step: {str(e)}", exc_info=True)
|
||||
st.error(f"Error in keyword research: {str(e)}")
|
||||
|
||||
def _display_keyword_research(self, results: dict):
    """Render the keyword research section.

    Reads the following optional keys from ``results``: ``keywords`` (rows
    for the opportunity scatter plot), ``keyword_trends`` (mapping of keyword
    to a dict with ``timeline`` and ``scores``), ``search_intent`` (dict whose
    ``summary`` entry feeds the pie chart), ``content_formats`` (iterable of
    suggestions) and ``keyword_insights`` (mapping of category to points).

    Args:
        results: Combined keyword research results.
    """
    st.subheader("Keyword Research Results")

    # Scatter of search volume against difficulty, sized by relevance.
    st.subheader("Keyword Metrics")
    keyword_frame = pd.DataFrame(results.get('keywords', []))
    if not keyword_frame.empty:
        opportunity_chart = px.scatter(
            keyword_frame,
            x='search_volume',
            y='difficulty',
            size='relevance_score',
            hover_data=['keyword'],
            title="Keyword Opportunities",
        )
        st.plotly_chart(opportunity_chart)

    st.subheader("AI-Enhanced Keyword Analysis")

    # One line per keyword showing its trend score over the timeline.
    keyword_trends = results.get('keyword_trends', {})
    if keyword_trends:
        trend_chart = go.Figure()
        for keyword_name, series in keyword_trends.items():
            trace = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=keyword_name,
                mode='lines+markers',
            )
            trend_chart.add_trace(trace)
        trend_chart.update_layout(
            title="Keyword Trend Analysis",
            xaxis_title="Timeline",
            yaxis_title="Trend Score",
        )
        st.plotly_chart(trend_chart)

    # Pie chart of how the keywords split across search intents.
    st.subheader("Search Intent Distribution")
    intent_summary = results.get('search_intent', {}).get('summary', {})
    intent_frame = pd.DataFrame(intent_summary)
    if not intent_frame.empty:
        intent_chart = px.pie(
            intent_frame,
            values='count',
            names='intent',
            title="Search Intent Distribution",
        )
        st.plotly_chart(intent_chart)

    # Every suggested content format is shown as its own info box.
    st.subheader("Content Format Suggestions")
    for content_format in results.get('content_formats', []):
        st.info(f"• {content_format}")

    # Insights are grouped into one expander per category.
    st.subheader("Keyword Insights")
    insight_groups = results.get('keyword_insights', {})
    if insight_groups:
        for category, points in insight_groups.items():
            with st.expander(f"{category.title()} Insights"):
                for point in points:
                    st.success(f"• {point}")
|
||||
|
||||
def _recommendations_step(self):
|
||||
"""Recommendations step UI (wizard step 4).

Passes everything collected so far (``st.session_state.analysis_results``)
to the recommendation engine, merges the result with the AI processor's
recommendation analysis, stores the merged dict under
``analysis_results['recommendations']``, renders it and advances the
wizard to step 5.

Unlike the keyword step there is no form or button here: the generation
runs whenever this method is called.

Any exception raised along the way is logged with its traceback and shown
to the user through ``st.error``.
"""
|
||||
try:
|
||||
st.header("Step 4: Content Recommendations")
|
||||
|
||||
with st.spinner("Generating recommendations..."):
|
||||
results = self.recommendation_engine.generate_recommendations(
|
||||
st.session_state.analysis_results
|
||||
)
|
||||
|
||||
# Get AI-enhanced recommendations
|
||||
ai_recommendations = self.ai_processor.analyze_recommendations({
|
||||
'recommendations': results,
|
||||
'analysis': st.session_state.analysis_results
|
||||
})
|
||||
|
||||
# Combine results (AI keys overwrite engine keys of the same name)
|
||||
results.update(ai_recommendations)
|
||||
st.session_state.analysis_results['recommendations'] = results
|
||||
|
||||
# Display results
|
||||
self._display_recommendations(results)
|
||||
|
||||
# Move to next step
# NOTE(review): st.rerun() follows the display call directly, so the
# rendered recommendations are presumably replaced on the rerun - confirm intended.
|
||||
st.session_state.current_step = 5
|
||||
st.rerun()
|
||||
except Exception as e:
|
||||
logger.error(f"Error in recommendations step: {str(e)}", exc_info=True)
|
||||
st.error(f"Error in recommendations: {str(e)}")
|
||||
|
||||
def _display_recommendations(self, results: dict):
    """Render the content recommendations section.

    Reads the following optional keys from ``results``: ``priorities``
    (iterable of recommendations), ``impact_analysis`` (mapping of metric to
    a dict with ``categories`` and ``scores``), ``timeline`` (iterable of
    implementation items), ``impact`` (mapping of metric label to value) and
    ``strategic_insights`` (mapping of category to points).

    Args:
        results: Combined recommendation results.
    """
    st.subheader("Content Recommendations")

    # Highest-priority actions first, one success box each.
    st.subheader("Priority Recommendations")
    for priority in results.get('priorities', []):
        st.success(f"• {priority}")

    st.subheader("AI-Enhanced Recommendations")

    # Grouped bars: one bar series per metric across the categories.
    impact_analysis = results.get('impact_analysis', {})
    if impact_analysis:
        impact_chart = go.Figure()
        for metric_name, series in impact_analysis.items():
            bars = go.Bar(
                name=metric_name,
                x=series.get('categories', []),
                y=series.get('scores', []),
            )
            impact_chart.add_trace(bars)
        impact_chart.update_layout(
            title="Recommendation Impact Analysis",
            xaxis_title="Categories",
            yaxis_title="Impact Score",
            barmode='group',
        )
        st.plotly_chart(impact_chart)

    # Every timeline entry is shown as its own info box.
    st.subheader("Implementation Timeline")
    for step in results.get('timeline', []):
        st.info(f"• {step}")

    # One metric widget per expected-impact entry.
    st.subheader("Expected Impact")
    expected_impact = results.get('impact', {})
    for label, value in expected_impact.items():
        st.metric(label, value)

    # Insights are grouped into one expander per category.
    st.subheader("Strategic Insights")
    insight_groups = results.get('strategic_insights', {})
    if insight_groups:
        for category, points in insight_groups.items():
            with st.expander(f"{category.title()} Strategy"):
                for point in points:
                    st.success(f"• {point}")
|
||||
|
||||
def _export_results(self):
    """Export results step UI (wizard step 5).

    Shows a format selector and an export button. JSON and CSV are handed to
    their dedicated exporters; any other choice (currently only PDF) shows a
    "coming soon" notice.
    """
    st.header("Step 5: Export Results")

    export_format = st.radio(
        "Choose export format",
        ["JSON", "CSV", "PDF"],
    )

    # Nothing to do until the user presses the export button.
    if not st.button("Export Results"):
        return

    exporters = {
        "JSON": self._export_json,
        "CSV": self._export_csv,
    }
    exporter = exporters.get(export_format)
    if exporter is None:
        st.info("PDF export coming soon!")
    else:
        exporter()
|
||||
|
||||
def _export_json(self):
    """Export results as JSON.

    Serializes ``st.session_state.analysis_results`` with a two-space indent
    and offers it through a download button whose file name carries the
    current local timestamp.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    payload = json.dumps(st.session_state.analysis_results, indent=2)

    st.download_button(
        "Download JSON",
        data=payload,
        file_name=f"content_gap_analysis_{stamp}.json",
        mime="application/json",
    )
|
||||
|
||||
def _export_csv(self):
    """Export results as CSV.

    Flattens ``st.session_state.analysis_results`` into rows: a section whose
    value is a dict becomes one row, a section whose value is a list
    contributes one row per dict item (non-dict items are skipped), and
    sections of any other type are ignored. Every row is tagged with the name
    of the section it came from in a ``section`` column.

    The rows are copies, so the stored analysis results are never modified by
    an export. No download button is shown when there are no rows.
    """
    results = st.session_state.analysis_results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Build tagged copies instead of writing 'section' into the session-state
    # dicts, which would leak the export column into the saved analysis.
    csv_rows = []
    for section, data in results.items():
        if isinstance(data, list):
            csv_rows.extend(
                {**item, 'section': section}
                for item in data
                if isinstance(item, dict)
            )
        elif isinstance(data, dict):
            csv_rows.append({**data, 'section': section})

    if csv_rows:
        df = pd.DataFrame(csv_rows)
        filename = f"content_gap_analysis_{timestamp}.csv"

        st.download_button(
            "Download CSV",
            data=df.to_csv(index=False),
            file_name=filename,
            mime="text/csv"
        )
|
||||
|
||||
def main():
    """Main entry point for the Streamlit app: build the UI and run it."""
    ContentGapAnalysisUI().run()


# Launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -1,249 +0,0 @@
|
||||
# Content Gap Analysis Utils
|
||||
|
||||
This directory contains utility modules that power the Content Gap Analysis tool. These modules provide core functionality for data collection, processing, analysis, and storage.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
utils/
|
||||
├── README.md
|
||||
├── ai_processor.py # AI-powered content analysis and processing
|
||||
├── content_parser.py # Content structure parsing and analysis
|
||||
├── data_collector.py # Website data collection and processing
|
||||
└── storage.py # Analysis results storage and retrieval
|
||||
```
|
||||
|
||||
## Module Descriptions
|
||||
|
||||
### 1. AI Processor (`ai_processor.py`)
|
||||
|
||||
The AI Processor module enhances content analysis using AI techniques. It provides intelligent analysis of website content, competitor data, and keyword research.
|
||||
|
||||
#### Key Features:
|
||||
- Content quality assessment
|
||||
- Topic analysis and clustering
|
||||
- Performance metrics analysis
|
||||
- Strategic recommendations generation
|
||||
- Progress tracking for analysis tasks
|
||||
|
||||
#### Main Components:
|
||||
- `AIProcessor`: Main class for AI-powered analysis
|
||||
- `ProgressTracker`: Tracks analysis progress and status
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.ai_processor import AIProcessor
|
||||
|
||||
processor = AIProcessor()
|
||||
analysis = processor.analyze_content({
|
||||
'url': 'https://example.com',
|
||||
'industry': 'technology',
|
||||
'content': content_data
|
||||
})
|
||||
```
|
||||
|
||||
### 2. Content Parser (`content_parser.py`)
|
||||
|
||||
The Content Parser module handles the parsing and analysis of website content structure. It provides detailed insights into content organization and quality.
|
||||
|
||||
#### Key Features:
|
||||
- Content structure analysis
|
||||
- Text statistics calculation
|
||||
- Topic extraction
|
||||
- Readability analysis
|
||||
- Content hierarchy analysis
|
||||
|
||||
#### Main Components:
|
||||
- `ContentParser`: Main class for content parsing and analysis
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.content_parser import ContentParser
|
||||
|
||||
parser = ContentParser()
|
||||
structure = parser.parse_structure({
|
||||
'main_content': content,
|
||||
'html': html_content,
|
||||
'headings': headings_data
|
||||
})
|
||||
```
|
||||
|
||||
### 3. Data Collector (`data_collector.py`)
|
||||
|
||||
The Data Collector module is responsible for gathering website data for analysis. It handles web scraping and data extraction.
|
||||
|
||||
#### Key Features:
|
||||
- Website content collection
|
||||
- Metadata extraction
|
||||
- Heading structure analysis
|
||||
- Link and image extraction
|
||||
- Error handling (a failed collection returns the URL together with an `error` message instead of raising)
|
||||
|
||||
#### Main Components:
|
||||
- `DataCollector`: Main class for data collection
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.data_collector import DataCollector
|
||||
|
||||
collector = DataCollector()
|
||||
data = collector.collect('https://example.com')
|
||||
```
|
||||
|
||||
### 4. Storage (`storage.py`)
|
||||
|
||||
The Storage module manages the persistence and retrieval of analysis results. It provides a robust database interface for storing and accessing analysis data.
|
||||
|
||||
#### Key Features:
|
||||
- Analysis results storage
|
||||
- Historical data management
|
||||
- Recommendation tracking
|
||||
- User-specific analysis storage
|
||||
- Error handling and rollback support
|
||||
|
||||
#### Main Components:
|
||||
- `ContentGapAnalysisStorage`: Main class for storage operations
|
||||
|
||||
#### Usage Example:
|
||||
```python
|
||||
from utils.storage import ContentGapAnalysisStorage
|
||||
|
||||
storage = ContentGapAnalysisStorage(db_session)
|
||||
analysis_id = storage.save_analysis(
|
||||
user_id=1,
|
||||
website_url='https://example.com',
|
||||
industry='technology',
|
||||
results=analysis_results
|
||||
)
|
||||
```
|
||||
|
||||
## Integration Points
|
||||
|
||||
### 1. Website Analysis Integration
|
||||
```python
|
||||
from utils.data_collector import DataCollector
|
||||
from utils.content_parser import ContentParser
|
||||
from utils.ai_processor import AIProcessor
|
||||
|
||||
# Collect data
|
||||
collector = DataCollector()
|
||||
data = collector.collect(url)
|
||||
|
||||
# Parse content
|
||||
parser = ContentParser()
|
||||
structure = parser.parse_structure(data)
|
||||
|
||||
# Process with AI
|
||||
processor = AIProcessor()
|
||||
analysis = processor.analyze_content({
|
||||
'url': url,
|
||||
'content': structure
|
||||
})
|
||||
```
|
||||
|
||||
### 2. Storage Integration
|
||||
```python
|
||||
from utils.storage import ContentGapAnalysisStorage
|
||||
|
||||
# Store analysis results
|
||||
storage = ContentGapAnalysisStorage(db_session)
|
||||
analysis_id = storage.save_analysis(
|
||||
user_id=user_id,
|
||||
website_url=url,
|
||||
industry=industry,
|
||||
results=analysis_results
|
||||
)
|
||||
|
||||
# Retrieve analysis
|
||||
results = storage.get_analysis(analysis_id)
|
||||
```
|
||||
|
||||
## Error Handling
|
||||
|
||||
All modules implement comprehensive error handling:
|
||||
|
||||
1. **Data Collection Errors**
|
||||
- Network timeouts
|
||||
- Invalid URLs
|
||||
- Access restrictions
|
||||
- Parsing errors
|
||||
|
||||
2. **Processing Errors**
|
||||
- Invalid data formats
|
||||
- AI processing failures
|
||||
- Resource limitations
|
||||
- Analysis timeouts
|
||||
|
||||
3. **Storage Errors**
|
||||
- Database connection issues
|
||||
- Transaction failures
|
||||
- Data validation errors
|
||||
- Concurrent access conflicts
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Data Collection**
|
||||
- Implement rate limiting
|
||||
- Use proper user agents
|
||||
- Handle redirects
|
||||
- Validate input data
|
||||
|
||||
2. **Content Processing**
|
||||
- Clean and normalize data
|
||||
- Handle encoding issues
|
||||
- Implement fallback strategies
|
||||
- Cache processed results
|
||||
|
||||
3. **Storage Management**
|
||||
- Use transactions
|
||||
- Implement data validation
|
||||
- Handle concurrent access
|
||||
- Maintain data integrity
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
1. **Performance Optimizations**
|
||||
- Implement parallel processing
|
||||
- Add caching layer
|
||||
- Optimize database queries
|
||||
- Enhance error recovery
|
||||
|
||||
2. **Feature Additions**
|
||||
- Content performance tracking
|
||||
- Automated content planning
|
||||
- Enhanced competitive intelligence
|
||||
- Advanced topic clustering
|
||||
|
||||
3. **Integration Improvements**
|
||||
- API endpoints
|
||||
- Export capabilities
|
||||
- Data visualization
|
||||
- Progress tracking
|
||||
|
||||
4. **UI/UX Enhancements**
|
||||
- Interactive visualizations
|
||||
- Real-time progress updates
|
||||
- Export interfaces
|
||||
- Customization options
|
||||
|
||||
## Contributing
|
||||
|
||||
When contributing to these utility modules:
|
||||
|
||||
1. Follow the existing code structure
|
||||
2. Add comprehensive error handling
|
||||
3. Include unit tests
|
||||
4. Update documentation
|
||||
5. Follow PEP 8 style guide
|
||||
|
||||
## Dependencies
|
||||
|
||||
- BeautifulSoup4: HTML parsing
|
||||
- NLTK: Natural language processing
|
||||
- SQLAlchemy: Database operations
|
||||
- Streamlit: UI components
|
||||
- Requests: HTTP requests
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License - see the LICENSE file for details.
|
||||
@@ -1,13 +0,0 @@
|
||||
"""
|
||||
Utility modules for content gap analysis.
|
||||
"""
|
||||
|
||||
from .data_collector import DataCollector
|
||||
from .content_parser import ContentParser
|
||||
from .ai_processor import AIProcessor
|
||||
|
||||
__all__ = [
|
||||
'DataCollector',
|
||||
'ContentParser',
|
||||
'AIProcessor'
|
||||
]
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,236 +0,0 @@
|
||||
"""
|
||||
Content parser utility for analyzing website content structure.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
import nltk
|
||||
from nltk.tokenize import sent_tokenize, word_tokenize
|
||||
from nltk.corpus import stopwords
|
||||
from collections import Counter
|
||||
|
||||
class ContentParser:
    """Parser for analyzing website content structure.

    Uses NLTK tokenization and BeautifulSoup markup inspection to report text
    statistics, content sections, frequent topics, readability scores and the
    heading hierarchy of a page.
    """

    def __init__(self):
        """Initialize the content parser.

        Downloads the NLTK ``punkt`` tokenizer and ``stopwords`` corpus when
        ``nltk.data.find`` cannot locate them, then stores the English stop
        words as a set on ``self.stop_words``.
        """
        for resource_path, package in (
            ('tokenizers/punkt', 'punkt'),
            ('corpora/stopwords', 'stopwords'),
        ):
            try:
                nltk.data.find(resource_path)
            except LookupError:
                nltk.download(package)

        self.stop_words = set(stopwords.words('english'))

    def parse_structure(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse and analyze the structure of website content.

        Args:
            content: Dictionary containing website content. The keys read are
                ``main_content`` (plain text), ``html`` (markup),
                ``headings`` and ``metadata``; each one is optional.

        Returns:
            Dictionary with the keys ``text_statistics``, ``sections``,
            ``topics``, ``readability``, ``hierarchy`` and ``metadata``.
            If any analysis step raises, the same keys are returned with
            empty values plus an ``error`` key holding the exception message.
        """
        try:
            main_content = content.get('main_content', '')
            soup = BeautifulSoup(content.get('html', ''), 'html.parser')

            text_stats = self._analyze_text(main_content)
            sections = self._extract_sections(soup)
            topics = self._extract_topics(main_content)
            readability = self._analyze_readability(main_content)
            hierarchy = self._analyze_hierarchy(content.get('headings', []))

            return {
                'text_statistics': text_stats,
                'sections': sections,
                'topics': topics,
                'readability': readability,
                'hierarchy': hierarchy,
                'metadata': content.get('metadata', {})
            }

        except Exception as e:
            # Best effort: report the failure in-band with the usual shape.
            return {
                'error': str(e),
                'text_statistics': {},
                'sections': [],
                'topics': [],
                'readability': {},
                'hierarchy': {},
                'metadata': {}
            }

    def _analyze_text(self, text: str) -> Dict[str, Any]:
        """Analyze text statistics.

        ``word_count``, ``unique_words`` and ``average_sentence_length`` are
        computed over alphanumeric tokens with stop words removed;
        ``stop_words`` counts the stop-word tokens that were removed.
        ``paragraphs`` counts the non-blank chunks separated by blank lines,
        so empty text has zero paragraphs.
        """
        sentences = sent_tokenize(text)
        # Tokenize once and reuse the tokens for both word and stop-word counts.
        tokens = word_tokenize(text.lower())
        words = [w for w in tokens if w.isalnum() and w not in self.stop_words]
        paragraphs = [chunk for chunk in text.split('\n\n') if chunk.strip()]

        return {
            'word_count': len(words),
            'sentence_count': len(sentences),
            'average_sentence_length': len(words) / max(len(sentences), 1),
            'unique_words': len(set(words)),
            'stop_words': sum(1 for w in tokens if w in self.stop_words),
            'characters': len(text),
            'paragraphs': len(paragraphs),
            'sentences': sentences
        }

    def _extract_sections(self, soup: "BeautifulSoup") -> List[Dict[str, Any]]:
        """Extract content sections.

        A section is any ``article``/``section``/``div`` element whose class
        matches ``content|main|article|section``. Each entry holds the first
        h1-h3 heading found inside it (or ``'Untitled Section'``), its text,
        a type (the element's classes, or its tag name when it has none), a
        token count and its source position.
        """
        sections = []

        containers = soup.find_all(
            ['article', 'section', 'div'],
            class_=re.compile(r'content|main|article|section')
        )

        for container in containers:
            heading = container.find(['h1', 'h2', 'h3'])
            heading_text = heading.get_text().strip() if heading else 'Untitled Section'

            content = container.get_text().strip()

            # Prefer the CSS classes as the type; fall back to the tag name.
            css_classes = container.get('class')
            section_type = ' '.join(css_classes) if css_classes else container.name

            sections.append({
                'heading': heading_text,
                'content': content,
                'type': section_type,
                'word_count': len(word_tokenize(content)),
                'position': self._get_element_position(container)
            })

        return sections

    def _extract_topics(self, text: str) -> List[Dict[str, Any]]:
        """Extract main topics from content.

        Returns up to ten of the most frequent alphanumeric, non-stop-word
        tokens, each with its count and its share of all such tokens as a
        percentage.
        """
        words = word_tokenize(text.lower())
        words = [w for w in words if w.isalnum() and w not in self.stop_words]
        total_words = len(words)

        # The comprehension is empty when there are no words, so the division
        # is never evaluated with a zero total.
        return [
            {
                'topic': word,
                'frequency': freq,
                'percentage': freq / total_words * 100
            }
            for word, freq in Counter(words).most_common(10)
        ]

    def _analyze_readability(self, text: str) -> Dict[str, float]:
        """Analyze text readability.

        Computes the Flesch Reading Ease score,
        ``206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)``,
        clamped to the range 0-100, together with the averages it is built
        from. Syllables are estimated by ``_count_syllables``.
        """
        sentences = sent_tokenize(text)
        words = [w for w in word_tokenize(text.lower()) if w.isalnum()]
        word_total = max(len(words), 1)  # avoids division by zero on empty text

        avg_sentence_length = len(words) / max(len(sentences), 1)
        avg_word_length = sum(len(w) for w in words) / word_total

        syllables = sum(self._count_syllables(w) for w in words)
        syllables_per_word = syllables / word_total
        flesch_score = 206.835 - 1.015 * avg_sentence_length - 84.6 * syllables_per_word

        return {
            'flesch_score': max(0, min(100, flesch_score)),
            'avg_sentence_length': avg_sentence_length,
            'avg_word_length': avg_word_length,
            'syllables_per_word': syllables_per_word
        }

    def _analyze_hierarchy(self, headings: Any) -> Dict[str, Any]:
        """Analyze content hierarchy.

        Args:
            headings: Either a list of heading dicts that each carry a
                ``level`` key such as ``'h2'``, or a mapping of level to the
                list of headings at that level (the shape returned by
                ``DataCollector._extract_headings``). ``None`` is treated as
                no headings.

        Returns:
            Dictionary with ``total_headings``, ``max_depth`` (deepest level
            number in use, 0 when there are no headings),
            ``heading_distribution`` (level to count) and
            ``has_proper_hierarchy``.
        """
        headings = headings or []

        if isinstance(headings, dict):
            # Levels without any heading are dropped so they do not inflate
            # the depth or the distribution.
            heading_levels = {
                level: list(items) for level, items in headings.items() if items
            }
        else:
            heading_levels = {}
            for heading in headings:
                heading_levels.setdefault(heading['level'], []).append(heading)

        total_headings = sum(len(items) for items in heading_levels.values())
        # Level names look like 'h3'; the second character is the depth.
        max_depth = max((int(level[1]) for level in heading_levels), default=0)

        return {
            'total_headings': total_headings,
            'max_depth': max_depth,
            'heading_distribution': {
                level: len(items) for level, items in heading_levels.items()
            },
            'has_proper_hierarchy': self._check_proper_hierarchy(heading_levels)
        }

    def _check_proper_hierarchy(self, heading_levels: Dict[str, List[Any]]) -> bool:
        """Check if headings follow proper hierarchy.

        The hierarchy is proper when there is exactly one ``h1`` and the
        levels in use never skip a step (for example h1, h2, h3 but not
        h1, h3).
        """
        if not heading_levels:
            return False

        # Exactly one h1 is required.
        if len(heading_levels.get('h1', [])) != 1:
            return False

        levels = sorted(int(level[1]) for level in heading_levels)
        return all(nxt - cur <= 1 for cur, nxt in zip(levels, levels[1:]))

    def _count_syllables(self, word: str) -> int:
        """Count syllables in a word.

        Heuristic: each run of consecutive vowels (``aeiouy``) counts as one
        syllable and a trailing ``e`` is discounted. Any non-empty word
        counts at least one syllable; the empty string counts zero.
        """
        if not word:
            return 0

        word = word.lower()
        vowels = 'aeiouy'

        count = 1 if word[0] in vowels else 0
        for previous, current in zip(word, word[1:]):
            # A vowel only starts a new syllable after a non-vowel.
            if current in vowels and previous not in vowels:
                count += 1
        if word.endswith('e'):
            count -= 1
        if count == 0:
            count += 1
        return count

    def _get_element_position(self, element) -> Dict[str, int]:
        """Get element position in the document.

        Returns the element's ``sourceline`` as ``top`` and ``sourcepos`` as
        ``left``. A missing or ``None`` value is reported as 0, and any
        failure while reading the attributes yields zeros for both.
        """
        try:
            return {
                'top': element.sourceline or 0,
                'left': element.sourcepos or 0
            }
        except Exception:
            # Position is optional metadata; never let it break section parsing.
            return {
                'top': 0,
                'left': 0
            }
|
||||
@@ -1,112 +0,0 @@
|
||||
"""
|
||||
Data collector utility for content gap analysis.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from typing import Dict, Any
|
||||
|
||||
class DataCollector:
    """
    Collects and processes website data for analysis.

    Fetches a page with ``requests`` using a browser-like User-Agent and
    extracts its title, meta description, headings, visible text, links and
    images with BeautifulSoup.
    """

    def __init__(self, timeout: float = 10.0):
        """Initialize the data collector.

        Args:
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout a stalled server would block the
                caller indefinitely.
        """
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.timeout = timeout

    def collect(self, url: str) -> Dict[str, Any]:
        """
        Collect website data for analysis.

        Args:
            url (str): The URL to collect data from

        Returns:
            dict: On success, the keys ``url``, ``title``,
            ``meta_description``, ``headings``, ``content``, ``links`` and
            ``images``. On any failure (network error, timeout, HTTP error
            status, parsing error), only ``error`` (the exception message)
            and ``url``.
        """
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # NOTE: _extract_content removes <script>/<style> from the soup, so
            # the extractors listed after it see the stripped document.
            return {
                'url': url,
                'title': self._extract_title(soup),
                'meta_description': self._extract_meta_description(soup),
                'headings': self._extract_headings(soup),
                'content': self._extract_content(soup),
                'links': self._extract_links(soup),
                'images': self._extract_images(soup)
            }

        except Exception as e:
            # Failures are reported in-band so callers can keep going.
            return {
                'error': str(e),
                'url': url
            }

    def _extract_title(self, soup: "BeautifulSoup") -> str:
        """Extract page title (empty string when there is no <title>)."""
        title = soup.find('title')
        return title.text if title else ''

    def _extract_meta_description(self, soup: "BeautifulSoup") -> str:
        """Extract meta description (empty string when absent)."""
        meta = soup.find('meta', attrs={'name': 'description'})
        return meta.get('content', '') if meta else ''

    def _extract_headings(self, soup: "BeautifulSoup") -> Dict[str, list]:
        """Extract all headings.

        Returns a mapping with the keys ``h1`` to ``h6``, each holding the
        stripped text of every heading at that level (an empty list when the
        level is unused).
        """
        return {
            f'h{level}': [tag.text.strip() for tag in soup.find_all(f'h{level}')]
            for level in range(1, 7)
        }

    def _extract_content(self, soup: "BeautifulSoup") -> str:
        """Extract main content.

        Removes ``<script>`` and ``<style>`` elements from ``soup`` (the soup
        is modified in place) and returns the remaining text with all runs of
        whitespace collapsed to single spaces.
        """
        for element in soup(['script', 'style']):
            element.decompose()

        text = soup.get_text()

        # Strip every line, break it into chunks and drop the empty ones.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        return ' '.join(chunk for chunk in chunks if chunk)

    def _extract_links(self, soup: "BeautifulSoup") -> list:
        """Extract all links.

        Returns one ``{'url', 'text'}`` dict per ``<a>`` element that has a
        non-empty ``href``; the URL is kept exactly as written in the page.
        """
        links = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href:
                links.append({
                    'url': href,
                    'text': link.text.strip()
                })
        return links

    def _extract_images(self, soup: "BeautifulSoup") -> list:
        """Extract all images.

        Returns one ``{'src', 'alt', 'title'}`` dict per ``<img>`` element,
        using an empty string for any attribute that is missing.
        """
        return [
            {
                'src': img.get('src', ''),
                'alt': img.get('alt', ''),
                'title': img.get('title', '')
            }
            for img in soup.find_all('img')
        ]
|
||||
@@ -1,237 +0,0 @@
|
||||
"""
|
||||
SEO analyzer utility for content gap analysis.
|
||||
"""
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urlparse, urljoin
|
||||
import re
|
||||
from typing import Dict, Any, List, Optional
|
||||
from ....utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
|
||||
def analyze_onpage_seo(url: str) -> Dict[str, Any]:
    """
    Analyze on-page SEO elements of a website.

    Delegates to ``WebsiteAnalyzer.analyze_website`` and reshapes its
    ``seo_info``, ``basic_info`` and ``performance`` sections into a flat
    summary.

    Args:
        url: The URL to analyze

    Returns:
        Dictionary containing SEO analysis results. When the analyzer reports
        failure, or anything raises, the dictionary instead holds an
        ``error`` entry together with empty/False/zero placeholders.
    """
    def failure(reason: Any) -> Dict[str, Any]:
        # Shared shape for every unsuccessful outcome.
        return {
            'error': reason,
            'meta_title': '',
            'meta_description': '',
            'has_robots_txt': False,
            'has_sitemap': False,
            'mobile_friendly': False,
            'load_time': 0
        }

    try:
        analysis = WebsiteAnalyzer().analyze_website(url)

        if not analysis.get('success', False):
            return failure(analysis.get('error', 'Unknown error in SEO analysis'))

        details = analysis['data']['analysis']
        seo_info = details['seo_info']
        basic_info = details['basic_info']
        performance = details['performance']

        return {
            'meta_tags': seo_info.get('meta_tags', {}),
            'content': seo_info.get('content', {}),
            'meta_title': basic_info.get('title', ''),
            'meta_description': basic_info.get('meta_description', ''),
            'has_robots_txt': bool(basic_info.get('robots_txt')),
            'has_sitemap': bool(basic_info.get('sitemap')),
            'mobile_friendly': True,  # placeholder: not actually measured yet
            'load_time': performance.get('load_time', 0)
        }
    except Exception as exc:
        return failure(str(exc))
|
||||
|
||||
def _analyze_meta_tags(soup: BeautifulSoup) -> Dict[str, Any]:
    """Analyze meta tags of the webpage.

    Args:
        soup: Parsed document to inspect.

    Returns:
        Dictionary that always contains ``og_tags`` and ``twitter_tags``
        (mappings of the tag's property/name to its ``content``), plus
        ``title``, ``description`` and ``keywords`` when the corresponding
        element exists in the document.
    """
    meta_tags = {}

    # get_text() is used instead of .string because .string is None for an
    # empty <title> or one with nested markup, which would crash on .strip().
    title_tag = soup.find('title')
    if title_tag:
        meta_tags['title'] = title_tag.get_text().strip()

    # Meta description
    meta_desc = soup.find('meta', {'name': 'description'})
    if meta_desc:
        meta_tags['description'] = meta_desc.get('content', '').strip()

    # Meta keywords
    meta_keywords = soup.find('meta', {'name': 'keywords'})
    if meta_keywords:
        meta_tags['keywords'] = meta_keywords.get('content', '').strip()

    # Open Graph tags (<meta property="og:...">)
    meta_tags['og_tags'] = {
        tag['property']: tag.get('content', '')
        for tag in soup.find_all('meta', property=re.compile(r'^og:'))
    }

    # Twitter Card tags (<meta name="twitter:...">). The HTML attribute called
    # "name" has to go through attrs=: as a keyword argument, name= is the
    # tag-name parameter of find_all and would clash with 'meta'.
    meta_tags['twitter_tags'] = {
        tag['name']: tag.get('content', '')
        for tag in soup.find_all('meta', attrs={'name': re.compile(r'^twitter:')})
    }

    return meta_tags
|
||||
|
||||
def _analyze_headings(soup: BeautifulSoup) -> Dict[str, Any]:
    """Analyze heading structure of the webpage.

    Returns a mapping with the keys ``h1`` to ``h6``; each value lists the
    stripped text of every heading at that level, in document order (an
    empty list when the level is unused).
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return {
        tag: [element.get_text().strip() for element in soup.find_all(tag)]
        for tag in heading_tags
    }
|
||||
|
||||
def _analyze_content(soup: BeautifulSoup) -> Dict[str, Any]:
    """Analyze main content of the webpage.

    The main content is the first match among ``<main>``, ``<article>`` and a
    ``<div>`` whose class matches ``content|main|article``, tried in that
    order.

    Returns:
        Dictionary with ``word_count`` (whitespace-separated words),
        ``paragraph_count`` (number of ``<p>`` elements inside the container)
        and ``content`` (the container's text). All three are zero/empty when
        no container is found.
    """
    container = soup.find('main')
    if not container:
        container = soup.find('article')
    if not container:
        container = soup.find('div', class_=re.compile(r'content|main|article'))

    if not container:
        return {
            'word_count': 0,
            'paragraph_count': 0,
            'content': ''
        }

    text = container.get_text()
    return {
        'word_count': len(text.split()),
        'paragraph_count': len(container.find_all('p')),
        'content': text
    }
|
||||
|
||||
def _analyze_links(soup: BeautifulSoup, base_url: str) -> Dict[str, Any]:
    """Split the page's anchors into internal and external links.

    Args:
        soup: Parsed document to search for ``<a href=...>`` tags.
        base_url: URL of the analysed page; relative hrefs are resolved
            against it and its host decides what counts as internal.

    Returns:
        A dict with 'internal', 'external' and 'broken' lists. Each entry in
        the first two is ``{'url', 'text', 'title'}``. 'broken' is never
        filled in by this function and is always returned empty.
    """
    links = {
        'internal': [],
        'external': [],
        'broken': []
    }

    # Host names are case-insensitive, so compare them lower-cased.
    base_domain = urlparse(base_url).netloc.lower()

    for link in soup.find_all('a', href=True):
        href = link['href']

        # Handle relative URLs
        if not href.startswith(('http://', 'https://')):
            href = urljoin(base_url, href)

        target = urlparse(href)

        # mailto:, tel:, javascript: and similar hrefs are not web pages; they
        # used to fall through to 'external' because their netloc is empty.
        # An empty scheme is left alone so a scheme-less base_url keeps working.
        if target.scheme and target.scheme not in ('http', 'https'):
            continue

        bucket = 'internal' if target.netloc.lower() == base_domain else 'external'
        links[bucket].append({
            'url': href,
            'text': link.get_text().strip(),
            'title': link.get('title', '')
        })

    return links
|
||||
|
||||
def _analyze_images(soup: BeautifulSoup) -> Dict[str, Any]:
    """Summarise the <img> tags found in the document.

    Args:
        soup: Parsed document to search.

    Returns:
        A dict with 'total' plus the counts 'with_alt', 'with_title' and
        'with_dimensions', and 'images': one record per tag holding its
        src/alt/title/width/height attributes (empty string when absent) and
        the has_alt/has_title/has_dimensions flags. has_dimensions requires
        both width and height to be non-empty.
    """
    def describe(tag):
        record = {
            attr: tag.get(attr, '')
            for attr in ('src', 'alt', 'title', 'width', 'height')
        }
        record['has_alt'] = bool(record['alt'])
        record['has_title'] = bool(record['title'])
        record['has_dimensions'] = bool(record['width'] and record['height'])
        return record

    records = [describe(tag) for tag in soup.find_all('img')]

    def tally(flag):
        return len([record for record in records if record[flag]])

    return {
        'total': len(records),
        'with_alt': tally('has_alt'),
        'with_title': tally('has_title'),
        'with_dimensions': tally('has_dimensions'),
        'images': records,
    }
|
||||
|
||||
def _check_technical_elements(soup: BeautifulSoup, url: str) -> Dict[str, Any]:
    """Check technical SEO elements of a page and its site.

    Issues two HTTP GET requests (robots.txt and sitemap.xml at the site
    root, 5 second timeout each) and inspects the parsed document for a
    viewport meta tag, a canonical link and the <html lang> attribute.

    Args:
        soup: Parsed document of the page.
        url: URL of the page; its scheme and host locate the site root.

    Returns:
        A dict with the booleans 'has_robots_txt', 'has_sitemap',
        'mobile_friendly' (viewport meta present), 'has_canonical' and
        'has_language', plus 'language' (the lang value, '' when missing).
    """
    def _responds_ok(resource_url: str) -> bool:
        """Return True only when a GET of resource_url answers with HTTP 200."""
        try:
            return requests.get(resource_url, timeout=5).status_code == 200
        except Exception:
            # Best-effort probe: any failure counts as "not available".
            # Unlike the former bare except, this no longer swallows
            # KeyboardInterrupt or SystemExit.
            return False

    base_url = urlparse(url)
    site_root = f"{base_url.scheme}://{base_url.netloc}"

    has_robots_txt = _responds_ok(f"{site_root}/robots.txt")
    has_sitemap = _responds_ok(f"{site_root}/sitemap.xml")

    # A viewport meta tag is used here as the mobile-friendliness signal.
    has_viewport = bool(soup.find('meta', {'name': 'viewport'}))
    has_canonical = bool(soup.find('link', {'rel': 'canonical'}))

    # Fragments and malformed documents may have no <html> element at all;
    # treat that as "no language declared" instead of raising AttributeError.
    html_tag = soup.find('html')
    html_lang = html_tag.get('lang', '') if html_tag is not None else ''

    return {
        'has_robots_txt': has_robots_txt,
        'has_sitemap': has_sitemap,
        'mobile_friendly': has_viewport,
        'has_canonical': has_canonical,
        'has_language': bool(html_lang),
        'language': html_lang
    }
|
||||
@@ -1,270 +0,0 @@
|
||||
"""
|
||||
Storage module for content gap analysis results.
|
||||
"""
|
||||
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
import streamlit as st
|
||||
|
||||
class ContentGapAnalysisStorage:
    """Handles storage and retrieval of content gap analysis results.

    Every public method reports SQLAlchemyError failures through ``st.error``
    and returns a sentinel (None, [] or False) instead of raising.

    NOTE(review): the ORM models used below (ContentGapAnalysis,
    WebsiteAnalysis, CompetitorAnalysis, KeywordAnalysis,
    ContentRecommendation, AnalysisHistory) are not imported by this module's
    import section - confirm they are brought into scope, otherwise every
    method fails with NameError.
    """

    def __init__(self, db_session: Session):
        """Initialize the storage handler.

        Args:
            db_session: Session used for every query and commit issued here.
        """
        self.db = db_session

    def _related(self, model, analysis_id: int):
        """Return a query for the rows of ``model`` linked to ``analysis_id``."""
        return self.db.query(model).filter_by(content_gap_analysis_id=analysis_id)

    def save_analysis(self, user_id: int, website_url: str, industry: str, results: Dict[str, Any]) -> Optional[int]:
        """
        Save content gap analysis results.

        Writes the main analysis row, its website/keyword rows, one row per
        competitor and recommendation, and a history row, then commits them
        together.

        Args:
            user_id: User ID
            website_url: Target website URL
            industry: Industry category
            results: Analysis results dictionary. The optional sections
                'website', 'keywords', 'competitors', 'recommendations' and
                'duration' are read; a missing or None section is treated as
                empty.

        Returns:
            Analysis ID if successful, None if the database reported an error
            (the transaction is rolled back and the error shown via st.error).

        Raises:
            Exception: any non-SQLAlchemy error is re-raised after the
                session has been rolled back.
        """
        # `or` also covers sections that are present but None, which used to
        # raise AttributeError/TypeError halfway through the transaction.
        website = results.get('website') or {}
        keywords = results.get('keywords') or {}
        competitors = results.get('competitors') or []
        recommendations = results.get('recommendations') or []

        try:
            # Create main analysis record
            analysis = ContentGapAnalysis(
                user_id=user_id,
                website_url=website_url,
                industry=industry,
                status='completed',
                # NOTE(review): `metadata` is a reserved attribute name on
                # SQLAlchemy declarative classes - confirm the model really
                # persists this value.
                metadata={'version': '1.0'}
            )
            self.db.add(analysis)
            self.db.flush()  # Get the ID without committing

            # Save website analysis
            self.db.add(WebsiteAnalysis(
                content_gap_analysis_id=analysis.id,
                content_score=website.get('content_score', 0),
                seo_score=website.get('seo_score', 0),
                structure_score=website.get('structure_score', 0),
                content_metrics=website.get('content_metrics', {}),
                seo_metrics=website.get('seo_metrics', {}),
                technical_metrics=website.get('technical_metrics', {}),
                ai_insights=website.get('ai_insights', {})
            ))

            # Save competitor analysis if available
            for competitor in competitors:
                self.db.add(CompetitorAnalysis(
                    content_gap_analysis_id=analysis.id,
                    competitor_url=competitor.get('url'),
                    market_position=competitor.get('market_position', {}),
                    content_gaps=competitor.get('content_gaps', []),
                    competitive_advantages=competitor.get('competitive_advantages', []),
                    trend_analysis=competitor.get('trend_analysis', {})
                ))

            # Save keyword analysis
            self.db.add(KeywordAnalysis(
                content_gap_analysis_id=analysis.id,
                top_keywords=keywords.get('top_keywords', []),
                search_intent=keywords.get('search_intent', {}),
                opportunities=keywords.get('opportunities', []),
                trend_analysis=keywords.get('trend_analysis', {})
            ))

            # Save recommendations
            for recommendation in recommendations:
                self.db.add(ContentRecommendation(
                    content_gap_analysis_id=analysis.id,
                    recommendation_type=recommendation.get('type'),
                    priority_score=recommendation.get('priority_score', 0),
                    recommendation=recommendation.get('recommendation', ''),
                    implementation_steps=recommendation.get('implementation_steps', []),
                    expected_impact=recommendation.get('expected_impact', {}),
                    status='pending'
                ))

            # Save analysis history
            self.db.add(AnalysisHistory(
                content_gap_analysis_id=analysis.id,
                status='completed',
                metrics={'duration': results.get('duration', 0)}
            ))

            # Commit all changes
            self.db.commit()
            return analysis.id

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error saving analysis results: {str(e)}")
            return None
        except Exception:
            # Not a database error (e.g. a malformed results entry): still
            # propagate it, but do not leave flushed rows pending in the session.
            self.db.rollback()
            raise

    def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
        """
        Retrieve content gap analysis results.

        Args:
            analysis_id: Analysis ID

        Returns:
            Dictionary containing analysis results if found, None when the ID
            is unknown or the database reported an error. 'website' and
            'keywords' are empty dicts when no such row exists; 'history' is
            ordered newest run first.
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return None

            website_analysis = self._related(WebsiteAnalysis, analysis_id).first()
            competitor_analyses = self._related(CompetitorAnalysis, analysis_id).all()
            keyword_analysis = self._related(KeywordAnalysis, analysis_id).first()
            recommendations = self._related(ContentRecommendation, analysis_id).all()
            history = self._related(AnalysisHistory, analysis_id).order_by(
                AnalysisHistory.run_date.desc()
            ).all()

            return {
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status,
                'website': {
                    'content_score': website_analysis.content_score,
                    'seo_score': website_analysis.seo_score,
                    'structure_score': website_analysis.structure_score,
                    'content_metrics': website_analysis.content_metrics,
                    'seo_metrics': website_analysis.seo_metrics,
                    'technical_metrics': website_analysis.technical_metrics,
                    'ai_insights': website_analysis.ai_insights
                } if website_analysis else {},
                'competitors': [{
                    'url': ca.competitor_url,
                    'market_position': ca.market_position,
                    'content_gaps': ca.content_gaps,
                    'competitive_advantages': ca.competitive_advantages,
                    'trend_analysis': ca.trend_analysis
                } for ca in competitor_analyses],
                'keywords': {
                    'top_keywords': keyword_analysis.top_keywords,
                    'search_intent': keyword_analysis.search_intent,
                    'opportunities': keyword_analysis.opportunities,
                    'trend_analysis': keyword_analysis.trend_analysis
                } if keyword_analysis else {},
                'recommendations': [{
                    'type': r.recommendation_type,
                    'priority_score': r.priority_score,
                    'recommendation': r.recommendation,
                    'implementation_steps': r.implementation_steps,
                    'expected_impact': r.expected_impact,
                    'status': r.status
                } for r in recommendations],
                'history': [{
                    'run_date': h.run_date,
                    'status': h.status,
                    'metrics': h.metrics,
                    'error_log': h.error_log
                } for h in history]
            }

        except SQLAlchemyError as e:
            st.error(f"Error retrieving analysis results: {str(e)}")
            return None

    def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
        """
        Get all analyses for a user.

        Args:
            user_id: User ID

        Returns:
            List of analysis summaries (id, website_url, industry,
            analysis_date, status), newest first; an empty list when the
            database reported an error.
        """
        try:
            analyses = self.db.query(ContentGapAnalysis).filter_by(
                user_id=user_id
            ).order_by(ContentGapAnalysis.analysis_date.desc()).all()

            return [{
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status
            } for analysis in analyses]

        except SQLAlchemyError as e:
            st.error(f"Error retrieving user analyses: {str(e)}")
            return []

    def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
        """
        Update the status of a recommendation.

        Also stamps ``updated_at`` with the current naive UTC time.

        Args:
            recommendation_id: Recommendation ID
            status: New status

        Returns:
            True if the row was found and committed, False when it does not
            exist or the database reported an error.
        """
        try:
            recommendation = self.db.query(ContentRecommendation).get(recommendation_id)
            if not recommendation:
                return False

            recommendation.status = status
            recommendation.updated_at = datetime.utcnow()
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error updating recommendation status: {str(e)}")
            return False

    def delete_analysis(self, analysis_id: int) -> bool:
        """
        Delete an analysis and all related data.

        Only the main analysis row is deleted explicitly.
        NOTE(review): removal of the related rows presumably relies on cascade
        rules declared on the models - confirm.

        Args:
            analysis_id: Analysis ID

        Returns:
            True if the row was found and the delete committed, False when it
            does not exist or the database reported an error.
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return False

            self.db.delete(analysis)
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error deleting analysis: {str(e)}")
            return False
|
||||
@@ -1,291 +0,0 @@
|
||||
"""Website analyzer module for content gap analysis."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import Dict, Any, List, Optional
|
||||
import asyncio
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer as BaseWebsiteAnalyzer
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Make sure the log directory is present before any sink is attached
os.makedirs("logs", exist_ok=True)

# Replace loguru's default handler with a rotating file sink (DEBUG and up)
# followed by a colourised console sink (INFO and up)
logger.remove()
logger.add(
    "logs/content_gap_website_analyzer.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
)
|
||||
|
||||
class WebsiteAnalyzer(BaseWebsiteAnalyzer):
    """Extended website analyzer for content gap analysis.

    Relies on ``self.analyze_website`` and ``self.progress``, which are not
    defined in this class (presumably provided by BaseWebsiteAnalyzer -
    confirm).
    """

    def __init__(self):
        """Initialize the website analyzer."""
        super().__init__()
        logger.info("ContentGapWebsiteAnalyzer initialized")

    def analyze_content_gaps(self, url: str, competitor_urls: List[str]) -> Dict[str, Any]:
        """
        Analyze content gaps between the target website and competitors.

        Competitors whose analysis does not report success are silently left
        out of the comparison.

        Args:
            url: The target URL to analyze
            competitor_urls: List of competitor URLs to compare against

        Returns:
            Dictionary with 'gaps' and 'recommendations'. On any failure both
            are empty lists and an 'error' message is included; this method
            does not raise.
        """
        try:
            # Analyze target website
            target_analysis = self.analyze_website(url)
            if not target_analysis.get('success', False):
                return {
                    'error': target_analysis.get('error', 'Unknown error in target analysis'),
                    'gaps': [],
                    'recommendations': []
                }

            # Analyze competitor websites
            competitor_analyses = []
            for competitor_url in competitor_urls:
                competitor_result = self.analyze_website(competitor_url)
                if competitor_result.get('success', False):
                    competitor_analyses.append(competitor_result['data'])

            # Generate content gap analysis using AI
            prompt = f"""Analyze content gaps between the target website and competitors:

Target Website:
{json.dumps(target_analysis['data'], indent=2)}

Competitor Websites:
{json.dumps(competitor_analyses, indent=2)}

Identify:
1. Missing content topics
2. Content depth differences
3. Keyword gaps
4. Content structure improvements
5. Content quality recommendations

Format the response as JSON with 'gaps' and 'recommendations' keys."""

            # Get AI analysis
            analysis = llm_text_gen(
                prompt=prompt,
                system_prompt="You are an SEO expert specializing in content gap analysis.",
                response_format="json_object"
            )

            if not analysis:
                return {
                    'error': 'Failed to generate content gap analysis',
                    'gaps': [],
                    'recommendations': []
                }

            # NOTE(review): llm_text_gen's return type is not visible here; if
            # it hands back the JSON as text, decode it so .get() below works.
            if isinstance(analysis, str):
                analysis = json.loads(analysis)

            return {
                'gaps': analysis.get('gaps', []),
                'recommendations': analysis.get('recommendations', [])
            }

        except Exception as e:
            error_msg = f"Error analyzing content gaps: {str(e)}"
            # logger.exception records the traceback. Passing exc_info=True to
            # loguru does not, and makes it str.format() the message, which
            # can itself raise when the error text contains braces.
            logger.exception(error_msg)
            return {
                'error': error_msg,
                'gaps': [],
                'recommendations': []
            }

    def analyze(self, url: str) -> Dict[str, Any]:
        """
        Analyze a website for content gaps and SEO opportunities.

        Progress is pushed to ``self.progress`` at start, on failure and on
        completion.

        Args:
            url: The URL to analyze

        Returns:
            On success: {'success': True, 'data': {...}, 'progress': {...}}
            where data holds 'seo_metrics', 'performance_metrics' and the raw
            'website_analysis'. On failure: {'error', 'error_details',
            'progress'}.
        """
        import traceback

        # Built before the try block so the error handler can always update it.
        progress = {
            'status': 'in_progress',
            'current_stage': 'content_analysis',
            'current_step': 'Initializing analysis',
            'progress': 0,
            'details': 'Starting website analysis...'
        }

        try:
            self.progress.update(progress)

            # Get base website analysis
            logger.info("Starting base website analysis")
            website_analysis = self.analyze_website(url)

            if not website_analysis.get('success', False):
                error_msg = website_analysis.get('error', 'Unknown error in website analysis')
                logger.error(f"Error in website analysis: {error_msg}")
                progress['status'] = 'error'
                progress['details'] = error_msg
                self.progress.update(progress)
                return {
                    'error': error_msg,
                    'error_details': website_analysis.get('error_details', {}),
                    'progress': progress
                }

            # Extract SEO and performance metrics from the analysis
            seo_metrics = self._extract_seo_metrics(website_analysis['data'])
            performance_metrics = self._extract_performance_metrics(website_analysis['data'])

            # Update progress
            progress['status'] = 'completed'
            progress['progress'] = 100
            progress['details'] = 'Analysis completed successfully'
            self.progress.update(progress)

            return {
                'success': True,
                'data': {
                    'seo_metrics': seo_metrics,
                    'performance_metrics': performance_metrics,
                    'website_analysis': website_analysis['data']
                },
                'progress': progress
            }

        except Exception as e:
            error_msg = f"Error in content gap analysis: {str(e)}"
            logger.exception(error_msg)
            progress['status'] = 'error'
            progress['details'] = error_msg
            self.progress.update(progress)
            return {
                'error': error_msg,
                'error_details': {
                    'type': type(e).__name__,
                    # str(e.__traceback__) only produced an object repr;
                    # format the real stack instead.
                    'traceback': traceback.format_exc()
                },
                'progress': progress
            }

    def _extract_seo_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract SEO-related metrics from website analysis.

        Reads ``website_analysis['analysis']['seo_info']``; missing keys fall
        back to 0 or {}. Returns {} if extraction raises.
        """
        try:
            seo_info = website_analysis.get('analysis', {}).get('seo_info', {})
            return {
                'overall_score': seo_info.get('overall_score', 0),
                'meta_tags': {
                    'title': seo_info.get('meta_tags', {}).get('title', {}),
                    'description': seo_info.get('meta_tags', {}).get('description', {}),
                    'keywords': seo_info.get('meta_tags', {}).get('keywords', {})
                },
                'content': {
                    'word_count': seo_info.get('content', {}).get('word_count', 0),
                    'readability_score': seo_info.get('content', {}).get('readability_score', 0),
                    'content_quality_score': seo_info.get('content', {}).get('content_quality_score', 0)
                }
            }
        except Exception as e:
            logger.exception(f"Error extracting SEO metrics: {str(e)}")
            return {}

    def _extract_performance_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract performance metrics from website analysis.

        Reads ``website_analysis['analysis']['performance']``; missing keys
        fall back to 0. Returns {} if extraction raises.
        """
        try:
            performance_info = website_analysis.get('analysis', {}).get('performance', {})
            return {
                'load_time': performance_info.get('load_time', 0),
                'page_size': performance_info.get('page_size', 0),
                'resource_count': performance_info.get('resource_count', 0),
                'performance_score': performance_info.get('performance_score', 0)
            }
        except Exception as e:
            logger.exception(f"Error extracting performance metrics: {str(e)}")
            return {}

    def _extract_content_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract content-related metrics from website analysis.

        Unlike the sibling extractors, the 'analysis' and 'content_info' keys
        are required: when either is missing the error is logged and {} is
        returned.
        """
        try:
            content_info = website_analysis['analysis']['content_info']
            return {
                'word_count': content_info.get('word_count', 0),
                'heading_count': content_info.get('heading_count', 0),
                'image_count': content_info.get('image_count', 0),
                'link_count': content_info.get('link_count', 0),
                'has_meta_description': content_info.get('has_meta_description', False),
                'has_robots_txt': content_info.get('has_robots_txt', False),
                'has_sitemap': content_info.get('has_sitemap', False)
            }
        except Exception as e:
            logger.exception(f"Error extracting content metrics: {str(e)}")
            return {}

    def _extract_technical_info(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Extract technical information from website analysis.

        Reads ``website_analysis['analysis']['basic_info']``; missing keys
        fall back to '' or {}. Returns {} if extraction raises.
        """
        try:
            basic_info = website_analysis.get('analysis', {}).get('basic_info', {})
            return {
                'title': basic_info.get('title', ''),
                'meta_description': basic_info.get('meta_description', ''),
                'headers': basic_info.get('headers', {}),
                'robots_txt': basic_info.get('robots_txt', ''),
                'sitemap': basic_info.get('sitemap', ''),
                'server_info': basic_info.get('server_info', {}),
                'security_info': basic_info.get('security_info', {})
            }
        except Exception as e:
            logger.exception(f"Error extracting technical info: {str(e)}")
            return {}

    def _generate_insights(self, content_metrics: Dict[str, Any], seo_metrics: Dict[str, Any]) -> List[str]:
        """Generate content insights based on analysis results.

        Args:
            content_metrics: Must contain 'word_count', 'heading_count',
                'image_count', 'has_meta_description', 'has_robots_txt' and
                'has_sitemap'.
            seo_metrics: 'overall_score' is read, defaulting to 0.

        Returns:
            Human-readable findings; an empty list when a required key is
            missing (the error is logged).
        """
        try:
            insights = []

            # Content insights
            if content_metrics['word_count'] < 300:
                insights.append("Content length is below recommended minimum (300 words)")
            elif content_metrics['word_count'] > 2000:
                insights.append("Content length is above recommended maximum (2000 words)")

            if content_metrics['heading_count'] < 2:
                insights.append("Content structure could be improved with more headings")

            if content_metrics['image_count'] == 0:
                insights.append("Consider adding images to improve content engagement")

            # SEO insights
            overall_score = seo_metrics.get('overall_score', 0)
            if overall_score < 60:
                insights.append("SEO optimization needs significant improvement")
            elif overall_score < 80:
                insights.append("SEO optimization has room for improvement")

            if not content_metrics['has_meta_description']:
                insights.append("Missing meta description - important for SEO")

            if not content_metrics['has_robots_txt']:
                insights.append("Missing robots.txt - important for search engine crawling")

            if not content_metrics['has_sitemap']:
                insights.append("Missing sitemap.xml - important for search engine indexing")

            return insights
        except Exception as e:
            logger.exception(f"Error generating insights: {str(e)}")
            return []
|
||||
@@ -1,160 +0,0 @@
|
||||
"""Content title generator module."""
|
||||
|
||||
import os
|
||||
import json
|
||||
import streamlit as st
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
from loguru import logger
|
||||
from typing import Dict, Any, List, Optional
|
||||
import asyncio
|
||||
import sys
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
|
||||
|
||||
# Make sure the log directory is present before any sink is attached
os.makedirs("logs", exist_ok=True)

# Replace loguru's default handler with a rotating file sink (DEBUG and up)
# followed by a colourised console sink (INFO and up)
logger.remove()
logger.add(
    "logs/content_title_generator.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
)
|
||||
|
||||
def ai_title_generator(url: str) -> Dict[str, Any]:
    """
    Generate SEO-optimized titles using AI.

    Analyzes the page with WebsiteAnalyzer, then asks the LLM for five title
    suggestions based on the page's content and meta information.

    Args:
        url: The URL to analyze

    Returns:
        Dictionary with 'patterns' and 'suggestions'. On any failure both are
        empty and an 'error' message is included; this function does not
        raise.
    """
    try:
        # Initialize analyzer
        analyzer = WebsiteAnalyzer()

        # Analyze website
        analysis = analyzer.analyze_website(url)
        if not analysis.get('success', False):
            return {
                'error': analysis.get('error', 'Unknown error in analysis'),
                'patterns': {},
                'suggestions': []
            }

        # Extract content and meta information
        content_info = analysis['data']['analysis']['content_info']
        seo_info = analysis['data']['analysis']['seo_info']

        # Generate title suggestions using AI
        prompt = f"""Based on the following website content and SEO analysis, generate 5 SEO-optimized title suggestions:

Content Analysis:
- Word Count: {content_info.get('word_count', 0)}
- Heading Structure: {content_info.get('heading_structure', {})}

SEO Analysis:
- Meta Title: {seo_info.get('meta_tags', {}).get('title', {}).get('value', '')}
- Meta Description: {seo_info.get('meta_tags', {}).get('description', {}).get('value', '')}
- Keywords: {seo_info.get('meta_tags', {}).get('keywords', {}).get('value', '')}

Generate 5 title suggestions that are:
1. SEO-optimized
2. Engaging and click-worthy
3. Between 50-60 characters
4. Include relevant keywords
5. Follow best practices for title optimization

Format the response as JSON with 'suggestions' and 'patterns' keys."""

        # Get AI suggestions
        suggestions = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in title optimization.",
            response_format="json_object"
        )

        if not suggestions:
            return {
                'error': 'Failed to generate title suggestions',
                'patterns': {},
                'suggestions': []
            }

        # NOTE(review): llm_text_gen's return type is not visible here; if it
        # hands back the JSON as text, decode it so .get() below works.
        if isinstance(suggestions, str):
            suggestions = json.loads(suggestions)

        return {
            'patterns': suggestions.get('patterns', {}),
            'suggestions': suggestions.get('suggestions', [])
        }

    except Exception as e:
        error_msg = f"Error generating title suggestions: {str(e)}"
        # logger.exception records the traceback. Passing exc_info=True to
        # loguru does not, and makes it str.format() the message, which can
        # itself raise when the error text contains braces.
        logger.exception(error_msg)
        return {
            'error': error_msg,
            'patterns': {},
            'suggestions': []
        }
|
||||
|
||||
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(min=1, max=4), reraise=True)
def _request_blog_titles(prompt):
    """Send the prompt to the LLM, retrying up to three times when the call raises."""
    return llm_text_gen(prompt)


def generate_blog_titles(input_blog_keywords, input_blog_content, input_title_type, input_title_intent, input_language):
    """ Generate SEO optimized blog titles using AI

    The prompt is built from whichever of keywords and content is provided.

    Args:
        input_blog_keywords: Main keywords of the blog (may be empty).
        input_blog_content: Blog content (may be empty).
        input_title_type: Blog type the titles are optimized for.
        input_title_intent: Web search intent the titles are optimized for.
        input_language: Language the titles should be written in.

    Returns:
        The LLM response, or None when neither keywords nor content were
        given or the LLM call kept failing (the reason is shown via st.error).
    """
    if input_blog_content and input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with main 'blog keywords' and 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog keywords and content.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords and content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog keywords: '{input_blog_keywords}'
Blog content: '{input_blog_content}'
"""
    elif input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with the main 'keywords' of a blog.
Your task is to write 5 SEO optimized blog titles from the given blog keywords.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog keywords: '{input_blog_keywords}'
"""
    elif input_blog_content:
        prompt = f"""As a SEO expert, I will provide you with the 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog content.

Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.

Blog content: '{input_blog_content}'
"""
    else:
        # Previously `prompt` stayed unbound here and the user saw a confusing
        # "referenced before assignment" message from the handler below.
        st.error("Exit: Provide blog keywords and/or blog content to generate titles.")
        return None

    try:
        # The retry lives on the helper: when it wrapped this function it
        # never fired, because the except below swallowed every error first.
        return _request_blog_titles(prompt)
    except Exception as err:
        st.error(f"Exit: Failed to get response from LLM: {err}")
        return None
|
||||
@@ -1,115 +0,0 @@
|
||||
from typing import List, Dict, Union
|
||||
#from nltk import tokenize, stem, pos_tag
|
||||
from textblob import TextBlob
|
||||
import enchant
|
||||
|
||||
class TextPreprocessor:
    """Normalises raw text before SEO scoring."""

    def preprocess_text(self, text: str) -> str:
        """Tokenize ``text``, Porter-stem every token and re-join with spaces.

        NOTE(review): ``tokenize`` and ``stem`` are only named in the
        commented-out nltk import at the top of this module, so this call
        raises NameError until that import is restored.
        """
        words = tokenize.word_tokenize(text)
        porter = stem.PorterStemmer()
        return ' '.join(porter.stem(word) for word in words)
|
||||
|
||||
class SEOAnalyzer:
    """Scores text on keyword density, readability and part-of-speech mix.

    NOTE(review): ``tokenize`` and ``pos_tag`` are only named in the
    commented-out nltk import at the top of this module, so these methods
    raise NameError until that import is restored.
    """

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """Return the plain average of the three metrics below.

        Despite the name, the result is not scaled to 0-100: it is the mean
        of keyword density, readability score and semantic score as returned
        by the sibling methods.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)

        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total keyword occurrences divided by the token count.

        Matching is a case-insensitive substring count, so a keyword inside a
        longer word also counts. Returns 0.0 for text without tokens.
        """
        word_count = len(tokenize.word_tokenize(text))
        if word_count == 0:
            # Empty text used to raise ZeroDivisionError.
            return 0.0

        lowered = text.lower()
        # Skip empty keywords: ''.count('') matches at every position and
        # would inflate the density.
        occurrences = sum(lowered.count(keyword.lower()) for keyword in keywords if keyword)

        return occurrences / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the reciprocal of the average words per sentence.

        Shorter sentences therefore score higher. Returns 0.0 when the text
        has no sentences or no tokens.
        """
        sentences = tokenize.sent_tokenize(text)
        word_count = sum(len(tokenize.word_tokenize(sentence)) for sentence in sentences)
        if not sentences or word_count == 0:
            # Both divisions below used to raise ZeroDivisionError here.
            return 0.0

        average_words_per_sentence = word_count / len(sentences)
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of tokens tagged as nouns or verbs.

        A token counts when its part-of-speech tag starts with 'N' or 'V'.
        Returns 0.0 for text without tokens.
        """
        tagged_text = pos_tag(tokenize.word_tokenize(text))
        if not tagged_text:
            # Empty text used to raise ZeroDivisionError.
            return 0.0

        content_words = sum(1 for _, pos in tagged_text if pos.startswith(('N', 'V')))
        return content_words / len(tagged_text)
|
||||
|
||||
class SpellChecker:
    """Flag tokens that the enchant ``en_US`` dictionary does not accept."""

    def check_spelling(self, text: str) -> List[str]:
        """Return every token of ``text`` rejected by the dictionary.

        Args:
            text: The text to check; it is split with ``word_tokenize``.

        Returns:
            The rejected tokens, in order of appearance (repeats included).
        """
        # A fresh dictionary object is created on every call.
        dictionary = enchant.Dict("en_US")

        flagged = []
        for word in tokenize.word_tokenize(text):
            if dictionary.check(word):
                continue
            flagged.append(word)

        return flagged
|
||||
|
||||
class SEOAnalysisModule:
    """Run preprocessing, SEO scoring and spell checking as one report."""

    def __init__(self):
        """Create the three collaborators used by ``analyze_text``."""
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """Analyse ``text`` and return all metrics in a single dictionary.

        The SEO metrics are computed on the preprocessed text. Spelling is
        checked on the original ``text``: the preprocessor stems its tokens,
        and checking stems would report on them rather than on the words
        the user actually wrote.

        Args:
            text: Raw input text.
            keywords: Keywords used for the density-based metrics.

        Returns:
            A dict with the keys ``seo_percentage``, ``keyword_density``,
            ``readability_score``, ``semantic_score`` and ``spelling_errors``.
        """
        preprocessed_text = self.text_preprocessor.preprocess_text(text)
        analyzer = self.seo_analyzer

        # NOTE(review): calculate_seo_percentage presumably recomputes the
        # three component scores internally; they are requested again below
        # so they can be reported individually.
        seo_percentage = analyzer.calculate_seo_percentage(preprocessed_text, keywords)
        keyword_density = analyzer.calculate_keyword_density(preprocessed_text, keywords)
        readability_score = analyzer.calculate_readability_score(preprocessed_text)
        semantic_score = analyzer.perform_semantic_analysis(preprocessed_text)

        # Check the text as the user wrote it, not its stemmed form.
        spelling_errors = self.spell_checker.check_spelling(text)

        return {
            'seo_percentage': seo_percentage,
            'keyword_density': keyword_density,
            'readability_score': readability_score,
            'semantic_score': semantic_score,
            'spelling_errors': spelling_errors
        }
|
||||
340
lib/ai_seo_tools/seo_analyzer_api.py
Normal file
340
lib/ai_seo_tools/seo_analyzer_api.py
Normal file
@@ -0,0 +1,340 @@
|
||||
"""
|
||||
FastAPI endpoint for the Comprehensive SEO Analyzer
|
||||
Provides data for the React SEO Dashboard
|
||||
"""
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import List, Optional, Dict, Any
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from .comprehensive_seo_analyzer import ComprehensiveSEOAnalyzer, SEOAnalysisResult
|
||||
|
||||
# ASGI application object; the route decorators further down register their
# endpoints on it.
app = FastAPI(
    title="Comprehensive SEO Analyzer API",
    description="API for analyzing website SEO performance with actionable insights",
    version="1.0.0"
)

# Initialize the analyzer
# One module-level instance is shared by every endpoint in this file.
seo_analyzer = ComprehensiveSEOAnalyzer()
|
||||
|
||||
class SEOAnalysisRequest(BaseModel):
    """Request body accepted by the SEO analysis POST endpoints."""

    # URL of the page to analyse.
    url: HttpUrl
    # Optional keywords forwarded to the analyzer; omitted means None.
    target_keywords: Optional[List[str]] = None
|
||||
|
||||
class SEOAnalysisResponse(BaseModel):
    """Response body returned by the SEO analysis POST endpoints.

    The first eight fields are copied from the analyzer's result object;
    ``success`` and ``message`` are added by the endpoint itself.
    """

    url: str
    timestamp: datetime
    overall_score: int
    health_status: str
    critical_issues: List[str]
    warnings: List[str]
    recommendations: List[str]
    # Per-category analysis details, passed through as returned by the analyzer.
    data: Dict[str, Any]
    success: bool
    message: str
|
||||
|
||||
@app.post("/analyze-seo", response_model=SEOAnalysisResponse)
async def analyze_seo(request: SEOAnalysisRequest):
    """
    Run a full SEO analysis for the URL in the request body.

    Args:
        request: SEOAnalysisRequest with the URL and optional target keywords

    Returns:
        SEOAnalysisResponse carrying the analyzer's findings

    Raises:
        HTTPException: status 500 when any step of the analysis fails
    """
    try:
        # NOTE(review): analyze_url is called synchronously inside an async
        # handler; if it performs network I/O it presumably blocks the event
        # loop for the duration of the call — confirm.
        analysis = seo_analyzer.analyze_url(str(request.url), request.target_keywords)

        return SEOAnalysisResponse(
            url=analysis.url,
            timestamp=analysis.timestamp,
            overall_score=analysis.overall_score,
            health_status=analysis.health_status,
            critical_issues=analysis.critical_issues,
            warnings=analysis.warnings,
            recommendations=analysis.recommendations,
            data=analysis.data,
            success=True,
            message=f"SEO analysis completed successfully for {analysis.url}",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error analyzing SEO: {str(e)}")
|
||||
|
||||
@app.get("/health")
async def health_check():
    """Report that the service is up, together with the current server time."""
    payload = dict(
        status="healthy",
        timestamp=datetime.now(),
        service="Comprehensive SEO Analyzer API",
    )
    return payload
|
||||
|
||||
@app.get("/analysis-summary/{url:path}")
async def get_analysis_summary(url: str):
    """
    Return a condensed view of the SEO analysis for a URL.

    Args:
        url: The URL to analyze; "https://" is prepended when no http(s)
            scheme is present

    Returns:
        Dict with scores, issue counts and the first three issues and
        recommendations

    Raises:
        HTTPException: status 500 when the analysis fails
    """
    try:
        has_scheme = url.startswith(('http://', 'https://'))
        target = url if has_scheme else f"https://{url}"

        analysis = seo_analyzer.analyze_url(target)
        issues = analysis.critical_issues
        advice = analysis.recommendations

        return {
            "url": analysis.url,
            "overall_score": analysis.overall_score,
            "health_status": analysis.health_status,
            "critical_issues_count": len(issues),
            "warnings_count": len(analysis.warnings),
            "recommendations_count": len(advice),
            # Only the first three entries of each list are surfaced.
            "top_issues": issues[:3],
            "top_recommendations": advice[:3],
            "analysis_timestamp": analysis.timestamp.isoformat(),
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error getting analysis summary: {str(e)}"
        )
|
||||
|
||||
@app.get("/seo-metrics/{url:path}")
async def get_seo_metrics(url: str):
    """
    Return per-category scores and details for the dashboard.

    Args:
        url: The URL to analyze; "https://" is prepended when no http(s)
            scheme is present

    Returns:
        Dict with a flat "metrics" section, the issue lists, and the
        per-category detail dicts under "detailed_analysis"

    Raises:
        HTTPException: status 500 when the analysis fails
    """
    # (metric key in the response, category key in the analyzer's data)
    score_sources = (
        ("url_structure_score", 'url_structure'),
        ("meta_data_score", 'meta_data'),
        ("content_score", 'content_analysis'),
        ("technical_score", 'technical_seo'),
        ("performance_score", 'performance'),
        ("accessibility_score", 'accessibility'),
        ("user_experience_score", 'user_experience'),
        ("security_score", 'security_headers'),
    )
    detail_sections = (
        'url_structure',
        'meta_data',
        'content_analysis',
        'technical_seo',
        'performance',
        'accessibility',
        'user_experience',
        'security_headers',
        'keyword_analysis',
    )

    try:
        if not url.startswith(('http://', 'https://')):
            url = f"https://{url}"

        analysis = seo_analyzer.analyze_url(url)

        metrics = {
            "overall_score": analysis.overall_score,
            "health_status": analysis.health_status,
        }
        # A missing category or a missing 'score' entry counts as 0.
        for metric_name, section in score_sources:
            metrics[metric_name] = analysis.data.get(section, {}).get('score', 0)

        detailed = {}
        for section in detail_sections:
            detailed[section] = analysis.data.get(section, {})

        return {
            "metrics": metrics,
            "critical_issues": analysis.critical_issues,
            "warnings": analysis.warnings,
            "recommendations": analysis.recommendations,
            "detailed_analysis": detailed,
            "timestamp": analysis.timestamp.isoformat(),
            "url": analysis.url,
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error getting SEO metrics: {str(e)}"
        )
|
||||
|
||||
@app.post("/batch-analyze")
async def batch_analyze(urls: List[str]):
    """
    Analyze several URLs one after another and report per-URL outcomes.

    A failure for one URL is recorded in its result entry and does not stop
    the remaining URLs from being analyzed.

    Args:
        urls: List of URLs to analyze; "https://" is prepended to entries
            without an http(s) scheme

    Returns:
        Dict with the totals and a "results" list holding one entry per URL

    Raises:
        HTTPException: status 500 when the batch as a whole fails
    """
    try:
        results = []

        for raw_url in urls:
            target = raw_url
            try:
                if not target.startswith(('http://', 'https://')):
                    target = f"https://{target}"

                analysis = seo_analyzer.analyze_url(target)
                entry = {
                    "url": analysis.url,
                    "overall_score": analysis.overall_score,
                    "health_status": analysis.health_status,
                    "critical_issues_count": len(analysis.critical_issues),
                    "warnings_count": len(analysis.warnings),
                    "success": True,
                }
            except Exception as e:
                # The failure entry reports the URL as it was at the time of
                # the error (scheme-prefixed if that step had already run).
                entry = {
                    "url": target,
                    "overall_score": 0,
                    "health_status": "error",
                    "critical_issues_count": 0,
                    "warnings_count": 0,
                    "success": False,
                    "error": str(e),
                }
            results.append(entry)

        succeeded = sum(1 for entry in results if entry['success'])
        failed = sum(1 for entry in results if not entry['success'])

        return {
            "total_urls": len(urls),
            "successful_analyses": succeeded,
            "failed_analyses": failed,
            "results": results,
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error in batch analysis: {str(e)}"
        )
|
||||
|
||||
# Enhanced prompts for better results
# User-facing prefix texts keyed by message category. enhance_analysis_result
# prepends the 'critical_issue', 'warning' and 'recommendation' entries to
# the corresponding analyzer messages; the remaining keys are not referenced
# in the visible part of this file.
ENHANCED_PROMPTS = {
    "critical_issue": "🚨 CRITICAL: This issue is severely impacting your SEO performance and must be fixed immediately.",
    "warning": "⚠️ WARNING: This could be improved to boost your search rankings.",
    "recommendation": "💡 RECOMMENDATION: Implement this to improve your SEO score.",
    "excellent": "🎉 EXCELLENT: Your SEO is performing very well in this area!",
    "good": "✅ GOOD: Your SEO is performing well, with room for minor improvements.",
    "needs_improvement": "🔧 NEEDS IMPROVEMENT: Several areas need attention to boost your SEO.",
    "poor": "❌ POOR: Significant improvements needed across multiple areas."
}
|
||||
|
||||
def enhance_analysis_result(result: SEOAnalysisResult) -> SEOAnalysisResult:
    """
    Build a copy of ``result`` whose messages carry a category prefix.

    Each critical issue, warning and recommendation gets the matching
    ENHANCED_PROMPTS text and a single space prepended. All other fields
    are passed through unchanged; ``result`` itself is not modified.
    """
    def _with_prefix(prompt_key, messages):
        # The prompt is looked up per message, so an empty list never
        # touches ENHANCED_PROMPTS.
        return [f"{ENHANCED_PROMPTS[prompt_key]} {message}" for message in messages]

    prefixed_issues = _with_prefix('critical_issue', result.critical_issues)
    prefixed_warnings = _with_prefix('warning', result.warnings)
    prefixed_recommendations = _with_prefix('recommendation', result.recommendations)

    return SEOAnalysisResult(
        url=result.url,
        timestamp=result.timestamp,
        overall_score=result.overall_score,
        health_status=result.health_status,
        critical_issues=prefixed_issues,
        warnings=prefixed_warnings,
        recommendations=prefixed_recommendations,
        data=result.data,
    )
|
||||
|
||||
@app.post("/analyze-seo-enhanced", response_model=SEOAnalysisResponse)
async def analyze_seo_enhanced(request: SEOAnalysisRequest):
    """
    Run the SEO analysis and return it with prefixed, user-friendly messages.

    Args:
        request: SEOAnalysisRequest with the URL and optional target keywords

    Returns:
        SEOAnalysisResponse whose issue, warning and recommendation texts
        have been passed through enhance_analysis_result

    Raises:
        HTTPException: status 500 when any step fails
    """
    try:
        raw_result = seo_analyzer.analyze_url(str(request.url), request.target_keywords)
        enhanced = enhance_analysis_result(raw_result)

        return SEOAnalysisResponse(
            url=enhanced.url,
            timestamp=enhanced.timestamp,
            overall_score=enhanced.overall_score,
            health_status=enhanced.health_status,
            critical_issues=enhanced.critical_issues,
            warnings=enhanced.warnings,
            recommendations=enhanced.recommendations,
            data=enhanced.data,
            success=True,
            message=f"Enhanced SEO analysis completed successfully for {enhanced.url}",
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error analyzing SEO: {str(e)}")
|
||||
|
||||
# Script entry point: serve the API with uvicorn when this file is executed
# directly rather than imported.
if __name__ == "__main__":
    import uvicorn
    # Listens on all interfaces (0.0.0.0), port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||
@@ -1,98 +0,0 @@
|
||||
import streamlit as st
|
||||
import openai
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
def scrape_url_content(url, timeout=10):
    """
    Fetch a page and return the text of its paragraph elements.

    Args:
        url (str): The URL to scrape content from.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, ``requests.get`` can block indefinitely on an
            unresponsive host.

    Returns:
        str: The text of all ``<p>`` elements joined by single spaces, or an
        empty string when fetching or parsing fails (the error is shown in
        the Streamlit UI).
    """
    # FIXME: Use firecrawl metadata option for this.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return ' '.join(p.text for p in soup.find_all('p'))
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching the URL content: {e}")
        return ""
    except Exception as e:
        st.error(f"Error parsing the HTML content: {e}")
        return ""
|
||||
|
||||
def generate_twitter_tags(topic, scraped_content=""):
    """
    Ask the LLM for Twitter hashtags matching a topic.

    Args:
        topic (str): The main topic or key phrase.
        scraped_content (str): Optional page text for extra context; only
            the first 700 characters are placed in the prompt.

    Returns:
        str: The LLM response containing the hashtags, or None when
        generation fails (the error is shown in the Streamlit UI).
    """
    prompt = f"Generate a list of highly relevant and trending Twitter hashtags based on the topic '{topic}'"

    if scraped_content:
        prompt += f" and the following content: {scraped_content[:700]}..."  # Limit content to keep prompt manageable.

    prompt += " Make sure the hashtags are popular and relevant to the topic. Follow Latest best practices for twitter tags."

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # The message previously said "Open Graph tags", which is the wrong
        # feature for this generator.
        st.error(f"Failed to generate Twitter tags: {err}")
        return None
|
||||
|
||||
|
||||
def display_app():
    """
    Render the Twitter tag generator UI and handle user interactions.

    The optional URL is scraped only after the "Generate Twitter Tags"
    button is pressed. Streamlit re-executes the script on every widget
    interaction, so scraping outside the button handler would refetch the
    page on each rerun.
    """
    st.title("AI Twitter Tag Generator")

    st.write(
        "Generate trending and highly relevant Twitter tags with minimal input. "
        "Optionally, provide a URL to make the tags even more targeted."
    )

    # User Inputs
    topic = st.text_input(
        "Enter the topic or key phrase for Twitter tags",
        placeholder="e.g., AI in marketing"
    )

    url = st.text_input(
        "Optional: Enter a URL to scrape for more targeted tags",
        placeholder="e.g., https://example.com/article"
    )

    if not topic:
        st.info("Please enter a topic or key phrase to generate Twitter tags.")
        return

    if not st.button("Generate Twitter Tags"):
        return

    scraped_content = ""
    if url:
        with st.spinner("Scraping content from the provided URL..."):
            scraped_content = scrape_url_content(url)
        if not scraped_content:
            st.info("No content could be extracted from the provided URL.")

    with st.spinner("Generating Twitter tags..."):
        tags = generate_twitter_tags(topic, scraped_content)

    if tags:
        st.success("Twitter tags generated successfully!")
        st.write(tags)
|
||||
@@ -1,116 +0,0 @@
|
||||
"""Webpage content analysis tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import ConversationChain
|
||||
from urllib.parse import urlparse
|
||||
|
||||
st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
st.write("""
Welcome! This tool leverages the power of AI to analyze your web page's content. It goes beyond just keywords -
we'll use cutting-edge technology to uncover valuable insights and unlock new ways to boost your website!
""")

# --- User Input ---

url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
st.write(" ")

# --- AI Model Setup ---

# The LLM and conversation chain are created on every script run, before the
# button is pressed.
llm = OpenAI(temperature=0.7)
conversation_chain = ConversationChain(llm=llm)

# --- Analyze Button & Processing ---

if st.button("Analyze with AI!"):
    with st.spinner('Analyzing your content...'):
        url = url_input.strip()
        # NOTE(review): the selected language is not used by the analysis below.
        language = language_input.lower()

        if not url.startswith("http"):
            st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
            st.stop()

        try:
            # Validate URL
            # Inputs such as "http.example.com" pass the prefix check above
            # but still have no scheme, so one is added here.
            parsed_url = urlparse(url)
            if not parsed_url.scheme:
                url = "https://" + url

            # Fetch webpage content; the timeout keeps an unresponsive host
            # from hanging the app indefinitely.
            response = requests.get(url, timeout=10)
            response.raise_for_status()

            # Parse HTML
            soup = BeautifulSoup(response.text, 'html.parser')

            # Extract content
            # title.string is None when the <title> tag is empty or has
            # nested markup, so fall back to the placeholder in that case too.
            title = (soup.title.string if soup.title else None) or "No title found"
            meta_description = soup.find('meta', {'name': 'description'})
            # Use .get(): a description tag without a content attribute would
            # otherwise raise KeyError.
            description = (meta_description.get('content') if meta_description else None) or "No description found"

            # Display results
            st.subheader("Page Analysis")
            st.metric("Title", title)
            st.metric("Description", description)

            # Content statistics
            text_content = soup.get_text()
            words = text_content.split()
            st.metric("Word Count", len(words))
            st.metric("Unique Words", len(set(words)))

            # Frequency analysis over whitespace-separated words
            freq = nltk.FreqDist(words)
            keywords = freq.most_common(10)
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))

            # --- AI-Powered Insights ---
            # NOTE(review): the full page text is sent to the LLM in each of
            # the three prompts below; very long pages may exceed the model's
            # input limit — confirm.
            st.subheader("AI Insights:")
            st.write(" ")

            st.markdown("**Main Theme:**")
            ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
            st.markdown(f" {ai_theme}")

            st.write(" ")

            st.markdown("**Suggested Keywords:**")
            ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
            st.markdown(f" {ai_keywords}")

            st.write(" ")

            st.markdown("**Content Improvement:**")
            ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
            st.markdown(f" {ai_improvement}")

            # --- Display Frequency Results ---
            st.write(" ")

            st.subheader("Top Keywords:")
            st.write(" ")
            st.dataframe(df_keywords)

            st.subheader("What's the Value of This AI Analysis?")
            st.write(" ")

            st.markdown("""
* **Uncover Hidden Insights:** AI can analyze your content in much more nuanced ways, helping you spot connections and trends you might have missed.
* **Go Beyond Keywords:** AI can provide in-depth insights into your content's main themes, tone, and even suggest relevant topics to explore further.
* **AI as a Partner:** Think of this AI as your content strategist, offering guidance and actionable steps to make your content even better.

Ready to leverage the power of AI to optimize your content? Start putting the suggestions and insights you just received into practice. See what difference AI can make in your writing! 🚀
""")
        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
        except Exception as e:
            st.error(f"An error occurred: {e}")
|
||||
@@ -1,377 +0,0 @@
|
||||
import streamlit as st
|
||||
import advertools as adv
|
||||
import pandas as pd
|
||||
from urllib.parse import urlparse
|
||||
import requests
|
||||
from datetime import datetime
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
|
||||
# Title and introduction
|
||||
def show_title_and_intro():
    """Render the page title followed by the introductory paragraph."""
    intro_text = """
Welcome to the URL SEO Checkup! This tool is like a doctor for your website links.
Just paste your URL, and we'll check if it's healthy and ready to climb the search engine ladder.
"""
    st.title("🌟 URL SEO Checkup: Your Link's Health Report 🌟")
    st.write(intro_text)
|
||||
|
||||
|
||||
# Basic HTTPS Check
|
||||
def check_https(url):
    """Render the HTTPS check: success when ``url`` starts with "https://",
    otherwise a warning plus a hint on how to fix it."""
    st.subheader("The Basics - Are We Looking Good?")
    st.write("---")

    uses_https = url.startswith("https://")
    if not uses_https:
        st.warning("🚧 Heads Up! Your URL doesn't use 'https://'. This is a red flag for Google.")
        st.info("🔧 **How to fix:** Contact your hosting provider or website developer to install an SSL certificate. This will secure your site with HTTPS.")
        return

    st.success("✨ You're using HTTPS! This adds extra security, and Google rewards that with better rankings. Keep it up! ✨")
|
||||
|
||||
|
||||
# URL Length Check
|
||||
def check_url_length(path):
    """Render the length check: paths of up to 50 characters pass, longer
    ones get a warning plus a hint on how to shorten them."""
    st.subheader("The Length Test - Keep it Short and Sweet!")
    st.write("---")

    too_long = len(path) > 50
    if too_long:
        st.warning("🧭 Tip: Try shortening your URL. Shorter URLs are easier to remember and better for SEO.")
        st.info("🔧 **How to fix:** Consider removing unnecessary words or folders in the URL. Aim for concise, descriptive URLs that are easy for users to read.")
        return

    st.success("🏆 Great! Your URL is short and user-friendly. Google loves short URLs! 🏆")
|
||||
|
||||
|
||||
# Hyphen Check
|
||||
def check_hyphens(path):
    """Render the hyphen check: success when ``path`` contains a "-",
    otherwise a warning plus a hint on how to fix it."""
    st.subheader("The Hyphen Check - Use Hyphens for Clear Separation!")
    st.write("---")

    if "-" not in path:
        st.warning("❓ Did you know? Using hyphens between words (like 'shoes-for-sale') helps Google understand your URL better!")
        st.info("🔧 **How to fix:** Update your URL to use hyphens (-) instead of spaces or underscores (_). For example, 'shoes-for-sale' instead of 'shoes_for_sale'.")
        return

    st.success("😎 You're on the right track! Using hyphens makes your URL more readable for both users and Google. 😎")
|
||||
|
||||
|
||||
# File Extension Check
|
||||
def check_file_extension(path):
    """Render the file-extension check: success when ``path`` contains a
    ".", otherwise a warning plus a hint."""
    st.subheader("File Extension Check - Showing Your Files With Pride!")
    st.write("---")

    # Any dot anywhere in the path counts as an extension here.
    if "." not in path:
        st.warning("🤔 Your URL seems to be missing a file extension like '.html' or '.php'.")
        st.info("🔧 **How to fix:** While file extensions are not always required, adding them to static pages (like .html or .php) can improve clarity for search engines.")
        return

    st.success("🥳 File Extension Check: Your URL includes a file extension like '.html', which helps Google categorize your page. Nice job! 🥳")
|
||||
|
||||
|
||||
# Keyword Insights
|
||||
def show_keyword_insights(netloc, path):
    """Render the keyword advice section, echoing the URL's domain and path."""
    st.subheader("Bonus Insight - Let's Talk Keywords")
    st.write("---")

    st.info("Keywords are the words people use to search for information online. Your goal is to help Google understand what your page is about by using the right keywords in your URL!")

    summary = f"""
**Your Domain:** {netloc}
**Your URL Path:** {path}

**Suggestion:** Consider adding a primary keyword to your URL if it aligns with your page content. But don't overdo it – too many keywords can hurt your SEO. Keep it natural!
"""
    st.markdown(summary)
|
||||
|
||||
|
||||
# Enhanced HTTP Headers Analysis using advertools
|
||||
def analyze_http_headers(url):
    """Crawl the HTTP headers of ``url`` with advertools and render a report.

    The crawl output is written to a temporary ``.jl`` file, loaded into a
    DataFrame and shown in four tabs (security, SEO, performance, technical
    details). The temporary file is removed in a ``finally`` block, so it no
    longer leaks when the crawl or the JSON read raises.

    Args:
        url: The URL whose response headers are analysed.

    Any exception is reported in the Streamlit UI instead of being raised.
    """
    st.subheader("🔍 Advanced HTTP Headers Analysis")
    st.write("---")

    try:
        with st.spinner("Analyzing HTTP headers..."):
            # Reserve a file name only; the crawler writes to it afterwards,
            # hence delete=False.
            with tempfile.NamedTemporaryFile(mode='w', suffix='.jl', delete=False) as tmp_file:
                temp_filename = tmp_file.name

            try:
                # Use advertools to crawl headers
                adv.crawl_headers([url], temp_filename)

                # Read the results
                headers_df = pd.read_json(temp_filename, lines=True)
            finally:
                # Clean up temp file even when crawling or parsing failed.
                if os.path.exists(temp_filename):
                    os.unlink(temp_filename)

        if not headers_df.empty:
            # Display key SEO-relevant headers
            st.success("✅ Successfully analyzed HTTP headers!")

            # Create tabs for different header categories
            tab1, tab2, tab3, tab4 = st.tabs(["🔒 Security", "📈 SEO Headers", "⚡ Performance", "📊 Technical Details"])

            # Only the first row of the crawl result is inspected throughout.
            with tab1:
                st.write("### Security Headers Analysis")
                security_headers = {
                    'resp_headers_X-Frame-Options': 'X-Frame-Options',
                    'resp_headers_X-Content-Type-Options': 'X-Content-Type-Options',
                    'resp_headers_X-XSS-Protection': 'X-XSS-Protection',
                    'resp_headers_Strict-Transport-Security': 'Strict-Transport-Security',
                    'resp_headers_Content-Security-Policy': 'Content-Security-Policy',
                    'resp_headers_Referrer-Policy': 'Referrer-Policy'
                }

                for header_key, header_name in security_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.success(f"✅ **{header_name}**: Present")
                        with st.expander(f"View {header_name} Details"):
                            st.code(headers_df[header_key].iloc[0])
                    else:
                        st.warning(f"⚠️ **{header_name}**: Missing")
                        st.info(f"💡 **Recommendation**: Add {header_name} header for better security")

            with tab2:
                st.write("### SEO-Related Headers")
                seo_headers = {
                    'resp_headers_Content-Type': 'Content-Type',
                    'resp_headers_Content-Language': 'Content-Language',
                    'resp_headers_Cache-Control': 'Cache-Control',
                    'resp_headers_Expires': 'Expires',
                    'resp_headers_Last-Modified': 'Last-Modified',
                    'resp_headers_ETag': 'ETag'
                }

                for header_key, header_name in seo_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.success(f"✅ **{header_name}**: {headers_df[header_key].iloc[0]}")
                    else:
                        st.info(f"ℹ️ **{header_name}**: Not set or not detected")

                # Special handling for content-type
                if 'resp_headers_Content-Type' in headers_df.columns:
                    content_type = headers_df['resp_headers_Content-Type'].iloc[0]
                    if 'text/html' in str(content_type):
                        st.success("🎯 **Content-Type**: Properly set for HTML content")
                    if 'charset=utf-8' in str(content_type):
                        st.success("🌍 **Character Encoding**: UTF-8 detected - Great for international SEO!")

            with tab3:
                st.write("### Performance Headers")
                perf_headers = {
                    'resp_headers_Server': 'Server',
                    'resp_headers_X-Powered-By': 'X-Powered-By',
                    'resp_headers_Connection': 'Connection',
                    'resp_headers_Transfer-Encoding': 'Transfer-Encoding',
                    'resp_headers_Content-Encoding': 'Content-Encoding',
                    'resp_headers_Content-Length': 'Content-Length'
                }

                for header_key, header_name in perf_headers.items():
                    if header_key in headers_df.columns and not pd.isna(headers_df[header_key].iloc[0]):
                        st.info(f"📊 **{header_name}**: {headers_df[header_key].iloc[0]}")

                # Check for compression
                if 'resp_headers_Content-Encoding' in headers_df.columns:
                    encoding = headers_df['resp_headers_Content-Encoding'].iloc[0]
                    if 'gzip' in str(encoding) or 'br' in str(encoding):
                        st.success("🚀 **Compression**: Enabled - Great for page speed!")
                    else:
                        st.warning("⚠️ **Compression**: Consider enabling GZIP or Brotli compression")
                else:
                    st.warning("⚠️ **Compression**: Not detected - Consider enabling compression")

                # Check status code
                if 'status' in headers_df.columns:
                    status = headers_df['status'].iloc[0]
                    if status == 200:
                        st.success(f"✅ **HTTP Status**: {status} OK")
                    else:
                        st.warning(f"⚠️ **HTTP Status**: {status}")

            with tab4:
                st.write("### Complete Headers Analysis")

                # Show response headers only (more relevant for SEO)
                response_headers = {col: col.replace('resp_headers_', '') for col in headers_df.columns if col.startswith('resp_headers_')}
                if response_headers:
                    st.write("**Response Headers:**")
                    for col, display_name in response_headers.items():
                        if not pd.isna(headers_df[col].iloc[0]):
                            st.write(f"**{display_name}**: `{headers_df[col].iloc[0]}`")

                # Show crawl metadata
                st.write("**Crawl Information:**")
                metadata_cols = ['url', 'status', 'crawl_time', 'download_latency']
                for col in metadata_cols:
                    if col in headers_df.columns:
                        st.write(f"**{col.replace('_', ' ').title()}**: `{headers_df[col].iloc[0]}`")

                # Download option
                csv = headers_df.to_csv(index=False)
                st.download_button(
                    label="📥 Download Complete Headers Data as CSV",
                    data=csv,
                    file_name=f"headers_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv"
                )

        else:
            st.error("❌ Could not retrieve headers data")

    except Exception as e:
        st.error(f"❌ Error analyzing headers: {str(e)}")
        st.info("💡 **Tip**: Make sure the URL is accessible and try again")
|
||||
|
||||
|
||||
# Enhanced robots.txt and sitemap detection
|
||||
def check_robots_and_sitemap(url):
    """Report whether the site behind ``url`` serves robots.txt and an XML sitemap.

    Probes ``<scheme>://<netloc>/robots.txt`` and three conventional sitemap
    paths (10-second timeout each) and renders the outcome with Streamlit
    status widgets. Nothing is returned.

    Args:
        url: Absolute URL; only its scheme and network location are used.
    """
    st.subheader("🤖 Robots.txt & Sitemap Detection")
    st.write("---")

    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    # Check robots.txt
    robots_url = f"{base_url}/robots.txt"
    try:
        response = requests.get(robots_url, timeout=10)
    except requests.RequestException:
        # Only network/HTTP failures mean "could not check". A bare ``except``
        # here would also swallow unrelated errors and control-flow exceptions.
        st.error("❌ Could not check robots.txt")
    else:
        if response.status_code == 200:
            st.success(f"✅ **Robots.txt found**: {robots_url}")
            with st.expander("View robots.txt content"):
                st.code(response.text[:1000])  # Show first 1000 characters
        else:
            st.warning(f"⚠️ **Robots.txt not found**: Consider creating one at {robots_url}")

    # Check common sitemap locations
    sitemap_locations = [
        f"{base_url}/sitemap.xml",
        f"{base_url}/sitemap_index.xml",
        f"{base_url}/sitemaps.xml",
    ]

    sitemap_found = False
    for sitemap_url in sitemap_locations:
        try:
            response = requests.get(sitemap_url, timeout=10)
        except requests.RequestException:
            # An unreachable candidate is not fatal; try the next location.
            continue
        if response.status_code == 200:
            st.success(f"✅ **Sitemap found**: {sitemap_url}")
            sitemap_found = True
            break

    if not sitemap_found:
        st.warning("⚠️ **Sitemap not found**: Consider creating an XML sitemap")
        st.info("💡 **Recommendation**: Submit your sitemap to Google Search Console")
|
||||
|
||||
|
||||
# Enhanced URL structure analysis
|
||||
def enhanced_url_analysis(url):
    """Render a two-column view of ``url``: its components and basic SEO checks.

    The left column lists scheme, domain, path and (when present) query and
    fragment. The right column grades overall length, path depth and the
    presence of characters outside the allowed URL character set.
    """
    st.subheader("🔗 Enhanced URL Structure Analysis")
    st.write("---")

    parts = urlparse(url)
    components_col, seo_col = st.columns(2)

    with components_col:
        st.write("**URL Components:**")
        st.info(f"**Protocol**: {parts.scheme}")
        st.info(f"**Domain**: {parts.netloc}")
        st.info(f"**Path**: {parts.path}")
        # Query and fragment are optional, so they are shown only when non-empty.
        if parts.query:
            st.info(f"**Query**: {parts.query}")
        if parts.fragment:
            st.info(f"**Fragment**: {parts.fragment}")

    with seo_col:
        st.write("**SEO Analysis:**")

        # Length is measured on the full URL string, not just the path.
        total_chars = len(url)
        if total_chars > 100:
            st.error(f"❌ **URL Length**: {total_chars} characters (Too long)")
        elif total_chars > 60:
            st.warning(f"⚠️ **URL Length**: {total_chars} characters (Good, but could be shorter)")
        else:
            st.success(f"✅ **URL Length**: {total_chars} characters (Excellent)")

        # Depth counts the non-empty path segments between slashes.
        levels = sum(1 for piece in parts.path.split('/') if piece)
        if levels > 3:
            st.warning(f"⚠️ **URL Depth**: {levels} levels (Consider flattening)")
        else:
            st.success(f"✅ **URL Depth**: {levels} levels (Good)")

        # Any distinct character outside this allowed set is reported.
        allowed = frozenset(
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~:/?#[]@!$&'()*+,;="
        )
        unexpected = {ch for ch in url if ch not in allowed}
        if unexpected:
            st.warning(f"⚠️ **Special Characters**: Found {len(unexpected)} special characters")
        else:
            st.success("✅ **Special Characters**: Clean URL structure")
|
||||
|
||||
|
||||
# Enhanced main function to run the analysis
|
||||
def run_analysis(url):
    """Run every URL check in order, then print the summary and recommendations.

    Args:
        url: The URL to analyse; the path-level checks receive only its path.
    """
    parts = urlparse(url)
    domain, url_path = parts.netloc, parts.path

    # Basic checks
    check_https(url)
    check_url_length(url_path)
    check_hyphens(url_path)
    check_file_extension(url_path)

    # Enhanced analyses (structure, HTTP headers, robots/sitemap)
    enhanced_url_analysis(url)
    analyze_http_headers(url)
    check_robots_and_sitemap(url)

    # Keyword insights work on the domain and path separately
    show_keyword_insights(domain, url_path)

    # Closing summary
    st.subheader("📋 Analysis Summary & Recommendations")
    st.write("---")
    st.success("🎉 **Analysis Complete!** Review the findings above and implement the recommendations for better SEO performance.")

    st.write("**Key Recommendations:**")
    key_recommendations = (
        "✅ Ensure HTTPS is enabled for security and SEO benefits",
        "🔗 Keep URLs short, descriptive, and user-friendly",
        "🔒 Implement security headers to protect your site",
        "🤖 Create and maintain robots.txt and XML sitemaps",
        "⚡ Enable compression and optimize HTTP headers for performance",
        "📊 Monitor your URL structure and avoid excessive depth",
    )
    for tip in key_recommendations:
        st.write(tip)
|
||||
|
||||
|
||||
# Display the app
|
||||
def url_seo_checker():
    """Streamlit entry point: collect a URL, validate its scheme, run the analysis."""
    show_title_and_intro()

    url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
    st.write(" ")  # vertical spacer

    # Nothing to do until the user presses the button.
    if not st.button("Let's Analyze!"):
        return

    with st.spinner('Checking your link...'):
        url = url_input.strip()

        # Only absolute http(s) URLs are accepted; otherwise stop this script run.
        has_scheme = url.startswith("http://") or url.startswith("https://")
        if not has_scheme:
            st.error("Oops! It seems like your URL needs 'http://' or 'https://' at the beginning. Please add it!")
            st.stop()

        run_analysis(url)
|
||||
@@ -1,113 +0,0 @@
|
||||
"""Word cloud generation tool."""
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from wordcloud import WordCloud
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Page header and introduction.
st.title("🔎 Web Content Analyzer: Uncover Your Words' Power! 🔎")
st.write("""
Welcome! This tool helps you understand the words that drive your website content. Just paste in your web page's
URL, and we'll give you insights you can use to improve your content and reach more people!
""")

# User inputs.
url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
num_results_input = st.slider("How many top words/phrases should we show?", min_value=10, max_value=150, value=50)
st.write(" ")

# Exact-match allowlist compared against the URL's netloc before any fetch.
# NOTE(review): the default input's netloc is "www.example.com", which is not
# in this list, so the default URL is rejected — confirm that is intended.
authorized_domains = ["example.com", "another-example.com"]

if st.button("Analyze Your Content!"):
    with st.spinner('Analyzing your content...'):
        url = url_input.strip()
        language = language_input.lower()
        num_results = num_results_input

        # Require a real http(s) scheme rather than any string starting with "http".
        if not url.startswith(("http://", "https://")):
            st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
            st.stop()

        from urllib.parse import urlparse
        parsed_url = urlparse(url)
        if parsed_url.netloc not in authorized_domains:
            st.error("The domain of the provided URL is not authorized. Please use an authorized domain.")
            st.stop()

        try:
            # A timeout keeps an unresponsive server from hanging the script;
            # timeouts raise a RequestException subclass handled below.
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Check for errors

            soup = BeautifulSoup(response.content, 'html.parser')
            # Documents without a <body> fall back to the whole document's
            # text instead of failing on ``None.text``.
            body = soup.find('body')
            body_txt = body.text if body is not None else soup.get_text()

            words = [w.lower() for w in word_tokenize(body_txt)]
            # presumably needs the NLTK stopwords corpus for this language to
            # be installed — TODO confirm for every selectable language
            stopw = nltk.corpus.stopwords.words(language)

            # Keep alphabetic tokens that are not stop words.
            final_words = [w for w in words if w not in stopw and w.isalpha()]

            # Frequency analysis
            freq = nltk.FreqDist(final_words)
            keywords = freq.most_common(num_results)

            bigrams = ngrams(final_words, 2)
            freq_bigrams = nltk.FreqDist(bigrams)
            bigrams_freq = freq_bigrams.most_common(num_results)

            # Create DataFrames for Display
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
            df_bigrams = pd.DataFrame(bigrams_freq, columns=("Bigram", "Frequency"))

            st.subheader("Top Keywords and Phrases:")
            st.write(" ")
            st.dataframe(df_keywords)

            st.write(" ")

            st.subheader("Top Two-Word Phrases:")
            st.write(" ")
            st.dataframe(df_bigrams)

            st.write(" ")
            st.subheader("What's the Value of This Analysis?")
            st.write(" ")

            st.markdown("""
* **See What Resonates:** Discover the most popular words and phrases used on your website. This can reveal themes and topics that your audience is interested in.
* **Find Keywords for SEO:** The analysis helps identify relevant keywords you could use for your website content and marketing efforts.
* **Improve Your Content:** You can understand how people might search for similar content and ensure you're providing the right keywords.
* **Stand Out:** Compare your results to other websites or competitors to understand how you can differentiate your content.

Ready to dive deeper into your content's vocabulary? Start by making some of the keywords you just discovered the stars of your next blog post or social media message. You might be surprised at the impact! 🚀
""")

        except requests.exceptions.RequestException as e:
            st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
|
||||
|
||||
def generate_wordcloud(text):
    """Draw a word cloud for ``text`` and show total and unique word counts.

    Shows a warning and returns early when ``text`` is empty or otherwise falsy.
    """
    if not text:
        st.warning("Please enter some text to generate a word cloud.")
        return

    # Build the word cloud image from the raw text.
    cloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # Render it on a matplotlib figure with the axes hidden.
    st.subheader("Word Cloud Visualization")
    figure, axes = plt.subplots(figsize=(10, 5))
    axes.imshow(cloud, interpolation='bilinear')
    axes.axis('off')
    st.pyplot(figure)

    # Whitespace-split counts; uniqueness is case-sensitive.
    st.subheader("Text Statistics")
    tokens = text.split()
    st.metric("Total Words", len(tokens))
    st.metric("Unique Words", len(set(tokens)))
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import streamlit as st
|
||||
from google import genai
|
||||
import google.genai as genai
|
||||
from google.genai import types
|
||||
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
|
||||
|
||||
# Set page config
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
# Content Generation Dashboard
|
||||
|
||||
## Overview
|
||||
The Content Generation Dashboard is a central hub for ALwrity's content creation tools, providing an intuitive interface for accessing various AI-powered content generation capabilities.
|
||||
|
||||
## Features
|
||||
|
||||
### 1. Modality-Based Organization
|
||||
- **Text Generation**
|
||||
- Blog Writing
|
||||
- Story Creation
|
||||
- Product Descriptions
|
||||
- News Articles
|
||||
- Long-form Content
|
||||
|
||||
- **Social Media**
|
||||
- Instagram Posts
|
||||
- LinkedIn Content
|
||||
- YouTube Scripts
|
||||
|
||||
- **Image Generation**
|
||||
- AI Image Creation
|
||||
- Visual Content Tools
|
||||
|
||||
- **Audio/Video**
|
||||
- Speech to Blog
|
||||
- Audio Transcription
|
||||
|
||||
### 2. Smart Navigation
|
||||
- Quick access to recently used tools
|
||||
- Favorite tools management
|
||||
- Hierarchical navigation structure
|
||||
- Minimal-click access to tools
|
||||
|
||||
### 3. Error Handling
|
||||
- Custom exception handling
|
||||
- User-friendly error messages
|
||||
- Automatic error recovery
|
||||
- Detailed error logging
|
||||
|
||||
### 4. State Management
|
||||
- Persistent tool states
|
||||
- Usage analytics tracking
|
||||
- Performance monitoring
|
||||
- Session management
|
||||
|
||||
## Architecture
|
||||
|
||||
### Core Components
|
||||
1. **Dashboard UI (`dashboard.py`)**
|
||||
- Main interface rendering
|
||||
- Tool card management
|
||||
- Navigation controls
|
||||
- User interaction handling
|
||||
|
||||
2. **State Manager (`state_manager.py`)**
|
||||
- Tool state tracking
|
||||
- Usage metrics collection
|
||||
- State persistence
|
||||
- Navigation history
|
||||
|
||||
3. **Error Handler (`error_handler.py`)**
|
||||
- Custom exceptions
|
||||
- Error logging
|
||||
- Recovery mechanisms
|
||||
- User feedback
|
||||
|
||||
## Implementation Status
|
||||
|
||||
### Completed Features
|
||||
- ✅ Basic dashboard layout
|
||||
- ✅ Tool card implementation
|
||||
- ✅ Error handling system
|
||||
- ✅ State management
|
||||
- ✅ Navigation structure
|
||||
|
||||
### In Progress
|
||||
- 🔄 Performance optimization
|
||||
- 🔄 User analytics integration
|
||||
- 🔄 Tool loading improvements
|
||||
|
||||
### Planned Features
|
||||
- ⏳ Advanced error recovery
|
||||
- ⏳ Tool usage suggestions
|
||||
- ⏳ Accessibility improvements
|
||||
- ⏳ Performance monitoring
|
||||
|
||||
## Usage
|
||||
|
||||
### For Users
|
||||
1. Access the dashboard through ALwrity's main interface
|
||||
2. Select desired content generation modality
|
||||
3. Choose specific tool from available options
|
||||
4. Follow tool-specific workflows
|
||||
|
||||
### For Developers
|
||||
1. Error Handling:
|
||||
```python
|
||||
from content_generation.error_handler import DashboardError
|
||||
@@ -1,629 +0,0 @@
|
||||
import streamlit as st
|
||||
from typing import Dict, List
|
||||
from functools import lru_cache
|
||||
from datetime import datetime
|
||||
from loguru import logger
|
||||
|
||||
# Import all necessary AI writer functions
|
||||
from lib.ai_writers.ai_blog_writer.ai_blog_generator import ai_blog_writer_page
|
||||
from lib.ai_writers.ai_essay_writer import ai_essay_generator
|
||||
from lib.ai_writers.ai_news_article_writer import ai_news_generation
|
||||
from lib.utils.alwrity_utils import ai_news_writer, ai_finance_ta_writer, ai_social_writer, essay_writer
|
||||
from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
|
||||
from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
|
||||
from lib.ai_writers.twitter_writers import run_dashboard as twitter_writer
|
||||
from lib.ai_writers.insta_ai_writer import insta_writer
|
||||
from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
|
||||
from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
|
||||
from lib.utils.alwrity_utils import ai_agents_team
|
||||
|
||||
# Import SEO tools from ai_seo_tools
|
||||
from lib.ai_seo_tools.on_page_seo_analyzer import analyze_onpage_seo
|
||||
from lib.ai_seo_tools.weburl_seo_checker import url_seo_checker
|
||||
from lib.ai_seo_tools.content_title_generator import ai_title_generator, generate_blog_titles
|
||||
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
|
||||
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
|
||||
from lib.ai_seo_tools.image_alt_text_generator import alt_text_gen
|
||||
from lib.ai_seo_tools.opengraph_generator import og_tag_generator
|
||||
from lib.ai_seo_tools.google_pagespeed_insights import google_pagespeed_insights
|
||||
from lib.ai_seo_tools.sitemap_analysis import main as sitemap_analyzer
|
||||
from lib.ai_seo_tools.twitter_tags_generator import display_app as twitter_tags_app
|
||||
from lib.ai_seo_tools.enterprise_seo_suite import render_enterprise_seo_suite
|
||||
from lib.alwrity_ui.seo_tools_dashboard import ai_seo_tools
|
||||
|
||||
@lru_cache(maxsize=None)
def get_tool_implementations() -> Dict[str, callable]:
    """
    Build the mapping from tool display name to the callable that renders it.

    The result is memoised by ``lru_cache``, so the mapping is assembled once
    and the same dict is returned on later calls. Entries whose value is not
    callable are left out and reported through the logger.
    """
    def coming_soon(label: str):
        # Placeholder renderer for tools that have no implementation yet.
        return lambda: st.info(f"{label} coming soon!")

    registry = {
        # Text Generation Tools
        "AI Blog Writer": ai_blog_writer_page,
        "AI Essay Writer": essay_writer,
        "AI News Writer": ai_news_writer,
        "AI Content Team": ai_agents_team,

        # Business Content Tools
        "Financial TA Writer": ai_finance_ta_writer,
        "AI Social Media": ai_social_writer,

        # Social Media Specific Tools
        "Facebook Writer": facebook_main_menu,
        "LinkedIn Writer": linkedin_main_menu,
        "Twitter Writer": twitter_writer,
        "Instagram Writer": insta_writer,
        "YouTube Writer": youtube_main_menu,

        # SEO & Optimization Tools
        "SEO Dashboard": ai_seo_tools,
        "On-Page SEO Analyzer": analyze_onpage_seo,
        "URL SEO Checker": url_seo_checker,
        "AI Title Generator": lambda: _render_seo_tool("AI Title Generator", generate_blog_titles),
        "Meta Description Generator": metadesc_generator_main,
        "Structured Data Generator": ai_structured_data,
        "Alt Text Generator": alt_text_gen,
        "OpenGraph Tags": og_tag_generator,
        "Page Speed Insights": google_pagespeed_insights,
        "Sitemap Analyzer": sitemap_analyzer,
        "Twitter Cards Generator": twitter_tags_app,
        "Enterprise SEO Suite": render_enterprise_seo_suite,
    }

    # Creative, marketing and educational tools are placeholders for now.
    placeholder_names = (
        # Creative Content Tools
        "Story Generator",
        "Poetry Writer",
        "Script Writer",
        "Email Templates",
        # Marketing Content Tools
        "Ad Copy Generator",
        "Product Descriptions",
        "Press Releases",
        "Landing Page Copy",
        # Educational Content Tools
        "Course Content",
        "Tutorial Writer",
        "Quiz Generator",
        "Study Guides",
    )
    for label in placeholder_names:
        registry[label] = coming_soon(label)

    # Keep only entries that are actually callable; remember the rest.
    available = {}
    unavailable = []
    for name, implementation in registry.items():
        try:
            usable = callable(implementation)
        except Exception as e:
            logger.warning(f"Failed to load tool {name}: {e}")
            usable = False

        if usable:
            available[name] = implementation
        else:
            unavailable.append(name)

    if unavailable:
        logger.info(f"Some tools are not available: {unavailable}")

    return available
|
||||
|
||||
def _render_seo_tool(tool_name: str, tool_function):
    """Show a heading for an SEO tool, then run it and surface any failure.

    Tools whose name contains "Title Generator" use the dedicated title
    generator form instead of calling ``tool_function``.
    """
    st.markdown(f"## 🔍 {tool_name}")
    st.markdown("---")

    # The title generator has its own form-based UI.
    if "Title Generator" in tool_name:
        _render_title_generator_ui()
        return

    # Every other tool is invoked directly; errors are shown and logged.
    try:
        if not callable(tool_function):
            st.warning(f"Tool '{tool_name}' is not properly configured.")
        else:
            tool_function()
    except Exception as e:
        st.error(f"Error loading tool: {str(e)}")
        logger.error(f"Error in SEO tool {tool_name}: {str(e)}")
|
||||
|
||||
def _render_title_generator_ui():
    """Render the title generator form and, on submit, the generated titles."""
    st.markdown("### Generate SEO-Optimized Titles")

    # Input form
    with st.form("title_generator_form"):
        left_col, right_col = st.columns(2)

        with left_col:
            keywords = st.text_input(
                "Blog Keywords",
                placeholder="Enter your main keywords (comma-separated)",
                help="Primary keywords for your content"
            )
            title_type = st.selectbox(
                "Content Type",
                ["How-to Guide", "Listicle", "News Article", "Product Review", "Tutorial", "Case Study", "Opinion", "Research"]
            )

        with right_col:
            content = st.text_area(
                "Blog Content (Optional)",
                placeholder="Paste your blog content here for more targeted titles...",
                height=100,
                help="Optional: Paste existing content for more relevant titles"
            )
            title_intent = st.selectbox(
                "Search Intent",
                ["Informational", "Commercial", "Transactional", "Navigational"]
            )

        # NOTE(review): assumed to sit at form level, below both columns — confirm.
        language = st.selectbox(
            "Language",
            ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Hindi"]
        )

        submitted = st.form_submit_button("🚀 Generate Titles", use_container_width=True)

    # Nothing more to render until the form has been submitted.
    if not submitted:
        return

    if not keywords:
        st.warning("Please enter at least some keywords to generate titles.")
        return

    with st.spinner("🎯 Generating SEO-optimized titles..."):
        try:
            # Import and call the title generation function
            from lib.ai_seo_tools.content_title_generator import generate_blog_titles

            # Empty optional content is passed on as None.
            optional_content = content if content else None
            result = generate_blog_titles(
                input_blog_keywords=keywords,
                input_blog_content=optional_content,
                input_title_type=title_type,
                input_title_intent=title_intent,
                input_language=language
            )

            if not result:
                st.error("Failed to generate titles. Please try again.")
            else:
                st.success("✅ Titles generated successfully!")
                st.markdown("### 🎯 Your SEO-Optimized Titles:")

                # Show the raw result inside a fenced code block.
                st.markdown(f"```\n{result}\n```")

                # Placeholder button; it only shows a message when pressed.
                if st.button("📋 Copy All Titles"):
                    st.success("Titles copied to clipboard! (Feature coming soon)")

        except Exception as e:
            st.error(f"Error generating titles: {str(e)}")
            logger.error(f"Title generation error: {str(e)}")
|
||||
|
||||
def render_content_generation_dashboard():
    """Render the content generation dashboard page.

    Draws, in order: the header banner, the quick-access rows (recently used
    and popular tools), one button grid per tool category, a metrics footer
    and a capability showcase. Pressing any tool button passes the tool name
    to ``handle_tool_selection`` together with the dashboard state.
    """
    # Initialize dashboard state (creates missing session-state entries)
    dashboard_state = DashboardState()

    # Apply modern CSS
    apply_modern_css()

    # Main dashboard header
    st.markdown("""
<div class="main-dashboard">
<div class="dashboard-title">🚀 Alwrity Content Hub</div>
<div class="dashboard-subtitle">
Complete AI-powered content creation and SEO optimization suite. From writing to ranking - everything you need in one place.
</div>
<div style="display: flex; justify-content: center; gap: 2rem; margin-top: 1rem; flex-wrap: wrap;">
<div style="text-align: center;">
<div style="font-size: 2rem;">✍️</div>
<div style="font-size: 0.9rem; opacity: 0.8;">AI Writing</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem;">🔍</div>
<div style="font-size: 0.9rem; opacity: 0.8;">SEO Tools</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem;">📱</div>
<div style="font-size: 0.9rem; opacity: 0.8;">Social Media</div>
</div>
<div style="text-align: center;">
<div style="font-size: 2rem;">📊</div>
<div style="font-size: 0.9rem; opacity: 0.8;">Analytics</div>
</div>
</div>
</div>
""", unsafe_allow_html=True)

    # Quick access section
    st.markdown("""
<div class="quick-access">
<div class="section-title">⚡ Quick Access</div>
</div>
""", unsafe_allow_html=True)

    # Recent tools: at most five buttons, one column each
    if st.session_state.get('recent_tools'):
        st.markdown("### 📝 Recently Used")
        cols = st.columns(min(len(st.session_state.recent_tools), 5))
        for idx, tool in enumerate(st.session_state.recent_tools[:5]):
            with cols[idx]:
                # The index in the key keeps widget keys unique per position.
                if st.button(f"🔄 {tool}", key=f"recent_{tool}_{idx}"):
                    handle_tool_selection(tool, dashboard_state)

    # Popular tools
    popular_tools = ToolAnalytics.get_popular_tools()
    if popular_tools:
        st.markdown("### 🔥 Popular Tools")
        cols = st.columns(min(len(popular_tools), 5))
        for idx, tool in enumerate(popular_tools[:5]):
            with cols[idx]:
                if st.button(f"⭐ {tool}", key=f"popular_{tool}_{idx}"):
                    handle_tool_selection(tool, dashboard_state)

    # Content tools by category
    content_tools = {
        "Text Generation": {
            "tools": [
                {"name": "AI Blog Writer", "icon": "✍️", "desc": "Create SEO-optimized blog posts with AI assistance"},
                {"name": "AI Essay Writer", "icon": "📝", "desc": "Generate academic essays and research papers"},
                {"name": "AI News Writer", "icon": "📰", "desc": "Write breaking news articles and reports"},
                {"name": "AI Content Team", "icon": "👥", "desc": "Collaborative AI writing team for complex projects"}
            ]
        },
        "SEO & Optimization": {
            "tools": [
                {"name": "SEO Dashboard", "icon": "🔍", "desc": "Comprehensive SEO tools and analytics dashboard"},
                {"name": "On-Page SEO Analyzer", "icon": "📊", "desc": "Analyze and optimize individual page SEO elements"},
                {"name": "AI Title Generator", "icon": "🏷️", "desc": "Generate SEO-optimized titles for better rankings"},
                {"name": "Meta Description Generator", "icon": "📄", "desc": "Create compelling meta descriptions that drive clicks"},
                {"name": "Structured Data Generator", "icon": "🏗️", "desc": "Generate schema markup for rich search results"},
                {"name": "Page Speed Insights", "icon": "⚡", "desc": "Analyze and improve website performance metrics"},
                {"name": "Enterprise SEO Suite", "icon": "🏢", "desc": "Advanced SEO workflows for enterprise needs"}
            ]
        },
        "Business Content": {
            "tools": [
                {"name": "Financial TA Writer", "icon": "📊", "desc": "Generate technical analysis reports for stocks"},
                {"name": "Email Templates", "icon": "📧", "desc": "Professional email templates for business"},
                {"name": "Press Releases", "icon": "📢", "desc": "Company announcements and press releases"},
                {"name": "Landing Page Copy", "icon": "🌐", "desc": "High-converting landing page content"}
            ]
        },
        "Social Media": {
            "tools": [
                {"name": "Facebook Writer", "icon": "📘", "desc": "Facebook posts, ads, and content strategies"},
                {"name": "LinkedIn Writer", "icon": "💼", "desc": "Professional LinkedIn articles and posts"},
                {"name": "Twitter Writer", "icon": "🐦", "desc": "Engaging tweets and Twitter threads"},
                {"name": "Instagram Writer", "icon": "📷", "desc": "Instagram captions and story content"},
                {"name": "YouTube Writer", "icon": "🎬", "desc": "YouTube descriptions and video scripts"},
                {"name": "OpenGraph Tags", "icon": "🔗", "desc": "Optimize social media sharing with Open Graph tags"},
                {"name": "Twitter Cards Generator", "icon": "🐦", "desc": "Create Twitter Card markup for rich previews"}
            ]
        },
        "Creative Content": {
            "tools": [
                {"name": "Story Generator", "icon": "📚", "desc": "Creative short stories and narratives"},
                {"name": "Poetry Writer", "icon": "🎭", "desc": "Beautiful poems and verses"},
                {"name": "Script Writer", "icon": "🎬", "desc": "Scripts for videos, plays, and presentations"},
                {"name": "Song Lyrics", "icon": "🎵", "desc": "Original song lyrics and musical content"}
            ]
        }
    }

    # Render categories
    for category, category_data in content_tools.items():
        st.markdown(f"""
<div class="category-section">
<div class="category-header">{category}</div>
<div class="category-grid">
""", unsafe_allow_html=True)

        # Up to three columns per category; further tools wrap around
        tools = category_data["tools"]
        cols = st.columns(min(len(tools), 3))

        for idx, tool in enumerate(tools):
            col_idx = idx % 3
            with cols[col_idx]:
                # Create tool card with button
                if st.button(
                    f"{tool['icon']} {tool['name']}\n{tool['desc']}",
                    key=f"tool_{tool['name']}_{category}",
                    help=tool['desc']
                ):
                    handle_tool_selection(tool['name'], dashboard_state)

        st.markdown("</div></div>", unsafe_allow_html=True)

    # Footer with statistics
    st.markdown("---")
    st.markdown("### 📈 Alwrity Analytics")
    col1, col2, col3, col4 = st.columns(4)

    total_tools = len(get_tool_implementations())

    with col1:
        st.metric("🛠️ Total Tools", total_tools)
    with col2:
        # NOTE(review): hard-coded figure, not derived from the tool lists above.
        st.metric("🔍 SEO Tools", 12)  # Based on our SEO tool count
    with col3:
        st.metric("📝 Recent Tools", len(st.session_state.get('recent_tools', [])))
    with col4:
        st.metric("⭐ Favorites", len(st.session_state.get('favorite_tools', [])))

    # Add capability showcase
    st.markdown("""
<div style="background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); padding: 1.5rem; border-radius: 10px; margin-top: 1rem;">
<h4 style="color: #2c3e50; margin-bottom: 1rem;">✨ Why Choose Alwrity?</h4>
<div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem;">
<div>
<strong>🎯 All-in-One Solution</strong><br>
<small>Content creation, SEO optimization, and social media management in one platform</small>
</div>
<div>
<strong>🤖 AI-Powered Intelligence</strong><br>
<small>Advanced AI models for content generation and SEO analysis</small>
</div>
<div>
<strong>📊 Enterprise-Ready</strong><br>
<small>Scalable tools designed for teams and enterprise workflows</small>
</div>
<div>
<strong>🚀 Continuously Updated</strong><br>
<small>Regular updates with new tools and enhanced capabilities</small>
</div>
</div>
</div>
""", unsafe_allow_html=True)
|
||||
|
||||
class DashboardState:
    """Session-backed bookkeeping for recent tools, favourites and usage counts.

    All data lives in ``st.session_state``; the instance itself holds no fields.
    """

    def __init__(self):
        self.initialize_session_state()

    def initialize_session_state(self):
        """Create any missing session-state entry with an empty default."""
        defaults = (
            ('recent_tools', list),
            ('favorite_tools', list),
            ('tool_usage_count', dict),
        )
        for key, make_empty in defaults:
            if key not in st.session_state:
                st.session_state[key] = make_empty()

    def add_recent_tool(self, tool_name: str):
        """Move ``tool_name`` to the front of the recent list, capped at five."""
        recent = st.session_state.recent_tools
        # Drop an earlier occurrence so the tool is not listed twice.
        if tool_name in recent:
            recent.remove(tool_name)
        recent.insert(0, tool_name)
        # Keep only last 5 recent tools
        st.session_state.recent_tools = recent[:5]

    def toggle_favorite(self, tool_name: str):
        """Add ``tool_name`` to the favourites, or remove it if already there."""
        favorites = st.session_state.favorite_tools
        if tool_name not in favorites:
            favorites.append(tool_name)
        else:
            favorites.remove(tool_name)

    def increment_usage(self, tool_name: str):
        """Add one to the usage counter for ``tool_name`` (starting from zero)."""
        counts = st.session_state.tool_usage_count
        counts[tool_name] = counts.get(tool_name, 0) + 1
|
||||
|
||||
class ToolAnalytics:
    """Analytics for tool usage and recommendations."""

    @staticmethod
    def get_popular_tools(limit: int = 5) -> List[str]:
        """Return up to ``limit`` tool names, most used first.

        Usage counts are read from ``st.session_state['tool_usage_count']``.
        When nothing has been recorded yet, a fixed list of default tools is
        used instead; that list is truncated to ``limit`` as well, so the
        result never exceeds the requested size.

        Args:
            limit: Maximum number of tool names to return.

        Returns:
            Tool names ordered by descending usage count (or the defaults).
        """
        usage_count = st.session_state.get('tool_usage_count', {})

        if not usage_count:
            # Default popular tools showcasing Alwrity's key capabilities
            default_tools = [
                "AI Blog Writer",
                "SEO Dashboard",
                "AI Title Generator",
                "Meta Description Generator",
                "On-Page SEO Analyzer",
            ]
            # Honour ``limit`` here too; previously all five were always returned.
            return default_tools[:limit]

        ranked = sorted(usage_count.items(), key=lambda item: item[1], reverse=True)
        return [name for name, _count in ranked[:limit]]
|
||||
|
||||
def apply_modern_css():
    """Inject the dashboard stylesheet into the page.

    The ``<style>`` element is passed to ``st.markdown`` with
    ``unsafe_allow_html=True``.
    """
    dashboard_css = """
    <style>
    /* Main dashboard styling */
    .main-dashboard {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 15px;
        margin-bottom: 2rem;
        color: white;
    }

    .dashboard-title {
        font-size: 3rem;
        font-weight: 700;
        text-align: center;
        margin-bottom: 1rem;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
    }

    .dashboard-subtitle {
        font-size: 1.2rem;
        text-align: center;
        opacity: 0.9;
        margin-bottom: 2rem;
    }

    /* Tool cards */
    .tool-card {
        background: white;
        border-radius: 12px;
        padding: 1.5rem;
        margin: 0.5rem;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
        transition: all 0.3s ease;
        cursor: pointer;
        border: 2px solid transparent;
        height: 200px;
        display: flex;
        flex-direction: column;
        justify-content: space-between;
    }

    .tool-card:hover {
        transform: translateY(-5px);
        box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15);
        border-color: #667eea;
    }

    .tool-icon {
        font-size: 2.5rem;
        text-align: center;
        margin-bottom: 1rem;
    }

    .tool-title {
        font-size: 1.1rem;
        font-weight: 600;
        color: #333;
        text-align: center;
        margin-bottom: 0.5rem;
    }

    .tool-description {
        font-size: 0.9rem;
        color: #666;
        text-align: center;
        line-height: 1.4;
    }

    /* Quick access section */
    .quick-access {
        background: #f8f9fa;
        border-radius: 10px;
        padding: 1.5rem;
        margin-bottom: 2rem;
    }

    .section-title {
        font-size: 1.5rem;
        font-weight: 600;
        color: #333;
        margin-bottom: 1rem;
        display: flex;
        align-items: center;
        gap: 0.5rem;
    }

    /* Recent tools styling */
    .recent-tool {
        background: linear-gradient(135deg, #ff6b6b, #ee5a24);
        color: white;
        padding: 0.75rem 1rem;
        border-radius: 8px;
        margin: 0.25rem;
        font-weight: 500;
        text-align: center;
        cursor: pointer;
        transition: all 0.3s ease;
    }

    .recent-tool:hover {
        transform: scale(1.05);
        box-shadow: 0 4px 12px rgba(255, 107, 107, 0.4);
    }

    /* Category sections */
    .category-section {
        margin-bottom: 3rem;
    }

    .category-header {
        background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
        color: white;
        padding: 1rem 1.5rem;
        border-radius: 10px 10px 0 0;
        font-size: 1.3rem;
        font-weight: 600;
    }

    .category-grid {
        display: grid;
        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
        gap: 1rem;
        padding: 1.5rem;
        background: #f8f9fa;
        border-radius: 0 0 10px 10px;
    }

    /* Responsive design */
    @media (max-width: 768px) {
        .dashboard-title {
            font-size: 2rem;
        }
        .category-grid {
            grid-template-columns: 1fr;
        }
        .tool-card {
            height: auto;
            min-height: 150px;
        }
    }

    /* Success and info messages */
    .success-message {
        background: linear-gradient(135deg, #56ab2f, #a8e6cf);
        color: white;
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
    }

    .info-message {
        background: linear-gradient(135deg, #74b9ff, #0984e3);
        color: white;
        padding: 1rem;
        border-radius: 8px;
        margin: 1rem 0;
    }
    </style>
    """
    st.markdown(dashboard_css, unsafe_allow_html=True)
|
||||
|
||||
def handle_tool_selection(tool_name: str, dashboard_state: DashboardState):
    """Record the selection of ``tool_name`` and run the matching tool.

    The tool is first added to the recent list and its usage counter is
    incremented. It is then looked up in the mapping returned by
    ``get_tool_implementations()``; an unknown name only produces a warning.
    Errors are reported through ``st.error`` and logged rather than raised.

    Args:
        tool_name: Display name of the selected tool.
        dashboard_state: State helper used to update usage statistics.
    """
    try:
        # Usage statistics are updated before the availability check.
        dashboard_state.add_recent_tool(tool_name)
        dashboard_state.increment_usage(tool_name)

        registry = get_tool_implementations()
        if tool_name not in registry:
            st.warning(f"Tool '{tool_name}' is not available yet.")
            return

        st.markdown(f"<div class='success-message'>🚀 Launching {tool_name}...</div>", unsafe_allow_html=True)

        # Keep a spinner on screen while the tool callable runs.
        with st.spinner(f"Loading {tool_name}..."):
            try:
                registry[tool_name]()
                logger.info(f"Successfully launched tool: {tool_name}")
            except Exception as run_error:
                # A failing tool is reported here and does not reach the outer handlers.
                st.error(f"Error running {tool_name}: {str(run_error)}")
                logger.error(f"Error running tool {tool_name}: {run_error}")

    except ImportError as import_error:
        st.error(f"Unable to load {tool_name}. Some dependencies may be missing.")
        logger.error(f"Import error for {tool_name}: {import_error}")
    except Exception as unexpected_error:
        st.error(f"An unexpected error occurred: {str(unexpected_error)}")
        logger.error(f"Unexpected error in tool selection: {unexpected_error}")
|
||||
|
||||
# Main entry point: render the dashboard only when this file is run as the
# main module, not when it is imported.
if __name__ == "__main__":
    render_content_generation_dashboard()
|
||||
@@ -1,92 +0,0 @@
|
||||
Overview
|
||||
The AI Writer Blog Post-Processing module provides various utilities for enhancing, formatting, and managing blog content. The tools available in this module help automate tasks such as proof-reading, converting content to Markdown, converting Markdown to HTML, humanizing blog content, and saving processed blog content to a file.
|
||||
|
||||
Modules
|
||||
1. blog_proof_reader.py
|
||||
Description:
|
||||
This module provides functionality for proofreading blog content. It corrects grammar, enhances vocabulary, improves sentence structure, aligns tone and brand voice, optimizes content structure, and simplifies concepts.
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
from blog_proof_reader import blog_proof_editor
|
||||
|
||||
# Example usage
|
||||
blog_content = "Your raw blog content here"
|
||||
edited_content = blog_proof_editor(blog_content)
|
||||
print(edited_content)
|
||||
|
||||
```
|
||||
|
||||
2. convert_content_to_markdown.py
|
||||
Description:
|
||||
This module converts blog content to Markdown format to enhance readability and visual appeal. It follows best practices for structuring content using Markdown.
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
from convert_content_to_markdown import convert_tomarkdown_format
|
||||
|
||||
# Example usage
|
||||
blog_content = "Your raw blog content here"
|
||||
markdown_content = convert_tomarkdown_format(blog_content, gpt_provider="openai")
|
||||
print(markdown_content)
|
||||
|
||||
```
|
||||
|
||||
3. convert_markdown_to_html.py
|
||||
Description:
|
||||
This module converts Markdown content to HTML. (Implementation details are required to provide a specific example).
|
||||
|
||||
Usage:
|
||||
```
|
||||
from convert_markdown_to_html import convert_markdown_to_html
|
||||
|
||||
# Example usage
|
||||
markdown_content = "Your Markdown content here"
|
||||
html_content = convert_markdown_to_html(markdown_content)
|
||||
print(html_content)
|
||||
|
||||
```
|
||||
|
||||
4. humanize_blog.py
|
||||
Description:
|
||||
This module "humanizes" blog content by avoiding overused and robotic phrases, replacing them with more natural language to improve readability and engagement.
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
from humanize_blog import blog_humanize
|
||||
|
||||
# Example usage
|
||||
blog_content = "Your raw blog content here"
|
||||
humanized_content = blog_humanize(blog_content)
|
||||
print(humanized_content)
|
||||
|
||||
```
|
||||
|
||||
5. save_blog_to_file.py
|
||||
Description:
|
||||
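This module saves processed blog content as a Markdown file with YAML front matter (title, date, categories, tags, description and an optional image) in the directory named by the `CONTENT_SAVE_DIR` environment variable.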
|
||||
|
||||
Usage:
|
||||
|
||||
```
|
||||
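from save_blog_to_file import save_blog_to_file
|
||||
|
||||
# Example usage
|
||||
blog_content = "Your processed blog content here"
|
||||
# The output directory is taken from the CONTENT_SAVE_DIR environment variable.
|
||||
saved_path = save_blog_to_file(blog_content, "My Blog Title", "A short meta description", "ai, writing", "Technology")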
|
||||
```
|
||||
|
||||
~/AI-Writer/lib/blog_postprocessing
|
||||
├── blog_proof_reader.py
|
||||
├── convert_content_to_markdown.py
|
||||
├── convert_markdown_to_html.py
|
||||
├── humanize_blog.py
|
||||
└── save_blog_to_file.py
|
||||
|
||||
This README file should help you understand the purpose and functionality of each module within the AI Writer Blog Post-Processing directory. Adjust the usage examples and descriptions as per the actual implementations and additional details of your modules.
|
||||
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import configparser
|
||||
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
def blog_proof_editor(blog_content):
    """Ask the LLM to proofread and rewrite ``blog_content``.

    The prompt requests grammar and vocabulary fixes, better sentence
    structure, tone alignment, reorganised content and simpler wording.
    Language, tone and target length are read from the
    ``blog_characteristics`` section of the ``main_config`` file located two
    directories above this module.

    Args:
        blog_content: The blog text to be rewritten.

    Returns:
        The value returned by ``llm_text_gen`` for the prompt.

    Raises:
        configparser.Error: If the ``blog_characteristics`` section or one of
            its options is missing from the configuration.
        Exception: Any error raised by ``llm_text_gen`` is logged and re-raised.
    """
    # Local import: the module-level imports visible for this file define no `logger`,
    # so the original error path failed with NameError instead of reporting.
    import logging

    log = logging.getLogger(__name__)

    config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'main_config'))
    # Created outside the try block so `config` is always bound below.
    config = configparser.ConfigParser()
    try:
        config.read(config_path, encoding='utf-8')
    except (configparser.Error, OSError, UnicodeDecodeError) as err:
        # Best effort, as before: report and continue; a missing value fails below.
        log.error(f"ProofReader: Failed to read values from config: {err}")

    section = 'blog_characteristics'
    blog_language = config.get(section, 'blog_language')
    blog_tone = config.get(section, 'blog_tone')
    blog_length = config.get(section, 'blog_length')

    prompt = f"""As an expert content writer and editor, I will provide you with 'my blog' content.
        Your task is to rewrite my blog, by following the guidelines below.

        Below are the guidelines to follow:

        1). You must respond in {blog_language} language.
        2). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
        vocabulary for better readability.
        3). Improve Sentence Structure: Enhance sentence construction for better clarity and conversational flow.
        4). Tone and Brand Alignment: Adjust tone, voice, personality for {blog_tone} audience.
        5). Optimize Content Structure: Reorganize content for more impactful presentation, including better paragraphing & transitions.
        6). Simplify content: Simplify concepts and replace overly complex words. Use simple english words.
        7). Make sure your response content length is of {blog_length} words.

        \n\nMy Blog: '{blog_content}'. """

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        log.error(f"Error Blog Proof Reading: {err}")
        # Surface the failure instead of silently returning None.
        raise
|
||||
|
||||
import streamlit as st
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas as pd
|
||||
import nltk
|
||||
from nltk.tokenize import word_tokenize
|
||||
from nltk.util import ngrams
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.chains import ConversationChain
|
||||
|
||||
# ... (rest of your code)
|
||||
|
||||
# Runs five LLM critiques of the article, one after another, when the button is pressed.
# NOTE(review): `conversation_chain` is not defined in the visible part of this script, and
# none of the prompts below embed the article text; presumably the elided code above builds
# the chain and gives it the article. Confirm.
if st.button("Analyze with AI!"):
    # ... (fetch and process content as before)

    with st.spinner('Analyzing your content...'):
        st.subheader("AI Insights:")
        st.write(" ")

        # 1. Overall Critique
        st.markdown("**Overall Evaluation:**")
        ai_overall = conversation_chain.run(f"""Analyze the provided article and give a constructive critique, focusing on its strengths and weaknesses regarding:
            * Informativeness: Does it offer valuable information the reader might not know, or strengthen their understanding?
            * Authority: Does the author demonstrate expertise and credibility, backing up claims with evidence?
            * Captivatingness: Does it effectively engage the reader, capture attention, and make them want to continue reading?

            Provide specific examples to support your evaluation.
            """)
        st.markdown(f" {ai_overall}")
        st.write(" ")  # spacer between sections

        # 2. Structure & Organization
        st.markdown("**Structure and Organization:**")
        ai_structure = conversation_chain.run(f"""Analyze the structure and organization of the provided article.
            * Does it flow logically, with a clear beginning, middle, and end?
            * Are subheadings effectively used to break down the content and guide the reader?
            * Is the writing style consistent throughout the article?

            Suggest improvements for clarity and readability.
            """)
        st.markdown(f" {ai_structure}")
        st.write(" ")

        # 3. Content Quality
        st.markdown("**Content Quality:**")
        ai_content = conversation_chain.run(f"""Critique the content of the article, considering:
            * Is the value of the article clear?
            * Does it address a pain point or a need for the target audience?
            * Are the arguments compelling and supported by evidence or examples?
            * Are any technical terms explained well?

            Identify areas where the content could be strengthened or improved.
            """)
        st.markdown(f" {ai_content}")
        st.write(" ")

        # 4. Call to Action & Headline
        st.markdown("**Headline and Call to Action:**")
        ai_headline = conversation_chain.run(f"""Evaluate the effectiveness of the headline and call to action (CTA) in the provided article.
            * Does the headline accurately and compellingly summarize the article's content?
            * Is the CTA clear, actionable, and positioned well within the text?

            Provide suggestions for improving the headline and CTA.
            """)
        st.markdown(f" {ai_headline}")
        st.write(" ")

        # 5. Writing Style & Tone (last section, so no trailing spacer)
        st.markdown("**Writing Style and Tone:**")
        ai_style = conversation_chain.run(f"""Assess the overall writing style and tone of the article.
            * Does it use jargon or overly technical language that might be inaccessible to the target audience?
            * Is the tone appropriate for the topic and target audience (e.g., professional, conversational, humorous)?
            * Is the writing clear, concise, and engaging?

            Suggest ways to improve the writing style and make the article more accessible and compelling for the intended reader.
            """)
        st.markdown(f" {ai_style}")
|
||||
|
||||
# --- Display Keyword Results (same as before) ---
|
||||
# ... (rest of your code)
|
||||
@@ -1,75 +0,0 @@
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
|
||||
def convert_tomarkdown_format(blog_content, gpt_provider="openai"):
    """Reformat ``blog_content`` as Markdown using the selected LLM provider.

    Args:
        blog_content: The blog text to format. The prompts ask the model to
            change formatting only, not the content.
        gpt_provider: Provider selector, matched case-insensitively by
            substring: a value containing ``"openai"`` uses ``openai_chatgpt``,
            otherwise one containing ``"gemini"`` uses ``gemini_text_response``.

    Returns:
        The provider's response, or ``None`` when ``gpt_provider`` names
        neither supported provider.

    Raises:
        SystemError: If the provider call fails. The original error is chained
            as ``__cause__``. (Previously the exception object was created but
            never raised, so failures silently returned ``None``.)
    """
    provider = gpt_provider.lower()

    if 'openai' in provider:
        prompt = f"""As an expert in markdown language format and front matter,
        I will provide you with a blog post.
        Your task is to only Improve the formatting and structure of a blog post to enhance readability, visual appeal, and overall user experience. Do not alter the content of the provided blog. Modify only for the formatting.
        Dont provide explanations, just your final response.

        Guidelines to do formatting:
        1. **Headings for Structure:**
        - Use # for the main title of the blog post.
        - Use ## for subheadings that divide the post into clear sections.
        - Use ###, ####, etc. for additional subheadings as needed.
        - Keep the headings concise and descriptive.

        2. **Emphasizing Text:**
        - Use * or _ for italicizing important words or phrases.
        - Use ** or __ for bolding key points.
        - Use *** or ___ for bold italicizing very important text.
        - Use sparingly to avoid overwhelming the reader.

        3. **Lists:**
        - Use - or * for unordered lists.
        - Use 1., 2., etc. for ordered lists.
        - Keep list items concise and to the point.
        - Use consistent formatting for all lists.

        4. **Blockquotes:**
        - Use > to indent and highlight quotes or important information.
        - Use additional > for nested blockquotes.
        - Attribute quotes to their original source if applicable.

        5. **Code Blocks:**
        - Use backticks ` for inline code.
        - Use triple backticks ``` for code blocks.
        - Specify the language of the code block for syntax highlighting, e.g., ```python```.
        - Use code blocks to display code snippets or technical information.

        6. **Horizontal Lines:**
        - Use three or more asterisks, dashes, or underscores to create a horizontal line, e.g., ***, ---, or ___
        - Use horizontal lines to separate different sections of the blog post.

        7. **Table Formatting:**
        - Use pipes | and dashes - to create tables.
        - Align text within columns using colons :.
        - Use tables to present data or information in a structured format.

        8. **Other Best Practices:**
        - Use emojis sparingly and appropriately to add visual interest and enhance the reader's experience.
        - Proofread carefully for any errors in grammar, spelling, or formatting.
        - Keep the blog post organized and easy to navigate.
        - Use a consistent formatting style throughout the post.

        Blog Post: '{blog_content}'"""
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            raise SystemError("Openai Error in converting to Markdown format.") from err

    if 'gemini' in provider:
        # Gemini gets a much shorter instruction than the OpenAI prompt above.
        prompt = f""" Convert the given blog post into well structured MARKDOWN content.
        Do not alter the given blog post.
        blog post: "{blog_content}" """
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            raise SystemError("Gemini Error in converting to Markdown format.") from err

    # Unknown provider: behaviour unchanged, nothing is generated.
    return None
|
||||
@@ -1,37 +0,0 @@
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
|
||||
def convert_markdown_to_html(md_content):
    """Convert Markdown text to HTML by prompting the LLM.

    Args:
        md_content: Markdown-formatted text to convert.

    Returns:
        The response of ``openai_chatgpt`` for the conversion prompt.

    Raises:
        SystemError: If the provider call fails. The original error is chained
            as ``__cause__``. (Previously the exception object was created but
            never raised, so failures silently returned ``None``.)
    """
    prompt = f"""
        You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
        You will be given text in markdown format. Follow these steps to perform the conversion:

        1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
        Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
        lists (unordered and ordered), bold and italic text, links, images, and code blocks.
        2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
        the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
        3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
        line should be converted to <br> tags in HTML to preserve the line breaks.
        4. REMEMBER to generate complete, valid HTML response only.

        Follow below Conversion Guidelines:
        - Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
        - Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
        List items should be enclosed in <li> tags.
        - Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
        - Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
        - Images: Convert Markdown image tags (![alt text](url)) to HTML image (<img>) tags.
        Include the alt attribute for accessibility.
        - Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
        for preserving formatting.
        - Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
        Convert the following Markdown text to HTML: {md_content}
        """
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        raise SystemError("Error in convert to HTML") from err
|
||||
@@ -1,34 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
def blog_humanize(blog_content):
    """Ask the LLM to rewrite ``blog_content`` without overused "robotic" phrases.

    The prompt carries an 'Exception-list' of words and phrases and instructs
    the model to replace every occurrence of them in the given blog content.

    Args:
        blog_content: The blog text to humanize.

    Returns:
        The value returned by ``llm_text_gen`` for the prompt.

    Raises:
        Exception: Any error raised by ``llm_text_gen`` is logged and re-raised.
    """
    prompt = f"""The following is what I will refer to as an 'Exception-list'.
        Do Not include any of the words or phrases on this list in your future responses to this chat thread.
        These instructions are critical and require absolute adherence!

        Exception-list: ["Informed decisions", "blueprint", "realm", "holistic", "fosters", "informed investment decisions", "informed", "more than just", "it’s about" "navigating", "beacon", "bustling", "treasure trove", "landscape", "tailored", "tailor", “roadmap” , “are more than just”, "tailoring", "dive in", "delving", “streamlining” "dynamic", "robust", "stay tuned", "in conclusion", "seamless", "bustling", “isn't just”, “not just a”, “isn't merely an”, “cornerstone”, “bridge”, “whopping”, “testament”, “paramount” ,“diving into”, “delve into”, “pivotal”, “navigating”, “This isn't a”, “isn't just about“ ,“dives deep”, "It's not just about", “delve”, “harness”, journey”, “elevate”, “maze”, “puzzle”, “overwhelmed” , "and other robotic cliches”, 'It’s important to note', 'Delve into', 'Tapestry', 'Bustling', 'In summary', 'In conclusion', 'Unleash', 'Unveiling', 'ever-evolving', '', 'Remember that', 'Take a dive into', 'Navigating', 'Navigating the landscape', 'Navigating the complexities of', 'Landscape', 'The landscape of', 'Testament', 'a testament to', 'In the world of', 'Realm', 'Embark', 'virtuoso', 'Let's explore', 'symphony', 'Harnessing', 'Revolutionizing', 'Empower', 'game changing', 'ever-changing', 'Embrace', 'Embracing', 'game-changing', 'ever-evolving']

        As an expert content writer and editor, I will provide you with blog content.
        Your task is to replace all occurances of words from Exception-list from given blog content below.
        Before generating any text, examine the Exception-list and avoid all cases of these words and phrases.

        \n\nBlog Content: '{blog_content}'
        """
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # The message used to say "Openai Error Blog Proof Reading", copied from the proofreader.
        logger.error(f"Error humanizing blog content: {err}")
        # Bare raise keeps the original traceback.
        raise
|
||||
@@ -1,111 +0,0 @@
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import datetime
|
||||
import random
|
||||
from dateutil.relativedelta import relativedelta
|
||||
from textwrap import dedent
|
||||
import logging
|
||||
from zoneinfo import ZoneInfo
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def random_date_last_three_months():
    """Return a random timestamp from the last three months as a string.

    The window runs from three calendar months before "now" (taken in the
    ``Asia/Kolkata`` time zone) up to now, with one-second resolution. The
    result is formatted as ``'%Y-%m-%d %H:%M:%S %z'``.
    """
    now = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
    window_start = now - relativedelta(months=3)

    # Pick a whole number of seconds into the window, both ends included.
    window_seconds = int((now - window_start).total_seconds())
    offset = datetime.timedelta(seconds=random.randint(0, window_seconds))

    picked = window_start + offset
    return picked.strftime('%Y-%m-%d %H:%M:%S %z')
|
||||
|
||||
|
||||
def _slugify_blog_title(blog_title):
    """Return a filename-safe slug: spaces become hyphens, other non-alphanumerics are dropped."""
    slug = re.sub('[^A-Za-z0-9-]', '', blog_title.replace(" ", "-"))
    # Collapse runs of dashes left behind by removed characters.
    return re.sub('-+', '-', slug)


def _build_front_matter(title, date, categories, tags, description, main_img_path=None):
    """Return the YAML front matter block, always terminated by a blank line."""
    lines = [
        "---",
        f"title: {title}",
        f"date: {date}",
        f"categories: [{categories}]",
        f"tags: [{tags}]",
        f"description: {description}",
    ]
    if main_img_path:
        lines += [
            "img_path: '/assets/'",
            "image:",
            f"  path: {os.path.basename(main_img_path)}",
            f"  alt: {title}",
        ]
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path=None, file_type="md"):
    """
    Save the blog as a Markdown file with YAML front matter.

    The file is written to the directory named by the ``CONTENT_SAVE_DIR``
    environment variable and is named ``<today>-<slugified title>.md``. The
    front matter ``date`` is a random date from the last three months.

    Args:
        blog_content (str): The main content of the blog.
        blog_title (str): Title of the blog.
        blog_meta_desc (str): Meta description of the blog.
        blog_tags: Tags, interpolated as-is between ``[`` and ``]``.
        blog_categories: Categories, interpolated as-is between ``[`` and ``]``.
        main_img_path (str, optional): Path to the main image; only its base
            name is written to the front matter.
        file_type (str, optional): Output format. Only ``'md'`` is implemented.

    Returns:
        str: Path of the file that was written.

    Raises:
        FileNotFoundError: If ``CONTENT_SAVE_DIR`` is unset or does not exist.
        ValueError: If ``file_type`` is not ``'md'``.
        Exception: If the blog content cannot be written to the file.
    """
    blog_title_md = _slugify_blog_title(blog_title)
    logger.debug(f"Blog Title is: {blog_title_md}")

    # An unset variable used to reach os.path.exists(None) and fail with TypeError.
    output_path = os.getenv('CONTENT_SAVE_DIR')
    if not output_path or not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
        raise FileNotFoundError(f"Output directory does not exist: {output_path}")

    if file_type != "md":
        # No other format was ever implemented; fail with a clear message.
        raise ValueError(f"Unsupported file_type {file_type!r}: only 'md' is implemented.")

    logger.info("Writing/Saving the resultant blog content in Markdown format.")
    # Bulk generation benefits from randomized publishing dates.
    formatted_date = random_date_last_three_months()

    # Colons would break the unquoted YAML values; '**' is stray Markdown bold.
    front_title = blog_title.replace(":", "-").replace('"', '').replace('**', '')
    description = blog_meta_desc.replace(":", "-").replace('**', '')
    blog_frontmatter = _build_front_matter(
        front_title, formatted_date, blog_categories, blog_tags, description, main_img_path
    )

    blog_output_path = os.path.join(
        output_path,
        f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
    )

    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.write(blog_frontmatter)
            f.write(blog_content)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e

    logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
    return blog_output_path
|
||||
@@ -1,113 +0,0 @@
|
||||
# AI Agents Content Planner
|
||||
|
||||
This document describes the `ai_agents_planner` module, a sophisticated tool for creating highly detailed and SEO-optimized content calendars. This module leverages AI agents to perform web research, trend analysis, and content planning.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
To use this module, ensure the following are installed:
|
||||
- Python 3.10 or higher (the CrewAI package does not support older versions)
|
||||
- Streamlit
|
||||
- Crewai
|
||||
- Crewai Tools
|
||||
- Langchain Google GenAI
|
||||
- Google Gemini API key
|
||||
|
||||
## Installation
|
||||
|
||||
Install the required Python packages using pip:
|
||||
|
||||
```bash
|
||||
pip install streamlit crewai crewai_tools langchain_google_genai
|
||||
```
|
||||
|
||||
## Environment Setup
|
||||
|
||||
Ensure that you have set up the following environment variables:
|
||||
|
||||
- `GEMINI_API_KEY`: Your Google Gemini API key.
|
||||
- `SEARCH_SAVE_FILE`: Path to the file where search results are saved.
|
||||
|
||||
## Module Overview
|
||||
|
||||
The `ai_agents_planner` module consists of several key functions:
|
||||
|
||||
- **create_agents(search_keywords, already_written_on)**
|
||||
- This function creates the AI agents required for content research and planning. Each agent is assigned a specific role and set of tools to achieve their goals.
|
||||
- Agents:
|
||||
- **content_researcher**: Conducts web research to identify content opportunities.
|
||||
- **content_planner**: Develops a content calendar based on the research.
|
||||
- **google_trends_researcher**: Analyzes Google Trends data to suggest relevant keywords and titles.
|
||||
- **content_marketing_manager**: Ensures the content calendar is optimized and avoids keyword cannibalization.
|
||||
|
||||
- **create_tasks(agents, search_keywords, already_written_on)**
|
||||
- This function creates tasks for each agent, including web analysis, Google Trends analysis, content calendar development, and final review.
|
||||
|
||||
- **execute_tasks(agents, tasks)**
|
||||
- Executes the tasks assigned to each agent. The results are compiled into a comprehensive content calendar.
|
||||
|
||||
- **ai_agents_planner(search_keywords)**
|
||||
- The main function that orchestrates the creation of agents, assignment of tasks, and execution of the content planning process. It performs Google Trends analysis and generates the final content calendar.
|
||||
|
||||
## Example Usage
|
||||
|
||||
To use the `ai_agents_planner` module, follow these steps:
|
||||
|
||||
1. Set up the environment variables.
|
||||
2. Import the module and call the `ai_agents_planner` function with your target keywords.
|
||||
|
||||
```python
|
||||
import os
|
||||
from your_module import ai_agents_content_planner
|
||||
|
||||
# Set up environment variables
|
||||
os.environ['GEMINI_API_KEY'] = 'your_google_gemini_api_key'
|
||||
os.environ['SEARCH_SAVE_FILE'] = '/path/to/search_save_file.txt'
|
||||
|
||||
# Run the planner
|
||||
ai_agents_content_planner('your_target_keywords')
|
||||
```
|
||||
|
||||
## Detailed Agent Roles and Responsibilities
|
||||
|
||||
### Content Researcher: Aisha Sharma
|
||||
**Role**: Senior Web Research Analyst (Content Strategy)
|
||||
**Goal**: Create a detailed content calendar focused on specific keywords.
|
||||
**Responsibilities**:
|
||||
- Conduct web research and competitor analysis.
|
||||
- Identify high-value content opportunities.
|
||||
|
||||
### Content Planner: Ted XingPi
|
||||
**Role**: Senior Content Strategist & Planner
|
||||
**Goal**: Craft a series of content titles for a 2-month-long series.
|
||||
**Responsibilities**:
|
||||
- Develop a content calendar with unique and non-repetitive titles.
|
||||
- Ensure alignment with SEO best practices.
|
||||
|
||||
### Google Trends Researcher: Sarah Qureshi
|
||||
**Role**: Content Marketing & Google Trends Specialist
|
||||
**Goal**: Analyze Google Trends data and provide keyword recommendations.
|
||||
**Responsibilities**:
|
||||
- Identify high-volume, low-competition keywords.
|
||||
- Collaborate on content strategy and planning.
|
||||
|
||||
### Content Marketing Manager: Diksha Yuj
|
||||
**Role**: Content Marketing Manager
|
||||
**Goal**: Optimize the content calendar and ensure no keyword cannibalization.
|
||||
**Responsibilities**:
|
||||
- Review and finalize the content calendar.
|
||||
- Ensure all content is unique and SEO-optimized.
|
||||
|
||||
## Final Content Calendar
|
||||
|
||||
The result of the `ai_agents_planner` module is a highly detailed content calendar that positions your target keywords effectively. The content calendar includes:
|
||||
- Head Term Keyword
|
||||
- Long-Tail Keyword
|
||||
- Blog Post Title
|
||||
|
||||
This structured approach ensures a comprehensive content strategy, optimized for search engines and tailored to your audience.
|
||||
|
||||
## Conclusion
|
||||
|
||||
The `ai_agents_planner` module provides a robust framework for content planning and strategy. By leveraging AI agents and integrating web research, trend analysis, and content planning, it delivers a detailed content calendar tailored to your audience and optimized for search engines.
|
||||
|
||||
For further information and detailed documentation, refer to the module's code and comments.
|
||||
@@ -1,33 +0,0 @@
|
||||
* **Trending:** How Open-Source AI is Changing the Future of Content Creation
|
||||
* **Seasonal:** 5 Ways Open-Source AI Can Help You Write More Engaging Holiday Content
|
||||
* **Trending:** The Best Open-Source AI Writing Tools for Every Need
|
||||
* **Seasonal:** Open-Source AI Writing: A Threat to Human Writers or a Valuable Tool?
|
||||
* **Trending:** The Ethics of Open-Source AI Writing: What You Need to Know
|
||||
* **Seasonal:** How to Use Open-Source AI to Write Festive Social Media Posts
|
||||
* **Evergreen:** Open-Source AI Writing: The Good, the Bad, and the Ugly
|
||||
* **Trending:** How Open-Source AI Can Help You Write More Effective Blog Posts
|
||||
* **Seasonal:** Open-Source AI Writing: How to Create Holiday-Themed Website Content
|
||||
* **Evergreen:** How to Use Open-Source AI to Write Better Social Media Content
|
||||
* **Trending:** The Best Open-Source AI Writing Tools for Bloggers
|
||||
* **Seasonal:** How to Use Open-Source AI to Write Holiday-Themed Email Marketing Campaigns
|
||||
* **Seasonal:** Open Source AI Writers for Holiday Content Creation
|
||||
* **Evergreen:** How to Write Great Content with Open Source AI Writers: A Step-by-Step Guide
|
||||
* **Trending:** The Role of Open Source AI Writers in SEO
|
||||
* **Seasonal:** Open Source AI Writers for Black Friday and Cyber Monday
|
||||
* **Trending:** Open Source AI Writers and the Future of Content Consumption
|
||||
* **Trending:** Open Source AI Writers and the Rise of Personalized Content
|
||||
* **Trending:** Open Source AI Writers and the Future of Content Strategy
|
||||
* **Seasonal:** Open Source AI Writers for Back-to-School Content
|
||||
* **Trending:** Open Source AI Writers and the Rise of AI-Generated Art
|
||||
* How AI Writers Can Help You Create High-Quality Blog Posts
|
||||
* The Role of AI Writers in the Future of Content Creation
|
||||
* AI Writers vs. Human Writers: Which Is Better for Your Content?
|
||||
* The Dos and Don'ts of Using AI Writers
|
||||
| 5 | AI writing tool news | News about AI writing tools | The Latest News about AI Writing Tools |
|
||||
| 6 | AI writing tool resources | Resources for AI writing tools | The Best Resources for AI Writing Tools |
|
||||
| 6 | AI writing tool community | Community for AI writing tools | The Best Community for AI Writing Tools |
|
||||
| 6 | AI writing tool support | Support for AI writing tools | The Best Support for AI Writing Tools |
|
||||
| 7 | AI writing tool training | Training for AI writing tools | The Best Training for AI Writing Tools |
|
||||
| 7 | AI writing tool certification | Certification for AI writing tools | The Best Certification for AI Writing Tools |
|
||||
| 7 | AI writing tool courses | Courses for AI writing tools | The Best Courses for AI Writing Tools |
|
||||
| 8 | AI writing tool workshops | Workshops for AI writing tools | The Best Workshops for AI Writing Tools |
|
||||
@@ -1,241 +0,0 @@
|
||||
import os
|
||||
import streamlit as st
|
||||
|
||||
from crewai import Agent, Task, Crew
|
||||
from crewai_tools import SerperDevTool
|
||||
from langchain_google_genai import ChatGoogleGenerativeAI
|
||||
from crewai_tools import ScrapeWebsiteTool
|
||||
from crewai_tools import FileReadTool
|
||||
|
||||
from ..ai_web_researcher.google_trends_researcher import do_google_trends_analysis
|
||||
|
||||
|
||||
def create_agents(search_keywords, already_written_on):
    """Build the four crewai agents used for content-calendar planning.

    Args:
        search_keywords: Keywords the calendar is built around; interpolated
            into every agent's goal/backstory prompt.
        already_written_on: Path of the file listing previously planned
            titles. It is given to a FileReadTool and named in the prompts.

    Returns:
        list: ``[content_researcher, google_trends_researcher,
        content_planner, content_marketing_manager]`` in exactly this order
        (``create_tasks`` addresses the agents by position).
    """
    # Web search tool for the research agent.
    search_tool = SerperDevTool()

    # File tools are pinned to one path each, so an agent can only read the
    # file it was given: the saved search results for the trends researcher,
    # the already-planned titles for the planner and the manager.
    file_read_tool = FileReadTool(file_path=os.getenv('SEARCH_SAVE_FILE'))
    # TBD: Accept the user website urls and populate the file with sitemap.xml
    manager_read_tool = FileReadTool(file_path=already_written_on)

    # All agents share one Gemini-backed LLM.
    google_api_key = os.getenv("GEMINI_API_KEY")
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro", verbose=True, temperature=0.7, google_api_key=google_api_key
    )

    # Settings common to every agent.
    shared = dict(
        memory=True,             # Enable memory
        verbose=True,
        max_rpm=None,            # No limit on requests per minute
        allow_delegation=False,
        llm=llm,
    )

    content_researcher = Agent(
        role='Senior Web Research Analyst (Content Strategy): Aisha Sharma',
        goal=f"""Help Create a highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
        Provide web researched titles to be used for content calender & planning to Ted XingPi""",
        backstory=f"""

        Your Focus: Content Opportunity Analysis & Keyword Research ({search_keywords}).

        Your Skills:

        1). Web Research & Content Gap Identification (Expert).
        2). SEO Best Practices, Keyword Research & content planning expert (Advanced).
        3). Analyzes search trends and competitor content.
        4). Fuel company's content strategy with data-driven insights to attract and educate online readers.
        5). Identifies high-volume, low-competition keywords relevant to {search_keywords}.

        Responsibilities:

        1). Recommend high-value content opportunities through in-depth web research and competitor analysis.
        2). Provide your research to Senior Content Strategist & planner - Ted XingPi

        """,
        tools=[search_tool],
        max_iter=10,
        **shared,
    )

    # NOTE: goal and backstory must both be f-strings; the backstory used to
    # be a plain string, so the {search_keywords} placeholders were sent to
    # the model verbatim.
    content_planner = Agent(
        role='Senior Content Strategist & planner - Ted XingPi',
        goal=f"""
        Craft a series of content titles around {search_keywords} that can be expanded into 2 month-long series.
        Do not repeat the blog titles, always consult the previously written blog titles from the file: {already_written_on}.""",
        backstory=f"""You are Ted XingPi, with Experience of 15 years.

        Your Skills:
        1). Content Opportunity Analysis & Content calender planning (Expert).
        2). AI Applications for Content Marketing (Highly Knowledgeable).
        3). Content Strategy Development & keyword research for content opportunities.


        Your Responsibilties:

        1). Employ a balance of head terms (broad topics) and long-tail keywords (specific phrases) for optimal reach and targeting.
        2). Review & Include suggestions from Content Marketing & Google Trends Specialist - Sarah Qureshi.
        3). Identify content topics and keywords for {search_keywords}.
        4). Senior Web Research Analyst (Content Strategy): Aisha Sharma
        5). Create content calender that showcases the value proposition around {search_keywords}.
        6). New content should target unique keywords to avoid competition with existing content.
        7). Focus on specific aspects within a theme to differentiate semantically similar keywords for {search_keywords}.
        8). Collaborate with team to identify content gaps and trending topics, relevant to given keywords.
        9). Develop content calender with a focus on organic marketing to attract online customers.
        10). The content calender should include, Head Term Keyword, Long-Tail Keyword and Blog Post Title.
        """,
        tools=[manager_read_tool],
        max_iter=15,
        **shared,
    )

    google_trends_researcher = Agent(
        role='Content Marketing & Google Trends Specialist - Sarah Qureshi.',
        goal=f"""Help Create a highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
        Analyse & provide Google trends data for content calender & planning to Ted XingPi""",
        backstory=f"""You are Sarah Qureshi, with 10 years as a content writer and planner.
        Your Skills:
        1). Proven experience in using Google Trends for keyword research.
        2). Strong understanding of SEO best practices.
        3). Reading files and understanding long table with data.

        Your responsibilties:
        1). Collaborate on content strategy, provide keyword, titles recommendations to Ted XingPi.
        2). Recommend high-volume, low-competition keywords, titles with strong user intent.
        3). Recommend, Rising search queries related to {search_keywords}.
        4). Recommend keywords, blog titles for preparing/planning the content calender.
        5). Provide your research to Senior Content Strategist & planner - Ted XingPi
        """,
        tools=[file_read_tool],
        max_iter=15,
        **shared,
    )

    # NOTE: the backstory is an f-string so that {already_written_on} and
    # {search_keywords} are actually substituted (previously sent literally).
    content_marketing_manager = Agent(
        role="Content Marketing Manager - Diksha Yuj",
        goal=f"""Create highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
        Use insights and context from team members: Sarah Qureshi, Ted XingPi and Aisha Sharma""",
        backstory=f"""
        Content Marketing Manager: Diksha Yuj
        Experience: Digital Marketing Veteran (15+ years)

        Mission: Supercharge organic growth of the company, with content marketing.

        Responsibilities:

        1). Ensures that content titles are not repeated & No keyword cannabilization.
        2). Maintains and consults a file for all previous written titles({already_written_on}).
        3). Develops a content calendar aligned and optimized around {search_keywords}.
        4). Keenly follows & learns the research and communication of other team members.
        5). The content calender should include, Head Term Keyword, Long-Tail Keyword and Blog Post Title.
        6). Use insights and context from team members: Sarah Qureshi, Ted XingPi and Aisha Sharma
        """,
        tools=[manager_read_tool],
        max_iter=10,
        **shared,
    )

    return [content_researcher, google_trends_researcher, content_planner, content_marketing_manager]
|
||||
|
||||
|
||||
def create_tasks(agents, search_keywords, already_written_on):
    """Create one crewai Task per agent for the content-calendar workflow.

    Args:
        agents: Agents in the order returned by ``create_agents``
            (researcher, trends researcher, planner, marketing manager).
        search_keywords: Keywords interpolated into the task prompts.
        already_written_on: Path of the previously-planned-titles file that
            the planner and manager tasks are told to read.

    Returns:
        list: ``[research_task, google_trends_task, planner_task,
        marketing_manager_task]``.
    """
    # Path of the saved search results, quoted twice in the trends prompt.
    search_save_file = os.getenv('SEARCH_SAVE_FILE')

    # Web research, handled by the researcher agent.
    research_task = Task(
        description=f"""Conduct web analysis on "{search_keywords}",for content calender.
        Set the input parameter 'search_query' to query""",
        expected_output="""Provide comprehensive content calender ideas to Senior Content Strategist & planner - Ted XingPi""",
        agent=agents[0],
    )

    # Google Trends analysis over the saved search results file.
    google_trends_task = Task(
        description=f"""Conduct Google Trends analysis, on keywords: {search_keywords}, from the file({search_save_file}).
        Suggest blog titles for content calender. Recommend high-volume, low-competition keywords with strong user intent.
        Set the input parameter 'file_path' to {search_save_file}""",
        expected_output="Provide comprehensive content calender ideas to Senior Content Strategist & planner - Ted XingPi",
        agent=agents[1],
    )

    # Calendar drafting, handled by the planner agent.
    planner_task = Task(
        description=f"""Develop a content calendar for {search_keywords}, based team member's.
        New content should target unique keywords to avoid competition with existing content.
        Use context & insights from Aisha Sharma & Sarah Qureshi.
        Set the input parameter file_path to {already_written_on}""",
        expected_output=f"""A Highly detailed content calender that positions {search_keywords} as a must-read for industry insiders and newcomers alike. Final content calender for the next 2 months. Targeting 5 articles per week.
        """,
        agent=agents[2],
    )

    # Final review, handled by the marketing manager agent.
    marketing_manager_task = Task(
        description=f"""Make sure the content calender is optimised for keywords: '{search_keywords}'.
        Make sure the titles are unique, semantically unique and mitigate keyword cannabilization.
        Use context & insights from Aisha Sharma, Ted XingPi & Sarah Qureshi.
        Set the input parameter 'file_path' to {already_written_on}
        """,
        expected_output="""Final content calender for the next 2 months. Targeting 5 articles per week.
        Make sure to present the content calender in tabular format. Include details of how to use the content calender.
        """,
        agent=agents[3],
    )

    return [research_task, google_trends_task, planner_task, marketing_manager_task]
|
||||
|
||||
|
||||
def execute_tasks(agents, tasks):
    """Assemble a Crew from the agents and tasks and run it (WIP).

    Args:
        agents: Agents taking part in the crew.
        tasks: Tasks handed to the crew.

    Returns:
        Whatever ``crew.kickoff()`` returns, or ``None`` when kickoff raises.
        The exception is printed and not re-raised, so callers never see it.
    """
    crew = Crew(
        agents=agents,
        tasks=tasks,
        verbose=2,  # You can set it to 1 or 2 for different logging levels
        language="en",
    )

    try:
        return crew.kickoff()
    except Exception as err:
        print(err)

    return None
|
||||
|
||||
|
||||
def ai_agents_content_planner(search_keywords):
    """Run the full agent pipeline and render the content calendar in Streamlit.

    Runs the Google Trends analysis for the keywords, then creates the agents,
    creates their tasks, and executes them. Each step reports its failure with
    ``st.error`` and stops the pipeline, because later steps cannot run
    without the earlier step's output.

    Args:
        search_keywords: Keywords the content calendar should target.

    Returns:
        None. Output is written to the Streamlit page.
    """
    already_written_on = os.path.join(
        os.getcwd(), "lib", "content_planning_calender", "content_already_planned.txt"
    )
    do_google_trends_analysis(search_keywords)

    try:
        agents = create_agents(search_keywords, already_written_on)
    except Exception as err:
        st.error(f"Failed in Creating in Agents: {err}")
        return

    try:
        tasks = create_tasks(agents, search_keywords, already_written_on)
    except Exception as err:
        st.error(f"Failed to Create Agent Tasks: {err}")
        return

    try:
        result = execute_tasks(agents, tasks)
    except Exception as err:
        st.error(f"Failed to execute Agent Tasks: {err}")
        return

    # execute_tasks returns None when the crew run failed internally; do not
    # render a literal "None" as the calendar.
    if result is None:
        st.error("Failed to execute Agent Tasks: no result was produced.")
        return

    st.markdown("### Final Content Calender:")
    st.markdown(result)
|
||||
@@ -1,309 +0,0 @@
|
||||
"""
|
||||
Gemini Audio Text Generation Module
|
||||
|
||||
This module provides a comprehensive interface for working with audio files using Google's Gemini API.
|
||||
It supports various audio processing capabilities including transcription, summarization, and analysis.
|
||||
|
||||
Key Features:
|
||||
------------
|
||||
1. Audio Transcription: Convert speech in audio files to text
|
||||
2. Audio Summarization: Generate concise summaries of audio content
|
||||
3. Segment Analysis: Analyze specific time segments of audio files
|
||||
4. Timestamped Transcription: Generate transcriptions with timestamps
|
||||
5. Token Counting: Count tokens in audio files
|
||||
6. Format Support: Information about supported audio formats
|
||||
|
||||
Supported Audio Formats:
|
||||
----------------------
|
||||
- WAV (audio/wav)
|
||||
- MP3 (audio/mp3)
|
||||
- AIFF (audio/aiff)
|
||||
- AAC (audio/aac)
|
||||
- OGG Vorbis (audio/ogg)
|
||||
- FLAC (audio/flac)
|
||||
|
||||
Technical Details:
|
||||
----------------
|
||||
- Each second of audio is represented as 32 tokens
|
||||
- Maximum supported length of audio data in a single prompt is 9.5 hours
|
||||
- Audio files are downsampled to 16 Kbps data resolution
|
||||
- Multi-channel audio is combined into a single channel
|
||||
|
||||
Usage:
|
||||
------
|
||||
```python
|
||||
from lib.gpt_providers.audio_to_text_generation.gemini_audio_text import transcribe_audio, summarize_audio, analyze_audio_segment
|
||||
|
||||
# Basic transcription
|
||||
transcript = transcribe_audio("path/to/audio.mp3")
|
||||
print(transcript)
|
||||
|
||||
# Summarization
|
||||
summary = summarize_audio("path/to/audio.mp3")
|
||||
print(summary)
|
||||
|
||||
# Analyze specific segment
|
||||
segment_analysis = analyze_audio_segment("path/to/audio.mp3", "02:30", "03:29")
|
||||
print(segment_analysis)
|
||||
```
|
||||
|
||||
Requirements:
|
||||
------------
|
||||
- GEMINI_API_KEY environment variable must be set
|
||||
- google-genai Python package (imported as `from google import genai`)
|
||||
- python-dotenv for environment variable management
|
||||
- loguru for logging
|
||||
|
||||
Dependencies:
|
||||
------------
|
||||
- google.genai
|
||||
- dotenv
|
||||
- loguru
|
||||
- os, sys, base64, typing
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import base64
|
||||
from typing import Optional, Dict, Any, List, Union
|
||||
from dotenv import load_dotenv
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
|
||||
from loguru import logger
|
||||
# Replace loguru's default handler with a single colourised stdout sink.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
|
||||
|
||||
|
||||
def load_environment():
    """Load variables from a ``.env`` file via python-dotenv and log that it ran."""
    load_dotenv()
    logger.info("Environment variables loaded successfully.")
|
||||
|
||||
|
||||
def configure_google_api():
    """Validate the Gemini API key and build a client for the google-genai SDK.

    This module imports ``from google import genai`` (the google-genai SDK),
    which has no module-level ``configure``; credentials are bound to a
    ``genai.Client`` instead.

    Returns:
        genai.Client: A client authenticated with ``GEMINI_API_KEY``.

    Raises:
        ValueError: If the GEMINI_API_KEY environment variable is not set.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
        logger.error(error_message)
        raise ValueError(error_message)

    client = genai.Client(api_key=api_key)
    logger.info("Google Gemini API configured successfully.")
    return client
|
||||
|
||||
|
||||
def transcribe_audio(audio_file_path: str, prompt: str = "Transcribe the following audio:") -> Optional[str]:
    """
    Transcribes audio using Google's Gemini model.

    The file is uploaded through the google-genai Files API and passed to the
    model together with ``prompt``. This function never raises: every failure
    (missing API key, missing file, upload or generation error) is logged and
    reported as ``None``.

    Args:
        audio_file_path (str): The path to the audio file to be transcribed.
        prompt (str, optional): The prompt to guide the transcription. Defaults to "Transcribe the following audio:".

    Returns:
        str: The text returned by the model for the audio.
        Returns None if transcription fails for any reason.
    """
    try:
        load_environment()

        # Build the client here so this function does not depend on module-level
        # SDK state (the google-genai SDK has none).
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            logger.error("Google API key not found. Please set the GEMINI_API_KEY environment variable.")
            return None

        logger.info(f"Attempting to transcribe audio file: {audio_file_path}")

        if not os.path.exists(audio_file_path):
            logger.error(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.")
            return None

        client = genai.Client(api_key=api_key)

        # Upload the audio file
        try:
            audio_file = client.files.upload(file=audio_file_path)
            logger.info(f"Audio file uploaded successfully: {audio_file=}")
        except Exception as e:
            logger.error(f"Error uploading audio file: {e}")
            return None

        # Generate the transcription
        try:
            response = client.models.generate_content(
                model="gemini-1.5-flash",
                contents=[prompt, audio_file],
            )
        except Exception as e:
            logger.error(f"Error during transcription: {e}")
            return None

        # ``text`` is missing/None when the model returned no usable text.
        transcript = getattr(response, "text", None)
        if transcript is None:
            logger.warning("Transcription failed: Invalid or empty response from API.")
            return None

        logger.info(f"Transcription successful:\n{transcript}")
        return transcript

    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return None
|
||||
|
||||
|
||||
def summarize_audio(audio_file_path: str) -> Optional[str]:
    """
    Asks the Gemini model for a summary of an audio file.

    Delegates to ``transcribe_audio`` with a summarization prompt.

    Args:
        audio_file_path (str): The path to the audio file to be summarized.

    Returns:
        str: The result of ``transcribe_audio`` for the summarization prompt.
        Returns None if summarization fails.
    """
    summary_prompt = "Please summarize the audio content:"
    return transcribe_audio(audio_file_path, prompt=summary_prompt)
|
||||
|
||||
|
||||
def analyze_audio_segment(audio_file_path: str, start_time: str, end_time: str) -> Optional[str]:
    """
    Asks the Gemini model to analyze one time range of an audio file.

    The whole file is still sent; the range is only stated in the prompt.
    The timestamps are inserted into the prompt verbatim, without validation.

    Args:
        audio_file_path (str): The path to the audio file.
        start_time (str): Start time in MM:SS format.
        end_time (str): End time in MM:SS format.

    Returns:
        str: The result of ``transcribe_audio`` for the segment prompt.
        Returns None if analysis fails.
    """
    return transcribe_audio(
        audio_file_path,
        prompt=f"Analyze the audio content from {start_time} to {end_time}.",
    )
|
||||
|
||||
|
||||
def transcribe_with_timestamps(audio_file_path: str) -> Optional[str]:
    """
    Requests a transcription that carries a timestamp for each segment.

    Delegates to ``transcribe_audio`` with a timestamp-oriented prompt.

    Args:
        audio_file_path (str): The path to the audio file.

    Returns:
        str: The result of ``transcribe_audio`` for the timestamp prompt.
        Returns None if transcription fails.
    """
    timestamp_prompt = "Transcribe the audio with timestamps for each segment:"
    return transcribe_audio(audio_file_path, prompt=timestamp_prompt)
|
||||
|
||||
|
||||
def count_tokens(audio_file_path: str) -> Optional[int]:
    """
    Counts the number of tokens in an audio file.

    The file is uploaded through the google-genai Files API and the model's
    token counter is queried for it. This function never raises: every
    failure is logged and reported as ``None``.

    Args:
        audio_file_path (str): The path to the audio file.

    Returns:
        int: ``total_tokens`` as reported by the API for the uploaded file.
        Returns None if counting fails for any reason.
    """
    try:
        load_environment()

        # Build the client here so this function does not depend on module-level
        # SDK state (the google-genai SDK has none).
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            logger.error("Google API key not found. Please set the GEMINI_API_KEY environment variable.")
            return None

        logger.info(f"Attempting to count tokens in audio file: {audio_file_path}")

        if not os.path.exists(audio_file_path):
            logger.error(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.")
            return None

        client = genai.Client(api_key=api_key)

        # Upload the audio file
        try:
            audio_file = client.files.upload(file=audio_file_path)
            logger.info(f"Audio file uploaded successfully: {audio_file=}")
        except Exception as e:
            logger.error(f"Error uploading audio file: {e}")
            return None

        # Count tokens
        try:
            response = client.models.count_tokens(
                model="gemini-1.5-flash",
                contents=[audio_file],
            )
            token_count = response.total_tokens
        except Exception as e:
            logger.error(f"Error counting tokens: {e}")
            return None

        logger.info(f"Token count: {token_count}")
        return token_count

    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return None
|
||||
|
||||
|
||||
def get_supported_formats() -> List[str]:
    """
    Lists the audio MIME types this module advertises as supported.

    Returns:
        List[str]: A new list of MIME type strings, in fixed order.
    """
    subtypes = ("wav", "mp3", "aiff", "aac", "ogg", "flac")
    return [f"audio/{subtype}" for subtype in subtypes]
|
||||
|
||||
|
||||
# Example usage
|
||||
if __name__ == "__main__":
    # Demo run: exercise each public helper once against a placeholder path
    # and print the labelled result.
    audio_path = "path/to/your/audio.mp3"

    examples = [
        ("Transcript", lambda: transcribe_audio(audio_path)),
        ("Summary", lambda: summarize_audio(audio_path)),
        ("Segment Analysis", lambda: analyze_audio_segment(audio_path, "02:30", "03:29")),
        ("Timestamped Transcript", lambda: transcribe_with_timestamps(audio_path)),
        ("Token Count", lambda: count_tokens(audio_path)),
        ("Supported Formats", get_supported_formats),
    ]
    for label, run_example in examples:
        print(f"{label}: {run_example()}")
|
||||
@@ -1,206 +0,0 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
from pytubefix import YouTube
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
from tqdm import tqdm
|
||||
import streamlit as st
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
from .gemini_audio_text import transcribe_audio
|
||||
|
||||
|
||||
def progress_function(stream, chunk, bytes_remaining):
    """Download progress callback: advance the global bar to the fraction done.

    Args:
        stream: Stream being downloaded; only ``stream.filesize`` is read.
        chunk: Unused here; part of the callback signature.
        bytes_remaining: Number of bytes still to download.
    """
    total_size = stream.filesize
    fraction_done = (total_size - bytes_remaining) / total_size
    # ``update`` takes an increment, so pass the difference from the bar's
    # current position (the module-level ``progress_bar`` must already exist).
    progress_bar.update(fraction_done - progress_bar.n)
|
||||
|
||||
|
||||
def rename_file_with_underscores(file_path):
    """Rename a file on disk so its name contains only safe characters.

    Every character of the file name that is not a word character, ``-``,
    ``_`` or ``.`` is replaced by an underscore. The directory part of the
    path is left untouched.

    Args:
        file_path (str): The original file path.

    Returns:
        str: The path of the renamed file.
    """
    parent_dir = os.path.dirname(file_path)
    base_name = os.path.basename(file_path)

    sanitized_name = re.sub(r'[^\w\-_\.]', '_', base_name)
    target_path = os.path.join(parent_dir, sanitized_name)

    os.rename(file_path, target_path)
    return target_path
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
def speech_to_text(video_url):
|
||||
"""
|
||||
Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.
|
||||
|
||||
Args:
|
||||
video_url (str): URL of the YouTube video to transcribe.
|
||||
output_path (str, optional): Directory where the audio file will be saved. Defaults to '.'.
|
||||
|
||||
Returns:
|
||||
str: The transcribed text from the video.
|
||||
|
||||
Raises:
|
||||
SystemExit: If a critical error occurs that prevents successful execution.
|
||||
"""
|
||||
output_path = os.getenv("CONTENT_SAVE_DIR")
|
||||
yt = None
|
||||
audio_file = None
|
||||
with st.status("Started Writing..", expanded=False) as status:
|
||||
try:
|
||||
if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
|
||||
logger.info(f"Accessing YouTube URL: {video_url}")
|
||||
status.update(label=f"Accessing YouTube URL: {video_url}")
|
||||
try:
|
||||
vid_id = video_url.split("=")[1]
|
||||
yt = YouTube(video_url, on_progress_callback=progress_function)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get pytube stream object: {err}")
|
||||
st.stop()
|
||||
|
||||
logger.info(f"Fetching the highest quality audio stream:{yt.title}")
|
||||
status.update(label=f"Fetching the highest quality audio stream: {yt.title}")
|
||||
try:
|
||||
audio_stream = yt.streams.filter(only_audio=True).first()
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to Download Youtube Audio: {err}")
|
||||
st.stop()
|
||||
|
||||
if audio_stream is None:
|
||||
logger.warning("No audio stream found for this video.")
|
||||
st.warning("No audio stream found for this video.")
|
||||
st.stop()
|
||||
|
||||
logger.info(f"Downloading audio for: {yt.title}")
|
||||
status.update(label=f"Downloading audio for: {yt.title}")
|
||||
global progress_bar
|
||||
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
|
||||
try:
|
||||
audio_filename = re.sub(r'[^\w\-_\.]', '_', yt.title) + '.mp4'
|
||||
audio_file = audio_stream.download(
|
||||
output_path=os.getenv("CONTENT_SAVE_DIR"),
|
||||
filename=audio_filename)
|
||||
#audio_file = rename_file_with_underscores(audio_file)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to download audio file: {audio_file}")
|
||||
|
||||
progress_bar.close()
|
||||
logger.info(f"Audio downloaded: {yt.title} to {audio_file}")
|
||||
status.update(label=f"Audio downloaded: {yt.title} to {output_path}")
|
||||
# Audio filepath from local directory.
|
||||
elif os.path.exists(audio_input):
|
||||
audio_file = video_url
|
||||
|
||||
# Checking file size
|
||||
max_file_size = 24 * 1024 * 1024 # 24MB
|
||||
file_size = os.path.getsize(audio_file)
|
||||
# Convert file size to MB for logging
|
||||
file_size_MB = file_size / (1024 * 1024) # Convert bytes to MB
|
||||
|
||||
logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
|
||||
status.update(label=f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
|
||||
|
||||
if file_size > max_file_size:
|
||||
logger.error("File size exceeds 24MB limit.")
|
||||
# FIXME: We can chunk hour long videos, the code is not tested.
|
||||
#long_video(audio_file)
|
||||
sys.exit("File size limit exceeded.")
|
||||
st.error("Audio File size limit exceeded. File a fixme/issues at ALwrity github.")
|
||||
|
||||
try:
|
||||
print(f"Audio File: {audio_file}")
|
||||
transcript = transcribe_audio(audio_file)
|
||||
print(f"\n\n\n--- Tracribe: {transcript} ----\n\n\n")
|
||||
exit(1)
|
||||
status.update(label=f"Initializing OpenAI client for transcription: {audio_file}")
|
||||
logger.info(f"Initializing OpenAI client for transcription: {audio_file}")
|
||||
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
||||
|
||||
logger.info("Transcribing using OpenAI's Whisper model.")
|
||||
transcript = client.audio.transcriptions.create(
|
||||
model="whisper-1",
|
||||
file=open(audio_file, "rb"),
|
||||
response_format="text"
|
||||
)
|
||||
logger.info(f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
|
||||
status.update(label=f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
|
||||
return transcript, yt.title
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed in Whisper transcription: {e}")
|
||||
st.warning(f"Failed in Openai Whisper transcription: {e}")
|
||||
transcript = transcribe_audio(audio_file)
|
||||
print(f"\n\n\n--- Tracribe: {transcript} ----\n\n\n")
|
||||
return transcript, yt.title
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"An error occurred during YouTube video processing: {e}")
|
||||
|
||||
finally:
|
||||
try:
|
||||
if os.path.exists(audio_file):
|
||||
os.remove(audio_file)
|
||||
logger.info("Temporary audio file removed.")
|
||||
except PermissionError:
|
||||
st.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure of necessary permissions.")
|
||||
except Exception as e:
|
||||
st.error(f"An error occurred removing audio file: {e}")
|
||||
|
||||
|
||||
def long_video(temp_file_name):
    """
    Transcribe a long audio/video file with OpenAI's Whisper API, chunk by chunk.

    The audio is split into 10-minute segments with moviepy, each segment is
    written to its own temporary MP3 file, transcribed separately, and the
    per-segment transcripts are concatenated in order.

    Args:
        temp_file_name (str): Path of the local media file to transcribe.

    Returns:
        str: The transcripts of all chunks, each followed by a blank line.
    """
    logger.info(f"Processing the YT video: {temp_file_name}")
    chunk_length = 600  # 10 minutes in seconds
    # Same client construction as the single-file transcription path in this module.
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    full_audio = mp.AudioFileClip(temp_file_name)
    try:
        duration = full_audio.duration
        chunks = [
            full_audio.subclip(start, min(start + chunk_length, duration))
            for start in range(0, int(duration), chunk_length)
        ]

        transcripts = []
        for i, chunk in enumerate(chunks):
            # Reserve a unique path, then close the handle before moviepy writes
            # to it: an open NamedTemporaryFile cannot be reopened on Windows.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_chunk_file:
                chunk_path = audio_chunk_file.name
            try:
                chunk.write_audiofile(chunk_path, codec="mp3")
                # Binary mode takes no `encoding` argument; passing one raises ValueError.
                with open(chunk_path, "rb") as audio_file:
                    logger.info(f"Transcribing chunk {i+1}/{len(chunks)}")
                    transcript = client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file,
                        response_format="text"
                    )
                transcripts.append(f"{transcript}\n\n")
            finally:
                # Remove the chunk audio file even when transcription fails.
                os.remove(chunk_path)
    finally:
        # Release the reader that moviepy keeps open on the source file.
        full_audio.close()

    return "".join(transcripts)
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
"""Configuration management for GPT providers."""
|
||||
|
||||
import json
import os
import sys
from typing import Dict, Optional

from loguru import logger
|
||||
|
||||
# Configure logger to output to both file and stdout
logger.remove()  # Remove default handler

# File sink: minimum level DEBUG, written to logs/config.log with the
# rotation ("500 MB") and retention ("10 days") settings passed to loguru.
logger.add(
    "logs/config.log",
    rotation="500 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)

# Console sink: minimum level INFO, written to stdout using loguru's
# colour markup tags in the format string.
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
def load_config() -> Optional[Dict]:
    """
    Load configuration from environment or config file.

    The ``ALWRITY_CONFIG`` environment variable is tried first as an inline
    JSON document. If it is unset, empty, or not valid JSON, its value (or
    ``config.json`` when it is unset or empty) is treated as the path of a
    JSON file to read.

    Returns:
        Optional[Dict]: Configuration dictionary or None if loading fails
    """
    try:
        logger.info("[load_config] Starting configuration load")

        # First try to load from environment variable
        config_str = os.getenv('ALWRITY_CONFIG')
        if config_str:
            logger.debug("[load_config] Found configuration in environment variable")
            try:
                config = json.loads(config_str)
                logger.info("[load_config] Successfully loaded configuration from environment")
                return config
            except json.JSONDecodeError as e:
                logger.error(f"[load_config] Failed to parse environment config: {str(e)}")

        # Otherwise treat the variable as a file path. `or` (rather than a
        # getenv default) makes an empty value fall back to config.json
        # instead of producing the path "".
        config_path = config_str or 'config.json'
        logger.debug(f"[load_config] Attempting to load config from file: {config_path}")

        if os.path.exists(config_path):
            try:
                # Explicit UTF-8: the platform default encoding can mis-decode
                # non-ASCII characters in the JSON file.
                with open(config_path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                logger.info("[load_config] Successfully loaded configuration from file")
                return config
            except json.JSONDecodeError as e:
                logger.error(f"[load_config] Failed to parse config file: {str(e)}")
            except Exception as e:
                logger.error(f"[load_config] Error reading config file: {str(e)}")
        else:
            logger.error(f"[load_config] Config file not found: {config_path}")

        return None

    except Exception as e:
        logger.error(f"[load_config] Unexpected error loading configuration: {str(e)}")
        return None
|
||||
|
||||
def read_return_config_section(section: str) -> tuple:
    """
    Return the GPT settings stored under one section of the configuration.

    Args:
        section (str): Name of the configuration section to look up

    Returns:
        tuple: ``(gpt_provider, model, temperature, max_tokens, top_p, n, fp)``;
        seven ``None`` values when no configuration is available or a value
        cannot be converted.
    """
    nothing = (None,) * 7

    try:
        logger.info(f"[read_return_config_section] Reading section: {section}")

        loaded = load_config()
        if not loaded:
            logger.error("[read_return_config_section] No configuration available")
            return nothing

        settings = loaded.get(section, {})
        logger.debug(f"[read_return_config_section] Section config: {settings}")

        # Missing keys fall back to the defaults given here; numeric values
        # are coerced so string entries in the config are accepted too.
        provider = settings.get('gpt_provider', 'openai')
        model_name = settings.get('model', 'gpt-3.5-turbo')
        temp = float(settings.get('temperature', 0.7))
        token_limit = int(settings.get('max_tokens', 2000))
        nucleus = float(settings.get('top_p', 1.0))
        count = int(settings.get('n', 1))
        file_format = settings.get('fp', 'json')

        logger.info(f"[read_return_config_section] Successfully read configuration for {section}")
        logger.debug(
            f"[read_return_config_section] Values: provider={provider}, model={model_name}, "
            f"temperature={temp}, max_tokens={token_limit}"
        )
    except Exception as exc:
        logger.error(f"[read_return_config_section] Error reading configuration section: {str(exc)}")
        return nothing

    return provider, model_name, temp, token_limit, nucleus, count, file_format
|
||||
@@ -1,116 +0,0 @@
|
||||
"""
|
||||
Gemini Image Description Module
|
||||
|
||||
This module provides functionality to generate text descriptions of images using Google's Gemini API.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional, Union, List
|
||||
|
||||
from google import genai
|
||||
from PIL import Image
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from loguru import logger
|
||||
# Drop the sinks registered so far and log to stdout only, with a colourised
# prefix made of the level followed by file:line:function.
logger.remove()
logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
|
||||
|
||||
|
||||
def describe_image(image_path: str, prompt: str = "Describe this image in detail:") -> Optional[str]:
    """
    Generate a text description of an image using Google's Gemini API.

    Every failure (missing API key, missing file, unreadable image, API
    error) is logged and reported by returning None; no exception propagates
    to the caller.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str, optional): Custom prompt to guide the image description.
            Defaults to "Describe this image in detail:".

    Returns:
        Optional[str]: The generated description of the image, or None if an error occurs.
    """
    try:
        # Load environment variables
        load_dotenv()

        # Check if API key is set
        api_key = os.getenv('GEMINI_API_KEY')
        if not api_key:
            error_message = "GEMINI_API_KEY environment variable is not set"
            logger.error(error_message)
            # Caught by the outer handler below, which turns it into None.
            raise ValueError(error_message)

        # Check if image file exists
        if not os.path.exists(image_path):
            error_message = f"Image file not found: {image_path}"
            logger.error(error_message)
            # Caught by the outer handler below, which turns it into None.
            raise FileNotFoundError(error_message)

        # Initialize the Gemini client
        client = genai.Client(api_key=api_key)

        # Open and process the image
        try:
            image = Image.open(image_path)
            logger.info(f"Successfully opened image: {image_path}")
        except Exception as e:
            error_message = f"Failed to open image: {e}"
            logger.error(error_message)
            return None

        # Generate content description
        try:
            # The context manager closes the underlying file handle once the
            # request is done, whether it succeeded or raised.
            with image:
                response = client.models.generate_content(
                    model='gemini-2.0-flash',
                    contents=[
                        prompt,
                        image
                    ]
                )

            # Extract and return the text
            description = response.text
            logger.info(f"Successfully generated description for image: {image_path}")
            return description

        except Exception as e:
            error_message = f"Failed to generate content: {e}"
            logger.error(error_message)
            return None

    except Exception as e:
        error_message = f"An unexpected error occurred: {e}"
        logger.error(error_message)
        return None
|
||||
|
||||
|
||||
def analyze_image_with_prompt(image_path: str, prompt: str) -> Optional[str]:
    """
    Run a caller-supplied prompt against an image via Google's Gemini API.

    This is `describe_image` with the prompt made mandatory.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str): Prompt that tells the model what to analyze.

    Returns:
        Optional[str]: The model's analysis, or None if an error occurs.
    """
    analysis = describe_image(image_path, prompt)
    return analysis
|
||||
|
||||
|
||||
# Manual smoke test: describe a placeholder image when run as a script.
if __name__ == "__main__":
    image_path = "path/to/your/image.jpg"
    description = describe_image(image_path)
    # describe_image returns None (or an empty string) on failure.
    print(
        f"Image description: {description}"
        if description
        else "Failed to generate image description"
    )
|
||||
@@ -1,79 +0,0 @@
|
||||
"""
|
||||
This module provides functionality to analyze images using OpenAI's Vision API.
|
||||
It encodes an image to a base64 string and sends a request to the OpenAI API
|
||||
to interpret the contents of the image, returning a textual description.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import sys
|
||||
import re
|
||||
import base64
|
||||
|
||||
def analyze_and_extract_details_from_image(image_path, api_key):
    """
    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.

    The image is sent inline as a base64 data URL; the four fields are then
    pulled out of the model's free-text reply with regular expressions.

    Args:
        image_path (str): Path to the image file.
        api_key (str): Your OpenAI API key.

    Returns:
        dict: Keys 'alt_text', 'description', 'title' and 'caption'; a value
        is None when the reply did not contain that field in the expected form.

    Raises:
        SystemExit: If the HTTP request fails or the reply cannot be processed.
    """
    def encode_image(path):
        """ Encodes an image to a base64 string. """
        # Binary mode takes no `encoding` argument; passing one raises ValueError.
        with open(path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The given image is used in blog content. Analyze the given image and suggest alternative (alt) text, description, title, caption."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()

        assistant_message = response.json()['choices'][0]['message']['content']

        # Extracting details using regular expressions
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        return {
            'alt_text': alt_text_match.group(1) if alt_text_match else None,
            'description': description_match.group(1) if description_match else None,
            'title': title_match.group(1) if title_match else None,
            'caption': caption_match.group(1) if caption_match else None
        }

    except requests.RequestException as e:
        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
    except Exception as e:
        sys.exit(f"Error occurred: {e}")
|
||||
@@ -1,157 +0,0 @@
|
||||
# AI Text Generation Guide for Content Creators
|
||||
|
||||
## What is AI Text Generation?
|
||||
|
||||
AI Text Generation is a powerful tool that helps content creators generate high-quality, engaging content using advanced artificial intelligence models. This tool supports multiple AI providers, each offering unique strengths for different types of content creation.
|
||||
|
||||
## Available AI Models
|
||||
|
||||
### 1. OpenAI's GPT Models
|
||||
**Best for:** General content creation, creative writing, and detailed analysis
|
||||
|
||||
**Key Features:**
|
||||
- **Advanced Understanding**: Deep comprehension of context and nuance
|
||||
- **Creative Flexibility**: Adapts to various writing styles and tones
|
||||
- **Consistent Quality**: Reliable output for long-form content
|
||||
- **Streaming Responses**: Real-time content generation
|
||||
|
||||
**Use Cases:**
|
||||
- Blog posts and articles
|
||||
- Creative storytelling
|
||||
- Technical writing
|
||||
- Content analysis and summaries
|
||||
|
||||
### 2. Google's Gemini Pro
|
||||
**Best for:** Balanced content creation and factual accuracy
|
||||
|
||||
**Key Features:**
|
||||
- **Factual Accuracy**: Strong focus on reliable information
|
||||
- **Balanced Output**: Good mix of creativity and precision
|
||||
- **Multilingual Support**: Works well across different languages
|
||||
- **Contextual Understanding**: Strong grasp of context
|
||||
|
||||
**Use Cases:**
|
||||
- Educational content
|
||||
- Fact-based articles
|
||||
- Multilingual content
|
||||
- Research-based writing
|
||||
|
||||
### 3. Anthropic's Claude
|
||||
**Best for:** Professional and academic content
|
||||
|
||||
**Key Features:**
|
||||
- **Professional Tone**: Excellent for formal writing
|
||||
- **Detailed Analysis**: Strong analytical capabilities
|
||||
- **Ethical Considerations**: Built-in ethical guidelines
|
||||
- **Long-form Excellence**: Great for extended content
|
||||
|
||||
**Use Cases:**
|
||||
- Academic writing
|
||||
- Professional documentation
|
||||
- Research papers
|
||||
- Policy documents
|
||||
|
||||
### 4. DeepSeek
|
||||
**Best for:** Technical and specialized content
|
||||
|
||||
**Key Features:**
|
||||
- **Technical Precision**: Excellent for technical writing
|
||||
- **Specialized Knowledge**: Strong in specific domains
|
||||
- **Efficient Processing**: Fast response times
|
||||
- **Customizable Output**: Flexible formatting options
|
||||
|
||||
**Use Cases:**
|
||||
- Technical documentation
|
||||
- Industry-specific content
|
||||
- Scientific writing
|
||||
- Specialized reports
|
||||
|
||||
## How to Use the Text Generation Tool
|
||||
|
||||
### 1. Setting Up Your Content Parameters
|
||||
Before generating content, you can specify:
|
||||
- **Language**: Choose your preferred writing language
|
||||
- **Tone**: Select the appropriate tone (formal, casual, technical, etc.)
|
||||
- **Content Length**: Set your desired word count
|
||||
- **Content Type**: Specify the type of content (blog, article, etc.)
|
||||
- **Target Audience**: Define your reader demographic
|
||||
- **Output Format**: Choose your preferred format (Markdown, HTML, etc.)
|
||||
|
||||
### 2. Content Generation Process
|
||||
1. **Input Your Requirements**: Provide your content specifications
|
||||
2. **Select Your Model**: Choose the AI model best suited for your needs
|
||||
3. **Generate Content**: Let the AI create your content
|
||||
4. **Review and Edit**: Polish the generated content as needed
|
||||
|
||||
### 3. Customization Options
|
||||
You can adjust various parameters to fine-tune your content:
|
||||
- **Temperature**: Control creativity (lower = more focused, higher = more creative)
|
||||
- **Maximum Length**: Set content length limits
|
||||
- **Output Format**: Choose how you want the content structured
|
||||
- **Language Style**: Adjust the writing style and complexity
|
||||
|
||||
## Best Practices for Content Creation
|
||||
|
||||
### 1. Before Generation
|
||||
- Clearly define your content goals
|
||||
- Identify your target audience
|
||||
- Choose the appropriate model for your needs
|
||||
- Set clear parameters for tone and style
|
||||
|
||||
### 2. During Generation
|
||||
- Monitor the content quality
|
||||
- Ensure it aligns with your brand voice
|
||||
- Check for factual accuracy
|
||||
- Maintain consistency with your style guide
|
||||
|
||||
### 3. After Generation
|
||||
- Review and edit the content
|
||||
- Fact-check important information
|
||||
- Optimize for SEO if needed
|
||||
- Add your personal touch
|
||||
|
||||
## Tips for Optimal Results
|
||||
|
||||
1. **Be Specific**: Provide clear instructions for the AI
|
||||
2. **Use Examples**: Share examples of your desired style
|
||||
3. **Iterate**: Don't hesitate to regenerate if needed
|
||||
4. **Review**: Always review and edit generated content
|
||||
5. **Optimize**: Fine-tune parameters for better results
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### Blog Writing
|
||||
- Generate engaging blog posts
|
||||
- Create consistent content series
|
||||
- Develop topic outlines
|
||||
- Write product reviews
|
||||
|
||||
### Article Creation
|
||||
- Research-based articles
|
||||
- Opinion pieces
|
||||
- How-to guides
|
||||
- Industry analysis
|
||||
|
||||
### Technical Writing
|
||||
- Documentation
|
||||
- User guides
|
||||
- Technical specifications
|
||||
- Process descriptions
|
||||
|
||||
### Creative Writing
|
||||
- Story development
|
||||
- Character creation
|
||||
- Plot outlines
|
||||
- Scene descriptions
|
||||
|
||||
## Need Help?
|
||||
|
||||
If you encounter any issues or need assistance:
|
||||
1. Check the model-specific documentation
|
||||
2. Review your input parameters
|
||||
3. Try adjusting the generation settings
|
||||
4. Contact support for technical issues
|
||||
|
||||
---
|
||||
|
||||
*Note: This tool is designed to assist content creators in generating high-quality content. While AI can help with content creation, it's important to review and edit the generated content to ensure it meets your standards and brand guidelines.*
|
||||
@@ -1,121 +0,0 @@
|
||||
import os
|
||||
import anthropic
|
||||
import asyncio
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
# Configure standard logging
|
||||
import logging
|
||||
# Root-logger setup at import time: INFO level, with each record prefixed by
# timestamp, level, module and line number. `logger` is this module's logger.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_anthropic_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check an Anthropic API key by sending one tiny message request with it.

    Args:
        api_key (str): The Anthropic API key to test

    Returns:
        tuple[bool, str]: ``(is_valid, message)``; the message explains the
        outcome either way.
    """
    probe = [{"role": "user", "content": "Say hello"}]

    try:
        # The reply itself is irrelevant; only whether the call is accepted.
        anthropic.Anthropic(api_key=api_key).messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=10,
            messages=probe,
        )
    except anthropic.AuthenticationError:
        return False, "Invalid Anthropic API key"
    except anthropic.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as exc:
        return False, f"Error testing Anthropic API key: {str(exc)}"

    return True, "Anthropic API key is valid"
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def anthropic_text_response(prompt, model="claude-3-haiku-20240307", temperature=0.7, max_tokens=2048, top_p=0.9, n=1, system_prompt="You are a helpful AI assistant."):
    """
    Generate text using Anthropic's Claude model with retry logic.

    Args:
        prompt (str): The input text to generate completion for
        model (str, optional): Model to use. Defaults to "claude-3-haiku-20240307"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048
        top_p (float, optional): Accepted for signature parity with the other
            providers; not forwarded to the API. Defaults to 0.9
        n (int, optional): Accepted for signature parity with the other
            providers; not forwarded to the API. Defaults to 1
        system_prompt (str, optional): System prompt to guide the model. Defaults to "You are a helpful AI assistant."

    Returns:
        str: The generated text completion

    Raises:
        SystemExit: If the request fails for any reason.
    """
    # NOTE(review): SystemExit is not an Exception subclass, so tenacity's
    # default retry predicate presumably does not retry it -- confirm whether
    # the @retry decorator is meant to take effect here.
    try:
        # Create Anthropic client
        client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

        request = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "messages": [{"role": "user", "content": prompt}],
        }
        # The Messages API takes the system prompt as a top-level `system`
        # parameter; a {"role": "system"} entry in `messages` is rejected.
        if system_prompt:
            request["system"] = system_prompt

        # Generate completion
        response = client.messages.create(**request)

        # Return the generated text
        return response.content[0].text

    except Exception as e:
        logger.error(f"Error in Anthropic text generation: {e}")
        raise SystemExit from e
|
||||
|
||||
def anthropic_text_gen(prompt, model="claude-3-haiku-20240307", temperature=0.7, max_tokens=2048):
    """
    Send a single user prompt to an Anthropic Claude model and return its reply.

    Args:
        prompt (str): Text of the user message
        model (str, optional): Model to use. Defaults to "claude-3-haiku-20240307"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048

    Returns:
        str: The generated text, or the error text if the request fails
    """
    conversation = [{"role": "user", "content": prompt}]

    try:
        api_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
        reply = api_client.messages.create(
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=conversation,
        )
        # The generated text is the first content block of the reply.
        return reply.content[0].text
    except Exception as exc:
        logger.error(f"Error in Anthropic text generation: {exc}")
        # Failures are handed back as text rather than raised.
        return str(exc)
|
||||
@@ -1,139 +0,0 @@
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
import openai
|
||||
import asyncio
|
||||
|
||||
# Configure standard logging
# Root-logger setup at import time: INFO level, with each record prefixed by
# timestamp, level, module and line number. `logger` is this module's logger.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def deepseek_text_response(prompt, model, temperature, max_tokens, top_p, n, system_prompt):
    """
    Generate text with DeepSeek, streaming the reply to stdout as it arrives.

    Uses the OpenAI-compatible client pointed at DeepSeek's endpoint, the
    same way the other DeepSeek helpers in this module do.

    Args:
        prompt (str): The input text to generate completion for.
        model (str): Model to be used for the completion.
        temperature (float): Controls randomness. Lower values make responses more deterministic.
        max_tokens (int): Maximum number of tokens to generate.
        top_p (float): Controls diversity.
        n (int): Accepted for signature parity with the other providers; not
            forwarded, because only the first streamed choice is collected.
        system_prompt (str): System message that precedes the user prompt.

    Returns:
        str: The generated text completion.

    Raises:
        SystemExit: If an API error, connection error, or rate limit error occurs.
    """
    # Wait for 10 seconds to comply with rate limits
    time.sleep(10)

    try:
        client = openai.OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url="https://api.deepseek.com")

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
            temperature=temperature
        )

        # Collect the streamed text fragments in arrival order
        collected_messages = []
        for chunk in response:
            # Some stream events carry no choices or no text delta; skip them.
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content
            if chunk_message is None:
                continue
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        return ''.join(collected_messages)

    except Exception as err:
        logger.error(f"DeepSeek error: {err}")
        raise SystemExit from err
|
||||
|
||||
async def test_deepseek_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check a DeepSeek API key by listing models with it.

    Args:
        api_key (str): The DeepSeek API key to test

    Returns:
        tuple[bool, str]: ``(is_valid, message)``; the message explains the
        outcome either way.
    """
    try:
        # DeepSeek is reached through the OpenAI client with its own base URL.
        # The listing itself is discarded; only success or failure matters.
        openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1",
        ).models.list()
    except openai.AuthenticationError:
        return False, "Invalid DeepSeek API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as exc:
        return False, f"Error testing DeepSeek API key: {str(exc)}"

    return True, "DeepSeek API key is valid"
|
||||
|
||||
def deepseek_text_gen(prompt, model="deepseek-chat", temperature=0.7, max_tokens=2048):
    """
    Send a single user prompt to DeepSeek's chat API and return its reply.

    Args:
        prompt (str): Text of the user message
        model (str, optional): Model to use. Defaults to "deepseek-chat"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048

    Returns:
        str: The generated text, or the error text if the request fails
    """
    conversation = [{"role": "user", "content": prompt}]

    try:
        # DeepSeek is reached through the OpenAI client with its own base URL.
        api_client = openai.OpenAI(
            api_key=os.getenv('DEEPSEEK_API_KEY'),
            base_url="https://api.deepseek.com/v1",
        )
        completion = api_client.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        logger.error(f"Error in DeepSeek text generation: {exc}")
        # Failures are handed back as text rather than raised.
        return str(exc)
|
||||
@@ -1,232 +0,0 @@
|
||||
# Using Gemini Pro LLM model
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../../../.env'))
|
||||
from loguru import logger
|
||||
# Drop the sinks registered so far and log to stdout only, with a colourised
# prefix made of the level followed by file:line:function.
# NOTE(review): the name `logger` is rebound to a stdlib logging logger
# further down this module, so this loguru sink is not reached through that
# name afterwards -- confirm which logger is intended.
logger.remove()
logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
)
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import re
|
||||
|
||||
# Configure standard logging
|
||||
import logging
|
||||
# Root-logger setup at import time: INFO level, with each record prefixed by
# timestamp, level, module and line number.
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
# NOTE(review): this rebinds `logger`, replacing the loguru logger imported
# and configured earlier in this module -- confirm that is intended.
logger = logging.getLogger(__name__)
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_prompt):
    """Return the text of one Gemini completion for ``prompt``.

    Args:
        prompt (str): User content, sent as ``contents``.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus-sampling cutoff.
        n (int): Forwarded to the API as ``top_k`` (it is not a completion count).
        max_tokens (int): Forwarded as ``max_output_tokens``.
        system_prompt (str | None): Forwarded as ``system_instruction``.

    Returns:
        str: ``response.text`` of the generated content.

    Raises:
        Exception: Client-construction and API errors are logged and re-raised
            so that the ``@retry`` decorator actually retries. Previously they
            were swallowed and the function silently returned ``None``. Once
            the attempts are exhausted tenacity raises its ``RetryError``.
    """
    # FIXME: Include : https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/System_instructions_REST.ipynb
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))
    except Exception as err:
        logger.error(f"Failed to configure Gemini: {err}")
        # Without a client the request below could only fail with a NameError.
        raise

    logger.info(f"Temp: {temperature}, MaxTokens: {max_tokens}, TopP: {top_p}, N: {n}")

    # FIXME: Expose model_name in main_config
    try:
        response = client.models.generate_content(
            model='gemini-2.0-flash-001',
            contents=prompt,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt,
                max_output_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=n,
            ),
        )
        return response.text
    except Exception as err:
        logger.error(f"Failed to get response from Gemini: {err}. Retrying.")
        # Propagate so tenacity sees the failure and schedules the next attempt.
        raise
|
||||
|
||||
|
||||
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
#def gemini_blog_metadata_json(blog_content):
|
||||
# """ Common functiont to get response from gemini pro Text. """
|
||||
# prompt = f"I will provide you with the content of a blog post. Based on this content, you need to generate the following elements in JSON format:\n\n1. **Blog Title**: A compelling and relevant title that summarizes the blog content.\n2. **Meta Description**: A concise meta description (up to 160 characters) that captures the essence of the blog post and encourages clicks.\n3. **Tags**: A list of 5-10 relevant tags that represent the key topics covered in the blog post.\n4. **Categories**: A list of 1-3 appropriate categories that best describe the blog post's main themes.\n\nOutput your response in the following JSON format:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"blog_title\": {\n \"type\": \"string\"\n },\n \"meta_description\": {\n \"type\": \"string\"\n },\n \"tags\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n },\n \"categories\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n }\n }\n}\n\n. The Blog Content is given below: \n\n{blog_content}\n\n"
|
||||
#
|
||||
# try:
|
||||
# genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
|
||||
# except Exception as err:
|
||||
# logger.error(f"Failed to configure Gemini: {err}")
|
||||
#
|
||||
# # Create the model
|
||||
# generation_config = {
|
||||
# "temperature": 1,
|
||||
# "top_p": 0.95,
|
||||
# "top_k": 64,
|
||||
# "max_output_tokens": 8192,
|
||||
# "response_schema": content.Schema(
|
||||
# type = content.Type.OBJECT,
|
||||
# properties = {
|
||||
# "response": content.Schema(
|
||||
# type = content.Type.STRING,
|
||||
# ),
|
||||
# },
|
||||
# ),
|
||||
# "response_mime_type": "application/json",
|
||||
# }
|
||||
#
|
||||
# model = genai.GenerativeModel(
|
||||
# model_name="gemini-1.5-flash",
|
||||
# generation_config=generation_config,
|
||||
# # safety_settings = Adjust safety settings
|
||||
# # See https://ai.google.dev/gemini-api/docs/safety-settings
|
||||
# )
|
||||
#
|
||||
# try:
|
||||
# # text_response = []
|
||||
# response = model.generate_content(prompt)
|
||||
# if response:
|
||||
# logger.info(f"Number of Token in Prompt Sent: {model.count_tokens(prompt)}")
|
||||
# return response.text
|
||||
# except Exception as err:
|
||||
# logger.error(f"Failed to get SEO METADATA from Gemini: {err}. Retrying.")
|
||||
|
||||
async def test_gemini_api_key(api_key: str) -> tuple[bool, str]:
    """Check whether ``api_key`` can list Gemini models.

    The module imports the ``google.genai`` client SDK, which has no
    module-level ``configure`` / ``list_models`` helpers; the key is therefore
    exercised through a ``genai.Client`` instance.

    Args:
        api_key (str): The Gemini API key to test.

    Returns:
        tuple[bool, str]: ``(is_valid, message)``. Any exception raised while
        talking to the API is reported as ``(False, <error text>)``.
    """
    try:
        client = genai.Client(api_key=api_key)

        # Listing models is a cheap authenticated call. Names come back in the
        # "models/<id>" form, so match on a substring rather than equality.
        model_names = [model.name or "" for model in client.models.list()]

        if any("gemini" in name for name in model_names):
            return True, "Gemini API key is valid"
        return False, "Gemini Pro model not available with this API key"

    except Exception as e:
        return False, f"Error testing Gemini API key: {str(e)}"
|
||||
|
||||
def gemini_pro_text_gen(prompt, temperature=0.7, top_p=0.9, top_k=40, max_tokens=2048,
                        model='gemini-2.0-flash-001'):
    """Generate text with a Gemini model through the ``google.genai`` client.

    The previous body used ``genai.GenerativeModel`` and
    ``genai.types.GenerationConfig``, which belong to the legacy
    ``google.generativeai`` package and are not available from the
    ``from google import genai`` import this module uses.

    Args:
        prompt (str): The input text to generate a completion for.
        temperature (float, optional): Controls randomness. Defaults to 0.7.
        top_p (float, optional): Nucleus-sampling cutoff. Defaults to 0.9.
        top_k (int, optional): Top-k sampling cutoff. Defaults to 40.
        max_tokens (int, optional): Maximum tokens to generate. Defaults to 2048.
        model (str, optional): Model id. Defaults to the id that
            ``gemini_text_response`` in this module already uses.

    Returns:
        str: The generated text. NOTE: on any failure the error message itself
        is returned (existing contract), so callers cannot distinguish an
        error string from generated content.
    """
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        response = client.models.generate_content(
            model=model,
            contents=prompt,
            config=types.GenerateContentConfig(
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                max_output_tokens=max_tokens,
            ),
        )
        return response.text

    except Exception as e:
        logger.error(f"Error in Gemini Pro text generation: {e}")
        return str(e)
|
||||
|
||||
def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, top_k=40, max_tokens=2048, system_prompt=None):
    """Generate a JSON response constrained by ``schema`` using Gemini.

    Args:
        prompt (str): The input text to generate a completion for.
        schema (dict): Passed to the API as ``response_schema``.
        temperature (float, optional): Controls randomness. Defaults to 0.7.
        top_p (float, optional): Nucleus-sampling cutoff. Defaults to 0.9.
        top_k (int, optional): Top-k sampling cutoff. Defaults to 40.
        max_tokens (int, optional): Maximum tokens to generate. Defaults to 2048.
        system_prompt (str, optional): System instructions for the model.

    Returns:
        The SDK's ``response.parsed`` value when it is populated; otherwise the
        result of ``json.loads(response.text)``. On failure a dict with an
        ``"error"`` key is returned (plus ``"raw_response"`` when the text was
        not valid JSON) instead of raising.
    """
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt,
                max_output_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                response_mime_type='application/json',
                response_schema=schema,
            ),
        )

        # The attribute always exists on the response object, so test its value:
        # `hasattr` was always true and returned None whenever the SDK could not
        # parse the payload, making the text fallback below unreachable.
        parsed = getattr(response, 'parsed', None)
        if parsed is not None:
            return parsed

        response_text = response.text
        try:
            return json.loads(response_text)
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON response: {e}")
            return {"error": f"Failed to parse JSON response: {e}", "raw_response": response_text}

    except Exception as e:
        logger.error(f"Error in Gemini Pro structured JSON generation: {e}")
        return {"error": str(e)}
|
||||
@@ -1,219 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from .openai_text_gen import openai_chatgpt
|
||||
from .gemini_pro_text import gemini_text_response, gemini_structured_json_response
|
||||
from .anthropic_text_gen import anthropic_text_response
|
||||
from .deepseek_text_gen import deepseek_text_response
|
||||
from ...utils.read_main_config_params import read_return_config_section
|
||||
|
||||
|
||||
# Positional layout of the config sections as consumed by llm_text_gen.
_LLM_CONFIG_FIELDS = (
    "gpt_provider",
    "model",
    "temperature",
    "max_tokens",
    "top_p",
    "n",
    "frequency_penalty",
)
_BLOG_CONFIG_FIELDS = (
    "blog_tone",
    "blog_demographic",
    "blog_type",
    "blog_language",
    "blog_output_format",
    "blog_length",
)

# Provider keywords in dispatch priority order, with the label used in error logs.
_PROVIDER_LABELS = {
    "google": "gemini",
    "openai": "Openai",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
}

# Default system prompt; placeholders are filled from the blog characteristics.
_DEFAULT_SYSTEM_PROMPT_TEMPLATE = """You are a highly skilled content writer with a knack for creating engaging and informative content.
Your expertise spans various writing styles and formats.

Here's a breakdown of the instructions for this writing task:

**Content Guidelines:**

1. **Language:** Your response must be in **{blog_language}** language.
2. **Tone and Brand Alignment:** Adjust your tone, voice, and personality to be appropriate for a **{blog_tone}** audience.
3. **Content Length:** Ensure your response is approximately **{blog_length}** words in length.
4. **Blog Type:** The type of blog is **{blog_type}**. Write accordingly, adhering to the conventions and expectations of this type of content.
5. **Target Audience:** The demographic for this content is **{blog_demographic}**. Keep their interests and needs in mind.
6. **Output Format:** Your response should be in **{blog_output_format}** format. This could be Markdown, HTML, or a specific structured format, depending on the user's preference.

**Additional Instructions:**

* **SEO Optimization:** Incorporate relevant keywords naturally throughout the content to improve its search engine visibility.
* **Call to Action:** Include a call to action if appropriate for the blog type and target audience.
* **Factual Accuracy:** Ensure your content is accurate and reliable. Back up any claims with credible sources.
* **Unique Voice and Style:** Inject your unique voice and writing style to make the content engaging and memorable. """


def _overlay_config(section_name, field_names, required, settings):
    """Copy truthy positional values of a config section into ``settings``."""
    values = read_return_config_section(section_name)
    if not values or len(values) < required:
        return
    for key, value in zip(field_names, values):
        # NOTE(review): falsy values (including a temperature of 0) keep the
        # default, exactly as before — confirm whether 0 should be honoured.
        if value:
            settings[key] = value


def _load_generation_settings():
    """Return LLM and blog settings: built-in defaults overlaid with config values."""
    settings = {
        "gpt_provider": "google",
        "model": "gemini-1.5-flash-latest",
        "temperature": 0.7,
        "max_tokens": 4000,
        "top_p": 0.9,
        "n": 1,
        "frequency_penalty": 0.0,
        "blog_tone": "Professional",
        "blog_demographic": "Professional",
        "blog_type": "Informational",
        "blog_language": "English",
        "blog_output_format": "markdown",
        "blog_length": 2000,
    }

    try:
        # The first four llm_config entries are mandatory, the rest optional.
        _overlay_config('llm_config', _LLM_CONFIG_FIELDS, 4, settings)
        logger.debug(
            f"[llm_text_gen] LLM Config loaded: Provider={settings['gpt_provider']}, "
            f"Model={settings['model']}, Temp={settings['temperature']}"
        )
    except Exception as err:
        logger.warning(f"[llm_text_gen] Couldn't load LLM config completely, using defaults where needed: {err}")

    try:
        _overlay_config('blog_characteristics', _BLOG_CONFIG_FIELDS, 6, settings)
        logger.debug(
            f"[llm_text_gen] Blog characteristics loaded: Tone={settings['blog_tone']}, "
            f"Type={settings['blog_type']}"
        )
    except Exception as err:
        logger.warning(f"[llm_text_gen] Couldn't load blog characteristics completely, using defaults where needed: {err}")

    return settings


def _match_provider(gpt_provider):
    """Return the first known provider keyword contained in ``gpt_provider``, else None."""
    lowered = gpt_provider.lower()
    for keyword in _PROVIDER_LABELS:
        if keyword in lowered:
            return keyword
    return None


def llm_text_gen(prompt, system_prompt=None, json_struct=None):
    """Generate text with the configured LLM provider.

    Settings come from the ``llm_config`` and ``blog_characteristics`` config
    sections, falling back to built-in defaults for anything missing.

    Args:
        prompt (str): The prompt to generate text from.
        system_prompt (str, optional): Custom system prompt; when ``None`` a
            default prompt is built from the blog characteristics.
        json_struct (dict, optional): JSON schema; only honoured by the Google
            provider, where it selects the structured-JSON call.

    Returns:
        Whatever the selected provider function returns: generated text, or the
        structured response when ``json_struct`` is used with Google.

    Raises:
        ValueError: From ``get_api_key`` when no API key is set for the provider.
        Exception: Any provider error is logged and re-raised.
    """
    try:
        logger.info("[llm_text_gen] Starting text generation")
        logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")

        cfg = _load_generation_settings()

        if system_prompt is None:
            system_instructions = _DEFAULT_SYSTEM_PROMPT_TEMPLATE.format(
                **{key: cfg[key] for key in _BLOG_CONFIG_FIELDS}
            )
        else:
            system_instructions = system_prompt
            logger.info("[llm_text_gen] Using custom system prompt")

        # Resolve the provider BEFORE the key check: dispatch matches by
        # substring while get_api_key matches exactly, so values such as
        # "Google Gemini" — and the unknown-provider fallback — used to die in
        # get_api_key before ever reaching their branch.
        configured_provider = cfg["gpt_provider"]
        provider = _match_provider(configured_provider)
        if provider is None:
            logger.warning(f"Unknown provider '{configured_provider}', falling back to Google Gemini")
            provider = "google"

        # Check if API key is provided for the resolved provider.
        get_api_key(provider)

        model = cfg["model"]
        temperature = cfg["temperature"]
        max_tokens = cfg["max_tokens"]
        top_p = cfg["top_p"]
        n = cfg["n"]

        try:
            if provider == "google":
                logger.info("Using Google Gemini Pro text generation model.")
                if json_struct:
                    return gemini_structured_json_response(
                        prompt, json_struct, temperature, top_p, n, max_tokens, system_instructions
                    )
                return gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_instructions)

            if provider == "openai":
                logger.info(f"Using OpenAI Model: {model} for text Generation.")
                # Pass the configured frequency penalty; the former hard-coded
                # `fp = 16` is outside the range the OpenAI API accepts.
                return openai_chatgpt(
                    prompt, model, temperature, max_tokens, top_p, n,
                    cfg["frequency_penalty"], system_instructions
                )

            if provider == "anthropic":
                logger.info(f"Using Anthropic Model: {model} for text Generation.")
                return anthropic_text_response(prompt, model, temperature, max_tokens, top_p, n, system_instructions)

            logger.info(f"Using DeepSeek Model: {model} for text Generation.")
            return deepseek_text_response(prompt, model, temperature, max_tokens, top_p, n, system_instructions)

        except Exception as err:
            logger.error(f"Failed to get response from {_PROVIDER_LABELS[provider]}: {err}")
            raise

    except Exception as err:
        logger.error(f"Failed to generate text: {err}")
        raise
|
||||
|
||||
|
||||
def check_gpt_provider(gpt_provider):
    """Reconcile a configured provider with the ``GPT_PROVIDER`` environment variable.

    When both are set and differ (case-insensitively), a warning is logged and
    the environment value wins. When either one is missing the configured
    value is returned unchanged; previously an unset ``GPT_PROVIDER`` raised
    ``AttributeError``.

    Args:
        gpt_provider (str | None): The provider named in the config.

    Returns:
        str | None: The provider to use.
    """
    env_gpt_provider = os.getenv('GPT_PROVIDER')

    # Nothing to compare against: keep the configured value as-is.
    if not gpt_provider or env_gpt_provider is None:
        return gpt_provider

    if gpt_provider.lower() != env_gpt_provider.lower():
        logger.warning(f"Config: '{gpt_provider}' different to environment variable 'GPT_PROVIDER' '{env_gpt_provider}'")
        return env_gpt_provider

    return gpt_provider
|
||||
|
||||
|
||||
def get_api_key(gpt_provider):
    """Return the API key for ``gpt_provider`` from the environment.

    Args:
        gpt_provider (str): Provider name, matched case-insensitively against
            ``google``, ``openai``, ``anthropic`` and ``deepseek``.

    Returns:
        str: The API key read from the provider's environment variable.

    Raises:
        ValueError: If the provider is unknown, is not a string (e.g. ``None``),
            or its environment variable is unset or empty.
    """
    env_var_by_provider = {
        'google': 'GEMINI_API_KEY',
        'openai': 'OPENAI_API_KEY',
        'anthropic': 'ANTHROPIC_API_KEY',
        'deepseek': 'DEEPSEEK_API_KEY',
    }

    # A non-string provider used to crash on `.lower()` with AttributeError;
    # treat it like any other unknown provider so callers get the documented error.
    provider_key = gpt_provider.lower() if isinstance(gpt_provider, str) else None
    env_var = env_var_by_provider.get(provider_key)
    api_key = os.getenv(env_var) if env_var else None

    if not api_key:
        raise ValueError(f"No API key found for the specified GPT provider: '{gpt_provider}'")

    logger.info(f"Using API key for {gpt_provider}")
    return api_key
|
||||
@@ -1,144 +0,0 @@
|
||||
import os
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from mistralai import Mistral
|
||||
import asyncio
|
||||
from loguru import logger
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../../.env'))
|
||||
|
||||
# Configure standard logging
|
||||
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_mistral_api_key(api_key: str) -> tuple[bool, str]:
    """Validate a Mistral API key by requesting a tiny chat completion.

    Args:
        api_key (str): The Mistral API key to test.

    Returns:
        tuple[bool, str]: ``(is_valid, message)``; any exception is reported
        as ``(False, <error text>)``.
    """
    probe_messages = [{"role": "user", "content": "Hello"}]

    try:
        async with Mistral(api_key=api_key) as client:
            # A 10-token completion is enough to prove the key is accepted.
            reply = await client.chat.complete_async(
                model="mistral-small-latest",
                messages=probe_messages,
                max_tokens=10,
            )

            if not (reply and reply.choices):
                return False, "Invalid response from Mistral API"
            return True, "Mistral API key is valid"

    except Exception as exc:
        return False, f"Error testing Mistral API key: {str(exc)}"
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
async def mistral_chat_completion_async(
    prompt: str,
    model: str = "mistral-small-latest",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: str = "You are a helpful AI assistant."
) -> str:
    """Generate text with Mistral's chat completion API asynchronously.

    Args:
        prompt (str): The input text to generate a completion for.
        model (str, optional): Model to use. Defaults to "mistral-small-latest".
        temperature (float, optional): Controls randomness. Defaults to 0.7.
        max_tokens (int, optional): Maximum tokens to generate. Defaults to 2048.
        top_p (float, optional): Controls diversity. Defaults to 0.9.
        system_prompt (str, optional): System message; omitted from the request
            when empty. Defaults to "You are a helpful AI assistant.".

    Returns:
        str: Content of the first choice in the response.

    Raises:
        Exception: Failures are logged and re-raised so the ``@retry``
            decorator can retry them; tenacity raises ``RetryError`` once the
            attempts are exhausted. The former ``raise SystemExit`` skipped
            both the retry and the synchronous wrapper's ``except Exception``,
            terminating the process on the first error.
    """
    try:
        async with Mistral(api_key=os.getenv('MISTRAL_API_KEY')) as client:
            messages = []

            # Add system message if provided
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})

            messages.append({"role": "user", "content": prompt})

            response = await client.chat.complete_async(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p,
            )

            if response and response.choices:
                return response.choices[0].message.content
            raise RuntimeError("No response generated")

    except Exception as e:
        logger.error(f"Error in Mistral chat completion: {e}")
        raise
|
||||
|
||||
# Synchronous wrapper for compatibility
|
||||
def mistral_chat_completion(
    prompt: str,
    model: str = "mistral-small-latest",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: str = "You are a helpful AI assistant."
) -> str:
    """Run ``mistral_chat_completion_async`` to completion from synchronous code.

    All arguments are forwarded unchanged. If the call raises an ``Exception``
    it is logged and its text is returned in place of a completion.
    """
    request = {
        "prompt": prompt,
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
        "top_p": top_p,
        "system_prompt": system_prompt,
    }
    try:
        return asyncio.run(mistral_chat_completion_async(**request))
    except Exception as exc:
        logger.error(f"Error in Mistral chat completion: {exc}")
        return str(exc)
|
||||
|
||||
# For backward compatibility
|
||||
def mistral_text_response(prompt, model="mistral-small-latest", temperature=0.7, max_tokens=2048):
    """Backward-compatible alias that delegates to ``mistral_chat_completion``.

    Only the four legacy arguments are forwarded; ``top_p`` and the system
    prompt keep the defaults of ``mistral_chat_completion``.
    """
    legacy_args = dict(
        prompt=prompt,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    return mistral_chat_completion(**legacy_args)
|
||||
@@ -1,109 +0,0 @@
|
||||
import os
|
||||
import time #IWish
|
||||
import openai
|
||||
import asyncio
|
||||
|
||||
# Configure standard logging
|
||||
import logging
|
||||
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
async def test_openai_api_key(api_key: str) -> tuple[bool, str]:
    """Validate an OpenAI API key by listing the available models.

    Args:
        api_key (str): The OpenAI API key to test.

    Returns:
        tuple[bool, str]: ``(is_valid, message)``. Authentication and
        rate-limit failures get dedicated messages; any other exception is
        reported with its text.
    """
    try:
        # Listing models is a simple authenticated request; the result itself
        # is not needed.
        openai.OpenAI(api_key=api_key).models.list()
    except openai.AuthenticationError:
        return False, "Invalid OpenAI API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as exc:
        return False, f"Error testing OpenAI API key: {str(exc)}"
    else:
        return True, "OpenAI API key is valid"
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt, model, temperature, max_tokens, top_p, n, fp, system_prompt):
    """Stream a chat completion from OpenAI and return the assembled text.

    The streamed text is also echoed to stdout as it arrives.

    Args:
        prompt (str): The user message.
        model (str): Model to use for the completion.
        temperature (float): Controls randomness; now actually sent to the API
            (it was accepted but dropped before).
        max_tokens (int): Maximum number of tokens to generate.
        top_p (float): Controls diversity.
        n (int): Number of completions requested. Only the first choice of each
            streamed chunk is collected.
        fp (float): Sent as ``frequency_penalty``.
        system_prompt (str): The system message.

    Returns:
        str: The concatenated content of the streamed chunks.

    Raises:
        SystemExit: If an API error, connection error, rate limit error or any
            other exception occurs.
            NOTE(review): SystemExit is not an ``Exception`` subclass, so the
            ``@retry`` decorator presumably never retries these failures —
            confirm whether that is intended.
    """
    # Wait for 5 seconds to comply with rate limits
    time.sleep(5)

    try:
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        stream = client.chat.completions.create(
            model=model,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=max_tokens,
            n=n,
            top_p=top_p,
            stream=True,
            frequency_penalty=fp,
        )

        pieces = []
        for chunk in stream:
            # Some stream events carry no choices; indexing them would raise.
            if not chunk.choices:
                continue
            piece = chunk.choices[0].delta.content
            # Role-only and final deltas have no content; skipping them also
            # stops the literal text "None" from being printed.
            if piece is None:
                continue
            pieces.append(piece)
            print(piece, end="", flush=True)

        return ''.join(pieces)

    # Subclasses first: APIError is their base class, so with the previous
    # order the connection and rate-limit handlers were unreachable.
    except openai.APIConnectionError as e:
        logger.error(f"Failed to connect to OpenAI API: {e}")
        raise SystemExit from e
    except openai.RateLimitError as e:
        logger.error(f"Rate limit exceeded on OpenAI API request: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as err:
        logger.error(f"OpenAI error: {err}")
        # Was `from e`, an unbound name in this handler (NameError).
        raise SystemExit from err
|
||||
@@ -1,56 +0,0 @@
|
||||
from openai import OpenAI
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """Generate an image with DALL-E 3 and save it locally.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        The value returned by ``save_generated_image`` (the saved image path),
        or ``None`` when saving fails; that failure is only logged.

    Raises:
        SystemExit: If the image generation request fails.
    """
    # Only `OpenAI` is imported at module level; the bare name `openai` used by
    # the old `except openai.OpenAIError` clause was undefined, so every
    # failure surfaced as a NameError instead of being handled.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()

        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n,
        )

        # Save the generated image locally.
        try:
            return save_generated_image(img_generation_response, image_dir)
        except Exception as err:
            logger.error(f"Failed to Save generated image: {err}")
            return None

    except OpenAIError as e:
        # `http_status` / `error` were attributes of the pre-1.0 SDK; read the
        # status defensively so logging cannot itself raise.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")

    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")
|
||||
@@ -1,53 +0,0 @@
|
||||
from openai import OpenAI
|
||||
from loguru import logger
|
||||
import sys
|
||||
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """Generate an image with DALL-E 3 and save it locally.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory where the generated image will be saved.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        The value returned by ``save_generated_image`` (the saved image path).

    Raises:
        SystemExit: If an error occurs in image generation or saving.
    """
    # Only `OpenAI` is imported at module level; the bare name `openai` used by
    # the old `except openai.OpenAIError` clause was undefined, so every
    # failure surfaced as a NameError instead of being handled.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()

        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n,
        )

        return save_generated_image(img_generation_response, image_dir)

    except OpenAIError as e:
        # `http_status` / `error` were attributes of the pre-1.0 SDK; read the
        # status defensively so logging cannot itself raise.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")

    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")
|
||||
@@ -1,423 +0,0 @@
|
||||
import os
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
import PIL
|
||||
import streamlit as st
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
import logging
|
||||
import datetime
|
||||
import base64
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger('gemini_image_generator')
|
||||
|
||||
# With image generation in Gemini, your imagination is the limit.
|
||||
# If what you see doesn't quite match what you had in mind, try adding more details to the prompt.
|
||||
# The more specific you are, the better Gemini can create images that reflect your vision.
|
||||
|
||||
# Generate images using Gemini
|
||||
# Gemini 2.0 Flash Experimental supports the ability to output text and inline images.
|
||||
# This lets you use Gemini to conversationally edit images or generate outputs with interwoven text (for example, generating a blog post with text and images in a single turn).
|
||||
# Note: Make sure to include responseModalities: ["Text", "Image"] in your generation configuration for text and image output with gemini-2.0-flash-exp-image-generation. Image only is not allowed.
|
||||
|
||||
|
||||
class AIPromptGenerator:
    """
    Generates enhanced AI image prompts based on user keywords,
    following the guidelines of the Imagen documentation.

    Each generator method starts from the user's keywords and randomly
    appends modifiers (style, camera, lighting, material, quality, aspect
    ratio) drawn from the vocabularies defined in ``__init__``. Output is
    therefore non-deterministic unless the ``random`` module is seeded.
    """

    def __init__(self):
        # Vocabulary lists the generator samples from. They are treated as
        # read-only by the methods below.
        self.photography_styles = ["photo", "photograph"]
        self.art_styles = ["painting", "sketch", "drawing", "illustration", "digital art", "render"]
        self.art_techniques = ["technical pencil drawing", "charcoal drawing", "color pencil drawing", "pastel painting", "digital art", "art deco (poster)", "impressionist painting", "renaissance painting", "pop art"]
        self.camera_proximity = ["close-up", "zoomed out", "taken from far away"]
        self.camera_position = ["aerial", "from below"]
        self.lighting = ["natural lighting", "dramatic lighting", "warm lighting", "cold lighting", "studio lighting", "golden hour lighting"]
        self.camera_settings = ["motion blur", "soft focus", "bokeh", "portrait"]
        self.lens_types = ["35mm lens", "50mm lens", "fisheye lens", "wide angle lens", "macro lens", "telephoto lens"]
        self.film_types = ["black and white film", "polaroid"]
        self.materials = ["made of cheese", "made of paper", "made of neon tubes", "metallic", "glass", "wooden", "stone"]
        self.shapes = ["in the shape of a bird", "angular", "curved", "geometric"]
        self.quality_modifiers_general = ["high-quality", "beautiful", "stylized", "detailed", "epic", "grand"]
        self.quality_modifiers_photo = ["4K", "HDR", "studio photo", "professional photo", "photorealistic"]
        self.quality_modifiers_art = ["by a professional artist", "intricate details", "masterpiece"]
        self.aspect_ratios = ["1:1 aspect ratio", "4:3 aspect ratio", "3:4 aspect ratio", "16:9 aspect ratio", "9:16 aspect ratio"]
        # Modifiers keyed by the ``focus`` argument of
        # generate_photorealistic_prompt().
        self.photorealistic_modifiers = {
            "portraits": ["prime lens", "zoom lens", "24-35mm", "black and white film", "film noir", "shallow depth of field", "duotone (mention two colors)"],
            "objects": ["macro lens", "60-105mm", "high detail", "precise focusing", "controlled lighting"],
            "motion": ["telephoto zoom lens", "100-400mm", "fast shutter speed", "action shot", "movement tracking"],
            "wide-angle": ["wide-angle lens", "10-24mm", "long exposure", "sharp focus", "smooth water or clouds", "astro photography"]
        }

    def generate_prompt(self, keywords):
        """
        Generates an enhanced AI image prompt based on user-provided keywords.

        Args:
            keywords (list): A list of keywords describing the desired image.

        Returns:
            str: An enhanced AI image prompt (comma-separated parts), or
                "A beautiful image." when no keywords are given.
        """
        if not keywords:
            return "A beautiful image."

        prompt_parts = []
        subject = " ".join(keywords)
        prompt_parts.append(subject)

        # Add context and background (optional)
        context_options = ["in a detailed background", "outdoors", "indoors", "in a studio", "with a blurred background"]
        if random.random() < 0.6:  # Add context with a probability
            prompt_parts.append(random.choice(context_options))

        # Add style (optional)
        style_options = self.photography_styles + [f"{art} of" for art in self.art_styles]
        if random.random() < 0.7:
            prompt_parts.insert(0, random.choice(style_options))
            if prompt_parts[0].startswith(("painting of", "sketch of", "drawing of")):
                if random.random() < 0.5:
                    prompt_parts.append(f"in the style of {random.choice(self.art_techniques)}")

        # The first part is the chosen style, or the subject itself when no
        # style was inserted; a substring match decides whether the prompt is
        # treated as photography.
        is_photography = any(style in prompt_parts[0] for style in self.photography_styles)

        # Add photography modifiers (if photography style is chosen)
        if is_photography:
            if random.random() < 0.4:
                prompt_parts.append(random.choice(self.camera_proximity))
            if random.random() < 0.3:
                prompt_parts.append(random.choice(self.camera_position))
            if random.random() < 0.5:
                prompt_parts.append(random.choice(self.lighting))
            if random.random() < 0.3:
                prompt_parts.append(random.choice(self.camera_settings))
            if random.random() < 0.2:
                prompt_parts.append(random.choice(self.lens_types))
            if random.random() < 0.1:
                prompt_parts.append(random.choice(self.film_types))

        # Add shapes and materials (optional)
        if random.random() < 0.3:
            prompt_parts.append(random.choice(self.materials))
        if random.random() < 0.2:
            prompt_parts.append(random.choice(self.shapes))

        # Add quality modifiers (optional)
        if random.random() < 0.6:
            # Build a NEW list: extending self.quality_modifiers_general in
            # place would permanently grow the shared vocabulary on every call
            # and leak photo modifiers into art prompts (and vice versa).
            if is_photography:
                quality_options = self.quality_modifiers_general + self.quality_modifiers_photo
            else:
                quality_options = self.quality_modifiers_general + self.quality_modifiers_art
            # Order-preserving de-duplication keeps the choice reproducible
            # under a fixed random seed (set ordering of str is not stable
            # across interpreter runs).
            prompt_parts.append(random.choice(list(dict.fromkeys(quality_options))))

        # Add aspect ratio (optional)
        if random.random() < 0.2:
            prompt_parts.append(random.choice(self.aspect_ratios))

        return ", ".join(prompt_parts)

    def generate_photorealistic_prompt(self, keywords, focus=""):
        """
        Generates an enhanced AI image prompt specifically for photorealistic images.

        Args:
            keywords (list): A list of keywords describing the desired image.
            focus (str, optional): The focus of the photorealistic image (e.g., "portraits", "objects", "motion", "wide-angle"). Defaults to "".
                Values that are not keys of ``photorealistic_modifiers`` are ignored.

        Returns:
            str: An enhanced photorealistic AI image prompt, or
                "A photorealistic image." when no keywords are given.
        """
        if not keywords:
            return "A photorealistic image."

        # NOTE(review): the parts are comma-joined, so the result reads
        # "A photo of, photorealistic, <subject>" — confirm this phrasing is
        # intended before changing it.
        prompt_parts = ["A photo of", "photorealistic"]
        prompt_parts.append(" ".join(keywords))

        if focus and focus in self.photorealistic_modifiers:
            modifiers = self.photorealistic_modifiers[focus]
            if modifiers:
                # Pick between one and three distinct focus-specific modifiers.
                num_modifiers = random.randint(1, min(3, len(modifiers)))
                selected_modifiers = random.sample(modifiers, num_modifiers)
                prompt_parts.extend(selected_modifiers)

        # Add general quality modifiers
        if random.random() < 0.5:
            prompt_parts.append(random.choice(self.quality_modifiers_photo))

        # Add lighting
        if random.random() < 0.4:
            prompt_parts.append(random.choice(self.lighting))

        return ", ".join(prompt_parts)
|
||||
|
||||
|
||||
def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2, aspect_ratio="16:9"):
    """
    Generate images using Gemini

    Depending on the prompt and context, Gemini will generate content in different modes (text to image, text to image and text, etc.).
    Here are some examples:

    1). Text to image
    Example prompt: "Generate an image of the Eiffel tower with fireworks in the background."
    2). Text to image(s) and text (interleaved)
    Example prompt: "Generate an illustrated recipe for a paella."

    Image generation may not always trigger:
    - The model may output text only. Try asking for image outputs explicitly (e.g. "generate an image", "provide images as you go along", "update the image").
    - The model may stop generating partway through. Try again or try a different prompt.

    Args:
        prompt (str): The prompt to generate the image from.
        keywords (list, optional): Keywords to enhance the prompt. Defaults to None.
        style (str, optional): The style of the image. Defaults to None.
        focus (str, optional): The focus of the image (e.g., "portraits", "objects", "motion", "wide-angle"). Defaults to None.
        enhance_prompt (bool, optional): Whether to enhance the prompt using AIPromptGenerator. Defaults to True.
        max_retries (int, optional): Maximum number of retry attempts for handling 503 errors. Defaults to 3.
        initial_retry_delay (int, optional): Initial delay in seconds before retrying. Defaults to 2.
        aspect_ratio (str, optional): The aspect ratio for the generated image. Must be one of "16:9", "9:16", "4:3", "3:4", or "1:1". Defaults to "16:9".

    Returns:
        str: The file name of the generated image (written to the current
            working directory), or None when the API key is missing, the
            request fails, or the response contains no image part.
    """
    logger.info(f"Generating image with prompt: '{prompt[:100]}...'")

    # Check if the GEMINI_API_KEY is available
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        error_msg = "GEMINI_API_KEY is missing. Please set it in your environment variables."
        logger.error(error_msg)
        st.error(f"🔑 {error_msg}")
        return None

    # Enhance the prompt if requested
    if enhance_prompt and keywords:
        prompt_generator = AIPromptGenerator()
        if style == "photorealistic" and focus:
            logger.info(f"Generating photorealistic prompt with focus: {focus}")
            enhanced_prompt = prompt_generator.generate_photorealistic_prompt(keywords, focus)
        else:
            logger.info("Generating enhanced prompt")
            enhanced_prompt = prompt_generator.generate_prompt(keywords)

        # Combine the enhanced prompt with the original prompt
        prompt = f"{prompt}\n\nEnhanced prompt: {enhanced_prompt}"
        logger.info(f"Final prompt: '{prompt[:100]}...'")

    # Add aspect ratio to the prompt
    if aspect_ratio:
        prompt += f"\n\nPlease generate the image with {aspect_ratio} aspect ratio."

    retry_count = 0
    retry_delay = initial_retry_delay

    while retry_count <= max_retries:
        try:
            client = genai.Client(api_key=api_key)

            logger.info("Sending request to Gemini API")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=prompt,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )
            logger.info("Received response from Gemini API")

            img_name = None
            for part in response.candidates[0].content.parts:
                if part.text is not None:
                    logger.info(f"Received text response: '{part.text[:100]}...'")
                    print(part.text)
                elif part.inline_data is not None:
                    logger.info("Received image data from Gemini")
                    image = Image.open(BytesIO(part.inline_data.data))

                    # Fit the image to the requested aspect ratio if needed
                    if aspect_ratio:
                        current_width, current_height = image.size
                        target_width = current_width
                        target_height = current_height

                        # Calculate target dimensions based on aspect ratio
                        if aspect_ratio == "16:9":
                            target_height = int(current_width * 9/16)
                        elif aspect_ratio == "9:16":
                            target_width = int(current_height * 9/16)
                        elif aspect_ratio == "4:3":
                            target_height = int(current_width * 3/4)
                        elif aspect_ratio == "3:4":
                            target_width = int(current_height * 3/4)
                        elif aspect_ratio == "1:1":
                            target_size = min(current_width, current_height)
                            target_width = target_size
                            target_height = target_size

                        logger.info(f"Resizing image from {current_width}x{current_height} to {target_width}x{target_height}")

                        # Create a new white canvas with the target dimensions
                        resized_image = Image.new('RGB', (target_width, target_height), (255, 255, 255))

                        # Centre the original on the canvas. No scaling is
                        # done: a negative offset crops the overflow, a
                        # positive one leaves white padding.
                        paste_x = (target_width - current_width) // 2
                        paste_y = (target_height - current_height) // 2

                        # Paste the original image onto the new canvas
                        resized_image.paste(image, (paste_x, paste_y))
                        image = resized_image

                    # part.text is always None in this branch (the text case is
                    # handled above), so the file name is timestamp-based.
                    img_name = f'gemini-native-image-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}.png'
                    try:
                        logger.info(f"Saving image to: {img_name}")
                        image.save(img_name)

                        # Read the file back with a context manager so the
                        # handle is closed deterministically.
                        with open(img_name, "rb") as saved_file:
                            encoded_image = base64.b64encode(saved_file.read()).decode('utf-8')

                        # Create a dictionary with the expected format for save_generated_image
                        img_response = {
                            "artifacts": [
                                {
                                    "base64": encoded_image
                                }
                            ]
                        }

                        # Call save_generated_image with the correct format
                        save_generated_image(img_response)
                    except Exception as err:
                        logger.error(f"Failed to save image: {err}")
                        st.error(f"Failed to save image: {err}")

            logger.info(f"Image generation completed. Image name: {img_name}")
            return img_name
        except Exception as err:
            error_message = str(err)
            logger.error(f"Error in generate_gemini_image: {err}")

            # Check if this is a 503 UNAVAILABLE error
            if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
                retry_count += 1
                logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
                st.warning(f"The image generation service is currently busy. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                # Exponential backoff
                retry_delay *= 2
            else:
                st.error(f"Error generating image: {err}")
                return None

    # If we've exhausted all retries
    st.error("The image generation service is currently unavailable. Please try again later.")
    return None
|
||||
|
||||
|
||||
def edit_image(image_path, prompt, max_retries=3, initial_retry_delay=2):
    """
    - Image editing (text and image to image)
    Example prompt: "Edit this image to make it look like a cartoon"
    Example prompt: [image of a cat] + [image of a pillow] + "Create a cross stitch of my cat on this pillow."

    - Multi-turn image editing (chat)
    Example prompts: [upload an image of a blue car.] "Turn this car into a convertible." "Now change the color to yellow."

    Image editing with Gemini
    To perform image editing, add an image as input.
    The following example demonstrates sending an image together with a text prompt.
    For multiple images and larger payloads, check the image input section.

    Args:
        image_path (str): The path to the image to edit.
        prompt (str): The prompt to edit the image with.
        max_retries (int, optional): Maximum number of retry attempts for handling 503 errors. Defaults to 3.
        initial_retry_delay (int, optional): Initial delay in seconds before retrying. Defaults to 2.

    Returns:
        str: The file name of the edited image (written to the current
            working directory), or None when the request fails or the
            response contains no image part.
    """
    import PIL.Image

    retry_count = 0
    retry_delay = initial_retry_delay

    # Keep the source image open only for the duration of the request loop so
    # its file handle is released on every exit path.
    with PIL.Image.open(image_path) as image:
        while retry_count <= max_retries:
            try:
                client = genai.Client()

                logger.info("Sending request to Gemini API for image editing")
                response = client.models.generate_content(
                    model="gemini-2.0-flash-exp-image-generation",
                    contents=[prompt, image],
                    config=types.GenerateContentConfig(
                        response_modalities=['Text', 'Image']
                    )
                )
                logger.info("Received response from Gemini API for image editing")

                edited_img_name = None
                for part in response.candidates[0].content.parts:
                    if part.text is not None:
                        logger.info(f"Received text response: '{part.text[:100]}...'")
                        st.write(part.text)
                    elif part.inline_data is not None:
                        logger.info("Received edited image data from Gemini")
                        edited_image = Image.open(BytesIO(part.inline_data.data))
                        # NOTE(review): show() launches an external viewer on
                        # the machine running this code — confirm this is
                        # wanted in a hosted deployment.
                        edited_image.show()

                        # Save the edited image
                        edited_img_name = f'edited-{os.path.basename(image_path)}'
                        try:
                            logger.info(f"Saving edited image to: {edited_img_name}")
                            edited_image.save(edited_img_name)

                            # Read the file back with a context manager so the
                            # handle is closed deterministically.
                            with open(edited_img_name, "rb") as saved_file:
                                encoded_image = base64.b64encode(saved_file.read()).decode('utf-8')

                            # Create a dictionary with the expected format for save_generated_image
                            img_response = {
                                "artifacts": [
                                    {
                                        "base64": encoded_image
                                    }
                                ]
                            }

                            # Call save_generated_image with the correct format
                            save_generated_image(img_response)
                        except Exception as err:
                            logger.error(f"Failed to save edited image: {err}")
                            st.error(f"Failed to save edited image: {err}")

                logger.info(f"Image editing completed. Edited image name: {edited_img_name}")
                return edited_img_name
            except Exception as err:
                error_message = str(err)
                logger.error(f"Error in edit_image: {err}")

                # Check if this is a 503 UNAVAILABLE error
                if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
                    retry_count += 1
                    logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
                    st.warning(f"The image editing service is currently busy. Retrying in {retry_delay} seconds...")
                    time.sleep(retry_delay)
                    # Exponential backoff
                    retry_delay *= 2
                else:
                    st.error(f"Error editing image: {err}")
                    return None

    # If we've exhausted all retries
    st.error("The image editing service is currently unavailable. Please try again later.")
    return None
|
||||
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
# Ensure you sign up for an account to obtain an API key:
|
||||
# https://platform.stability.ai/
|
||||
# Your API key can be found here after account creation:
|
||||
# https://platform.stability.ai/account/keys
|
||||
|
||||
import base64
|
||||
import os
|
||||
import requests
|
||||
from PIL import Image
|
||||
from io import BytesIO
|
||||
import streamlit as st
|
||||
|
||||
from .save_image import save_generated_image
|
||||
|
||||
|
||||
def generate_stable_diffusion_image(prompt):
    """
    Generate one 1024x1024 image from a text prompt via the Stability AI
    text-to-image REST endpoint and store it with save_generated_image().

    The API host is read from the API_HOST environment variable (default
    https://api.stability.ai) and the key from STABILITY_API_KEY.

    Args:
        prompt (str): Text description of the desired image.

    Returns:
        The value returned by save_generated_image() for the response, or
        None when STABILITY_API_KEY is not set.

    Raises:
        Exception: If the API answers with a non-200 status code.
    """
    engine_id = "stable-diffusion-xl-1024-v1-0"
    api_host = os.getenv('API_HOST', 'https://api.stability.ai')
    api_key = os.getenv("STABILITY_API_KEY")

    if api_key is None:
        st.warning("Missing Stability API key.")
        # Without a key the request would be sent as "Bearer None" and be
        # rejected; stop here instead of making a doomed network call.
        return None

    response = requests.post(
        f"{api_host}/v1/generation/{engine_id}/text-to-image",
        headers={
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}"
        },
        json={
            "text_prompts": [
                {
                    "text": prompt
                }
            ],
            "cfg_scale": 7,
            "height": 1024,
            "width": 1024,
            "samples": 1,
            "steps": 30,
        },
    )

    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))

    data = response.json()
    img_path = save_generated_image(data)

    for image in data["artifacts"]:
        # Decode base64 image data
        img_data = base64.b64decode(image["base64"])
        # Open image using PIL
        img = Image.open(BytesIO(img_data))
        # Display the image
        # NOTE(review): show() launches an external viewer on the machine
        # running this code — confirm this is wanted in a hosted deployment.
        img.show()

    return img_path
|
||||
@@ -1,51 +0,0 @@
|
||||
from loguru import logger
|
||||
import sys
|
||||
from PIL import Image
|
||||
from openai import OpenAI
|
||||
|
||||
def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
    """
    Generates variations of a given image using OpenAI's image variation API.

    This function takes an existing image, processes it, and generates a specified number of new images based on it.
    These generated images are variations of the original, providing creative flexibility.

    Note that the source file at ``img_path`` is overwritten in place with a
    PNG whose transparency has been flattened onto a white background.

    Args:
        img_path (str): Path to the original image file.
        image_dir (str): Directory where the generated images will be saved.
        num_img (int, optional): Number of image variations to generate. Defaults to 1.
        img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
        response_format (str, optional): Format in which the generated images are returned. Defaults to "url".

    Returns:
        str: Path to the saved image variation.

    Raises:
        SystemExit: If a critical error occurs that prevents successful execution.
    """
    try:
        logger.info(f"Starting image variation generation for: {img_path}")

        # Convert and prepare the image. The source is closed before the file
        # is overwritten below.
        with Image.open(img_path) as source_image:
            png = source_image.convert('RGBA')
        background = Image.new('RGBA', png.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, png)
        alpha_composite.save(img_path, 'PNG', quality=80)
        logger.info("Image prepared for variation generation.")

        client = OpenAI()
        # Binary mode must not be given an ``encoding`` argument (open() raises
        # ValueError); the context manager also closes the upload handle.
        with open(img_path, "rb") as image_file:
            variation_response = client.images.create_variation(
                image=image_file,
                n=num_img,
                size=img_size,
                response_format=response_format
            )

        # Saving the generated image
        # NOTE(review): save_generated_image is not imported anywhere in this
        # module, and the sibling helper of that name takes a single dict
        # argument — confirm which helper/signature is intended here.
        generated_image_path = save_generated_image(variation_response, image_dir)
        logger.info(f"Image variation generated and saved to: {generated_image_path}")
        return generated_image_path

    except Exception as e:
        logger.error(f"Error occurred during image variation generation: {e}")
        sys.exit(f"Exiting due to critical error: {e}")
|
||||
@@ -1,163 +0,0 @@
|
||||
#########################################################
|
||||
#
|
||||
# This module will generate images for the blogs using APIs
|
||||
# from Dall-E and other free resources. Given a prompt, the
|
||||
# images will be stored in local directory.
|
||||
# Required: openai API key.
|
||||
#
|
||||
#########################################################
|
||||
|
||||
# imports
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import streamlit as st
|
||||
|
||||
import openai # OpenAI Python library to make API calls
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
#from .gen_dali2_images
|
||||
from .gen_dali3_images import generate_dalle3_images
|
||||
from .gen_stabl_diff_img import generate_stable_diffusion_image
|
||||
from ..text_generation.main_text_generation import llm_text_gen
|
||||
from .gen_gemini_images import generate_gemini_image
|
||||
|
||||
def generate_image(user_prompt, title=None, description=None, tags=None, content=None, aspect_ratio="16:9"):
    """
    Create an image for a text prompt and return where it was stored.

    The user prompt (plus any title/description/tags/content context) is first
    turned into an enhanced image prompt, the aspect ratio is appended to it,
    and the result is handed to the configured image engine. The engine is
    currently fixed to 'Gemini-AI' inside this function.

    Args:
        user_prompt (str): A text description of the desired image.
        title (str, optional): Title of the content the image is for.
        description (str, optional): Description/summary of that content.
        tags (list, optional): Tags related to the content.
        content (str, optional): The content itself or an excerpt.
        aspect_ratio (str, optional): Aspect ratio for the generated image. Defaults to "16:9".

    Returns:
        Whatever the selected engine function returns for the stored image,
        or None when no prompt was given or generation failed (failures are
        logged and shown as a Streamlit warning, not raised).
    """
    # FIXME: Need to remove default value to match sidebar input.
    image_engine = 'Gemini-AI'

    # Guard clause: nothing to generate without a prompt.
    if not user_prompt:
        logger.error("Skipping Image creation, No prompt provided.")
        return None

    image_stored_at = None
    try:
        # Use enhanced prompt generator with all available parameters
        img_prompt = generate_enhanced_img_prompt(user_prompt, title, description, tags, content)

        # Add aspect ratio to the prompt
        if aspect_ratio:
            img_prompt = img_prompt + f"\n\nAspect ratio: {aspect_ratio}"

        # Dispatch to the backend named by image_engine.
        if 'Dalle3' in image_engine:
            logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
            image_stored_at = generate_dalle3_images(img_prompt)
        elif 'Stability-AI' in image_engine:
            logger.info(f"Calling Stable diffusion text-to-image with prompt: \n{img_prompt}")
            image_stored_at = generate_stable_diffusion_image(img_prompt)
        elif 'Gemini-AI' in image_engine:
            logger.info(f"Calling Gemini text-to-image with prompt: \n{img_prompt}")
            image_stored_at = generate_gemini_image(img_prompt, aspect_ratio=aspect_ratio)
        return image_stored_at
    except Exception as err:
        logger.error(f"Failed to generate Image: {err}")
        st.warning(f"Failed to generate Image: {err}")

    return None
|
||||
|
||||
|
||||
def generate_img_prompt(user_prompt):
    """
    Ask the text LLM to turn the given user text into a prompt suitable for
    a text-to-image model.

    Args:
        user_prompt (str): The user text the image should be relevant to.

    Returns:
        The response of llm_text_gen() for the instruction prompt.
    """
    llm_instructions = f"""
As an expert prompt generator for AI text to image models and artist, I will provide you with 'user text' for creating images.
Your task is to create a prompt for a highly relevant image from given 'user text'.
\n
Choose from various art styles, utilize light & shadow effects etc.
Make sure to avoid common image generation mistakes.
Reply with only one answer, no descrition and in plaintext.
Make sure your prompt is detailed and creative descriptions that will inspire unique and interesting images from the AI.

\n\nuser text:
'''{user_prompt}'''"""

    return llm_text_gen(llm_instructions)
|
||||
|
||||
|
||||
def generate_enhanced_img_prompt(user_prompt, title=None, description=None, tags=None, content=None):
    """
    Build an enhanced image-generation prompt from the user prompt plus any
    available context, by asking the text LLM to write it.

    Args:
        user_prompt (str): Base prompt from the user
        title (str, optional): Blog title or content title
        description (str, optional): Blog or content description/summary
        tags (list, optional): List of tags related to the content (only the first five are used)
        content (str, optional): Actual content or excerpt (only the first 300 characters are used)

    Returns:
        str: Enhanced prompt for image generation. If the LLM call (or logging
            its result) raises, the result of generate_img_prompt(user_prompt)
            is returned instead.
    """
    # Context block: the base prompt followed by whichever labelled fields exist.
    context_lines = [user_prompt]
    for label, value in (("Title", title), ("Description", description)):
        if value:
            context_lines.append(f"{label}: {value}")
    if tags and len(tags) > 0:
        # Limit to 5 tags to avoid too much noise
        context_lines.append("Tags: " + ", ".join(tags[:5]))
    combined_context = "\n".join(context_lines)

    # Short excerpt of the content (limited to avoid token limits)
    if not content:
        content_excerpt = ""
    elif len(content) > 300:
        content_excerpt = content[:300] + "..."
    else:
        content_excerpt = content

    # Instruction prompt for the LLM
    prompt = f"""
As an expert prompt engineer for AI image generation models, create a detailed, creative prompt
for generating a high-quality, relevant image based on the following context:

{combined_context}

Additional content excerpt:
{content_excerpt}

Your task is to:
1. Analyze the context and content to understand the main theme and subject
2. Create a rich, detailed prompt for image generation (50-75 words)
3. Include specific visual details, art style, mood, lighting, composition
4. Make sure the prompt is highly relevant to the original context
5. Avoid prohibited content or anything that violates image generation guidelines

Reply with ONLY the final prompt. No explanations or other text.
"""

    try:
        enhanced_prompt = llm_text_gen(prompt)
        logger.info(f"Generated enhanced image prompt: {enhanced_prompt[:100]}...")
        return enhanced_prompt
    except Exception as e:
        logger.error(f"Error generating enhanced prompt: {e}")
        # Fall back to the simple prompt generation if enhanced fails
        return generate_img_prompt(user_prompt)
|
||||
@@ -1,39 +0,0 @@
|
||||
import base64
|
||||
import datetime
|
||||
import os
|
||||
import requests
|
||||
from PIL import Image
|
||||
import logging
|
||||
|
||||
def save_generated_image(img_generation_response):
    """
    Save generated images for blog, ensuring unique names for SEO.

    Each entry of ``img_generation_response["artifacts"]`` is expected to carry
    a base64-encoded image under the ``"base64"`` key. Files are written to the
    directory named by the IMG_SAVE_DIR environment variable (default
    ``generated_images``), which is created if missing. The first artifact is
    stored as ``generated_image_<timestamp>.webp``; any further artifacts get
    an ``_<index>`` suffix so they no longer overwrite one another.

    Args:
        img_generation_response (dict): Response with an ``"artifacts"`` list.

    Returns:
        str: Path of the first saved image, or None if nothing could be saved
            (malformed response, no artifacts, decode or write error).
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Get image save directory with fallback to a local directory
    image_save_dir = os.getenv('IMG_SAVE_DIR', 'generated_images')

    # Create the directory if it doesn't exist
    if not os.path.exists(image_save_dir):
        logger.info(f"Creating image save directory: {image_save_dir}")
        os.makedirs(image_save_dir, exist_ok=True)

    # NOTE(review): the decoded bytes are written verbatim, so the ".webp"
    # extension only matches when the provider actually returns WebP data.
    base_name = f"generated_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}"
    generated_image_filepath = None

    try:
        for index, image in enumerate(img_generation_response["artifacts"]):
            # The first artifact keeps the historical file name; later ones
            # are suffixed so every artifact survives on disk.
            suffix = "" if index == 0 else f"_{index}"
            artifact_path = os.path.join(image_save_dir, f"{base_name}{suffix}.webp")
            with open(artifact_path, "wb") as f:
                f.write(base64.b64decode(image["base64"]))
            if generated_image_filepath is None:
                generated_image_filepath = artifact_path
    except Exception as e:
        logger.error(f"Error saving image: {e}")
        return None

    if generated_image_filepath is None:
        # Nothing was written; do not hand back a path to a non-existent file.
        logger.error("Error saving image: no artifacts in the generation response")
        return None

    logger.info(f"Saved image at path: {generated_image_filepath}")

    return generated_image_filepath
|
||||
@@ -1,88 +0,0 @@
|
||||
# Content Style Analyzer Guide
|
||||
|
||||
## What is the Content Style Analyzer?
|
||||
|
||||
The Content Style Analyzer is an AI-powered tool that helps you understand and improve your writing style. It analyzes your content to provide detailed insights about your writing approach, helping you create more consistent and engaging content.
|
||||
|
||||
## What Can It Do?
|
||||
|
||||
### 1. Writing Style Analysis
|
||||
The analyzer examines your content to identify:
|
||||
- **Tone**: Whether your writing is formal, casual, technical, or conversational
|
||||
- **Voice**: If you're using active or passive voice
|
||||
- **Complexity**: How complex your writing is (simple, moderate, or complex)
|
||||
- **Engagement Level**: How engaging your content is (low, medium, or high)
|
||||
|
||||
### 2. Content Characteristics
|
||||
It provides insights about:
|
||||
- **Sentence Structure**: How your sentences are organized
|
||||
- **Vocabulary Level**: Whether you're using basic, intermediate, or advanced vocabulary
|
||||
- **Paragraph Organization**: How your paragraphs flow together
|
||||
- **Content Flow**: How well your ideas progress throughout the content
|
||||
|
||||
### 3. Target Audience Analysis
|
||||
The tool helps you understand:
|
||||
- **Demographics**: Who your content appeals to
|
||||
- **Expertise Level**: Whether it's suitable for beginners, intermediate, or advanced readers
|
||||
- **Industry Focus**: Which industry your content is targeting
|
||||
- **Geographic Focus**: Which regions your content is most relevant for
|
||||
|
||||
### 4. Content Type Assessment
|
||||
It identifies:
|
||||
- **Primary Type**: Whether it's a blog post, article, product description, etc.
|
||||
- **Secondary Types**: Other content categories it might fit into
|
||||
- **Purpose**: Whether it's meant to inform, entertain, persuade, etc.
|
||||
- **Call to Action**: How effectively you're guiding readers to take action
|
||||
|
||||
### 5. Style Pattern Analysis
|
||||
The analyzer also looks for specific patterns in your writing:
|
||||
- **Sentence Patterns**: How your sentences are structured
|
||||
- **Word Patterns**: Your vocabulary choices and frequency
|
||||
- **Rhetorical Devices**: Literary techniques you're using
|
||||
|
||||
## How to Use It
|
||||
|
||||
1. **Input Your Content**: Provide your content, including:
|
||||
- Main content text
|
||||
- Title
|
||||
- Description
|
||||
|
||||
2. **Get Analysis**: The tool will analyze your content and provide detailed insights
|
||||
|
||||
3. **Review Recommendations**: Receive suggestions for:
|
||||
- Writing tone
|
||||
- Target audience
|
||||
- Content type
|
||||
- Creativity level
|
||||
- Geographic focus
|
||||
|
||||
## Benefits for Content Creators
|
||||
|
||||
1. **Consistency**: Maintain a consistent writing style across your content
|
||||
2. **Audience Alignment**: Ensure your content matches your target audience's expectations
|
||||
3. **Quality Improvement**: Identify areas where your writing can be enhanced
|
||||
4. **Style Optimization**: Get recommendations for better engagement
|
||||
5. **Content Strategy**: Make data-driven decisions about your content approach
|
||||
|
||||
## Tips for Best Results
|
||||
|
||||
1. **Provide Complete Content**: Include all relevant sections (title, description, main content)
|
||||
2. **Keep Content Length Reasonable**: Only the first 4,000 characters of your main content are analyzed, so lead with the writing that best represents your style
|
||||
3. **Review All Sections**: Pay attention to all aspects of the analysis for comprehensive insights
|
||||
4. **Use Recommendations**: Apply the suggested improvements to enhance your content
|
||||
|
||||
## Understanding the Results
|
||||
|
||||
The analysis results are presented in a clear, structured format that helps you:
|
||||
- Identify your current writing style
|
||||
- Understand your content's strengths
|
||||
- Spot areas for improvement
|
||||
- Make informed decisions about future content
|
||||
|
||||
## Need Help?
|
||||
|
||||
If you encounter any issues or have questions about the analysis results, please contact your content team or technical support for assistance.
|
||||
|
||||
---
|
||||
|
||||
*Note: This tool is designed to help content creators improve their writing style and content quality. It uses advanced AI technology to provide detailed insights and recommendations.*
|
||||
@@ -1,203 +0,0 @@
|
||||
"""Style analyzer module for analyzing content style using LLM."""
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
from loguru import logger
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
import json
|
||||
import re
|
||||
|
||||
class StyleAnalyzer:
    """Analyzer for content style using LLM.

    Both public methods build a prompt, send it through ``llm_text_gen`` and
    parse the reply as JSON. On failure they return a dict with an ``"error"``
    key instead of raising.
    """

    # Maximum number of characters of ``main_content`` embedded in a prompt.
    MAX_CONTENT_CHARS = 4000

    def __init__(self):
        """Initialize the style analyzer."""
        logger.info("[StyleAnalyzer.__init__] Initializing style analyzer")

    def _clean_json_response(self, text: str) -> str:
        """
        Clean the LLM response to extract valid JSON.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: The response with markdown code-fence markers removed and
            surrounding whitespace stripped, or "" if cleaning failed
            (for example when ``text`` is not a string).
        """
        try:
            # Remove markdown code block markers
            cleaned_string = text.replace("```json", "").replace("```", "").strip()

            # Log the cleaned JSON for debugging
            logger.debug(f"[StyleAnalyzer._clean_json_response] Cleaned JSON: {cleaned_string}")

            return cleaned_string

        except Exception as e:
            logger.error(f"[StyleAnalyzer._clean_json_response] Error cleaning response: {str(e)}")
            return ""

    def _parse_json_response(self, analysis_text: str, caller: str) -> Dict:
        """
        Clean and parse an LLM reply, retrying once after newline repair.

        Args:
            analysis_text (str): Raw response from LLM
            caller (str): Name of the calling method, used in log prefixes

        Returns:
            Dict: The decoded JSON document

        Raises:
            ValueError: If nothing is left after cleaning the response
            json.JSONDecodeError: If the text is still invalid after repair
        """
        cleaned_json = self._clean_json_response(analysis_text)
        if not cleaned_json:
            raise ValueError("No valid JSON found in response")

        # Log the cleaned JSON for debugging
        logger.debug(f"[StyleAnalyzer.{caller}] Cleaned JSON: {cleaned_json}")

        try:
            return json.loads(cleaned_json)
        except json.JSONDecodeError as e:
            logger.warning(f"[StyleAnalyzer.{caller}] Initial JSON parsing failed: {e}")

            # Raw line breaks are the usual culprit: turn newlines that do not
            # follow a quote or backslash into spaces, then drop literal "\n"
            # escape sequences as well.
            repaired_json = re.sub(r'([^"\\])\n', r'\1 ', cleaned_json)
            repaired_json = re.sub(r'\\n', ' ', repaired_json)

            # Try parsing again; a second failure propagates to the caller
            return json.loads(repaired_json)

    def analyze_content_style(self, content: Dict) -> Dict:
        """
        Analyze the style of the provided content.

        Args:
            content (Dict): Content to analyze; the keys "main_content",
                "title" and "description" are read, each defaulting to "".

        Returns:
            Dict: The parsed analysis on success. If the reply is not valid
            JSON, a dict with "error" and "raw_response". For any other
            failure, a dict with "error" and "success": False.
        """
        try:
            logger.info("[StyleAnalyzer.analyze_content_style] Starting content style analysis")

            # Prepare content for analysis
            main_content = content.get("main_content", "")
            title = content.get("title", "")
            description = content.get("description", "")

            # Limit content length for API. This is done outside the template
            # so the remark itself is not sent to the model as prompt text.
            content_excerpt = main_content[:self.MAX_CONTENT_CHARS]

            # Construct the analysis prompt
            prompt = f"""Analyze the following content and provide a comprehensive writing style analysis.
Focus on identifying the writing style, tone, and characteristics that make this content unique.

Title: {title}
Description: {description}
Content: {content_excerpt}

IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
{{
    "writing_style": {{
        "tone": "formal/casual/technical/etc",
        "voice": "active/passive",
        "complexity": "simple/moderate/complex",
        "engagement_level": "low/medium/high"
    }},
    "content_characteristics": {{
        "sentence_structure": "description",
        "vocabulary_level": "basic/intermediate/advanced",
        "paragraph_organization": "description",
        "content_flow": "description"
    }},
    "target_audience": {{
        "demographics": ["list"],
        "expertise_level": "beginner/intermediate/advanced",
        "industry_focus": "primary industry",
        "geographic_focus": "primary region"
    }},
    "content_type": {{
        "primary_type": "blog/article/product/etc",
        "secondary_types": ["list"],
        "purpose": "inform/entertain/persuade/etc",
        "call_to_action": "type and frequency"
    }},
    "recommended_settings": {{
        "writing_tone": "recommended tone",
        "target_audience": "recommended audience",
        "content_type": "recommended type",
        "creativity_level": "low/medium/high",
        "geographic_location": "recommended location"
    }}
}}"""

            # Get analysis from LLM
            logger.debug("[StyleAnalyzer.analyze_content_style] Sending prompt to LLM")
            analysis_text = llm_text_gen(prompt)

            try:
                analysis = self._parse_json_response(analysis_text, "analyze_content_style")
                logger.info("[StyleAnalyzer.analyze_content_style] Successfully parsed analysis results")
                return analysis

            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_content_style] Failed to parse JSON response: {e}")
                logger.debug(f"[StyleAnalyzer.analyze_content_style] Raw response: {analysis_text}")
                return {
                    "error": "Failed to parse analysis results",
                    "raw_response": analysis_text
                }

        except Exception as e:
            logger.error(f"[StyleAnalyzer.analyze_content_style] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False
            }

    def analyze_style_patterns(self, content: Dict) -> Dict:
        """
        Analyze specific writing style patterns in the content.

        Args:
            content (Dict): Content to analyze; only "main_content" is read.

        Returns:
            Dict: The parsed pattern analysis on success. If the reply is not
            valid JSON, a dict with "error" and "raw_response". For any other
            failure, a dict with "error" and "success": False.
        """
        try:
            main_content = content.get("main_content", "")
            content_excerpt = main_content[:self.MAX_CONTENT_CHARS]

            prompt = f"""Analyze the following content for specific writing style patterns.
Focus on identifying recurring patterns in sentence structure, word choice, and rhetorical devices.

Content: {content_excerpt}

IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
{{
    "sentence_patterns": {{
        "structure": ["list of patterns"],
        "length": "short/medium/long",
        "complexity": "simple/moderate/complex"
    }},
    "word_patterns": {{
        "vocabulary": ["list of patterns"],
        "frequency": "low/medium/high",
        "diversity": "low/medium/high"
    }},
    "rhetorical_devices": {{
        "types": ["list of devices"],
        "frequency": "low/medium/high",
        "effectiveness": "low/medium/high"
    }}
}}"""

            analysis_text = llm_text_gen(prompt)

            try:
                # Same cleaning and repair path as analyze_content_style
                return self._parse_json_response(analysis_text, "analyze_style_patterns")
            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_style_patterns] Failed to parse JSON response: {e}")
                return {
                    "error": "Failed to parse pattern analysis results",
                    "raw_response": analysis_text
                }

        except Exception as e:
            logger.error(f"[StyleAnalyzer.analyze_style_patterns] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False
            }
|
||||
@@ -1,159 +0,0 @@
|
||||
# ALwrity Setup Guide: API Key Manager
|
||||
|
||||
## What is the API Key Manager?
|
||||
|
||||
The API Key Manager is a crucial component of ALwrity that helps you set up and configure all the necessary API keys and settings for your content creation workflow. It provides a user-friendly wizard interface to guide you through the setup process step by step.
|
||||
|
||||
## Setup Wizard Steps
|
||||
|
||||
### 1. Website Setup
|
||||
- **Purpose**: Configure your website's basic information
|
||||
- **Features**:
|
||||
- Website URL configuration
|
||||
- Site structure setup
|
||||
- Basic SEO settings
|
||||
- Content organization preferences
|
||||
|
||||
### 2. AI Research Setup
|
||||
- **Purpose**: Set up AI-powered research capabilities
|
||||
- **Features**:
|
||||
- Research parameters configuration
|
||||
- Data collection preferences
|
||||
- Analysis settings
|
||||
- Research depth options
|
||||
|
||||
### 3. AI Providers Configuration
|
||||
- **Purpose**: Configure AI service providers
|
||||
- **Supported Providers**:
|
||||
- OpenAI (GPT models)
|
||||
- Google (Gemini Pro)
|
||||
- Anthropic (Claude)
|
||||
- DeepSeek
|
||||
- **Features**:
|
||||
- API key management
|
||||
- Model selection
|
||||
- Usage preferences
|
||||
- Cost optimization settings
|
||||
|
||||
### 4. Personalization Setup
|
||||
- **Purpose**: Customize your content creation experience
|
||||
- **Features**:
|
||||
- Writing style preferences
|
||||
- Tone settings
|
||||
- Content structure templates
|
||||
- Brand voice configuration
|
||||
|
||||
### 5. ALwrity Integrations
|
||||
- **Purpose**: Set up additional tools and services
|
||||
- **Features**:
|
||||
- Third-party service connections
|
||||
- Plugin configurations
|
||||
- API integrations
|
||||
- Workflow automation settings
|
||||
|
||||
### 6. Final Setup
|
||||
- **Purpose**: Complete and verify your configuration
|
||||
- **Features**:
|
||||
- Configuration review
|
||||
- Settings verification
|
||||
- Test connections
|
||||
- Setup completion
|
||||
|
||||
## How to Use the Setup Wizard
|
||||
|
||||
### 1. Starting the Setup
|
||||
1. Launch ALwrity
|
||||
2. Navigate to the Setup section
|
||||
3. Begin the wizard process
|
||||
|
||||
### 2. Navigation
|
||||
- Use the step indicator to track progress
|
||||
- Navigate between steps using buttons
|
||||
- Save progress automatically
|
||||
- Return to previous steps if needed
|
||||
|
||||
### 3. Configuration Process
|
||||
1. **Enter Information**: Fill in required details
|
||||
2. **Verify Settings**: Review your inputs
|
||||
3. **Test Connections**: Ensure everything works
|
||||
4. **Complete Setup**: Finalize your configuration
|
||||
|
||||
## Managing API Keys
|
||||
|
||||
### 1. Key Storage
|
||||
- Secure storage of API keys
|
||||
- Environment variable management
|
||||
- Key rotation support
|
||||
- Access control
|
||||
|
||||
### 2. Key Validation
|
||||
- Automatic key verification
|
||||
- Usage monitoring
|
||||
- Error handling
|
||||
- Expiration tracking
|
||||
|
||||
### 3. Security Features
|
||||
- Encrypted storage
|
||||
- Access logging
|
||||
- Permission management
|
||||
- Secure transmission
|
||||
|
||||
## Progress Tracking
|
||||
|
||||
### 1. Setup Progress
|
||||
- Visual progress indicator
|
||||
- Step completion tracking
|
||||
- Overall setup status
|
||||
- Remaining tasks
|
||||
|
||||
### 2. Status Monitoring
|
||||
- API key status
|
||||
- Connection status
|
||||
- Configuration status
|
||||
- Error reporting
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Before Setup
|
||||
- Gather all necessary API keys
|
||||
- Review provider documentation
|
||||
- Plan your configuration
|
||||
- Backup existing settings
|
||||
|
||||
### 2. During Setup
|
||||
- Follow the wizard steps
|
||||
- Verify each configuration
|
||||
- Test connections
|
||||
- Save progress regularly
|
||||
|
||||
### 3. After Setup
|
||||
- Review all settings
|
||||
- Test functionality
|
||||
- Document configurations
|
||||
- Monitor usage
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### 1. Common Issues
|
||||
- Invalid API keys
|
||||
- Connection problems
|
||||
- Configuration errors
|
||||
- Setup interruptions
|
||||
|
||||
### 2. Solutions
|
||||
- Key verification
|
||||
- Connection testing
|
||||
- Error logging
|
||||
- Support resources
|
||||
|
||||
## Need Help?
|
||||
|
||||
If you encounter any issues during setup:
|
||||
1. Check the error messages
|
||||
2. Review the documentation
|
||||
3. Verify your API keys
|
||||
4. Contact ALwrity support
|
||||
|
||||
---
|
||||
|
||||
*Note: Keep your API keys secure and never share them. The API Key Manager helps you manage these keys safely while setting up ALwrity for optimal content creation.*
|
||||
@@ -1,37 +1,54 @@
|
||||
"""API key manager package."""
|
||||
"""API Key Manager package for ALwrity."""
|
||||
|
||||
from .manager import APIKeyManager
|
||||
from .api_key_manager import (
|
||||
initialize_wizard_state,
|
||||
update_progress,
|
||||
check_all_api_keys,
|
||||
render,
|
||||
render_navigation
|
||||
from .api_key_manager import render, check_onboarding_completion, get_onboarding_status, reset_onboarding
|
||||
from .onboarding_progress import (
|
||||
OnboardingProgress,
|
||||
get_onboarding_progress,
|
||||
render_progress_indicator,
|
||||
render_resume_message,
|
||||
StepStatus,
|
||||
StepData
|
||||
)
|
||||
from .components import (
|
||||
render_website_setup,
|
||||
render_ai_research_setup,
|
||||
render_ai_providers,
|
||||
render_final_setup,
|
||||
render_personalization_setup,
|
||||
render_alwrity_integrations,
|
||||
from .validation import check_all_api_keys
|
||||
from .components.base import (
|
||||
render_step_indicator,
|
||||
render_navigation_buttons,
|
||||
render_step_indicator
|
||||
render_step_validation,
|
||||
render_resume_options
|
||||
)
|
||||
|
||||
# Export all public components
|
||||
__all__ = [
|
||||
# Main classes
|
||||
'APIKeyManager',
|
||||
'initialize_wizard_state',
|
||||
'update_progress',
|
||||
'check_all_api_keys',
|
||||
'OnboardingProgress',
|
||||
'StepStatus',
|
||||
'StepData',
|
||||
|
||||
# Main functions
|
||||
'render',
|
||||
'render_navigation',
|
||||
'render_website_setup',
|
||||
'render_ai_research_setup',
|
||||
'render_ai_providers',
|
||||
'render_final_setup',
|
||||
'render_personalization_setup',
|
||||
'render_alwrity_integrations',
|
||||
'check_onboarding_completion',
|
||||
'get_onboarding_status',
|
||||
'reset_onboarding',
|
||||
'get_onboarding_progress',
|
||||
|
||||
# UI components
|
||||
'render_progress_indicator',
|
||||
'render_resume_message',
|
||||
'render_step_indicator',
|
||||
'render_navigation_buttons',
|
||||
'render_step_indicator'
|
||||
]
|
||||
'render_step_validation',
|
||||
'render_resume_options',
|
||||
|
||||
# Validation
|
||||
'check_all_api_keys'
|
||||
]
|
||||
|
||||
# Version information
|
||||
__version__ = "2.0.0"
|
||||
__author__ = "ALwrity Team"
|
||||
__description__ = "Comprehensive API key management and onboarding system for ALwrity"
|
||||
|
||||
# Note: FastAPI endpoints have been moved to the backend/ directory
|
||||
# for better separation of concerns and enterprise architecture.
|
||||
@@ -1,165 +0,0 @@
|
||||
"""API key manager for handling various API keys."""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
import streamlit as st
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from dotenv import load_dotenv
|
||||
from .components.website_setup import render_website_setup
|
||||
from .components.ai_research_setup import render_ai_research_setup
|
||||
from .components.ai_providers import render_ai_providers
|
||||
from .components.final_setup import render_final_setup
|
||||
from .components.personalization_setup import render_personalization_setup
|
||||
from .components.alwrity_integrations import render_alwrity_integrations
|
||||
from .components.base import render_navigation_buttons, render_step_indicator
|
||||
from .wizard_state import initialize_wizard_state, get_current_step, next_step, previous_step
|
||||
from .manager import APIKeyManager
|
||||
from .validation import check_all_api_keys
|
||||
|
||||
# Send loguru output to a log file (DEBUG and above) and to stdout (INFO and
# above), using one shared record layout for both sinks.
_LOG_FORMAT = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"

logger.remove()  # Drop the default handler before installing ours
logger.add("logs/api_key_manager.log", level="DEBUG", format=_LOG_FORMAT)
logger.add(sys.stdout, level="INFO", format=_LOG_FORMAT)
|
||||
|
||||
def initialize_wizard_state():
    """Create ``st.session_state.wizard_state`` if the session has none yet.

    An existing wizard state is left untouched. A new one starts at step 0
    with zero total steps, no completed steps, no API key statuses and zero
    setup progress.
    """
    logger.debug("Initializing wizard state")

    # Nothing to do when a previous run already populated the state.
    if 'wizard_state' in st.session_state:
        return

    blank_state = {
        'current_step': 0,
        'total_steps': 0,
        'completed_steps': set(),
        'api_keys_status': {},
        'setup_progress': 0
    }
    st.session_state.wizard_state = blank_state
    logger.info("Created new wizard state")
|
||||
|
||||
def update_progress():
    """Update the overall setup progress.

    Reads the API key manager stored under ``st.session_state['api_key_manager']``,
    counts the keys across its ``api_key_groups`` and the entries of
    ``wizard_state['api_keys_status']`` flagged as configured, then stores the
    resulting percentage in ``wizard_state['setup_progress']``.

    Returns early with a warning when no manager is in the session. Any other
    failure is logged with its traceback and swallowed, so this function does
    not raise.
    """
    logger.debug("Updating setup progress")
    try:
        # Get the API key manager instance from session state
        api_key_manager = st.session_state.get('api_key_manager')
        if not api_key_manager:
            logger.warning("API key manager not found in session state")
            return

        total_keys = sum(len(keys) for keys in api_key_manager.api_key_groups.values())
        configured_keys = sum(
            1
            for status in st.session_state.wizard_state['api_keys_status'].values()
            if status.get('configured', False)
        )

        # With no keys registered there is nothing to configure: report 0%
        # instead of dividing by zero and leaving a stale value behind.
        progress = (configured_keys / total_keys) * 100 if total_keys else 0.0
        st.session_state.wizard_state['setup_progress'] = progress
        logger.info(f"Updated progress to {progress:.1f}%")
    except Exception as e:
        # loguru attaches the traceback through opt(exception=True). Passing
        # the error as a format argument keeps braces in its text from being
        # interpreted as placeholders.
        logger.opt(exception=True).error("Error updating progress: {}", e)
|
||||
|
||||
def render(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """
    Render the API key manager interface.

    Initializes ``st.session_state.current_step`` to 1 when missing, draws the
    six-step indicator, then delegates to the renderer of the current step.

    Args:
        api_key_manager: Manager instance handed to the step renderer.

    Returns:
        Dict[str, Any]: Whatever the step renderer returns. If an exception is
        raised, a dict with "current_step" and "error". When the current step
        is not in 1-6, nothing is rendered and the result is None.
    """
    try:
        logger.info("[render] Rendering API key manager interface")

        # Initialize session state for current step if not exists
        if "current_step" not in st.session_state:
            st.session_state.current_step = 1
            logger.info("[render] Initialized current_step to 1")

        # Display step indicator
        render_step_indicator(st.session_state.current_step, 6)

        # Step number -> (log line, renderer) for the six wizard screens
        step_table = {
            1: ("[render] Rendering AI providers setup", render_ai_providers),
            2: ("[render] Rendering website setup", render_website_setup),
            3: ("[render] Rendering AI Research setup", render_ai_research_setup),
            4: ("[render] Rendering personalization setup", render_personalization_setup),
            5: ("[render] Rendering ALwrity integrations setup", render_alwrity_integrations),
            6: ("[render] Rendering final setup", render_final_setup),
        }

        selected = step_table.get(st.session_state.current_step)
        if selected is not None:
            log_line, step_renderer = selected
            logger.info(log_line)
            return step_renderer(api_key_manager)

    except Exception as e:
        error_msg = f"Error in API key manager: {str(e)}"
        logger.error(f"[render] {error_msg}")
        st.error(error_msg)
        return {"current_step": st.session_state.current_step, "error": error_msg}
|
||||
|
||||
def render_navigation(self):
    """Render the Back / Continue buttons and advance the wizard step.

    ``self`` must provide ``current_step``, the ``validate_providers`` and
    ``validate_wordpress_credentials`` methods, and the key and WordPress
    attributes read below. Moving from step 1 to step 2 stores every key under
    ``st.session_state['api_keys']``; moving from step 2 to step 3 stores the
    WordPress settings under ``st.session_state['wordpress_credentials']``.
    A failed validation shows an error and returns without changing the step.
    """

    def _wordpress_fields():
        # Snapshot of the WordPress settings currently held on ``self``.
        return {
            'url': self.wordpress_url,
            'username': self.wordpress_username,
            'password': self.wordpress_password,
            'app_password': self.wordpress_app_password
        }

    st.markdown("""
        <div class="nav-buttons">
    """, unsafe_allow_html=True)

    # Back button: only offered past the first step
    if self.current_step > 1 and st.button("← Back", key="back_button"):
        self.current_step -= 1
        st.rerun()

    # Next/Continue button: only offered before step 3
    if self.current_step < 3 and st.button("Continue →", key="next_button"):
        if self.current_step == 1:
            # Validate at least one provider is configured
            if not self.validate_providers():
                st.error("Please configure at least one AI provider to continue.")
                return

            # Store all API keys in session state
            st.session_state['api_keys'] = {
                'openai': self.openai_key,
                'google': self.google_key,
                'anthropic': self.anthropic_key,
                'mistral': self.mistral_key,
                'serpapi': self.serpapi_key,
                'google_search': self.google_search_key,
                'google_search_cx': self.google_search_cx,
                'bing_search': self.bing_search_key,
                'tavily': self.tavily_key,
                'metaphor': self.metaphor_key,
                'wordpress': _wordpress_fields()
            }
            self.current_step = 2
            st.rerun()
        elif self.current_step == 2:
            # Validate WordPress credentials
            if not self.validate_wordpress_credentials():
                st.error("Please configure valid WordPress credentials to continue.")
                return

            # Store WordPress credentials in session state
            st.session_state['wordpress_credentials'] = _wordpress_fields()
            self.current_step = 3
            st.rerun()

    st.markdown("</div>", unsafe_allow_html=True)
|
||||
@@ -1,76 +0,0 @@
|
||||
"""API key manager components."""
|
||||
|
||||
import asyncio
|
||||
import streamlit as st
|
||||
import os
|
||||
from loguru import logger
|
||||
from .styles import API_KEY_MANAGER_STYLES
|
||||
from .config import FEATURE_PREVIEWS, API_KEY_CONFIGS
|
||||
from .wizard_state import (
|
||||
get_current_step,
|
||||
next_step,
|
||||
previous_step,
|
||||
set_selected_providers,
|
||||
get_selected_providers,
|
||||
set_website_url,
|
||||
get_website_url,
|
||||
set_api_key,
|
||||
get_api_key,
|
||||
can_proceed_to_next_step,
|
||||
get_api_keys
|
||||
)
|
||||
from .health_monitor import APIKeyHealthMonitor
|
||||
from .key_rotation import KeyRotationManager
|
||||
from ...utils.website_analyzer import analyze_website
|
||||
from .api_key_tests import (
|
||||
test_openai_api_key,
|
||||
test_gemini_api_key,
|
||||
test_anthropic_api_key,
|
||||
test_deepseek_api_key,
|
||||
test_mistral_api_key
|
||||
)
|
||||
from .components.base import render_step_indicator, render_navigation_buttons, render_success_message
|
||||
from .components import (
|
||||
render_ai_providers,
|
||||
render_website_setup,
|
||||
render_health_monitoring,
|
||||
render_ai_research_setup,
|
||||
render_final_setup
|
||||
)
|
||||
|
||||
def render_wizard():
    """Render the main wizard interface.

    Draws the title and step indicator, then the screen matching the value
    returned by ``get_current_step()`` (1-5), and finally the navigation
    buttons. A step outside 1-5 renders no step content.
    """
    st.title("API Key Setup Wizard")

    # Get current step
    active_step = get_current_step()

    # Render step indicator
    render_step_indicator()

    # Step number -> screen renderer
    step_renderers = {
        1: render_ai_providers,
        2: render_website_setup,
        3: render_ai_research_setup,
        4: render_final_setup,
        5: render_health_monitoring,
    }

    # Render current step content
    show_step = step_renderers.get(active_step)
    if show_step is not None:
        show_step()

    # Render navigation buttons
    render_navigation_buttons()
|
||||
|
||||
__all__ = [
|
||||
'render_wizard',
|
||||
'render_step_indicator',
|
||||
'render_navigation_buttons',
|
||||
'render_success_message',
|
||||
'render_ai_providers',
|
||||
'render_website_setup',
|
||||
'render_ai_research_setup',
|
||||
'render_health_monitoring',
|
||||
'render_final_setup'
|
||||
]
|
||||
@@ -1,281 +0,0 @@
|
||||
"""AI providers setup component."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons, render_step_indicator, render_tab_style
|
||||
from ..wizard_state import next_step, update_progress
|
||||
from datetime import datetime
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
def validate_api_key(key: str) -> bool:
    """Return True when *key* holds at least one non-whitespace character.

    This is a presence check only: falsy values (``None``, ``""``) and
    whitespace-only strings are rejected, and no provider-specific format is
    verified.
    """
    return bool(key) and len(key.strip()) > 0
|
||||
|
||||
def save_to_env_file(key_name: str, key_value: str) -> bool:
    """Save API key to .env file.

    The ``.env`` file is looked up five directory levels above this module.
    An existing ``KEY=`` line is replaced in place; otherwise the entry is
    appended. The same file is then reloaded into the process environment.

    Args:
        key_name: Environment variable name to write.
        key_value: Value to store for ``key_name``.

    Returns:
        bool: True when the file was written and reloaded, False if any step
        failed (the error is logged, not raised).
    """
    try:
        # Walk up from this file's directory to the directory holding .env
        env_dir = os.path.dirname(__file__)
        for _ in range(4):
            env_dir = os.path.dirname(env_dir)
        env_path = os.path.join(env_dir, '.env')

        # Read existing .env file
        env_contents = []
        if os.path.exists(env_path):
            with open(env_path, 'r', encoding='utf-8') as f:
                env_contents = f.readlines()

        new_entry = f"{key_name}={key_value}\n"
        key_prefix = f"{key_name}="

        # Replace the first matching line, or append when the key is new
        for i, line in enumerate(env_contents):
            if line.startswith(key_prefix):
                env_contents[i] = new_entry
                break
        else:
            # A last line without a newline would otherwise be fused with
            # the appended entry ("A=1B=2").
            if env_contents and not env_contents[-1].endswith("\n"):
                env_contents[-1] += "\n"
            env_contents.append(new_entry)

        # Write back to .env file
        with open(env_path, 'w', encoding='utf-8') as f:
            f.writelines(env_contents)

        # Reload exactly the file that was written, rather than whichever
        # .env the default discovery happens to find.
        load_dotenv(env_path, override=True)

        logger.info(f"[save_to_env_file] Successfully saved {key_name} to .env file")
        return True
    except Exception as e:
        logger.error(f"[save_to_env_file] Error saving to .env file: {str(e)}")
        return False
|
||||
|
||||
def _render_provider_key_field(label, env_value, widget_key, help_text,
                               placeholder, found_message, guide_title,
                               guide_markdown):
    """Render one provider's key input, status line and how-to expander; return the entered text."""
    with st.container():
        entered = st.text_input(
            label,
            value=env_value,
            type="password",
            key=widget_key,
            help=help_text,
            placeholder=placeholder
        )

        if env_value:
            # A key was already present in the environment when the page loaded.
            st.success(found_message)
        elif entered:
            if validate_api_key(entered):
                st.markdown("""
                    <div class="ai-provider-status status-valid">
                        ✓ API key configured
                    </div>
                """, unsafe_allow_html=True)
            else:
                st.markdown("""
                    <div class="ai-provider-status status-invalid">
                        ⚠️ Invalid API key format
                    </div>
                """, unsafe_allow_html=True)

        with st.expander(guide_title, expanded=False):
            st.markdown(guide_markdown)

    return entered


def _render_coming_soon_provider(icon, name, blurb):
    """Render a disabled "Coming Soon" card for a provider that cannot be configured yet."""
    with st.container():
        st.markdown(f"""
            <div class="ai-provider-card disabled">
                <div class="ai-provider-header">
                    <div class="ai-provider-icon">{icon}</div>
                    <div class="ai-provider-title">{name} <span class="coming-soon-badge">Coming Soon</span></div>
                </div>
                <div class="ai-provider-content">
                    <p>{blurb}</p>
                </div>
            </div>
        """, unsafe_allow_html=True)
        st.info(f"{name} integration will be available in the next update")


def render_ai_providers(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI providers setup step (wizard step 1).

    Shows key inputs for OpenAI and Google Gemini, placeholder cards for
    Anthropic and Mistral, a validation summary and the navigation buttons.
    When the user continues with at least one non-blank key, each valid key is
    written to the .env file, the keys are stored under
    ``st.session_state['api_keys']``, ``st.session_state['current_step']`` is
    set to 2 and the script is rerun.

    Args:
        api_key_manager: Manager instance; stored in
            ``st.session_state['api_key_manager']`` and not otherwise used here.

    Returns:
        Dict[str, Any]: ``{"current_step": 1, "changes_made": bool}`` on a
        normal render, or ``{"current_step": 1, "error": str}`` when saving or
        rendering failed.
    """
    logger.info("[render_ai_providers] Starting AI providers setup")
    try:
        # Load environment variables
        load_dotenv(override=True)

        # Get existing API keys from .env
        openai_key = os.getenv('OPENAI_API_KEY', '')
        gemini_key = os.getenv('GEMINI_API_KEY', '')

        # Initialize wizard state if not already initialized
        if 'wizard_state' not in st.session_state:
            st.session_state.wizard_state = {
                'current_step': 1,
                'total_steps': 6,
                'progress': 0,
                'completed_steps': set(),
                'last_updated': datetime.now()
            }
            logger.info("[render_ai_providers] Initialized wizard state")

        # Store API key manager in session state for update_progress
        st.session_state['api_key_manager'] = api_key_manager

        # Main content
        st.markdown("""
            <div class='setup-header'><h2>🤖 AI LLM Providers Setup</h2></div>
        """, unsafe_allow_html=True)

        # Create tabs for different AI providers
        tabs = st.tabs(["Primary Providers", "Additional Providers"])

        with tabs[0]:
            st.markdown("### Primary AI Providers")

            # Create a grid layout for AI provider cards
            col1, col2 = st.columns(2)

            with col1:
                # OpenAI Card
                openai_input = _render_provider_key_field(
                    "OpenAI API Key",
                    openai_key,
                    "openai_key",
                    "Enter your OpenAI API key",
                    "Power your content generation with GPT-4 AI models",
                    "✅ OpenAI API key found in environment",
                    "📋 How to get your OpenAI API key",
                    """
                    **Step-by-step guide:**
                    1. Go to [OpenAI's website](https://platform.openai.com)
                    2. Sign up or log in to your account
                    3. Navigate to the API section
                    4. Click "Create new secret key"
                    5. Copy the generated key and paste it here

                    **Note:** Keep your API key secure and never share it publicly.
                    """,
                )

            with col2:
                # Google Card
                gemini_input = _render_provider_key_field(
                    "Google Gemini API Key",
                    gemini_key,
                    "google_key",
                    "Enter your Google API key",
                    "Power your content generation with Gemini AI models",
                    "✅ Gemini API key found in environment",
                    "📋 How to get your Google API key",
                    """
                    **Step-by-step guide:**
                    1. Visit [Google AI Studio](https://makersuite.google.com/app/apikey)
                    2. Sign in with your Google account
                    3. Click "Create API key"
                    4. Copy the generated key and paste it here

                    **Note:** Make sure to enable the Gemini API in your Google Cloud Console.
                    """,
                )

        with tabs[1]:
            st.markdown("### Additional AI Providers")
            st.markdown("Configure additional AI providers for enhanced capabilities")

            # Create a grid layout for additional provider cards
            col1, col2 = st.columns(2)

            with col1:
                # Anthropic Card (Coming Soon)
                _render_coming_soon_provider(
                    "🧠", "Anthropic", "Access Claude for advanced content generation"
                )

            with col2:
                # Mistral Card (Coming Soon)
                _render_coming_soon_provider(
                    "⚡", "Mistral", "Use Mistral's efficient language models"
                )

        # Track changes and validate keys
        openai_valid = validate_api_key(openai_input)
        gemini_valid = validate_api_key(gemini_input)
        changes_made = bool(openai_input or gemini_input)
        has_valid_key = openai_valid or gemini_valid

        if has_valid_key:
            validation_message = "✅ At least one AI provider configured successfully"
        elif changes_made:
            validation_message = "⚠️ Please provide at least one valid API key"
        else:
            validation_message = "⚠️ Please configure at least one AI provider to continue"

        # Display validation message
        if has_valid_key:
            st.success(validation_message)
        else:
            st.warning(validation_message)

        # Navigation buttons
        if render_navigation_buttons(1, 6, changes_made):
            if has_valid_key:
                # Save API keys to .env file; keys are trimmed so the stored
                # value matches what validate_api_key() actually checked.
                if openai_valid:
                    if save_to_env_file("OPENAI_API_KEY", openai_input.strip()):
                        logger.info("[render_ai_providers] OpenAI API key saved to .env file")
                    else:
                        st.error("Failed to save OpenAI API key to .env file")
                        return {"current_step": 1, "error": "Failed to save OpenAI API key"}

                if gemini_valid:
                    if save_to_env_file("GEMINI_API_KEY", gemini_input.strip()):
                        logger.info("[render_ai_providers] Google Gemini API key saved to .env file")
                    else:
                        st.error("Failed to save Gemini API key to .env file")
                        return {"current_step": 1, "error": "Failed to save Gemini API key"}

                # Reload environment variables to ensure consistency
                load_dotenv(override=True)

                # Get updated API keys from environment
                updated_openai_key = os.getenv('OPENAI_API_KEY', '')
                updated_gemini_key = os.getenv('GEMINI_API_KEY', '')

                # Merge into the shared 'api_keys' dict instead of replacing it,
                # so entries written by other wizard steps are not discarded.
                stored_keys = dict(st.session_state.get('api_keys') or {})
                stored_keys.update({
                    'openai': updated_openai_key,
                    'google': updated_gemini_key
                })
                st.session_state['api_keys'] = stored_keys

                # Update progress and move to next step
                st.session_state['current_step'] = 2  # Set the next step explicitly
                update_progress()
                st.rerun()  # Rerun to apply the changes
            else:
                st.error("Please configure at least one valid AI provider to continue")

        return {"current_step": 1, "changes_made": changes_made}

    except Exception as e:
        error_msg = f"Error in AI providers setup: {str(e)}"
        logger.error(f"[render_ai_providers] {error_msg}")
        st.error(error_msg)
        return {"current_step": 1, "error": error_msg}
|
||||
@@ -1,400 +0,0 @@
|
||||
"""AI research setup component for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import sys
|
||||
|
||||
# Configure logger
# These statements run at module level, i.e. whenever this module is imported.
# NOTE(review): loguru's `logger` object is imported here rather than created
# per module, so removing/adding handlers presumably affects every other module
# that logs through it — confirm this is intended.
logger.remove()  # Remove default handler
# File sink: DEBUG and above, rotated at 500 MB, files retained for 10 days.
# The path is relative, so it resolves against the process working directory.
logger.add(
    "logs/ai_research_setup.log",
    rotation="500 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, written to stdout with colour markup tags.
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
|
||||
|
||||
def get_existing_api_key(key_name: str) -> str:
    """Get existing API key from environment or .env file.

    The process environment is consulted first. If the variable is unset or
    empty there, the ``.env`` file in the current working directory is scanned
    for the first line starting with ``key_name=`` and everything after that
    first ``=`` is returned, so values that themselves contain ``=`` (for
    example base64 padding) are returned whole.

    Args:
        key_name (str): Name of the API key to retrieve

    Returns:
        str: The API key value if found, empty string otherwise
    """
    # First try to get from environment
    api_key = os.getenv(key_name)
    if api_key:
        return api_key

    # If not in environment, try to get from .env file
    if os.path.exists('.env'):
        try:
            with open('.env', 'r', encoding='utf-8') as f:
                for line in f:
                    name, separator, value = line.strip().partition('=')
                    if separator and name == key_name:
                        # Split on the first '=' only; the value may contain more.
                        api_key = value
                        break
        except (OSError, UnicodeError) as e:
            # Unreadable file: log and fall through to the empty-string result.
            logger.error(f"[get_existing_api_key] Failed to read {key_name} from .env: {str(e)}")

    return api_key if api_key else ""
|
||||
|
||||
def update_env_file(api_keys: Dict[str, str]) -> None:
    """Update the .env file with new API keys, avoiding duplicates.

    The ``.env`` file in the current working directory is rewritten in place:
    comment lines, blank lines and any other line that is not a ``NAME=value``
    assignment are kept as they are (trimmed of surrounding whitespace); each
    variable is written once, at the position of its first occurrence, as
    ``NAME=value``; variables not yet present are appended at the end. When a
    name occurs several times in the existing file, the last value wins unless
    ``api_keys`` overrides it.

    Args:
        api_keys (Dict[str, str]): Dictionary of API keys to update

    Raises:
        Exception: Any error raised while reading or writing the file is
            logged and re-raised unchanged.
    """
    def _assignment_name(text):
        """Return the variable name if *text* is a NAME=value line, else None."""
        if not text or text.startswith('#') or '=' not in text:
            return None
        return text.split('=', 1)[0].strip()

    try:
        # Read existing .env file content
        raw_lines = []
        if os.path.exists('.env'):
            with open('.env', 'r', encoding='utf-8') as f:
                raw_lines = [line.strip() for line in f]

        # Final value per variable: existing entries first, then the overrides.
        final_values = {}
        for text in raw_lines:
            name = _assignment_name(text)
            if name is not None:
                final_values[name] = text.split('=', 1)[1].strip()
        final_values.update(api_keys)

        # Rebuild the file, keeping non-assignment lines and one line per name.
        output_lines = []
        written = set()
        for text in raw_lines:
            name = _assignment_name(text)
            if name is None:
                output_lines.append(text)
            elif name not in written:
                output_lines.append(f"{name}={final_values[name]}")
                written.add(name)
        output_lines.extend(
            f"{name}={value}"
            for name, value in final_values.items()
            if name not in written
        )

        # Write back to .env file
        with open('.env', 'w', encoding='utf-8') as f:
            f.writelines(f"{text}\n" for text in output_lines)

        logger.info("[update_env_file] Successfully updated .env file with API keys")
    except Exception as e:
        logger.error(f"[update_env_file] Error updating .env file: {str(e)}")
        raise
|
||||
|
||||
# Field definitions for the "The Usual" column, in display order.
_USUAL_SEARCH_FIELDS = (
    {
        "env_name": "SERPAPI_KEY",
        "label": "## Enter 🔎 SerpAPI",
        "widget_key": "serpapi_key",
        "help": "Enter your SerpAPI key",
        "placeholder": "Access search engine results for research",
        "status": "✓ API key configured",
        "summary": "📋 How to get your SerpAPI key",
        "steps": (
            'Visit <a href="https://serpapi.com" target="_blank">SerpAPI</a>',
            "Create an account",
            "Go to your dashboard",
            "Copy your API key",
            "Paste it here",
        ),
        "note": "SerpAPI provides real-time search results from multiple engines.",
    },
    {
        "env_name": "FIRECRAWL_API_KEY",
        "label": "Enter 🕷️ Firecrawl API Key",
        "widget_key": "firecrawl_key",
        "help": "Enter your Firecrawl API key",
        "placeholder": "Web content extraction and analysis",
        "status": "✓ Firecrawl API key configured",
        "summary": "📋 How to get your Firecrawl API key",
        "steps": (
            'Visit <a href="https://www.firecrawl.dev/account" target="_blank">Firecrawl</a>',
            "Create an account",
            "Go to your dashboard",
            "Generate your API key",
            "Copy and paste it here",
        ),
        "note": "Firecrawl provides powerful web content extraction and analysis capabilities.",
    },
)

# Field definitions for the "AI Deep Research" column, in display order.
_DEEP_RESEARCH_FIELDS = (
    {
        "env_name": "TAVILY_API_KEY",
        "label": "Enter 🤖 Tavily API Key",
        "widget_key": "tavily_key",
        "help": "Enter your Tavily API key",
        "placeholder": "AI-powered search with semantic understanding",
        "status": "✓ Tavily API key configured",
        "summary": "📋 How to get your Tavily API key",
        "steps": (
            'Visit <a href="https://tavily.com" target="_blank">Tavily</a>',
            "Create an account",
            "Go to API settings",
            "Generate a new API key",
            "Copy and paste it here",
        ),
        "note": "Tavily provides AI-powered semantic search capabilities.",
    },
    {
        "env_name": "METAPHOR_API_KEY",
        "label": "Enter 🧠 Metaphor/Exa API Key",
        "widget_key": "metaphor_key",
        "help": "Enter your Metaphor/Exa API key",
        "placeholder": "Neural search engine for deep research",
        "status": "✓ API key configured",
        "summary": "📋 How to get your Metaphor/Exa API key",
        "steps": (
            'Visit <a href="https://metaphor.systems" target="_blank">Metaphor/Exa</a>',
            "Create an account",
            "Navigate to API settings",
            "Generate your API key",
            "Copy and paste it here",
        ),
        "note": "Metaphor/Exa provides neural search capabilities for deep research.",
    },
)


def _render_research_key_field(spec):
    """Render one research provider's key input, status line and how-to guide; return the trimmed entry."""
    existing_key = get_existing_api_key(spec["env_name"])

    entered = st.text_input(
        spec["label"],
        value=existing_key,
        type="password",
        key=spec["widget_key"],
        help=spec["help"],
        placeholder=spec["placeholder"]
    )
    entered = (entered or "").strip()

    if entered or existing_key:
        status_text = spec["status"]
        st.markdown(f"""
            <div class="ai-provider-status status-valid">
                {status_text}
            </div>
        """, unsafe_allow_html=True)

    summary_text = spec["summary"]
    note_text = spec["note"]
    steps_html = "".join(f"<li>{step}</li>" for step in spec["steps"])
    st.markdown(f"""
        <div class="api-info-section">
            <details>
                <summary>{summary_text}</summary>
                <div class="api-info-content">
                    <p><strong>Step-by-step guide:</strong></p>
                    <ol>{steps_html}</ol>
                    <p><strong>Note:</strong> {note_text}</p>
                </div>
            </details>
        </div>
    """, unsafe_allow_html=True)

    return entered


def render_ai_research_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI research setup step (wizard step 3).

    Shows key inputs for SerpAPI, Firecrawl, Tavily and Metaphor/Exa in two
    columns, the navigation buttons, and four informational popovers. When the
    user continues with at least one non-blank key, the non-empty keys are
    written to the .env file and to ``os.environ``, all four entries are merged
    into ``st.session_state['api_keys']``, ``st.session_state['current_step']``
    is set to 4 and the script is rerun.

    Args:
        api_key_manager: Manager instance; not used by this function.

    Returns:
        Dict[str, Any]: ``{"current_step": 3, "changes_made": bool}``.
    """
    logger.info("[render_ai_research_setup] Rendering AI research setup component")

    st.markdown("""
        <div class='setup-header'><h2>🔍 AI Web Research API Setup</h2></div>
    """, unsafe_allow_html=True)

    # Create two columns for different search types
    col1, col2 = st.columns(2)

    # Trimmed input per environment variable name.
    entered = {}

    with col1:
        st.markdown("### The Usual")
        for spec in _USUAL_SEARCH_FIELDS:
            entered[spec["env_name"]] = _render_research_key_field(spec)

    with col2:
        st.markdown("### AI Deep Research")
        for spec in _DEEP_RESEARCH_FIELDS:
            entered[spec["env_name"]] = _render_research_key_field(spec)

    serpapi_key = entered["SERPAPI_KEY"]
    tavily_key = entered["TAVILY_API_KEY"]
    metaphor_key = entered["METAPHOR_API_KEY"]
    firecrawl_key = entered["FIRECRAWL_API_KEY"]

    # Track changes (whitespace-only input does not count as a key)
    changes_made = bool(serpapi_key or tavily_key or metaphor_key or firecrawl_key)

    # Navigation buttons; 6 matches the total used by the other wizard steps.
    if render_navigation_buttons(3, 6, changes_made):
        if changes_made:
            try:
                # Prepare API keys dictionary with only non-empty values
                candidates = (
                    ('SERPAPI_KEY', serpapi_key),
                    ('TAVILY_API_KEY', tavily_key),
                    ('METAPHOR_API_KEY', metaphor_key),
                    ('FIRECRAWL_API_KEY', firecrawl_key),
                )
                api_keys = {name: value for name, value in candidates if value}

                # Update .env file with new API keys
                update_env_file(api_keys)

                # Update environment variables
                for key, value in api_keys.items():
                    os.environ[key] = value

                # Merge into the shared 'api_keys' dict instead of replacing it,
                # so the 'openai'/'google' entries stored by the AI providers
                # step are not discarded.
                stored_keys = dict(st.session_state.get('api_keys') or {})
                stored_keys.update({
                    'serpapi': serpapi_key,
                    'tavily': tavily_key,
                    'metaphor': metaphor_key,
                    'firecrawl': firecrawl_key
                })
                st.session_state['api_keys'] = stored_keys

                # Update progress and move to next step
                st.session_state['current_step'] = 4
                st.rerun()
            except Exception as e:
                error_msg = f"Error saving API keys: {str(e)}"
                logger.error(f"[render_ai_research_setup] {error_msg}")
                st.error(error_msg)
        else:
            st.error("Please configure at least one research provider to continue")

    # Detailed Information Section
    st.markdown("---")
    st.markdown("### Understanding Your Research Options")

    # Create four columns for the information popovers
    info_col1, info_col2, info_col3, info_col4 = st.columns(4)

    # The Usual: Traditional Search Popover
    with info_col1:
        with st.popover("#### The Usual: Traditional Search"):
            st.markdown("""
            **SerpAPI**
            - Real-time search results from multiple search engines
            - Access to structured data from search results
            - Great for gathering general information and market research
            - Includes features like:
                - Web search results
                - News articles
                - Knowledge graphs
                - Related questions
            """)

    # AI Deep Research Popover
    with info_col2:
        with st.popover("#### AI Deep Research: Advanced Search Capabilities"):
            st.markdown("""
            **Tavily AI**
            - AI-powered search with semantic understanding
            - Automatically summarizes and analyzes search results
            - Perfect for:
                - Deep research tasks
                - Academic research
                - Fact-checking
                - Real-time information gathering

            **Metaphor/Exa**
            - Neural search engine that understands context and meaning
            - Specialized in finding highly relevant content
            - Ideal for:
                - Technical research
                - Finding similar content
                - Discovering patterns in research
                - Understanding topic landscapes
            """)

    # Choosing the Right Tool Popover
    with info_col3:
        with st.popover("#### Choosing the Right Tool"):
            st.markdown("""
            1. **For General Research:**
                - Start with SerpAPI for broad coverage and structured data

            2. **For Deep Analysis:**
                - Use Tavily AI when you need AI-powered insights
                - Choose Metaphor/Exa for neural search and pattern discovery

            3. **For Comprehensive Research:**
                - Combine multiple tools to get the most complete picture
                - Use SerpAPI for initial research
                - Follow up with AI tools for deeper insights

            > **Pro Tip:** Configure multiple providers to ensure you have backup options and can cross-reference results for better accuracy.
            """)

    # Coming Soon Popover
    with info_col4:
        with st.popover("#### 🔜 Coming Soon - More Search Options"):
            st.markdown("""
            **Bing Search API**
            - Microsoft's powerful search API with comprehensive capabilities
            - Features include:
                - Web search with advanced filtering
                - News articles with sentiment analysis
                - Image search with visual recognition
                - Video search with content understanding
                - Custom search parameters for targeted results

            **Google Search API**
            - Google's programmable search engine with extensive features
            - Capabilities include:
                - Custom search engine creation
                - Site-specific search
                - Image and video search
                - News search with time-based filtering
                - Knowledge graph integration

            **Additional Planned Integrations:**
            - **DuckDuckGo API**: Privacy-focused search with no tracking
            - **Brave Search API**: Independent search engine with unique features
            - **Perplexity API**: AI-powered research assistant with real-time data

            > **Note:** These integrations are under active development and will be available in future updates.
            """)

    return {"current_step": 3, "changes_made": changes_made}
|
||||
@@ -1,226 +0,0 @@
|
||||
"""ALwrity integrations setup component."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons, render_step_indicator, render_tab_style
|
||||
|
||||
def update_env_file(env_vars: Dict[str, str]) -> None:
    """Update the .env file with new environment variables, avoiding duplicates.

    The ``.env`` file in the current working directory is rewritten in place:
    comment lines, blank lines and any other line that is not a ``NAME=value``
    assignment are kept as they are (trimmed of surrounding whitespace); each
    variable is written once, at the position of its first occurrence, as
    ``NAME=value``; variables not yet present are appended at the end. When a
    name occurs several times in the existing file, the last value wins unless
    ``env_vars`` overrides it.

    Args:
        env_vars (Dict[str, str]): Dictionary of environment variables to update

    Raises:
        Exception: Any error raised while reading or writing the file is
            logged and re-raised unchanged.
    """
    def _assignment_name(text):
        """Return the variable name if *text* is a NAME=value line, else None."""
        if not text or text.startswith('#') or '=' not in text:
            return None
        return text.split('=', 1)[0].strip()

    try:
        # Read existing .env file content
        raw_lines = []
        if os.path.exists('.env'):
            with open('.env', 'r', encoding='utf-8') as f:
                raw_lines = [line.strip() for line in f]

        # Final value per variable: existing entries first, then the overrides.
        final_values = {}
        for text in raw_lines:
            name = _assignment_name(text)
            if name is not None:
                final_values[name] = text.split('=', 1)[1].strip()
        final_values.update(env_vars)

        # Rebuild the file, keeping non-assignment lines and one line per name.
        output_lines = []
        written = set()
        for text in raw_lines:
            name = _assignment_name(text)
            if name is None:
                output_lines.append(text)
            elif name not in written:
                output_lines.append(f"{name}={final_values[name]}")
                written.add(name)
        output_lines.extend(
            f"{name}={value}"
            for name, value in final_values.items()
            if name not in written
        )

        # Write back to .env file
        with open('.env', 'w', encoding='utf-8') as f:
            f.writelines(f"{text}\n" for text in output_lines)

        logger.info("[update_env_file] Successfully updated .env file")
    except Exception as e:
        logger.error(f"[update_env_file] Error updating .env file: {str(e)}")
        raise
|
||||
|
||||
def _render_integration_tab_intro(title, subtitle):
    """Render the heading and one-line description shown at the top of a tab."""
    st.markdown(f"""
        <div class="tab-content">
            <h3>{title}</h3>
            <p>{subtitle}</p>
        </div>
    """, unsafe_allow_html=True)


def _render_coming_soon_integration(icon, name, description):
    """Render a disabled "Coming Soon" card for an integration that is not available yet."""
    with st.container():
        st.markdown(f"""
            <div class="integration-card disabled">
                <div class="integration-header">
                    <div class="integration-icon">{icon}</div>
                    <div class="integration-title">{name} <span class="coming-soon-badge">Coming Soon</span></div>
                </div>
                <div class="integration-content">
                    <p>{description}</p>
                </div>
            </div>
        """, unsafe_allow_html=True)
        st.info(f"{name} integration will be available in the next update")


def render_alwrity_integrations(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the ALwrity integrations setup step (wizard step 5).

    Every integration shown here is a disabled "Coming Soon" card, so nothing
    has to be configured and the Continue button is always enabled. When the
    user continues, the coming-soon flags are stored in
    ``st.session_state['integrations']``, ``INTEGRATION_DONE=True`` is written
    to the .env file and to ``os.environ``,
    ``st.session_state['current_step']`` is set to 6 and the script is rerun.

    Args:
        api_key_manager: Manager instance; not used by this function.

    Returns:
        Dict[str, Any]: ``{"current_step": 5, "changes_made": True}`` on a
        normal render, or ``{"current_step": 5, "error": str}`` when rendering
        raised.
    """
    try:
        # Apply enhanced tab styling
        render_tab_style()

        st.markdown("""
            <div class='setup-header'>
                <h2>🔄 ALwrity Integrations</h2>
                <p>Connect your content platforms and tools</p>
            </div>
        """, unsafe_allow_html=True)

        # Create tabs for different integration types
        tabs = st.tabs(["Website Platforms", "Social Media", "Analytics Tools"])

        with tabs[0]:
            _render_integration_tab_intro(
                "Website Platforms",
                "Connect your website platforms for seamless content publishing",
            )

            # Website Platforms Grid
            col1, col2 = st.columns(2)

            with col1:
                # WordPress Card (Coming Soon)
                _render_coming_soon_integration(
                    "🌐", "WordPress",
                    "Connect your WordPress site for direct content publishing.",
                )

            with col2:
                # Wix Card (Coming Soon)
                _render_coming_soon_integration(
                    "🎨", "Wix",
                    "Connect your Wix site for direct content publishing.",
                )

        with tabs[1]:
            _render_integration_tab_intro(
                "Social Media",
                "Connect your social media accounts for content distribution",
            )

            # Social Media Grid
            col1, col2 = st.columns(2)

            with col1:
                # Facebook Card (Coming Soon)
                _render_coming_soon_integration(
                    "📘", "Facebook",
                    "Connect your Facebook account for content sharing.",
                )

            with col2:
                # Instagram Card (Coming Soon)
                _render_coming_soon_integration(
                    "📸", "Instagram",
                    "Connect your Instagram account for content sharing.",
                )

        with tabs[2]:
            _render_integration_tab_intro(
                "Analytics Tools",
                "Connect your analytics tools for content performance tracking",
            )

            # Google Search Console Card (Coming Soon)
            _render_coming_soon_integration(
                "📊", "Google Search Console",
                "Connect your Google Search Console for SEO insights.",
            )

        # Always allow proceeding since integrations are coming soon; there is
        # no failing validation state to report for this step.
        changes_made = True
        st.success("✅ Website platform integrations will be available in the next update")

        # Navigation buttons
        if render_navigation_buttons(5, 6, changes_made):
            try:
                # Store integration settings in session state
                st.session_state['integrations'] = {
                    'coming_soon': {
                        'wordpress': True,
                        'wix': True,
                        'facebook': True,
                        'instagram': True,
                        'google_search_console': True
                    }
                }

                # Update INTEGRATION_DONE in .env file and environment
                env_vars = {'INTEGRATION_DONE': 'True'}
                update_env_file(env_vars)

                # Update environment variable
                os.environ['INTEGRATION_DONE'] = 'True'
                logger.info("Updated INTEGRATION_DONE status")

                # Update progress and move to next step
                st.session_state['current_step'] = 6
                st.rerun()
            except Exception as e:
                error_msg = f"Failed to update integration status: {str(e)}"
                logger.error(error_msg)
                st.error(error_msg)

        return {"current_step": 5, "changes_made": changes_made}

    except Exception as e:
        error_msg = f"Error in ALwrity integrations setup: {str(e)}"
        logger.error(f"[render_alwrity_integrations] {error_msg}")
        st.error(error_msg)
        return {"current_step": 5, "error": error_msg}
|
||||
@@ -1,181 +0,0 @@
|
||||
"""Base components for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
from ..styles import API_KEY_MANAGER_STYLES
|
||||
|
||||
def render_step_indicator(current_step: int, total_steps: int) -> None:
    """Draw the horizontal six-step progress bar of the setup wizard.

    The stylesheet is emitted first, then one HTML fragment containing a pill
    per step with a connector line between consecutive pills. A step equal to
    ``current_step`` gets the ``active`` class, a lower-numbered step gets
    ``completed``, and a later step gets no state class; each connector uses
    the state of the step on its left.

    Args:
        current_step: Number (1-6) of the step currently shown.
        total_steps: Accepted for interface compatibility; the step list is
            fixed and this value is not read.

    Any exception is logged and replaced by a generic on-page error message.
    """
    try:
        st.markdown("""
            <style>
            .step-indicator {
                display: flex;
                justify-content: space-between;
                margin-bottom: 2rem;
                padding: 1rem;
                background: #f0f2f6;
                border-radius: 10px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
            }
            .step {
                display: flex;
                align-items: center;
                gap: 0.5rem;
                padding: 0.5rem 1rem;
                border-radius: 20px;
                background: #ffffff;
                transition: all 0.3s ease;
            }
            .step.active {
                background: #1f77b4;
                color: white;
            }
            .step.completed {
                background: #2ecc71;
                color: white;
            }
            .step-icon {
                font-size: 1.2rem;
            }
            .step-number {
                font-weight: bold;
            }
            .step-title {
                font-size: 0.9rem;
            }
            .step-line {
                flex: 1;
                height: 2px;
                background: #e0e0e0;
                margin: 0 1rem;
            }
            .step-line.active {
                background: #1f77b4;
            }
            .step-line.completed {
                background: #2ecc71;
            }
            </style>
        """, unsafe_allow_html=True)

        # (icon, title, step number) for every wizard page, in display order.
        wizard_steps = [
            ("🔑", "AI LLM", 1),
            ("🔍", "Website Analysis", 2),
            ("👤", "AI Research", 3),
            ("🎨", "Personalization", 4),
            ("🔄", "Integrations", 5),
            ("✅", "Complete", 6)
        ]
        last_position = len(wizard_steps) - 1

        fragments = ['<div class="step-indicator">']
        for position, (icon, title, step) in enumerate(wizard_steps):
            if step == current_step:
                state = "active"
            elif step < current_step:
                state = "completed"
            else:
                state = ""

            fragments.append(f'''
                <div class="step {state}">
                    <span class="step-icon">{icon}</span>
                    <span class="step-number">{step}</span>
                    <span class="step-title">{title}</span>
                </div>
            ''')
            # No connector after the final step.
            if position < last_position:
                fragments.append(f'<div class="step-line {state}"></div>')
        fragments.append('</div>')

        st.markdown("".join(fragments), unsafe_allow_html=True)

    except Exception as e:
        logger.error(f"Error rendering step indicator: {str(e)}")
        st.error("Error displaying step indicator")
|
||||
|
||||
def render_navigation_buttons(current_step: int, total_steps: int, changes_made: bool = True) -> bool:
    """Draw the wizard's Back / Continue / Complete buttons in a three-column row.

    The left column holds the Back button (hidden on the first step), the
    middle column is left empty as a spacer, and the right column holds either
    "Continue" or, on the last step, "Complete Setup".

    Args:
        current_step (int): Step the wizard is currently showing.
        total_steps (int): Number of steps in the wizard.
        changes_made (bool): When False the Continue button is rendered disabled.

    Returns:
        bool: True if Continue or Complete Setup was clicked in this run,
        False otherwise. Clicking Back never returns: it calls
        ``previous_step()`` and triggers ``st.rerun()``.
    """
    back_slot, _spacer, forward_slot = st.columns([1, 2, 1])

    with back_slot:
        # Short-circuit keeps the Back button from being created on step 1.
        if current_step > 1 and st.button("**← Back**", use_container_width=True, key="back_button"):
            from ..wizard_state import previous_step
            previous_step()
            st.rerun()

    with forward_slot:
        on_last_step = not (current_step < total_steps)
        if on_last_step:
            finished = st.button(
                "**Complete Setup ✓**",
                use_container_width=True,
                type="primary",
                key="complete_button",
            )
            if finished:
                st.success("✅ Setup completed successfully!")
                return True
        else:
            advance = st.button(
                "**Continue →**",
                use_container_width=True,
                disabled=not changes_made,
                key="next_button",
            )
            if advance:
                # Advancing the wizard is left to the calling component.
                return True

    return False
|
||||
|
||||
def render_tab_style() -> None:
    """Inject the CSS that restyles Streamlit tab bars (``.stTabs``).

    The rules give the tab list a light rounded background, turn each tab
    into a pill with a hover state, highlight the selected tab, and style a
    ``.tab-content`` card class. Nothing is returned; the stylesheet is
    emitted through ``st.markdown`` with ``unsafe_allow_html=True``.
    """
    # The trailing-margin rule targets the last tab button via :last-of-type
    # rather than a fixed nth-child(3): tab bars in this code base have 4 or 5
    # tabs, so a hard-coded index would put the edge margin on a middle tab.
    st.markdown("""
        <style>
        .stTabs [data-baseweb="tab-list"] {
            gap: 2rem;
            background: #f8f9fa;
            padding: 0.5rem;
            border-radius: 10px;
            margin-bottom: 1rem;
        }
        .stTabs [data-baseweb="tab"] {
            padding: 0.75rem 1.5rem;
            border-radius: 25px;
            transition: all 0.3s ease;
            background: transparent;
            color: #495057;
            font-weight: 500;
        }
        .stTabs [data-baseweb="tab"]:hover {
            background: #e9ecef;
            color: #1f77b4;
        }
        .stTabs [aria-selected="true"] {
            background: #1f77b4 !important;
            color: white !important;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .stTabs [data-baseweb="tab-list"] button:nth-child(1) {
            margin-left: 0.5rem;
        }
        .stTabs [data-baseweb="tab-list"] button:last-of-type {
            margin-right: 0.5rem;
        }
        .tab-content {
            background: white;
            padding: 1.5rem;
            border-radius: 10px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.05);
            margin-top: 1rem;
        }
        </style>
    """, unsafe_allow_html=True)
|
||||
|
||||
def render_success_message():
    """Show the "API keys saved" banner and ask the user to restart the app.

    The banner is raw HTML wrapped in a ``success-message`` div; the CSS for
    that class is not defined in this function.
    """
    banner_html = """
    <div class="success-message">
        <h3 style='color: white; margin-bottom: 12px; font-size: 1.4em;'>✅ API keys saved successfully!</h3>
        <p style='color: rgba(255,255,255,0.95); font-size: 1.1em;'>
            Please restart the application for the changes to take effect.
        </p>
    </div>
    """
    st.markdown(banner_html, unsafe_allow_html=True)
|
||||
@@ -1,272 +0,0 @@
|
||||
"""Final setup component for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from ..validation import check_all_api_keys
|
||||
|
||||
# Logging for the final-setup step: drop the handlers registered so far, then
# send DEBUG-and-above to a rotating file and INFO-and-above to stdout.
logger.remove()
logger.add(
    "logs/final_setup.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="500 MB",
    retention="10 days",
)
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)
|
||||
|
||||
def load_main_config() -> Dict[str, Any]:
    """Load ``lib/workspace/alwrity_config/main_config.json``.

    The path is relative to the current working directory.

    Returns:
        Dict[str, Any]: The parsed configuration. An empty dict is returned
        (and the problem logged) when the file cannot be read or parsed, or
        when its top-level JSON value is not an object.
    """
    config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
    try:
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except Exception as e:
        # Deliberately broad: callers treat any load failure as "no config".
        logger.error(f"Error loading main_config.json: {str(e)}")
        return {}

    # Callers use dict methods (.get) on the result, so a file holding
    # e.g. `[]` or `null` must not be handed back as-is.
    if not isinstance(config, dict):
        logger.error(
            f"Error loading main_config.json: expected a JSON object, got {type(config).__name__}"
        )
        return {}
    return config
|
||||
|
||||
def _mask_api_key(value: str) -> str:
    """Return a display-safe form of an API key (eight asterisks + last four characters)."""
    # An unset or empty key must not look like a configured one.
    if not value or value == 'Not configured':
        return 'Not configured'
    # Never echo a very short value in full.
    if len(value) <= 4:
        return '*' * 8
    return f"{'*' * 8}{value[-4:]}"


def _set_env_flag(name: str, value: str, env_path: str = '.env') -> None:
    """Write ``name=value`` to the .env file (replacing older entries) and to ``os.environ``."""
    env_lines = []
    if os.path.exists(env_path):
        with open(env_path, 'r') as f:
            env_lines = f.readlines()

    # Remove any existing entries for this flag
    env_lines = [line for line in env_lines if not line.startswith(f'{name}=')]

    # If the file did not end with a newline, the appended entry would be
    # glued onto the previous variable's value.
    if env_lines and not env_lines[-1].endswith('\n'):
        env_lines[-1] += '\n'
    env_lines.append(f"{name}={value}\n")

    with open(env_path, 'w') as f:
        f.writelines(env_lines)

    os.environ[name] = value


def render_final_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the final setup step: a read-only summary of steps 1-5 plus completion.

    Five tabs summarise the earlier steps (masked API keys from environment
    variables, the website URL, personalization values from
    ``main_config.json`` and integration placeholders). Below them are a
    "Back to Personalization" button and a "Complete Setup" button.

    On "Complete Setup" the function sets ``FINAL_SETUP_COMPLETE=True`` in
    ``.env``/``os.environ``, runs ``check_all_api_keys``; on failure it
    reverts the flag to ``False``, on success it rewrites
    ``main_config.json``, sets the ``setup_completed`` and
    ``redirect_to_main`` session flags and reruns the script.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance; its
            ``api_keys`` mapping is logged (masked) and it is passed to
            ``check_all_api_keys``.

    Returns:
        Dict[str, Any]: ``{"current_step": 6, "changes_made": bool}``;
        ``changes_made`` is False when completion failed in this run.
    """
    logger.info("[render_final_setup] Rendering final setup component")

    st.markdown("### Step 6: Final Setup & Validation")

    # Load main config
    main_config = load_main_config()

    # Create tabs for each step
    tabs = st.tabs([
        "Step 1: AI LLM Setup",
        "Step 2: Website Analysis",
        "Step 3: AI Research",
        "Step 4: Personalization",
        "Step 5: Integrations"
    ])

    # Step 1: AI LLM Setup
    with tabs[0]:
        st.markdown("#### AI LLM Configuration")

        # Get API keys from environment
        openai_key = os.getenv('OPENAI_API_KEY', 'Not configured')
        gemini_key = os.getenv('GEMINI_API_KEY', 'Not configured')
        anthropic_key = os.getenv('ANTHROPIC_API_KEY', 'Not configured')
        mistral_key = os.getenv('MISTRAL_API_KEY', 'Not configured')

        # Display API keys (masked)
        st.markdown("##### API Keys")
        col1, col2 = st.columns(2)

        with col1:
            st.markdown(f"**OpenAI API Key:** {_mask_api_key(openai_key)}")
            st.markdown(f"**Google Gemini API Key:** {_mask_api_key(gemini_key)}")

        with col2:
            st.markdown(f"**Anthropic API Key:** {_mask_api_key(anthropic_key)}")
            st.markdown(f"**Mistral API Key:** {_mask_api_key(mistral_key)}")

    # Step 2: Website Analysis
    with tabs[1]:
        st.markdown("#### Website Analysis Configuration")

        # Get website URL from environment
        website_url = os.getenv('WEBSITE_URL', 'Not configured')

        # Display website URL
        st.markdown("##### Website URL")
        st.markdown(f"**Website URL:** {website_url}")

        # Display website analysis settings
        st.markdown("##### Analysis Settings")
        st.markdown("Website analysis settings will be used to understand your content style and preferences.")

    # Step 3: AI Research
    with tabs[2]:
        st.markdown("#### AI Research Configuration")

        # Get research API keys from environment
        serpapi_key = os.getenv('SERPAPI_KEY', 'Not configured')
        tavily_key = os.getenv('TAVILY_API_KEY', 'Not configured')
        metaphor_key = os.getenv('METAPHOR_API_KEY', 'Not configured')
        firecrawl_key = os.getenv('FIRECRAWL_API_KEY', 'Not configured')

        # Display API keys (masked)
        st.markdown("##### Research API Keys")
        col1, col2 = st.columns(2)

        with col1:
            st.markdown(f"**SerpAPI Key:** {_mask_api_key(serpapi_key)}")
            st.markdown(f"**Tavily API Key:** {_mask_api_key(tavily_key)}")

        with col2:
            st.markdown(f"**Metaphor API Key:** {_mask_api_key(metaphor_key)}")
            st.markdown(f"**Firecrawl API Key:** {_mask_api_key(firecrawl_key)}")

    # Step 4: Personalization
    with tabs[3]:
        st.markdown("#### Personalization Configuration")

        # Display personalization settings from main config; the second
        # argument of each .get() is only a display fallback.
        with st.popover("Blog Content Characteristics", help="Click to see details about blog content settings"):
            st.markdown("##### Blog Content Characteristics")
            blog_settings = main_config.get("Blog Content Characteristics", {})
            st.write(f"- Blog Length: {blog_settings.get('Blog Length', '2000')}")
            st.write(f"- Blog Tone: {blog_settings.get('Blog Tone', 'Professional')}")
            st.write(f"- Blog Demographic: {blog_settings.get('Blog Demographic', 'Professional')}")
            st.write(f"- Blog Type: {blog_settings.get('Blog Type', 'Informational')}")
            st.write(f"- Blog Language: {blog_settings.get('Blog Language', 'English')}")
            st.write(f"- Blog Output Format: {blog_settings.get('Blog Output Format', 'markdown')}")
            st.markdown("These settings define the overall structure and style of your blog content.")

        with st.popover("Blog Images Details", help="Click to see details about image generation settings"):
            st.markdown("##### Blog Images Details")
            image_settings = main_config.get("Blog Images Details", {})
            st.write(f"- Image Generation Model: {image_settings.get('Image Generation Model', 'stable-diffusion')}")
            st.write(f"- Number of Blog Images: {image_settings.get('Number of Blog Images', 1)}")
            st.markdown("These settings control how images are generated for your blog posts.")

        with st.popover("LLM Options", help="Click to see details about language model settings"):
            st.markdown("##### LLM Options")
            llm_settings = main_config.get("LLM Options", {})
            st.write(f"- GPT Provider: {llm_settings.get('GPT Provider', 'google')}")
            st.write(f"- Model: {llm_settings.get('Model', 'gemini-1.5-flash-latest')}")
            st.write(f"- Temperature: {llm_settings.get('Temperature', 0.7)}")
            st.write(f"- Top-p: {llm_settings.get('Top-p', 0.9)}")
            st.write(f"- Max Tokens: {llm_settings.get('Max Tokens', 4000)}")
            st.write(f"- Frequency Penalty: {llm_settings.get('Frequency Penalty', 1.0)}")
            st.write(f"- Presence Penalty: {llm_settings.get('Presence Penalty', 1.0)}")
            st.markdown("These settings control the behavior of the language model used for content generation.")

        with st.popover("Search Engine Parameters", help="Click to see details about search engine settings"):
            st.markdown("##### Search Engine Parameters")
            search_settings = main_config.get("Search Engine Parameters", {})
            st.write(f"- Geographic Location: {search_settings.get('Geographic Location', 'us')}")
            st.write(f"- Search Language: {search_settings.get('Search Language', 'en')}")
            st.write(f"- Number of Results: {search_settings.get('Number of Results', 10)}")
            st.write(f"- Time Range: {search_settings.get('Time Range', 'anytime')}")
            st.markdown("These settings control how search engines are used for research and content creation.")

    # Step 5: Integrations
    with tabs[4]:
        st.markdown("#### ALwrity Integrations Configuration")

        # Display integrations settings
        st.markdown("##### Website Platforms")
        st.info("WordPress integration will be available in the next update")
        st.info("Wix integration will be available in the next update")

        st.markdown("##### Social Media")
        st.info("Facebook integration will be available in the next update")
        st.info("Instagram integration will be available in the next update")

        st.markdown("##### Analytics Tools")
        st.info("Google Search Console integration will be available in the next update")

    # Navigation buttons
    col1, col2 = st.columns(2)

    with col1:
        if st.button("← Back to Personalization"):
            logger.info("[render_final_setup] User clicked back to personalization")
            st.session_state.current_step = 4
            st.session_state.next_step = "personalization_setup"
            st.rerun()

    with col2:
        if st.button("Complete Setup →"):
            logger.info("[render_final_setup] User clicked complete setup")
            try:
                # First set FINAL_SETUP_COMPLETE to True (before validation,
                # as in the original flow).
                # NOTE(review): presumably check_all_api_keys reads this flag — confirm.
                try:
                    _set_env_flag('FINAL_SETUP_COMPLETE', 'True')
                    logger.info("[render_final_setup] Set FINAL_SETUP_COMPLETE=True")
                except Exception as e:
                    logger.error(f"[render_final_setup] Error setting FINAL_SETUP_COMPLETE: {str(e)}")
                    st.error("Error updating setup status. Please try again.")
                    return {"current_step": 6, "changes_made": False}

                # Now validate all steps
                validation_result = check_all_api_keys(api_key_manager)
                if not validation_result:
                    # If validation fails, revert FINAL_SETUP_COMPLETE.
                    # Still best-effort, but a failed revert is now logged
                    # instead of silently ignored.
                    try:
                        _set_env_flag('FINAL_SETUP_COMPLETE', 'False')
                    except Exception as revert_error:
                        logger.warning(
                            f"[render_final_setup] Could not revert FINAL_SETUP_COMPLETE: {str(revert_error)}"
                        )

                    st.error("Setup validation failed. Please ensure all required steps are completed.")
                    logger.error("[render_final_setup] Validation failed")
                    return {"current_step": 6, "changes_made": False}

                # Log the current API keys in the manager (last 4 chars only)
                logger.info("[render_final_setup] Current API keys in manager:")
                for key, value in api_key_manager.api_keys.items():
                    if value:
                        logger.info(f"  - {key}: {'*' * 8}{value[-4:]}")
                    else:
                        logger.info(f"  - {key}: Not set")

                # Save main configuration
                # NOTE(review): main_config is {} when loading failed above,
                # in which case this writes an empty config — confirm intended.
                config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
                with open(config_path, 'w') as f:
                    json.dump(main_config, f, indent=4)
                logger.info("[render_final_setup] Saved main configuration")

                # Show success message
                st.success("✅ Setup completed successfully! Redirecting to main application...")

                # Set setup completion flag in session state
                st.session_state['setup_completed'] = True
                st.session_state['redirect_to_main'] = True

                # Clear the current step to ensure proper redirection
                if 'current_step' in st.session_state:
                    del st.session_state['current_step']

                # Rerun to trigger redirection
                st.rerun()

            except Exception as e:
                error_msg = f"Error completing setup: {str(e)}"
                logger.error(f"[render_final_setup] {error_msg}")
                st.error(error_msg)
                return {"current_step": 6, "changes_made": False}

    return {"current_step": 6, "changes_made": True}
|
||||
@@ -1,39 +0,0 @@
|
||||
"""Health monitoring component for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from ..health_monitor import APIKeyHealthMonitor
|
||||
from ..key_rotation import KeyRotationManager
|
||||
from ..wizard_state import get_api_keys
|
||||
|
||||
def render_health_monitoring():
    """Render the API key health monitoring dashboard.

    Two tabs are shown: "Health Monitor" delegates to
    ``APIKeyHealthMonitor.get_health_dashboard()`` and "Key Rotation"
    delegates to ``KeyRotationManager.display_rotation_dashboard()`` followed
    by manual controls that let the user pick a key type and trigger
    ``rotate_if_needed`` for it.
    """
    st.header("API Key Health & Rotation")

    # Initialize managers
    health_monitor = APIKeyHealthMonitor()
    rotation_manager = KeyRotationManager()

    # Create tabs for different views
    health_tab, rotation_tab = st.tabs(["Health Monitor", "Key Rotation"])

    with health_tab:
        health_monitor.get_health_dashboard()

    with rotation_tab:
        rotation_manager.display_rotation_dashboard()

        # Manual rotation controls
        st.subheader("Manual Controls")
        # The key type is the part of each key name before the first
        # underscore; dict.fromkeys de-duplicates while keeping order so the
        # same type is not listed twice.
        key_types = list(dict.fromkeys(k.split('_')[0] for k in get_api_keys()))
        key_type = st.selectbox(
            "Select Key Type",
            options=key_types
        )

        if key_type:
            if st.button("Force Rotation"):
                # NOTE(review): the button says "Force" but the call is
                # rotate_if_needed — confirm it rotates unconditionally.
                new_key = rotation_manager.rotate_if_needed(key_type)
                if new_key:
                    # Never print a secret in full; show only the last four
                    # characters, like the other key displays in this UI.
                    masked_key = f"{'*' * 8}{str(new_key)[-4:]}"
                    st.success(f"Rotated to new key: {masked_key}")
                else:
                    st.warning("No suitable key available for rotation")
|
||||
@@ -1,487 +0,0 @@
|
||||
"""Personalization setup component for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
import sys
|
||||
import json
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from ....web_crawlers.async_web_crawler import AsyncWebCrawlerService
|
||||
from ....personalization.style_analyzer import StyleAnalyzer
|
||||
from lib.utils.style_utils import (
|
||||
get_test_config_styles,
|
||||
get_glass_container,
|
||||
get_info_section,
|
||||
get_example_box,
|
||||
get_analysis_section,
|
||||
get_style_guide_html
|
||||
)
|
||||
from .base import render_navigation_buttons
|
||||
from .alwrity_integrations import render_alwrity_integrations
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
import yaml
|
||||
|
||||
# Logging for the personalization step: drop the handlers registered so far,
# then send DEBUG-and-above to a rotating file and INFO-and-above to stdout.
logger.remove()
logger.add(
    "logs/personalization_setup.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="500 MB",
    retention="10 days",
)
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)
|
||||
|
||||
def load_main_config() -> Dict[str, Any]:
    """Load ``lib/workspace/alwrity_config/main_config.json``.

    The path is relative to the current working directory.

    Returns:
        Dict[str, Any]: The parsed configuration. An empty dict is returned
        (and the problem logged) when the file cannot be read or parsed, or
        when its top-level JSON value is not an object.
    """
    config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
    try:
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except Exception as e:
        # Deliberately broad: callers treat any load failure as "no config".
        logger.error(f"Error loading main_config.json: {str(e)}")
        return {}

    # The return type promises a dict; a file holding e.g. `[]` or `null`
    # must not be handed back as-is.
    if not isinstance(config, dict):
        logger.error(
            f"Error loading main_config.json: expected a JSON object, got {type(config).__name__}"
        )
        return {}
    return config
|
||||
|
||||
def save_main_config(config: Dict[str, Any]) -> bool:
    """Write ``config`` to ``lib/workspace/alwrity_config/main_config.json``.

    The target directory is created when missing. The path is relative to
    the current working directory.

    Args:
        config (Dict[str, Any]): JSON-serializable configuration to store.

    Returns:
        bool: True when the file was written, False when serialization or
        writing failed (the error is logged).
    """
    try:
        config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
        # Serialize first: opening the file with 'w' truncates it, so a value
        # json cannot encode must fail before the existing config is touched.
        payload = json.dumps(config, indent=4)
        os.makedirs(os.path.dirname(config_path), exist_ok=True)
        with open(config_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        return True
    except Exception as e:
        logger.error(f"Error saving main_config.json: {str(e)}")
        return False
|
||||
|
||||
def _format_analysis_value(value: Any) -> str:
    """Turn one analysis value into HTML-escaped display text ("N/A" when empty)."""
    from html import escape  # local import: only this helper needs it

    if value is None or value == "" or value == [] or value == ():
        return "N/A"
    # Lists are shown comma-separated; a plain string is NOT iterated, which
    # would split it into single characters.
    if isinstance(value, (list, tuple, set)):
        return ', '.join(escape(str(item)) for item in value)
    return escape(str(value))


def _build_analysis_list(section: dict, fields) -> str:
    """Build the ``<ul>`` markup for one section from ``(label, key)`` pairs."""
    items = ''.join(
        f"<li><strong>{label}:</strong> {_format_analysis_value(section.get(key))}</li>\n"
        for label, key in fields
    )
    return f"\n<ul>\n{items}</ul>\n"


def display_style_analysis(analysis_results: dict):
    """Display the style analysis results in four HTML sections.

    The sections are Writing Style, Target Audience, Content Type and
    Recommended Settings; each is read from the matching key of
    ``analysis_results`` and rendered via ``get_analysis_section`` with
    ``unsafe_allow_html=True``. Missing sections or fields show "N/A".

    Because the markup is rendered as raw HTML, every value is HTML-escaped
    before interpolation so analysis text cannot inject markup.

    Args:
        analysis_results (dict): Analysis output with optional
            ``writing_style``, ``target_audience``, ``content_type`` and
            ``recommended_settings`` mappings.
    """
    # (section heading, key in analysis_results, ((label, field key), ...))
    sections = (
        ("Writing Style", "writing_style", (
            ("Tone", "tone"),
            ("Voice", "voice"),
            ("Complexity", "complexity"),
            ("Formality", "formality"),
        )),
        ("Target Audience", "target_audience", (
            ("Demographics", "demographics"),
            ("Expertise Level", "expertise_level"),
            ("Industry Focus", "industry_focus"),
            ("Geographic Focus", "geographic_focus"),
        )),
        ("Content Type", "content_type", (
            ("Primary Type", "primary_type"),
            ("Secondary Types", "secondary_types"),
            ("Purpose", "purpose"),
            ("Call to Action", "call_to_action"),
        )),
        ("Recommended Settings", "recommended_settings", (
            ("Writing Tone", "writing_tone"),
            ("Target Audience", "target_audience"),
            ("Content Type", "content_type"),
            ("Creativity Level", "creativity_level"),
            ("Geographic Location", "geographic_location"),
        )),
    )

    try:
        for heading, section_key, fields in sections:
            # `or {}` also covers a section that is present but None.
            section = analysis_results.get(section_key) or {}
            st.markdown(
                get_analysis_section(heading, _build_analysis_list(section, fields)),
                unsafe_allow_html=True,
            )
    except Exception as e:
        logger.error(f"Error displaying style analysis: {str(e)}")
        st.error(f"Error displaying analysis results: {str(e)}")
|
||||
|
||||
def _set_env_flag(name: str, value: str, env_path: str = '.env') -> None:
    """Write ``name=value`` to the .env file (replacing older entries) and to ``os.environ``."""
    env_lines = []
    if os.path.exists(env_path):
        with open(env_path, 'r') as f:
            env_lines = f.readlines()

    # Remove any existing entries for this flag
    env_lines = [line for line in env_lines if not line.startswith(f'{name}=')]

    # If the file did not end with a newline, the appended entry would be
    # glued onto the previous variable's value.
    if env_lines and not env_lines[-1].endswith('\n'):
        env_lines[-1] += '\n'
    env_lines.append(f"{name}={value}\n")

    with open(env_path, 'w') as f:
        f.writelines(env_lines)

    os.environ[name] = value


def render_personalization_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the personalization setup step (wizard step 4).

    The user chooses between two modes:

    * "Manual Settings": four tabs of widgets (blog content, images, AI
      generation, search). "Save Manual Settings" merges the chosen values
      into ``main_config.json`` and records ``PERSONALIZATION_DONE=True`` in
      ``.env``, ``os.environ`` and ``st.session_state['personalization_saved']``.
    * "ALwrity Personalization": a website URL / writing samples form. The
      "Analyze Style" button is currently a no-op placeholder.

    Pressing Continue without having saved records
    ``PERSONALIZATION_DONE=False``; in either case the wizard then moves to
    step 5 and reruns.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance (not
            used by this step's body).

    Returns:
        Dict[str, Any]: ``{"current_step": 4, "changes_made": True}``.
    """
    logger.info("[render_personalization_setup] Rendering personalization setup component")

    st.markdown("""
        # ✨ Personalization Setup
        Configure your content generation preferences and writing style
    """)

    # Main section selection using radio buttons
    setup_mode = st.radio(
        "Choose Setup Mode",
        ["Manual Settings", "ALwrity Personalization"],
        horizontal=True,
        label_visibility="collapsed"
    )

    if setup_mode == "Manual Settings":
        # Create tabs for different settings categories
        tabs = st.tabs([
            "Blog Content Characteristics",
            "Blog Images",
            "AI Generation Settings",
            "Search Settings"
        ])

        # Blog Content Characteristics Tab
        with tabs[0]:
            col1, col2 = st.columns([1, 1])

            with col1:
                st.markdown("#### Blog Content Characteristics")

                blog_length = st.text_input(
                    "Blog Length",
                    value="2000",
                    placeholder="e.g., 2000",
                    help="Target word count for your blog posts"
                )

                blog_tone = st.selectbox(
                    "Blog Tone",
                    ["Professional", "Casual", "Technical", "Conversational"],
                    help="The overall tone of your content"
                )

                blog_demographic = st.selectbox(
                    "Target Demographic",
                    ["Professional", "General", "Technical", "Academic"],
                    help="Your primary audience demographic"
                )

                blog_type = st.selectbox(
                    "Content Type",
                    ["Informational", "Educational", "Entertainment", "Technical"],
                    help="The primary type of content you create"
                )

                blog_language = st.selectbox(
                    "Content Language",
                    ["English", "Spanish", "French", "German", "Other"],
                    help="Primary language for your content"
                )

                blog_format = st.selectbox(
                    "Output Format",
                    ["markdown", "html", "plain text"],
                    help="Format of the generated content"
                )

            with col2:
                st.markdown("### Blog Content Settings Guide")

                st.markdown("""
                    #### Blog Length
                    - Determines word count target
                    - Affects content depth
                    - Impacts SEO performance

                    #### Blog Tone
                    - Professional: Business-oriented
                    - Casual: Friendly, approachable
                    - Technical: Detailed, precise

                    #### Best Practices
                    - Match tone to audience
                    - Consider SEO requirements
                    - Maintain consistency
                """)

        # Blog Images Tab
        with tabs[1]:
            col1, col2 = st.columns([1, 1])

            with col1:
                st.markdown("#### Blog Images Settings")

                image_model = st.selectbox(
                    "Image Generation Model",
                    ["stable-diffusion", "dall-e", "midjourney"],
                    help="AI model for generating images"
                )

                num_images = st.number_input(
                    "Number of Images",
                    min_value=1,
                    max_value=5,
                    value=1,
                    help="Number of images per blog post"
                )

                image_style = st.selectbox(
                    "Image Style",
                    ["Realistic", "Artistic", "Professional", "Creative"],
                    help="Style of generated images"
                )

            with col2:
                st.markdown("### Image Generation Guide")

                st.markdown("""
                    #### Model Selection
                    - Stable Diffusion: Versatile, fast
                    - DALL-E: High quality, creative
                    - Midjourney: Artistic, detailed

                    #### Best Practices
                    - Consider content type
                    - Balance quality vs. speed
                    - Optimize for platforms
                """)

        # AI Generation Settings Tab
        with tabs[2]:
            col1, col2 = st.columns([1, 1])

            with col1:
                st.markdown("#### AI Generation Settings")

                gpt_provider = st.selectbox(
                    "AI Provider",
                    ["google", "openai", "anthropic"],
                    help="Choose your preferred AI provider"
                )

                model = st.text_input(
                    "Model",
                    value="gemini-1.5-flash-latest",
                    help="The specific AI model to use"
                )

                temperature = st.slider(
                    "Creativity Level",
                    min_value=0.0,
                    max_value=1.0,
                    value=0.7,
                    help="Higher = more creative, lower = more focused"
                )

                max_tokens = st.number_input(
                    "Maximum Length",
                    min_value=100,
                    max_value=8000,
                    value=4000,
                    help="Maximum length of generated content"
                )

            with col2:
                st.markdown("### AI Settings Guide")

                st.markdown("""
                    #### Provider Selection
                    - Google: Balanced, reliable
                    - OpenAI: Creative, versatile
                    - Anthropic: Precise, ethical

                    #### Temperature Guide
                    - 0.0-0.3: Focused, consistent
                    - 0.4-0.7: Balanced creativity
                    - 0.8-1.0: Highly creative
                """)

        # Search Settings Tab
        with tabs[3]:
            col1, col2 = st.columns([1, 1])

            with col1:
                st.markdown("#### Search Settings")

                geo_location = st.text_input(
                    "Geographic Location",
                    value="us",
                    help="Target geographic location for search"
                )

                search_language = st.selectbox(
                    "Search Language",
                    ["en", "es", "fr", "de", "other"],
                    help="Language for search results"
                )

                num_results = st.number_input(
                    "Number of Results",
                    min_value=1,
                    max_value=50,
                    value=10,
                    help="Number of search results to analyze"
                )

                time_range = st.selectbox(
                    "Time Range",
                    ["anytime", "day", "week", "month", "year"],
                    help="Time range for search results"
                )

            with col2:
                st.markdown("### Search Settings Guide")

                st.markdown("""
                    #### Location & Language
                    - Affects result relevance
                    - Impacts local SEO
                    - Consider target market

                    #### Search Optimization
                    - Balance quantity vs. quality
                    - Consider time sensitivity
                    - Optimize for accuracy
                """)

        # Save button for manual settings
        if st.button("Save Manual Settings", type="primary", use_container_width=True):
            try:
                manual_settings = {
                    "Blog Content Characteristics": {
                        "Blog Length": blog_length,
                        "Blog Tone": blog_tone,
                        "Blog Demographic": blog_demographic,
                        "Blog Type": blog_type,
                        "Blog Language": blog_language,
                        "Blog Output Format": blog_format
                    },
                    "Blog Images Details": {
                        "Image Generation Model": image_model,
                        "Number of Blog Images": num_images,
                        "Image Style": image_style
                    },
                    "LLM Options": {
                        "GPT Provider": gpt_provider,
                        "Model": model,
                        "Temperature": temperature,
                        "Max Tokens": max_tokens
                    },
                    "Search Engine Parameters": {
                        "Geographic Location": geo_location,
                        "Search Language": search_language,
                        "Number of Results": num_results,
                        "Time Range": time_range
                    }
                }

                # Merge into the existing main_config.json instead of
                # replacing it: the file may hold sections and keys this form
                # does not edit (e.g. "Top-p" under "LLM Options"), and a
                # wholesale overwrite would silently drop them.
                config = load_main_config()
                if not isinstance(config, dict):
                    config = {}
                for section_name, section_values in manual_settings.items():
                    existing_section = config.get(section_name)
                    if isinstance(existing_section, dict):
                        existing_section.update(section_values)
                    else:
                        config[section_name] = section_values

                if save_main_config(config):
                    try:
                        # Record completion in .env, the environment and session state
                        _set_env_flag('PERSONALIZATION_DONE', 'True')
                        st.session_state['personalization_saved'] = True
                        logger.info("Successfully set PERSONALIZATION_DONE=True in .env and environment")
                        st.success("✅ Your personalization settings have been saved successfully!")
                    except Exception as e:
                        logger.error(f"Error updating PERSONALIZATION_DONE: {str(e)}")
                        st.error("Settings saved but failed to update environment. Please try again.")
                else:
                    st.error("Unable to save settings. Please try again.")
            except Exception as e:
                logger.error(f"Error saving settings: {str(e)}")
                st.error(f"Failed to save settings: {str(e)}")

    else:  # ALwrity Personalization
        col1, col2 = st.columns([2, 1])

        with col1:
            st.markdown("### Website URL")
            url = st.text_input(
                "Enter your website URL",
                placeholder="https://example.com",
                help="Provide your website URL to analyze your content style"
            )

            if not url:
                st.markdown("### Written Samples")
                st.info("No website URL? No problem! Provide written samples instead.")
                samples = st.text_area(
                    "Paste your content samples here",
                    help="Paste 2-3 samples of your best content"
                )

            if st.button("🎨 Analyze Style", use_container_width=True):
                # TODO: placeholder — the style analysis is not wired up here,
                # so `url` / `samples` are collected but not yet used.
                pass

        with col2:
            st.markdown("### How ALwrity Discovers Your Style")

            st.markdown("""
                #### AI-Powered Analysis
                ALwrity analyzes your content to understand:
                - Writing tone and voice
                - Content structure
                - Target audience
                - Engagement style

                #### Personalized Recommendations
                We provide:
                - Writing guidelines
                - Content templates
                - Style recommendations
                - Audience insights
            """)

    # Navigation buttons
    if render_navigation_buttons(4, 6, changes_made=True):
        try:
            # If user hasn't saved settings manually, mark as skipped
            if not st.session_state.get('personalization_saved'):
                _set_env_flag('PERSONALIZATION_DONE', 'False')
                logger.info("User skipped personalization. Set PERSONALIZATION_DONE=False")
        except Exception as e:
            logger.error(f"Error updating PERSONALIZATION_DONE on skip: {str(e)}")
            st.error("Error updating environment. You may need to configure personalization later.")

        # Advance to the integrations step regardless of the .env outcome
        st.session_state.current_step = 5
        st.rerun()

    return {"current_step": 4, "changes_made": True}
|
||||
@@ -1,312 +0,0 @@
|
||||
"""Website setup component for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from ...website_analyzer import analyze_website
|
||||
from ...website_analyzer.analyzer import WebsiteAnalyzer
|
||||
import asyncio
|
||||
import sys
|
||||
from typing import Dict, Any
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons
|
||||
import os
|
||||
|
||||
# Route this module's log output to a rotating file (DEBUG and above) and to
# stdout (INFO and above), replacing loguru's default handler.
logger.remove()  # Remove default handler

logger.add(
    "logs/website_setup.log",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    rotation="50 MB",
    retention="10 days",
)

logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>",
    level="INFO",
)

# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)
|
||||
|
||||
# Labels offered by the "Analysis Type" radio button. The tab layout below is
# selected by comparing against these exact strings, so they are defined once.
_BASIC_ANALYSIS = "Basic Website Analysis"
_FULL_SEO_ANALYSIS = "Full Website Analysis with SEO"

# Placeholder stored in .env / os.environ when the user leaves the URL empty.
_NO_WEBSITE = "no_website_provided"


def _read_saved_website_url() -> str:
    """Return the previously saved website URL, or "" when none is stored.

    Looks at the WEBSITE_URL environment variable first and falls back to a
    WEBSITE_URL= line in the local .env file. The "no_website_provided"
    placeholder is reported as an empty string.
    """
    existing_url = os.getenv('WEBSITE_URL', None)
    if not existing_url and os.path.exists('.env'):
        try:
            with open('.env', 'r') as f:
                for line in f:
                    if line.strip().startswith('WEBSITE_URL='):
                        # maxsplit=1 keeps any '=' that is part of the URL
                        # itself (e.g. a query string) intact.
                        existing_url = line.strip().split('=', 1)[1]
                        break
        except Exception as e:
            logger.error(f"[render_website_setup] Failed to read existing URL from .env: {str(e)}")

    # If existing_url is 'no_website_provided', set it to empty for better UX
    if existing_url == _NO_WEBSITE:
        existing_url = ''

    return existing_url if existing_url else ""


def _save_website_url(url: str) -> None:
    """Write the URL (or the placeholder when empty) to .env and os.environ.

    Every existing WEBSITE_URL= line in .env is replaced; when none exists a
    new line is appended. Failures are logged and otherwise ignored.
    """
    stored_value = url if url else _NO_WEBSITE
    try:
        website_url_exists = False
        env_lines = []

        if os.path.exists('.env'):
            with open('.env', 'r') as f:
                for line in f:
                    if line.strip().startswith('WEBSITE_URL='):
                        website_url_exists = True
                        env_lines.append(f"WEBSITE_URL={stored_value}\n")
                    else:
                        env_lines.append(line)

        if not website_url_exists:
            # A final line without a newline would otherwise be merged with
            # the appended WEBSITE_URL entry.
            if env_lines and not env_lines[-1].endswith('\n'):
                env_lines[-1] += '\n'
            env_lines.append(f"WEBSITE_URL={stored_value}\n")

        with open('.env', 'w') as f:
            f.writelines(env_lines)

        os.environ['WEBSITE_URL'] = stored_value
        if url:
            logger.info(f"[render_website_setup] Saved website URL to .env: {url}")
        else:
            logger.info("[render_website_setup] Set default website URL: no_website_provided")

    except Exception as e:
        logger.error(f"[render_website_setup] Failed to save website URL: {str(e)}")


def _run_website_analysis(url: str) -> None:
    """Analyze the URL, attach the SEO analysis, and store the result.

    The combined result is saved in st.session_state.website_analysis and a
    success or error message is shown. Any exception is logged and reported
    to the user instead of being raised.
    """
    try:
        logger.info(f"[render_website_setup] Starting website analysis for URL: {url}")

        results = analyze_website(url)

        analyzer = WebsiteAnalyzer()
        seo_results = analyzer.analyze_website(url)
        if seo_results.get('success', False):
            seo_analysis = seo_results['data']['analysis']['seo_info']
        else:
            seo_analysis = {
                'error': seo_results.get('error', 'Unknown error in SEO analysis'),
                'overall_score': 0,
                'meta_tags': {},
                'content': {},
                'recommendations': []
            }
        # setdefault: if the basic analysis result has no 'data' entry, a
        # plain results['data'] lookup would raise KeyError and hide the
        # analysis' own error message handled below.
        results.setdefault('data', {})['seo_analysis'] = seo_analysis

        logger.debug(f"[render_website_setup] Analysis results received: {results.get('success', False)}")

        # Store results in session state
        st.session_state.website_analysis = results
        logger.info("[render_website_setup] Results stored in session state")

        if not results.get('success', False):
            error_msg = results.get('error', 'Analysis failed')
            logger.error(f"[render_website_setup] Analysis failed: {error_msg}")
            st.error(error_msg)
        else:
            logger.info("[render_website_setup] Analysis completed successfully")
            st.success("✅ Website analysis completed successfully!")
    except Exception as e:
        error_msg = f"Analysis failed: {str(e)}"
        logger.error(f"[render_website_setup] {error_msg}")
        st.error(error_msg)


def _render_seo_tabs(data: Dict[str, Any], seo_tab, technical_tab) -> None:
    """Fill the "SEO Analysis" and "Technical SEO" tabs from data['seo_analysis']."""
    seo_data = data.get('seo_analysis', {})

    with seo_tab:
        st.markdown("##### SEO Analysis")

        # Display SEO Score
        seo_score = seo_data.get('overall_score', 0)
        st.markdown(f"### SEO Score: {seo_score}/100")
        # st.progress rejects floats outside 0.0-1.0, so clamp the fraction.
        st.progress(min(max(seo_score / 100, 0.0), 1.0))

        # Meta Tags Analysis
        st.markdown("#### Meta Tags Analysis")
        meta_analysis = seo_data.get('meta_tags', {})
        for key, value in meta_analysis.items():
            label = key.replace('_', ' ').title()
            if isinstance(value, bool):
                st.write(f"{'✅' if value else '❌'} {label}")
            elif isinstance(value, dict):
                st.write(f"**{label}:**")
                st.write(f"Status: {value.get('status', 'N/A')}")
                st.write(f"Value: {value.get('value', 'N/A')}")
                if value.get('recommendation'):
                    st.write(f"Recommendation: {value['recommendation']}")
            else:
                st.write(f"**{label}:** {value}")

        # Content Analysis
        st.markdown("#### AI Content Analysis")
        content_analysis = seo_data.get('content', {})
        st.write(f"**Word Count:** {content_analysis.get('word_count', 0)}")
        st.write(f"**Readability Score:** {content_analysis.get('readability_score', 0)}/100")
        st.write(f"**Content Quality Score:** {content_analysis.get('content_quality_score', 0)}/100")

        # Recommendations
        st.markdown("#### SEO Recommendations")
        recommendations = seo_data.get('recommendations', [])
        for rec in recommendations:
            st.write(f"**{rec.get('priority', '').upper()} Priority - {rec.get('category', '')}**")
            st.write(f"Issue: {rec.get('issue', '')}")
            st.write(f"Recommendation: {rec.get('recommendation', '')}")
            st.write(f"Impact: {rec.get('impact', '')}")
            st.write("---")

    with technical_tab:
        st.markdown("##### Technical SEO")
        technical_seo = seo_data.get('technical_analysis', {})

        # Mobile Friendliness
        st.markdown("#### Mobile Friendliness")
        mobile_friendly = technical_seo.get('mobile_friendly', False)
        st.write(f"{'✅' if mobile_friendly else '❌'} Mobile Friendly")

        # Page Speed
        st.markdown("#### Page Speed")
        speed_metrics = technical_seo.get('speed_metrics', {})
        for metric, value in speed_metrics.items():
            st.write(f"**{metric.replace('_', ' ').title()}:** {value}")

        # Technical Issues
        st.markdown("#### Technical Issues")
        issues = technical_seo.get('issues', [])
        for issue in issues:
            st.write(f"• {issue}")


def _render_analysis_results(results: Dict[str, Any], analyze_type: str) -> None:
    """Render a successful analysis result as a set of tabs.

    Full SEO mode shows five tabs (including "SEO Analysis" and
    "Technical SEO"); any other mode shows four.
    """
    data = results.get('data', {})
    analysis = data.get('analysis', {})
    full_seo = analyze_type == _FULL_SEO_ANALYSIS

    # Create tabs for different sections
    if full_seo:
        tab1, tab2, tab3, tab4, tab5 = st.tabs([
            "Basic Metrics",
            "Content Analysis",
            "SEO Analysis",
            "Technical SEO",
            "Strategy"
        ])
        strategy_tab = tab5
    else:
        # NOTE(review): the "Technical Info" tab (tab3) is created but nothing
        # is rendered into it in basic mode — confirm what should appear there.
        tab1, tab2, tab3, tab4 = st.tabs([
            "Basic Metrics",
            "Content Analysis",
            "Technical Info",
            "Strategy"
        ])
        strategy_tab = tab4

    with tab1:
        st.markdown("##### Basic Metrics")
        basic_info = analysis.get('basic_info', {})
        st.write(f"Status Code: {basic_info.get('status_code')}")
        st.write(f"Content Type: {basic_info.get('content_type')}")
        st.write(f"Title: {basic_info.get('title')}")
        st.write(f"Meta Description: {basic_info.get('meta_description')}")

        # SSL Info
        ssl_info = analysis.get('ssl_info', {})
        if ssl_info.get('has_ssl'):
            st.success("SSL Certificate is valid")
            st.write(f"Expiry: {ssl_info.get('expiry')}")
        else:
            st.error("No valid SSL certificate found")

    with tab2:
        st.markdown("##### Content Analysis")
        content_info = analysis.get('content_info', {})

        # Content Overview
        st.markdown("###### 📊 Content Overview")
        words_col, headings_col, images_col, links_col = st.columns(4)
        with words_col:
            st.metric("Word Count", content_info.get('word_count', 0))
        with headings_col:
            st.metric("Headings", content_info.get('heading_count', 0))
        with images_col:
            st.metric("Images", content_info.get('image_count', 0))
        with links_col:
            st.metric("Links", content_info.get('link_count', 0))

    if full_seo:
        _render_seo_tabs(data, tab3, tab4)

    with strategy_tab:
        st.markdown("##### Strategy Recommendations")
        strategy_info = analysis.get('strategy', {})

        if strategy_info:
            for category, recommendations in strategy_info.items():
                st.markdown(f"###### {category.replace('_', ' ').title()}")
                for rec in recommendations:
                    st.write(f"• {rec}")
        else:
            st.info("No strategy recommendations available")


def render_website_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the website setup step.

    The left column collects the website URL (persisted to .env and
    os.environ on every render) and can trigger an analysis; the right column
    shows the most recent analysis stored in the session state.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance
            (not used by this step).

    Returns:
        Dict[str, Any]: Current state
    """
    logger.info("[render_website_setup] Rendering website setup component")

    st.markdown("### Step 2: Enter Your Website URL for Analysis (Optional)")

    # Create two columns for input and results
    col1, col2 = st.columns([1, 1])

    with col1:
        url = st.text_input(
            "Enter your website URL, if you own one",
            value=_read_saved_website_url(),
            placeholder="https://example.com"
        )
        logger.info(f"[render_website_setup] URL input value: {url}")

        # Save URL to .env file
        _save_website_url(url)

        analyze_type = st.radio(
            "Analysis Type",
            [_BASIC_ANALYSIS, _FULL_SEO_ANALYSIS],
            horizontal=True,
            label_visibility="hidden",
            help="Choose between basic website analysis or comprehensive SEO analysis"
        )

        if st.button("Analyze Website"):
            if url:
                with st.spinner("Analyzing website..."):
                    _run_website_analysis(url)
            else:
                logger.warning("[render_website_setup] No URL provided")
                st.warning("Please enter a valid URL")

    with col2:
        # Check if we have analysis results
        if 'website_analysis' in st.session_state:
            results = st.session_state.website_analysis

            if results.get('success', False):
                _render_analysis_results(results, analyze_type)
            else:
                error_msg = results.get('error', 'Analysis failed')
                logger.error(f"[render_website_setup] Displaying error: {error_msg}")
                st.error(error_msg)
        else:
            logger.debug("[render_website_setup] No analysis results in session state")
            st.info("Enter a URL and click 'Analyze Website' to see results")

    # Navigation buttons
    if render_navigation_buttons(2, 5, True):
        # Move to next step (AI Research Setup)
        st.session_state.current_step = 3
        st.session_state.next_step = "ai_research_setup"
        st.rerun()

    return {"current_step": 2, "changes_made": True}
|
||||
@@ -1,121 +0,0 @@
|
||||
"""API Key Rotation Manager."""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Dict, Optional, List
|
||||
import streamlit as st
|
||||
from .health_monitor import APIKeyHealthMonitor
|
||||
from .wizard_state import get_api_keys, set_api_key
|
||||
|
||||
class KeyRotationManager:
    """Manages automatic rotation of API keys based on health metrics.

    The active key per key type is kept in ``st.session_state.active_keys``;
    health decisions are delegated to an ``APIKeyHealthMonitor``.
    """

    def __init__(self):
        """Initialize the key rotation manager."""
        self.health_monitor = APIKeyHealthMonitor()
        if 'active_keys' not in st.session_state:
            st.session_state.active_keys = {}

    @staticmethod
    def _key_type_of(key_name: str) -> str:
        """Return the key type prefix, e.g. OPENAI from OPENAI_API_KEY."""
        return key_name.split('_')[0]

    @staticmethod
    def _minutes_remaining(cooldown_until: datetime, now: Optional[datetime] = None) -> int:
        """Return whole minutes left until ``cooldown_until``, never negative.

        Args:
            cooldown_until: Moment the cooldown ends.
            now: Reference time; defaults to ``datetime.now()``.
        """
        if now is None:
            now = datetime.now()
        time_left = cooldown_until - now
        # An already expired cooldown is reported as 0 rather than a
        # negative number of minutes.
        return max(0, int(time_left.total_seconds() / 60))

    def get_active_key(self, key_type: str) -> Optional[str]:
        """Get the currently active key for a given type, or None if unset."""
        return st.session_state.active_keys.get(key_type)

    def set_active_key(self, key_type: str, key_name: str) -> None:
        """Set the active key for a given type."""
        st.session_state.active_keys[key_type] = key_name

    def rotate_if_needed(self, key_type: str) -> Optional[str]:
        """Check and rotate key if needed based on health metrics.

        Returns:
            The newly activated key when a rotation happened, otherwise the
            current key (which may be None when no key is available).
        """
        current_key = self.get_active_key(key_type)

        # If no current key or current key needs rotation
        if not current_key or self.health_monitor.should_rotate_key(current_key):
            new_key = self.health_monitor.get_best_available_key(key_type)

            if new_key and new_key != current_key:
                # Set cooldown on the old key if it exists
                if current_key:
                    self.health_monitor.set_cooldown(current_key, duration_minutes=30)

                # Update the active key
                self.set_active_key(key_type, new_key)
                return new_key

        return current_key

    def get_rotation_status(self) -> Dict[str, Dict]:
        """Get rotation status for all key types.

        Returns:
            A mapping of key type to a dict with 'active_key',
            'available_keys' and 'cooldown_keys'.
        """
        status = {}
        api_keys = get_api_keys()

        for key_name in api_keys:
            key_type = self._key_type_of(key_name)
            health = self.health_monitor.get_key_health(key_name)

            if key_type not in status:
                status[key_type] = {
                    'active_key': self.get_active_key(key_type),
                    'available_keys': [],
                    'cooldown_keys': []
                }

            if health and health['in_cooldown']:
                status[key_type]['cooldown_keys'].append(key_name)
            else:
                status[key_type]['available_keys'].append(key_name)

        return status

    def display_rotation_dashboard(self) -> None:
        """Display the key rotation dashboard."""
        st.subheader("🔄 API Key Rotation Status")

        rotation_status = self.get_rotation_status()
        if not rotation_status:
            st.info("No API keys configured for rotation.")
            return

        for key_type, status in rotation_status.items():
            with st.expander(f"{key_type} Rotation Status"):
                # Active Key
                st.write("**Active Key:**")
                if status['active_key']:
                    st.success(status['active_key'])
                else:
                    st.warning("No active key")

                # Available Keys
                st.write("**Available Keys:**")
                if status['available_keys']:
                    for key in status['available_keys']:
                        st.write(f"- {key}")
                else:
                    st.warning("No available keys")

                # Cooldown Keys
                if status['cooldown_keys']:
                    st.write("**Keys in Cooldown:**")
                    for key in status['cooldown_keys']:
                        health = self.health_monitor.get_key_health(key)
                        if health and health['cooldown_until']:
                            minutes_left = self._minutes_remaining(health['cooldown_until'])
                            st.info(f"- {key} (Cooldown: {minutes_left} minutes remaining)")

    def initialize_rotation(self) -> None:
        """Initialize key rotation for all API key types."""
        # Get unique key types
        key_types = {self._key_type_of(key_name) for key_name in get_api_keys()}

        # Initialize rotation for each key type that has no active key yet
        for key_type in key_types:
            if not self.get_active_key(key_type):
                best_key = self.health_monitor.get_best_available_key(key_type)
                if best_key:
                    self.set_active_key(key_type, best_key)
|
||||
@@ -1,238 +0,0 @@
|
||||
"""API key manager class."""
|
||||
|
||||
from typing import Dict, Any, Optional
|
||||
from loguru import logger
|
||||
import streamlit as st
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Replace loguru's default handler with two sinks sharing one format:
# a log file receiving DEBUG and above, and stdout receiving INFO and above.
logger.remove()  # Remove default handler

logger.add(
    "logs/api_key_manager.log",
    level="DEBUG",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
)

logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
)
|
||||
|
||||
class APIKeyManager:
    """Manager for handling API keys.

    Keys are cached in ``self.api_keys``, mirrored into ``os.environ`` and
    persisted to a ``.env`` file. ``self.api_key_groups`` describes the keys
    offered to the user, grouped by purpose.
    """

    # Environment variables scanned by load_api_keys().
    _ALL_PROVIDERS = (
        # AI Providers
        'OPENAI_API_KEY',
        'GEMINI_API_KEY',
        'ANTHROPIC_API_KEY',
        'MISTRAL_API_KEY',
        # Research Providers
        'SERPER_API_KEY',
        'TAVILY_API_KEY',
        'METAPHOR_API_KEY',
        'FIRECRAWL_API_KEY',
        # Integrations (listed in api_key_groups)
        'STABILITY_API_KEY',
    )

    # Short provider name -> environment variable name, used by save_api_key().
    # 'serpapi' is kept for backward compatibility; 'serper' and 'stability'
    # match the variable names used by api_key_groups.
    _PROVIDER_ENV_VARS = {
        'openai': 'OPENAI_API_KEY',
        'gemini': 'GEMINI_API_KEY',
        'mistral': 'MISTRAL_API_KEY',
        'anthropic': 'ANTHROPIC_API_KEY',
        'serpapi': 'SERPAPI_API_KEY',
        'serper': 'SERPER_API_KEY',
        'tavily': 'TAVILY_API_KEY',
        'metaphor': 'METAPHOR_API_KEY',
        'firecrawl': 'FIRECRAWL_API_KEY',
        'stability': 'STABILITY_API_KEY',
    }

    def __init__(self):
        """Initialize the API key manager."""
        logger.info("[APIKeyManager.__init__] Initializing API key manager")
        self.api_keys = {}
        self.load_api_keys()
        self.api_key_groups = {
            "Create": {
                "GEMINI_API_KEY": {
                    "url": "https://makersuite.google.com/app/apikey",
                    "description": "Google's Gemini AI for content generation",
                    "setup_steps": [
                        "Visit Google AI Studio",
                        "Create a Google Cloud account",
                        "Enable Gemini API",
                        "Generate API key"
                    ]
                },
                "OPENAI_API_KEY": {
                    "url": "https://platform.openai.com/api-keys",
                    "description": "OpenAI's GPT models for content creation",
                    "setup_steps": [
                        "Go to OpenAI platform",
                        "Create an account",
                        "Navigate to API keys",
                        "Create new API key"
                    ]
                },
                "MISTRAL_API_KEY": {
                    "url": "https://console.mistral.ai/api-keys/",
                    "description": "Mistral AI for efficient content generation",
                    "setup_steps": [
                        "Visit Mistral AI website",
                        "Sign up for an account",
                        "Access API section",
                        "Generate API key"
                    ]
                }
            },
            "Research": {
                "TAVILY_API_KEY": {
                    "url": "https://tavily.com/#api",
                    "description": "Powers intelligent web research features",
                    "setup_steps": [
                        "Go to Tavily's website",
                        "Create an account",
                        "Access your API dashboard",
                        "Generate a new API key"
                    ]
                },
                "SERPER_API_KEY": {
                    "url": "https://serper.dev/signup",
                    "description": "Enables Google search functionality",
                    "setup_steps": [
                        "Visit Serper.dev",
                        "Sign up for an account",
                        "Go to API section",
                        "Create your API key"
                    ]
                }
            },
            "Deep Search": {
                "METAPHOR_API_KEY": {
                    "url": "https://dashboard.exa.ai/login",
                    "description": "Enables advanced web search capabilities",
                    "setup_steps": [
                        "Visit the Exa AI dashboard",
                        "Sign up for a free account",
                        "Navigate to API Keys section",
                        "Create a new API key"
                    ]
                },
                "FIRECRAWL_API_KEY": {
                    "url": "https://www.firecrawl.dev/account",
                    "description": "Enables web content extraction",
                    "setup_steps": [
                        "Visit Firecrawl website",
                        "Sign up for an account",
                        "Access API dashboard",
                        "Create your API key"
                    ]
                }
            },
            "Integrations": {
                "STABILITY_API_KEY": {
                    "url": "https://platform.stability.ai/",
                    "description": "Enables AI image generation",
                    "setup_steps": [
                        "Access Stability AI platform",
                        "Create an account",
                        "Navigate to API settings",
                        "Generate your API key"
                    ]
                }
            }
        }

    @classmethod
    def _resolve_env_var(cls, provider: str) -> Optional[str]:
        """Map a provider name to its environment variable name.

        Accepts either a short name (e.g. 'openai') or the variable name
        itself (e.g. 'OPENAI_API_KEY'). Returns None for unknown providers.
        """
        env_var = cls._PROVIDER_ENV_VARS.get(provider)
        if env_var is None and provider in cls._PROVIDER_ENV_VARS.values():
            env_var = provider
        return env_var

    @staticmethod
    def _upsert_env_line(lines: list, env_var: str, value: str) -> list:
        """Return a copy of ``lines`` with ``env_var`` set to ``value``.

        Every existing ``env_var=`` line is replaced; if there is none, a new
        line is appended.
        """
        prefix = f"{env_var}="
        new_line = f"{prefix}{value}\n"
        key_found = False
        updated_lines = []
        for line in lines:
            if line.startswith(prefix):
                updated_lines.append(new_line)
                key_found = True
            else:
                updated_lines.append(line)

        if not key_found:
            # Without this, a final line lacking a newline would be merged
            # with the appended entry.
            if updated_lines and not updated_lines[-1].endswith("\n"):
                updated_lines[-1] += "\n"
            updated_lines.append(new_line)

        return updated_lines

    def load_api_keys(self):
        """Load API keys from environment variables.

        Values from a ``.env`` file in the current working directory are
        loaded first (overriding existing variables) when that file exists;
        the known provider variables are then read from the environment.
        Errors are logged, not raised.
        """
        try:
            logger.info("[APIKeyManager.load_api_keys] Loading API keys from environment")

            # Get the current working directory and .env file path
            current_dir = os.getcwd()
            env_path = os.path.join(current_dir, '.env')
            logger.info(f"[APIKeyManager.load_api_keys] Looking for .env file at: {env_path}")

            if os.path.exists(env_path):
                load_dotenv(env_path, override=True)
                logger.debug("[APIKeyManager.load_api_keys] Environment variables loaded")
            else:
                # Keys exported in the process environment are still picked
                # up below even though there is no .env file.
                logger.warning(f"[APIKeyManager.load_api_keys] .env file not found at {env_path}")

            # Load API keys from environment variables
            for provider in self._ALL_PROVIDERS:
                value = os.getenv(provider)
                if value:
                    self.api_keys[provider] = value
                    logger.info(f"[APIKeyManager.load_api_keys] Loaded {provider} from environment")
                else:
                    logger.debug(f"[APIKeyManager.load_api_keys] {provider} not found in environment")

            logger.info(f"[APIKeyManager.load_api_keys] Loaded {len(self.api_keys)} API keys")

        except Exception as e:
            logger.error(f"[APIKeyManager.load_api_keys] Error loading API keys: {str(e)}")

    def save_api_key(self, provider: str, api_key: str) -> bool:
        """
        Save an API key for a provider.

        The key is cached in memory, exported to ``os.environ`` and written
        to the ``.env`` file.

        Args:
            provider: The provider name (e.g., 'openai', 'gemini') or the
                environment variable name (e.g., 'OPENAI_API_KEY')
            api_key: The API key value

        Returns:
            bool: True if successful, False otherwise
        """
        try:
            logger.info(f"[APIKeyManager] Saving API key for {provider}")

            env_var = self._resolve_env_var(provider)
            if not env_var:
                logger.error(f"[APIKeyManager] Unknown provider: {provider}")
                return False

            # Update the in-memory dictionary. The env-var name is stored as
            # well because load_api_keys() caches keys under that name.
            self.api_keys[provider] = api_key
            self.api_keys[env_var] = api_key

            # Update environment variable
            os.environ[env_var] = api_key

            # NOTE(review): this path is derived from the module location,
            # while load_api_keys() reads .env from the working directory —
            # confirm both resolve to the same file.
            env_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), '.env')
            try:
                with open(env_path, 'r', encoding='utf-8') as f:
                    lines = f.readlines()
            except FileNotFoundError:
                lines = []

            # Update or add the API key
            updated_lines = self._upsert_env_line(lines, env_var, api_key)

            # Write back to .env file
            with open(env_path, 'w', encoding='utf-8') as f:
                f.writelines(updated_lines)

            logger.info(f"[APIKeyManager] Successfully saved API key for {provider}")
            return True

        except Exception as e:
            logger.error(f"[APIKeyManager] Error saving API key for {provider}: {str(e)}")
            return False

    def get_api_key(self, provider: str) -> Optional[str]:
        """Get the cached API key stored under ``provider``, or None."""
        return self.api_keys.get(provider)
|
||||
@@ -1,37 +0,0 @@
|
||||
"""State management for the API key manager."""
|
||||
|
||||
import streamlit as st
|
||||
from datetime import datetime
|
||||
|
||||
def initialize_wizard_state():
    """Create the wizard state in the Streamlit session if it is missing.

    An existing ``wizard_state`` entry is left untouched.
    """
    if 'wizard_state' in st.session_state:
        return

    fresh_state = {}
    fresh_state['current_step'] = 0
    fresh_state['total_steps'] = 0
    fresh_state['completed_steps'] = set()
    fresh_state['api_keys_status'] = {}
    fresh_state['setup_progress'] = 0
    st.session_state.wizard_state = fresh_state
|
||||
|
||||
def update_progress(api_keys_config):
    """Update the overall setup progress.

    Stores the percentage of configured keys in
    ``st.session_state.wizard_state['setup_progress']``.

    Args:
        api_keys_config: Mapping whose values are collections of keys; the
            sum of their lengths is the total number of keys.
    """
    total_keys = sum(len(keys) for keys in api_keys_config.values())
    key_statuses = st.session_state.wizard_state['api_keys_status'].values()
    configured_keys = sum(1 for status in key_statuses if status.get('configured', False))

    # An empty configuration would otherwise divide by zero; report 0 instead.
    if total_keys:
        progress = (configured_keys / total_keys) * 100
    else:
        progress = 0
    st.session_state.wizard_state['setup_progress'] = progress
|
||||
|
||||
def update_key_status(key):
    """Mark ``key`` as configured in the wizard state, stamped with the current time."""
    status_by_key = st.session_state.wizard_state['api_keys_status']
    configured_at = datetime.now().isoformat()
    status_by_key[key] = {'configured': True, 'timestamp': configured_at}
|
||||
|
||||
def get_key_status(key):
    """Return the stored status dict for ``key``, or an empty dict if none exists."""
    status_by_key = st.session_state.wizard_state['api_keys_status']
    return status_by_key.get(key, {})
|
||||
|
||||
def get_progress():
    """Return the setup progress value stored in the wizard state."""
    wizard_state = st.session_state.wizard_state
    return wizard_state['setup_progress']
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user