ALwrity Version 0.5.0 (Fastapi + React )

This commit is contained in:
ajaysi
2025-08-06 12:48:02 +05:30
parent f28a919caa
commit 32f97fa6b3
476 changed files with 115544 additions and 28747 deletions

View File

@@ -572,6 +572,7 @@ def render_ai_content_strategy():
budget = st.selectbox(
"Monthly Content Budget",
[
"No budget",
"Under $1,000",
"$1,000 - $5,000",
"$5,000 - $10,000",

View File

@@ -1,135 +0,0 @@
###################################################
#
# The script covers many SEO factors, including keyword presence, title length,
# meta description, images, img alt text, headings, internal links, external links,
# spelling errors, grammar errors, and readability.
#
##################################################
import re
from bs4 import BeautifulSoup
from textstat import flesch_reading_ease
import spellchecker
class SEOAnalyzer:
    """Run a lightweight on-page SEO audit over a single HTML document.

    Args:
        html_content: Raw HTML markup to analyze.
        target_keywords: Iterable of keywords/phrases whose usage is measured.
    """

    def __init__(self, html_content, target_keywords):
        self.html_content = html_content
        self.target_keywords = target_keywords

    def analyze_html_content(self):
        """Analyze the stored HTML and return a report dictionary.

        Returns:
            dict: On success, a mapping with the keys 'Keyword Density',
            'Keyword Presence in Title', 'Keyword Presence in Image Alt Text',
            'Headings Text', 'Internal Links', 'External Links',
            'Readability Score', 'Spelling Errors', 'Grammar Errors',
            'SEO Score' and 'Suggestions'. 'Grammar Errors' is None when the
            spell checker offers no grammar check. On any failure the result
            is ``{'error': <message>}`` instead.
        """
        try:
            soup = BeautifulSoup(self.html_content, 'html.parser')

            # Extract and clean text from HTML
            text = ' '.join(soup.stripped_strings)
            text = re.sub(r'\s+', ' ', text)
            lowered_text = text.lower()
            word_count = len(text.split())

            # Keyword density as a percentage of total words. An empty page
            # has zero words, so report 0.0 instead of dividing by zero.
            keyword_density = {}
            for keyword in self.target_keywords:
                if word_count:
                    occurrences = lowered_text.count(keyword.lower())
                    keyword_density[keyword] = (occurrences / word_count) * 100
                else:
                    keyword_density[keyword] = 0.0

            # Check for the presence of keywords in the title
            title_tag = soup.find('title')
            title_text = title_tag.text.lower() if title_tag else ''
            keyword_presence_in_title = {
                keyword: keyword.lower() in title_text
                for keyword in self.target_keywords
            }

            # Check for the presence of keywords in image alt text
            images = soup.find_all('img')
            img_alt_text = [img.get('alt', '').lower() for img in images]
            keyword_presence_in_img_alt_text = {
                keyword: any(keyword.lower() in alt_text for alt_text in img_alt_text)
                for keyword in self.target_keywords
            }

            # Collect heading text
            headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            headings_text = ' '.join(heading.text.lower() for heading in headings)

            # Link counts.
            # NOTE(review): "internal" means "href contains no '#'" and
            # "external" means "href contains 'http'", so one link can be
            # counted in both buckets -- confirm this is intended.
            anchors = soup.find_all('a')
            internal_links = len([link for link in anchors if '#' not in link.get('href', '')])
            external_links = len([link for link in anchors if 'http' in link.get('href', '')])

            # Calculate readability score
            readability_score = flesch_reading_ease(text)

            # Spelling errors
            spell = spellchecker.SpellChecker()
            spelling_errors = len(spell.unknown(text.split()))

            # pyspellchecker's SpellChecker has no check_grammar(); calling it
            # unconditionally raised AttributeError and turned every analysis
            # into an error result. Use it only if the checker provides it.
            grammar_checker = getattr(spell, 'check_grammar', None)
            grammar_errors = len(grammar_checker(text)) if callable(grammar_checker) else None

            # Calculate SEO score
            seo_score = 0

            # One point per keyword found in the text. Both sides are
            # lowercased; the text was lowercased already, so mixed-case
            # keywords such as 'SEO' could never match before.
            for keyword in self.target_keywords:
                if keyword.lower() in lowered_text:
                    seo_score += 1

            # One point for a title within the recommended length. The 50-70
            # range is a character count, so measure characters, not words.
            title_length = len(title_text.strip())
            recommended_title_length = (50, 70)
            if recommended_title_length[0] <= title_length <= recommended_title_length[1]:
                seo_score += 1

            # Generate suggestions for improvement
            suggestions = []
            if seo_score < 5:
                suggestions.append("Add more relevant keywords to your HTML content.")
                suggestions.append("Make sure your title contains keywords.")
                suggestions.append("Add keywords to image alt text.")
                suggestions.append("Add headings to your HTML content.")
                suggestions.append("Add internal links to your HTML content.")

            return {
                'Keyword Density': keyword_density,
                'Keyword Presence in Title': keyword_presence_in_title,
                'Keyword Presence in Image Alt Text': keyword_presence_in_img_alt_text,
                'Headings Text': headings_text,
                'Internal Links': internal_links,
                'External Links': external_links,
                'Readability Score': readability_score,
                'Spelling Errors': spelling_errors,
                'Grammar Errors': grammar_errors,
                'SEO Score': seo_score,
                'Suggestions': suggestions
            }
        except Exception as e:
            # Failures are reported to the caller as data rather than raised.
            return {'error': str(e)}
# Example usage:
if __name__ == "__main__":
    html_content = """
<!DOCTYPE html>
<html>
<head>
<title>SEO Analyzer - Sample Page</title>
<meta name="description" content="This is a sample page for SEO analysis.">
</head>
<body>
<h1>Welcome to the SEO Analyzer</h1>
<p>This is a sample page with some sample content for SEO analysis. It mentions the target keywords SEO, keywords, and content.</p>
<img src="image1.jpg" alt="SEO image">
<img src="image2.jpg" alt="Keywords image">
</body>
</html>
"""
    keywords = ['SEO', 'keywords', 'content']  # Replace with your target keywords
    seo_analyzer = SEOAnalyzer(html_content, keywords)
    results = seo_analyzer.analyze_html_content()

    if 'error' in results:
        # analyze_html_content() reports failures as {'error': message};
        # indexing the report keys on that shape would raise KeyError.
        print(f"SEO analysis failed: {results['error']}")
    else:
        print("SEO Analysis Results:")
        # Printed in the same order as the report is documented.
        report_fields = (
            'Keyword Density',
            'Keyword Presence in Title',
            'Keyword Presence in Image Alt Text',
            'Headings Text',
            'Internal Links',
            'External Links',
            'Readability Score',
            'Spelling Errors',
            'Grammar Errors',
            'SEO Score',
        )
        for field in report_fields:
            print(f"{field}: {results[field]}")
        print("Suggestions:")
        for suggestion in results['Suggestions']:
            print(suggestion)

View File

@@ -1,182 +0,0 @@
# Content Gap Analysis Tool
A comprehensive AI-powered tool for analyzing content gaps and generating strategic content recommendations.
## Overview
The Content Gap Analysis tool combines multiple SEO tools to provide a complete analysis of your content strategy, identify opportunities, and generate actionable recommendations. It leverages existing AI SEO tools and adds new capabilities for comprehensive content analysis.
## Workflow Design
### 1. Website Analysis
**Input:** Website URL
**Tools Integration:**
- `analyze_onpage_seo()`: Analyze content quality and structure
- `url_seo_checker()`: Check technical SEO aspects
- `google_pagespeed_insights()`: Assess page performance
**Analysis Components:**
- Content structure mapping
- Topic categorization
- Content depth assessment
- Performance metrics
### 2. Competitor Analysis
**Input:** Competitor URLs
**Tools Integration:**
- `url_seo_checker()`: Analyze competitor URLs
- `analyze_onpage_seo()`: Compare content quality
- `ai_title_generator()`: Analyze title patterns
**Analysis Components:**
- Content strategy comparison
- Topic coverage gaps
- Content format analysis
- Title pattern analysis
### 3. Keyword Research
**Input:** Industry/Niche
**Tools Integration:**
- `ai_title_generator()`: Generate keyword-based titles
- `metadesc_generator_main()`: Analyze meta descriptions for keyword usage
- `ai_structured_data()`: Check structured data implementation
**Analysis Components:**
- Keyword opportunity identification
- Search intent analysis
- Content format suggestions
- Topic clustering
### 4. AI-Powered Recommendations
**Tools Integration:**
- `ai_title_generator()`: Generate content titles
- `metadesc_generator_main()`: Create content summaries
- `ai_structured_data()`: Suggest structured data implementation
**Output Components:**
- Content topic suggestions
- Format recommendations
- Priority scoring
- Implementation timeline
## Implementation Plan
### Phase 1: Core Infrastructure
1. Create base classes and interfaces
2. Implement data collection modules
3. Set up AI model integration
4. Develop data storage system
### Phase 2: Tool Integration
1. Integrate existing SEO tools
2. Create unified API for tool interaction
3. Implement data sharing between tools
4. Develop result aggregation system
### Phase 3: Analysis Engine
1. Implement content structure analysis
2. Develop competitor analysis algorithms
3. Create keyword research system
4. Build recommendation engine
### Phase 4: UI/UX Development
1. Create step-by-step workflow interface
2. Implement progress tracking
3. Develop visualization components
4. Add export functionality
## Technical Requirements
### Dependencies
- Existing SEO tools from `lib/ai_seo_tools/`
- AI models for content analysis
- Web scraping capabilities
- Data storage system
### File Structure
```
content_gap_analysis/
├── __init__.py
├── main.py
├── website_analyzer.py
├── competitor_analyzer.py
├── keyword_researcher.py
├── recommendation_engine.py
├── utils/
│ ├── __init__.py
│ ├── data_collector.py
│ ├── content_parser.py
│ └── ai_processor.py
└── tests/
    ├── __init__.py
    ├── test_website_analyzer.py
    ├── test_competitor_analyzer.py
    └── test_keyword_researcher.py
```
## Integration Points
### Existing Tools
1. **On-Page SEO Analyzer**
- Function: `analyze_onpage_seo()`
- Purpose: Content quality assessment
- Integration: Content structure analysis
2. **URL SEO Checker**
- Function: `url_seo_checker()`
- Purpose: Technical optimization
- Integration: URL structure analysis
3. **Blog Title Generator**
- Function: `ai_title_generator()`
- Purpose: Content ideas
- Integration: Keyword analysis
4. **Meta Description Generator**
- Function: `metadesc_generator_main()`
- Purpose: Content summaries
- Integration: Content optimization
5. **Structured Data Generator**
- Function: `ai_structured_data()`
- Purpose: Rich snippets
- Integration: Content enhancement
### New Components
1. **Content Structure Analyzer**
- Purpose: Map website content structure
- Output: Content hierarchy and relationships
2. **Competitor Content Analyzer**
- Purpose: Analyze competitor content strategy
- Output: Content gaps and opportunities
3. **Keyword Opportunity Finder**
- Purpose: Identify keyword gaps
- Output: Keyword recommendations
4. **AI Recommendation Engine**
- Purpose: Generate content recommendations
- Output: Actionable content strategy
## Future Enhancements
1. **Advanced Analytics**
- Content performance tracking
- ROI analysis
- Trend prediction
2. **Automation Features**
- Automated content planning
- Schedule generation
- Priority scoring
3. **Integration Expansion**
- CMS integration
- Analytics platform connection
- Social media analysis
4. **AI Improvements**
- Advanced topic modeling
- Sentiment analysis
- Content quality scoring

View File

@@ -1,36 +0,0 @@
"""
Content Gap Analysis Tool for Alwrity.
"""
from .ui import ContentGapAnalysisUI
from .main import ContentGapAnalysis
from .keyword_researcher import KeywordResearcher
from .competitor_analyzer import CompetitorAnalyzer
from .website_analyzer import WebsiteAnalyzer
from .recommendation_engine import RecommendationEngine
from .utils.ai_processor import AIProcessor
__all__ = [
'ContentGapAnalysisUI',
'ContentGapAnalysis',
'KeywordResearcher',
'CompetitorAnalyzer',
'WebsiteAnalyzer',
'RecommendationEngine',
'AIProcessor'
]
def run_content_gap_analysis():
    """Run the Content Gap Analysis tool.

    Applies the Streamlit page configuration, builds the
    ContentGapAnalysisUI and hands control to its ``run()`` method.
    """
    # This module never imports Streamlit at top level, so `st` was undefined
    # when this function ran. Import it where it is used.
    import streamlit as st

    # Set up the page configuration before the UI object is created.
    # NOTE(review): Streamlit expects set_page_config to be the first
    # Streamlit command on a page; whether ContentGapAnalysisUI() issues
    # Streamlit commands in its constructor is not visible here, so
    # configuring first is the safe order.
    st.set_page_config(
        page_title="Content Gap Analysis",
        page_icon="📊",
        layout="wide"
    )

    # Initialize and run the UI
    ui = ContentGapAnalysisUI()
    ui.run()

View File

@@ -1,711 +0,0 @@
"""
Competitor analyzer for content gap analysis.
"""
from typing import Dict, Any, List, Optional
import streamlit as st
from collections import Counter, defaultdict
from loguru import logger
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
import asyncio
import sys
import os
import json
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
# Configure logger
# NOTE(review): these statements run when the module is imported and act on
# the shared loguru `logger` object, so they presumably affect logging for
# every module using it -- confirm that is intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, written to logs/competitor_analyzer.log with
# size-based rotation and time-based retention as configured below.
logger.add(
"logs/competitor_analyzer.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, written to stdout with colour markup.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs after the file sink above was already registered;
# verify the sink does not need the directory to exist beforehand.
os.makedirs("logs", exist_ok=True)
class CompetitorAnalyzer:
"""Analyzes competitor content and market position."""
def __init__(self):
    """Create the helper objects and stage metadata used by the analyzer."""
    # Human-readable step labels for the single 'competitor_analysis' stage.
    competitor_steps = [
        'Initializing competitor analysis',
        'Analyzing competitor content',
        'Evaluating market position',
        'Identifying content gaps',
        'Generating competitive insights'
    ]

    self.website_analyzer = WebsiteAnalyzer()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Stage definitions, keyed by stage id.
    self.stages = {
        'competitor_analysis': {
            'name': 'Competitor Analysis',
            'steps': competitor_steps
        }
    }

    logger.info("CompetitorAnalyzer initialized")
def analyze(self, competitor_urls: List[str], industry: str) -> Dict[str, Any]:
    """
    Analyze competitor websites.

    Each URL is run through the website analyzer; the successful analyses
    are then embedded in a prompt and sent to the LLM for a market-position
    summary.

    Args:
        competitor_urls: List of competitor URLs to analyze
        industry: Industry category

    Returns:
        Dictionary with 'competitors', 'market_position', 'content_gaps'
        and 'advantages'. On failure the same keys are returned empty,
        plus an 'error' message.
    """
    try:
        results = {
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }

        # Analyze each competitor; unsuccessful analyses are skipped.
        for url in competitor_urls:
            competitor_analysis = self.website_analyzer.analyze_website(url)
            if competitor_analysis.get('success', False):
                results['competitors'].append({
                    'url': url,
                    'analysis': competitor_analysis['data']
                })

        # Generate market position analysis using AI
        prompt = f"""Analyze the market position of competitors in the {industry} industry:
Competitor Analyses:
{json.dumps(results['competitors'], indent=2)}
Provide:
1. Market position analysis
2. Content gaps
3. Competitive advantages
Format the response as JSON with 'market_position', 'content_gaps', and 'advantages' keys."""

        # Get AI analysis
        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in competitive analysis.",
            response_format="json_object"
        )

        # NOTE(review): llm_text_gen's return type is not visible here. If it
        # hands back the JSON as text, decode it first; calling .get() on a
        # string would raise and discard the competitor data gathered above.
        if isinstance(analysis, str):
            try:
                analysis = json.loads(analysis)
            except ValueError:
                logger.warning("AI analysis response was not valid JSON; skipping AI-derived fields")
                analysis = None

        if isinstance(analysis, dict):
            results['market_position'] = analysis.get('market_position', {})
            results['content_gaps'] = analysis.get('content_gaps', [])
            results['advantages'] = analysis.get('advantages', [])

        return results
    except Exception as e:
        error_msg = f"Error analyzing competitors: {str(e)}"
        # loguru applies str.format() to the message whenever extra
        # args/kwargs are passed, so `exc_info=True` attached no traceback and
        # could itself raise if the message contained braces. Pass the text as
        # a format argument and request the traceback through opt().
        logger.opt(exception=True).error("{}", error_msg)
        return {
            'error': error_msg,
            'competitors': [],
            'market_position': {},
            'content_gaps': [],
            'advantages': []
        }
def _analyze_competitor_content(self, competitor_urls: List[str]) -> Dict[str, Any]:
"""Analyze competitor content."""
try:
content_analysis = {}
for url in competitor_urls:
# Get AI analysis for each competitor
analysis = self.ai_processor.analyze_content({
'url': url,
'content': {} # Content will be fetched by AI processor
})
content_analysis[url] = {
'content_metrics': analysis.get('content_metrics', {}),
'content_evolution': analysis.get('content_evolution', {}),
'topic_trends': analysis.get('topic_trends', {}),
'performance_trends': analysis.get('performance_trends', {})
}
return content_analysis
except Exception as e:
st.error(f"Error analyzing competitor content: {str(e)}")
return {}
def _evaluate_market_position(self, content_analysis: Dict[str, Any], industry: str) -> Dict[str, Any]:
"""Evaluate market position."""
try:
market_position = {
'industry_rank': 0,
'content_quality_rank': 0,
'market_share': 0,
'competitive_advantages': [],
'competitive_disadvantages': []
}
# Calculate industry rank based on content quality
content_quality_scores = [
analysis.get('content_metrics', {}).get('quality_score', 0)
for analysis in content_analysis.values()
]
if content_quality_scores:
market_position['content_quality_rank'] = sum(content_quality_scores) / len(content_quality_scores)
# Identify competitive advantages and disadvantages
for url, analysis in content_analysis.items():
quality_score = analysis.get('content_metrics', {}).get('quality_score', 0)
if quality_score > market_position['content_quality_rank']:
market_position['competitive_advantages'].append({
'url': url,
'advantage': 'Higher content quality',
'score': quality_score
})
elif quality_score < market_position['content_quality_rank']:
market_position['competitive_disadvantages'].append({
'url': url,
'disadvantage': 'Lower content quality',
'score': quality_score
})
return market_position
except Exception as e:
st.error(f"Error evaluating market position: {str(e)}")
return {}
def _identify_content_gaps(self, content_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Identify content gaps."""
try:
content_gaps = []
# Analyze content coverage
all_topics = set()
for analysis in content_analysis.values():
topics = analysis.get('topic_trends', {}).get('topics', [])
all_topics.update(topics)
# Identify missing topics for each competitor
for url, analysis in content_analysis.items():
covered_topics = set(analysis.get('topic_trends', {}).get('topics', []))
missing_topics = all_topics - covered_topics
if missing_topics:
content_gaps.append({
'url': url,
'missing_topics': list(missing_topics),
'gap_type': 'topic_coverage'
})
return content_gaps
except Exception as e:
st.error(f"Error identifying content gaps: {str(e)}")
return []
def _generate_competitive_insights(self, content_analysis: Dict[str, Any], market_position: Dict[str, Any], content_gaps: List[Dict[str, Any]]) -> List[str]:
"""Generate competitive insights."""
try:
insights = []
# Market position insights
if market_position.get('content_quality_rank', 0) > 80:
insights.append("Strong market position with high content quality")
elif market_position.get('content_quality_rank', 0) > 60:
insights.append("Moderate market position with room for improvement")
else:
insights.append("Weak market position requiring significant improvement")
# Content gap insights
if content_gaps:
insights.append(f"Identified {len(content_gaps)} content gaps across competitors")
# Competitive advantage insights
if market_position.get('competitive_advantages'):
insights.append(f"Found {len(market_position['competitive_advantages'])} competitive advantages")
return insights
except Exception as e:
st.error(f"Error generating competitive insights: {str(e)}")
return []
def _run_seo_analysis(self, url: str) -> dict:
"""
Run SEO analysis on competitor website.
Args:
url (str): The URL to analyze
Returns:
dict: SEO analysis results
"""
# Run website analysis using the new analyzer
analysis = self.website_analyzer.analyze_website(url)
if not analysis.get('success', False):
return {
'error': analysis.get('error', 'Unknown error in SEO analysis'),
'onpage_seo': {},
'url_seo': {}
}
# Extract SEO information from the analysis
seo_info = analysis['data']['analysis']['seo_info']
basic_info = analysis['data']['analysis']['basic_info']
return {
'onpage_seo': {
'meta_tags': seo_info.get('meta_tags', {}),
'content': seo_info.get('content', {}),
'recommendations': seo_info.get('recommendations', [])
},
'url_seo': {
'title': basic_info.get('title', ''),
'meta_description': basic_info.get('meta_description', ''),
'has_robots_txt': bool(basic_info.get('robots_txt')),
'has_sitemap': bool(basic_info.get('sitemap'))
}
}
def _analyze_title_patterns(self, url: str) -> dict:
    """
    Analyze title patterns using the title generator.

    Args:
        url (str): The URL to analyze

    Returns:
        dict: The generator's 'patterns' (default {}) and 'suggestions'
        (default []).
    """
    # NOTE(review): `ai_title_generator` is not among the imports visible for
    # this module -- confirm where it is supposed to come from.
    generated = ai_title_generator(url)
    patterns = generated.get('patterns', {})
    suggestions = generated.get('suggestions', [])
    return {'patterns': patterns, 'suggestions': suggestions}
def _compare_competitors(self, results: dict) -> dict:
"""
Compare results across all competitors.
Args:
results (dict): Analysis results for all competitors
Returns:
dict: Comparative analysis results
"""
comparison = {
'content_comparison': self._compare_content(results),
'seo_comparison': self._compare_seo(results),
'title_comparison': self._compare_titles(results),
'performance_metrics': self._compare_performance(results),
'content_gaps': self._identify_content_gaps(results)
}
# Add AI-enhanced insights
comparison['ai_insights'] = self.ai_processor.analyze_competitor_comparison(comparison)
return comparison
def _compare_content(self, results: dict) -> dict:
"""Compare content structure across competitors."""
content_comparison = {
'topic_distribution': self._analyze_topic_distribution(results),
'content_depth': self._analyze_content_depth(results),
'content_formats': self._analyze_content_formats(results),
'content_quality': self._analyze_content_quality(results)
}
return content_comparison
def _analyze_topic_distribution(self, results: dict) -> dict:
"""Analyze topic distribution across competitors."""
all_topics = []
topic_frequency = Counter()
for url, data in results.items():
topics = data['content_structure'].get('topics', [])
all_topics.extend([t['topic'] for t in topics])
topic_frequency.update([t['topic'] for t in topics])
return {
'common_topics': [topic for topic, count in topic_frequency.most_common(10)],
'unique_topics': list(set(all_topics)),
'topic_frequency': dict(topic_frequency.most_common()),
'topic_coverage': len(set(all_topics)) / len(all_topics) if all_topics else 0
}
def _analyze_content_depth(self, results: dict) -> dict:
"""Analyze content depth across competitors."""
depth_metrics = {
'word_counts': {},
'section_counts': {},
'heading_distribution': defaultdict(list),
'content_hierarchy': {}
}
for url, data in results.items():
content_structure = data['content_structure']
# Word count analysis
depth_metrics['word_counts'][url] = content_structure.get('text_statistics', {}).get('word_count', 0)
# Section analysis
depth_metrics['section_counts'][url] = len(content_structure.get('sections', []))
# Heading distribution
for level, count in content_structure.get('hierarchy', {}).get('heading_distribution', {}).items():
depth_metrics['heading_distribution'][level].append(count)
# Content hierarchy
depth_metrics['content_hierarchy'][url] = content_structure.get('hierarchy', {})
return depth_metrics
def _analyze_content_formats(self, results: dict) -> dict:
"""Analyze content formats across competitors."""
format_analysis = {
'format_types': defaultdict(int),
'format_distribution': defaultdict(list),
'format_effectiveness': {}
}
for url, data in results.items():
sections = data['content_structure'].get('sections', [])
for section in sections:
format_type = section.get('type', 'unknown')
format_analysis['format_types'][format_type] += 1
format_analysis['format_distribution'][format_type].append({
'url': url,
'heading': section.get('heading', ''),
'word_count': section.get('word_count', 0)
})
return format_analysis
def _analyze_content_quality(self, results: dict) -> dict:
"""Analyze content quality across competitors."""
quality_metrics = {
'readability_scores': {},
'content_structure_scores': {},
'engagement_metrics': {},
'overall_quality': {}
}
for url, data in results.items():
content_structure = data['content_structure']
# Readability analysis
readability = content_structure.get('readability', {})
quality_metrics['readability_scores'][url] = {
'flesch_score': readability.get('flesch_score', 0),
'avg_sentence_length': readability.get('avg_sentence_length', 0),
'avg_word_length': readability.get('avg_word_length', 0)
}
# Structure analysis
hierarchy = content_structure.get('hierarchy', {})
quality_metrics['content_structure_scores'][url] = {
'has_proper_hierarchy': hierarchy.get('has_proper_hierarchy', False),
'heading_distribution': hierarchy.get('heading_distribution', {}),
'max_depth': hierarchy.get('max_depth', 0)
}
return quality_metrics
def _compare_seo(self, results: dict) -> dict:
"""Compare SEO metrics across competitors."""
seo_comparison = {
'onpage_metrics': defaultdict(list),
'technical_metrics': defaultdict(list),
'content_metrics': defaultdict(list),
'overall_seo_score': {}
}
for url, data in results.items():
seo_info = data.get('website_analysis', {}).get('analysis', {}).get('seo_info', {})
# On-page SEO metrics
meta_tags = seo_info.get('meta_tags', {})
seo_comparison['onpage_metrics']['title_score'].append(
100 if meta_tags.get('title', {}).get('status') == 'good' else 50
)
seo_comparison['onpage_metrics']['description_score'].append(
100 if meta_tags.get('description', {}).get('status') == 'good' else 50
)
seo_comparison['onpage_metrics']['keywords_score'].append(
100 if meta_tags.get('keywords', {}).get('status') == 'good' else 50
)
# Technical SEO metrics
technical = data.get('website_analysis', {}).get('analysis', {}).get('basic_info', {})
seo_comparison['technical_metrics']['has_robots_txt'].append(
100 if technical.get('robots_txt') else 0
)
seo_comparison['technical_metrics']['has_sitemap'].append(
100 if technical.get('sitemap') else 0
)
# Content SEO metrics
content = seo_info.get('content', {})
seo_comparison['content_metrics']['readability_score'].append(
content.get('readability_score', 0)
)
seo_comparison['content_metrics']['content_quality_score'].append(
content.get('content_quality_score', 0)
)
# Overall SEO score
seo_comparison['overall_seo_score'][url] = seo_info.get('overall_score', 0)
return seo_comparison
def _compare_titles(self, results: dict) -> dict:
"""Compare title patterns across competitors."""
title_comparison = {
'pattern_distribution': defaultdict(int),
'length_distribution': defaultdict(list),
'keyword_usage': defaultdict(int),
'format_preferences': defaultdict(int)
}
for url, data in results.items():
title_patterns = data['title_patterns']
# Pattern analysis
for pattern in title_patterns.get('patterns', {}):
title_comparison['pattern_distribution'][pattern] += 1
# Length analysis
for suggestion in title_patterns.get('suggestions', []):
title_comparison['length_distribution'][len(suggestion)].append(suggestion)
# Keyword analysis
for suggestion in title_patterns.get('suggestions', []):
words = suggestion.lower().split()
for word in words:
if len(word) > 3: # Filter out short words
title_comparison['keyword_usage'][word] += 1
return title_comparison
def _compare_performance(self, results: dict) -> dict:
"""Compare performance metrics across competitors."""
performance_metrics = {
'content_effectiveness': {},
'engagement_metrics': {},
'technical_performance': {},
'overall_performance': {}
}
for url, data in results.items():
# Content effectiveness
content_structure = data['content_structure']
performance_metrics['content_effectiveness'][url] = {
'content_depth': content_structure.get('text_statistics', {}).get('word_count', 0),
'content_quality': content_structure.get('readability', {}).get('flesch_score', 0),
'content_structure': content_structure.get('hierarchy', {}).get('has_proper_hierarchy', False)
}
# Technical performance
seo_analysis = data['seo_analysis']
performance_metrics['technical_performance'][url] = {
'onpage_score': sum(1 for v in seo_analysis.get('onpage_seo', {}).values() if v),
'technical_score': sum(1 for v in seo_analysis.get('url_seo', {}).values() if v)
}
return performance_metrics
def _find_missing_topics(self, results: dict) -> List[Dict[str, Any]]:
"""Find topics that are missing or underrepresented."""
all_topics = set()
topic_coverage = defaultdict(int)
# Collect all topics and their coverage
for url, data in results.items():
topics = data['content_structure'].get('topics', [])
for topic in topics:
all_topics.add(topic['topic'])
topic_coverage[topic['topic']] += 1
# Identify missing or underrepresented topics
missing_topics = []
total_competitors = len(results)
for topic in all_topics:
coverage = topic_coverage[topic] / total_competitors
if coverage < 0.5: # Topic covered by less than 50% of competitors
missing_topics.append({
'topic': topic,
'coverage': coverage,
'opportunity_score': 1 - coverage
})
return sorted(missing_topics, key=lambda x: x['opportunity_score'], reverse=True)
def _identify_opportunities(self, results: dict) -> List[Dict[str, Any]]:
"""Identify content opportunities based on analysis."""
opportunities = []
# Analyze content depth opportunities
depth_metrics = self._analyze_content_depth(results)
avg_word_count = sum(depth_metrics['word_counts'].values()) / len(depth_metrics['word_counts'])
for url, word_count in depth_metrics['word_counts'].items():
if word_count < avg_word_count * 0.7: # Content depth significantly below average
opportunities.append({
'type': 'content_depth',
'url': url,
'current_value': word_count,
'target_value': avg_word_count,
'opportunity_score': (avg_word_count - word_count) / avg_word_count
})
# Analyze format opportunities
format_analysis = self._analyze_content_formats(results)
for format_type, distribution in format_analysis['format_distribution'].items():
if len(distribution) < len(results) * 0.3: # Format used by less than 30% of competitors
opportunities.append({
'type': 'content_format',
'format': format_type,
'current_coverage': len(distribution) / len(results),
'opportunity_score': 1 - (len(distribution) / len(results))
})
return sorted(opportunities, key=lambda x: x['opportunity_score'], reverse=True)
def _analyze_format_gaps(self, results: dict) -> List[Dict[str, Any]]:
"""Analyze gaps in content formats."""
format_gaps = []
format_analysis = self._analyze_content_formats(results)
# Identify underutilized formats
for format_type, count in format_analysis['format_types'].items():
if count < len(results) * 0.3: # Format used by less than 30% of competitors
format_gaps.append({
'format': format_type,
'current_usage': count,
'potential_impact': 'high' if count < len(results) * 0.2 else 'medium',
'suggested_implementation': self._generate_format_suggestions(format_type)
})
return format_gaps
def _analyze_quality_gaps(self, results: dict) -> List[Dict[str, Any]]:
"""Analyze gaps in content quality."""
quality_gaps = []
quality_metrics = self._analyze_content_quality(results)
# Analyze readability gaps
readability_scores = quality_metrics['readability_scores']
avg_flesch = sum(score['flesch_score'] for score in readability_scores.values()) / len(readability_scores)
for url, scores in readability_scores.items():
if scores['flesch_score'] < avg_flesch * 0.8: # Readability significantly below average
quality_gaps.append({
'type': 'readability',
'url': url,
'current_score': scores['flesch_score'],
'target_score': avg_flesch,
'improvement_needed': avg_flesch - scores['flesch_score']
})
return quality_gaps
def _analyze_seo_gaps(self, results: dict) -> List[Dict[str, Any]]:
"""Analyze gaps in SEO implementation."""
seo_gaps = []
seo_comparison = self._compare_seo(results)
# Analyze on-page SEO gaps
for metric, values in seo_comparison['onpage_metrics'].items():
avg_value = sum(values) / len(values)
for url, value in zip(results.keys(), values):
if value < avg_value * 0.7: # Significantly below average
seo_gaps.append({
'type': 'onpage_seo',
'metric': metric,
'url': url,
'current_value': value,
'target_value': avg_value,
'improvement_needed': avg_value - value
})
# Analyze technical SEO gaps
for metric, values in seo_comparison['technical_metrics'].items():
avg_value = sum(values) / len(values)
for url, value in zip(results.keys(), values):
if value < avg_value * 0.7: # Significantly below average
seo_gaps.append({
'type': 'technical_seo',
'metric': metric,
'url': url,
'current_value': value,
'target_value': avg_value,
'improvement_needed': avg_value - value
})
# Analyze content SEO gaps
for metric, values in seo_comparison['content_metrics'].items():
avg_value = sum(values) / len(values)
for url, value in zip(results.keys(), values):
if value < avg_value * 0.7: # Significantly below average
seo_gaps.append({
'type': 'content_seo',
'metric': metric,
'url': url,
'current_value': value,
'target_value': avg_value,
'improvement_needed': avg_value - value
})
return seo_gaps
def _generate_format_suggestions(self, format_type: str) -> List[str]:
"""Generate suggestions for implementing specific content formats."""
format_suggestions = {
'article': [
'Create in-depth articles with comprehensive coverage',
'Include expert quotes and statistics',
'Add visual elements and infographics'
],
'blog_post': [
'Write engaging blog posts with personal insights',
'Include call-to-actions',
'Add social sharing buttons'
],
'how-to': [
'Create step-by-step guides',
'Include screenshots or videos',
'Add troubleshooting sections'
],
'case_study': [
'Present real-world examples',
'Include metrics and results',
'Add client testimonials'
]
}
return format_suggestions.get(format_type, [
'Research successful examples',
'Analyze competitor implementation',
'Create unique value proposition'
])

View File

@@ -1,674 +0,0 @@
"""
Enhanced Content Gap Analysis with Advertools Integration and AI Insights.
This module provides comprehensive content gap analysis using:
- adv.serp_goog: Competitor SERP analysis
- adv.kw_generate: Keyword research expansion
- adv.crawl: Deep competitor content analysis
- adv.word_frequency: Content theme identification
- llm_text_gen: AI-powered insights and recommendations
"""
import streamlit as st
import pandas as pd
import advertools as adv
from typing import Dict, Any, List, Optional, Tuple
from urllib.parse import urlparse
import tempfile
import os
from datetime import datetime
import asyncio
import json
from collections import Counter, defaultdict
from loguru import logger
# Import existing modules
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from .utils.ai_processor import AIProcessor, ProgressTracker
class EnhancedContentGapAnalyzer:
"""Enhanced content gap analyzer with advertools and AI integration."""
def __init__(self):
    """Create the helper objects and the scratch directory used for crawls."""
    self.website_analyzer = WebsiteAnalyzer()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    # Crawl output files are written into this directory.
    # NOTE(review): nothing in this class removes the directory afterwards.
    scratch_dir = tempfile.mkdtemp()
    self.temp_dir = scratch_dir

    logger.info("EnhancedContentGapAnalyzer initialized")
def analyze_comprehensive_gap(self, target_url: str, competitor_urls: List[str],
                              target_keywords: List[str], industry: str = "general") -> Dict[str, Any]:
    """
    Perform comprehensive content gap analysis.

    Runs five phases in order (SERP landscape, keyword expansion, competitor
    crawl, content themes, AI insights) and reports progress through
    Streamlit widgets as it goes.

    Args:
        target_url: Your website URL
        competitor_urls: List of competitor URLs (only the first 5 are used)
        target_keywords: List of primary keywords to analyze
        industry: Industry category for context

    Returns:
        Analysis results keyed by phase, or ``{'error': message}`` if any
        phase raises.
    """
    try:
        st.info("🚀 Starting Enhanced Content Gap Analysis...")

        # Apply the documented cap once so that every phase and the stored
        # results agree on which competitors were considered.
        competitor_urls = list(competitor_urls)[:5]

        # Initialize results structure
        results = {
            'analysis_timestamp': datetime.utcnow().isoformat(),
            'target_url': target_url,
            'competitor_urls': competitor_urls,
            'target_keywords': target_keywords,
            'industry': industry,
            'serp_analysis': {},
            'keyword_expansion': {},
            'competitor_content': {},
            'content_themes': {},
            'gap_analysis': {},
            'ai_insights': {},
            'recommendations': []
        }

        # Phase 1: SERP Analysis using adv.serp_goog
        with st.expander("🔍 SERP Analysis Progress", expanded=True):
            serp_results = self._analyze_serp_landscape(target_keywords, competitor_urls)
            results['serp_analysis'] = serp_results
            st.success(f"✅ Analyzed {len(target_keywords)} keywords across SERPs")

        # Phase 2: Keyword Expansion using adv.kw_generate
        with st.expander("🎯 Keyword Research Expansion", expanded=True):
            expanded_keywords = self._expand_keyword_research(target_keywords, industry)
            results['keyword_expansion'] = expanded_keywords
            st.success(f"✅ Generated {len(expanded_keywords.get('expanded_keywords', []))} additional keywords")

        # Phase 3: Deep Competitor Analysis using adv.crawl
        with st.expander("🕷️ Deep Competitor Content Analysis", expanded=True):
            competitor_content = self._analyze_competitor_content_deep(competitor_urls)
            results['competitor_content'] = competitor_content
            # Report the domains that actually produced crawl data rather
            # than the number of URLs that were passed in.
            crawled_count = len(competitor_content.get('crawl_results', {}))
            st.success(f"✅ Crawled and analyzed {crawled_count} competitor websites")

        # Phase 4: Content Theme Analysis using adv.word_frequency
        with st.expander("📊 Content Theme & Gap Identification", expanded=True):
            content_themes = self._analyze_content_themes(results['competitor_content'])
            results['content_themes'] = content_themes
            st.success("✅ Identified content themes and topic clusters")

        # Phase 5: AI-Powered Gap Analysis and Insights
        with st.expander("🤖 AI-Powered Insights Generation", expanded=True):
            ai_insights = self._generate_ai_insights(results)
            results['ai_insights'] = ai_insights
            # Only a dict payload can carry a 'recommendations' entry; for
            # anything else keep the empty default instead of failing the
            # whole analysis on .get().
            if isinstance(ai_insights, dict):
                results['recommendations'] = ai_insights.get('recommendations', [])
            st.success("✅ Generated AI-powered insights and recommendations")

        return results
    except Exception as e:
        error_msg = f"Error in comprehensive gap analysis: {str(e)}"
        # loguru has no `exc_info` keyword: extra kwargs are used as
        # str.format arguments for the message. opt(exception=True) is the
        # loguru way to attach the active traceback.
        logger.opt(exception=True).error(error_msg)
        st.error(error_msg)
        return {'error': error_msg}
def _analyze_serp_landscape(self, keywords: List[str], competitor_urls: List[str]) -> Dict[str, Any]:
    """Build a SERP overview for the given keywords (placeholder data).

    No search API is called: for each of the first 10 keywords a structured
    placeholder entry is derived from the competitor URLs.

    NOTE(review): the "ranking opportunity" check uses the first competitor's
    domain as the reference domain because no target URL is passed in.
    Confirm this is intended.

    Returns:
        Dict with 'keyword_rankings', 'competitor_presence', 'serp_features'
        and 'ranking_opportunities', or an empty dict on failure.
    """
    try:
        st.info("🔍 Analyzing SERP landscape for competitor positions...")

        rankings: Dict[str, Any] = {}
        opportunities: List[Dict[str, Any]] = []
        serp_results = {
            'keyword_rankings': rankings,
            'competitor_presence': {},
            'serp_features': {},
            'ranking_opportunities': opportunities
        }

        # Note: adv.serp_goog requires API key setup, so SERP data is simulated.
        for keyword in keywords[:10]:  # Limit to prevent API overuse
            try:
                # In production, use: adv.serp_goog(q=keyword, cx='your_cx', key='your_key')
                top_domains = [urlparse(url).netloc for url in competitor_urls]
                features = ['featured_snippet', 'people_also_ask', 'related_searches']
                positions = {
                    urlparse(url).netloc: f"Position {rank}"
                    for rank, url in enumerate(competitor_urls[:5], start=3)
                }
                rankings[keyword] = {
                    'top_10_domains': top_domains,
                    'serp_features': features,
                    'competitor_positions': positions
                }
                st.write(f"• Analyzed keyword: '{keyword}'")
            except Exception as e:
                st.warning(f"Could not analyze SERP for '{keyword}': {str(e)}")
                continue

        # Count how many keyword entries each domain appears in.
        presence = Counter(
            domain
            for entry in rankings.values()
            for domain in entry.get('top_10_domains', [])
        )
        serp_results['competitor_presence'] = dict(presence.most_common(10))

        # Flag keywords for which the reference domain has no position.
        if rankings:
            reference_domain = urlparse(competitor_urls[0] if competitor_urls else "").netloc
            opportunities.extend(
                {
                    'keyword': keyword,
                    'opportunity': 'Not ranking in top 10',
                    'serp_features': entry.get('serp_features', [])
                }
                for keyword, entry in rankings.items()
                if reference_domain not in entry.get('competitor_positions', {})
            )

        return serp_results
    except Exception as e:
        st.error(f"Error in SERP analysis: {str(e)}")
        return {}
def _expand_keyword_research(self, seed_keywords: List[str], industry: str) -> Dict[str, Any]:
    """Expand seed keywords with adv.kw_generate and group them by intent.

    Only the first five seed keywords are expanded. Generated keywords are
    de-duplicated in generation order, bucketed by search intent using
    whole-word markers, and those with three or more words are listed as
    long-tail opportunities (first 20).

    Args:
        seed_keywords: Keywords to expand.
        industry: Industry term used as one of the modifier words.

    Returns:
        Dict with 'seed_keywords', 'expanded_keywords', 'keyword_categories',
        'search_intent_analysis' and 'long_tail_opportunities', or an empty
        dict on failure.
    """
    try:
        st.info("🎯 Expanding keyword research...")

        expanded_results = {
            'seed_keywords': seed_keywords,
            'expanded_keywords': [],
            'keyword_categories': {},
            'search_intent_analysis': {},
            'long_tail_opportunities': []
        }

        all_expanded: List[str] = []
        for seed_keyword in seed_keywords[:5]:  # Limit to prevent overload
            try:
                # 'Exact' only: the other match types repeat the same phrases
                # (or add '+' markers), which would pollute the keyword list.
                broad_keywords = adv.kw_generate(
                    products=[seed_keyword],
                    words=["best", "top", "how to", "guide", "tips", "vs", "review", "comparison"],
                    max_len=4,
                    match_types=['Exact']
                )
                phrase_keywords = adv.kw_generate(
                    products=[seed_keyword],
                    words=[industry, "strategy", "analysis", "optimization", "techniques"],
                    max_len=3,
                    match_types=['Exact']
                )

                # kw_generate returns a DataFrame; iterating a DataFrame
                # yields its column names, so read the 'Keyword' column.
                for frame in (broad_keywords, phrase_keywords):
                    all_expanded.extend(str(kw) for kw in frame['Keyword'])

                st.write(f"• Generated variations for: '{seed_keyword}'")
            except Exception as e:
                st.warning(f"Could not expand keyword '{seed_keyword}': {str(e)}")
                continue

        # Order-preserving de-duplication keeps results stable between runs.
        expanded = list(dict.fromkeys(all_expanded))
        expanded_results['expanded_keywords'] = expanded

        # Categorize keywords by intent. The first matching rule wins and
        # anything unmatched is treated as navigational.
        intent_categories = {
            'informational': [],
            'commercial': [],
            'navigational': [],
            'transactional': []
        }
        intent_rules = (
            ('informational', {'how', 'what', 'why', 'guide', 'tips'}),
            ('commercial', {'best', 'top', 'review', 'comparison'}),
            ('transactional', {'buy', 'purchase', 'price', 'cost'}),
        )
        for keyword in expanded:
            # Compare whole words so that e.g. "laptop" does not match "top".
            tokens = set(keyword.lower().split())
            for intent, markers in intent_rules:
                if tokens & markers:
                    intent_categories[intent].append(keyword)
                    break
            else:
                intent_categories['navigational'].append(keyword)
        expanded_results['keyword_categories'] = intent_categories

        # Identify long-tail opportunities (three or more words).
        long_tail = [kw for kw in expanded if len(kw.split()) >= 3]
        expanded_results['long_tail_opportunities'] = long_tail[:20]

        return expanded_results
    except Exception as e:
        st.error(f"Error in keyword expansion: {str(e)}")
        return {}
def _analyze_competitor_content_deep(self, competitor_urls: List[str]) -> Dict[str, Any]:
    """Crawl competitor sites with adv.crawl and summarise what was fetched.

    Only the first three URLs are crawled. Each crawl is written to a
    ``.jl`` file inside ``self.temp_dir`` and read back with pandas.

    Returns:
        Dict with 'crawl_results' and 'content_structure' keyed by domain
        (plus empty 'page_analysis' / 'technical_insights' entries), or an
        empty dict when the analysis as a whole fails.
    """
    try:
        st.info("🕷️ Performing deep competitor content analysis...")

        crawl_results: Dict[str, Any] = {}
        content_structure: Dict[str, Any] = {}
        competitor_analysis = {
            'crawl_results': crawl_results,
            'content_structure': content_structure,
            'page_analysis': {},
            'technical_insights': {}
        }

        for position, url in enumerate(competitor_urls[:3], start=1):  # Limit to 3 for performance
            try:
                domain = urlparse(url).netloc
                st.write(f"🔍 Analyzing competitor {position}: {domain}")

                # One output file per domain inside the scratch directory.
                safe_name = domain.replace('.', '_')
                crawl_file = os.path.join(self.temp_dir, f"crawl_{safe_name}.jl")

                # Simplified crawl - in production, customize settings.
                adv.crawl(
                    url_list=[url],
                    output_file=crawl_file,
                    follow_links=True,
                    custom_settings={
                        'DEPTH_LIMIT': 2,  # Crawl 2 levels deep
                        'CLOSESPIDER_PAGECOUNT': 50,  # Limit pages
                        'DOWNLOAD_DELAY': 1,  # Be respectful
                    }
                )

                if not os.path.exists(crawl_file):
                    st.warning(f"⚠️ No crawl data available for {domain}")
                    continue

                crawl_df = pd.read_json(crawl_file, lines=True)

                status_codes = crawl_df['status'].value_counts().to_dict()
                page_types = self._categorize_pages(crawl_df)
                if 'size' in crawl_df.columns:
                    size_stats = {
                        'mean': crawl_df['size'].mean(),
                        'median': crawl_df['size'].median()
                    }
                else:
                    size_stats = {'mean': 0, 'median': 0}

                crawl_results[domain] = {
                    'total_pages': len(crawl_df),
                    'status_codes': status_codes,
                    'page_types': page_types,
                    'content_length_stats': size_stats
                }
                content_structure[domain] = self._analyze_content_structure(crawl_df)

                st.success(f"✅ Crawled {len(crawl_df)} pages from {domain}")
            except Exception as e:
                st.warning(f"Could not crawl {url}: {str(e)}")
                continue

        return competitor_analysis
    except Exception as e:
        st.error(f"Error in deep competitor analysis: {str(e)}")
        return {}
def _analyze_content_themes(self, competitor_content: Dict[str, Any]) -> Dict[str, Any]:
    """Derive dominant two-word themes with adv.word_frequency.

    The text analysed is currently a simulated sample built per crawled
    domain; page text from the crawl is not extracted here.

    Args:
        competitor_content: Crawl analysis dict; only the keys of its
            'crawl_results' entry (the domains) are used.

    Returns:
        Dict with 'dominant_themes' (list of records), 'content_clusters',
        'topic_gaps' and 'content_opportunities', or an empty dict on failure.
    """
    try:
        st.info("📊 Analyzing content themes and topics...")

        theme_analysis = {
            # A list, matching the records assigned below. Consumers slice
            # this value, which fails on a dict placeholder.
            'dominant_themes': [],
            'content_clusters': {},
            'topic_gaps': [],
            'content_opportunities': []
        }

        # In a real implementation the text would come from the crawled
        # pages. For now one simulated sample is built per crawled domain.
        crawled_domains = competitor_content.get('crawl_results', {})
        samples = [
            f"content marketing seo optimization digital strategy {domain} website analysis competitor research keyword targeting"
            for domain in crawled_domains
        ]
        all_content_text = "".join(f" {sample}" for sample in samples)

        if all_content_text.strip():
            # Use adv.word_frequency for theme analysis
            word_freq = adv.word_frequency(
                text_list=[all_content_text],
                phrase_len=2,  # Analyze 2-word phrases
                rm_words=['the', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by']
            )

            if not word_freq.empty:
                top_themes = word_freq.head(20)
                theme_analysis['dominant_themes'] = top_themes.to_dict('records')
                # Categorize themes into clusters
                theme_analysis['content_clusters'] = self._cluster_themes(top_themes)
                st.success("✅ Identified dominant content themes")

        return theme_analysis
    except Exception as e:
        st.error(f"Error in content theme analysis: {str(e)}")
        return {}
def _generate_ai_insights(self, analysis_results: Dict[str, Any]) -> Dict[str, Any]:
    """Generate AI-powered insights using llm_text_gen.

    Summarises the earlier analysis phases into a prompt, asks the LLM for a
    JSON answer and normalises the answer to a dict.

    Args:
        analysis_results: The results dict assembled by the earlier phases.

    Returns:
        The insights as a dict, or an empty dict when nothing was generated
        or an error occurred.
    """
    try:
        st.info("🤖 Generating AI-powered insights...")

        # 'dominant_themes' may be missing or an empty non-list placeholder.
        # Slicing anything but a list would raise and abort the insights.
        themes = analysis_results.get('content_themes', {}).get('dominant_themes', [])
        top_themes = themes[:10] if isinstance(themes, list) else []

        # Prepare analysis summary for AI
        analysis_summary = {
            'target_url': analysis_results.get('target_url', ''),
            'industry': analysis_results.get('industry', ''),
            'serp_opportunities': len(analysis_results.get('serp_analysis', {}).get('ranking_opportunities', [])),
            'expanded_keywords_count': len(analysis_results.get('keyword_expansion', {}).get('expanded_keywords', [])),
            'competitors_analyzed': len(analysis_results.get('competitor_urls', [])),
            'dominant_themes': top_themes
        }

        # default=str keeps prompt building from failing on values that are
        # not plain JSON types. The text is only read by the LLM.
        themes_json = json.dumps(analysis_summary['dominant_themes'], indent=2, default=str)

        prompt = "\n".join([
            "",
            "As an expert SEO content strategist, analyze this comprehensive content gap analysis data and provide actionable insights:",
            "TARGET ANALYSIS:",
            f"- Website: {analysis_summary['target_url']}",
            f"- Industry: {analysis_summary['industry']}",
            f"- SERP Opportunities: {analysis_summary['serp_opportunities']} keywords not ranking",
            f"- Keyword Expansion: {analysis_summary['expanded_keywords_count']} additional keywords identified",
            f"- Competitors Analyzed: {analysis_summary['competitors_analyzed']} websites",
            "DOMINANT CONTENT THEMES:",
            themes_json,
            "PROVIDE:",
            "1. Strategic Content Gap Analysis",
            "2. Priority Content Recommendations (top 5)",
            "3. Keyword Strategy Insights",
            "4. Competitive Positioning Advice",
            "5. Content Format Recommendations",
            "6. Technical SEO Opportunities",
            "7. Implementation Timeline (30/60/90 days)",
            "Format as JSON with clear, actionable recommendations.",
            "",
        ])

        ai_response = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an expert SEO content strategist with 15+ years of experience in content gap analysis and competitive intelligence.",
            response_format="json_object"
        )

        if not ai_response:
            st.warning("⚠️ Could not generate AI insights")
            return {}

        # Callers use dict methods on the result, so normalise it.
        # NOTE(review): presumably llm_text_gen can return raw JSON text.
        # Confirm against its implementation.
        if isinstance(ai_response, str):
            try:
                ai_response = json.loads(ai_response)
            except json.JSONDecodeError:
                ai_response = {'raw_response': ai_response}
        if not isinstance(ai_response, dict):
            ai_response = {'insights': ai_response}

        st.success("✅ Generated comprehensive AI insights")
        return ai_response
    except Exception as e:
        st.error(f"Error generating AI insights: {str(e)}")
        return {}
def _categorize_pages(self, crawl_df: pd.DataFrame) -> Dict[str, int]:
"""Categorize crawled pages by type."""
page_categories = {
'blog_posts': 0,
'product_pages': 0,
'category_pages': 0,
'landing_pages': 0,
'other': 0
}
if 'url' in crawl_df.columns:
for url in crawl_df['url']:
url_lower = url.lower()
if any(indicator in url_lower for indicator in ['/blog/', '/post/', '/article/', '/news/']):
page_categories['blog_posts'] += 1
elif any(indicator in url_lower for indicator in ['/product/', '/item/', '/shop/']):
page_categories['product_pages'] += 1
elif any(indicator in url_lower for indicator in ['/category/', '/collection/', '/browse/']):
page_categories['category_pages'] += 1
elif any(indicator in url_lower for indicator in ['/landing/', '/promo/', '/campaign/']):
page_categories['landing_pages'] += 1
else:
page_categories['other'] += 1
return page_categories
def _analyze_content_structure(self, crawl_df: pd.DataFrame) -> Dict[str, Any]:
"""Analyze content structure from crawl data."""
structure_analysis = {
'avg_title_length': 0,
'avg_meta_desc_length': 0,
'h1_usage': 0,
'internal_links_avg': 0,
'external_links_avg': 0
}
# Analyze available columns
if 'title' in crawl_df.columns:
structure_analysis['avg_title_length'] = crawl_df['title'].str.len().mean()
if 'meta_desc' in crawl_df.columns:
structure_analysis['avg_meta_desc_length'] = crawl_df['meta_desc'].str.len().mean()
# Add more structure analysis based on available crawl data
return structure_analysis
def _cluster_themes(self, themes_df: pd.DataFrame) -> Dict[str, List[str]]:
"""Cluster themes into topic groups."""
clusters = {
'technical_seo': [],
'content_marketing': [],
'business_strategy': [],
'user_experience': [],
'other': []
}
# Simple keyword-based clustering
for _, row in themes_df.iterrows():
word = row.get('word', '') if 'word' in row else str(row.get(0, ''))
word_lower = word.lower()
if any(term in word_lower for term in ['seo', 'optimization', 'ranking', 'search']):
clusters['technical_seo'].append(word)
elif any(term in word_lower for term in ['content', 'marketing', 'blog', 'article']):
clusters['content_marketing'].append(word)
elif any(term in word_lower for term in ['business', 'strategy', 'revenue', 'growth']):
clusters['business_strategy'].append(word)
elif any(term in word_lower for term in ['user', 'experience', 'interface', 'design']):
clusters['user_experience'].append(word)
else:
clusters['other'].append(word)
return clusters
def render_analysis_dashboard(self, results: Dict[str, Any]):
    """Render the overview metrics and the per-phase tabs for a result set.

    Shows an error and returns early when ``results`` is empty or carries
    an 'error' key.
    """
    if not results or 'error' in results:
        st.error("❌ Analysis failed or no results available")
        return

    st.markdown("## 🎯 Enhanced Content Gap Analysis Results")

    def count_at(*path: str) -> int:
        # Length of the list found by walking `path` through nested dicts.
        node = results
        for key in path[:-1]:
            node = node.get(key, {})
        return len(node.get(path[-1], []))

    # Overview metrics: one column per metric, in display order.
    overview = (
        ("Keywords Analyzed", ('target_keywords',)),
        ("Competitors Crawled", ('competitor_urls',)),
        ("Expanded Keywords", ('keyword_expansion', 'expanded_keywords')),
        ("SERP Opportunities", ('serp_analysis', 'ranking_opportunities')),
    )
    for column, (label, path) in zip(st.columns(4), overview):
        with column:
            st.metric(label, count_at(*path))

    # Detailed analysis tabs: title, renderer and the results key it reads.
    tab_views = (
        ("🔍 SERP Analysis", self._render_serp_analysis, 'serp_analysis'),
        ("🎯 Keyword Research", self._render_keyword_analysis, 'keyword_expansion'),
        ("🕷️ Competitor Analysis", self._render_competitor_analysis, 'competitor_content'),
        ("📊 Content Themes", self._render_content_themes, 'content_themes'),
        ("🤖 AI Insights", self._render_ai_insights, 'ai_insights'),
    )
    tabs = st.tabs([title for title, _, _ in tab_views])
    for tab, (_, renderer, key) in zip(tabs, tab_views):
        with tab:
            renderer(results.get(key, {}))
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
    """Show competitor SERP presence as a chart and opportunities as a table."""
    st.subheader("🔍 SERP Landscape Analysis")

    if not serp_data:
        st.info("No SERP analysis data available")
        return

    # Competitor presence chart
    presence = serp_data.get('competitor_presence')
    if presence:
        st.subheader("🏆 Competitor SERP Presence")
        rows = list(presence.items())
        presence_df = pd.DataFrame(rows, columns=['Domain', 'Keywords Ranking'])
        st.bar_chart(presence_df.set_index('Domain'))

    # Ranking opportunities
    opportunities = serp_data.get('ranking_opportunities')
    if opportunities:
        st.subheader("🎯 Ranking Opportunities")
        st.dataframe(pd.DataFrame(opportunities), use_container_width=True)
def _render_keyword_analysis(self, keyword_data: Dict[str, Any]):
    """Show expanded keywords grouped by intent, then long-tail keywords."""
    st.subheader("🎯 Keyword Research Expansion")

    if not keyword_data:
        st.info("No keyword expansion data available")
        return

    # Keyword categories: one expander per non-empty intent bucket.
    categories = keyword_data.get('keyword_categories')
    if categories:
        st.subheader("📂 Keywords by Search Intent")
        for intent, keywords in categories.items():
            if not keywords:
                continue
            with st.expander(f"{intent.title()} Keywords ({len(keywords)})"):
                for kw in keywords[:20]:  # Show first 20
                    st.write(f"{kw}")

    # Long-tail opportunities
    long_tail = keyword_data.get('long_tail_opportunities')
    if long_tail:
        st.subheader("🎣 Long-tail Opportunities")
        long_tail_df = pd.DataFrame(long_tail, columns=['Long-tail Keyword'])
        st.dataframe(long_tail_df, use_container_width=True)
def _render_competitor_analysis(self, competitor_data: Dict[str, Any]):
    """Render a per-domain summary table of the competitor crawl.

    Args:
        competitor_data: Crawl analysis dict; its 'crawl_results' entry maps
            each domain to 'total_pages' and 'content_length_stats'.
    """
    st.subheader("🕷️ Deep Competitor Analysis")

    if not competitor_data.get('crawl_results'):
        st.info("No competitor crawl data available")
        return

    # Crawl results summary
    st.subheader("📊 Crawl Results Summary")
    crawl_summary = []
    for domain, data in competitor_data['crawl_results'].items():
        mean_length = data.get('content_length_stats', {}).get('mean', 0)
        # A pandas mean over no usable values is NaN, and round(NaN) raises
        # ValueError. Treat a missing average as 0.
        if mean_length is None or pd.isna(mean_length):
            mean_length = 0
        crawl_summary.append({
            'Domain': domain,
            'Pages Crawled': data.get('total_pages', 0),
            'Avg Content Length': round(mean_length)
        })

    if crawl_summary:
        summary_df = pd.DataFrame(crawl_summary)
        st.dataframe(summary_df, use_container_width=True)
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Show the dominant themes table and the themes of each topic cluster."""
    st.subheader("📊 Content Theme Analysis")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes
    dominant = theme_data.get('dominant_themes')
    if dominant:
        st.subheader("🎯 Dominant Content Themes")
        st.dataframe(pd.DataFrame(dominant), use_container_width=True)

    # Content clusters: one expander per non-empty cluster.
    cluster_map = theme_data.get('content_clusters')
    if cluster_map:
        st.subheader("🗂️ Content Topic Clusters")
        for cluster, themes in cluster_map.items():
            if not themes:
                continue
            heading = f"{cluster.replace('_', ' ').title()} ({len(themes)} themes)"
            with st.expander(heading):
                for theme in themes[:10]:  # Show first 10
                    st.write(f"{theme}")
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Render AI-generated insights.

    The payload originates from an LLM, so its shape is not guaranteed.
    Lists, dicts and bare values are all accepted for the recommendations
    and for each timeline period, so an unexpected shape cannot crash the
    page or be rendered character by character.

    Args:
        ai_data: Insights payload; 'recommendations' and
            'implementation_timeline' are rendered when present.
    """
    def as_items(value) -> List[Any]:
        # Coerce a list / dict / single value into a list of display items.
        if isinstance(value, dict):
            return [f"{key}: {item}" for key, item in value.items()]
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    st.subheader("🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    if not isinstance(ai_data, dict):
        # Unstructured response: show it verbatim instead of failing on .get().
        st.write(ai_data)
        return

    # Strategic recommendations (top 5)
    recommendations = ai_data.get('recommendations')
    if recommendations:
        st.subheader("🎯 Priority Recommendations")
        for i, rec in enumerate(as_items(recommendations)[:5], 1):
            st.markdown(f"**{i}. {rec}**")

    # Implementation timeline
    timeline_data = ai_data.get('implementation_timeline')
    if timeline_data:
        st.subheader("📅 Implementation Timeline")
        if not isinstance(timeline_data, dict):
            # No per-period breakdown: show everything under one heading.
            timeline_data = {'Implementation': timeline_data}
        for period, tasks in timeline_data.items():
            with st.expander(f"{period} Plan"):
                for task in as_items(tasks):
                    st.write(f"{task}")

View File

@@ -1,787 +0,0 @@
"""
Enhanced UI for Content Gap Analysis with Advertools Integration.
This module provides a comprehensive Streamlit interface for content gap analysis
using the EnhancedContentGapAnalyzer with advertools and AI insights.
"""
import streamlit as st
import pandas as pd
from typing import Dict, Any, List
import json
from datetime import datetime
import io
import base64
from .enhanced_analyzer import EnhancedContentGapAnalyzer
from lib.alwrity_ui.dashboard_styles import apply_dashboard_style, render_dashboard_header
class EnhancedContentGapAnalysisUI:
"""Enhanced UI for content gap analysis."""
def __init__(self):
    """Create the analyzer backend, then apply the shared dashboard styling."""
    analyzer = EnhancedContentGapAnalyzer()
    self.analyzer = analyzer

    # Styling is applied once per UI instance.
    apply_dashboard_style()
def render(self):
    """Draw the page: header, input form and, when available, stored results."""
    header_title = "🎯 Enhanced Content Gap Analysis"
    header_subtitle = "Discover content opportunities with AI-powered insights using advertools, SERP analysis, competitor crawling, and strategic recommendations."
    render_dashboard_header(header_title, header_subtitle)

    # Main content area
    with st.container():
        self._render_analysis_form()

        # Results from an earlier run are kept in session state.
        has_results = 'gap_analysis_results' in st.session_state
        if has_results and st.session_state.gap_analysis_results:
            st.markdown("---")
            self._render_results_dashboard(st.session_state.gap_analysis_results)
def _render_analysis_form(self):
    """Render the analysis input form and start the analysis on submit.

    Collects the target URL, industry, competitor URLs, keywords and the
    analysis option checkboxes. On submit the inputs are validated and
    cleaned before ``_run_enhanced_analysis`` is called:
    - the target URL is stripped of surrounding whitespace;
    - competitor URLs and keywords are de-duplicated;
    - competitors are capped at the 5 the form advertises.
    """
    st.markdown("## 🚀 Setup Your Content Gap Analysis")

    with st.form("enhanced_gap_analysis_form"):
        # Target website input
        url_col, industry_col = st.columns([2, 1])
        with url_col:
            target_url = st.text_input(
                "🎯 Your Website URL",
                placeholder="https://yourwebsite.com",
                help="Enter your website URL to analyze"
            )
        with industry_col:
            industry = st.selectbox(
                "🏭 Industry",
                options=[
                    "general", "technology", "healthcare", "finance",
                    "ecommerce", "education", "real estate", "travel",
                    "food", "fitness", "marketing", "consulting"
                ],
                help="Select your industry for better analysis context"
            )

        # Competitor URLs
        st.markdown("### 🏆 Competitor Analysis")
        competitor_urls_text = st.text_area(
            "Competitor URLs (one per line, max 5)",
            placeholder="https://competitor1.com\nhttps://competitor2.com\nhttps://competitor3.com",
            height=120,
            help="Enter up to 5 competitor URLs for comprehensive analysis"
        )

        # Target keywords
        st.markdown("### 🎯 Keyword Focus")
        target_keywords_text = st.text_input(
            "Primary Keywords (comma-separated)",
            placeholder="seo, content marketing, digital marketing",
            help="Enter your main keywords to analyze and expand"
        )

        # Analysis options
        st.markdown("### ⚙️ Analysis Options")
        serp_col, crawl_col, ai_col = st.columns(3)
        with serp_col:
            enable_serp = st.checkbox(
                "🔍 SERP Analysis",
                value=True,
                help="Analyze competitor positions in search results"
            )
        with crawl_col:
            enable_crawling = st.checkbox(
                "🕷️ Deep Crawling",
                value=True,
                help="Perform comprehensive competitor content crawling"
            )
        with ai_col:
            enable_ai_insights = st.checkbox(
                "🤖 AI Insights",
                value=True,
                help="Generate AI-powered strategic recommendations"
            )

        # Submit button
        submitted = st.form_submit_button(
            "🚀 Start Enhanced Analysis",
            use_container_width=True,
            type="primary"
        )

    if not submitted:
        return

    valid_schemes = ('http://', 'https://')

    # Validate inputs. Strip first so pasted whitespace does not fail the
    # scheme check.
    target_url = (target_url or "").strip()
    if not target_url or not target_url.startswith(valid_schemes):
        st.error("❌ Please enter a valid target URL starting with http:// or https://")
        return

    if not target_keywords_text.strip():
        st.error("❌ Please enter at least one target keyword")
        return

    # Process inputs: keep valid URLs, drop duplicates, preserve order.
    competitor_urls = list(dict.fromkeys(
        line.strip() for line in competitor_urls_text.splitlines()
        if line.strip().startswith(valid_schemes)
    ))
    if not competitor_urls:
        st.error("❌ Please enter at least one valid competitor URL")
        return
    if len(competitor_urls) > 5:
        # Enforce the limit stated on the input and tell the user.
        st.warning("⚠️ Only the first 5 competitor URLs will be analyzed")
        competitor_urls = competitor_urls[:5]

    target_keywords = list(dict.fromkeys(
        kw.strip() for kw in target_keywords_text.split(',') if kw.strip()
    ))

    # Run analysis
    self._run_enhanced_analysis(
        target_url=target_url,
        competitor_urls=competitor_urls,
        target_keywords=target_keywords,
        industry=industry,
        options={
            'enable_serp': enable_serp,
            'enable_crawling': enable_crawling,
            'enable_ai_insights': enable_ai_insights
        }
    )
def _run_enhanced_analysis(self, target_url: str, competitor_urls: List[str],
                           target_keywords: List[str], industry: str, options: Dict[str, bool]):
    """Run the analyzer, store the result in session state and report the outcome.

    On success the script is rerun so the stored results are rendered.

    NOTE(review): ``options`` is accepted but never read in this method, so
    the option checkboxes of the form currently have no effect.
    """
    try:
        with st.spinner("🔄 Running Enhanced Content Gap Analysis..."):
            # Progress widgets shown while the analysis runs.
            progress_bar = st.progress(0)
            status_text = st.empty()

            progress_bar.progress(10)
            status_text.text("🚀 Initializing analysis...")

            analysis_request = dict(
                target_url=target_url,
                competitor_urls=competitor_urls,
                target_keywords=target_keywords,
                industry=industry,
            )
            results = self.analyzer.analyze_comprehensive_gap(**analysis_request)

            progress_bar.progress(100)
            status_text.text("✅ Analysis complete!")

            # Keep the results across reruns.
            st.session_state.gap_analysis_results = results

            # Clear progress indicators
            progress_bar.empty()
            status_text.empty()

            succeeded = 'error' not in results
            if succeeded:
                st.success("🎉 Enhanced Content Gap Analysis completed successfully!")
                st.balloons()
                # Rerun to show results
                st.rerun()
            else:
                st.error(f"❌ Analysis failed: {results['error']}")
    except Exception as e:
        st.error(f"❌ Error running analysis: {str(e)}")
def _render_results_dashboard(self, results: Dict[str, Any]):
    """Render all result sections, or only the error when the analysis failed."""
    if 'error' in results:
        st.error(f"❌ Analysis Error: {results['error']}")
        return

    st.markdown("## 📊 Enhanced Content Gap Analysis Results")

    # Sections in display order: key metrics, detailed tabs, export.
    sections = (
        self._render_metrics_overview,
        self._render_detailed_analysis,
        self._render_export_options,
    )
    for render_section in sections:
        render_section(results)
def _render_metrics_overview(self, results: Dict[str, Any]):
    """Render the five headline counts and the analysis timestamp."""
    st.markdown("### 📈 Analysis Overview")

    # (label, lazily evaluated list to count, help text) in column order.
    metric_specs = (
        ("🎯 Keywords Analyzed",
         lambda: results.get('target_keywords', []),
         "Number of primary keywords analyzed"),
        ("🏆 Competitors Crawled",
         lambda: results.get('competitor_urls', []),
         "Number of competitor websites analyzed"),
        ("🔍 Keywords Discovered",
         lambda: results.get('keyword_expansion', {}).get('expanded_keywords', []),
         "Additional keywords discovered through expansion"),
        ("🚀 SERP Opportunities",
         lambda: results.get('serp_analysis', {}).get('ranking_opportunities', []),
         "Keywords with ranking opportunities identified"),
        ("💡 AI Recommendations",
         lambda: results.get('recommendations', []),
         "AI-generated strategic recommendations"),
    )
    for column, (label, fetch_items, help_text) in zip(st.columns(5), metric_specs):
        with column:
            st.metric(label, len(fetch_items()), help=help_text)

    # Analysis timestamp
    raw_timestamp = results.get('analysis_timestamp')
    if raw_timestamp:
        # fromisoformat needs an explicit offset instead of a 'Z' suffix.
        timestamp = datetime.fromisoformat(results['analysis_timestamp'].replace('Z', '+00:00'))
        st.caption(f"📅 Analysis completed: {timestamp.strftime('%Y-%m-%d %H:%M:%S UTC')}")
def _render_detailed_analysis(self, results: Dict[str, Any]):
    """Render one tab per analysis phase plus the action plan tab."""
    # (tab title, renderer, results key). A key of None means the renderer
    # receives the complete results dict.
    sections = (
        ("🔍 SERP Analysis", self._render_serp_analysis, 'serp_analysis'),
        ("🎯 Keyword Research", self._render_keyword_research, 'keyword_expansion'),
        ("🕷️ Competitor Intelligence", self._render_competitor_intelligence, 'competitor_content'),
        ("📊 Content Themes", self._render_content_themes, 'content_themes'),
        ("🤖 AI Strategic Insights", self._render_ai_insights, 'ai_insights'),
        ("📋 Action Plan", self._render_action_plan, None),
    )

    tabs = st.tabs([title for title, _, _ in sections])
    for tab, (_, renderer, key) in zip(tabs, sections):
        with tab:
            renderer(results if key is None else results.get(key, {}))
def _render_serp_analysis(self, serp_data: Dict[str, Any]):
    """Render SERP analysis results.

    Shows competitor presence (chart plus the first three entries), ranking
    opportunities and the most frequent SERP features. When keywords were
    analysed but none were flagged as opportunities, a success note is shown
    instead of the opportunities table.

    Args:
        serp_data: SERP analysis dict with optional 'competitor_presence',
            'ranking_opportunities' and 'keyword_rankings' entries.
    """
    st.markdown("### 🔍 Search Engine Results Analysis")

    if not serp_data:
        st.info("No SERP analysis data available")
        return

    # Competitor SERP presence
    presence_data = serp_data.get('competitor_presence')
    if presence_data:
        st.markdown("#### 🏆 Competitor SERP Dominance")
        presence_df = pd.DataFrame(
            list(presence_data.items()),
            columns=['Domain', 'Keywords Ranking']
        )
        st.bar_chart(presence_df.set_index('Domain'))

        # First three entries in the order they were stored.
        st.markdown("**🥇 Top Performing Competitors:**")
        for domain, count in list(presence_data.items())[:3]:
            st.write(f"• **{domain}**: Ranking for {count} keywords")

    keyword_rankings = serp_data.get('keyword_rankings')

    # Ranking opportunities. The "already ranking" note used to sit in an
    # else branch nested under a truthy check of the same list, so it could
    # never be shown.
    opportunities = serp_data.get('ranking_opportunities')
    if opportunities:
        st.markdown("#### 🚀 Ranking Opportunities")
        opp_df = pd.DataFrame(opportunities)
        st.dataframe(opp_df, use_container_width=True)
        st.info(f"💡 Found {len(opportunities)} keywords where you're not ranking in top 10!")
    elif keyword_rankings:
        st.markdown("#### 🚀 Ranking Opportunities")
        st.success("🎉 You're already ranking well for your target keywords!")

    # SERP features analysis
    if keyword_rankings:
        st.markdown("#### 🎯 SERP Features Opportunities")
        all_features = [
            feature
            for keyword_data in keyword_rankings.values()
            for feature in keyword_data.get('serp_features', [])
        ]
        if all_features:
            feature_counts = pd.Series(all_features).value_counts()
            st.bar_chart(feature_counts)
            st.markdown("**🎯 Focus on these SERP features:**")
            for feature, count in feature_counts.head(3).items():
                st.write(f"• **{feature.replace('_', ' ').title()}**: Appears in {count} keyword searches")
def _render_keyword_research(self, keyword_data: Dict[str, Any]):
    """Render keyword research results.

    Shows seed vs. expanded keyword counts, the distribution of keywords by
    search intent and the long-tail opportunities. When keywords were
    expanded but none qualified as long-tail, a warning is shown.

    Args:
        keyword_data: Keyword expansion dict with 'seed_keywords',
            'expanded_keywords', 'keyword_categories' and
            'long_tail_opportunities' entries.
    """
    st.markdown("### 🎯 Advanced Keyword Research")

    if not keyword_data:
        st.info("No keyword expansion data available")
        return

    # Seed vs expanded keywords
    seed_keywords = keyword_data.get('seed_keywords', [])
    expanded_keywords = keyword_data.get('expanded_keywords', [])

    seed_col, expanded_col = st.columns(2)
    with seed_col:
        st.metric("🌱 Seed Keywords", len(seed_keywords))
        for kw in seed_keywords:
            st.write(f"{kw}")
    with expanded_col:
        st.metric("🔍 Expanded Keywords", len(expanded_keywords))
        # Guard against division by zero when there are no seed keywords.
        expansion_factor = len(expanded_keywords) / len(seed_keywords) if seed_keywords else 0
        st.write(f"**Expansion Factor:** {expansion_factor:.1f}x")

    # Search intent categorization
    categories = keyword_data.get('keyword_categories')
    if categories:
        st.markdown("#### 🧠 Search Intent Analysis")

        # Chart only the intents that actually have keywords.
        intent_counts = {intent: len(keywords) for intent, keywords in categories.items() if keywords}
        if intent_counts:
            intent_df = pd.DataFrame(
                list(intent_counts.items()),
                columns=['Search Intent', 'Keywords']
            )
            st.bar_chart(intent_df.set_index('Search Intent'))

        # Detailed breakdown
        for intent, keywords in categories.items():
            if keywords:
                with st.expander(f"📂 {intent.title()} Keywords ({len(keywords)})"):
                    for kw in keywords[:20]:  # Show first 20
                        st.write(f"{kw}")

    # Long-tail opportunities. The warning used to sit in an else branch
    # nested under a truthy check of the same list, so it could never be shown.
    long_tail = keyword_data.get('long_tail_opportunities')
    if long_tail:
        st.markdown("#### 🎣 Long-tail Keyword Opportunities")
        st.info(f"🎯 Found {len(long_tail)} long-tail opportunities with lower competition!")
        with st.expander("View Long-tail Keywords"):
            for i, kw in enumerate(long_tail, 1):
                st.write(f"{i}. {kw}")
    elif expanded_keywords:
        st.markdown("#### 🎣 Long-tail Keyword Opportunities")
        st.warning("No long-tail opportunities identified")
def _render_competitor_intelligence(self, competitor_data: Dict[str, Any]):
    """Render the competitor crawl overview, page-type mix and structure metrics.

    Args:
        competitor_data: Competitor analysis results. ``crawl_results`` maps a
            domain to its crawl statistics (``total_pages``,
            ``content_length_stats``, ``status_codes``, ``page_types``);
            ``content_structure`` optionally maps a domain to structure
            metrics.
    """
    st.markdown("### 🕷️ Competitive Intelligence")

    # Check the container itself first so a missing (None) section renders
    # the empty state instead of raising AttributeError.
    crawl_results = competitor_data.get('crawl_results') if competitor_data else None
    if not crawl_results:
        st.info("No competitor crawl data available")
        return

    # Crawl summary
    st.markdown("#### 📊 Competitor Content Overview")

    # Create summary table
    summary_data = []
    for domain, data in crawl_results.items():
        total_pages = data.get('total_pages') or 0
        ok_pages = data.get('status_codes', {}).get(200, 0)
        # A crawl with no pages has no meaningful success rate; report 0%
        # instead of dividing by zero (or by a made-up page count of 1).
        success_rate = ok_pages / total_pages * 100 if total_pages else 0.0
        summary_data.append({
            'Competitor': domain,
            'Pages Crawled': total_pages,
            'Avg Content Length': f"{data.get('content_length_stats', {}).get('mean', 0):,.0f} chars",
            'Success Rate': f"{success_rate:.1f}%"
        })

    if summary_data:
        summary_df = pd.DataFrame(summary_data)
        st.dataframe(summary_df, use_container_width=True)

    # Page type analysis
    st.markdown("#### 📄 Content Type Distribution")

    for domain, data in crawl_results.items():
        page_types = data.get('page_types', {})
        if page_types:
            with st.expander(f"📊 {domain} Content Types"):
                # Create chart data
                types_df = pd.DataFrame(
                    list(page_types.items()),
                    columns=['Page Type', 'Count']
                )
                if not types_df.empty:
                    st.bar_chart(types_df.set_index('Page Type'))

                # Key insights
                total_pages = sum(page_types.values())
                if total_pages > 0:
                    blog_ratio = page_types.get('blog_posts', 0) / total_pages * 100
                    product_ratio = page_types.get('product_pages', 0) / total_pages * 100

                    st.write("**Content Strategy Insights:**")
                    st.write(f"• Blog content: {blog_ratio:.1f}% of pages")
                    st.write(f"• Product focus: {product_ratio:.1f}% of pages")

    # Content structure insights
    structure_data = competitor_data.get('content_structure')
    if structure_data:
        st.markdown("#### 🏗️ Content Structure Analysis")

        for domain, structure in structure_data.items():
            with st.expander(f"🔍 {domain} Structure Analysis"):
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("Avg Title Length", f"{structure.get('avg_title_length', 0):.0f} chars")
                    st.metric("H1 Usage", f"{structure.get('h1_usage', 0):.1f}%")
                with col2:
                    st.metric("Avg Meta Desc Length", f"{structure.get('avg_meta_desc_length', 0):.0f} chars")
                    st.metric("Internal Links", f"{structure.get('internal_links_avg', 0):.1f} avg")
def _render_content_themes(self, theme_data: Dict[str, Any]):
    """Render dominant themes, topic clusters and content-gap opportunities.

    Args:
        theme_data: Theme analysis results. The keys read here are
            ``dominant_themes`` (a list of dicts carrying ``word``/``text``
            and ``freq``/``frequency``), ``content_clusters`` (cluster name
            to themes) and ``content_opportunities``.
    """
    st.markdown("### 📊 Content Theme Intelligence")

    if not theme_data:
        st.info("No content theme data available")
        return

    # Dominant themes
    dominant_themes = theme_data.get('dominant_themes')
    if dominant_themes:
        st.markdown("#### 🎯 Dominant Content Themes")

        themes_df = pd.DataFrame(dominant_themes)
        st.dataframe(themes_df, use_container_width=True)

        # Top themes highlight
        st.markdown("**🔥 Top Content Themes:**")
        for i, theme in enumerate(dominant_themes[:5], 1):
            # Theme dicts use either word/freq or text/frequency keys.
            word = theme.get('word', theme.get('text', 'Unknown'))
            freq = theme.get('freq', theme.get('frequency', 0))
            st.write(f"{i}. **{word}** (appears {freq} times)")

    # Content clusters
    clusters = theme_data.get('content_clusters')
    if clusters:
        st.markdown("#### 🗂️ Topic Cluster Analysis")

        # Cluster distribution (empty clusters are left out)
        cluster_counts = {name: len(members) for name, members in clusters.items() if members}
        if cluster_counts:
            cluster_df = pd.DataFrame(
                list(cluster_counts.items()),
                columns=['Topic Cluster', 'Theme Count']
            )
            st.bar_chart(cluster_df.set_index('Topic Cluster'))

        # Detailed cluster view
        for cluster_name, members in clusters.items():
            if members:
                with st.expander(f"📂 {cluster_name.replace('_', ' ').title()} ({len(members)} themes)"):
                    for theme in members[:15]:  # Show first 15
                        st.write(f"{theme}")

    # Content gaps and opportunities. A single check decides between the
    # list and the empty-state note, so the note is actually reachable.
    opportunities = theme_data.get('content_opportunities')
    if opportunities:
        st.markdown("#### 🎯 Content Gap Opportunities")
        for opp in opportunities:
            st.write(f"🎯 **{opp}**")
    else:
        st.info("No specific content opportunities identified in theme analysis")
def _render_ai_insights(self, ai_data: Dict[str, Any]):
    """Display the AI-generated strategy sections that are present in *ai_data*.

    Sections are rendered in a fixed order: recommendations (first five),
    competitive positioning, content strategy, implementation timeline and
    technical SEO opportunities. A section whose value is missing or empty
    is skipped entirely.
    """
    st.markdown("### 🤖 AI-Powered Strategic Insights")

    if not ai_data:
        st.info("No AI insights available")
        return

    # Strategic recommendations, one expander each (top five only)
    top_recommendations = ai_data.get('recommendations')
    if top_recommendations:
        st.markdown("#### 🎯 Priority Strategic Recommendations")
        for position, recommendation in enumerate(top_recommendations[:5], 1):
            with st.expander(f"🎯 Recommendation {position}"):
                st.markdown(recommendation)

    # Two plain-markdown sections: competitive positioning, then content strategy
    markdown_sections = (
        ('competitive_positioning', "#### 🏆 Competitive Positioning Insights"),
        ('content_strategy', "#### 📝 Content Strategy Recommendations"),
    )
    for key, heading in markdown_sections:
        section_text = ai_data.get(key)
        if section_text:
            st.markdown(heading)
            st.markdown(section_text)

    # Implementation timeline: one expander per period listing its tasks
    roadmap = ai_data.get('implementation_timeline')
    if roadmap:
        st.markdown("#### 📅 Implementation Roadmap")
        for period, tasks in roadmap.items():
            with st.expander(f"📅 {period.replace('_', ' ').title()} Plan"):
                for task in tasks:
                    st.write(f"{task}")

    # Technical SEO opportunities
    technical_items = ai_data.get('technical_opportunities')
    if technical_items:
        st.markdown("#### ⚙️ Technical SEO Opportunities")
        for item in technical_items:
            st.write(f"⚙️ {item}")
def _render_action_plan(self, results: Dict[str, Any]):
    """Render the action plan: data-driven quick wins plus fixed guidance lists.

    Quick wins are derived from ``results`` (SERP ranking opportunities,
    long-tail keywords and the top dominant theme). The medium-term,
    long-term and metrics lists are static text.

    Args:
        results: Complete analysis results; the ``serp_analysis``,
            ``keyword_expansion`` and ``content_themes`` sections are read.
    """
    st.markdown("### 📋 Your Content Gap Action Plan")

    # Quick wins section
    st.markdown("#### 🚀 Quick Wins (Week 1-2)")

    quick_wins = []

    # SERP opportunities
    serp_opportunities = results.get('serp_analysis', {}).get('ranking_opportunities', [])
    if serp_opportunities:
        quick_wins.append(f"🎯 Target {len(serp_opportunities)} keywords where you're not ranking")

    # Long-tail keywords
    long_tail = results.get('keyword_expansion', {}).get('long_tail_opportunities', [])
    if long_tail:
        quick_wins.append(f"🎣 Create content for {min(5, len(long_tail))} high-potential long-tail keywords")

    # Content themes
    themes = results.get('content_themes', {}).get('dominant_themes', [])
    if themes:
        # Theme dicts may label the term 'word' or 'text' (the same fallback
        # the theme renderer uses); default to a generic label otherwise.
        top_theme = themes[0].get('word', themes[0].get('text', 'top theme'))
        quick_wins.append(f"📊 Optimize existing content around '{top_theme}' theme")

    for i, win in enumerate(quick_wins, 1):
        st.write(f"{i}. {win}")

    # Medium-term strategy
    st.markdown("#### 📈 Medium-term Strategy (Month 1-3)")

    medium_term = [
        "🕷️ Conduct regular competitor content audits",
        "🎯 Develop content calendar based on keyword gaps",
        "📊 Implement content theme clusters",
        "🤖 Set up automated SERP monitoring"
    ]
    for i, strategy in enumerate(medium_term, 1):
        st.write(f"{i}. {strategy}")

    # Long-term vision
    st.markdown("#### 🎯 Long-term Vision (Quarter 2+)")

    long_term = [
        "🏆 Establish thought leadership in identified content gaps",
        "🌐 Build comprehensive content hub around dominant themes",
        "📈 Scale content production based on proven gaps",
        "🤝 Develop strategic partnerships for content collaboration"
    ]
    for i, vision in enumerate(long_term, 1):
        st.write(f"{i}. {vision}")

    # Success metrics
    st.markdown("#### 📊 Success Metrics to Track")

    metrics = [
        "🎯 Keyword ranking improvements for target terms",
        "📈 Organic traffic growth from new content",
        "🔍 SERP feature acquisitions (featured snippets, etc.)",
        "🏆 Competitive ranking gains in content themes",
        "📊 Content engagement metrics and user behavior"
    ]
    for metric in metrics:
        st.write(f"{metric}")
def _render_export_options(self, results: Dict[str, Any]):
    """Offer three export actions side by side: JSON, keywords CSV, text summary.

    Each column holds a trigger button; once it is clicked the matching
    download button is rendered in the same column. The CSV column warns
    instead when there are no expanded keywords to export.
    """
    st.markdown("---")
    st.markdown("### 📥 Export Analysis Results")

    json_col, csv_col, summary_col = st.columns(3)

    # Full results as JSON
    with json_col:
        json_requested = st.button("📄 Export as JSON", use_container_width=True)
        if json_requested:
            st.download_button(
                label="⬇️ Download JSON Report",
                data=json.dumps(results, indent=2, default=str),
                file_name=f"content_gap_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
                mime="application/json",
                use_container_width=True
            )

    # Expanded keywords as a one-column CSV
    with csv_col:
        csv_requested = st.button("📊 Export Keywords CSV", use_container_width=True)
        if csv_requested:
            keyword_rows = results.get('keyword_expansion', {}).get('expanded_keywords', [])
            if not keyword_rows:
                st.warning("No keywords available for export")
            else:
                keyword_table = pd.DataFrame(keyword_rows, columns=['Keyword'])
                st.download_button(
                    label="⬇️ Download Keywords CSV",
                    data=keyword_table.to_csv(index=False),
                    file_name=f"discovered_keywords_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                    mime="text/csv",
                    use_container_width=True
                )

    # Plain-text summary report
    with summary_col:
        summary_requested = st.button("📋 Generate Summary Report", use_container_width=True)
        if summary_requested:
            report_text = self._generate_summary_report(results)
            st.download_button(
                label="⬇️ Download Summary Report",
                data=report_text,
                file_name=f"content_gap_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain",
                use_container_width=True
            )
def _generate_summary_report(self, results: Dict[str, Any]) -> str:
    """Generate a plain-text summary report from the analysis results.

    Missing or ``None`` sections are treated as empty, so a partial result
    set still produces a report instead of raising.

    Args:
        results: Complete analysis results. Reads ``target_url``,
            ``analysis_timestamp``, ``industry``, ``target_keywords``,
            ``competitor_urls`` and the ``serp_analysis``,
            ``keyword_expansion`` and ``ai_insights`` sections.

    Returns:
        The report text: header, up to 10 ranking opportunities, up to 20
        discovered keywords, up to 5 AI recommendations (only when there
        are any) and a generation-time footer.
    """
    def section(name: str) -> Dict[str, Any]:
        # A section can be absent or explicitly None; both mean "empty".
        return results.get(name) or {}

    opportunities = section('serp_analysis').get('ranking_opportunities') or []
    expanded_keywords = section('keyword_expansion').get('expanded_keywords') or []
    recommendations = section('ai_insights').get('recommendations') or []

    target_url = results.get('target_url', 'Unknown')
    timestamp = results.get('analysis_timestamp', datetime.now().isoformat())

    parts = [f"""
ENHANCED CONTENT GAP ANALYSIS REPORT
=====================================
Target Website: {target_url}
Analysis Date: {timestamp}
Industry: {results.get('industry', 'General')}
EXECUTIVE SUMMARY
-----------------
Keywords Analyzed: {len(results.get('target_keywords') or [])}
Competitors Analyzed: {len(results.get('competitor_urls') or [])}
Keywords Discovered: {len(expanded_keywords)}
SERP Opportunities: {len(opportunities)}
RANKING OPPORTUNITIES
---------------------
"""]

    # Add ranking opportunities. Entries are normally dicts with a
    # 'keyword' key; a bare string is printed as the keyword itself.
    for i, opp in enumerate(opportunities[:10], 1):
        keyword = opp.get('keyword', 'Unknown keyword') if isinstance(opp, dict) else opp
        parts.append(f"{i}. {keyword}\n")

    # Add top keywords discovered
    parts.append("\nTOP DISCOVERED KEYWORDS\n-----------------------\n")
    parts.extend(f"{i}. {kw}\n" for i, kw in enumerate(expanded_keywords[:20], 1))

    # Add AI recommendations
    if recommendations:
        parts.append("\nAI STRATEGIC RECOMMENDATIONS\n----------------------------\n")
        parts.extend(f"{i}. {rec}\n" for i, rec in enumerate(recommendations[:5], 1))

    parts.append(
        f"\n\nReport generated by ALwrity Enhanced Content Gap Analysis\nTimestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
    )
    return "".join(parts)
# Render function for integration with main dashboard
def render_enhanced_content_gap_analysis():
    """Dashboard entry point: build the enhanced content gap analysis UI and render it."""
    EnhancedContentGapAnalysisUI().render()

View File

@@ -1,649 +0,0 @@
"""
Keyword researcher for content gap analysis.
"""
from typing import Dict, Any, List, Optional
import streamlit as st
from loguru import logger
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
import asyncio
import sys
import os
import json
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
from lib.ai_seo_tools.content_title_generator import ai_title_generator
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
# Configure logger
# This runs at import time and changes process-wide loguru state.
# NOTE(review): per the loguru docs, remove() without a handler id drops every
# handler registered so far, not only the default one — importing this module
# would then reset sinks configured elsewhere. Confirm that is intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, rotated at 50 MB, kept for 10 days.
logger.add(
"logs/keyword_researcher.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, using loguru colour markup.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs after the file sink above was added, so it only
# matters if loguru does not create missing parent directories itself —
# confirm, or move it above the logger.add() call.
os.makedirs("logs", exist_ok=True)
class KeywordResearcher:
    """Researches and analyzes keywords for content strategy.

    ``analyze`` is the public entry point. It asks the AI processor for
    keyword trends, buckets the keywords by search intent, scores them and
    summarises the outcome. The remaining private methods are LLM-backed
    helpers, each sending one structured prompt through ``llm_text_gen``.

    Every step reports failures through ``st.error`` and returns an empty
    fallback value instead of raising.
    """

    # Minimum ``volume * (1 - difficulty / 100)`` score a keyword needs to be
    # reported as an opportunity.
    OPPORTUNITY_SCORE_THRESHOLD = 50

    def __init__(self):
        """Initialize the keyword researcher."""
        self.ai_processor = AIProcessor()
        self.progress = ProgressTracker()

        # Define analysis stages
        self.stages = {
            'keyword_analysis': {
                'name': 'Keyword Analysis',
                'steps': [
                    'Initializing keyword research',
                    'Analyzing keyword trends',
                    'Evaluating search intent',
                    'Identifying opportunities',
                    'Generating keyword insights'
                ]
            }
        }

    # ------------------------------------------------------------------
    # Small internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _as_number(value: Any, default: float = 0) -> float:
        """Return *value* as a number, or *default* when it cannot be converted."""
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _schema_scalar(type_name: str) -> Dict[str, Any]:
        """Return a fresh JSON-schema node for a scalar of *type_name*."""
        return {"type": type_name}

    @staticmethod
    def _schema_array(items: Dict[str, Any]) -> Dict[str, Any]:
        """Return a JSON-schema array node whose elements match *items*."""
        return {"type": "array", "items": items}

    @staticmethod
    def _schema_object(properties: Dict[str, Any]) -> Dict[str, Any]:
        """Return a JSON-schema object node with the given *properties*."""
        return {"type": "object", "properties": properties}

    def _string_list_schema(self) -> Dict[str, Any]:
        """Return a fresh JSON-schema node for an array of strings."""
        return self._schema_array(self._schema_scalar("string"))

    def _current_stage_label(self) -> Optional[str]:
        """Return a display name for the tracker's current stage, if any.

        Falls back to the raw stage key when the tracker has no entry for
        it, so building an error message can never raise on its own.
        """
        stage = self.progress.current_stage
        if not stage:
            return None
        stages = getattr(self.progress, 'stages', None) or {}
        try:
            return stages[stage]['name']
        except (KeyError, TypeError, IndexError):
            return str(stage)

    def _run_structured_prompt(self, prompt: str, schema: Dict[str, Any], error_prefix: str, fallback: Any) -> Any:
        """Send *prompt* to the LLM with a JSON schema.

        Returns whatever ``llm_text_gen`` returns. On any exception the
        error is shown via ``st.error`` as ``"<error_prefix>: <exception>"``
        and *fallback* is returned.
        """
        try:
            return llm_text_gen(prompt, json_struct=schema)
        except Exception as e:
            st.error(f"{error_prefix}: {e}")
            return fallback

    # ------------------------------------------------------------------
    # Public workflow
    # ------------------------------------------------------------------
    def analyze(self, industry: str, url: str) -> Dict[str, Any]:
        """
        Analyze keywords for content strategy.

        Args:
            industry: Industry category
            url: Target website URL (accepted for interface compatibility;
                not used by the current implementation)

        Returns:
            Dictionary with ``trend_analysis``, ``intent_analysis``,
            ``opportunities`` and ``insights``. On failure the same keys are
            returned empty, together with an ``error`` message.
        """
        try:
            self.progress.start_stage('keyword_analysis')
            self.progress.next_step()

            # Analyze keyword trends
            trend_analysis = self._analyze_keyword_trends(industry)
            self.progress.next_step()

            # Evaluate search intent
            intent_analysis = self._evaluate_search_intent(trend_analysis)
            self.progress.next_step()

            # Identify opportunities
            opportunities = self._identify_opportunities(trend_analysis, intent_analysis)
            self.progress.next_step()

            # Generate insights
            insights = self._generate_keyword_insights(trend_analysis, intent_analysis, opportunities)
            self.progress.next_step()

            self.progress.complete_stage()

            return {
                'trend_analysis': trend_analysis,
                'intent_analysis': intent_analysis,
                'opportunities': opportunities,
                'insights': insights
            }
        except Exception as e:
            # Resolve the stage name defensively: a lookup failure here would
            # mask the original error and escape this handler.
            stage_label = self._current_stage_label()
            if stage_label:
                self.progress.update_progress(0, f"Error in {stage_label}: {str(e)}")
            st.error(f"Error analyzing keywords: {str(e)}")
            return {
                'error': str(e),
                'trend_analysis': {},
                'intent_analysis': {},
                'opportunities': [],
                'insights': []
            }

    def _analyze_keyword_trends(self, industry: str) -> Dict[str, Any]:
        """Fetch keyword trends for *industry* from the AI processor.

        Returns:
            Dict with ``trends``, ``search_intent`` and ``keyword_insights``
            taken from the processor's answer, or ``{}`` on failure.
        """
        try:
            # Get AI analysis for keyword trends
            analysis = self.ai_processor.analyze_keywords({
                'industry': industry,
                'keywords': {}  # Keywords will be fetched by AI processor
            })

            return {
                'trends': analysis.get('keyword_trends', {}),
                'search_intent': analysis.get('search_intent', {}),
                'keyword_insights': analysis.get('keyword_insights', {})
            }
        except Exception as e:
            st.error(f"Error analyzing keyword trends: {str(e)}")
            return {}

    def _evaluate_search_intent(self, trend_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """Bucket the keywords in ``trend_analysis['trends']`` by search intent.

        Keywords without an ``intent`` count as informational. Intent labels
        are matched case-insensitively; keywords with an unknown intent and
        entries that are not dicts are skipped.

        Returns:
            Mapping of intent name (informational, transactional,
            navigational, commercial) to a list of
            ``{'keyword', 'volume', 'difficulty'}`` dicts, or ``{}`` on
            failure.
        """
        try:
            intent_analysis = {
                'informational': [],
                'transactional': [],
                'navigational': [],
                'commercial': []
            }

            # Categorize keywords by intent
            for keyword, data in (trend_analysis.get('trends') or {}).items():
                if not isinstance(data, dict):
                    # One malformed entry must not discard every keyword.
                    continue
                intent = data.get('intent', 'informational')
                if isinstance(intent, str):
                    intent = intent.strip().lower()
                if intent in intent_analysis:
                    intent_analysis[intent].append({
                        'keyword': keyword,
                        'volume': data.get('volume', 0),
                        'difficulty': data.get('difficulty', 0)
                    })

            return intent_analysis
        except Exception as e:
            st.error(f"Error evaluating search intent: {str(e)}")
            return {}

    def _identify_opportunities(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Score every categorized keyword and keep the promising ones.

        The score is ``volume * (1 - difficulty / 100)``; keywords scoring
        above ``OPPORTUNITY_SCORE_THRESHOLD`` are returned, best first.
        Missing or non-numeric volume/difficulty values count as 0 so one
        bad entry cannot discard the whole result.

        Returns:
            List of opportunity dicts sorted by ``opportunity_score``
            (descending), or ``[]`` on failure.
        """
        try:
            opportunities = []

            # Analyze each intent category
            for intent, keywords in intent_analysis.items():
                for keyword_data in keywords:
                    keyword = keyword_data.get('keyword')
                    if keyword is None:
                        continue

                    # Calculate opportunity score
                    volume = self._as_number(keyword_data.get('volume'))
                    difficulty = self._as_number(keyword_data.get('difficulty'))
                    opportunity_score = volume * (1 - difficulty / 100)

                    if opportunity_score > self.OPPORTUNITY_SCORE_THRESHOLD:
                        opportunities.append({
                            'keyword': keyword,
                            'intent': intent,
                            'volume': volume,
                            'difficulty': difficulty,
                            'opportunity_score': opportunity_score
                        })

            # Sort by opportunity score
            opportunities.sort(key=lambda x: x['opportunity_score'], reverse=True)

            return opportunities
        except Exception as e:
            st.error(f"Error identifying opportunities: {str(e)}")
            return []

    def _generate_keyword_insights(self, trend_analysis: Dict[str, Any], intent_analysis: Dict[str, Any], opportunities: List[Dict[str, Any]]) -> List[str]:
        """Summarise the analysis as short human-readable sentences.

        Returns:
            One sentence for the number of analysed keywords, one per
            non-empty intent bucket and one for the number of opportunities
            (each only when non-zero), or ``[]`` on failure.
        """
        try:
            insights = []

            # Trend insights
            if trend_analysis.get('trends'):
                insights.append(f"Analyzed {len(trend_analysis['trends'])} keywords for trends")

            # Intent insights
            for intent, keywords in intent_analysis.items():
                if keywords:
                    insights.append(f"Found {len(keywords)} {intent} keywords")

            # Opportunity insights
            if opportunities:
                insights.append(f"Identified {len(opportunities)} high-potential keyword opportunities")

            return insights
        except Exception as e:
            st.error(f"Error generating keyword insights: {str(e)}")
            return []

    # ------------------------------------------------------------------
    # Thin wrappers around other SEO tools
    # ------------------------------------------------------------------
    def _generate_titles(self, industry: str) -> dict:
        """
        Generate keyword-based titles using the title generator.

        Args:
            industry (str): The industry to generate titles for

        Returns:
            dict: Whatever ``ai_title_generator`` returns for *industry*
        """
        return ai_title_generator(industry)

    def _analyze_meta_descriptions(self, industry: str) -> dict:
        """
        Analyze meta descriptions for keyword usage.

        Args:
            industry (str): The industry to analyze

        Returns:
            dict: Whatever ``metadesc_generator_main`` returns for *industry*
        """
        return metadesc_generator_main(industry)

    def _analyze_structured_data(self, industry: str) -> dict:
        """
        Analyze structured data implementation.

        Args:
            industry (str): The industry to analyze

        Returns:
            dict: Whatever ``ai_structured_data`` returns for *industry*
        """
        return ai_structured_data(industry)

    # ------------------------------------------------------------------
    # LLM-backed helpers
    # ------------------------------------------------------------------
    def _extract_keywords(self, titles: dict, meta_analysis: dict) -> list:
        """
        Extract keywords from titles and meta descriptions.

        Args:
            titles (dict): Generated titles
            meta_analysis (dict): Meta description analysis

        Returns:
            The ``llm_text_gen`` response on success, or ``[]`` on failure.
            NOTE(review): the success value presumably follows the
            keywords/summary schema below rather than being a list as
            annotated; confirm against ``llm_text_gen``.
        """
        prompt = f"""
As an SEO expert, analyze the following content and extract relevant keywords with their metrics:
Titles: {titles}
Meta Descriptions: {meta_analysis}
Please provide a JSON response with the following structure:
{{
"keywords": [
{{
"keyword": "string",
"search_volume": "number",
"difficulty": "number",
"relevance_score": "number",
"content_type": "string"
}}
],
"summary": {{
"total_keywords": "number",
"high_opportunity_keywords": "number",
"recommended_focus_areas": ["string"]
}}
}}
Focus on:
1. Primary keywords and their variations
2. Long-tail keywords
3. Industry-specific terminology
4. Search volume and difficulty metrics
5. Content type recommendations
"""
        scalar = self._schema_scalar
        schema = self._schema_object({
            "keywords": self._schema_array(self._schema_object({
                "keyword": scalar("string"),
                "search_volume": scalar("number"),
                "difficulty": scalar("number"),
                "relevance_score": scalar("number"),
                "content_type": scalar("string")
            })),
            "summary": self._schema_object({
                "total_keywords": scalar("number"),
                "high_opportunity_keywords": scalar("number"),
                "recommended_focus_areas": self._string_list_schema()
            })
        })
        return self._run_structured_prompt(prompt, schema, "Error extracting keywords", [])

    def _analyze_search_intent(self, ai_insights: dict) -> dict:
        """
        Analyze search intent from AI insights.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: The ``llm_text_gen`` response on success; on failure a
            dict with empty ``informational``, ``transactional`` and
            ``navigational`` lists (no ``summary`` key).
        """
        prompt = f"""
As an SEO expert, analyze the following content insights and determine the search intent:
Content Insights: {ai_insights}
Please provide a JSON response with the following structure:
{{
"informational": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"transactional": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"navigational": [
{{
"keyword": "string",
"intent_type": "string",
"content_suggestions": ["string"]
}}
],
"summary": {{
"dominant_intent": "string",
"content_strategy_recommendations": ["string"]
}}
}}
Focus on:
1. Identifying primary search intent for each keyword
2. Suggesting appropriate content types
3. Providing content strategy recommendations
4. Analyzing user behavior patterns
"""
        scalar = self._schema_scalar

        def intent_bucket() -> Dict[str, Any]:
            # All three intent buckets share one element shape; build a
            # fresh copy for each so no schema node is shared.
            return self._schema_array(self._schema_object({
                "keyword": scalar("string"),
                "intent_type": scalar("string"),
                "content_suggestions": self._string_list_schema()
            }))

        schema = self._schema_object({
            "informational": intent_bucket(),
            "transactional": intent_bucket(),
            "navigational": intent_bucket(),
            "summary": self._schema_object({
                "dominant_intent": scalar("string"),
                "content_strategy_recommendations": self._string_list_schema()
            })
        })
        return self._run_structured_prompt(
            prompt,
            schema,
            "Error analyzing search intent",
            {
                'informational': [],
                'transactional': [],
                'navigational': []
            }
        )

    def _suggest_content_formats(self, ai_insights: dict) -> list:
        """
        Suggest content formats based on AI insights.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            The ``llm_text_gen`` response on success, or ``[]`` on failure.
            NOTE(review): the success value presumably follows the
            content_formats/format_strategy schema below rather than being
            a list as annotated; confirm against ``llm_text_gen``.
        """
        prompt = f"""
As a content strategy expert, analyze the following insights and suggest appropriate content formats:
AI Insights: {ai_insights}
Please provide a JSON response with the following structure:
{{
"content_formats": [
{{
"format": "string",
"description": "string",
"use_cases": ["string"],
"recommended_topics": ["string"],
"estimated_impact": "string"
}}
],
"format_strategy": {{
"primary_formats": ["string"],
"secondary_formats": ["string"],
"implementation_priority": ["string"]
}}
}}
Focus on:
1. Identifying the most effective content formats
2. Matching formats to user intent
3. Suggesting specific use cases
4. Providing implementation guidance
"""
        scalar = self._schema_scalar
        schema = self._schema_object({
            "content_formats": self._schema_array(self._schema_object({
                "format": scalar("string"),
                "description": scalar("string"),
                "use_cases": self._string_list_schema(),
                "recommended_topics": self._string_list_schema(),
                "estimated_impact": scalar("string")
            })),
            "format_strategy": self._schema_object({
                "primary_formats": self._string_list_schema(),
                "secondary_formats": self._string_list_schema(),
                "implementation_priority": self._string_list_schema()
            })
        })
        return self._run_structured_prompt(prompt, schema, "Error suggesting content formats", [])

    def _create_topic_clusters(self, ai_insights: dict) -> dict:
        """
        Create topic clusters from AI insights.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: The ``llm_text_gen`` response on success; on failure
            ``{'clusters': [], 'relationships': {}}``.
        """
        prompt = f"""
As a content organization expert, analyze the following insights and create topic clusters:
AI Insights: {ai_insights}
Please provide a JSON response with the following structure:
{{
"clusters": [
{{
"cluster_name": "string",
"main_topics": ["string"],
"subtopics": ["string"],
"related_keywords": ["string"],
"content_opportunities": ["string"]
}}
],
"relationships": {{
"cluster_connections": [
{{
"source": "string",
"target": "string",
"relationship_type": "string",
"strength": "number"
}}
],
"content_hierarchy": {{
"primary_topics": ["string"],
"secondary_topics": ["string"],
"tertiary_topics": ["string"]
}}
}}
}}
Focus on:
1. Identifying main topic clusters
2. Organizing subtopics and related keywords
3. Mapping relationships between clusters
4. Suggesting content opportunities
"""
        scalar = self._schema_scalar
        schema = self._schema_object({
            "clusters": self._schema_array(self._schema_object({
                "cluster_name": scalar("string"),
                "main_topics": self._string_list_schema(),
                "subtopics": self._string_list_schema(),
                "related_keywords": self._string_list_schema(),
                "content_opportunities": self._string_list_schema()
            })),
            "relationships": self._schema_object({
                "cluster_connections": self._schema_array(self._schema_object({
                    "source": scalar("string"),
                    "target": scalar("string"),
                    "relationship_type": scalar("string"),
                    "strength": scalar("number")
                })),
                "content_hierarchy": self._schema_object({
                    "primary_topics": self._string_list_schema(),
                    "secondary_topics": self._string_list_schema(),
                    "tertiary_topics": self._string_list_schema()
                })
            })
        })
        return self._run_structured_prompt(
            prompt,
            schema,
            "Error creating topic clusters",
            {
                'clusters': [],
                'relationships': {}
            }
        )

View File

@@ -1,361 +0,0 @@
"""
Main module for content gap analysis.
"""
from typing import Dict, Any, List, Optional
import streamlit as st
from loguru import logger
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from .competitor_analyzer import CompetitorAnalyzer
from .keyword_researcher import KeywordResearcher
from .recommendation_engine import RecommendationEngine
from .utils.ai_processor import AIProcessor, ProgressTracker
from .utils.storage import ContentGapAnalysisStorage
from datetime import datetime
import asyncio
import sys
import os
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
from .utils.content_parser import ContentParser
# Configure logger
# This runs at import time and changes process-wide loguru state.
# NOTE(review): per the loguru docs, remove() without a handler id drops every
# handler registered so far — including any added by modules imported above,
# which presumably covers the keyword researcher's own sinks. Confirm intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, rotated at 50 MB, kept for 10 days.
logger.add(
"logs/content_gap_analysis.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, using loguru colour markup.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs after the file sink above was added, so it only
# matters if loguru does not create missing parent directories itself —
# confirm, or move it above the logger.add() call.
os.makedirs("logs", exist_ok=True)
class ContentGapAnalysis:
"""Main class for content gap analysis."""
def __init__(self, db_session=None):
    """Create the analysis collaborators and describe the workflow phases.

    Args:
        db_session: Optional database session. When it is truthy a
            ``ContentGapAnalysisStorage`` is created around it; otherwise
            ``self.storage`` is None and the persistence methods report
            "Storage not initialized".
    """
    # Collaborators, created in the same order as before.
    self.website_analyzer = WebsiteAnalyzer()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.keyword_researcher = KeywordResearcher()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
    self.progress = ProgressTracker()

    if db_session:
        self.storage = ContentGapAnalysisStorage(db_session)
    else:
        self.storage = None

    # Analysis phases as (key, display name, ordered step labels).
    phase_table = (
        ('website_analysis', 'Website Analysis', [
            'Initializing website analysis',
            'Analyzing website content',
            'Evaluating SEO elements',
            'Generating website insights',
        ]),
        ('competitor_analysis', 'Competitor Analysis', [
            'Initializing competitor analysis',
            'Analyzing competitor content',
            'Comparing market position',
            'Generating competitive insights',
        ]),
        ('keyword_analysis', 'Keyword Analysis', [
            'Initializing keyword research',
            'Analyzing keyword trends',
            'Evaluating search intent',
            'Generating keyword insights',
        ]),
        ('recommendation_generation', 'Recommendation Generation', [
            'Initializing recommendation engine',
            'Analyzing content gaps',
            'Generating recommendations',
            'Creating implementation plan',
        ]),
    )
    self.phases = {
        phase_key: {'name': display_name, 'steps': step_labels}
        for phase_key, display_name, step_labels in phase_table
    }

    logger.info("ContentGapAnalysis initialized")
def analyze(self, url: str, industry: str, competitor_urls: Optional[List[str]] = None, user_id: Optional[int] = None) -> Dict[str, Any]:
    """
    Run the complete content gap analysis workflow.

    The phases run in order: website analysis, competitor analysis (only
    when ``competitor_urls`` is given), keyword analysis and recommendation
    generation. Results are stored when a user id is supplied and storage
    is configured.

    Args:
        url: Target website URL
        industry: Industry category
        competitor_urls: Optional list of competitor URLs
        user_id: Optional user ID for storing results

    Returns:
        Dictionary with ``website``, ``keywords``, ``recommendations`` and
        ``duration`` (seconds), plus ``competitors`` when competitors were
        analysed and ``analysis_id`` when the results were stored. On
        failure an ``error`` message is returned alongside empty sections.
    """
    import time

    try:
        results = {}
        # A monotonic clock measures elapsed time without depending on
        # wall-clock adjustments (and avoids the deprecated utcnow()).
        started = time.perf_counter()

        # Phase 1: Website Analysis
        self.progress.start_stage('website_analysis')
        self.progress.next_step()

        website_analysis = self.website_analyzer.analyze(url)
        results['website'] = website_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 2: Competitor Analysis
        competitor_analysis = None
        if competitor_urls:
            self.progress.start_stage('competitor_analysis')
            self.progress.next_step()

            competitor_analysis = self.competitor_analyzer.analyze(competitor_urls, industry)
            results['competitors'] = competitor_analysis

            self.progress.next_step()
            self.progress.complete_stage()

        # Phase 3: Keyword Analysis
        self.progress.start_stage('keyword_analysis')
        self.progress.next_step()

        keyword_analysis = self.keyword_researcher.analyze(industry, url)
        results['keywords'] = keyword_analysis

        self.progress.next_step()
        self.progress.complete_stage()

        # Phase 4: Recommendation Generation
        self.progress.start_stage('recommendation_generation')
        self.progress.next_step()

        recommendations = self.recommendation_engine.generate_recommendations(
            website_analysis,
            competitor_analysis,
            keyword_analysis
        )
        results['recommendations'] = recommendations

        self.progress.next_step()
        self.progress.complete_stage()

        # Calculate analysis duration
        results['duration'] = time.perf_counter() - started

        # Store results if user_id is provided and storage is available.
        # Compare against None so that a user id of 0 is still stored.
        if user_id is not None and self.storage:
            analysis_id = self.storage.save_analysis(user_id, url, industry, results)
            if analysis_id:
                results['analysis_id'] = analysis_id

        return results
    except Exception as e:
        stage = self.progress.current_stage
        if stage:
            # Resolve the display name defensively: a failed lookup here
            # would mask the original error and escape this handler.
            stage_info = (getattr(self.progress, 'stages', None) or {}).get(stage) or {}
            stage_name = stage_info.get('name', stage) if isinstance(stage_info, dict) else stage
            self.progress.update_progress(0, f"Error in {stage_name}: {str(e)}")
        st.error(f"Error in content gap analysis: {str(e)}")
        return {
            'error': str(e),
            'website': {},
            'competitors': [],
            'keywords': {},
            'recommendations': []
        }
def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
    """
    Look up a previously stored analysis.

    Args:
        analysis_id: Analysis ID

    Returns:
        Whatever the storage backend returns for that id; None (after
        showing an error) when no storage backend is configured.
    """
    if self.storage:
        return self.storage.get_analysis(analysis_id)

    st.error("Storage not initialized")
    return None
def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
    """
    List the analyses stored for one user.

    Args:
        user_id: User ID

    Returns:
        Whatever the storage backend returns for that user; an empty list
        (after showing an error) when no storage backend is configured.
    """
    if self.storage:
        return self.storage.get_user_analyses(user_id)

    st.error("Storage not initialized")
    return []
def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
    """
    Change the status of a stored recommendation.

    Args:
        recommendation_id: Recommendation ID
        status: New status

    Returns:
        The storage backend's result; False (after showing an error) when
        no storage backend is configured.
    """
    if self.storage:
        return self.storage.update_recommendation_status(recommendation_id, status)

    st.error("Storage not initialized")
    return False
def delete_analysis(self, analysis_id: int) -> bool:
    """
    Remove a stored analysis through the storage backend.

    Args:
        analysis_id: Analysis ID

    Returns:
        The storage backend's result; False (after showing an error) when
        no storage backend is configured.
    """
    if self.storage:
        return self.storage.delete_analysis(analysis_id)

    st.error("Storage not initialized")
    return False
def get_analysis_summary(self, results: Dict[str, Any]) -> Dict[str, Any]:
    """
    Condense full analysis results into per-section summaries.

    Args:
        results: Dictionary containing analysis results

    Returns:
        Dictionary with ``website_metrics``, ``competitor_insights``,
        ``keyword_opportunities``, ``recommendation_highlights`` and
        ``ai_insights``. On failure the same keys are returned empty,
        together with an ``error`` message.
    """
    try:
        self.progress.start_stage('summary_generation')
        self.progress.next_step()

        # Each section is summarised by its own helper, in this order.
        website_metrics = self._summarize_website_metrics(results.get('website', {}))
        competitor_insights = self._summarize_competitor_insights(results.get('competitors', {}))
        keyword_opportunities = self._summarize_keyword_opportunities(results.get('keywords', {}))
        recommendation_highlights = self._summarize_recommendations(results.get('recommendations', {}))

        summary = {
            'website_metrics': website_metrics,
            'competitor_insights': competitor_insights,
            'keyword_opportunities': keyword_opportunities,
            'recommendation_highlights': recommendation_highlights,
            'ai_insights': results.get('ai_insights', {})
        }

        self.progress.complete_stage()
        return summary
    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error generating summary: {str(e)}")
        st.error(f"Error generating analysis summary: {str(e)}")

        failure = {'error': str(e)}
        for section in (
            'website_metrics',
            'competitor_insights',
            'keyword_opportunities',
            'recommendation_highlights',
            'ai_insights',
        ):
            failure[section] = {}
        return failure
def export_results(self, results: Dict[str, Any], format: str = 'json') -> str:
    """
    Export analysis results in the specified format.

    The work is reported to the progress tracker as the 'export' stage.

    Args:
        results: Dictionary containing analysis results
        format: Export format ('json' or 'csv'), case-insensitive

    Returns:
        String containing the exported results. When the export fails
        (including an unsupported format) the error is shown in the UI and
        the exception message is returned instead of exported data.
    """
    try:
        self.progress.start_stage('export')
        self.progress.next_step()
        export_format = format.lower()
        if export_format == 'json':
            import json
            exported = json.dumps(results, indent=2)
        elif export_format == 'csv':
            import pandas as pd
            try:
                # Column-oriented results (equal-length lists) map directly to a table.
                df = pd.DataFrame(results)
            except (ValueError, TypeError):
                # Scalar-only or ragged/nested results cannot be built column-wise;
                # flatten them into a single row with dotted column names instead.
                df = pd.json_normalize(results)
            exported = df.to_csv(index=False)
        else:
            raise ValueError(f"Unsupported export format: {format}")
        self.progress.complete_stage()
        return exported
    except Exception as e:
        if self.progress.current_stage:
            self.progress.update_progress(0, f"Error exporting results: {str(e)}")
        st.error(f"Error exporting results: {str(e)}")
        return str(e)
def _summarize_website_metrics(self, website_data: Dict[str, Any]) -> Dict[str, Any]:
"""Generate summary of website metrics."""
try:
return {
'content_score': website_data.get('content_score', 0),
'seo_score': website_data.get('seo_score', 0),
'structure_score': website_data.get('structure_score', 0),
'key_insights': website_data.get('insights', [])[:5] # Top 5 insights
}
except Exception as e:
st.error(f"Error summarizing website metrics: {str(e)}")
return {}
def _summarize_competitor_insights(self, competitor_data: Dict[str, Any]) -> Dict[str, Any]:
"""Generate summary of competitor insights."""
try:
return {
'market_position': competitor_data.get('market_position', {}),
'content_gaps': competitor_data.get('content_gaps', [])[:5], # Top 5 gaps
'competitive_advantages': competitor_data.get('advantages', [])[:5] # Top 5 advantages
}
except Exception as e:
st.error(f"Error summarizing competitor insights: {str(e)}")
return {}
def _summarize_keyword_opportunities(self, keyword_data: Dict[str, Any]) -> Dict[str, Any]:
"""Generate summary of keyword opportunities."""
try:
return {
'top_keywords': keyword_data.get('top_keywords', [])[:10], # Top 10 keywords
'search_intent': keyword_data.get('search_intent', {}),
'opportunities': keyword_data.get('opportunities', [])[:5] # Top 5 opportunities
}
except Exception as e:
st.error(f"Error summarizing keyword opportunities: {str(e)}")
return {}
def _summarize_recommendations(self, recommendation_data: Dict[str, Any]) -> Dict[str, Any]:
"""Generate summary of recommendations."""
try:
return {
'priority_recommendations': recommendation_data.get('priority_recommendations', [])[:5], # Top 5 recommendations
'implementation_timeline': recommendation_data.get('timeline', {}),
'expected_impact': recommendation_data.get('impact', {})
}
except Exception as e:
st.error(f"Error summarizing recommendations: {str(e)}")
return {}

View File

@@ -1,41 +0,0 @@
"""
Navigation component for Content Gap Analysis tool.
"""
import streamlit as st
def show_content_gap_analysis_nav():
    """
    Render the sidebar of the Content Gap Analysis tool.

    The sidebar shows a title, a short introduction, the analysis-type
    selector, a feature list and a collapsible usage guide.

    Returns:
        The option currently selected in the "Select Analysis Type" radio.
    """
    sidebar = st.sidebar
    analysis_types = [
        "Website Analysis",
        "Competitor Analysis",
        "Keyword Research",
        "Recommendations",
    ]
    sidebar.title("Content Gap Analysis")
    sidebar.markdown("""
Analyze your content strategy, identify gaps, and get AI-powered recommendations.
""")
    # Navigation options
    selected_type = sidebar.radio("Select Analysis Type", analysis_types)
    # Tool description
    sidebar.markdown("""
### Features
- Website content analysis
- Competitor content comparison
- Keyword research and trends
- AI-powered recommendations
- Content gap identification
- Implementation timeline
""")
    # Help section
    with sidebar.expander("How to Use"):
        st.markdown("""
1. Start with Website Analysis
2. Add competitor URLs
3. Research keywords
4. Get recommendations
5. Export results
""")
    return selected_type

View File

@@ -1,440 +0,0 @@
"""
Recommendation engine for content gap analysis.
"""
import streamlit as st
from typing import Dict, Any, List, Optional
from loguru import logger
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
from lib.ai_seo_tools.content_gap_analysis.utils.data_collector import DataCollector
from lib.ai_seo_tools.content_gap_analysis.utils.content_parser import ContentParser
from lib.ai_seo_tools.content_gap_analysis.utils.ai_processor import AIProcessor, ProgressTracker
from lib.ai_seo_tools.content_title_generator import ai_title_generator
import asyncio
import sys
import os
import json
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
# Configure logger
# NOTE: this runs at import time and reconfigures the shared loguru logger:
# the default handler is dropped and replaced by a rotating file sink
# (DEBUG and above) plus a colourised stdout sink (INFO and above).
logger.remove() # Remove default handler
logger.add(
    "logs/recommendation_engine.log",
    rotation="50 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs after the file sink above was registered; presumably
# loguru creates the parent directory of a file sink itself - confirm, or move
# this call above the first logger.add().
os.makedirs("logs", exist_ok=True)
class RecommendationEngine:
    """
    Generates content recommendations based on analysis results.

    Website, competitor and keyword analysis dictionaries are reduced to a
    flat list of content gaps. Each gap is scored, gaps scoring above the
    opportunity threshold become recommendations, and the recommendations
    are grouped into implementation phases.
    """

    # Readability / SEO scores below this value are reported as gaps.
    MIN_ACCEPTABLE_SCORE = 70
    # A gap must score strictly above this value to count as an opportunity.
    OPPORTUNITY_THRESHOLD = 50
    # Starting priority per gap type; unknown gap types start at 0.
    BASE_PRIORITY = {
        'content_quality': 70,
        'seo': 80,
        'competitor': 60,
        'keyword': 50,
    }
    # Gap type -> name of the implementation phase it belongs to (plan order).
    PHASE_NAMES = {
        'content_quality': 'Content Enhancement',
        'seo': 'SEO Optimization',
        'competitor': 'Competitive Content',
        'keyword': 'Keyword Targeting',
    }
    # Gap type -> canned implementation steps.
    IMPLEMENTATION_STEPS = {
        'content_quality': (
            'Review current content structure',
            'Improve readability and formatting',
            'Enhance content organization',
            'Update content based on best practices',
        ),
        'seo': (
            'Audit current SEO implementation',
            'Optimize meta tags and descriptions',
            'Improve content structure for SEO',
            'Implement technical SEO improvements',
        ),
        'competitor': (
            'Research competitor content',
            'Identify unique value proposition',
            'Create content for missing topics',
            'Optimize content for target keywords',
        ),
        'keyword': (
            'Research keyword intent',
            'Create content strategy',
            'Develop content for target keyword',
            'Optimize content for search',
        ),
    }

    def __init__(self):
        """Initialize the recommendation engine with required components."""
        self.ai_processor = AIProcessor()
        self.progress = ProgressTracker()
        # Define analysis stages
        self.stages = {
            'recommendation_generation': {
                'name': 'Recommendation Generation',
                'steps': [
                    'Initializing recommendation engine',
                    'Analyzing content gaps',
                    'Evaluating opportunities',
                    'Generating recommendations',
                    'Creating implementation plan'
                ]
            }
        }

    def _stage_name(self, stage: Any) -> Any:
        """
        Return the display name of a progress stage.

        The tracker's own stage table is consulted first (if it has one),
        then this engine's ``self.stages``; the raw stage key is returned
        when neither knows the stage, so error reporting never raises.
        """
        tracker_stages = getattr(self.progress, 'stages', None) or {}
        stage_info = tracker_stages.get(stage) or self.stages.get(stage) or {}
        return stage_info.get('name', stage)

    def generate_recommendations(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate content recommendations.

        Args:
            website_analysis: Website analysis results
            competitor_analysis: Optional competitor analysis results
            keyword_analysis: Keyword analysis results

        Returns:
            Dictionary with 'content_gaps', 'opportunities',
            'recommendations' and 'implementation_plan'. On failure the
            same keys are present but empty and an extra 'error' key
            carries the exception message.
        """
        try:
            self.progress.start_stage('recommendation_generation')
            self.progress.next_step()
            # Analyze content gaps
            content_gaps = self._analyze_content_gaps(website_analysis, competitor_analysis, keyword_analysis)
            self.progress.next_step()
            # Evaluate opportunities
            opportunities = self._evaluate_opportunities(content_gaps, keyword_analysis)
            self.progress.next_step()
            # Generate recommendations
            recommendations = self._generate_recommendations(content_gaps, opportunities)
            self.progress.next_step()
            # Create implementation plan
            implementation_plan = self._create_implementation_plan(recommendations)
            self.progress.next_step()
            self.progress.complete_stage()
            return {
                'content_gaps': content_gaps,
                'opportunities': opportunities,
                'recommendations': recommendations,
                'implementation_plan': implementation_plan
            }
        except Exception as e:
            current_stage = self.progress.current_stage
            if current_stage:
                # Resolve the name defensively: a failing lookup here would
                # mask the original error.
                self.progress.update_progress(0, f"Error in {self._stage_name(current_stage)}: {str(e)}")
            st.error(f"Error generating recommendations: {str(e)}")
            return {
                'error': str(e),
                'content_gaps': [],
                'opportunities': [],
                'recommendations': [],
                'implementation_plan': {}
            }

    def _analyze_content_gaps(self, website_analysis: Dict[str, Any], competitor_analysis: Optional[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Collect website, competitor (if provided) and keyword gaps, in that order."""
        try:
            content_gaps = list(self._analyze_website_gaps(website_analysis))
            # Competitor analysis is optional.
            if competitor_analysis:
                content_gaps.extend(self._analyze_competitor_gaps(competitor_analysis))
            content_gaps.extend(self._analyze_keyword_gaps(keyword_analysis))
            return content_gaps
        except Exception as e:
            st.error(f"Error analyzing content gaps: {str(e)}")
            return []

    def _analyze_website_gaps(self, website_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Report a gap for a readability or SEO score below MIN_ACCEPTABLE_SCORE."""
        try:
            gaps = []
            # Check content quality
            readability = website_analysis.get('quality_metrics', {}).get('readability_score', 0)
            if readability < self.MIN_ACCEPTABLE_SCORE:
                gaps.append({
                    'type': 'content_quality',
                    'issue': 'Low readability score',
                    'score': readability,
                    'recommendation': 'Improve content readability'
                })
            # Check SEO elements
            seo_score = website_analysis.get('seo_metrics', {}).get('seo_score', 0)
            if seo_score < self.MIN_ACCEPTABLE_SCORE:
                gaps.append({
                    'type': 'seo',
                    'issue': 'Low SEO score',
                    'score': seo_score,
                    'recommendation': 'Enhance SEO optimization'
                })
            return gaps
        except Exception as e:
            st.error(f"Error analyzing website gaps: {str(e)}")
            return []

    def _analyze_competitor_gaps(self, competitor_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn each entry of the competitor 'content_gaps' list into a gap record."""
        try:
            return [
                {
                    'type': 'competitor',
                    'issue': f"Missing topic: {', '.join(gap.get('missing_topics', []))}",
                    'recommendation': 'Create content for missing topics'
                }
                for gap in competitor_analysis.get('content_gaps', [])
            ]
        except Exception as e:
            st.error(f"Error analyzing competitor gaps: {str(e)}")
            return []

    def _analyze_keyword_gaps(self, keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Turn each entry of the keyword 'opportunities' list into a gap record."""
        try:
            return [
                {
                    'type': 'keyword',
                    'issue': f"Keyword opportunity: {opportunity.get('keyword')}",
                    'volume': opportunity.get('volume', 0),
                    'difficulty': opportunity.get('difficulty', 0),
                    'recommendation': f"Target keyword: {opportunity.get('keyword')}"
                }
                for opportunity in keyword_analysis.get('opportunities', [])
            ]
        except Exception as e:
            st.error(f"Error analyzing keyword gaps: {str(e)}")
            return []

    def _evaluate_opportunities(self, content_gaps: List[Dict[str, Any]], keyword_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Score every gap and keep those above OPPORTUNITY_THRESHOLD.

        Returns:
            Opportunities sorted by 'priority_score', highest first.
        """
        try:
            opportunities = []
            for gap in content_gaps:
                priority_score = self._calculate_priority_score(gap, keyword_analysis)
                if priority_score > self.OPPORTUNITY_THRESHOLD:
                    opportunities.append({
                        'type': gap.get('type'),
                        'issue': gap.get('issue'),
                        'recommendation': gap.get('recommendation'),
                        'priority_score': priority_score
                    })
            # Sort by priority score
            opportunities.sort(key=lambda x: x['priority_score'], reverse=True)
            return opportunities
        except Exception as e:
            st.error(f"Error evaluating opportunities: {str(e)}")
            return []

    def _calculate_priority_score(self, gap: Dict[str, Any], keyword_analysis: Dict[str, Any]) -> float:
        """
        Calculate the priority score (0-100) of a gap.

        The score starts from BASE_PRIORITY for the gap type. Keyword gaps
        are additionally adjusted by the keyword's trend data, if any:
        +0.1 per unit of volume and -0.2 per unit of difficulty.
        """
        try:
            gap_type = gap.get('type')
            score = self.BASE_PRIORITY.get(gap_type, 0)
            # Adjust score based on keyword data
            if gap_type == 'keyword':
                # The keyword is everything after the FIRST ': ' of the issue
                # text, so keywords that themselves contain ': ' stay intact.
                keyword = gap.get('issue', '').split(': ', 1)[-1]
                keyword_data = keyword_analysis.get('trend_analysis', {}).get('trends', {}).get(keyword, {})
                if keyword_data:
                    score += keyword_data.get('volume', 0) * 0.1
                    score -= keyword_data.get('difficulty', 0) * 0.2
            return min(100, max(0, score))
        except Exception as e:
            st.error(f"Error calculating priority score: {str(e)}")
            return 0

    def _generate_recommendations(self, content_gaps: List[Dict[str, Any]], opportunities: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Build one recommendation (with implementation steps) per opportunity."""
        try:
            return [
                {
                    'type': opportunity.get('type'),
                    'issue': opportunity.get('issue'),
                    'recommendation': opportunity.get('recommendation'),
                    'priority': opportunity.get('priority_score', 0),
                    'implementation_steps': self._generate_implementation_steps(opportunity)
                }
                for opportunity in opportunities
            ]
        except Exception as e:
            st.error(f"Error generating recommendations: {str(e)}")
            return []

    def _generate_implementation_steps(self, opportunity: Dict[str, Any]) -> List[str]:
        """Return the canned implementation steps for the opportunity's type (empty if unknown)."""
        try:
            # A fresh list per call so callers may mutate the result freely.
            return list(self.IMPLEMENTATION_STEPS.get(opportunity.get('type'), ()))
        except Exception as e:
            st.error(f"Error generating implementation steps: {str(e)}")
            return []

    def _create_implementation_plan(self, recommendations: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Group recommendations into implementation phases.

        Returns:
            Dictionary with 'phases' (one entry per phase that has at least
            one recommendation, in PHASE_NAMES order) and the currently
            unpopulated 'timeline', 'resources' and 'success_metrics'.
            Recommendations of unknown type are not part of any phase.
        """
        try:
            plan = {
                'phases': [],
                'timeline': {},
                'resources': {},
                'success_metrics': {}
            }
            for gap_type, phase_name in self.PHASE_NAMES.items():
                phase_recommendations = [
                    rec for rec in recommendations
                    if rec.get('type') == gap_type
                ]
                if phase_recommendations:
                    plan['phases'].append({
                        'name': phase_name,
                        'recommendations': phase_recommendations,
                        'duration': '2-4 weeks',
                        'resources': ['Content team', 'SEO team'],
                        'success_metrics': [
                            'Content quality score',
                            'SEO performance',
                            'User engagement'
                        ]
                    })
            return plan
        except Exception as e:
            st.error(f"Error creating implementation plan: {str(e)}")
            return {}

    def _generate_content_topics(self, ai_insights: dict) -> list:
        """
        Generate content topic suggestions.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            list: Content topic suggestions (placeholder: always empty)
        """
        # TODO: Implement content topic generation
        return []

    def _suggest_content_formats(self, ai_insights: dict) -> list:
        """
        Suggest content formats based on analysis.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            list: Content format suggestions (placeholder: always empty)
        """
        # TODO: Implement content format suggestions
        return []

    def _calculate_priority_scores(self, ai_insights: dict) -> dict:
        """
        Calculate priority scores for recommendations.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: Priority scores for each recommendation (placeholder: always empty)
        """
        # TODO: Implement priority scoring
        return {}

    def _create_timeline(self, ai_insights: dict) -> dict:
        """
        Create implementation timeline for recommendations.

        Args:
            ai_insights (dict): AI-processed insights

        Returns:
            dict: Implementation timeline (placeholder: empty buckets)
        """
        # TODO: Implement timeline creation
        return {
            'short_term': [],
            'medium_term': [],
            'long_term': []
        }

    def _generate_specific_suggestions(self, recommendations: dict, analysis_results: dict) -> dict:
        """
        Generate specific content suggestions using existing tools.

        Args:
            recommendations (dict): General recommendations; topics are
                read from its 'content_topics' entry (none if absent)
            analysis_results (dict): Analysis results (currently unused)

        Returns:
            dict: Topic -> generated 'titles' plus, when the corresponding
            generator is available in this module, 'meta_descriptions'
            and 'structured_data'
        """
        suggestions = {}
        # NOTE(review): metadesc_generator_main and ai_structured_data are not
        # imported by this module, so they are resolved at call time and the
        # field is skipped (with a warning) instead of raising NameError.
        optional_generators = {
            'meta_descriptions': globals().get('metadesc_generator_main'),
            'structured_data': globals().get('ai_structured_data'),
        }
        topics = recommendations.get('content_topics', [])
        if topics:
            for field, generate in optional_generators.items():
                if not callable(generate):
                    logger.warning(f"No generator available for '{field}'; skipping")
        # Generate titles for suggested topics
        for topic in topics:
            entry = {'titles': ai_title_generator(topic)}
            for field, generate in optional_generators.items():
                if callable(generate):
                    entry[field] = generate(topic)
            suggestions[topic] = entry
        return suggestions

View File

@@ -1,769 +0,0 @@
"""
Streamlit UI for Content Gap Analysis workflow.
"""
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import json
from datetime import datetime
from .main import ContentGapAnalysis
from .keyword_researcher import KeywordResearcher
from .competitor_analyzer import CompetitorAnalyzer
from .website_analyzer import WebsiteAnalyzer
from .recommendation_engine import RecommendationEngine
from .utils.ai_processor import AIProcessor
from .navigation import show_content_gap_analysis_nav
from typing import Dict, Any
import logging
# Configure logging
# NOTE: basicConfig() is called at import time, so importing this module sets
# up root-logger handling at DEBUG level unless logging is already configured.
logging.basicConfig(level=logging.DEBUG)
# Module-level logger used by every method of the UI class below.
logger = logging.getLogger(__name__)
class ContentGapAnalysisUI:
"""Streamlit UI for Content Gap Analysis workflow."""
def __init__(self):
    """Seed the session state and create the analysis components."""
    # Defaults are written only when missing so Streamlit reruns keep the
    # workflow position and any results gathered so far.
    session_defaults = (
        ('current_step', 1),
        ('analysis_results', {}),
    )
    for key, default in session_defaults:
        if key not in st.session_state:
            st.session_state[key] = default
    # Analysis components, one per workflow concern.
    self.analyzer = ContentGapAnalysis()
    self.keyword_researcher = KeywordResearcher()
    self.competitor_analyzer = CompetitorAnalyzer()
    self.website_analyzer = WebsiteAnalyzer()
    self.recommendation_engine = RecommendationEngine()
    self.ai_processor = AIProcessor()
def run(self):
    """
    Run the Streamlit interface.

    ``st.session_state.current_step`` is the single source of truth for
    which step is rendered (1-4 are the analysis steps, anything else is
    the export view). The sidebar selection moves the workflow to the
    chosen step only when the selection actually changes, so the
    "Proceed ..." buttons of the individual steps are not overridden by a
    sidebar value left over from an earlier interaction.
    """
    try:
        # Show navigation
        nav_option = show_content_gap_analysis_nav()
        nav_steps = {
            "Website Analysis": 1,
            "Competitor Analysis": 2,
            "Keyword Research": 3,
            "Recommendations": 4,
        }
        # Remember the last sidebar choice to tell a real user selection
        # apart from the unchanged value Streamlit re-sends on every rerun.
        previous_nav = st.session_state.get('content_gap_nav_option')
        if previous_nav is not None and previous_nav != nav_option and nav_option in nav_steps:
            st.session_state.current_step = nav_steps[nav_option]
        st.session_state['content_gap_nav_option'] = nav_option
        # Main content area
        st.title("Content Gap Analysis")
        st.markdown("""
This tool helps you identify content gaps and opportunities by analyzing your website,
competitors, and market trends. Follow the steps below to get started.
""")
        # Progress tracking
        self._show_progress()
        # Main workflow steps
        step_handlers = {
            1: self._website_analysis_step,
            2: self._competitor_analysis_step,
            3: self._keyword_research_step,
            4: self._recommendations_step,
        }
        render_step = step_handlers.get(st.session_state.current_step, self._export_results)
        render_step()
    except Exception as e:
        logger.error(f"Error in run method: {str(e)}", exc_info=True)
        st.error(f"An error occurred: {str(e)}")
def _show_progress(self):
    """
    Render the workflow progress bar and one status cell per step.

    Steps before the current one are shown as success, the current step as
    info and the remaining steps as plain text.
    """
    step_labels = [
        "Website Analysis",
        "Competitor Analysis",
        "Keyword Research",
        "Recommendations",
        "Export Results"
    ]
    total_steps = len(step_labels)
    st.progress(st.session_state.current_step / total_steps)
    columns = st.columns(total_steps)
    # Step numbers are 1-based, matching st.session_state.current_step.
    for step_number, (column, label) in enumerate(zip(columns, step_labels), start=1):
        with column:
            if step_number < st.session_state.current_step:
                st.success(f"{label}")
            elif step_number == st.session_state.current_step:
                st.info(f"{label}")
            else:
                st.text(f"{label}")
def _website_analysis_step(self):
    """
    Website analysis step UI.

    If a website analysis is already stored in the session it is shown
    together with buttons to start over or to proceed to the competitor
    step. Otherwise a form collects the URL and industry, the analyzer and
    the AI processor are run, and the combined result is stored under
    ``st.session_state.analysis_results['website']`` (including the
    entered industry, which the keyword step uses as its default).
    """
    try:
        st.header("Step 1: Website Analysis")
        # Display previous results if they exist
        if 'website' in st.session_state.analysis_results:
            st.info("Previous analysis results found. You can analyze a new website or proceed to the next step.")
            self._display_website_analysis(st.session_state.analysis_results['website'])
            col1, col2 = st.columns(2)
            with col1:
                if st.button("Analyze New Website"):
                    st.session_state.analysis_results.pop('website', None)
                    st.rerun()
            with col2:
                if st.button("Proceed to Competitor Analysis"):
                    st.session_state.current_step = 2
                    st.rerun()
            return
        # Create form for new analysis
        with st.form("website_analysis_form"):
            website_url = st.text_input("Enter your website URL")
            industry = st.text_input("Enter your industry/niche")
            submitted = st.form_submit_button("Analyze Website")
        # Handle form submission outside the form
        if not submitted:
            return
        if not (website_url and industry):
            st.warning("Please enter both a website URL and an industry/niche.")
            return
        # Start every run from a clean state; otherwise a second analysis
        # would first render the final state of the previous one.
        st.session_state.analysis_progress = {
            'status': 'initializing',
            'current_step': 'Starting Analysis',
            'progress': 0,
            'details': 'Initializing analysis...'
        }
        # Create progress container
        progress_container = st.empty()
        status_container = st.empty()
        details_container = st.empty()

        def update_progress_display():
            """Re-render the progress bar, status line and details text."""
            progress = st.session_state.analysis_progress
            # The analyzer's progress dict is read defensively: its exact
            # keys are not guaranteed here.
            percent = min(100, max(0, progress.get('progress', 0)))
            current_label = progress.get('current_step', '')
            with progress_container:
                st.progress(percent / 100)
            with status_container:
                status = progress.get('status')
                if status == 'error':
                    st.error(f"Error: {current_label}")
                elif status == 'completed':
                    st.success(f"{current_label}")
                else:
                    st.info(f"{current_label}")
            with details_container:
                st.write(progress.get('details', ''))

        def fail(details):
            """Mark the run as failed in the progress display."""
            st.session_state.analysis_progress.update({
                'status': 'error',
                'current_step': 'Analysis Failed',
                'details': details
            })
            update_progress_display()

        # Initial progress display
        update_progress_display()
        try:
            # Get basic analysis
            results = self.website_analyzer.analyze(website_url)
            # Update progress from analyzer
            st.session_state.analysis_progress = self.website_analyzer.progress.get_progress()
            update_progress_display()
            if not isinstance(results, dict):
                # Do not store or AI-process something that cannot be displayed.
                fail('Invalid analysis results format')
                st.error("Error in website analysis: Invalid analysis results format")
                return
            if 'error' in results:
                st.error(f"Error in website analysis: {results['error']}")
                return
            # Get AI-enhanced analysis
            st.session_state.analysis_progress.update({
                'current_step': 'AI Analysis',
                'progress': 95,
                'details': 'Performing AI-enhanced analysis...'
            })
            update_progress_display()
            ai_analysis = self.ai_processor.analyze_content({
                'url': website_url,
                'industry': industry,
                'content': results
            })
            # Combine results
            results.update(ai_analysis)
            # Keep the industry with the results; the keyword research step
            # pre-fills its input from analysis_results['website']['industry'].
            results.setdefault('industry', industry)
            # Store results in session state
            st.session_state.analysis_results['website'] = results
            # Update final progress
            st.session_state.analysis_progress.update({
                'status': 'completed',
                'current_step': 'Analysis Complete',
                'progress': 100,
                'details': 'Analysis completed successfully!'
            })
            update_progress_display()
            # Display results
            self._display_website_analysis(results)
        except Exception as e:
            logger.error(f"Error during website analysis: {str(e)}", exc_info=True)
            fail(f"Error during website analysis: {str(e)}")
            st.error(f"Error during website analysis: {str(e)}")
            return
    except Exception as e:
        logger.error(f"Error in website analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in website analysis: {str(e)}")
def _display_website_analysis(self, results: Dict[str, Any]):
    """
    Display website analysis results.

    Renders, in order: content metrics, SEO metrics (including meta tag
    details), technical metrics, performance metrics and insights. Each
    section's details are only rendered when the corresponding entry of
    ``results`` is non-empty.

    Args:
        results: Website analysis results; a non-dict value or a dict
            containing 'error' is reported as an error instead.
    """

    def render_status_chart(data, title):
        """Bar chart of boolean feature flags ('Feature' vs 'Status')."""
        # NOTE(review): colours are assigned in order of appearance of the
        # Status values, not by value - confirm red/green end up as intended.
        fig = px.bar(
            data,
            x='Feature',
            y='Status',
            title=title,
            color='Status',
            color_discrete_sequence=['red', 'green']
        )
        st.plotly_chart(fig, use_container_width=True)

    def render_meta_tag(label, tag, with_metrics=True):
        """Write status, value and (optionally) length/score of one meta tag."""
        st.write(label)
        # Previously both branches were empty strings, so the status never showed.
        st.write(f"Status: {'Good' if tag.get('status') == 'good' else 'Needs attention'}")
        st.write(f"Value: {tag.get('value', 'N/A')}")
        if with_metrics:
            st.write(f"Length: {tag.get('length', 0)} characters")
            st.write(f"Score: {tag.get('score', 0)}%")
        if tag.get('recommendation'):
            st.warning(tag.get('recommendation'))

    try:
        if not isinstance(results, dict):
            st.error("Invalid analysis results format")
            return
        if 'error' in results:
            st.error(f"Error in analysis: {results['error']}")
            return
        # Content Metrics
        st.subheader("Content Metrics")
        content_metrics = results.get('content_metrics', {})
        if content_metrics:
            # Basic metrics in columns
            count_metrics = (
                ("Word Count", 'word_count'),
                ("Headings", 'heading_count'),
                ("Images", 'image_count'),
                ("Links", 'link_count'),
            )
            for column, (label, key) in zip(st.columns(4), count_metrics):
                with column:
                    st.metric(label, f"{content_metrics.get(key, 0):,}")
            # Content Structure Visualization
            st.write("Content Structure")
            heading_data = {
                'Type': ['H1', 'H2', 'H3', 'Paragraphs'],
                'Count': [
                    content_metrics.get('h1_count', 0),
                    content_metrics.get('h2_count', 0),
                    content_metrics.get('h3_count', 0),
                    content_metrics.get('paragraph_count', 0)
                ]
            }
            fig = px.bar(
                heading_data,
                x='Type',
                y='Count',
                title="Content Structure Distribution",
                color='Type',
                color_discrete_sequence=px.colors.qualitative.Set3
            )
            st.plotly_chart(fig, use_container_width=True)
            # Content Features
            st.write("Content Features")
            render_status_chart(
                {
                    'Feature': ['Meta Description', 'Robots.txt', 'Sitemap'],
                    'Status': [
                        content_metrics.get('has_meta_description', False),
                        content_metrics.get('has_robots_txt', False),
                        content_metrics.get('has_sitemap', False)
                    ]
                },
                "Content Features Status"
            )
        # SEO Metrics
        st.subheader("SEO Metrics")
        seo_metrics = results.get('seo_metrics', {})
        if seo_metrics:
            overall_score = seo_metrics.get('overall_score', 0)
            content_scores = seo_metrics.get('content', {})
            content_quality = content_scores.get('content_quality_score', 0)
            readability = content_scores.get('readability_score', 0)
            keyword_density = content_scores.get('keyword_density', 0)
            # Basic metrics in columns
            seo_values = (
                ("Overall Score", overall_score),
                ("Content Quality", content_quality),
                ("Readability", readability),
                ("Keyword Density", keyword_density),
            )
            for column, (label, value) in zip(st.columns(4), seo_values):
                with column:
                    st.metric(label, f"{value:.1f}%")
            # SEO Scores Radar Chart
            seo_scores = {
                'Metric': ['Overall', 'Content Quality', 'Readability', 'Keyword Density'],
                'Score': [overall_score, content_quality, readability, keyword_density]
            }
            fig = px.line_polar(
                seo_scores,
                r='Score',
                theta='Metric',
                line_close=True,
                title="SEO Performance Overview"
            )
            fig.update_traces(fill='toself')
            st.plotly_chart(fig, use_container_width=True)
            # Meta Tags Analysis
            st.write("Meta Tags Analysis")
            meta_tags = seo_metrics.get('meta_tags', {})
            if meta_tags:
                render_meta_tag("Title Tag", meta_tags.get('title', {}))
                render_meta_tag("Meta Description", meta_tags.get('description', {}))
                # Keywords carry no length/score information.
                render_meta_tag("Meta Keywords", meta_tags.get('keywords', {}), with_metrics=False)
        # Technical Metrics
        st.subheader("Technical Metrics")
        technical_info = results.get('technical_info', {})
        if technical_info:
            col1, col2 = st.columns(2)
            with col1:
                st.write("Basic Information")
                server_info = technical_info.get('server_info', {})
                st.metric("Status Code", technical_info.get('status_code', 'N/A'))
                st.metric("Server", server_info.get('server', 'N/A'))
                st.metric("Content Type", server_info.get('content_type', 'N/A'))
            with col2:
                st.write("Security Information")
                security_info = technical_info.get('security_info', {})
                render_status_chart(
                    {
                        'Feature': ['SSL', 'HSTS', 'XSS Protection'],
                        'Status': [
                            security_info.get('ssl', False),
                            security_info.get('hsts', False),
                            security_info.get('xss_protection', False)
                        ]
                    },
                    "Security Features Status"
                )
        # Performance Metrics
        st.subheader("Performance Metrics")
        performance = results.get('performance', {})
        if performance:
            # Basic metrics in columns
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Load Time", f"{performance.get('load_time', 0):.2f}s")
            with col2:
                st.metric("Page Size", f"{performance.get('page_size', 0):.1f} KB")
            with col3:
                st.metric("Status Code", performance.get('status_code', 'N/A'))
            with col4:
                st.metric("Response Time", f"{performance.get('response_time', 0):.2f}s")
        # Insights and Recommendations
        st.subheader("Insights and Recommendations")
        insights = results.get('insights', [])
        if insights:
            for insight in insights:
                st.info(f"{insight}")
        else:
            st.info("No specific insights available")
    except Exception as e:
        logger.error(f"Error displaying website analysis: {str(e)}", exc_info=True)
        st.error(f"Error displaying website analysis: {str(e)}")
def _competitor_analysis_step(self):
    """
    Competitor analysis step UI.

    Mirrors the website step: stored competitor results are shown with
    buttons to start over or to proceed to keyword research; otherwise a
    form collects competitor URLs (one per line), the analysis is run and
    the combined result is stored under
    ``st.session_state.analysis_results['competitors']``.
    """
    try:
        st.header("Step 2: Competitor Analysis")
        # Show stored results and let the user decide when to move on,
        # instead of rendering them and immediately rerunning past them.
        if 'competitors' in st.session_state.analysis_results:
            st.info("Previous competitor analysis found. You can analyze new competitors or proceed to the next step.")
            self._display_competitor_analysis(st.session_state.analysis_results['competitors'])
            col1, col2 = st.columns(2)
            with col1:
                if st.button("Analyze New Competitors"):
                    st.session_state.analysis_results.pop('competitors', None)
                    st.rerun()
            with col2:
                if st.button("Proceed to Keyword Research"):
                    st.session_state.current_step = 3
                    st.rerun()
            return
        with st.form("competitor_analysis_form"):
            competitors = st.text_area(
                "Enter competitor URLs (one per line)",
                help="Enter the URLs of your main competitors"
            )
            submitted = st.form_submit_button("Analyze Competitors")
        if not submitted:
            return
        competitor_urls = [url.strip() for url in competitors.split('\n') if url.strip()]
        if not competitor_urls:
            st.warning("Please enter at least one competitor URL.")
            return
        with st.spinner("Analyzing competitors..."):
            results = self.competitor_analyzer.analyze(competitor_urls)
            # Get AI-enhanced competitor analysis
            ai_analysis = self.ai_processor.analyze_competitors({
                'competitors': competitor_urls,
                'analysis': results
            })
            # Combine results
            results.update(ai_analysis)
            st.session_state.analysis_results['competitors'] = results
        # Re-render through the stored-results branch above.
        st.rerun()
    except Exception as e:
        logger.error(f"Error in competitor analysis step: {str(e)}", exc_info=True)
        st.error(f"Error in competitor analysis: {str(e)}")
def _display_competitor_analysis(self, results: dict):
    """
    Display competitor analysis results.

    Renders the comparison bar chart, per-competitor trend lines, the list
    of content gaps and the categorised competitive insights. Charts are
    skipped when their data is missing or empty.

    Args:
        results: Competitor analysis results
    """
    st.subheader("Competitor Analysis Results")
    # Competitor comparison
    st.subheader("Competitor Comparison")
    comparison = pd.DataFrame(results.get('comparison', []))
    if not comparison.empty:
        comparison_fig = px.bar(
            comparison,
            x='competitor',
            y='score',
            color='metric',
            title="Competitor Comparison"
        )
        st.plotly_chart(comparison_fig)
    # AI-Enhanced Competitor Analysis
    st.subheader("AI-Enhanced Competitor Analysis")
    # Competitor Trend Analysis: one line per competitor
    trends_by_competitor = results.get('competitor_trends', {})
    if trends_by_competitor:
        trend_fig = go.Figure()
        for competitor_name, series in trends_by_competitor.items():
            trace = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=competitor_name,
                mode='lines+markers'
            )
            trend_fig.add_trace(trace)
        trend_fig.update_layout(
            title="Competitor Performance Trends",
            xaxis_title="Timeline",
            yaxis_title="Score"
        )
        st.plotly_chart(trend_fig)
    # Content gaps
    st.subheader("Content Gaps")
    for content_gap in results.get('content_gaps', []):
        st.info(f"{content_gap}")
    # AI-Generated Competitive Insights, one expander per category
    st.subheader("Competitive Insights")
    insights_by_category = results.get('competitive_insights', {})
    if insights_by_category:
        for category, points in insights_by_category.items():
            with st.expander(f"{category.title()} Analysis"):
                for point in points:
                    st.success(f"{point}")
def _keyword_research_step(self):
    """
    Keyword research step UI.

    Mirrors the website step: stored keyword results are shown with
    buttons to start over or to proceed to the recommendations; otherwise
    a form collects the industry (pre-filled from the website analysis
    when available), the research is run and the combined result is stored
    under ``st.session_state.analysis_results['keywords']``.
    """
    try:
        st.header("Step 3: Keyword Research")
        # Show stored results and let the user decide when to move on,
        # instead of rendering them and immediately rerunning past them.
        if 'keywords' in st.session_state.analysis_results:
            st.info("Previous keyword research found. You can research again or proceed to the next step.")
            self._display_keyword_research(st.session_state.analysis_results['keywords'])
            col1, col2 = st.columns(2)
            with col1:
                if st.button("Research New Keywords"):
                    st.session_state.analysis_results.pop('keywords', None)
                    st.rerun()
            with col2:
                if st.button("Proceed to Recommendations"):
                    st.session_state.current_step = 4
                    st.rerun()
            return
        with st.form("keyword_research_form"):
            industry = st.text_input(
                "Enter your industry/niche",
                value=st.session_state.analysis_results.get('website', {}).get('industry', '')
            )
            submitted = st.form_submit_button("Research Keywords")
        if not submitted:
            return
        if not industry:
            st.warning("Please enter your industry/niche.")
            return
        with st.spinner("Researching keywords..."):
            results = self.keyword_researcher.research(industry)
            # Get AI-enhanced keyword analysis
            ai_analysis = self.ai_processor.analyze_keywords({
                'industry': industry,
                'keywords': results
            })
            # Combine results
            results.update(ai_analysis)
            st.session_state.analysis_results['keywords'] = results
        # Re-render through the stored-results branch above.
        st.rerun()
    except Exception as e:
        logger.error(f"Error in keyword research step: {str(e)}", exc_info=True)
        st.error(f"Error in keyword research: {str(e)}")
def _display_keyword_research(self, results: dict):
    """
    Display keyword research results.

    Renders the keyword opportunity scatter plot, per-keyword trend lines,
    the search intent pie chart, the content format suggestions and the
    categorised keyword insights. Charts are skipped when their data is
    missing or empty.

    Args:
        results: Keyword research results
    """
    st.subheader("Keyword Research Results")
    # Keyword metrics
    st.subheader("Keyword Metrics")
    keyword_table = pd.DataFrame(results.get('keywords', []))
    if not keyword_table.empty:
        opportunity_fig = px.scatter(
            keyword_table,
            x='search_volume',
            y='difficulty',
            size='relevance_score',
            hover_data=['keyword'],
            title="Keyword Opportunities"
        )
        st.plotly_chart(opportunity_fig)
    # AI-Enhanced Keyword Analysis
    st.subheader("AI-Enhanced Keyword Analysis")
    # Keyword Trend Analysis: one line per keyword
    trends_by_keyword = results.get('keyword_trends', {})
    if trends_by_keyword:
        trend_fig = go.Figure()
        for keyword_name, series in trends_by_keyword.items():
            trace = go.Scatter(
                x=series.get('timeline', []),
                y=series.get('scores', []),
                name=keyword_name,
                mode='lines+markers'
            )
            trend_fig.add_trace(trace)
        trend_fig.update_layout(
            title="Keyword Trend Analysis",
            xaxis_title="Timeline",
            yaxis_title="Trend Score"
        )
        st.plotly_chart(trend_fig)
    # Search intent distribution
    st.subheader("Search Intent Distribution")
    intent_summary = results.get('search_intent', {}).get('summary', {})
    intent_table = pd.DataFrame(intent_summary)
    if not intent_table.empty:
        intent_fig = px.pie(
            intent_table,
            values='count',
            names='intent',
            title="Search Intent Distribution"
        )
        st.plotly_chart(intent_fig)
    # Content format suggestions
    st.subheader("Content Format Suggestions")
    for content_format in results.get('content_formats', []):
        st.info(f"{content_format}")
    # AI-Generated Keyword Insights, one expander per category
    st.subheader("Keyword Insights")
    insights_by_category = results.get('keyword_insights', {})
    if insights_by_category:
        for category, points in insights_by_category.items():
            with st.expander(f"{category.title()} Insights"):
                for point in points:
                    st.success(f"{point}")
def _recommendations_step(self):
"""Recommendations step UI (shown under the "Step 4" header).

Generates recommendations from the analysis results accumulated in
``st.session_state.analysis_results``, merges the AI processor's output
into them, stores the merged dict under the ``'recommendations'`` key,
renders it, then sets ``current_step`` to 5 and reruns the script.
Any exception is logged with its traceback and shown via ``st.error``.
"""
try:
st.header("Step 4: Content Recommendations")
with st.spinner("Generating recommendations..."):
# The engine receives everything collected by the earlier steps.
results = self.recommendation_engine.generate_recommendations(
st.session_state.analysis_results
)
# Get AI-enhanced recommendations
ai_recommendations = self.ai_processor.analyze_recommendations({
'recommendations': results,
'analysis': st.session_state.analysis_results
})
# Combine results (AI keys overwrite engine keys of the same name)
results.update(ai_recommendations)
st.session_state.analysis_results['recommendations'] = results
# Display results
self._display_recommendations(results)
# Move to next step
# NOTE(review): the rerun directly after rendering presumably replaces the
# freshly drawn results with the next step's UI - confirm this is intended.
st.session_state.current_step = 5
st.rerun()
except Exception as e:
logger.error(f"Error in recommendations step: {str(e)}", exc_info=True)
st.error(f"Error in recommendations: {str(e)}")
def _display_recommendations(self, results: dict):
    """Render the content recommendations section of the report.

    Shows the priority list, a grouped bar chart of the impact analysis
    (one bar series per metric), the implementation timeline, the expected
    impact as metric widgets, and the strategic insights grouped by
    category.

    Args:
        results: Recommendation results; every key is read with a default,
            so missing sections render only their sub-header.
    """
    st.subheader("Content Recommendations")

    st.subheader("Priority Recommendations")
    for entry in results.get('priorities', []):
        st.success(f"{entry}")

    st.subheader("AI-Enhanced Recommendations")

    # Grouped bars: each metric contributes one series across the categories.
    impact_by_metric = results.get('impact_analysis', {})
    if impact_by_metric:
        impact_chart = go.Figure()
        for metric_name, series in impact_by_metric.items():
            bars = go.Bar(
                name=metric_name,
                x=series.get('categories', []),
                y=series.get('scores', []),
            )
            impact_chart.add_trace(bars)
        impact_chart.update_layout(
            title="Recommendation Impact Analysis",
            xaxis_title="Categories",
            yaxis_title="Impact Score",
            barmode='group',
        )
        st.plotly_chart(impact_chart)

    st.subheader("Implementation Timeline")
    for milestone in results.get('timeline', []):
        st.info(f"{milestone}")

    st.subheader("Expected Impact")
    expected = results.get('impact', {})
    for label, amount in expected.items():
        st.metric(label, amount)

    # Insights arrive as {category: [points]}; one expander per category.
    st.subheader("Strategic Insights")
    insights_by_category = results.get('strategic_insights', {})
    if insights_by_category:
        for category, points in insights_by_category.items():
            with st.expander(f"{category.title()} Strategy"):
                for point in points:
                    st.success(f"{point}")
def _export_results(self):
    """Export results step UI.

    Lets the user pick an export format and, when the export button is
    pressed, hands off to the matching exporter. PDF has no exporter yet
    and only shows an informational message.
    """
    st.header("Step 5: Export Results")

    chosen_format = st.radio(
        "Choose export format",
        ["JSON", "CSV", "PDF"]
    )

    if not st.button("Export Results"):
        return

    # Formats without an entry here (currently only PDF) are not implemented.
    exporters = {
        "JSON": self._export_json,
        "CSV": self._export_csv,
    }
    exporter = exporters.get(chosen_format)
    if exporter is None:
        st.info("PDF export coming soon!")
    else:
        exporter()
def _export_json(self):
    """Offer the accumulated analysis results as a JSON download.

    The whole ``st.session_state.analysis_results`` mapping is serialised
    with two-space indentation; the file name carries a local timestamp.
    """
    payload = st.session_state.analysis_results
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    st.download_button(
        "Download JSON",
        data=json.dumps(payload, indent=2),
        file_name=f"content_gap_analysis_{stamp}.json",
        mime="application/json",
    )
def _export_csv(self):
    """Offer the accumulated analysis results as a CSV download.

    Each top-level section of ``st.session_state.analysis_results`` is
    flattened into rows: a dict section becomes one row, a list section
    becomes one row per dict item (non-dict items are ignored). Every row
    is tagged with a ``section`` column naming the section it came from.
    Nothing is offered when no rows could be built.

    The rows are shallow copies, so tagging them does not write the
    ``section`` key back into the results kept in session state (the
    previous implementation mutated the stored dicts in place, which then
    leaked into JSON exports and later analysis steps).
    """
    results = st.session_state.analysis_results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    rows = []
    for section, data in results.items():
        if isinstance(data, list):
            rows.extend(
                {**item, 'section': section}
                for item in data
                if isinstance(item, dict)
            )
        elif isinstance(data, dict):
            rows.append({**data, 'section': section})

    if rows:
        df = pd.DataFrame(rows)
        filename = f"content_gap_analysis_{timestamp}.csv"
        st.download_button(
            "Download CSV",
            data=df.to_csv(index=False),
            file_name=filename,
            mime="text/csv"
        )
def main():
    """Build the content gap analysis UI and start it (Streamlit entry point)."""
    ContentGapAnalysisUI().run()


# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@@ -1,249 +0,0 @@
# Content Gap Analysis Utils
This directory contains utility modules that power the Content Gap Analysis tool. These modules provide core functionality for data collection, processing, analysis, and storage.
## Directory Structure
```
utils/
├── README.md
├── ai_processor.py # AI-powered content analysis and processing
├── content_parser.py # Content structure parsing and analysis
├── data_collector.py # Website data collection and processing
└── storage.py # Analysis results storage and retrieval
```
## Module Descriptions
### 1. AI Processor (`ai_processor.py`)
The AI Processor module enhances content analysis using AI techniques. It provides intelligent analysis of website content, competitor data, and keyword research.
#### Key Features:
- Content quality assessment
- Topic analysis and clustering
- Performance metrics analysis
- Strategic recommendations generation
- Progress tracking for analysis tasks
#### Main Components:
- `AIProcessor`: Main class for AI-powered analysis
- `ProgressTracker`: Tracks analysis progress and status
#### Usage Example:
```python
from utils.ai_processor import AIProcessor
processor = AIProcessor()
analysis = processor.analyze_content({
'url': 'https://example.com',
'industry': 'technology',
'content': content_data
})
```
### 2. Content Parser (`content_parser.py`)
The Content Parser module handles the parsing and analysis of website content structure. It provides detailed insights into content organization and quality.
#### Key Features:
- Content structure analysis
- Text statistics calculation
- Topic extraction
- Readability analysis
- Content hierarchy analysis
#### Main Components:
- `ContentParser`: Main class for content parsing and analysis
#### Usage Example:
```python
from utils.content_parser import ContentParser
parser = ContentParser()
structure = parser.parse_structure({
'main_content': content,
'html': html_content,
'headings': headings_data
})
```
### 3. Data Collector (`data_collector.py`)
The Data Collector module is responsible for gathering website data for analysis. It handles web scraping and data extraction.
#### Key Features:
- Website content collection
- Metadata extraction
- Heading structure analysis
- Link and image extraction
- Error handling and retry logic
#### Main Components:
- `DataCollector`: Main class for data collection
#### Usage Example:
```python
from utils.data_collector import DataCollector
collector = DataCollector()
data = collector.collect('https://example.com')
```
### 4. Storage (`storage.py`)
The Storage module manages the persistence and retrieval of analysis results. It provides a robust database interface for storing and accessing analysis data.
#### Key Features:
- Analysis results storage
- Historical data management
- Recommendation tracking
- User-specific analysis storage
- Error handling and rollback support
#### Main Components:
- `ContentGapAnalysisStorage`: Main class for storage operations
#### Usage Example:
```python
from utils.storage import ContentGapAnalysisStorage
storage = ContentGapAnalysisStorage(db_session)
analysis_id = storage.save_analysis(
user_id=1,
website_url='https://example.com',
industry='technology',
results=analysis_results
)
```
## Integration Points
### 1. Website Analysis Integration
```python
from utils.data_collector import DataCollector
from utils.content_parser import ContentParser
from utils.ai_processor import AIProcessor
# Collect data
collector = DataCollector()
data = collector.collect(url)
# Parse content
parser = ContentParser()
structure = parser.parse_structure(data)
# Process with AI
processor = AIProcessor()
analysis = processor.analyze_content({
'url': url,
'content': structure
})
```
### 2. Storage Integration
```python
from utils.storage import ContentGapAnalysisStorage
# Store analysis results
storage = ContentGapAnalysisStorage(db_session)
analysis_id = storage.save_analysis(
user_id=user_id,
website_url=url,
industry=industry,
results=analysis_results
)
# Retrieve analysis
results = storage.get_analysis(analysis_id)
```
## Error Handling
All modules implement comprehensive error handling:
1. **Data Collection Errors**
- Network timeouts
- Invalid URLs
- Access restrictions
- Parsing errors
2. **Processing Errors**
- Invalid data formats
- AI processing failures
- Resource limitations
- Analysis timeouts
3. **Storage Errors**
- Database connection issues
- Transaction failures
- Data validation errors
- Concurrent access conflicts
## Best Practices
1. **Data Collection**
- Implement rate limiting
- Use proper user agents
- Handle redirects
- Validate input data
2. **Content Processing**
- Clean and normalize data
- Handle encoding issues
- Implement fallback strategies
- Cache processed results
3. **Storage Management**
- Use transactions
- Implement data validation
- Handle concurrent access
- Maintain data integrity
## Future Enhancements
1. **Performance Optimizations**
- Implement parallel processing
- Add caching layer
- Optimize database queries
- Enhance error recovery
2. **Feature Additions**
- Content performance tracking
- Automated content planning
- Enhanced competitive intelligence
- Advanced topic clustering
3. **Integration Improvements**
- API endpoints
- Export capabilities
- Data visualization
- Progress tracking
4. **UI/UX Enhancements**
- Interactive visualizations
- Real-time progress updates
- Export interfaces
- Customization options
## Contributing
When contributing to these utility modules:
1. Follow the existing code structure
2. Add comprehensive error handling
3. Include unit tests
4. Update documentation
5. Follow the PEP 8 style guide
## Dependencies
- BeautifulSoup4: HTML parsing
- NLTK: Natural language processing
- SQLAlchemy: Database operations
- Streamlit: UI components
- Requests: HTTP requests
## License
This project is licensed under the MIT License - see the LICENSE file for details.

View File

@@ -1,13 +0,0 @@
"""
Utility modules for content gap analysis.
"""
from .data_collector import DataCollector
from .content_parser import ContentParser
from .ai_processor import AIProcessor
# Public API of the package: the names exported by a star-import of it.
__all__ = [
'DataCollector',
'ContentParser',
'AIProcessor'
]

File diff suppressed because it is too large Load Diff

View File

@@ -1,236 +0,0 @@
"""
Content parser utility for analyzing website content structure.
"""
from typing import Dict, Any, List
import re
from bs4 import BeautifulSoup
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import Counter
class ContentParser:
    """Parser for analyzing website content structure.

    Produces text statistics, content sections, frequent-term "topics",
    readability metrics and a heading-hierarchy summary from scraped page
    data. Tokenisation uses NLTK; the ``punkt`` tokenizer and the
    ``stopwords`` corpus are downloaded on construction when they cannot be
    found locally.
    """

    def __init__(self):
        """Ensure the required NLTK resources exist and cache English stop words."""
        required_resources = (
            ('tokenizers/punkt', 'punkt'),
            ('corpora/stopwords', 'stopwords'),
        )
        for resource_path, package in required_resources:
            try:
                nltk.data.find(resource_path)
            except LookupError:
                nltk.download(package)
        self.stop_words = set(stopwords.words('english'))

    def parse_structure(self, content: Dict[str, Any]) -> Dict[str, Any]:
        """
        Parse and analyze the structure of website content.

        Args:
            content: Dictionary containing website content. The keys read
                are ``main_content`` (text), ``html`` (markup),
                ``headings`` (list of dicts with a ``level`` entry) and
                ``metadata``; all are optional.

        Returns:
            Dictionary with ``text_statistics``, ``sections``, ``topics``,
            ``readability``, ``hierarchy`` and ``metadata``. If any step
            raises, the same keys are returned empty together with an
            ``error`` message instead of propagating the exception.
        """
        try:
            main_content = content.get('main_content', '')
            soup = BeautifulSoup(content.get('html', ''), 'html.parser')

            text_stats = self._analyze_text(main_content)
            sections = self._extract_sections(soup)
            topics = self._extract_topics(main_content)
            readability = self._analyze_readability(main_content)
            hierarchy = self._analyze_hierarchy(content.get('headings', []))

            return {
                'text_statistics': text_stats,
                'sections': sections,
                'topics': topics,
                'readability': readability,
                'hierarchy': hierarchy,
                'metadata': content.get('metadata', {})
            }
        except Exception as e:
            # Deliberate best-effort: callers get an empty result plus the
            # error text rather than an exception.
            return {
                'error': str(e),
                'text_statistics': {},
                'sections': [],
                'topics': [],
                'readability': {},
                'hierarchy': {},
                'metadata': {}
            }

    def _analyze_text(self, text: str) -> Dict[str, Any]:
        """Compute basic text statistics.

        ``word_count``, ``unique_words`` and ``average_sentence_length`` are
        based on alphanumeric tokens with stop words removed; ``stop_words``
        counts the removed stop-word tokens.
        """
        sentences = sent_tokenize(text)
        # Tokenise once and derive both the content words and the stop-word
        # count from the same token list.
        tokens = word_tokenize(text.lower())
        words = [w for w in tokens if w.isalnum() and w not in self.stop_words]
        stop_word_count = sum(1 for w in tokens if w in self.stop_words)

        return {
            'word_count': len(words),
            'sentence_count': len(sentences),
            'average_sentence_length': len(words) / max(len(sentences), 1),
            'unique_words': len(set(words)),
            'stop_words': stop_word_count,
            'characters': len(text),
            'paragraphs': len(text.split('\n\n')),
            'sentences': sentences
        }

    def _extract_sections(self, soup: BeautifulSoup) -> List[Dict[str, Any]]:
        """Extract content sections.

        A section is any ``article``/``section``/``div`` element whose class
        matches ``content|main|article|section``. Nested matches are each
        reported, so their text can overlap.
        """
        sections = []
        containers = soup.find_all(
            ['article', 'section', 'div'],
            class_=re.compile(r'content|main|article|section')
        )

        for container in containers:
            # The first h1-h3 inside the container serves as its title.
            heading = container.find(['h1', 'h2', 'h3'])
            heading_text = heading.get_text().strip() if heading else 'Untitled Section'

            section_text = container.get_text().strip()

            # Prefer the CSS classes as the type; fall back to the tag name.
            section_type = container.name
            if container.get('class'):
                section_type = ' '.join(container.get('class'))

            sections.append({
                'heading': heading_text,
                'content': section_text,
                'type': section_type,
                'word_count': len(word_tokenize(section_text)),
                'position': self._get_element_position(container)
            })

        return sections

    def _extract_topics(self, text: str) -> List[Dict[str, Any]]:
        """Return the ten most frequent non-stop-word tokens with their share of the text."""
        words = [
            w for w in word_tokenize(text.lower())
            if w.isalnum() and w not in self.stop_words
        ]
        total = len(words)

        # most_common() is empty when there are no words, so the division
        # below never sees total == 0.
        return [
            {
                'topic': word,
                'frequency': freq,
                'percentage': freq / total * 100
            }
            for word, freq in Counter(words).most_common(10)
        ]

    def _analyze_readability(self, text: str) -> Dict[str, float]:
        """Compute readability metrics, including a clamped Flesch Reading Ease score."""
        sentences = sent_tokenize(text)
        words = [w for w in word_tokenize(text.lower()) if w.isalnum()]
        word_total = max(len(words), 1)

        avg_sentence_length = len(words) / max(len(sentences), 1)
        avg_word_length = sum(len(w) for w in words) / word_total

        # Flesch Reading Ease:
        # 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
        syllables = sum(self._count_syllables(w) for w in words)
        syllables_per_word = syllables / word_total
        flesch_score = 206.835 - 1.015 * avg_sentence_length - 84.6 * syllables_per_word

        return {
            'flesch_score': max(0, min(100, flesch_score)),
            'avg_sentence_length': avg_sentence_length,
            'avg_word_length': avg_word_length,
            'syllables_per_word': syllables_per_word
        }

    def _analyze_hierarchy(self, headings: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Summarise the heading hierarchy.

        Args:
            headings: Heading records, each with a ``level`` entry of the
                form ``'h1'`` ... ``'h6'`` (the digit is read from index 1).

        Returns:
            Total heading count, deepest level used, count per level, and
            whether the levels form a proper hierarchy.
        """
        heading_levels = {}
        for heading in headings:
            heading_levels.setdefault(heading['level'], []).append(heading)

        max_depth = max((int(level[1]) for level in heading_levels), default=0)

        return {
            'total_headings': len(headings),
            'max_depth': max_depth,
            'heading_distribution': {
                level: len(group) for level, group in heading_levels.items()
            },
            'has_proper_hierarchy': self._check_proper_hierarchy(heading_levels)
        }

    def _check_proper_hierarchy(self, heading_levels: Dict[str, List[Dict[str, Any]]]) -> bool:
        """Check that there is exactly one h1 and that no heading level is skipped."""
        # Covers the empty mapping as well: no h1 means no proper hierarchy.
        if 'h1' not in heading_levels:
            return False
        if len(heading_levels['h1']) > 1:
            return False

        levels = sorted(int(level[1]) for level in heading_levels)
        return all(
            deeper - shallower <= 1
            for shallower, deeper in zip(levels, levels[1:])
        )

    def _count_syllables(self, word: str) -> int:
        """Estimate the syllables in a word by counting vowel groups.

        A trailing ``e`` is treated as silent, and every non-empty word
        counts as at least one syllable. An empty string yields 0.
        """
        word = word.lower()
        if not word:
            return 0

        vowels = 'aeiouy'
        count = 1 if word[0] in vowels else 0
        # Each consonant-to-vowel transition starts a new vowel group.
        for previous, current in zip(word, word[1:]):
            if current in vowels and previous not in vowels:
                count += 1
        if word.endswith('e'):
            count -= 1
        if count == 0:
            count += 1
        return count

    def _get_element_position(self, element) -> Dict[str, int]:
        """Return the element's source line/column as ``top``/``left``, or zeros if unavailable."""
        try:
            return {
                'top': element.sourceline,
                'left': element.sourcepos
            }
        except Exception:
            # Best-effort: objects without position info report the origin.
            return {
                'top': 0,
                'left': 0
            }

View File

@@ -1,112 +0,0 @@
"""
Data collector utility for content gap analysis.
"""
import requests
from bs4 import BeautifulSoup
from typing import Dict, Any
class DataCollector:
    """
    Collects and processes website data for analysis.

    Fetches a single page over HTTP and extracts its title, meta
    description, headings, visible text, links and images.
    """

    def __init__(self, timeout: float = 10.0):
        """Initialize the data collector.

        Args:
            timeout: Seconds to wait for the server before giving up.
                Without a timeout ``requests`` waits indefinitely, so a
                stalled server would hang the whole analysis.
        """
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.timeout = timeout

    def collect(self, url: str) -> Dict[str, Any]:
        """
        Collect website data for analysis.

        Args:
            url (str): The URL to collect data from

        Returns:
            dict: Collected website data with the keys ``url``, ``title``,
            ``meta_description``, ``headings``, ``content``, ``links`` and
            ``images``. On any failure (network error, HTTP error status,
            parsing problem) a dict with only ``error`` and ``url`` is
            returned instead of raising.
        """
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()

            soup = BeautifulSoup(response.text, 'html.parser')

            # Order matters: _extract_content strips <script>/<style> from
            # the soup, which the later extractors do not depend on.
            return {
                'url': url,
                'title': self._extract_title(soup),
                'meta_description': self._extract_meta_description(soup),
                'headings': self._extract_headings(soup),
                'content': self._extract_content(soup),
                'links': self._extract_links(soup),
                'images': self._extract_images(soup)
            }
        except Exception as e:
            # Deliberate best-effort: report the failure in-band.
            return {
                'error': str(e),
                'url': url
            }

    def _extract_title(self, soup: BeautifulSoup) -> str:
        """Extract the page title, or an empty string when there is none."""
        title = soup.find('title')
        return title.text if title else ''

    def _extract_meta_description(self, soup: BeautifulSoup) -> str:
        """Extract the meta description, or an empty string when there is none."""
        meta = soup.find('meta', attrs={'name': 'description'})
        return meta.get('content', '') if meta else ''

    def _extract_headings(self, soup: BeautifulSoup) -> Dict[str, list]:
        """Extract all headings as ``{'h1': [...], ..., 'h6': [...]}`` (stripped text)."""
        return {
            f'h{level}': [tag.text.strip() for tag in soup.find_all(f'h{level}')]
            for level in range(1, 7)
        }

    def _extract_content(self, soup: BeautifulSoup) -> str:
        """Extract the visible text with whitespace collapsed to single spaces.

        Note: removes ``<script>`` and ``<style>`` elements from ``soup``
        in place.
        """
        for element in soup(['script', 'style']):
            element.decompose()

        text = soup.get_text()

        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
        return ' '.join(chunk for chunk in chunks if chunk)

    def _extract_links(self, soup: BeautifulSoup) -> list:
        """Extract every anchor with a non-empty ``href`` as ``{'url', 'text'}``."""
        links = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if href:
                links.append({
                    'url': href,
                    'text': link.text.strip()
                })
        return links

    def _extract_images(self, soup: BeautifulSoup) -> list:
        """Extract every image as ``{'src', 'alt', 'title'}`` (missing attributes become '')."""
        return [
            {
                'src': img.get('src', ''),
                'alt': img.get('alt', ''),
                'title': img.get('title', '')
            }
            for img in soup.find_all('img')
        ]

View File

@@ -1,237 +0,0 @@
"""
SEO analyzer utility for content gap analysis.
"""
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin
import re
from typing import Dict, Any, List, Optional
from ....utils.website_analyzer.analyzer import WebsiteAnalyzer
def analyze_onpage_seo(url: str) -> Dict[str, Any]:
    """
    Analyze on-page SEO elements of a website.

    Delegates the actual crawling to ``WebsiteAnalyzer`` and reshapes its
    report into a flat summary.

    Args:
        url: The URL to analyze

    Returns:
        Dictionary containing SEO analysis results. When the analyzer
        reports failure or anything raises, a fallback dict is returned
        with an ``error`` message and empty/False/0 values.
    """
    def failure(message):
        # Shape shared by the "analyzer said no" and "something raised" paths.
        return {
            'error': message,
            'meta_title': '',
            'meta_description': '',
            'has_robots_txt': False,
            'has_sitemap': False,
            'mobile_friendly': False,
            'load_time': 0
        }

    try:
        report = WebsiteAnalyzer().analyze_website(url)
        if not report.get('success', False):
            return failure(report.get('error', 'Unknown error in SEO analysis'))

        sections = report['data']['analysis']
        seo_info = sections['seo_info']
        basic_info = sections['basic_info']
        performance = sections['performance']

        return {
            'meta_tags': seo_info.get('meta_tags', {}),
            'content': seo_info.get('content', {}),
            'meta_title': basic_info.get('title', ''),
            'meta_description': basic_info.get('meta_description', ''),
            'has_robots_txt': bool(basic_info.get('robots_txt')),
            'has_sitemap': bool(basic_info.get('sitemap')),
            'mobile_friendly': True,  # Placeholder: not actually measured yet
            'load_time': performance.get('load_time', 0)
        }
    except Exception as e:
        return failure(str(e))
def _analyze_meta_tags(soup: BeautifulSoup) -> Dict[str, Any]:
    """Analyze meta tags of the webpage.

    Args:
        soup: Parsed document.

    Returns:
        Dictionary with ``title``, ``description`` and ``keywords`` (each
        present only when the corresponding tag exists), plus ``og_tags``
        and ``twitter_tags`` mappings (always present, possibly empty).
    """
    meta_tags = {}

    title_tag = soup.find('title')
    if title_tag is not None:
        # get_text() instead of .string: .string is None for an empty title
        # or one containing nested markup, which made .strip() crash.
        meta_tags['title'] = title_tag.get_text().strip()

    for field in ('description', 'keywords'):
        tag = soup.find('meta', {'name': field})
        if tag is not None:
            meta_tags[field] = tag.get('content', '').strip()

    # Open Graph tags are identified by their "property" attribute.
    meta_tags['og_tags'] = {
        tag['property']: tag.get('content', '')
        for tag in soup.find_all('meta', property=re.compile(r'^og:'))
    }

    # Twitter Card tags are identified by their "name" attribute. It must be
    # passed through attrs=: the keyword "name" is find_all()'s own tag-name
    # parameter, so name=... alongside 'meta' raises TypeError.
    meta_tags['twitter_tags'] = {
        tag['name']: tag.get('content', '')
        for tag in soup.find_all('meta', attrs={'name': re.compile(r'^twitter:')})
    }

    return meta_tags
def _analyze_headings(soup: BeautifulSoup) -> Dict[str, Any]:
    """Collect the stripped text of every heading, grouped by tag.

    Returns:
        ``{'h1': [...], ..., 'h6': [...]}``; every level is present, with an
        empty list when the page has no heading of that level.
    """
    heading_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    return {
        tag_name: [node.get_text().strip() for node in soup.find_all(tag_name)]
        for tag_name in heading_tags
    }
def _analyze_content(soup: BeautifulSoup) -> Dict[str, Any]:
    """Measure the main content area of the webpage.

    The content area is the first match of, in order: ``<main>``,
    ``<article>``, or a ``<div>`` whose class matches
    ``content|main|article``.

    Returns:
        ``word_count`` (whitespace-separated tokens), ``paragraph_count``
        (``<p>`` elements inside the area) and the raw ``content`` text;
        zeros and an empty string when no content area is found.
    """
    container = soup.find('main')
    if not container:
        container = soup.find('article')
    if not container:
        container = soup.find('div', class_=re.compile(r'content|main|article'))

    if container:
        text = container.get_text()
        return {
            'word_count': len(text.split()),
            'paragraph_count': len(container.find_all('p')),
            'content': text
        }

    return {
        'word_count': 0,
        'paragraph_count': 0,
        'content': ''
    }
def _analyze_links(soup: BeautifulSoup, base_url: str) -> Dict[str, Any]:
    """Analyze links on the webpage.

    Args:
        soup: Parsed document.
        base_url: URL of the page; used to resolve relative links and to
            decide which links are internal (same network location).

    Returns:
        ``{'internal': [...], 'external': [...], 'broken': []}`` where each
        link is ``{'url', 'text', 'title'}``. ``broken`` is never populated
        here; no link is actually requested.
    """
    links = {
        'internal': [],
        'external': [],
        'broken': []
    }

    base_domain = urlparse(base_url).netloc

    for link in soup.find_all('a', href=True):
        href = link['href']

        # mailto:, tel:, javascript: and similar hrefs are not web pages;
        # they used to be miscounted as external links.
        scheme = urlparse(href).scheme
        if scheme and scheme not in ('http', 'https'):
            continue

        # Handle relative URLs
        if not href.startswith(('http://', 'https://')):
            href = urljoin(base_url, href)

        category = 'internal' if urlparse(href).netloc == base_domain else 'external'
        links[category].append({
            'url': href,
            'text': link.get_text().strip(),
            'title': link.get('title', '')
        })

    return links
def _analyze_images(soup: BeautifulSoup) -> Dict[str, Any]:
    """Summarise the images on the webpage.

    Returns:
        ``total`` image count, how many carry a non-empty ``alt``, a
        non-empty ``title`` and both ``width`` and ``height``, plus the
        per-image records under ``images``.
    """
    def describe(img):
        # Missing attributes are reported as '' and flagged False.
        alt = img.get('alt', '')
        title = img.get('title', '')
        return {
            'src': img.get('src', ''),
            'alt': alt,
            'title': title,
            'width': img.get('width', ''),
            'height': img.get('height', ''),
            'has_alt': bool(img.get('alt')),
            'has_title': bool(img.get('title')),
            'has_dimensions': bool(img.get('width') and img.get('height'))
        }

    records = [describe(img) for img in soup.find_all('img')]

    def tally(flag):
        return sum(1 for record in records if record[flag])

    return {
        'total': len(records),
        'with_alt': tally('has_alt'),
        'with_title': tally('has_title'),
        'with_dimensions': tally('has_dimensions'),
        'images': records
    }
def _check_technical_elements(soup: BeautifulSoup, url: str) -> Dict[str, Any]:
    """Check technical SEO elements.

    Args:
        soup: Parsed document.
        url: URL of the page; its scheme and host locate ``/robots.txt``
            and ``/sitemap.xml``.

    Returns:
        Flags for robots.txt, sitemap, viewport meta tag (reported as
        ``mobile_friendly``), canonical link and ``<html lang>``, plus the
        declared ``language`` ('' when absent).
    """
    def responds_ok(resource_url):
        # A resource counts as present only on HTTP 200; any request
        # failure (bad URL, timeout, connection error) counts as absent.
        try:
            return requests.get(resource_url, timeout=5).status_code == 200
        except (requests.RequestException, ValueError):
            return False

    base_url = urlparse(url)
    site_root = f"{base_url.scheme}://{base_url.netloc}"

    has_robots_txt = responds_ok(f"{site_root}/robots.txt")
    has_sitemap = responds_ok(f"{site_root}/sitemap.xml")

    # A viewport meta tag is used as the proxy for mobile friendliness.
    has_viewport = bool(soup.find('meta', {'name': 'viewport'}))
    has_canonical = bool(soup.find('link', {'rel': 'canonical'}))

    # Fragments without an <html> element have no declared language.
    html_tag = soup.find('html')
    html_lang = html_tag.get('lang', '') if html_tag is not None else ''

    return {
        'has_robots_txt': has_robots_txt,
        'has_sitemap': has_sitemap,
        'mobile_friendly': has_viewport,
        'has_canonical': has_canonical,
        'has_language': bool(html_lang),
        'language': html_lang
    }

View File

@@ -1,270 +0,0 @@
"""
Storage module for content gap analysis results.
"""
from typing import Dict, Any, List, Optional
from datetime import datetime
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
import streamlit as st
class ContentGapAnalysisStorage:
    """Handles storage and retrieval of content gap analysis results.

    Database errors are reported through ``st.error`` and turned into a
    neutral return value (``None``, ``[]`` or ``False``) rather than raised.

    NOTE(review): the ORM models used here (``ContentGapAnalysis``,
    ``WebsiteAnalysis``, ``CompetitorAnalysis``, ``KeywordAnalysis``,
    ``ContentRecommendation``, ``AnalysisHistory``) are not imported by the
    import block visible at the top of this module - confirm where they
    are expected to come from.
    """

    def __init__(self, db_session: Session):
        """Initialize the storage handler with the session used for all queries."""
        self.db = db_session

    def save_analysis(self, user_id: int, website_url: str, industry: str, results: Dict[str, Any]) -> Optional[int]:
        """
        Save content gap analysis results.

        Writes the main analysis record and its website, competitor,
        keyword, recommendation and history rows in one transaction.

        Args:
            user_id: User ID
            website_url: Target website URL
            industry: Industry category
            results: Analysis results dictionary. Sections that are missing
                or ``None`` are stored with empty defaults.

        Returns:
            Analysis ID if successful, None on a database error. Other
            exceptions (e.g. malformed ``results``) propagate after the
            transaction has been rolled back.
        """
        website = results.get('website') or {}
        keywords = results.get('keywords') or {}

        try:
            # NOTE(review): 'metadata' is a reserved attribute name on
            # SQLAlchemy Declarative models - confirm how the model maps it.
            analysis = ContentGapAnalysis(
                user_id=user_id,
                website_url=website_url,
                industry=industry,
                status='completed',
                metadata={'version': '1.0'}
            )
            self.db.add(analysis)
            self.db.flush()  # Assigns analysis.id without committing

            self.db.add(WebsiteAnalysis(
                content_gap_analysis_id=analysis.id,
                content_score=website.get('content_score', 0),
                seo_score=website.get('seo_score', 0),
                structure_score=website.get('structure_score', 0),
                content_metrics=website.get('content_metrics', {}),
                seo_metrics=website.get('seo_metrics', {}),
                technical_metrics=website.get('technical_metrics', {}),
                ai_insights=website.get('ai_insights', {})
            ))

            for competitor in results.get('competitors') or []:
                self.db.add(CompetitorAnalysis(
                    content_gap_analysis_id=analysis.id,
                    competitor_url=competitor.get('url'),
                    market_position=competitor.get('market_position', {}),
                    content_gaps=competitor.get('content_gaps', []),
                    competitive_advantages=competitor.get('competitive_advantages', []),
                    trend_analysis=competitor.get('trend_analysis', {})
                ))

            self.db.add(KeywordAnalysis(
                content_gap_analysis_id=analysis.id,
                top_keywords=keywords.get('top_keywords', []),
                search_intent=keywords.get('search_intent', {}),
                opportunities=keywords.get('opportunities', []),
                trend_analysis=keywords.get('trend_analysis', {})
            ))

            for recommendation in results.get('recommendations') or []:
                self.db.add(ContentRecommendation(
                    content_gap_analysis_id=analysis.id,
                    recommendation_type=recommendation.get('type'),
                    priority_score=recommendation.get('priority_score', 0),
                    recommendation=recommendation.get('recommendation', ''),
                    implementation_steps=recommendation.get('implementation_steps', []),
                    expected_impact=recommendation.get('expected_impact', {}),
                    status='pending'
                ))

            self.db.add(AnalysisHistory(
                content_gap_analysis_id=analysis.id,
                status='completed',
                metrics={'duration': results.get('duration', 0)}
            ))

            self.db.commit()
            return analysis.id

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error saving analysis results: {str(e)}")
            return None
        except Exception:
            # Rows flushed before a non-database failure must not linger in
            # the session and ride along with a later commit.
            self.db.rollback()
            raise

    def get_analysis(self, analysis_id: int) -> Optional[Dict[str, Any]]:
        """
        Retrieve content gap analysis results.

        Args:
            analysis_id: Analysis ID

        Returns:
            Dictionary containing analysis results if found, None when the
            analysis does not exist or a database error occurs. The
            ``website`` and ``keywords`` sections are empty dicts when no
            such row was stored; ``history`` is ordered newest first.
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return None

            website_analysis = self.db.query(WebsiteAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).first()

            competitor_analyses = self.db.query(CompetitorAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).all()

            keyword_analysis = self.db.query(KeywordAnalysis).filter_by(
                content_gap_analysis_id=analysis_id
            ).first()

            recommendations = self.db.query(ContentRecommendation).filter_by(
                content_gap_analysis_id=analysis_id
            ).all()

            history = self.db.query(AnalysisHistory).filter_by(
                content_gap_analysis_id=analysis_id
            ).order_by(AnalysisHistory.run_date.desc()).all()

            return {
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status,
                'website': {
                    'content_score': website_analysis.content_score,
                    'seo_score': website_analysis.seo_score,
                    'structure_score': website_analysis.structure_score,
                    'content_metrics': website_analysis.content_metrics,
                    'seo_metrics': website_analysis.seo_metrics,
                    'technical_metrics': website_analysis.technical_metrics,
                    'ai_insights': website_analysis.ai_insights
                } if website_analysis else {},
                'competitors': [{
                    'url': ca.competitor_url,
                    'market_position': ca.market_position,
                    'content_gaps': ca.content_gaps,
                    'competitive_advantages': ca.competitive_advantages,
                    'trend_analysis': ca.trend_analysis
                } for ca in competitor_analyses],
                'keywords': {
                    'top_keywords': keyword_analysis.top_keywords,
                    'search_intent': keyword_analysis.search_intent,
                    'opportunities': keyword_analysis.opportunities,
                    'trend_analysis': keyword_analysis.trend_analysis
                } if keyword_analysis else {},
                'recommendations': [{
                    'type': r.recommendation_type,
                    'priority_score': r.priority_score,
                    'recommendation': r.recommendation,
                    'implementation_steps': r.implementation_steps,
                    'expected_impact': r.expected_impact,
                    'status': r.status
                } for r in recommendations],
                'history': [{
                    'run_date': h.run_date,
                    'status': h.status,
                    'metrics': h.metrics,
                    'error_log': h.error_log
                } for h in history]
            }

        except SQLAlchemyError as e:
            st.error(f"Error retrieving analysis results: {str(e)}")
            return None

    def get_user_analyses(self, user_id: int) -> List[Dict[str, Any]]:
        """
        Get all analyses for a user.

        Args:
            user_id: User ID

        Returns:
            List of analysis summaries, newest first; an empty list on a
            database error.
        """
        try:
            analyses = self.db.query(ContentGapAnalysis).filter_by(
                user_id=user_id
            ).order_by(ContentGapAnalysis.analysis_date.desc()).all()

            return [{
                'id': analysis.id,
                'website_url': analysis.website_url,
                'industry': analysis.industry,
                'analysis_date': analysis.analysis_date,
                'status': analysis.status
            } for analysis in analyses]

        except SQLAlchemyError as e:
            st.error(f"Error retrieving user analyses: {str(e)}")
            return []

    def update_recommendation_status(self, recommendation_id: int, status: str) -> bool:
        """
        Update the status of a recommendation.

        Args:
            recommendation_id: Recommendation ID
            status: New status

        Returns:
            True if successful, False when the recommendation does not
            exist or a database error occurs.
        """
        try:
            recommendation = self.db.query(ContentRecommendation).get(recommendation_id)
            if not recommendation:
                return False

            recommendation.status = status
            # Naive UTC timestamp, kept as-is to match the stored values.
            recommendation.updated_at = datetime.utcnow()
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error updating recommendation status: {str(e)}")
            return False

    def delete_analysis(self, analysis_id: int) -> bool:
        """
        Delete an analysis and all related data.

        NOTE(review): only the main record is deleted here; removal of the
        related rows presumably relies on cascade rules on the model -
        confirm.

        Args:
            analysis_id: Analysis ID

        Returns:
            True if successful, False when the analysis does not exist or a
            database error occurs.
        """
        try:
            analysis = self.db.query(ContentGapAnalysis).get(analysis_id)
            if not analysis:
                return False

            self.db.delete(analysis)
            self.db.commit()
            return True

        except SQLAlchemyError as e:
            self.db.rollback()
            st.error(f"Error deleting analysis: {str(e)}")
            return False

View File

@@ -1,291 +0,0 @@
"""Website analyzer module for content gap analysis."""
import streamlit as st
from loguru import logger
from typing import Dict, Any, List, Optional
import asyncio
import sys
import os
import json
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer as BaseWebsiteAnalyzer
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
# Configure logging for this module.
# NOTE(review): `logger` is the object imported from loguru, so the
# remove()/add() calls below presumably affect every module that logs
# through it - confirm that reconfiguring at import time is intended.
logger.remove() # Drop the handlers registered so far
# File sink: DEBUG and above, rotated at 50 MB, files kept for 10 days.
logger.add(
"logs/content_gap_website_analyzer.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above on stdout, using loguru colour markup.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)
class WebsiteAnalyzer(BaseWebsiteAnalyzer):
"""Extended website analyzer for content gap analysis.

Subclass of the analyzer imported from lib.utils.website_analyzer.analyzer.
It adds competitor gap analysis and helpers that pull SEO, performance,
content and technical metrics out of the base analysis result.
"""
def __init__(self):
"""Initialize the base analyzer and log that this subclass is ready."""
super().__init__()
logger.info("ContentGapWebsiteAnalyzer initialized")
def analyze_content_gaps(self, url: str, competitor_urls: List[str]) -> Dict[str, Any]:
    """
    Analyze content gaps between the target website and competitors.

    The target and each competitor are run through self.analyze_website();
    the collected data is embedded in a prompt and sent to llm_text_gen().

    Args:
        url: The target URL to analyze
        competitor_urls: List of competitor URLs to compare against

    Returns:
        Dictionary with 'gaps' and 'recommendations' lists. When anything
        fails the dictionary also carries an 'error' message and both lists
        are empty; this method does not raise.
    """
    try:
        # Analyze target website
        target_analysis = self.analyze_website(url)
        if not target_analysis.get('success', False):
            return {
                'error': target_analysis.get('error', 'Unknown error in target analysis'),
                'gaps': [],
                'recommendations': []
            }

        # Competitors whose analysis fails are left out of the comparison.
        competitor_analyses = []
        for competitor_url in competitor_urls:
            competitor_analysis = self.analyze_website(competitor_url)
            if competitor_analysis.get('success', False):
                competitor_analyses.append(competitor_analysis['data'])

        # Generate content gap analysis using AI
        prompt = f"""Analyze content gaps between the target website and competitors:
Target Website:
{json.dumps(target_analysis['data'], indent=2)}
Competitor Websites:
{json.dumps(competitor_analyses, indent=2)}
Identify:
1. Missing content topics
2. Content depth differences
3. Keyword gaps
4. Content structure improvements
5. Content quality recommendations
Format the response as JSON with 'gaps' and 'recommendations' keys."""

        # Get AI analysis
        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in content gap analysis.",
            response_format="json_object"
        )
        if not analysis:
            return {
                'error': 'Failed to generate content gap analysis',
                'gaps': [],
                'recommendations': []
            }

        # NOTE(review): llm_text_gen presumably may hand back the JSON as raw
        # text; decode it so the .get() calls below work on a mapping.
        if isinstance(analysis, str):
            analysis = json.loads(analysis)

        return {
            'gaps': analysis.get('gaps', []),
            'recommendations': analysis.get('recommendations', [])
        }
    except Exception as e:
        error_msg = f"Error analyzing content gaps: {str(e)}"
        # loguru does not understand exc_info=True: it would only be used as
        # a str.format() argument (which can itself raise when the message
        # contains braces). logger.exception() records the traceback.
        logger.exception(error_msg)
        return {
            'error': error_msg,
            'gaps': [],
            'recommendations': []
        }
def analyze(self, url: str) -> Dict[str, Any]:
    """
    Analyze a website for content gaps and SEO opportunities.

    Runs self.analyze_website(), derives SEO and performance metrics from
    its data and mirrors a progress dictionary into self.progress.

    Args:
        url: The URL to analyze

    Returns:
        On success: {'success': True, 'data': {...}, 'progress': {...}}.
        On failure: {'error': str, 'error_details': {...}, 'progress': {...}}.
    """
    import traceback  # local import: used only to report unexpected failures

    # Built before the try block so the except handler can always update it.
    progress = {
        'status': 'in_progress',
        'current_stage': 'content_analysis',
        'current_step': 'Initializing analysis',
        'progress': 0,
        'details': 'Starting website analysis...'
    }
    try:
        self.progress.update(progress)

        # Get base website analysis
        logger.info("Starting base website analysis")
        website_analysis = self.analyze_website(url)
        if not website_analysis.get('success', False):
            error_msg = website_analysis.get('error', 'Unknown error in website analysis')
            logger.error(f"Error in website analysis: {error_msg}")
            progress['status'] = 'error'
            progress['details'] = error_msg
            self.progress.update(progress)
            return {
                'error': error_msg,
                'error_details': website_analysis.get('error_details', {}),
                'progress': progress
            }

        # Extract SEO metrics from the analysis
        seo_metrics = self._extract_seo_metrics(website_analysis['data'])
        # Extract performance metrics
        performance_metrics = self._extract_performance_metrics(website_analysis['data'])

        # Update progress
        progress['status'] = 'completed'
        progress['progress'] = 100
        progress['details'] = 'Analysis completed successfully'
        self.progress.update(progress)
        return {
            'success': True,
            'data': {
                'seo_metrics': seo_metrics,
                'performance_metrics': performance_metrics,
                'website_analysis': website_analysis['data']
            },
            'progress': progress
        }
    except Exception as e:
        error_msg = f"Error in content gap analysis: {str(e)}"
        # loguru ignores exc_info=True (it is only a str.format() argument);
        # logger.exception() is the call that records the traceback.
        logger.exception(error_msg)
        progress['status'] = 'error'
        progress['details'] = error_msg
        self.progress.update(progress)
        return {
            'error': error_msg,
            'error_details': {
                'type': type(e).__name__,
                # str(e.__traceback__) only gave "<traceback object at 0x...>";
                # format_exc() returns the readable stack trace as text.
                'traceback': traceback.format_exc()
            },
            'progress': progress
        }
def _extract_seo_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Extract SEO-related metrics from website analysis.

    Reads website_analysis['analysis']['seo_info'], treating every missing
    level as empty.

    Returns:
        Dictionary with 'overall_score', 'meta_tags' (title, description,
        keywords) and 'content' (word_count, readability_score,
        content_quality_score). An empty dict is returned if extraction
        raises.
    """
    try:
        seo_info = website_analysis.get('analysis', {}).get('seo_info', {})
        meta_tags = seo_info.get('meta_tags', {})
        content = seo_info.get('content', {})
        return {
            'overall_score': seo_info.get('overall_score', 0),
            'meta_tags': {
                'title': meta_tags.get('title', {}),
                'description': meta_tags.get('description', {}),
                'keywords': meta_tags.get('keywords', {})
            },
            'content': {
                'word_count': content.get('word_count', 0),
                'readability_score': content.get('readability_score', 0),
                'content_quality_score': content.get('content_quality_score', 0)
            }
        }
    except Exception as e:
        # logger.exception() records the traceback; loguru treats
        # exc_info=True as a mere str.format() argument.
        logger.exception(f"Error extracting SEO metrics: {str(e)}")
        return {}
def _extract_performance_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Extract performance metrics from website analysis.

    Reads website_analysis['analysis']['performance'], treating missing
    levels as empty.

    Returns:
        Dictionary with 'load_time', 'page_size', 'resource_count' and
        'performance_score', each defaulting to 0. An empty dict is
        returned if extraction raises.
    """
    metric_names = ('load_time', 'page_size', 'resource_count', 'performance_score')
    try:
        performance_info = website_analysis.get('analysis', {}).get('performance', {})
        return {name: performance_info.get(name, 0) for name in metric_names}
    except Exception as e:
        # logger.exception() records the traceback; loguru treats
        # exc_info=True as a mere str.format() argument.
        logger.exception(f"Error extracting performance metrics: {str(e)}")
        return {}
def _extract_content_metrics(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Extract content-related metrics from website analysis.

    Unlike the other extractors this one requires
    website_analysis['analysis']['content_info'] to exist: a missing key is
    logged and yields an empty dict.

    Returns:
        Dictionary with the counters 'word_count', 'heading_count',
        'image_count', 'link_count' (default 0) and the flags
        'has_meta_description', 'has_robots_txt', 'has_sitemap'
        (default False), or {} if extraction raises.
    """
    counters = ('word_count', 'heading_count', 'image_count', 'link_count')
    flags = ('has_meta_description', 'has_robots_txt', 'has_sitemap')
    try:
        content_info = website_analysis['analysis']['content_info']
        metrics = {name: content_info.get(name, 0) for name in counters}
        metrics.update({name: content_info.get(name, False) for name in flags})
        return metrics
    except Exception as e:
        # logger.exception() records the traceback; loguru treats
        # exc_info=True as a mere str.format() argument.
        logger.exception(f"Error extracting content metrics: {str(e)}")
        return {}
def _extract_technical_info(self, website_analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Extract technical information from website analysis.

    Reads website_analysis['analysis']['basic_info'], treating missing
    levels as empty.

    Returns:
        Dictionary with 'title', 'meta_description', 'robots_txt', 'sitemap'
        (default '') and 'headers', 'server_info', 'security_info'
        (default {}). An empty dict is returned if extraction raises.
    """
    try:
        basic_info = website_analysis.get('analysis', {}).get('basic_info', {})
        # Built per call so each result gets its own fresh default dicts.
        field_defaults = (
            ('title', ''),
            ('meta_description', ''),
            ('headers', {}),
            ('robots_txt', ''),
            ('sitemap', ''),
            ('server_info', {}),
            ('security_info', {}),
        )
        return {name: basic_info.get(name, default) for name, default in field_defaults}
    except Exception as e:
        # logger.exception() records the traceback; loguru treats
        # exc_info=True as a mere str.format() argument.
        logger.exception(f"Error extracting technical info: {str(e)}")
        return {}
def _generate_insights(self, content_metrics: Dict[str, Any], seo_metrics: Dict[str, Any]) -> List[str]:
    """Generate content insights based on analysis results.

    Args:
        content_metrics: Must contain 'word_count', 'heading_count',
            'image_count', 'has_meta_description', 'has_robots_txt' and
            'has_sitemap'.
        seo_metrics: 'overall_score' is read, defaulting to 0.

    Returns:
        Human-readable insight strings in a fixed order (content length,
        headings, images, SEO score, then missing site files). An empty
        list is returned if a required key is missing or any other error
        occurs.
    """
    try:
        insights = []

        # Content insights
        word_count = content_metrics['word_count']
        if word_count < 300:
            insights.append("Content length is below recommended minimum (300 words)")
        elif word_count > 2000:
            insights.append("Content length is above recommended maximum (2000 words)")
        if content_metrics['heading_count'] < 2:
            insights.append("Content structure could be improved with more headings")
        if content_metrics['image_count'] == 0:
            insights.append("Consider adding images to improve content engagement")

        # SEO insights
        overall_score = seo_metrics.get('overall_score', 0)
        if overall_score < 60:
            insights.append("SEO optimization needs significant improvement")
        elif overall_score < 80:
            insights.append("SEO optimization has room for improvement")

        presence_checks = (
            ('has_meta_description', "Missing meta description - important for SEO"),
            ('has_robots_txt', "Missing robots.txt - important for search engine crawling"),
            ('has_sitemap', "Missing sitemap.xml - important for search engine indexing"),
        )
        for flag, message in presence_checks:
            if not content_metrics[flag]:
                insights.append(message)
        return insights
    except Exception as e:
        # logger.exception() records the traceback; loguru treats
        # exc_info=True as a mere str.format() argument.
        logger.exception(f"Error generating insights: {str(e)}")
        return []

View File

@@ -1,160 +0,0 @@
"""Content title generator module."""
import os
import json
import streamlit as st
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
from loguru import logger
from typing import Dict, Any, List, Optional
import asyncio
import sys
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
from lib.utils.website_analyzer.analyzer import WebsiteAnalyzer
# Configure logging for this module.
# NOTE(review): `logger` is the object imported from loguru, so the
# remove()/add() calls below presumably affect every module that logs
# through it - confirm that reconfiguring at import time is intended.
logger.remove() # Drop the handlers registered so far
# File sink: DEBUG and above, rotated at 50 MB, files kept for 10 days.
logger.add(
"logs/content_title_generator.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above on stdout, using loguru colour markup.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
os.makedirs("logs", exist_ok=True)
def ai_title_generator(url: str) -> Dict[str, Any]:
    """
    Generate SEO-optimized titles using AI.

    The page is analyzed with WebsiteAnalyzer.analyze_website(); its content
    and SEO details are embedded in a prompt sent to llm_text_gen().

    Args:
        url: The URL to analyze

    Returns:
        Dictionary with 'patterns' (dict) and 'suggestions' (list). When
        anything fails the dictionary also carries an 'error' message and
        both values are empty; this function does not raise.
    """
    try:
        # Analyze website
        analyzer = WebsiteAnalyzer()
        analysis = analyzer.analyze_website(url)
        if not analysis.get('success', False):
            return {
                'error': analysis.get('error', 'Unknown error in analysis'),
                'patterns': {},
                'suggestions': []
            }

        # Extract content and meta information
        page_analysis = analysis['data']['analysis']
        content_info = page_analysis['content_info']
        seo_info = page_analysis['seo_info']
        meta_tags = seo_info.get('meta_tags', {})
        heading_structure = content_info.get('heading_structure', {})

        # Generate title suggestions using AI
        prompt = f"""Based on the following website content and SEO analysis, generate 5 SEO-optimized title suggestions:
Content Analysis:
- Word Count: {content_info.get('word_count', 0)}
- Heading Structure: {heading_structure}
SEO Analysis:
- Meta Title: {meta_tags.get('title', {}).get('value', '')}
- Meta Description: {meta_tags.get('description', {}).get('value', '')}
- Keywords: {meta_tags.get('keywords', {}).get('value', '')}
Generate 5 title suggestions that are:
1. SEO-optimized
2. Engaging and click-worthy
3. Between 50-60 characters
4. Include relevant keywords
5. Follow best practices for title optimization
Format the response as JSON with 'suggestions' and 'patterns' keys."""

        # Get AI suggestions
        suggestions = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert specializing in title optimization.",
            response_format="json_object"
        )
        if not suggestions:
            return {
                'error': 'Failed to generate title suggestions',
                'patterns': {},
                'suggestions': []
            }

        # NOTE(review): llm_text_gen presumably may hand back the JSON as raw
        # text; decode it so the .get() calls below work on a mapping.
        if isinstance(suggestions, str):
            suggestions = json.loads(suggestions)

        return {
            'patterns': suggestions.get('patterns', {}),
            'suggestions': suggestions.get('suggestions', [])
        }
    except Exception as e:
        error_msg = f"Error generating title suggestions: {str(e)}"
        # loguru does not understand exc_info=True: it would only be used as
        # a str.format() argument (which can itself raise when the message
        # contains braces). logger.exception() records the traceback.
        logger.exception(error_msg)
        return {
            'error': error_msg,
            'patterns': {},
            'suggestions': []
        }
@retry(stop=stop_after_attempt(3), wait=wait_random_exponential(min=1, max=4))
def generate_blog_titles(input_blog_keywords, input_blog_content, input_title_type, input_title_intent, input_language):
    """Generate SEO optimized blog titles using AI.

    Args:
        input_blog_keywords: Main keywords of the blog (may be empty).
        input_blog_content: Blog content to derive titles from (may be empty).
        input_title_type: Blog type the titles are optimized for.
        input_title_intent: Web search intent the titles are optimized for.
        input_language: Language the titles should be written in.

    Returns:
        The value returned by llm_text_gen(), or None when neither keywords
        nor content were given or the LLM call failed (the problem is shown
        through st.error).
    """
    # NOTE(review): LLM failures are caught below, so the @retry decorator
    # never sees an exception from that call - confirm whether retrying the
    # LLM request was the intent.
    if input_blog_content and input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with main 'blog keywords' and 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog keywords and content.
Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords and content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.
Blog keywords: '{input_blog_keywords}'
Blog content: '{input_blog_content}'
"""
    elif input_blog_keywords:
        prompt = f"""As a SEO expert, I will provide you with the main 'keywords' of a blog.
Your task is to write 5 SEO optimized blog titles from the given blog keywords.
Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given keywords.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.
Blog keywords: '{input_blog_keywords}'
"""
    elif input_blog_content:
        prompt = f"""As a SEO expert, I will provide you with the 'blog content'.
Your task is to write 5 SEO optimized blog titles from the given blog content.
Follow the below guidelines for generating the blog titles:
1. Follow all best practices for SEO optimized blog titles.
2. Optimize your response around the given content.
3. Optimize your response for web search intent {input_title_intent}.
4. Optimize your response for blog type {input_title_type}.
5. The blog titles should be in {input_language} language.
Blog content: '{input_blog_content}'
"""
    else:
        # Without this branch `prompt` stayed unbound and the call ended in
        # UnboundLocalError, which @retry then retried before giving up.
        st.error("Exit: Provide blog keywords and/or blog content to generate titles.")
        return None

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        st.error(f"Exit: Failed to get response from LLM: {err}")
        return None

View File

@@ -1,115 +0,0 @@
from typing import List, Dict, Union
#from nltk import tokenize, stem, pos_tag
from textblob import TextBlob
import enchant
class TextPreprocessor:
    """Normalise raw text by tokenizing and stemming it."""

    def preprocess_text(self, text: str) -> str:
        """Return *text* as a single string of space-separated word stems.

        Args:
            text: Raw input text.

        Returns:
            The stemmed tokens joined with single spaces.
        """
        # The nltk import at the top of the file is commented out, so the
        # former `tokenize` / `stem` references raised NameError. TextBlob is
        # already imported by this file: `.tokens` tokenizes the text and
        # `WordList.stem()` stems every token (Porter stemmer by default).
        stemmed_tokens = TextBlob(text).tokens.stem()
        # Join the stemmed tokens back into a string
        return ' '.join(stemmed_tokens)
class SEOAnalyzer:
    """Compute simple text-level SEO metrics (density, readability, semantics)."""

    def calculate_seo_percentage(self, text: str, keywords: List[str]) -> float:
        """Return the plain average of the three metrics below.

        Args:
            text: Text to score.
            keywords: Target keywords.

        Returns:
            (keyword density + readability score + semantic score) / 3.
        """
        keyword_density = self.calculate_keyword_density(text, keywords)
        readability_score = self.calculate_readability_score(text)
        semantic_score = self.perform_semantic_analysis(text)
        return (keyword_density + readability_score + semantic_score) / 3

    def calculate_keyword_density(self, text: str, keywords: List[str]) -> float:
        """Return total keyword occurrences divided by the token count.

        Matching is a case-insensitive substring count. Duplicate entries in
        *keywords* are counted once. Text without tokens scores 0.0.
        """
        # `tokenize` was never imported (nltk import is commented out), so
        # the already-imported TextBlob supplies the token list instead.
        word_count = len(TextBlob(text).tokens)
        if word_count == 0:
            return 0.0  # avoid ZeroDivisionError on empty text
        lowered = text.lower()
        keyword_counts = {keyword: lowered.count(keyword.lower()) for keyword in keywords}
        return sum(keyword_counts.values()) / word_count

    def calculate_readability_score(self, text: str) -> float:
        """Return the inverse of the average number of tokens per sentence.

        Text without sentences or tokens scores 0.0.
        """
        sentences = TextBlob(text).sentences
        sentence_count = len(sentences)
        word_count = sum(len(sentence.tokens) for sentence in sentences)
        if sentence_count == 0 or word_count == 0:
            return 0.0  # avoid ZeroDivisionError on empty text
        average_words_per_sentence = word_count / sentence_count
        return 1 / average_words_per_sentence

    def perform_semantic_analysis(self, text: str) -> float:
        """Return the share of part-of-speech tags that are nouns or verbs.

        A tag counts as a noun when it starts with 'N' and as a verb when it
        starts with 'V'. Text that yields no tags scores 0.0.
        """
        # `pos_tag` was never imported; TextBlob.tags yields (word, tag) pairs.
        tagged_text = TextBlob(text).tags
        if not tagged_text:
            return 0.0  # avoid ZeroDivisionError on empty text
        noun_count = sum(1 for _, pos in tagged_text if pos.startswith('N'))
        verb_count = sum(1 for _, pos in tagged_text if pos.startswith('V'))
        return (noun_count + verb_count) / len(tagged_text)
class SpellChecker:
    """Find tokens that the en_US enchant dictionary does not recognise."""

    def check_spelling(self, text: str) -> List[str]:
        """Return the tokens of *text* that fail the dictionary check.

        Args:
            text: Text to check.

        Returns:
            Rejected tokens in their original order (duplicates kept).
        """
        # Create a spellchecker object
        dictionary = enchant.Dict("en_US")
        # `tokenize` was never imported (nltk import is commented out), so
        # the already-imported TextBlob supplies the token list instead.
        tokens = TextBlob(text).tokens
        # Check the spelling of each token
        return [str(token) for token in tokens if not dictionary.check(token)]
class SEOAnalysisModule:
    """Facade that runs preprocessing, SEO scoring and spell checking."""

    def __init__(self):
        self.text_preprocessor = TextPreprocessor()
        self.seo_analyzer = SEOAnalyzer()
        self.spell_checker = SpellChecker()

    def analyze_text(self, text: str, keywords: List[str]) -> Dict[str, Union[float, List[str]]]:
        """Score *text* against *keywords* and list its spelling errors.

        Args:
            text: Raw text to analyze.
            keywords: Target keywords.

        Returns:
            Dictionary with 'seo_percentage', 'keyword_density',
            'readability_score', 'semantic_score' (floats computed on the
            preprocessed text) and 'spelling_errors' (list of tokens).
        """
        # Preprocess the text
        preprocessed_text = self.text_preprocessor.preprocess_text(text)

        seo_percentage = self.seo_analyzer.calculate_seo_percentage(preprocessed_text, keywords)
        keyword_density = self.seo_analyzer.calculate_keyword_density(preprocessed_text, keywords)
        readability_score = self.seo_analyzer.calculate_readability_score(preprocessed_text)
        semantic_score = self.seo_analyzer.perform_semantic_analysis(preprocessed_text)

        # Spell-check the text as written: the preprocessed text holds word
        # stems, which are frequently not dictionary words and were being
        # reported as misspellings.
        spelling_errors = self.spell_checker.check_spelling(text)

        return {
            'seo_percentage': seo_percentage,
            'keyword_density': keyword_density,
            'readability_score': readability_score,
            'semantic_score': semantic_score,
            'spelling_errors': spelling_errors
        }

View File

@@ -0,0 +1,340 @@
"""
FastAPI endpoint for the Comprehensive SEO Analyzer
Provides data for the React SEO Dashboard
"""
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, HttpUrl
from typing import List, Optional, Dict, Any
from datetime import datetime
import json
from .comprehensive_seo_analyzer import ComprehensiveSEOAnalyzer, SEOAnalysisResult
# FastAPI application object; the route decorators in this module register on it.
app = FastAPI(
title="Comprehensive SEO Analyzer API",
description="API for analyzing website SEO performance with actionable insights",
version="1.0.0"
)
# Initialize the analyzer: one module-level instance used by every endpoint here.
seo_analyzer = ComprehensiveSEOAnalyzer()
# Request body accepted by the /analyze-seo and /analyze-seo-enhanced endpoints.
class SEOAnalysisRequest(BaseModel):
# Page to analyze, declared with pydantic's HttpUrl type.
url: HttpUrl
# Optional keywords passed through to the analyzer; None when omitted.
target_keywords: Optional[List[str]] = None
# Response body of the /analyze-seo and /analyze-seo-enhanced endpoints.
# Every field except `success` and `message` is copied from the analyzer result.
class SEOAnalysisResponse(BaseModel):
url: str
timestamp: datetime
overall_score: int
health_status: str
critical_issues: List[str]
warnings: List[str]
recommendations: List[str]
# Per-category analysis details (the endpoints read keys such as 'meta_data').
data: Dict[str, Any]
# Set to True by both endpoints; failures are raised as HTTP 500 instead.
success: bool
# Human-readable completion message built by the endpoint.
message: str
@app.post("/analyze-seo", response_model=SEOAnalysisResponse)
async def analyze_seo(request: SEOAnalysisRequest):
    """
    Run the comprehensive SEO analysis for one URL.

    Args:
        request: SEOAnalysisRequest containing URL and optional target keywords

    Returns:
        SEOAnalysisResponse populated from the analyzer result

    Raises:
        HTTPException: status 500 when the analysis or response building fails
    """
    try:
        # The analyzer is called with a plain string, not the HttpUrl object.
        target_url = str(request.url)
        analysis = seo_analyzer.analyze_url(target_url, request.target_keywords)
        return SEOAnalysisResponse(
            url=analysis.url,
            timestamp=analysis.timestamp,
            overall_score=analysis.overall_score,
            health_status=analysis.health_status,
            critical_issues=analysis.critical_issues,
            warnings=analysis.warnings,
            recommendations=analysis.recommendations,
            data=analysis.data,
            success=True,
            message=f"SEO analysis completed successfully for {analysis.url}",
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error analyzing SEO: {str(e)}"
        )
@app.get("/health")
async def health_check():
    """Report that the service is up, together with the current server time."""
    payload = {}
    payload["status"] = "healthy"
    payload["timestamp"] = datetime.now()
    payload["service"] = "Comprehensive SEO Analyzer API"
    return payload
@app.get("/analysis-summary/{url:path}")
async def get_analysis_summary(url: str):
    """
    Analyze a URL and return a condensed overview of the result.

    Args:
        url: The URL to analyze; "https://" is prepended when it has no
            http:// or https:// prefix

    Returns:
        Dictionary with the scores, issue/warning/recommendation counts, the
        first three critical issues and recommendations, and the analysis
        timestamp in ISO format

    Raises:
        HTTPException: status 500 when the analysis fails
    """
    try:
        has_scheme = url.startswith(('http://', 'https://'))
        target_url = url if has_scheme else f"https://{url}"

        report = seo_analyzer.analyze_url(target_url)

        summary = {
            "url": report.url,
            "overall_score": report.overall_score,
            "health_status": report.health_status,
            "critical_issues_count": len(report.critical_issues),
            "warnings_count": len(report.warnings),
            "recommendations_count": len(report.recommendations),
        }
        summary["top_issues"] = report.critical_issues[:3]
        summary["top_recommendations"] = report.recommendations[:3]
        summary["analysis_timestamp"] = report.timestamp.isoformat()
        return summary
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error getting analysis summary: {str(e)}"
        )
@app.get("/seo-metrics/{url:path}")
async def get_seo_metrics(url: str):
    """
    Analyze a URL and return the per-category data the dashboard displays.

    Args:
        url: The URL to analyze; "https://" is prepended when it has no
            http:// or https:// prefix

    Returns:
        Dictionary with 'metrics' (overall and per-category scores, 0 when a
        category is missing), the issue lists, 'detailed_analysis' (raw
        category sections, {} when missing), 'timestamp' and 'url'

    Raises:
        HTTPException: status 500 when the analysis fails
    """
    # (metric key in the response, section key in the analyzer data)
    score_sections = (
        ("url_structure_score", 'url_structure'),
        ("meta_data_score", 'meta_data'),
        ("content_score", 'content_analysis'),
        ("technical_score", 'technical_seo'),
        ("performance_score", 'performance'),
        ("accessibility_score", 'accessibility'),
        ("user_experience_score", 'user_experience'),
        ("security_score", 'security_headers'),
    )
    detail_sections = (
        'url_structure',
        'meta_data',
        'content_analysis',
        'technical_seo',
        'performance',
        'accessibility',
        'user_experience',
        'security_headers',
        'keyword_analysis',
    )
    try:
        if not url.startswith(('http://', 'https://')):
            url = f"https://{url}"

        report = seo_analyzer.analyze_url(url)

        metrics = {
            "overall_score": report.overall_score,
            "health_status": report.health_status,
        }
        for metric_key, section in score_sections:
            metrics[metric_key] = report.data.get(section, {}).get('score', 0)

        return {
            "metrics": metrics,
            "critical_issues": report.critical_issues,
            "warnings": report.warnings,
            "recommendations": report.recommendations,
            "detailed_analysis": {
                section: report.data.get(section, {}) for section in detail_sections
            },
            "timestamp": report.timestamp.isoformat(),
            "url": report.url
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error getting SEO metrics: {str(e)}"
        )
@app.post("/batch-analyze")
async def batch_analyze(urls: List[str]):
    """
    Analyze several URLs one after another and summarise each outcome.

    Args:
        urls: List of URLs to analyze; "https://" is prepended to entries
            without an http:// or https:// prefix

    Returns:
        Dictionary with 'total_urls', 'successful_analyses',
        'failed_analyses' and 'results' (one entry per URL; failed entries
        carry success=False, zeroed counters and the error text)

    Raises:
        HTTPException: status 500 when the batch as a whole fails
    """
    try:
        results = []
        for entry in urls:
            target_url = entry
            try:
                if not target_url.startswith(('http://', 'https://')):
                    target_url = f"https://{target_url}"

                report = seo_analyzer.analyze_url(target_url)
                outcome = {
                    "url": report.url,
                    "overall_score": report.overall_score,
                    "health_status": report.health_status,
                    "critical_issues_count": len(report.critical_issues),
                    "warnings_count": len(report.warnings),
                    "success": True
                }
            except Exception as e:
                # A failing URL is reported in place; the batch continues.
                outcome = {
                    "url": target_url,
                    "overall_score": 0,
                    "health_status": "error",
                    "critical_issues_count": 0,
                    "warnings_count": 0,
                    "success": False,
                    "error": str(e)
                }
            results.append(outcome)

        succeeded = sum(1 for item in results if item['success'])
        failed = sum(1 for item in results if not item['success'])
        return {
            "total_urls": len(urls),
            "successful_analyses": succeeded,
            "failed_analyses": failed,
            "results": results
        }
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error in batch analysis: {str(e)}"
        )
# User-facing prefix phrases keyed by severity or rating.
# enhance_analysis_result() prepends the 'critical_issue', 'warning' and
# 'recommendation' entries to the matching messages; the other entries are
# not referenced by the code shown in this module section.
ENHANCED_PROMPTS = {
"critical_issue": "🚨 CRITICAL: This issue is severely impacting your SEO performance and must be fixed immediately.",
"warning": "⚠️ WARNING: This could be improved to boost your search rankings.",
"recommendation": "💡 RECOMMENDATION: Implement this to improve your SEO score.",
"excellent": "🎉 EXCELLENT: Your SEO is performing very well in this area!",
"good": "✅ GOOD: Your SEO is performing well, with room for minor improvements.",
"needs_improvement": "🔧 NEEDS IMPROVEMENT: Several areas need attention to boost your SEO.",
"poor": "❌ POOR: Significant improvements needed across multiple areas."
}
def enhance_analysis_result(result: SEOAnalysisResult) -> SEOAnalysisResult:
    """
    Build a copy of an analysis result whose messages carry friendly prefixes.

    Each critical issue, warning and recommendation is prefixed with the
    matching ENHANCED_PROMPTS phrase followed by a space; all other fields
    are passed through unchanged. The input object is not modified.
    """
    def with_prefix(prompt_key, messages):
        # Prepend the phrase for prompt_key to every message.
        return [f"{ENHANCED_PROMPTS[prompt_key]} {message}" for message in messages]

    friendly_issues = with_prefix('critical_issue', result.critical_issues)
    friendly_warnings = with_prefix('warning', result.warnings)
    friendly_recommendations = with_prefix('recommendation', result.recommendations)

    return SEOAnalysisResult(
        url=result.url,
        timestamp=result.timestamp,
        overall_score=result.overall_score,
        health_status=result.health_status,
        critical_issues=friendly_issues,
        warnings=friendly_warnings,
        recommendations=friendly_recommendations,
        data=result.data
    )
@app.post("/analyze-seo-enhanced", response_model=SEOAnalysisResponse)
async def analyze_seo_enhanced(request: SEOAnalysisRequest):
    """
    Run the SEO analysis and return it with user-friendly message prefixes.

    Args:
        request: SEOAnalysisRequest containing URL and optional target keywords

    Returns:
        SEOAnalysisResponse built from the result of enhance_analysis_result()

    Raises:
        HTTPException: status 500 when the analysis or response building fails
    """
    try:
        # The analyzer is called with a plain string, not the HttpUrl object.
        target_url = str(request.url)
        raw_result = seo_analyzer.analyze_url(target_url, request.target_keywords)
        friendly = enhance_analysis_result(raw_result)
        return SEOAnalysisResponse(
            url=friendly.url,
            timestamp=friendly.timestamp,
            overall_score=friendly.overall_score,
            health_status=friendly.health_status,
            critical_issues=friendly.critical_issues,
            warnings=friendly.warnings,
            recommendations=friendly.recommendations,
            data=friendly.data,
            success=True,
            message=f"Enhanced SEO analysis completed successfully for {friendly.url}",
        )
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Error analyzing SEO: {str(e)}"
        )
# Start a uvicorn server for this app when the module is executed directly.
if __name__ == "__main__":
import uvicorn
# host "0.0.0.0" listens on all network interfaces; port 8000.
uvicorn.run(app, host="0.0.0.0", port=8000)

View File

@@ -1,98 +0,0 @@
import streamlit as st
import openai
import os
from bs4 import BeautifulSoup
import requests
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def scrape_url_content(url, timeout=10):
    """
    Scrapes the content from the provided URL.

    Args:
        url (str): The URL to scrape content from.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout requests can block indefinitely on an
            unresponsive host.

    Returns:
        str: The text of all <p> elements joined with spaces, or an empty
        string when fetching or parsing fails (the error is shown through
        st.error).
    """
    # FIXME: Use firecrawl metadata option for this.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        return ' '.join(p.text for p in soup.find_all('p'))
    except requests.exceptions.RequestException as e:
        # Also covers timeouts: requests' Timeout derives from RequestException.
        st.error(f"Error fetching the URL content: {e}")
        return ""
    except Exception as e:
        st.error(f"Error parsing the HTML content: {e}")
        return ""
def generate_twitter_tags(topic, scraped_content=""):
    """
    Generates a list of relevant Twitter hashtags based on the topic and optional scraped content.

    Args:
        topic (str): The main topic or key phrase.
        scraped_content (str): Optional scraped content to add more context.
            Only its first 700 characters are put into the prompt.

    Returns:
        The value returned by llm_text_gen() (the hashtags), or None when
        the LLM call fails (the error is shown through st.error).
    """
    prompt = f"Generate a list of highly relevant and trending Twitter hashtags based on the topic '{topic}'"
    if scraped_content:
        prompt += f" and the following content: {scraped_content[:700]}..."  # Limit content to keep prompt manageable.
    prompt += " Make sure the hashtags are popular and relevant to the topic. Follow Latest best practices for twitter tags."
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # The message previously said "Open Graph tags", a copy-paste
        # leftover from another generator.
        st.error(f"Failed to generate Twitter tags: {err}")
        return None
def display_app():
"""
Displays the Streamlit app UI and handles user interactions.

Renders the title, a topic input and an optional URL input. When a topic
is present and the "Generate Twitter Tags" button is pressed, the tags from
generate_twitter_tags() are written to the page. Returns nothing.
"""
st.title("AI Twitter Tag Generator")
st.write(
"Generate trending and highly relevant Twitter tags with minimal input. "
"Optionally, provide a URL to make the tags even more targeted."
)
# User Inputs
topic = st.text_input(
"Enter the topic or key phrase for Twitter tags",
placeholder="e.g., AI in marketing"
)
url = st.text_input(
"Optional: Enter a URL to scrape for more targeted tags",
placeholder="e.g., https://example.com/article"
)
if topic:
# With a URL the page is scraped for extra context; otherwise the context is empty.
# NOTE(review): the scrape runs before the button check below, so it presumably
# repeats on every Streamlit rerun while a URL is filled in - confirm.
if url:
with st.spinner("Scraping content from the provided URL..."):
scraped_content = scrape_url_content(url)
if not scraped_content:
st.info("No content could be extracted from the provided URL.")
else:
scraped_content = ""
if st.button("Generate Twitter Tags"):
with st.spinner("Generating Twitter tags..."):
tags = generate_twitter_tags(topic, scraped_content)
# generate_twitter_tags returns None on failure, in which case nothing is shown here.
if tags:
st.success("Twitter tags generated successfully!")
st.write(tags)
else:
st.info("Please enter a topic or key phrase to generate Twitter tags.")

View File

@@ -1,116 +0,0 @@
"""Webpage content analysis tool."""
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
from urllib.parse import urlparse
# Page header and introduction. This module is a flat Streamlit script, so
# these statements run top to bottom each time the script executes.
st.title("🧠 Web Content Analyzer: Uncover Hidden Insights with AI! 🧠")
st.write("""
Welcome! This tool leverages the power of AI to analyze your web page's content. It goes beyond just keywords -
we'll use cutting-edge technology to uncover valuable insights and unlock new ways to boost your website!
""")
# --- User Input ---
# The URL field is pre-filled with an example address.
url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
st.write(" ")
# --- AI Model Setup ---
# A LangChain OpenAI LLM wrapped in a ConversationChain, created at module level.
# NOTE(review): ConversationChain presumably keeps conversation memory between
# run() calls, so later prompts may include earlier exchanges - confirm.
llm = OpenAI(temperature=0.7)
conversation_chain = ConversationChain(llm=llm)
# --- Analyze Button & Processing ---
# Everything below runs only when the button is pressed: validate the URL,
# fetch and parse the page, show basic statistics, then ask the LLM three
# questions about the page text.
if st.button("Analyze with AI!"):
with st.spinner('Analyzing your content...'):
url = url_input.strip()
# NOTE(review): `language` is assigned here but not used by any later line shown.
language = language_input.lower()
# Reject input that does not start with "http" and stop the script run.
if not url.startswith("http"):
st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
st.stop()
try:
# Validate URL
# NOTE(review): the check above already requires an "http" prefix, so this
# scheme fallback looks mostly redundant - confirm before removing.
parsed_url = urlparse(url)
if not parsed_url.scheme:
url = "https://" + url
# Fetch webpage content
# NOTE(review): no timeout is passed, so this call can wait indefinitely.
response = requests.get(url)
response.raise_for_status()
# Parse HTML
soup = BeautifulSoup(response.text, 'html.parser')
# Extract content
title = soup.title.string if soup.title else "No title found"
meta_description = soup.find('meta', {'name': 'description'})
# NOTE(review): indexing ['content'] raises KeyError when the tag has no such
# attribute; that ends up in the generic handler at the bottom.
description = meta_description['content'] if meta_description else "No description found"
# Display results
st.subheader("Page Analysis")
st.metric("Title", title)
st.metric("Description", description)
# Content statistics
text_content = soup.get_text()
# Words are whitespace-separated chunks of the page text (case-sensitive).
words = text_content.split()
st.metric("Word Count", len(words))
st.metric("Unique Words", len(set(words)))
# Frequency analysis: the ten most common words and their counts
freq = nltk.FreqDist(words)
keywords = freq.most_common(10)
df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
# --- AI-Powered Insights ---
# Each question below embeds the complete page text in its prompt.
st.subheader("AI Insights:")
st.write(" ")
st.markdown("**Main Theme:**")
ai_theme = conversation_chain.run(f"What is the main theme or topic of this content? \n {text_content}")
st.markdown(f" {ai_theme}")
st.write(" ")
st.markdown("**Suggested Keywords:**")
ai_keywords = conversation_chain.run(f"What other relevant keywords might be helpful to target for this content? \n {text_content}")
st.markdown(f" {ai_keywords}")
st.write(" ")
st.markdown("**Content Improvement:**")
ai_improvement = conversation_chain.run(f"What could be done to improve this content for clarity, engagement, or SEO? \n {text_content}")
st.markdown(f" {ai_improvement}")
# --- Display Frequency Results ---
st.write(" ")
st.subheader("Top Keywords:")
st.write(" ")
st.dataframe(df_keywords)
st.subheader("What's the Value of This AI Analysis?")
st.write(" ")
st.markdown("""
* **Uncover Hidden Insights:** AI can analyze your content in much more nuanced ways, helping you spot connections and trends you might have missed.
* **Go Beyond Keywords:** AI can provide in-depth insights into your content's main themes, tone, and even suggest relevant topics to explore further.
* **AI as a Partner:** Think of this AI as your content strategist, offering guidance and actionable steps to make your content even better.
Ready to leverage the power of AI to optimize your content? Start putting the suggestions and insights you just received into practice. See what difference AI can make in your writing! 🚀
""")
# Network and HTTP-status failures from requests are reported separately
# from all other errors.
except requests.exceptions.RequestException as e:
st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
except Exception as e:
st.error(f"An error occurred: {e}")

View File

@@ -1,377 +0,0 @@
import streamlit as st
import advertools as adv
import pandas as pd
from urllib.parse import urlparse
import requests
from datetime import datetime
import tempfile
import os
# Title and introduction
def show_title_and_intro():
    """Render the page title followed by a short welcome blurb."""
    heading = "🌟 URL SEO Checkup: Your Link's Health Report 🌟"
    welcome_text = """
    Welcome to the URL SEO Checkup! This tool is like a doctor for your website links.
    Just paste your URL, and we'll check if it's healthy and ready to climb the search engine ladder.
    """
    st.title(heading)
    st.write(welcome_text)
# Basic HTTPS Check
def check_https(url):
    """Report whether ``url`` starts with the ``https://`` prefix.

    Args:
        url: The URL string entered by the user.
    """
    st.subheader("The Basics - Are We Looking Good?")
    st.write("---")

    # Guard clause: handle the insecure case first, then fall through to success.
    if not url.startswith("https://"):
        st.warning("🚧 Heads Up! Your URL doesn't use 'https://'. This is a red flag for Google.")
        st.info("🔧 **How to fix:** Contact your hosting provider or website developer to install an SSL certificate. This will secure your site with HTTPS.")
        return

    st.success("✨ You're using HTTPS! This adds extra security, and Google rewards that with better rankings. Keep it up! ✨")
# URL Length Check
def check_url_length(path):
    """Tell the user whether the URL path fits within 50 characters.

    Args:
        path: The path component of the URL being checked.
    """
    st.subheader("The Length Test - Keep it Short and Sweet!")
    st.write("---")

    path_is_long = len(path) > 50
    if path_is_long:
        st.warning("🧭 Tip: Try shortening your URL. Shorter URLs are easier to remember and better for SEO.")
        st.info("🔧 **How to fix:** Consider removing unnecessary words or folders in the URL. Aim for concise, descriptive URLs that are easy for users to read.")
    else:
        st.success("🏆 Great! Your URL is short and user-friendly. Google loves short URLs! 🏆")
# Hyphen Check
def check_hyphens(path):
    """Report whether the URL path contains at least one hyphen.

    Args:
        path: The path component of the URL being checked.
    """
    st.subheader("The Hyphen Check - Use Hyphens for Clear Separation!")
    st.write("---")

    hyphen_missing = path.find("-") == -1
    if hyphen_missing:
        st.warning("❓ Did you know? Using hyphens between words (like 'shoes-for-sale') helps Google understand your URL better!")
        st.info("🔧 **How to fix:** Update your URL to use hyphens (-) instead of spaces or underscores (_). For example, 'shoes-for-sale' instead of 'shoes_for_sale'.")
        return

    st.success("😎 You're on the right track! Using hyphens makes your URL more readable for both users and Google. 😎")
# File Extension Check
def check_file_extension(path):
    """Report whether the last segment of the URL path carries a file extension.

    Only the final path segment is inspected, so a dot in a directory name
    (for example ``/v1.2/page``) is not mistaken for an extension. A segment
    counts as having an extension when it has non-empty text on both sides of
    its last dot, so ``.htaccess`` and ``file.`` do not qualify.

    Args:
        path: The path component of the URL being checked.
    """
    st.subheader("File Extension Check - Showing Your Files With Pride!")
    st.write("---")

    last_segment = path.rstrip("/").rsplit("/", 1)[-1]
    stem, dot, extension = last_segment.rpartition(".")
    has_extension = bool(stem and dot and extension)

    if has_extension:
        st.success("🥳 File Extension Check: Your URL includes a file extension like '.html', which helps Google categorize your page. Nice job! 🥳")
    else:
        st.warning("🤔 Your URL seems to be missing a file extension like '.html' or '.php'.")
        st.info("🔧 **How to fix:** While file extensions are not always required, adding them to static pages (like .html or .php) can improve clarity for search engines.")
# Keyword Insights
def show_keyword_insights(netloc, path):
    """Show a short keyword primer together with the URL's domain and path.

    Args:
        netloc: The network location (domain) part of the URL.
        path: The path part of the URL.
    """
    st.subheader("Bonus Insight - Let's Talk Keywords")
    st.write("---")
    st.info("Keywords are the words people use to search for information online. Your goal is to help Google understand what your page is about by using the right keywords in your URL!")

    summary = f"""
    **Your Domain:** {netloc}
    **Your URL Path:** {path}
    **Suggestion:** Consider adding a primary keyword to your URL if it aligns with your page content. But don't overdo it too many keywords can hurt your SEO. Keep it natural!
    """
    st.markdown(summary)
# Enhanced HTTP Headers Analysis using advertools
def analyze_http_headers(url):
    """Crawl the HTTP response headers of ``url`` and render an SEO report.

    The headers are fetched with ``advertools.crawl_headers`` into a temporary
    JSON-lines file, loaded into a DataFrame, and the first row is presented in
    four tabs: security, SEO, performance and technical details.

    Args:
        url: Absolute URL whose HTTP response headers should be inspected.

    Returns:
        None. All output is rendered through Streamlit; any failure is shown
        with ``st.error`` instead of being raised to the caller.
    """
    st.subheader("🔍 Advanced HTTP Headers Analysis")
    st.write("---")
    try:
        with st.spinner("Analyzing HTTP headers..."):
            # advertools writes to a path, so only reserve a file name here.
            with tempfile.NamedTemporaryFile(mode='w', suffix='.jl', delete=False) as tmp_file:
                temp_filename = tmp_file.name
            try:
                adv.crawl_headers([url], temp_filename)
                headers_df = pd.read_json(temp_filename, lines=True)
            finally:
                # Remove the temp file even when the crawl or the parse fails.
                os.unlink(temp_filename)

        if headers_df.empty:
            st.error("❌ Could not retrieve headers data")
            return

        def first_value(column):
            """Return the first-row value of ``column``, or None if absent or NaN."""
            if column not in headers_df.columns:
                return None
            value = headers_df[column].iloc[0]
            return None if pd.isna(value) else value

        st.success("✅ Successfully analyzed HTTP headers!")
        tab1, tab2, tab3, tab4 = st.tabs(["🔒 Security", "📈 SEO Headers", "⚡ Performance", "📊 Technical Details"])

        with tab1:
            st.write("### Security Headers Analysis")
            security_headers = {
                'resp_headers_X-Frame-Options': 'X-Frame-Options',
                'resp_headers_X-Content-Type-Options': 'X-Content-Type-Options',
                'resp_headers_X-XSS-Protection': 'X-XSS-Protection',
                'resp_headers_Strict-Transport-Security': 'Strict-Transport-Security',
                'resp_headers_Content-Security-Policy': 'Content-Security-Policy',
                'resp_headers_Referrer-Policy': 'Referrer-Policy',
            }
            for header_key, header_name in security_headers.items():
                value = first_value(header_key)
                if value is not None:
                    st.success(f"✅ **{header_name}**: Present")
                    with st.expander(f"View {header_name} Details"):
                        st.code(value)
                else:
                    st.warning(f"⚠️ **{header_name}**: Missing")
                    st.info(f"💡 **Recommendation**: Add {header_name} header for better security")

        with tab2:
            st.write("### SEO-Related Headers")
            seo_headers = {
                'resp_headers_Content-Type': 'Content-Type',
                'resp_headers_Content-Language': 'Content-Language',
                'resp_headers_Cache-Control': 'Cache-Control',
                'resp_headers_Expires': 'Expires',
                'resp_headers_Last-Modified': 'Last-Modified',
                'resp_headers_ETag': 'ETag',
            }
            for header_key, header_name in seo_headers.items():
                value = first_value(header_key)
                if value is not None:
                    st.success(f"✅ **{header_name}**: {value}")
                else:
                    st.info(f" **{header_name}**: Not set or not detected")

            # Media types and charset names are case-insensitive, so compare
            # in lower case (servers commonly send "charset=UTF-8").
            content_type = str(first_value('resp_headers_Content-Type') or '').lower()
            if 'text/html' in content_type:
                st.success("🎯 **Content-Type**: Properly set for HTML content")
            if 'charset=utf-8' in content_type:
                st.success("🌍 **Character Encoding**: UTF-8 detected - Great for international SEO!")

        with tab3:
            st.write("### Performance Headers")
            perf_headers = {
                'resp_headers_Server': 'Server',
                'resp_headers_X-Powered-By': 'X-Powered-By',
                'resp_headers_Connection': 'Connection',
                'resp_headers_Transfer-Encoding': 'Transfer-Encoding',
                'resp_headers_Content-Encoding': 'Content-Encoding',
                'resp_headers_Content-Length': 'Content-Length',
            }
            for header_key, header_name in perf_headers.items():
                value = first_value(header_key)
                if value is not None:
                    st.info(f"📊 **{header_name}**: {value}")

            # Compare whole encoding tokens so an unrelated value that merely
            # contains the letters "br" is not reported as Brotli.
            if 'resp_headers_Content-Encoding' in headers_df.columns:
                raw_encoding = headers_df['resp_headers_Content-Encoding'].iloc[0]
                encodings = {token.strip().lower() for token in str(raw_encoding).split(',')}
                if encodings & {'gzip', 'br'}:
                    st.success("🚀 **Compression**: Enabled - Great for page speed!")
                else:
                    st.warning("⚠️ **Compression**: Consider enabling GZIP or Brotli compression")
            else:
                st.warning("⚠️ **Compression**: Not detected - Consider enabling compression")

            if 'status' in headers_df.columns:
                status = headers_df['status'].iloc[0]
                if status == 200:
                    st.success(f"✅ **HTTP Status**: {status} OK")
                else:
                    st.warning(f"⚠️ **HTTP Status**: {status}")

        with tab4:
            st.write("### Complete Headers Analysis")
            # Show response headers only (more relevant for SEO).
            response_headers = {
                col: col.replace('resp_headers_', '')
                for col in headers_df.columns
                if col.startswith('resp_headers_')
            }
            if response_headers:
                st.write("**Response Headers:**")
                for col, display_name in response_headers.items():
                    if not pd.isna(headers_df[col].iloc[0]):
                        st.write(f"**{display_name}**: `{headers_df[col].iloc[0]}`")

            st.write("**Crawl Information:**")
            metadata_cols = ['url', 'status', 'crawl_time', 'download_latency']
            for col in metadata_cols:
                if col in headers_df.columns:
                    st.write(f"**{col.replace('_', ' ').title()}**: `{headers_df[col].iloc[0]}`")

            csv = headers_df.to_csv(index=False)
            st.download_button(
                label="📥 Download Complete Headers Data as CSV",
                data=csv,
                file_name=f"headers_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
            )
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user, do not crash.
        st.error(f"❌ Error analyzing headers: {str(e)}")
        st.info("💡 **Tip**: Make sure the URL is accessible and try again")
# Enhanced robots.txt and sitemap detection
def check_robots_and_sitemap(url):
    """Probe the site for ``robots.txt`` and an XML sitemap and report findings.

    Requests are sent to ``<scheme>://<netloc>/robots.txt`` and to three common
    sitemap file names, each with a 10 second timeout. Only ``requests``
    failures are treated as "could not check"; other exceptions (including
    Streamlit's own control-flow exceptions) are allowed to propagate.

    Args:
        url: Any URL on the site to inspect; only its scheme and host are used.
    """
    st.subheader("🤖 Robots.txt & Sitemap Detection")
    st.write("---")
    parsed_url = urlparse(url)
    base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

    # Check robots.txt
    robots_url = f"{base_url}/robots.txt"
    try:
        response = requests.get(robots_url, timeout=10)
    except requests.exceptions.RequestException:
        st.error("❌ Could not check robots.txt")
    else:
        if response.status_code == 200:
            st.success(f"✅ **Robots.txt found**: {robots_url}")
            with st.expander("View robots.txt content"):
                st.code(response.text[:1000])  # Show first 1000 characters
        else:
            st.warning(f"⚠️ **Robots.txt not found**: Consider creating one at {robots_url}")

    # Check common sitemap locations; stop at the first one that answers 200.
    sitemap_locations = [
        f"{base_url}/sitemap.xml",
        f"{base_url}/sitemap_index.xml",
        f"{base_url}/sitemaps.xml",
    ]
    sitemap_found = False
    for sitemap_url in sitemap_locations:
        try:
            response = requests.get(sitemap_url, timeout=10)
        except requests.exceptions.RequestException:
            # An unreachable candidate just means "try the next location".
            continue
        if response.status_code == 200:
            st.success(f"✅ **Sitemap found**: {sitemap_url}")
            sitemap_found = True
            break

    if not sitemap_found:
        st.warning("⚠️ **Sitemap not found**: Consider creating an XML sitemap")
        st.info("💡 **Recommendation**: Submit your sitemap to Google Search Console")
# Enhanced URL structure analysis
def enhanced_url_analysis(url):
    """Break ``url`` into its components and grade length, depth and characters.

    Args:
        url: The full URL string to analyse.
    """
    st.subheader("🔗 Enhanced URL Structure Analysis")
    st.write("---")
    parts = urlparse(url)

    left, right = st.columns(2)

    with left:
        st.write("**URL Components:**")
        st.info(f"**Protocol**: {parts.scheme}")
        st.info(f"**Domain**: {parts.netloc}")
        st.info(f"**Path**: {parts.path}")
        # Query and fragment are optional; only show them when present.
        for label, value in (("Query", parts.query), ("Fragment", parts.fragment)):
            if value:
                st.info(f"**{label}**: {value}")

    with right:
        st.write("**SEO Analysis:**")

        # Overall length of the URL string.
        total_chars = len(url)
        if total_chars > 100:
            st.error(f"❌ **URL Length**: {total_chars} characters (Too long)")
        elif total_chars > 60:
            st.warning(f"⚠️ **URL Length**: {total_chars} characters (Good, but could be shorter)")
        else:
            st.success(f"✅ **URL Length**: {total_chars} characters (Excellent)")

        # Number of non-empty path segments.
        levels = sum(1 for piece in parts.path.split('/') if piece)
        if levels > 3:
            st.warning(f"⚠️ **URL Depth**: {levels} levels (Consider flattening)")
        else:
            st.success(f"✅ **URL Depth**: {levels} levels (Good)")

        # Distinct characters that fall outside the permitted set below.
        permitted = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-._~:/?#[]@!$&\'()*+,;=')
        unusual = {ch for ch in url if ch not in permitted}
        if unusual:
            st.warning(f"⚠️ **Special Characters**: Found {len(unusual)} special characters")
        else:
            st.success("✅ **Special Characters**: Clean URL structure")
# Enhanced main function to run the analysis
def run_analysis(url):
    """Run every URL check in sequence and finish with a summary section.

    Args:
        url: The full URL string to analyse.
    """
    pieces = urlparse(url)
    domain = pieces.netloc
    url_path = pieces.path

    # Basic checks: scheme first, then the path-based ones.
    check_https(url)
    for path_check in (check_url_length, check_hyphens, check_file_extension):
        path_check(url_path)

    # Enhanced analyses that work on the complete URL.
    for url_check in (enhanced_url_analysis, analyze_http_headers, check_robots_and_sitemap):
        url_check(url)

    show_keyword_insights(domain, url_path)

    # Closing summary.
    st.subheader("📋 Analysis Summary & Recommendations")
    st.write("---")
    st.success("🎉 **Analysis Complete!** Review the findings above and implement the recommendations for better SEO performance.")
    st.write("**Key Recommendations:**")
    key_recommendations = (
        "✅ Ensure HTTPS is enabled for security and SEO benefits",
        "🔗 Keep URLs short, descriptive, and user-friendly",
        "🔒 Implement security headers to protect your site",
        "🤖 Create and maintain robots.txt and XML sitemaps",
        "⚡ Enable compression and optimize HTTP headers for performance",
        "📊 Monitor your URL structure and avoid excessive depth",
    )
    for item in key_recommendations:
        st.write(item)
# Display the app
def url_seo_checker():
    """Render the URL SEO checker page: intro, URL input and analyse button."""
    show_title_and_intro()

    raw_url = st.text_input("Paste your URL here:", "https://www.example.com/")
    st.write(" ")  # Add spacing

    # Nothing more to do until the user presses the button.
    if not st.button("Let's Analyze!"):
        return

    with st.spinner('Checking your link...'):
        url = raw_url.strip()
        has_scheme = url.startswith("http://") or url.startswith("https://")
        if not has_scheme:
            st.error("Oops! It seems like your URL needs 'http://' or 'https://' at the beginning. Please add it!")
            st.stop()
        run_analysis(url)

View File

@@ -1,113 +0,0 @@
"""Word cloud generation tool."""
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from urllib.parse import urlparse

# Page header and input widgets. This part runs on every Streamlit rerun.
st.title("🔎 Web Content Analyzer: Uncover Your Words' Power! 🔎")
st.write("""
Welcome! This tool helps you understand the words that drive your website content. Just paste in your web page's
URL, and we'll give you insights you can use to improve your content and reach more people!
""")
url_input = st.text_input("Paste your URL here:", "https://www.example.com/")
language_input = st.selectbox("What language is your content?", ('English', 'Italian', 'Albanian'))
num_results_input = st.slider("How many top words/phrases should we show?", min_value=10, max_value=150, value=50)
st.write(" ")

# Allowlist of registrable domains this tool is permitted to fetch.
authorized_domains = ["example.com", "another-example.com"]

if st.button("Analyze Your Content!"):
    with st.spinner('Analyzing your content...'):
        url = url_input.strip()
        language = language_input.lower()
        num_results = num_results_input

        if not url.startswith(("http://", "https://")):
            st.error("Oops! Looks like you forgot 'http://' or 'https://' at the beginning of your URL. Please add it and try again! 😊")
            st.stop()

        # Compare the bare host name (no port, no credentials) and accept
        # subdomains, so "www.example.com" matches the "example.com" entry.
        hostname = urlparse(url).hostname or ""
        is_authorized = any(
            hostname == domain or hostname.endswith("." + domain)
            for domain in authorized_domains
        )
        if not is_authorized:
            st.error("The domain of the provided URL is not authorized. Please use an authorized domain.")
            st.stop()

        try:
            # A timeout keeps a stalled server from hanging the app forever.
            response = requests.get(url, timeout=10)
            response.raise_for_status()  # Check for errors
            soup = BeautifulSoup(response.content, 'html.parser')

            # Fall back to the whole document when there is no <body> element.
            body = soup.find('body')
            body_txt = body.text if body is not None else soup.get_text()

            words = [w.lower() for w in word_tokenize(body_txt)]
            # A set gives constant-time membership tests in the filter below.
            stopw = set(nltk.corpus.stopwords.words(language))
            final_words = [w for w in words if w not in stopw and w.isalpha()]

            # Frequency analysis
            freq = nltk.FreqDist(final_words)
            keywords = freq.most_common(num_results)
            bigrams = ngrams(final_words, 2)
            freq_bigrams = nltk.FreqDist(bigrams)
            bigrams_freq = freq_bigrams.most_common(num_results)

            # Create DataFrames for Display
            df_keywords = pd.DataFrame(keywords, columns=("Keyword", "Frequency"))
            df_bigrams = pd.DataFrame(bigrams_freq, columns=("Bigram", "Frequency"))

            st.subheader("Top Keywords and Phrases:")
            st.write(" ")
            st.dataframe(df_keywords)
            st.write(" ")
            st.subheader("Top Two-Word Phrases:")
            st.write(" ")
            st.dataframe(df_bigrams)
            st.write(" ")
            st.subheader("What's the Value of This Analysis?")
            st.write(" ")
            st.markdown("""
            * **See What Resonates:** Discover the most popular words and phrases used on your website. This can reveal themes and topics that your audience is interested in.
            * **Find Keywords for SEO:** The analysis helps identify relevant keywords you could use for your website content and marketing efforts.
            * **Improve Your Content:** You can understand how people might search for similar content and ensure you're providing the right keywords.
            * **Stand Out:** Compare your results to other websites or competitors to understand how you can differentiate your content.
            Ready to dive deeper into your content's vocabulary? Start by making some of the keywords you just discovered the stars of your next blog post or social media message. You might be surprised at the impact! 🚀
            """)
        except requests.exceptions.RequestException as e:
            # Must precede the OSError handler: RequestException subclasses it.
            st.error(f"Oops! Something went wrong fetching the URL. Error: {e}")
        except (LookupError, OSError) as e:
            # Raised by NLTK when the tokenizer or stop-word data is missing.
            st.error(f"The language resources needed for '{language}' are not available. Error: {e}")
def generate_wordcloud(text):
    """Render a word cloud and basic word statistics for ``text``.

    Args:
        text: The raw text to visualise. Empty or whitespace-only input is
            rejected with a warning because there is nothing to draw.

    Returns:
        None. The image and metrics are rendered through Streamlit.
    """
    if not text or not text.strip():
        st.warning("Please enter some text to generate a word cloud.")
        return

    # Create and generate a word cloud image
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # Display the word cloud
    st.subheader("Word Cloud Visualization")
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # release it to avoid accumulating figures across reruns.
        plt.close(fig)

    # Add some statistics
    st.subheader("Text Statistics")
    words = text.split()
    st.metric("Total Words", len(words))
    st.metric("Unique Words", len(set(words)))

View File

@@ -1,6 +1,7 @@
import os
import streamlit as st
from google import genai
import google.genai as genai
from google.genai import types
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
# Set page config

View File

@@ -1,99 +0,0 @@
# Content Generation Dashboard
## Overview
The Content Generation Dashboard is a central hub for ALwrity's content creation tools, providing an intuitive interface for accessing various AI-powered content generation capabilities.
## Features
### 1. Modality-Based Organization
- **Text Generation**
- Blog Writing
- Story Creation
- Product Descriptions
- News Articles
- Long-form Content
- **Social Media**
- Instagram Posts
- LinkedIn Content
- YouTube Scripts
- **Image Generation**
- AI Image Creation
- Visual Content Tools
- **Audio/Video**
- Speech to Blog
- Audio Transcription
### 2. Smart Navigation
- Quick access to recently used tools
- Favorite tools management
- Hierarchical navigation structure
- Minimal-click access to tools
### 3. Error Handling
- Custom exception handling
- User-friendly error messages
- Automatic error recovery
- Detailed error logging
### 4. State Management
- Persistent tool states
- Usage analytics tracking
- Performance monitoring
- Session management
## Architecture
### Core Components
1. **Dashboard UI (`dashboard.py`)**
- Main interface rendering
- Tool card management
- Navigation controls
- User interaction handling
2. **State Manager (`state_manager.py`)**
- Tool state tracking
- Usage metrics collection
- State persistence
- Navigation history
3. **Error Handler (`error_handler.py`)**
- Custom exceptions
- Error logging
- Recovery mechanisms
- User feedback
## Implementation Status
### Completed Features
- ✅ Basic dashboard layout
- ✅ Tool card implementation
- ✅ Error handling system
- ✅ State management
- ✅ Navigation structure
### In Progress
- 🔄 Performance optimization
- 🔄 User analytics integration
- 🔄 Tool loading improvements
### Planned Features
- ⏳ Advanced error recovery
- ⏳ Tool usage suggestions
- ⏳ Accessibility improvements
- ⏳ Performance monitoring
## Usage
### For Users
1. Access the dashboard through ALwrity's main interface
2. Select desired content generation modality
3. Choose specific tool from available options
4. Follow tool-specific workflows
### For Developers
1. Error Handling:
```python
from content_generation.error_handler import DashboardError

View File

@@ -1,629 +0,0 @@
import streamlit as st
from typing import Dict, List
from functools import lru_cache
from datetime import datetime
from loguru import logger
# Import all necessary AI writer functions
from lib.ai_writers.ai_blog_writer.ai_blog_generator import ai_blog_writer_page
from lib.ai_writers.ai_essay_writer import ai_essay_generator
from lib.ai_writers.ai_news_article_writer import ai_news_generation
from lib.utils.alwrity_utils import ai_news_writer, ai_finance_ta_writer, ai_social_writer, essay_writer
from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
from lib.ai_writers.twitter_writers import run_dashboard as twitter_writer
from lib.ai_writers.insta_ai_writer import insta_writer
from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
from lib.ai_writers.ai_agents_crew_writer import ai_agents_writers
from lib.utils.alwrity_utils import ai_agents_team
# Import SEO tools from ai_seo_tools
from lib.ai_seo_tools.on_page_seo_analyzer import analyze_onpage_seo
from lib.ai_seo_tools.weburl_seo_checker import url_seo_checker
from lib.ai_seo_tools.content_title_generator import ai_title_generator, generate_blog_titles
from lib.ai_seo_tools.meta_desc_generator import metadesc_generator_main
from lib.ai_seo_tools.seo_structured_data import ai_structured_data
from lib.ai_seo_tools.image_alt_text_generator import alt_text_gen
from lib.ai_seo_tools.opengraph_generator import og_tag_generator
from lib.ai_seo_tools.google_pagespeed_insights import google_pagespeed_insights
from lib.ai_seo_tools.sitemap_analysis import main as sitemap_analyzer
from lib.ai_seo_tools.twitter_tags_generator import display_app as twitter_tags_app
from lib.ai_seo_tools.enterprise_seo_suite import render_enterprise_seo_suite
from lib.alwrity_ui.seo_tools_dashboard import ai_seo_tools
@lru_cache(maxsize=None)
def get_tool_implementations() -> Dict[str, callable]:
    """Return a mapping of tool display names to the callables that render them.

    The mapping is built once and cached, so every caller receives the same
    dict object; callers should treat it as read-only. Tools that are not
    implemented yet are mapped to a placeholder that shows a "coming soon"
    notice.

    Returns:
        Dict of tool name to zero-argument callable. Entries whose value is
        not callable are dropped and reported in the log.
    """

    def coming_soon(label):
        """Build a placeholder renderer announcing that ``label`` is pending."""
        return lambda: st.info(f"{label} coming soon!")

    tool_mapping = {
        # Text Generation Tools
        "AI Blog Writer": ai_blog_writer_page,
        "AI Essay Writer": essay_writer,
        "AI News Writer": ai_news_writer,
        "AI Content Team": ai_agents_team,
        # Business Content Tools
        "Financial TA Writer": ai_finance_ta_writer,
        "AI Social Media": ai_social_writer,
        # Social Media Specific Tools
        "Facebook Writer": facebook_main_menu,
        "LinkedIn Writer": linkedin_main_menu,
        "Twitter Writer": twitter_writer,
        "Instagram Writer": insta_writer,
        "YouTube Writer": youtube_main_menu,
        # SEO & Optimization Tools
        "SEO Dashboard": ai_seo_tools,
        "On-Page SEO Analyzer": analyze_onpage_seo,
        "URL SEO Checker": url_seo_checker,
        "AI Title Generator": lambda: _render_seo_tool("AI Title Generator", generate_blog_titles),
        "Meta Description Generator": metadesc_generator_main,
        "Structured Data Generator": ai_structured_data,
        "Alt Text Generator": alt_text_gen,
        "OpenGraph Tags": og_tag_generator,
        "Page Speed Insights": google_pagespeed_insights,
        "Sitemap Analyzer": sitemap_analyzer,
        "Twitter Cards Generator": twitter_tags_app,
        "Enterprise SEO Suite": render_enterprise_seo_suite,
    }

    # Tools that only have a placeholder for now, in display order.
    placeholder_tools = (
        # Creative Content Tools
        "Story Generator",
        "Poetry Writer",
        "Script Writer",
        "Email Templates",
        # Marketing Content Tools
        "Ad Copy Generator",
        "Product Descriptions",
        "Press Releases",
        "Landing Page Copy",
        # Educational Content Tools
        "Course Content",
        "Tutorial Writer",
        "Quiz Generator",
        "Study Guides",
    )
    for tool_name in placeholder_tools:
        tool_mapping[tool_name] = coming_soon(tool_name)

    # ``callable()`` cannot raise, so a plain filter is enough here; import
    # failures would already have surfaced when this module was loaded.
    failed_imports = [name for name, func in tool_mapping.items() if not callable(func)]
    if failed_imports:
        logger.info(f"Some tools are not available: {failed_imports}")

    return {name: func for name, func in tool_mapping.items() if callable(func)}
def _render_seo_tool(tool_name: str, tool_function):
    """Draw a heading for an SEO tool and then run the tool beneath it.

    Any tool whose name contains "Title Generator" gets the dedicated title
    generator form; every other tool is invoked directly, with errors shown
    to the user and written to the log.

    Args:
        tool_name: Display name used in the heading and in messages.
        tool_function: Callable that renders the tool.
    """
    st.markdown(f"## 🔍 {tool_name}")
    st.markdown("---")

    # The title generator has its own custom UI.
    if "Title Generator" in tool_name:
        _render_title_generator_ui()
        return

    try:
        if not callable(tool_function):
            st.warning(f"Tool '{tool_name}' is not properly configured.")
        else:
            tool_function()
    except Exception as e:
        st.error(f"Error loading tool: {str(e)}")
        logger.error(f"Error in SEO tool {tool_name}: {str(e)}")
def _render_title_generator_ui():
    """Render the AI Title Generator form and show the generated titles.

    The latest result is kept in ``st.session_state`` so that it, and the
    button shown beneath it, survive the script rerun that any button click
    triggers. A button placed only inside the ``if submitted:`` branch can
    never be acted upon, because the form's submit flag is False again on the
    rerun caused by the click.
    """
    result_key = "title_generator_result"

    st.markdown("### Generate SEO-Optimized Titles")

    # Input form
    with st.form("title_generator_form"):
        col1, col2 = st.columns(2)
        with col1:
            keywords = st.text_input(
                "Blog Keywords",
                placeholder="Enter your main keywords (comma-separated)",
                help="Primary keywords for your content"
            )
            title_type = st.selectbox(
                "Content Type",
                ["How-to Guide", "Listicle", "News Article", "Product Review", "Tutorial", "Case Study", "Opinion", "Research"]
            )
        with col2:
            content = st.text_area(
                "Blog Content (Optional)",
                placeholder="Paste your blog content here for more targeted titles...",
                height=100,
                help="Optional: Paste existing content for more relevant titles"
            )
            title_intent = st.selectbox(
                "Search Intent",
                ["Informational", "Commercial", "Transactional", "Navigational"]
            )
        language = st.selectbox(
            "Language",
            ["English", "Spanish", "French", "German", "Italian", "Portuguese", "Hindi"]
        )
        submitted = st.form_submit_button("🚀 Generate Titles", use_container_width=True)

    if submitted:
        if not keywords:
            st.warning("Please enter at least some keywords to generate titles.")
            return

        with st.spinner("🎯 Generating SEO-optimized titles..."):
            try:
                # Import and call the title generation function
                from lib.ai_seo_tools.content_title_generator import generate_blog_titles
                result = generate_blog_titles(
                    input_blog_keywords=keywords,
                    input_blog_content=content if content else None,
                    input_title_type=title_type,
                    input_title_intent=title_intent,
                    input_language=language
                )
            except Exception as e:
                # Drop any stale result so old titles are not shown after a failure.
                st.session_state[result_key] = None
                st.error(f"Error generating titles: {str(e)}")
                logger.error(f"Title generation error: {str(e)}")
                return

        if result:
            st.session_state[result_key] = result
            st.success("✅ Titles generated successfully!")
        else:
            st.session_state[result_key] = None
            st.error("Failed to generate titles. Please try again.")

    # Rendered on every run, so the button below is still present (and its
    # click is seen) on the rerun that follows the click.
    stored_result = st.session_state.get(result_key)
    if stored_result:
        st.markdown("### 🎯 Your SEO-Optimized Titles:")
        # Display the result in a nice format
        st.markdown(f"```\n{stored_result}\n```")
        if st.button("📋 Copy All Titles"):
            st.success("Titles copied to clipboard! (Feature coming soon)")
def render_content_generation_dashboard():
    """Render the content generation dashboard page.

    The page consists of, in order: the hero header, the quick-access area
    (recently used and popular tools), one section of tool buttons per
    category, a row of summary metrics and a closing capability showcase.
    Pressing any tool button hands the tool name to ``handle_tool_selection``.
    """
    # Initialize dashboard state (creates the session-state entries read below).
    dashboard_state = DashboardState()

    # Apply modern CSS
    apply_modern_css()

    # Main dashboard header
    st.markdown("""
    <div class="main-dashboard">
    <div class="dashboard-title">🚀 Alwrity Content Hub</div>
    <div class="dashboard-subtitle">
    Complete AI-powered content creation and SEO optimization suite. From writing to ranking - everything you need in one place.
    </div>
    <div style="display: flex; justify-content: center; gap: 2rem; margin-top: 1rem; flex-wrap: wrap;">
    <div style="text-align: center;">
    <div style="font-size: 2rem;">✍️</div>
    <div style="font-size: 0.9rem; opacity: 0.8;">AI Writing</div>
    </div>
    <div style="text-align: center;">
    <div style="font-size: 2rem;">🔍</div>
    <div style="font-size: 0.9rem; opacity: 0.8;">SEO Tools</div>
    </div>
    <div style="text-align: center;">
    <div style="font-size: 2rem;">📱</div>
    <div style="font-size: 0.9rem; opacity: 0.8;">Social Media</div>
    </div>
    <div style="text-align: center;">
    <div style="font-size: 2rem;">📊</div>
    <div style="font-size: 0.9rem; opacity: 0.8;">Analytics</div>
    </div>
    </div>
    </div>
    """, unsafe_allow_html=True)

    # Quick access section
    st.markdown("""
    <div class="quick-access">
    <div class="section-title">⚡ Quick Access</div>
    </div>
    """, unsafe_allow_html=True)

    # Recent tools
    if st.session_state.get('recent_tools'):
        st.markdown("### 📝 Recently Used")
        cols = st.columns(min(len(st.session_state.recent_tools), 5))
        for idx, tool in enumerate(st.session_state.recent_tools[:5]):
            with cols[idx]:
                if st.button(f"🔄 {tool}", key=f"recent_{tool}_{idx}"):
                    handle_tool_selection(tool, dashboard_state)

    # Popular tools
    popular_tools = ToolAnalytics.get_popular_tools()
    if popular_tools:
        st.markdown("### 🔥 Popular Tools")
        cols = st.columns(min(len(popular_tools), 5))
        for idx, tool in enumerate(popular_tools[:5]):
            with cols[idx]:
                if st.button(f"{tool}", key=f"popular_{tool}_{idx}"):
                    handle_tool_selection(tool, dashboard_state)

    # Content tools by category
    content_tools = {
        "Text Generation": {
            "tools": [
                {"name": "AI Blog Writer", "icon": "✍️", "desc": "Create SEO-optimized blog posts with AI assistance"},
                {"name": "AI Essay Writer", "icon": "📝", "desc": "Generate academic essays and research papers"},
                {"name": "AI News Writer", "icon": "📰", "desc": "Write breaking news articles and reports"},
                {"name": "AI Content Team", "icon": "👥", "desc": "Collaborative AI writing team for complex projects"}
            ]
        },
        "SEO & Optimization": {
            "tools": [
                {"name": "SEO Dashboard", "icon": "🔍", "desc": "Comprehensive SEO tools and analytics dashboard"},
                {"name": "On-Page SEO Analyzer", "icon": "📊", "desc": "Analyze and optimize individual page SEO elements"},
                {"name": "AI Title Generator", "icon": "🏷️", "desc": "Generate SEO-optimized titles for better rankings"},
                {"name": "Meta Description Generator", "icon": "📄", "desc": "Create compelling meta descriptions that drive clicks"},
                {"name": "Structured Data Generator", "icon": "🏗️", "desc": "Generate schema markup for rich search results"},
                {"name": "Page Speed Insights", "icon": "", "desc": "Analyze and improve website performance metrics"},
                {"name": "Enterprise SEO Suite", "icon": "🏢", "desc": "Advanced SEO workflows for enterprise needs"}
            ]
        },
        "Business Content": {
            "tools": [
                {"name": "Financial TA Writer", "icon": "📊", "desc": "Generate technical analysis reports for stocks"},
                {"name": "Email Templates", "icon": "📧", "desc": "Professional email templates for business"},
                {"name": "Press Releases", "icon": "📢", "desc": "Company announcements and press releases"},
                {"name": "Landing Page Copy", "icon": "🌐", "desc": "High-converting landing page content"}
            ]
        },
        "Social Media": {
            "tools": [
                {"name": "Facebook Writer", "icon": "📘", "desc": "Facebook posts, ads, and content strategies"},
                {"name": "LinkedIn Writer", "icon": "💼", "desc": "Professional LinkedIn articles and posts"},
                {"name": "Twitter Writer", "icon": "🐦", "desc": "Engaging tweets and Twitter threads"},
                {"name": "Instagram Writer", "icon": "📷", "desc": "Instagram captions and story content"},
                {"name": "YouTube Writer", "icon": "🎬", "desc": "YouTube descriptions and video scripts"},
                {"name": "OpenGraph Tags", "icon": "🔗", "desc": "Optimize social media sharing with Open Graph tags"},
                {"name": "Twitter Cards Generator", "icon": "🐦", "desc": "Create Twitter Card markup for rich previews"}
            ]
        },
        "Creative Content": {
            "tools": [
                {"name": "Story Generator", "icon": "📚", "desc": "Creative short stories and narratives"},
                {"name": "Poetry Writer", "icon": "🎭", "desc": "Beautiful poems and verses"},
                {"name": "Script Writer", "icon": "🎬", "desc": "Scripts for videos, plays, and presentations"},
                {"name": "Song Lyrics", "icon": "🎵", "desc": "Original song lyrics and musical content"}
            ]
        }
    }

    # Render categories
    for category, category_data in content_tools.items():
        st.markdown(f"""
        <div class="category-section">
        <div class="category-header">{category}</div>
        <div class="category-grid">
        """, unsafe_allow_html=True)

        # Lay the tools out in at most three columns, wrapping row by row.
        tools = category_data["tools"]
        cols = st.columns(min(len(tools), 3))
        for idx, tool in enumerate(tools):
            col_idx = idx % 3
            with cols[col_idx]:
                # Create tool card with button
                if st.button(
                    f"{tool['icon']} {tool['name']}\n{tool['desc']}",
                    key=f"tool_{tool['name']}_{category}",
                    help=tool['desc']
                ):
                    handle_tool_selection(tool['name'], dashboard_state)

        st.markdown("</div></div>", unsafe_allow_html=True)

    # Footer with statistics
    st.markdown("---")
    st.markdown("### 📈 Alwrity Analytics")
    col1, col2, col3, col4 = st.columns(4)
    total_tools = len(get_tool_implementations())
    with col1:
        st.metric("🛠️ Total Tools", total_tools)
    with col2:
        # Fixed figure maintained by hand; it is not derived from content_tools.
        st.metric("🔍 SEO Tools", 12)  # Based on our SEO tool count
    with col3:
        st.metric("📝 Recent Tools", len(st.session_state.get('recent_tools', [])))
    with col4:
        st.metric("⭐ Favorites", len(st.session_state.get('favorite_tools', [])))

    # Add capability showcase
    st.markdown("""
    <div style="background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); padding: 1.5rem; border-radius: 10px; margin-top: 1rem;">
    <h4 style="color: #2c3e50; margin-bottom: 1rem;">✨ Why Choose Alwrity?</h4>
    <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 1rem;">
    <div>
    <strong>🎯 All-in-One Solution</strong><br>
    <small>Content creation, SEO optimization, and social media management in one platform</small>
    </div>
    <div>
    <strong>🤖 AI-Powered Intelligence</strong><br>
    <small>Advanced AI models for content generation and SEO analysis</small>
    </div>
    <div>
    <strong>📊 Enterprise-Ready</strong><br>
    <small>Scalable tools designed for teams and enterprise workflows</small>
    </div>
    <div>
    <strong>🚀 Continuously Updated</strong><br>
    <small>Regular updates with new tools and enhanced capabilities</small>
    </div>
    </div>
    </div>
    """, unsafe_allow_html=True)
class DashboardState:
    """Keep per-session dashboard data in ``st.session_state``.

    Three entries are maintained: ``recent_tools`` (list, newest first),
    ``favorite_tools`` (list) and ``tool_usage_count`` (dict of name -> count).
    """

    def __init__(self):
        self.initialize_session_state()

    def initialize_session_state(self):
        """Create each session-state entry that is not present yet."""
        # Factories (not shared instances) so every session gets its own containers.
        defaults = (
            ('recent_tools', list),
            ('favorite_tools', list),
            ('tool_usage_count', dict),
        )
        for key, factory in defaults:
            if key not in st.session_state:
                st.session_state[key] = factory()

    def add_recent_tool(self, tool_name: str):
        """Put ``tool_name`` at the front of the recent list, without duplicates."""
        recent = st.session_state.recent_tools
        if tool_name in recent:
            recent.remove(tool_name)
        recent.insert(0, tool_name)
        # Only the five most recently used tools are retained.
        st.session_state.recent_tools = recent[:5]

    def toggle_favorite(self, tool_name: str):
        """Add ``tool_name`` to the favorites, or remove it if already there."""
        favorites = st.session_state.favorite_tools
        if tool_name not in favorites:
            favorites.append(tool_name)
        else:
            favorites.remove(tool_name)

    def increment_usage(self, tool_name: str):
        """Add one to the usage counter of ``tool_name`` (starting from zero)."""
        counts = st.session_state.tool_usage_count
        counts[tool_name] = counts.get(tool_name, 0) + 1
class ToolAnalytics:
    """Analytics for tool usage and recommendations."""

    @staticmethod
    def get_popular_tools(limit: int = 5) -> List[str]:
        """Return the names of the most used tools, most used first.

        Usage counts are read from ``st.session_state['tool_usage_count']``.
        When nothing has been recorded yet, a fixed list of default tools is
        used instead.

        Args:
            limit: Maximum number of tool names to return.

        Returns:
            At most ``limit`` tool names.
        """
        usage_count = st.session_state.get('tool_usage_count', {})
        if not usage_count:
            # Default popular tools showcasing Alwrity's key capabilities.
            default_popular = [
                "AI Blog Writer",
                "SEO Dashboard",
                "AI Title Generator",
                "Meta Description Generator",
                "On-Page SEO Analyzer",
            ]
            # Honour `limit` on the fallback path too, like the ranked path below.
            return default_popular[:limit]
        ranked = sorted(usage_count.items(), key=lambda item: item[1], reverse=True)
        return [name for name, _count in ranked[:limit]]
def apply_modern_css():
    """Inject the dashboard stylesheet into the current Streamlit page.

    The CSS is emitted as a single ``<style>`` element through ``st.markdown``
    with ``unsafe_allow_html=True``.
    """
    # The stylesheet is static text; nothing is interpolated into it.
    dashboard_css = """
<style>
/* Main dashboard styling */
.main-dashboard {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem;
border-radius: 15px;
margin-bottom: 2rem;
color: white;
}
.dashboard-title {
font-size: 3rem;
font-weight: 700;
text-align: center;
margin-bottom: 1rem;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
.dashboard-subtitle {
font-size: 1.2rem;
text-align: center;
opacity: 0.9;
margin-bottom: 2rem;
}
/* Tool cards */
.tool-card {
background: white;
border-radius: 12px;
padding: 1.5rem;
margin: 0.5rem;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
transition: all 0.3s ease;
cursor: pointer;
border: 2px solid transparent;
height: 200px;
display: flex;
flex-direction: column;
justify-content: space-between;
}
.tool-card:hover {
transform: translateY(-5px);
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15);
border-color: #667eea;
}
.tool-icon {
font-size: 2.5rem;
text-align: center;
margin-bottom: 1rem;
}
.tool-title {
font-size: 1.1rem;
font-weight: 600;
color: #333;
text-align: center;
margin-bottom: 0.5rem;
}
.tool-description {
font-size: 0.9rem;
color: #666;
text-align: center;
line-height: 1.4;
}
/* Quick access section */
.quick-access {
background: #f8f9fa;
border-radius: 10px;
padding: 1.5rem;
margin-bottom: 2rem;
}
.section-title {
font-size: 1.5rem;
font-weight: 600;
color: #333;
margin-bottom: 1rem;
display: flex;
align-items: center;
gap: 0.5rem;
}
/* Recent tools styling */
.recent-tool {
background: linear-gradient(135deg, #ff6b6b, #ee5a24);
color: white;
padding: 0.75rem 1rem;
border-radius: 8px;
margin: 0.25rem;
font-weight: 500;
text-align: center;
cursor: pointer;
transition: all 0.3s ease;
}
.recent-tool:hover {
transform: scale(1.05);
box-shadow: 0 4px 12px rgba(255, 107, 107, 0.4);
}
/* Category sections */
.category-section {
margin-bottom: 3rem;
}
.category-header {
background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
color: white;
padding: 1rem 1.5rem;
border-radius: 10px 10px 0 0;
font-size: 1.3rem;
font-weight: 600;
}
.category-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
gap: 1rem;
padding: 1.5rem;
background: #f8f9fa;
border-radius: 0 0 10px 10px;
}
/* Responsive design */
@media (max-width: 768px) {
.dashboard-title {
font-size: 2rem;
}
.category-grid {
grid-template-columns: 1fr;
}
.tool-card {
height: auto;
min-height: 150px;
}
}
/* Success and info messages */
.success-message {
background: linear-gradient(135deg, #56ab2f, #a8e6cf);
color: white;
padding: 1rem;
border-radius: 8px;
margin: 1rem 0;
}
.info-message {
background: linear-gradient(135deg, #74b9ff, #0984e3);
color: white;
padding: 1rem;
border-radius: 8px;
margin: 1rem 0;
}
</style>
"""
    st.markdown(dashboard_css, unsafe_allow_html=True)
def handle_tool_selection(tool_name: str, dashboard_state: DashboardState):
    """Record a tool selection and run the selected tool.

    Args:
        tool_name: Key of the tool in the mapping returned by
            ``get_tool_implementations()``.
        dashboard_state: State object whose recent-tool list and usage
            counter are updated before the tool is launched.

    Errors are reported in the UI and logged; nothing is re-raised.
    """
    try:
        # Usage statistics are updated even if the tool turns out to be unavailable.
        dashboard_state.add_recent_tool(tool_name)
        dashboard_state.increment_usage(tool_name)

        available_tools = get_tool_implementations()
        if tool_name not in available_tools:
            st.warning(f"Tool '{tool_name}' is not available yet.")
            return

        st.markdown(f"<div class='success-message'>🚀 Launching {tool_name}...</div>", unsafe_allow_html=True)
        # Keep a spinner on screen while the tool callable runs.
        with st.spinner(f"Loading {tool_name}..."):
            try:
                available_tools[tool_name]()
                logger.info(f"Successfully launched tool: {tool_name}")
            except Exception as e:
                # Any failure inside the tool itself is handled here.
                st.error(f"Error running {tool_name}: {str(e)}")
                logger.error(f"Error running tool {tool_name}: {e}")
    except ImportError as e:
        st.error(f"Unable to load {tool_name}. Some dependencies may be missing.")
        logger.error(f"Import error for {tool_name}: {e}")
    except Exception as e:
        st.error(f"An unexpected error occurred: {str(e)}")
        logger.error(f"Unexpected error in tool selection: {e}")
# Main entry point: render the dashboard only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    render_content_generation_dashboard()

View File

@@ -1,92 +0,0 @@
Overview
The AI Writer Blog Post-Processing module provides various utilities for enhancing, formatting, and managing blog content. The tools available in this module help automate tasks such as proof-reading, converting content to Markdown, converting Markdown to HTML, humanizing blog content, and saving processed blog content to a file.
Modules
1. blog_proof_reader.py
Description:
This module provides functionality for proofreading blog content. It corrects grammar, enhances vocabulary, improves sentence structure, aligns tone and brand voice, optimizes content structure, and simplifies concepts.
Usage:
```
from blog_proof_reader import blog_proof_editor
# Example usage
blog_content = "Your raw blog content here"
edited_content = blog_proof_editor(blog_content)
print(edited_content)
```
2. convert_content_to_markdown.py
Description:
This module converts blog content to Markdown format to enhance readability and visual appeal. It follows best practices for structuring content using Markdown.
Usage:
```
from convert_content_to_markdown import convert_tomarkdown_format
# Example usage
blog_content = "Your raw blog content here"
markdown_content = convert_tomarkdown_format(blog_content, gpt_provider="openai")
print(markdown_content)
```
3. convert_markdown_to_html.py
Description:
This module converts Markdown content to HTML by sending it, together with a set of conversion guidelines, to an LLM (via `openai_chatgpt`) and returning the generated HTML.
Usage:
```
from convert_markdown_to_html import convert_markdown_to_html
# Example usage
markdown_content = "Your Markdown content here"
html_content = convert_markdown_to_html(markdown_content)
print(html_content)
```
4. humanize_blog.py
Description:
This module "humanizes" blog content by avoiding overused and robotic phrases, replacing them with more natural language to improve readability and engagement.
Usage:
```
from humanize_blog import blog_humanize
# Example usage
blog_content = "Your raw blog content here"
humanized_content = blog_humanize(blog_content)
print(humanized_content)
```
5. save_blog_to_file.py
Description:
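This module saves processed blog content as a Markdown file with YAML front matter (title, date, categories, tags, description and, optionally, the main image). The output directory is read from the `CONTENT_SAVE_DIR` environment variable.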
Usage:
```
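from save_blog_to_file import save_blog_to_file
# Example usage (CONTENT_SAVE_DIR must point to an existing directory)
blog_content = "Your processed blog content here"
saved_path = save_blog_to_file(blog_content, "My Blog Title", "A short meta description", "tag1, tag2", "category1")
print(saved_path)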
```
~/AI-Writer/lib/blog_postprocessing
├── blog_proof_reader.py
├── convert_content_to_markdown.py
├── convert_markdown_to_html.py
├── humanize_blog.py
└── save_blog_to_file.py
This README file should help you understand the purpose and functionality of each module within the AI Writer Blog Post-Processing directory. Adjust the usage examples and descriptions as per the actual implementations and additional details of your modules.

View File

@@ -1,120 +0,0 @@
import os
import sys
import configparser
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def blog_proof_editor(blog_content):
    """Proofread and rewrite blog content with the configured LLM.

    The blog language, tone and target length are read from the
    ``blog_characteristics`` section of the ``main_config`` file located two
    directories above this module. They are embedded, together with
    ``blog_content``, in an editing prompt that is sent to ``llm_text_gen``.

    Args:
        blog_content (str): Raw blog text to be edited.

    Returns:
        Whatever ``llm_text_gen`` returns, or ``None`` if that call failed.

    Raises:
        configparser.Error: If the config has no ``blog_characteristics``
            section or lacks one of the options read below.
    """
    # Local import: this module defines no logger of its own, and the
    # error handler at the bottom previously referenced an undefined name.
    import logging

    # Bind `config` before the try block so it always exists afterwards.
    config = configparser.ConfigParser()
    try:
        config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'main_config'))
        config.read(config_path, encoding='utf-8')
    except Exception as err:
        print(f"ProofReader: Failed to read values from config: {err}")

    prompt = f"""As an expert content writer and editor, I will provide you with 'my blog' content.
Your task is to rewrite my blog, by following the guidelines below.
Below are the guidelines to follow:
1). You must respond in {config.get('blog_characteristics', 'blog_language')} language.
2). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
3). Improve Sentence Structure: Enhance sentence construction for better clarity and conversational flow.
4). Tone and Brand Alignment: Adjust tone, voice, personality for {config.get('blog_characteristics', 'blog_tone')} audience.
5). Optimize Content Structure: Reorganize content for more impactful presentation, including better paragraphing & transitions.
6). Simplify content: Simplify concepts and replace overly complex words. Use simple english words.
7). Make sure your response content length is of {config.get('blog_characteristics', 'blog_length')} words.
\n\nMy Blog: '{blog_content}'. """

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # Best effort: report the failure and fall through to a None result.
        logging.getLogger(__name__).error(f"Error Blog Proof Reading: {err}")
        return None
import streamlit as st
import requests
from bs4 import BeautifulSoup
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.util import ngrams
from langchain.llms import OpenAI
from langchain.chains import ConversationChain
# ... (rest of your code)
# NOTE(review): `conversation_chain` is not defined anywhere in the visible
# code; presumably it is a langchain ConversationChain built in the elided
# section above -- confirm before running.
# NOTE(review): none of the prompts below contains the article text itself;
# presumably it was given to the chain earlier -- confirm.
if st.button("Analyze with AI!"):
    # ... (fetch and process content as before)
    with st.spinner('Analyzing your content...'):
        st.subheader("AI Insights:")
        st.write(" ")
        # 1. Overall Critique
        st.markdown("**Overall Evaluation:**")
        ai_overall = conversation_chain.run(f"""Analyze the provided article and give a constructive critique, focusing on its strengths and weaknesses regarding:
* Informativeness: Does it offer valuable information the reader might not know, or strengthen their understanding?
* Authority: Does the author demonstrate expertise and credibility, backing up claims with evidence?
* Captivatingness: Does it effectively engage the reader, capture attention, and make them want to continue reading?
Provide specific examples to support your evaluation.
""")
        st.markdown(f" {ai_overall}")
        st.write(" ")
        # 2. Structure & Organization
        st.markdown("**Structure and Organization:**")
        ai_structure = conversation_chain.run(f"""Analyze the structure and organization of the provided article.
* Does it flow logically, with a clear beginning, middle, and end?
* Are subheadings effectively used to break down the content and guide the reader?
* Is the writing style consistent throughout the article?
Suggest improvements for clarity and readability.
""")
        st.markdown(f" {ai_structure}")
        st.write(" ")
        # 3. Content Quality
        st.markdown("**Content Quality:**")
        ai_content = conversation_chain.run(f"""Critique the content of the article, considering:
* Is the value of the article clear?
* Does it address a pain point or a need for the target audience?
* Are the arguments compelling and supported by evidence or examples?
* Are any technical terms explained well?
Identify areas where the content could be strengthened or improved.
""")
        st.markdown(f" {ai_content}")
        st.write(" ")
        # 4. Call to Action & Headline
        st.markdown("**Headline and Call to Action:**")
        ai_headline = conversation_chain.run(f"""Evaluate the effectiveness of the headline and call to action (CTA) in the provided article.
* Does the headline accurately and compellingly summarize the article's content?
* Is the CTA clear, actionable, and positioned well within the text?
Provide suggestions for improving the headline and CTA.
""")
        st.markdown(f" {ai_headline}")
        st.write(" ")
        # 5. Writing Style & Tone
        st.markdown("**Writing Style and Tone:**")
        ai_style = conversation_chain.run(f"""Assess the overall writing style and tone of the article.
* Does it use jargon or overly technical language that might be inaccessible to the target audience?
* Is the tone appropriate for the topic and target audience (e.g., professional, conversational, humorous)?
* Is the writing clear, concise, and engaging?
Suggest ways to improve the writing style and make the article more accessible and compelling for the intended reader.
""")
        st.markdown(f" {ai_style}")
# --- Display Keyword Results (same as before) ---
# ... (rest of your code)

View File

@@ -1,75 +0,0 @@
from .gpt_providers.openai_chat_completion import openai_chatgpt
from .gpt_providers.gemini_pro_text import gemini_text_response
def convert_tomarkdown_format(blog_content, gpt_provider="openai"):
    """Ask an LLM to reformat blog content as Markdown for static sites.

    Args:
        blog_content (str): Blog text to reformat; it is embedded verbatim
            in the prompt.
        gpt_provider (str): Provider selector, matched case-insensitively by
            substring: a value containing ``"openai"`` uses ``openai_chatgpt``,
            one containing ``"gemini"`` uses ``gemini_text_response``.

    Returns:
        The provider's response, or ``None`` when ``gpt_provider`` matches
        neither provider.

    Raises:
        SystemError: If the provider call fails. The original exception is
            attached as ``__cause__``.
    """
    provider = gpt_provider.lower()

    if 'openai' in provider:
        prompt = f"""As an expert in markdown language format and font matter,
I will provide you with a blog post.
Your task is to only Improve the formatting and structure of a blog post to enhance readability, visual appeal, and overall user experience. Do not alter the content of the provided blog. Modify only for the formatting.
Dont provide explanations, just your final response.
Guidelines to do formatting:
1. **Headings for Structure:**
- Use # for the main title of the blog post.
- Use ## for subheadings that divide the post into clear sections.
- Use ###, ####, etc. for additional subheadings as needed.
- Keep the headings concise and descriptive.
2. **Emphasizing Text:**
- Use * or _ for italicizing important words or phrases.
- Use ** or __ for bolding key points.
- Use *** or ___ for bold italicizing very important text.
- Use sparingly to avoid overwhelming the reader.
3. **Lists:**
- Use - or * for unordered lists.
- Use 1., 2., etc. for ordered lists.
- Keep list items concise and to the point.
- Use consistent formatting for all lists.
4. **Blockquotes:**
- Use > to indent and highlight quotes or important information.
- Use additional > for nested blockquotes.
- Attribute quotes to their original source if applicable.
5. **Code Blocks:**
- Use backticks ` for inline code.
- Use triple backticks ``` for code blocks.
- Specify the language of the code block for syntax highlighting, e.g., ```python```.
- Use code blocks to display code snippets or technical information.
6. **Horizontal Lines:**
- Use three or more asterisks, dashes, or underscores to create a horizontal line, e.g., ***, ---, or ___
- Use horizontal lines to separate different sections of the blog post.
7. **Table Formatting:**
- Use pipes | and dashes - to create tables.
- Align text within columns using colons :.
- Use tables to present data or information in a structured format.
8. **Other Best Practices:**
- Use emojis sparingly and appropriately to add visual interest and enhance the reader's experience.
- Proofread carefully for any errors in grammar, spelling, or formatting.
- Keep the blog post organized and easy to navigate.
- Use a consistent formatting style throughout the post.
Blog Post: '{blog_content}'"""
        try:
            return openai_chatgpt(prompt)
        except Exception as err:
            # The exception used to be constructed but never raised, which
            # silently turned every failure into a None result.
            raise SystemError("Openai Error in converting to Markdown format.") from err

    if 'gemini' in provider:
        # Gemini gets a much shorter instruction than the OpenAI prompt above.
        prompt = f""" Convert the given blog post into well structured MARKDOWN content.
Do not alter the given blog post.
blog post: "{blog_content}" """
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            raise SystemError("Gemini Error in converting to Markdown format.") from err

    # Unknown provider: unchanged behaviour, nothing is generated.
    return None

View File

@@ -1,37 +0,0 @@
from .gpt_providers.openai_chat_completion import openai_chatgpt
def convert_markdown_to_html(md_content):
    """Convert Markdown text to HTML by prompting an LLM.

    Args:
        md_content (str): Markdown text; it is embedded verbatim at the end
            of the conversion prompt.

    Returns:
        The response of ``openai_chatgpt`` for the conversion prompt.

    Raises:
        SystemError: If the provider call fails. The original exception is
            attached as ``__cause__``.
    """
    prompt =f"""
You are a skilled web developer tasked with converting a Markdown-formatted text to HTML.
You will be given text in markdown format. Follow these steps to perform the conversion:
1. Parse User's Markdown Input: You will receive a Markdown-formatted text as input from the user.
Carefully analyze the provided Markdown text, paying attention to different elements such as headings (#),
lists (unordered and ordered), bold and italic text, links, images, and code blocks.
2. Generate and Validate HTML: Generate corresponding HTML code for each Markdown element following
the conversion guidelines below. Ensure the generated HTML is well-structured and syntactically correct.
3. Preserve Line Breaks: Markdown line breaks (soft breaks) represented by two spaces at the end of a
line should be converted to <br> tags in HTML to preserve the line breaks.
4. REMEMBER to generate complete, valid HTML response only.
Follow below Conversion Guidelines:
- Headers: Convert Markdown headers (#, ##, ###, etc.) to corresponding HTML header tags (<h1>, <h2>, <h3>, etc.).
- Lists: Convert unordered lists (*) and ordered lists (1., 2., 3., etc.) to <ul> and <ol> HTML tags, respectively.
List items should be enclosed in <li> tags.
- Emphasis: Convert bold (**) and italic (*) text to <strong> and <em> HTML tags, respectively.
- Links: Convert Markdown links ([text](url)) to HTML anchor (<a>) tags. Ensure the href attribute contains the correct URL.
- Images: Convert Markdown image tags (![alt text](image_url)) to HTML image (<img>) tags.
Include the alt attribute for accessibility.
- Code: Convert inline code (`code`) to <code> HTML tags. Convert code blocks (```) to <pre> HTML tags
for preserving formatting.
- Blockquotes: Convert blockquotes (>) to <blockquote> HTML tags.
Convert the following Markdown text to HTML: {md_content}
"""
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception used to be constructed but never raised, so callers
        # received None with no indication that the conversion had failed.
        raise SystemError("Error in convert to HTML") from err

View File

@@ -1,34 +0,0 @@
import os
import sys
from loguru import logger
# Remove the handlers registered so far so the sink added below is the only
# one configured by this module.
logger.remove()
# Log to stdout, colourised, with each record prefixed by its level and
# the file:line:function it came from.
logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def blog_humanize(blog_content):
    """Rewrite blog content so that it avoids a fixed list of cliched phrases.

    The prompt carries an "Exception-list" of overused words and phrases and
    instructs the LLM to replace every occurrence of them in ``blog_content``.

    Args:
        blog_content (str): Blog text to rewrite; embedded verbatim in the
            prompt.

    Returns:
        The response of ``llm_text_gen`` for the prompt.

    Raises:
        Exception: Any error raised by ``llm_text_gen`` is logged and
            re-raised unchanged.
    """
    prompt = f"""The following is what I will refer to as an 'Exception-list'.
Do Not include any of the words or phrases on this list in your future responses to this chat thread.
These instructions are critical and require absolute adherence!
Exception-list: ["Informed decisions", "blueprint", "realm", "holistic", "fosters", "informed investment decisions", "informed", "more than just", "its about" "navigating", "beacon", "bustling", "treasure trove", "landscape", "tailored", "tailor", “roadmap” , “are more than just”, "tailoring", "dive in", "delving", “streamlining” "dynamic", "robust", "stay tuned", "in conclusion", "seamless", "bustling", “isn't just”, “not just a”, “isn't merely an”, “cornerstone”, “bridge”, “whopping”, “testament”, “paramount” ,“diving into”, “delve into”, “pivotal”, “navigating”, “This isn't a”, “isn't just about“ ,“dives deep”, "It's not just about", “delve”, “harness”, journey”, “elevate”, “maze”, “puzzle”, “overwhelmed” , "and other robotic cliches”, 'Its important to note', 'Delve into', 'Tapestry', 'Bustling', 'In summary', 'In conclusion', 'Unleash', 'Unveiling', 'ever-evolving', '', 'Remember that', 'Take a dive into', 'Navigating', 'Navigating the landscape', 'Navigating the complexities of', 'Landscape', 'The landscape of', 'Testament', 'a testament to', 'In the world of', 'Realm', 'Embark', 'virtuoso', 'Let's explore', 'symphony', 'Harnessing', 'Revolutionizing', 'Empower', 'game changing', 'ever-changing', 'Embrace', 'Embracing', 'game-changing', 'ever-evolving']
As an expert content writer and editor, I will provide you with blog content.
Your task is to replace all occurances of words from Exception-list from given blog content below.
Before generating any text, examine the Exception-list and avoid all cases of these words and phrases.
\n\nBlog Content: '{blog_content}'
"""
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        # The message previously said "Openai Error Blog Proof Reading", a
        # leftover from the proofreader that pointed at the wrong function.
        logger.error(f"Error humanizing blog content: {err}")
        # Bare raise keeps the original traceback intact.
        raise
View File

@@ -1,111 +0,0 @@
import sys
import os
import re
import datetime
import random
from dateutil.relativedelta import relativedelta
from textwrap import dedent
import logging
from zoneinfo import ZoneInfo
from loguru import logger
# Remove the handlers registered so far so the sink added below is the only
# one configured by this module.
logger.remove()
# Log to stdout, colourised, with each record prefixed by its level and
# the file:line:function it came from.
logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
def random_date_last_three_months():
    """Return a random timestamp from the last three months as a string.

    The window ends at the current time in the ``Asia/Kolkata`` zone and the
    result is formatted as ``%Y-%m-%d %H:%M:%S %z``.
    """
    now = datetime.datetime.now(ZoneInfo('Asia/Kolkata'))
    window_start = now - relativedelta(months=3)
    # Pick a whole-second offset anywhere inside the window (both ends included).
    window_seconds = int((now - window_start).total_seconds())
    offset = datetime.timedelta(seconds=random.randint(0, window_seconds))
    return (window_start + offset).strftime('%Y-%m-%d %H:%M:%S %z')
def _slugify_blog_title(blog_title):
    """Return a filename-safe slug: spaces become hyphens, other non-alphanumerics are dropped."""
    slug = re.sub('[^A-Za-z0-9-]', '', blog_title.replace(" ", "-"))
    # Replace multiple consecutive dashes with a single dash
    return re.sub('-+', '-', slug)


def _front_matter_list(values):
    """Render tags/categories for a ``[...]`` front matter list; non-sequences pass through via ``str``."""
    if isinstance(values, (list, tuple, set)):
        return ", ".join(str(value) for value in values)
    return str(values)


def _build_front_matter(blog_title, formatted_date, blog_meta_desc, blog_tags, blog_categories, main_img_path=None):
    """Assemble the front matter block, terminated by a blank line."""
    description = blog_meta_desc.replace(":", "-").replace('**', '')
    lines = [
        "---",
        f"title: {blog_title}",
        f"date: {formatted_date}",
        f"categories: [{_front_matter_list(blog_categories)}]",
        f"tags: [{_front_matter_list(blog_tags)}]",
        f"description: {description}",
    ]
    if main_img_path:
        lines += [
            "img_path: '/assets/'",
            "image:",
            # Nested keys must be indented to belong to the `image` mapping.
            f"  path: {os.path.basename(main_img_path)}",
            f"  alt: {blog_title}",
        ]
    lines.append("---")
    return "\n".join(lines) + "\n\n"


def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_categories, main_img_path=None, file_type="md"):
    """
    Saves the provided blog content, preceded by front matter, to a Markdown file.

    The file is written to the directory named by the ``CONTENT_SAVE_DIR``
    environment variable as ``<today>-<sanitized-title>.md``. The front matter
    date is a random timestamp from the last three months.

    Args:
        blog_content (str): The main content of the blog.
        blog_title (str): Title of the blog.
        blog_meta_desc (str): Meta description of the blog.
        blog_tags (str or list): Tags; a list is rendered comma-separated.
        blog_categories (str or list): Categories; a list is rendered comma-separated.
        main_img_path (str, optional): Path to the main image; only its base
            name is written to the front matter.
        file_type (str, optional): Output format. Only 'md' is implemented.

    Returns:
        str: Path of the written file.

    Raises:
        ValueError: If file_type is not 'md'.
        FileNotFoundError: If CONTENT_SAVE_DIR is unset or does not exist.
        Exception: If the blog content cannot be written to the file.
    """
    # Fail early and explicitly; other formats used to end in an
    # UnboundLocalError once the output path was needed.
    if file_type != "md":
        raise ValueError(f"Unsupported file_type: {file_type!r}. Only 'md' is supported.")

    blog_title_md = _slugify_blog_title(blog_title)
    logger.debug(f"Blog Title is: {blog_title_md}")

    # An unset variable yields None, which os.path.exists() rejects with a
    # TypeError, so test for it before touching the filesystem.
    output_path = os.getenv('CONTENT_SAVE_DIR')
    if not output_path or not os.path.exists(output_path):
        logger.error(f"Error: Blog output directory is set to {output_path}, which does not exist.")
        raise FileNotFoundError(f"Output directory does not exist: {output_path}")

    logger.info("Writing/Saving the resultant blog content in Markdown format.")
    # Bulk generation benefits from randomized publishing dates.
    formatted_date = random_date_last_three_months()
    blog_title = blog_title.replace(":", "-").replace('"', '').replace('**', '')
    # Both variants now end with a blank line; the image-less one used to be
    # stripped, which glued the content onto the closing '---'.
    blog_frontmatter = _build_front_matter(
        blog_title, formatted_date, blog_meta_desc, blog_tags, blog_categories, main_img_path
    )
    blog_output_path = os.path.join(
        output_path,
        f"{datetime.date.today().strftime('%Y-%m-%d')}-{blog_title_md}.md"
    )

    # Write to the file
    try:
        with open(blog_output_path, "w", encoding="utf-8") as f:
            f.write(blog_frontmatter)
            f.write(blog_content)
    except Exception as e:
        raise Exception(f"Failed to write blog content: {e}") from e

    logger.info(f"Successfully saved and posted blog at: {blog_output_path}")
    return blog_output_path

View File

@@ -1,113 +0,0 @@
# AI Agents Content Planner
This document describes the `ai_agents_planner` module, a sophisticated tool for creating highly detailed and SEO-optimized content calendars. This module leverages AI agents to perform web research, trend analysis, and content planning.
## Prerequisites
To use this module, ensure the following are installed:
- Python 3.6 or higher
- Streamlit
- Crewai
- Crewai Tools
- Langchain Google GenAI
- Google Gemini API key
## Installation
Install the required Python packages using pip:
```bash
pip install streamlit crewai crewai_tools langchain_google_genai
```
## Environment Setup
Ensure that you have set up the following environment variables:
- `GEMINI_API_KEY`: Your Google Gemini API key.
- `SEARCH_SAVE_FILE`: Path to the file where search results are saved.
## Module Overview
The `ai_agents_planner` module consists of several key functions:
- **create_agents(search_keywords, already_written_on)**
- This function creates the AI agents required for content research and planning. Each agent is assigned a specific role and set of tools to achieve their goals.
- Agents:
- **content_researcher**: Conducts web research to identify content opportunities.
- **content_planner**: Develops a content calendar based on the research.
- **google_trends_researcher**: Analyzes Google Trends data to suggest relevant keywords and titles.
- **content_marketing_manager**: Ensures the content calendar is optimized and avoids keyword cannibalization.
- **create_tasks(agents, search_keywords, already_written_on)**
- This function creates tasks for each agent, including web analysis, Google Trends analysis, content calendar development, and final review.
- **execute_tasks(agents, tasks)**
- Executes the tasks assigned to each agent. The results are compiled into a comprehensive content calendar.
- **ai_agents_planner(search_keywords)**
- The main function that orchestrates the creation of agents, assignment of tasks, and execution of the content planning process. It performs Google Trends analysis and generates the final content calendar.
## Example Usage
To use the `ai_agents_planner` module, follow these steps:
1. Set up the environment variables.
2. Import the module and call the `ai_agents_planner` function with your target keywords.
```python
import os
from your_module import ai_agents_planner
# Set up environment variables
os.environ['GEMINI_API_KEY'] = 'your_google_gemini_api_key'
os.environ['SEARCH_SAVE_FILE'] = '/path/to/search_save_file.txt'
# Run the planner
ai_agents_planner('your_target_keywords')
```
## Detailed Agent Roles and Responsibilities
### Content Researcher: Aisha Sharma
**Role**: Senior Web Research Analyst (Content Strategy)
**Goal**: Create a detailed content calendar focused on specific keywords.
**Responsibilities**:
- Conduct web research and competitor analysis.
- Identify high-value content opportunities.
### Content Planner: Ted XingPi
**Role**: Senior Content Strategist & Planner
**Goal**: Craft a series of content titles for a 2-month-long series.
**Responsibilities**:
- Develop a content calendar with unique and non-repetitive titles.
- Ensure alignment with SEO best practices.
### Google Trends Researcher: Sarah Qureshi
**Role**: Content Marketing & Google Trends Specialist
**Goal**: Analyze Google Trends data and provide keyword recommendations.
**Responsibilities**:
- Identify high-volume, low-competition keywords.
- Collaborate on content strategy and planning.
### Content Marketing Manager: Diksha Yuj
**Role**: Content Marketing Manager
**Goal**: Optimize the content calendar and ensure no keyword cannibalization.
**Responsibilities**:
- Review and finalize the content calendar.
- Ensure all content is unique and SEO-optimized.
## Final Content Calendar
The result of the `ai_agents_planner` module is a highly detailed content calendar that positions your target keywords effectively. The content calendar includes:
- Head Term Keyword
- Long-Tail Keyword
- Blog Post Title
This structured approach ensures a comprehensive content strategy, optimized for search engines and tailored to your audience.
## Conclusion
The `ai_agents_planner` module provides a robust framework for content planning and strategy. By leveraging AI agents and integrating web research, trend analysis, and content planning, it delivers a detailed content calendar tailored to your audience and optimized for search engines.
For further information and detailed documentation, refer to the module's code and comments.

View File

@@ -1,33 +0,0 @@
* **Trending:** How Open-Source AI is Changing the Future of Content Creation
* **Seasonal:** 5 Ways Open-Source AI Can Help You Write More Engaging Holiday Content
* **Trending:** The Best Open-Source AI Writing Tools for Every Need
* **Seasonal:** Open-Source AI Writing: A Threat to Human Writers or a Valuable Tool?
* **Trending:** The Ethics of Open-Source AI Writing: What You Need to Know
* **Seasonal:** How to Use Open-Source AI to Write Festive Social Media Posts
* **Evergreen:** Open-Source AI Writing: The Good, the Bad, and the Ugly
* **Trending:** How Open-Source AI Can Help You Write More Effective Blog Posts
* **Seasonal:** Open-Source AI Writing: How to Create Holiday-Themed Website Content
* **Evergreen:** How to Use Open-Source AI to Write Better Social Media Content
* **Trending:** The Best Open-Source AI Writing Tools for Bloggers
* **Seasonal:** How to Use Open-Source AI to Write Holiday-Themed Email Marketing Campaigns
* **Seasonal:** Open Source AI Writers for Holiday Content Creation
* **Evergreen:** How to Write Great Content with Open Source AI Writers: A Step-by-Step Guide
* **Trending:** The Role of Open Source AI Writers in SEO
* **Seasonal:** Open Source AI Writers for Black Friday and Cyber Monday
* **Trending:** Open Source AI Writers and the Future of Content Consumption
* **Trending:** Open Source AI Writers and the Rise of Personalized Content
* **Trending:** Open Source AI Writers and the Future of Content Strategy
* **Seasonal:** Open Source AI Writers for Back-to-School Content
* **Trending:** Open Source AI Writers and the Rise of AI-Generated Art
* How AI Writers Can Help You Create High-Quality Blog Posts
* The Role of AI Writers in the Future of Content Creation
* AI Writers vs. Human Writers: Which Is Better for Your Content?
* The Dos and Don'ts of Using AI Writers
| 5 | AI writing tool news | News about AI writing tools | The Latest News about AI Writing Tools |
| 6 | AI writing tool resources | Resources for AI writing tools | The Best Resources for AI Writing Tools |
| 6 | AI writing tool community | Community for AI writing tools | The Best Community for AI Writing Tools |
| 6 | AI writing tool support | Support for AI writing tools | The Best Support for AI Writing Tools |
| 7 | AI writing tool training | Training for AI writing tools | The Best Training for AI Writing Tools |
| 7 | AI writing tool certification | Certification for AI writing tools | The Best Certification for AI Writing Tools |
| 7 | AI writing tool courses | Courses for AI writing tools | The Best Courses for AI Writing Tools |
| 8 | AI writing tool workshops | Workshops for AI writing tools | The Best Workshops for AI Writing Tools |

View File

@@ -1,241 +0,0 @@
import os
import streamlit as st
from crewai import Agent, Task, Crew
from crewai_tools import SerperDevTool
from langchain_google_genai import ChatGoogleGenerativeAI
from crewai_tools import ScrapeWebsiteTool
from crewai_tools import FileReadTool
from ..ai_web_researcher.google_trends_researcher import do_google_trends_analysis
def create_agents(search_keywords, already_written_on):
    """Build the four CrewAI agents used to plan a content calender.

    Args:
        search_keywords: Keywords the calender is planned around; interpolated
            into every agent's goal/backstory prompt.
        already_written_on: Path of the file listing previously planned titles;
            handed to a FileReadTool and mentioned in the prompts.

    Returns:
        list: ``[content_researcher, google_trends_researcher, content_planner,
        content_marketing_manager]`` -- ``create_tasks`` indexes this order.
    """
    # Tools for the agents.
    search_tool = SerperDevTool()
    # The trends agent may only read the file named by SEARCH_SAVE_FILE.
    file_read_tool = FileReadTool(file_path=os.getenv('SEARCH_SAVE_FILE'))
    # The planner and manager read the already-planned titles to avoid repeats.
    # TBD: Accept the user website urls and populate the file with sitemap.xml
    manager_read_tool = FileReadTool(file_path=already_written_on)

    # Gemini Pro is the LLM shared by all agents.
    google_api_key = os.getenv("GEMINI_API_KEY")
    llm = ChatGoogleGenerativeAI(
        model="gemini-pro", verbose=True, temperature=0.7, google_api_key=google_api_key
    )

    content_researcher = Agent(
        role='Senior Web Research Analyst (Content Strategy): Aisha Sharma',
        goal=f"""Help Create a highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
Provide web researched titles to be used for content calender & planning to Ted XingPi""",
        backstory=f"""
Your Focus: Content Opportunity Analysis & Keyword Research ({search_keywords}).
Your Skills:
1). Web Research & Content Gap Identification (Expert).
2). SEO Best Practices, Keyword Research & content planning expert (Advanced).
3). Analyzes search trends and competitor content.
4). Fuel company's content strategy with data-driven insights to attract and educate online readers.
5). Identifies high-volume, low-competition keywords relevant to {search_keywords}.
Responsibilities:
1). Recommend high-value content opportunities through in-depth web research and competitor analysis.
2). Provide your research to Senior Content Strategist & planner - Ted XingPi
""",
        tools=[search_tool],
        memory=True,  # Enable memory
        verbose=True,
        max_rpm=None,  # No limit on requests per minute
        max_iter=10,  # Cap on agent iterations
        allow_delegation=False,
        llm=llm
    )

    content_planner = Agent(
        role='Senior Content Strategist & planner - Ted XingPi',
        goal=f"""
Craft a series of content titles around {search_keywords} that can be expanded into 2 month-long series.
Do not repeat the blog titles, always consult the previously written blog titles from the file: {already_written_on}.""",
        # f-string: the original plain string sent the literal text
        # "{search_keywords}" to the model instead of the keywords.
        backstory=f"""You are Ted XingPi, with Experience of 15 years.
Your Skills:
1). Content Opportunity Analysis & Content calender planning (Expert).
2). AI Applications for Content Marketing (Highly Knowledgeable).
3). Content Strategy Development & keyword research for content opportunities.
Your Responsibilties:
1). Employ a balance of head terms (broad topics) and long-tail keywords (specific phrases) for optimal reach and targeting.
2). Review & Include suggestions from Content Marketing & Google Trends Specialist - Sarah Qureshi.
3). Identify content topics and keywords for {search_keywords}.
4). Senior Web Research Analyst (Content Strategy): Aisha Sharma
5). Create content calender that showcases the value proposition around {search_keywords}.
6). New content should target unique keywords to avoid competition with existing content.
7). Focus on specific aspects within a theme to differentiate semantically similar keywords for {search_keywords}.
8). Collaborate with team to identify content gaps and trending topics, relevant to given keywords.
9). Develop content calender with a focus on organic marketing to attract online customers.
10). The content calender should include, Head Term Keyword, Long-Tail Keyword and Blog Post Title.
""",
        memory=True,  # Enable memory
        verbose=True,
        tools=[manager_read_tool],
        max_rpm=None,  # No limit on requests per minute
        max_iter=15,  # Cap on agent iterations
        allow_delegation=False,
        llm=llm
    )

    google_trends_researcher = Agent(
        role='Content Marketing & Google Trends Specialist - Sarah Qureshi.',
        goal=f"""Help Create a highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
Analyse & provide Google trends data for content calender & planning to Ted XingPi""",
        backstory=f"""You are Sarah Qureshi, with 10 years as a content writer and planner.
Your Skills:
1). Proven experience in using Google Trends for keyword research.
2). Strong understanding of SEO best practices.
3). Reading files and understanding long table with data.
Your responsibilties:
1). Collaborate on content strategy, provide keyword, titles recommendations to Ted XingPi.
2). Recommend high-volume, low-competition keywords, titles with strong user intent.
3). Recommend, Rising search queries related to {search_keywords}.
4). Recommend keywords, blog titles for preparing/planning the content calender.
5). Provide your research to Senior Content Strategist & planner - Ted XingPi
""",
        memory=True,  # Enable memory
        tools=[file_read_tool],
        verbose=True,
        max_rpm=None,  # No limit on requests per minute
        max_iter=15,  # Cap on agent iterations
        allow_delegation=False,
        llm=llm
    )

    content_marketing_manager = Agent(
        role="Content Marketing Manager - Diksha Yuj",
        goal=f"""Create highly detailed 2 month-long content calender, focused around keywords: {search_keywords}.
Use insights and context from team members: Sarah Qureshi, Ted XingPi and Aisha Sharma""",
        # f-string: the original plain string left "{already_written_on}" and
        # "{search_keywords}" uninterpolated in the prompt.
        backstory=f"""
Content Marketing Manager: Diksha Yuj
Experience: Digital Marketing Veteran (15+ years)
Mission: Supercharge organic growth of the company, with content marketing.
Responsibilities:
1). Ensures that content titles are not repeated & No keyword cannabilization.
2). Maintains and consults a file for all previous written titles({already_written_on}).
3). Develops a content calendar aligned and optimized around {search_keywords}.
4). Keenly follows & learns the research and communication of other team members.
5). The content calender should include, Head Term Keyword, Long-Tail Keyword and Blog Post Title.
6). Use insights and context from team members: Sarah Qureshi, Ted XingPi and Aisha Sharma
""",
        memory=True,  # Enable memory
        verbose=True,
        tools=[manager_read_tool],
        max_rpm=None,  # No limit on requests per minute
        max_iter=10,  # Cap on agent iterations
        allow_delegation=False,
        llm=llm
    )

    return [content_researcher, google_trends_researcher, content_planner, content_marketing_manager]
def create_tasks(agents, search_keywords, already_written_on):
    """Create one Task per agent, in the order the crew should run them.

    Args:
        agents: Sequence indexed 0-3 as researcher, trends researcher,
            planner, marketing manager.
        search_keywords: Keywords interpolated into the task prompts.
        already_written_on: Path of the previously planned titles file,
            interpolated into the planner and manager prompts.

    Returns:
        list: ``[research_task, google_trends_task, planner_task,
        marketing_manager_task]``.
    """
    # Path of the saved trends data, read from the environment once.
    trends_file = os.getenv('SEARCH_SAVE_FILE')

    # Web research, run by the researcher agent.
    research_task = Task(
        description=f"""Conduct web analysis on "{search_keywords}",for content calender.
Set the input parameter 'search_query' to query""",
        expected_output="""Provide comprehensive content calender ideas to Senior Content Strategist & planner - Ted XingPi""",
        agent=agents[0],
    )

    # Google Trends analysis, run by the trends researcher agent.
    google_trends_task = Task(
        description=f"""Conduct Google Trends analysis, on keywords: {search_keywords}, from the file({trends_file}).
Suggest blog titles for content calender. Recommend high-volume, low-competition keywords with strong user intent.
Set the input parameter 'file_path' to {trends_file}""",
        expected_output="Provide comprehensive content calender ideas to Senior Content Strategist & planner - Ted XingPi",
        agent=agents[1],
    )

    # Calendar drafting, run by the planner agent.
    planner_task = Task(
        description=f"""Develop a content calendar for {search_keywords}, based team member's.
New content should target unique keywords to avoid competition with existing content.
Use context & insights from Aisha Sharma & Sarah Qureshi.
Set the input parameter file_path to {already_written_on}""",
        expected_output=f"""A Highly detailed content calender that positions {search_keywords} as a must-read for industry insiders and newcomers alike. Final content calender for the next 2 months. Targeting 5 articles per week.
""",
        agent=agents[2],
    )

    # Final review, run by the marketing manager agent.
    marketing_manager_task = Task(
        description=f"""Make sure the content calender is optimised for keywords: '{search_keywords}'.
Make sure the titles are unique, semantically unique and mitigate keyword cannabilization.
Use context & insights from Aisha Sharma, Ted XingPi & Sarah Qureshi.
Set the input parameter 'file_path' to {already_written_on}
""",
        expected_output="""Final content calender for the next 2 months. Targeting 5 articles per week.
Make sure to present the content calender in tabular format. Include details of how to use the content calender.
""",
        agent=agents[3],
    )

    return [research_task, google_trends_task, planner_task, marketing_manager_task]
def execute_tasks(agents, tasks):
    """Assemble a Crew from the agents and tasks and run it (WIP).

    Args:
        agents: Agents taking part in the crew.
        tasks: Tasks handed to the crew.

    Returns:
        Whatever ``crew.kickoff()`` returns, or None when kickoff raises
        (the error is printed, not re-raised).
    """
    crew = Crew(
        agents=agents,
        tasks=tasks,
        verbose=2,  # 1 or 2 select different logging levels
        language="en",
    )
    try:
        return crew.kickoff()
    except Exception as err:
        print(err)
    return None
def ai_agents_content_planner(search_keywords):
    """Run the agent pipeline and render the content calender in Streamlit.

    Runs the Google Trends analysis first, then creates agents, creates
    tasks and executes them. A failure in any stage is reported with
    ``st.error`` and stops the pipeline, because later stages cannot run
    without the earlier stage's output.

    Args:
        search_keywords: Keywords to plan the calender around.
    """
    already_written_on = os.path.join(os.getcwd(), "lib", "content_planning_calender", "content_already_planned.txt")
    do_google_trends_analysis(search_keywords)

    try:
        agents = create_agents(search_keywords, already_written_on)
    except Exception as err:
        st.error(f"Failed in Creating in Agents: {err}")
        return  # 'agents' is unbound; continuing would only raise NameError

    try:
        tasks = create_tasks(agents, search_keywords, already_written_on)
    except Exception as err:
        st.error(f"Failed to Create Agent Tasks: {err}")
        return  # 'tasks' is unbound; nothing to execute

    try:
        result = execute_tasks(agents, tasks)
    except Exception as err:
        st.error(f"Failed to execute Agent Tasks: {err}")
        return

    # execute_tasks reports its own failures by returning None.
    if result is None:
        st.error("Failed to execute Agent Tasks: no content calender was produced.")
        return

    st.markdown("### Final Content Calender:")
    st.markdown(result)

View File

@@ -1,309 +0,0 @@
"""
Gemini Audio Text Generation Module
This module provides a comprehensive interface for working with audio files using Google's Gemini API.
It supports various audio processing capabilities including transcription, summarization, and analysis.
Key Features:
------------
1. Audio Transcription: Convert speech in audio files to text
2. Audio Summarization: Generate concise summaries of audio content
3. Segment Analysis: Analyze specific time segments of audio files
4. Timestamped Transcription: Generate transcriptions with timestamps
5. Token Counting: Count tokens in audio files
6. Format Support: Information about supported audio formats
Supported Audio Formats:
----------------------
- WAV (audio/wav)
- MP3 (audio/mp3)
- AIFF (audio/aiff)
- AAC (audio/aac)
- OGG Vorbis (audio/ogg)
- FLAC (audio/flac)
Technical Details:
----------------
- Each second of audio is represented as 32 tokens
- Maximum supported length of audio data in a single prompt is 9.5 hours
- Audio files are downsampled to 16 Kbps data resolution
- Multi-channel audio is combined into a single channel
Usage:
------
```python
from lib.gpt_providers.audio_to_text_generation.gemini_audio_text import transcribe_audio, summarize_audio, analyze_audio_segment
# Basic transcription
transcript = transcribe_audio("path/to/audio.mp3")
print(transcript)
# Summarization
summary = summarize_audio("path/to/audio.mp3")
print(summary)
# Analyze specific segment
segment_analysis = analyze_audio_segment("path/to/audio.mp3", "02:30", "03:29")
print(segment_analysis)
```
Requirements:
------------
- GEMINI_API_KEY environment variable must be set
- google-genai Python package (imported as `from google import genai`)
- python-dotenv for environment variable management
- loguru for logging
Dependencies:
------------
- google.genai
- dotenv
- loguru
- os, sys, base64, typing
"""
import os
import sys
import base64
from typing import Optional, Dict, Any, List, Union
from dotenv import load_dotenv
from google import genai
from google.genai import types
from loguru import logger
# Drop the handlers loguru already has, then log to stdout with a colorized
# "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def load_environment() -> None:
    """Call ``load_dotenv()`` and log that environment loading ran."""
    load_dotenv()
    logger.info("Environment variables loaded successfully.")
def configure_google_api():
    """Create a Google Gemini API client from the GEMINI_API_KEY variable.

    The ``google.genai`` SDK imported by this module has no module-level
    ``configure()``; credentials are bound to a ``genai.Client`` instead.

    Returns:
        genai.Client: Client authenticated with the configured API key.
        Earlier versions returned None, so callers that ignore the return
        value are unaffected.

    Raises:
        ValueError: If the GEMINI_API_KEY environment variable is not set.
    """
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
        logger.error(error_message)
        raise ValueError(error_message)
    client = genai.Client(api_key=api_key)
    logger.info("Google Gemini API configured successfully.")
    return client
def transcribe_audio(audio_file_path: str, prompt: str = "Transcribe the following audio:") -> Optional[str]:
    """Send an audio file plus a text prompt to Gemini and return the reply.

    Uses the ``google.genai`` client API (``Client.files.upload`` and
    ``Client.models.generate_content``); the legacy ``genai.GenerativeModel``
    and ``genai.upload_file`` calls do not exist in that SDK.

    Args:
        audio_file_path (str): The path to the audio file to be transcribed.
        prompt (str, optional): Instruction sent along with the audio.
            Defaults to "Transcribe the following audio:".

    Returns:
        Optional[str]: The response text, or None when anything fails
        (missing API key, missing file, upload error, API error). Failures
        are logged; no exception propagates to the caller.
    """
    try:
        load_environment()
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
            logger.error(error_message)
            raise ValueError(error_message)

        logger.info(f"Attempting to transcribe audio file: {audio_file_path}")

        # Fail early with a clear message rather than a generic upload error.
        if not os.path.exists(audio_file_path):
            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
            logger.error(error_message)
            raise FileNotFoundError(error_message)

        client = genai.Client(api_key=api_key)

        # Upload the audio file
        try:
            audio_file = client.files.upload(file=audio_file_path)
            logger.info(f"Audio file uploaded successfully: {audio_file=}")
        except Exception as e:
            logger.error(f"Error uploading audio file: {e}")
            return None

        # Generate the transcription
        try:
            response = client.models.generate_content(
                model="gemini-1.5-flash",
                contents=[prompt, audio_file],
            )
        except Exception as e:
            logger.error(f"Error during transcription: {e}")
            return None

        if response and hasattr(response, 'text'):
            transcript = response.text
            logger.info(f"Transcription successful:\n{transcript}")
            return transcript
        logger.warning("Transcription failed: Invalid or empty response from API.")
        return None
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return None
def summarize_audio(audio_file_path: str) -> Optional[str]:
    """Ask Gemini for a summary of an audio file.

    Delegates to ``transcribe_audio`` with a summarization prompt.

    Args:
        audio_file_path (str): The path to the audio file to be summarized.

    Returns:
        Optional[str]: Whatever ``transcribe_audio`` returns for that prompt.
    """
    summary_prompt = "Please summarize the audio content:"
    return transcribe_audio(audio_file_path, prompt=summary_prompt)
def analyze_audio_segment(audio_file_path: str, start_time: str, end_time: str) -> Optional[str]:
    """Ask Gemini to analyze one time range of an audio file.

    The range is only described in the prompt text; the whole file is still
    passed to ``transcribe_audio``.

    Args:
        audio_file_path (str): The path to the audio file.
        start_time (str): Start time in MM:SS format.
        end_time (str): End time in MM:SS format.

    Returns:
        Optional[str]: Whatever ``transcribe_audio`` returns for that prompt.
    """
    segment_prompt = f"Analyze the audio content from {start_time} to {end_time}."
    return transcribe_audio(audio_file_path, prompt=segment_prompt)
def transcribe_with_timestamps(audio_file_path: str) -> Optional[str]:
    """Ask Gemini for a transcription that includes per-segment timestamps.

    Delegates to ``transcribe_audio`` with a timestamp-requesting prompt.

    Args:
        audio_file_path (str): The path to the audio file.

    Returns:
        Optional[str]: Whatever ``transcribe_audio`` returns for that prompt.
    """
    timestamp_prompt = "Transcribe the audio with timestamps for each segment:"
    return transcribe_audio(audio_file_path, prompt=timestamp_prompt)
def count_tokens(audio_file_path: str) -> Optional[int]:
    """Upload an audio file to Gemini and return its token count.

    Uses the ``google.genai`` client API (``Client.files.upload`` and
    ``Client.models.count_tokens``); the legacy ``genai.GenerativeModel``
    and ``genai.upload_file`` calls do not exist in that SDK.

    Args:
        audio_file_path (str): The path to the audio file.

    Returns:
        Optional[int]: ``total_tokens`` reported by the API, or None when
        anything fails (missing API key, missing file, upload or API error).
        Failures are logged; no exception propagates to the caller.
    """
    try:
        load_environment()
        api_key = os.getenv("GEMINI_API_KEY")
        if not api_key:
            error_message = "Google API key not found. Please set the GEMINI_API_KEY environment variable."
            logger.error(error_message)
            raise ValueError(error_message)

        logger.info(f"Attempting to count tokens in audio file: {audio_file_path}")

        # Check if file exists
        if not os.path.exists(audio_file_path):
            error_message = f"FileNotFoundError: The audio file at {audio_file_path} does not exist."
            logger.error(error_message)
            raise FileNotFoundError(error_message)

        client = genai.Client(api_key=api_key)

        # Upload the audio file
        try:
            audio_file = client.files.upload(file=audio_file_path)
            logger.info(f"Audio file uploaded successfully: {audio_file=}")
        except Exception as e:
            logger.error(f"Error uploading audio file: {e}")
            return None

        # Count tokens
        try:
            response = client.models.count_tokens(
                model="gemini-1.5-flash",
                contents=[audio_file],
            )
            token_count = response.total_tokens
            logger.info(f"Token count: {token_count}")
            return token_count
        except Exception as e:
            logger.error(f"Error counting tokens: {e}")
            return None
    except Exception as e:
        logger.error(f"An unexpected error occurred: {e}")
        return None
def get_supported_formats() -> List[str]:
    """List the audio MIME types this module advertises as supported.

    Returns:
        List[str]: A new list on every call, in the order wav, mp3, aiff,
        aac, ogg, flac, each prefixed with ``audio/``.
    """
    subtypes = ("wav", "mp3", "aiff", "aac", "ogg", "flac")
    return [f"audio/{subtype}" for subtype in subtypes]
# Example usage: run each public helper once against a placeholder path.
if __name__ == "__main__":
    audio_path = "path/to/your/audio.mp3"
    # (label, thunk) pairs, executed and printed in this order.
    examples = (
        ("Transcript", lambda: transcribe_audio(audio_path)),
        ("Summary", lambda: summarize_audio(audio_path)),
        ("Segment Analysis", lambda: analyze_audio_segment(audio_path, "02:30", "03:29")),
        ("Timestamped Transcript", lambda: transcribe_with_timestamps(audio_path)),
        ("Token Count", lambda: count_tokens(audio_path)),
        ("Supported Formats", get_supported_formats),
    )
    for label, run_example in examples:
        print(f"{label}: {run_example()}")

View File

@@ -1,206 +0,0 @@
import os
import re
import sys
from pytubefix import YouTube
from loguru import logger
from openai import OpenAI
from tqdm import tqdm
import streamlit as st
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
from .gemini_audio_text import transcribe_audio
def progress_function(stream, chunk, bytes_remaining):
    """Download progress callback: advance the module-level ``progress_bar``.

    Args:
        stream: Object exposing ``filesize`` (total bytes of the download).
        chunk: Unused; part of the callback signature.
        bytes_remaining: Bytes still to be downloaded.
    """
    total_bytes = stream.filesize
    fraction_done = (total_bytes - bytes_remaining) / total_bytes
    # The bar tracks a 0..1 fraction, so advance it by the delta since last time.
    progress_bar.update(fraction_done - progress_bar.n)
def rename_file_with_underscores(file_path):
    """Rename a file on disk so its name contains only safe characters.

    Every character of the file name that is not a word character, ``-``,
    ``_`` or ``.`` is replaced by ``_``; the directory part is untouched.

    Args:
        file_path (str): The original file path.

    Returns:
        str: The path the file was renamed to.
    """
    directory = os.path.dirname(file_path)
    sanitized_name = re.sub(r'[^\w\-_\.]', '_', os.path.basename(file_path))
    target_path = os.path.join(directory, sanitized_name)
    os.rename(file_path, target_path)
    return target_path
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def speech_to_text(video_url):
    """Transcribe a YouTube video (or a local audio file) to text.

    For a ``www.youtube.com`` URL the first audio-only stream is downloaded
    into CONTENT_SAVE_DIR; otherwise ``video_url`` is treated as a path to an
    existing audio file. The audio is transcribed with OpenAI Whisper, and
    Gemini (``transcribe_audio``) is used as a fallback when Whisper fails.
    The audio file is removed afterwards.

    Args:
        video_url (str): YouTube URL, or path of a local audio file.

    Returns:
        tuple | None: ``(transcript, title)``, where title is the video title
        or, for a local file, its base name. None when processing failed and
        the error was reported through Streamlit.

    Raises:
        SystemExit: If the audio file is larger than 24MB.
    """
    global progress_bar  # read by progress_function during the download
    output_path = os.getenv("CONTENT_SAVE_DIR")
    yt = None
    audio_file = None
    with st.status("Started Writing..", expanded=False) as status:
        try:
            if video_url.startswith(("https://www.youtube.com/", "http://www.youtube.com/")):
                logger.info(f"Accessing YouTube URL: {video_url}")
                status.update(label=f"Accessing YouTube URL: {video_url}")
                try:
                    yt = YouTube(video_url, on_progress_callback=progress_function)
                except Exception as err:
                    logger.error(f"Failed to get pytube stream object: {err}")
                    st.stop()
                title = yt.title

                logger.info(f"Fetching the highest quality audio stream:{title}")
                status.update(label=f"Fetching the highest quality audio stream: {title}")
                try:
                    audio_stream = yt.streams.filter(only_audio=True).first()
                except Exception as err:
                    logger.error(f"Failed to Download Youtube Audio: {err}")
                    st.stop()

                if audio_stream is None:
                    logger.warning("No audio stream found for this video.")
                    st.warning("No audio stream found for this video.")
                    st.stop()

                logger.info(f"Downloading audio for: {title}")
                status.update(label=f"Downloading audio for: {title}")
                progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=title)
                try:
                    audio_filename = re.sub(r'[^\w\-_\.]', '_', title) + '.mp4'
                    audio_file = audio_stream.download(
                        output_path=output_path,
                        filename=audio_filename)
                except Exception as err:
                    logger.error(f"Failed to download audio file: {err}")
                    # Without a file nothing below can work; let the outer
                    # handler report the failure.
                    raise
                finally:
                    progress_bar.close()

                logger.info(f"Audio downloaded: {title} to {audio_file}")
                status.update(label=f"Audio downloaded: {title} to {output_path}")
            # Audio filepath from local directory.
            elif os.path.exists(video_url):
                audio_file = video_url
                title = os.path.basename(video_url)
            else:
                st.error("Input is neither a YouTube URL nor an existing audio file path.")
                return None

            # Checking file size
            max_file_size = 24 * 1024 * 1024  # 24MB
            file_size = os.path.getsize(audio_file)
            file_size_MB = file_size / (1024 * 1024)  # bytes -> MB, for logging
            logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
            status.update(label=f"Downloaded Audio Size is: {file_size_MB:.2f} MB")

            if file_size > max_file_size:
                logger.error("File size exceeds 24MB limit.")
                # FIXME: We can chunk hour long videos, the code is not tested.
                #long_video(audio_file)
                # Show the message before exiting; it was unreachable after sys.exit.
                st.error("Audio File size limit exceeded. File a fixme/issues at ALwrity github.")
                sys.exit("File size limit exceeded.")

            try:
                status.update(label=f"Initializing OpenAI client for transcription: {audio_file}")
                logger.info(f"Initializing OpenAI client for transcription: {audio_file}")
                client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

                logger.info("Transcribing using OpenAI's Whisper model.")
                # Context manager so the audio handle is closed after the upload.
                with open(audio_file, "rb") as audio_handle:
                    transcript = client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_handle,
                        response_format="text"
                    )
            except Exception as e:
                logger.error(f"Failed in Whisper transcription: {e}")
                st.warning(f"Failed in Openai Whisper transcription: {e}")
                # Fallback: Gemini transcription of the same file.
                transcript = transcribe_audio(audio_file)

            logger.info(f"\nYouTube video transcription:\n{title}\n{transcript}\n")
            status.update(label=f"\nYouTube video transcription:\n{title}\n{transcript}\n")
            return transcript, title

        except Exception as e:
            st.error(f"An error occurred during YouTube video processing: {e}")
        finally:
            try:
                # audio_file stays None when the failure happened before any
                # file was obtained.
                if audio_file and os.path.exists(audio_file):
                    os.remove(audio_file)
                    logger.info("Temporary audio file removed.")
            except PermissionError:
                st.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure of necessary permissions.")
            except Exception as e:
                st.error(f"An error occurred removing audio file: {e}")
def long_video(temp_file_name):
    """Transcribe a long audio file with Whisper by processing 10-minute chunks.

    The audio is split into 600-second segments, each segment is written to
    a temporary MP3 file, transcribed with OpenAI's ``whisper-1`` model, and
    the per-chunk texts are concatenated (each followed by a blank line).
    Temporary chunk files are removed as soon as they have been transcribed.

    NOTE: marked as untested by the caller (see the FIXME in speech_to_text).

    Args:
        temp_file_name (str): Path of the audio file to transcribe.

    Returns:
        str: The combined transcript of all chunks.
    """
    # Function-scope imports: neither name is imported at module level here.
    import tempfile
    # NOTE(review): assumes the moviepy 1.x layout ('moviepy.editor', 'subclip'),
    # as named in the original docstring -- confirm against the installed version.
    import moviepy.editor as mp

    # Extract audio and split into chunks
    logger.info(f"Processing the YT video: {temp_file_name}")
    full_audio = mp.AudioFileClip(temp_file_name)
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
    chunk_transcripts = []
    try:
        duration = full_audio.duration
        chunk_length = 600  # 10 minutes in seconds
        chunk_starts = range(0, int(duration), chunk_length)

        for i, start in enumerate(chunk_starts):
            chunk = full_audio.subclip(start, min(start + chunk_length, duration))
            # Reserve a temp path, then close the handle so it can be rewritten.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_chunk_file:
                chunk_path = audio_chunk_file.name
            try:
                chunk.write_audiofile(chunk_path, codec="mp3")
                # Binary mode takes no 'encoding' argument (it raised ValueError).
                with open(chunk_path, "rb") as audio_file:
                    logger.info(f"Transcribing chunk {i+1}/{len(chunk_starts)}")
                    text = client.audio.transcriptions.create(
                        model="whisper-1",
                        file=audio_file,
                        response_format="text"
                    )
                chunk_transcripts.append(text + "\n\n")
            finally:
                # Remove the chunk audio file
                os.remove(chunk_path)
    finally:
        full_audio.close()

    return "".join(chunk_transcripts)

View File

@@ -1,105 +0,0 @@
"""Configuration management for GPT providers."""
import json
import os
import sys
from typing import Dict, Optional

from loguru import logger
# Configure logger to output to both file and stdout:
#   - logs/config.log receives DEBUG and above, rotated at 500 MB and kept 10 days
#   - stdout receives INFO and above, with color markup
logger.remove() # Remove default handler
logger.add(
"logs/config.log",
rotation="500 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
def load_config() -> Optional[Dict]:
    """Load configuration from the ALWRITY_CONFIG variable or a config file.

    ALWRITY_CONFIG is first tried as an inline JSON document. If it is unset,
    empty, or not valid JSON, its value (or ``config.json`` when unset or
    empty) is used as the path of a JSON config file.

    Returns:
        Optional[Dict]: Parsed configuration, or None if loading fails.
        Failures are logged; no exception propagates to the caller.
    """
    try:
        logger.info("[load_config] Starting configuration load")

        # Read the variable once; it serves as inline JSON or as a file path.
        env_value = os.getenv('ALWRITY_CONFIG')
        if env_value:
            logger.debug("[load_config] Found configuration in environment variable")
            try:
                config = json.loads(env_value)
            except json.JSONDecodeError as e:
                logger.error(f"[load_config] Failed to parse environment config: {str(e)}")
            else:
                logger.info("[load_config] Successfully loaded configuration from environment")
                return config

        # An empty variable falls back to the default file instead of path "".
        config_path = env_value or 'config.json'
        logger.debug(f"[load_config] Attempting to load config from file: {config_path}")

        if not os.path.exists(config_path):
            logger.error(f"[load_config] Config file not found: {config_path}")
            return None

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
        except json.JSONDecodeError as e:
            logger.error(f"[load_config] Failed to parse config file: {str(e)}")
        except Exception as e:
            logger.error(f"[load_config] Error reading config file: {str(e)}")
        else:
            logger.info("[load_config] Successfully loaded configuration from file")
            return config
        return None
    except Exception as e:
        logger.error(f"[load_config] Unexpected error loading configuration: {str(e)}")
        return None
def read_return_config_section(section: str) -> tuple:
    """Return the GPT settings stored under one section of the configuration.

    Args:
        section (str): Name of the configuration section to read.

    Returns:
        tuple: ``(gpt_provider, model, temperature, max_tokens, top_p, n, fp)``.
        Keys missing from the section fall back to 'openai', 'gpt-3.5-turbo',
        0.7, 2000, 1.0, 1 and 'json'. A tuple of seven None values is returned
        when no configuration is available or reading the section fails.
    """
    unavailable = (None,) * 7
    try:
        logger.info(f"[read_return_config_section] Reading section: {section}")
        config = load_config()
        if not config:
            logger.error("[read_return_config_section] No configuration available")
            return unavailable

        section_config = config.get(section, {})
        logger.debug(f"[read_return_config_section] Section config: {section_config}")

        # Pull each setting, applying its default and numeric coercion.
        setting = section_config.get
        gpt_provider = setting('gpt_provider', 'openai')
        model = setting('model', 'gpt-3.5-turbo')
        temperature = float(setting('temperature', 0.7))
        max_tokens = int(setting('max_tokens', 2000))
        top_p = float(setting('top_p', 1.0))
        n = int(setting('n', 1))
        fp = setting('fp', 'json')

        logger.info(f"[read_return_config_section] Successfully read configuration for {section}")
        logger.debug(f"[read_return_config_section] Values: provider={gpt_provider}, model={model}, "
                     f"temperature={temperature}, max_tokens={max_tokens}")
        return (gpt_provider, model, temperature, max_tokens, top_p, n, fp)
    except Exception as e:
        logger.error(f"[read_return_config_section] Error reading configuration section: {str(e)}")
        return unavailable

View File

@@ -1,116 +0,0 @@
"""
Gemini Image Description Module
This module provides functionality to generate text descriptions of images using Google's Gemini API.
"""
import os
import sys
from typing import Optional, Union, List
from google import genai
from PIL import Image
from dotenv import load_dotenv
from loguru import logger
# Drop the handlers loguru already has, then log to stdout with a colorized
# "LEVEL|file:line:function| message" layout.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def describe_image(image_path: str, prompt: str = "Describe this image in detail:") -> Optional[str]:
    """Generate a text description of an image using Google's Gemini API.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str, optional): Custom prompt to guide the image description.
            Defaults to "Describe this image in detail:".

    Returns:
        Optional[str]: The generated description, or None if anything fails
        (GEMINI_API_KEY not set, image missing or unreadable, API error).
        Failures are logged; no exception propagates to the caller.
    """
    try:
        # Load environment variables
        load_dotenv()

        # Check if API key is set
        api_key = os.getenv('GEMINI_API_KEY')
        if not api_key:
            error_message = "GEMINI_API_KEY environment variable is not set"
            logger.error(error_message)
            raise ValueError(error_message)

        # Check if image file exists
        if not os.path.exists(image_path):
            error_message = f"Image file not found: {image_path}"
            logger.error(error_message)
            raise FileNotFoundError(error_message)

        # Initialize the Gemini client
        client = genai.Client(api_key=api_key)

        # Open the image
        try:
            image = Image.open(image_path)
        except Exception as e:
            error_message = f"Failed to open image: {e}"
            logger.error(error_message)
            return None

        # Close the image's file handle once the request is done, even on error.
        with image:
            logger.info(f"Successfully opened image: {image_path}")
            try:
                response = client.models.generate_content(
                    model='gemini-2.0-flash',
                    contents=[
                        prompt,
                        image
                    ]
                )
                description = response.text
                logger.info(f"Successfully generated description for image: {image_path}")
                return description
            except Exception as e:
                error_message = f"Failed to generate content: {e}"
                logger.error(error_message)
                return None
    except Exception as e:
        error_message = f"An unexpected error occurred: {e}"
        logger.error(error_message)
        return None
def analyze_image_with_prompt(image_path: str, prompt: str) -> Optional[str]:
    """Run ``describe_image`` with a caller-supplied prompt.

    Parameters:
        image_path (str): Path to the image file.
        prompt (str): Custom prompt for analyzing the image.

    Returns:
        Optional[str]: Whatever ``describe_image`` returns for that prompt.
    """
    analysis = describe_image(image_path, prompt)
    return analysis
# Example usage: describe a placeholder image and print the outcome.
if __name__ == "__main__":
    image_path = "path/to/your/image.jpg"
    description = describe_image(image_path)
    # Report failure first; an empty or None description counts as failure.
    if not description:
        print("Failed to generate image description")
    else:
        print(f"Image description: {description}")

View File

@@ -1,79 +0,0 @@
"""
This module provides functionality to analyze images using OpenAI's Vision API.
It encodes an image to a base64 string and sends a request to the OpenAI API
to interpret the contents of the image, returning a textual description.
"""
import requests
import sys
import re
import base64
def analyze_and_extract_details_from_image(image_path, api_key, timeout=60):
    """
    Analyzes an image using OpenAI's Vision API and extracts Alt Text, Description, Title, and Caption.

    Args:
        image_path (str): Path to the image file.
        api_key (str): Your OpenAI API key.
        timeout (float, optional): Seconds to wait for the HTTP request before
            giving up. Defaults to 60.

    Returns:
        dict: Keys 'alt_text', 'description', 'title' and 'caption'. Each value
        is the text captured from the model's reply, or None when the reply
        did not contain the corresponding pattern.

    Raises:
        OSError: If the image file cannot be read (e.g. FileNotFoundError).
        SystemExit: If the request fails or the response cannot be processed.
    """
    def encode_image(path):
        """ Encodes an image to a base64 string. """
        # Binary mode must not be combined with an `encoding` argument:
        # open() raises ValueError for that combination before touching the file.
        with open(path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The given image is used in blog content. Analyze the given image and suggest alternative(alt) test, description, title, caption."
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()

        assistant_message = response.json()['choices'][0]['message']['content']

        # Extracting details using regular expressions
        alt_text_match = re.search(r'Alt Text: "(.*?)"', assistant_message)
        description_match = re.search(r'Description: (.*?)\n\n', assistant_message)
        title_match = re.search(r'Title: "(.*?)"', assistant_message)
        caption_match = re.search(r'Caption: "(.*?)"', assistant_message)

        return {
            'alt_text': alt_text_match.group(1) if alt_text_match else None,
            'description': description_match.group(1) if description_match else None,
            'title': title_match.group(1) if title_match else None,
            'caption': caption_match.group(1) if caption_match else None
        }

    except requests.RequestException as e:
        sys.exit(f"Error: Failed to communicate with OpenAI API. Error: {e}")
    except Exception as e:
        sys.exit(f"Error occurred: {e}")

View File

@@ -1,157 +0,0 @@
# AI Text Generation Guide for Content Creators
## What is AI Text Generation?
AI Text Generation is a powerful tool that helps content creators generate high-quality, engaging content using advanced artificial intelligence models. This tool supports multiple AI providers, each offering unique strengths for different types of content creation.
## Available AI Models
### 1. OpenAI's GPT Models
**Best for:** General content creation, creative writing, and detailed analysis
**Key Features:**
- **Advanced Understanding**: Deep comprehension of context and nuance
- **Creative Flexibility**: Adapts to various writing styles and tones
- **Consistent Quality**: Reliable output for long-form content
- **Streaming Responses**: Real-time content generation
**Use Cases:**
- Blog posts and articles
- Creative storytelling
- Technical writing
- Content analysis and summaries
### 2. Google's Gemini Pro
**Best for:** Balanced content creation and factual accuracy
**Key Features:**
- **Factual Accuracy**: Strong focus on reliable information
- **Balanced Output**: Good mix of creativity and precision
- **Multilingual Support**: Works well across different languages
- **Contextual Understanding**: Strong grasp of context
**Use Cases:**
- Educational content
- Fact-based articles
- Multilingual content
- Research-based writing
### 3. Anthropic's Claude
**Best for:** Professional and academic content
**Key Features:**
- **Professional Tone**: Excellent for formal writing
- **Detailed Analysis**: Strong analytical capabilities
- **Ethical Considerations**: Built-in ethical guidelines
- **Long-form Excellence**: Great for extended content
**Use Cases:**
- Academic writing
- Professional documentation
- Research papers
- Policy documents
### 4. DeepSeek
**Best for:** Technical and specialized content
**Key Features:**
- **Technical Precision**: Excellent for technical writing
- **Specialized Knowledge**: Strong in specific domains
- **Efficient Processing**: Fast response times
- **Customizable Output**: Flexible formatting options
**Use Cases:**
- Technical documentation
- Industry-specific content
- Scientific writing
- Specialized reports
## How to Use the Text Generation Tool
### 1. Setting Up Your Content Parameters
Before generating content, you can specify:
- **Language**: Choose your preferred writing language
- **Tone**: Select the appropriate tone (formal, casual, technical, etc.)
- **Content Length**: Set your desired word count
- **Content Type**: Specify the type of content (blog, article, etc.)
- **Target Audience**: Define your reader demographic
- **Output Format**: Choose your preferred format (Markdown, HTML, etc.)
### 2. Content Generation Process
1. **Input Your Requirements**: Provide your content specifications
2. **Select Your Model**: Choose the AI model best suited for your needs
3. **Generate Content**: Let the AI create your content
4. **Review and Edit**: Polish the generated content as needed
### 3. Customization Options
You can adjust various parameters to fine-tune your content:
- **Temperature**: Control creativity (lower = more focused, higher = more creative)
- **Maximum Length**: Set content length limits
- **Output Format**: Choose how you want the content structured
- **Language Style**: Adjust the writing style and complexity
## Best Practices for Content Creation
### 1. Before Generation
- Clearly define your content goals
- Identify your target audience
- Choose the appropriate model for your needs
- Set clear parameters for tone and style
### 2. During Generation
- Monitor the content quality
- Ensure it aligns with your brand voice
- Check for factual accuracy
- Maintain consistency with your style guide
### 3. After Generation
- Review and edit the content
- Fact-check important information
- Optimize for SEO if needed
- Add your personal touch
## Tips for Optimal Results
1. **Be Specific**: Provide clear instructions for the AI
2. **Use Examples**: Share examples of your desired style
3. **Iterate**: Don't hesitate to regenerate if needed
4. **Review**: Always review and edit generated content
5. **Optimize**: Fine-tune parameters for better results
## Common Use Cases
### Blog Writing
- Generate engaging blog posts
- Create consistent content series
- Develop topic outlines
- Write product reviews
### Article Creation
- Research-based articles
- Opinion pieces
- How-to guides
- Industry analysis
### Technical Writing
- Documentation
- User guides
- Technical specifications
- Process descriptions
### Creative Writing
- Story development
- Character creation
- Plot outlines
- Scene descriptions
## Need Help?
If you encounter any issues or need assistance:
1. Check the model-specific documentation
2. Review your input parameters
3. Try adjusting the generation settings
4. Contact support for technical issues
---
*Note: This tool is designed to assist content creators in generating high-quality content. While AI can help with content creation, it's important to review and edit the generated content to ensure it meets your standards and brand guidelines.*

View File

@@ -1,121 +0,0 @@
import os
import anthropic
import asyncio
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
# Configure standard logging
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
async def test_anthropic_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check whether an Anthropic API key is accepted by the service.

    A minimal message request is issued with the key; the outcome of that
    call decides the result.

    Args:
        api_key (str): The Anthropic API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    probe_message = {
        "role": "user",
        "content": "Say hello"
    }
    try:
        # The reply content is irrelevant: completing the call without an
        # exception is the validity signal.
        anthropic.Anthropic(api_key=api_key).messages.create(
            model="claude-3-haiku-20240307",
            max_tokens=10,
            messages=[probe_message],
        )
    except anthropic.AuthenticationError:
        return False, "Invalid Anthropic API key"
    except anthropic.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        return False, f"Error testing Anthropic API key: {str(e)}"
    return True, "Anthropic API key is valid"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def anthropic_text_response(prompt, model="claude-3-haiku-20240307", temperature=0.7, max_tokens=2048, top_p=0.9, n=1, system_prompt="You are a helpful AI assistant."):
    """
    Generate text using Anthropic's Claude model with retry logic.

    Args:
        prompt (str): The input text to generate completion for
        model (str, optional): Model to use. Defaults to "claude-3-haiku-20240307"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048
        top_p (float, optional): Accepted for signature compatibility; not forwarded to the API.
        n (int, optional): Accepted for signature compatibility; not forwarded to the API.
        system_prompt (str, optional): System prompt to guide the model. Defaults to "You are a helpful AI assistant."

    Returns:
        str: The generated text completion

    Raises:
        SystemExit: If client creation or the API call fails (the original
            error is logged and chained as the cause).
    """
    try:
        # Create Anthropic client
        client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))

        request_kwargs = {
            "model": model,
            "max_tokens": max_tokens,
            "temperature": temperature,
            # The Messages API only accepts "user"/"assistant" roles here.
            "messages": [
                {"role": "user", "content": prompt}
            ],
        }
        # The system prompt is a top-level request parameter, not a message role;
        # sending it as a {"role": "system"} message is rejected by the API.
        if system_prompt:
            request_kwargs["system"] = system_prompt

        # Generate completion
        response = client.messages.create(**request_kwargs)

        # Return the generated text
        return response.content[0].text
    except Exception as e:
        logger.error(f"Error in Anthropic text generation: {e}")
        # NOTE(review): SystemExit is not an Exception subclass, so the @retry
        # decorator's default predicate presumably never retries it - confirm
        # whether retries are actually intended here.
        raise SystemExit from e
def anthropic_text_gen(prompt, model="claude-3-haiku-20240307", temperature=0.7, max_tokens=2048):
    """
    Produce a single Claude completion for a user prompt.

    Args:
        prompt (str): The input text to generate completion for
        model (str, optional): Model to use. Defaults to "claude-3-haiku-20240307"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048

    Returns:
        str: The generated text completion. On any failure the error is logged
        and its string form is returned instead of raising.
    """
    try:
        api_key = os.getenv('ANTHROPIC_API_KEY')
        claude = anthropic.Anthropic(api_key=api_key)

        # Single-turn conversation carrying only the caller's prompt.
        conversation = [{
            "role": "user",
            "content": prompt
        }]
        reply = claude.messages.create(
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=conversation,
        )

        # The text lives in the first content entry of the reply.
        first_entry = reply.content[0]
        return first_entry.text
    except Exception as e:
        logger.error(f"Error in Anthropic text generation: {e}")
        return str(e)

View File

@@ -1,139 +0,0 @@
import os
import time
import logging
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
import openai
import asyncio
# Configure standard logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def deepseek_text_response(prompt, model, temperature, max_tokens, top_p, n, system_prompt):
    """
    Wrapper function for DeepSeek's text generation (streamed chat completion).

    DeepSeek is reached through the OpenAI client pointed at the DeepSeek base
    URL, the same way deepseek_text_gen in this module does it.

    Args:
        prompt (str): The input text to generate completion for.
        model (str): Model to be used for the completion (e.g. "deepseek-chat").
        temperature (float): Controls randomness. Lower values make responses more deterministic.
        max_tokens (int): Maximum number of tokens to generate.
        top_p (float): Controls diversity.
        n (int): Accepted for signature compatibility; not forwarded to the API.
        system_prompt (str): System instructions; sent as a system message when non-empty.

    Returns:
        str: The generated text completion (all streamed pieces joined).

    Raises:
        SystemExit: If client creation, the request, or streaming fails.
    """
    # Wait for 10 seconds to comply with rate limits
    time.sleep(10)

    try:
        client = openai.OpenAI(api_key=os.getenv('DEEPSEEK_API_KEY'), base_url="https://api.deepseek.com")

        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
            temperature=temperature
        )

        # Collect the streamed text pieces while echoing them as they arrive.
        collected_messages = []
        for chunk in response:
            # Some stream events carry no choices or no text delta; skip those.
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content
            if chunk_message is None:
                continue
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        return ''.join(collected_messages)

    except Exception as err:
        logger.error(f"DeepSeek error: {err}")
        raise SystemExit from err
async def test_deepseek_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check whether a DeepSeek API key is accepted by the service.

    Args:
        api_key (str): The DeepSeek API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    try:
        # DeepSeek is reached through the OpenAI client with a custom base URL.
        deepseek = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1"
        )
        # Listing models is the probe; the listing itself is not inspected.
        deepseek.models.list()
    except openai.AuthenticationError:
        return False, "Invalid DeepSeek API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        return False, f"Error testing DeepSeek API key: {str(e)}"
    return True, "DeepSeek API key is valid"
def deepseek_text_gen(prompt, model="deepseek-chat", temperature=0.7, max_tokens=2048):
    """
    Produce a single DeepSeek chat completion for a user prompt.

    Args:
        prompt (str): The input text to generate completion for
        model (str, optional): Model to use. Defaults to "deepseek-chat"
        temperature (float, optional): Controls randomness. Defaults to 0.7
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048

    Returns:
        str: The generated text completion. On any failure the error is logged
        and its string form is returned instead of raising.
    """
    try:
        # DeepSeek is reached through the OpenAI client with a custom base URL.
        api_key = os.getenv('DEEPSEEK_API_KEY')
        deepseek = openai.OpenAI(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1"
        )

        conversation = [{
            "role": "user",
            "content": prompt
        }]
        completion = deepseek.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=temperature,
            max_tokens=max_tokens
        )

        # The text is carried by the message of the first choice.
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        logger.error(f"Error in DeepSeek text generation: {e}")
        return str(e)

View File

@@ -1,232 +0,0 @@
# Using Gemini Pro LLM model
import os
import sys
from pathlib import Path
from google import genai
from google.genai import types
from dotenv import load_dotenv
load_dotenv(Path('../../../.env'))
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
)
import asyncio
import json
import re
# Configure standard logging
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_prompt):
    """
    Common function to get a text response from Gemini.

    Args:
        prompt (str): The input text sent as the request contents.
        temperature (float): Sampling temperature.
        top_p (float): Nucleus sampling value.
        n (int): Forwarded to the request as ``top_k``.
        max_tokens (int): Maximum number of output tokens.
        system_prompt (str): System instruction for the model.

    Returns:
        str: The text of the model response.

    Raises:
        Exception: Any error from client creation or generation is logged and
            re-raised so the @retry decorator can actually retry. Previously
            the error was swallowed and None was returned after one attempt.
    """
    #FIXME: Include : https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/System_instructions_REST.ipynb
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))
    except Exception as err:
        logger.error(f"Failed to configure Gemini: {err}")
        # Without a client the request below cannot run; surface the real error
        # instead of continuing with an unbound name.
        raise

    logger.info(f"Temp: {temperature}, MaxTokens: {max_tokens}, TopP: {top_p}, N: {n}")

    # FIXME: Expose model_name in main_config
    try:
        response = client.models.generate_content(
            model='gemini-2.0-flash-001',
            contents=prompt,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt,
                max_output_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                # NOTE(review): `n` reads like a completion count but is sent as
                # top_k (n=1 would restrict sampling to the top token) - confirm intent.
                top_k=n,
            ),
        )
        return response.text
    except Exception as err:
        logger.error(f"Failed to get response from Gemini: {err}. Retrying.")
        raise
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
#def gemini_blog_metadata_json(blog_content):
# """ Common functiont to get response from gemini pro Text. """
# prompt = f"I will provide you with the content of a blog post. Based on this content, you need to generate the following elements in JSON format:\n\n1. **Blog Title**: A compelling and relevant title that summarizes the blog content.\n2. **Meta Description**: A concise meta description (up to 160 characters) that captures the essence of the blog post and encourages clicks.\n3. **Tags**: A list of 5-10 relevant tags that represent the key topics covered in the blog post.\n4. **Categories**: A list of 1-3 appropriate categories that best describe the blog post's main themes.\n\nOutput your response in the following JSON format:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"blog_title\": {\n \"type\": \"string\"\n },\n \"meta_description\": {\n \"type\": \"string\"\n },\n \"tags\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n },\n \"categories\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n }\n }\n}\n\n. The Blog Content is given below: \n\n{blog_content}\n\n"
#
# try:
# genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# except Exception as err:
# logger.error(f"Failed to configure Gemini: {err}")
#
# # Create the model
# generation_config = {
# "temperature": 1,
# "top_p": 0.95,
# "top_k": 64,
# "max_output_tokens": 8192,
# "response_schema": content.Schema(
# type = content.Type.OBJECT,
# properties = {
# "response": content.Schema(
# type = content.Type.STRING,
# ),
# },
# ),
# "response_mime_type": "application/json",
# }
#
# model = genai.GenerativeModel(
# model_name="gemini-1.5-flash",
# generation_config=generation_config,
# # safety_settings = Adjust safety settings
# # See https://ai.google.dev/gemini-api/docs/safety-settings
# )
#
# try:
# # text_response = []
# response = model.generate_content(prompt)
# if response:
# logger.info(f"Number of Token in Prompt Sent: {model.count_tokens(prompt)}")
# return response.text
# except Exception as err:
# logger.error(f"Failed to get SEO METADATA from Gemini: {err}. Retrying.")
async def test_gemini_api_key(api_key: str) -> tuple[bool, str]:
    """
    Test if the provided Gemini API key is valid.

    Uses the `google.genai` client API that the rest of this module uses;
    the `google import genai` package has no module-level `configure()` or
    `list_models()`, so the previous implementation always reported an error.

    Args:
        api_key (str): The Gemini API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    try:
        # Build a client bound to the key under test.
        client = genai.Client(api_key=api_key)

        # Try to list models as a simple API test
        models = client.models.list()

        # Any Gemini model in the listing proves the key works. A substring
        # match is used so resource-style names still match.
        if any("gemini" in (model.name or "") for model in models):
            return True, "Gemini API key is valid"
        else:
            return False, "Gemini Pro model not available with this API key"

    except Exception as e:
        return False, f"Error testing Gemini API key: {str(e)}"
def gemini_pro_text_gen(prompt, temperature=0.7, top_p=0.9, top_k=40, max_tokens=2048):
    """
    Generate text using Google's Gemini model.

    Uses the `google.genai` client API, like the other functions in this
    module; the legacy `genai.GenerativeModel` / `genai.types.GenerationConfig`
    names do not exist in the imported package, so the previous implementation
    always returned an error string.

    Args:
        prompt (str): The input text to generate completion for
        temperature (float, optional): Controls randomness. Defaults to 0.7
        top_p (float, optional): Controls diversity. Defaults to 0.9
        top_k (int, optional): Controls vocabulary size. Defaults to 40
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048

    Returns:
        str: The generated text completion. On any failure the error is logged
        and its string form is returned instead of raising.
    """
    try:
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        # Same model identifier as gemini_structured_json_response in this module.
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                max_output_tokens=max_tokens,
            ),
        )

        # Return the generated text
        return response.text

    except Exception as e:
        logger.error(f"Error in Gemini Pro text generation: {e}")
        return str(e)
def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, top_k=40, max_tokens=2048, system_prompt=None):
    """
    Generate structured JSON response using Google's Gemini model.

    Args:
        prompt (str): The input text to generate completion for
        schema (dict): The JSON schema to follow for the response
        temperature (float, optional): Controls randomness. Defaults to 0.7
        top_p (float, optional): Controls diversity. Defaults to 0.9
        top_k (int, optional): Controls vocabulary size. Defaults to 40
        max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048
        system_prompt (str, optional): System instructions for the model

    Returns:
        dict: The generated structured JSON response. On failure a dict with an
        "error" key is returned instead of raising; JSON decoding failures also
        carry the unparsed text under "raw_response".
    """
    try:
        # Configure the client
        client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))

        # Generate content with structured response
        response = client.models.generate_content(
            model='gemini-2.0-flash',
            contents=prompt,
            config=types.GenerateContentConfig(
                system_instruction=system_prompt,
                max_output_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                response_mime_type='application/json',
                response_schema=schema
            ),
        )

        # Prefer the SDK-parsed payload, but only when it is actually populated:
        # the attribute can exist and still be None, in which case the raw text
        # must be parsed instead of returning None to the caller.
        parsed = getattr(response, 'parsed', None)
        if parsed is not None:
            return parsed

        response_text = response.text
        if response_text is None:
            # Nothing to decode; handled by the outer handler as an error dict.
            raise ValueError("Gemini returned no text content")

        try:
            return json.loads(response_text)
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON response: {e}")
            return {"error": f"Failed to parse JSON response: {e}", "raw_response": response_text}

    except Exception as e:
        logger.error(f"Error in Gemini Pro structured JSON generation: {e}")
        return {"error": str(e)}

View File

@@ -1,219 +0,0 @@
import os
import sys
import json
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path('../.env'))
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from .openai_text_gen import openai_chatgpt
from .gemini_pro_text import gemini_text_response, gemini_structured_json_response
from .anthropic_text_gen import anthropic_text_response
from .deepseek_text_gen import deepseek_text_response
from ...utils.read_main_config_params import read_return_config_section
def llm_text_gen(prompt, system_prompt=None, json_struct=None):
"""
Generate text using a Language Model (LLM) provider for the given prompt.

Flow: start from hard-coded defaults, override them with the values that
read_return_config_section() returns for 'llm_config' and
'blog_characteristics', build a system prompt (unless one is supplied),
check that an API key exists via get_api_key(), then dispatch to the
provider-specific function selected by the provider name.

Args:
prompt (str): The prompt to generate text from.
system_prompt (str, optional): Custom system prompt to use instead of the default one.
json_struct (dict, optional): JSON schema structure for structured responses.
Only consulted on the Google branch.

Returns:
The value returned by the selected provider function. With json_struct on
the Google branch this is the result of gemini_structured_json_response
rather than plain text.

Raises:
Exception: Any error raised during generation (including the ValueError
from get_api_key when no key is configured) is logged and re-raised.
"""
try:
logger.info("[llm_text_gen] Starting text generation")
logger.debug(f"[llm_text_gen] Prompt length: {len(prompt)} characters")
try:
# Set default values for LLM parameters
gpt_provider = "google"
model = "gemini-1.5-flash-latest"
temperature = 0.7
max_tokens = 4000
top_p = 0.9
n = 1
# NOTE(review): fp is only forwarded to openai_chatgpt below; its meaning is not visible here - confirm.
fp = 16
# NOTE(review): frequency_penalty and presence_penalty are set here but never passed to any provider call in this function.
frequency_penalty = 0.0
presence_penalty = 0.0
# Default blog characteristics
blog_tone = "Professional"
blog_demographic = "Professional"
blog_type = "Informational"
blog_language = "English"
blog_output_format = "markdown"
blog_length = 2000
# Try to read values from config, but keep defaults if any key is missing
try:
# Read LLM config
# The section is indexed positionally: provider, model, temperature, max_tokens, then top_p, n, frequency_penalty.
# Values are used as returned (no type conversion here); a falsy entry (e.g. 0) keeps the default.
llm_config = read_return_config_section('llm_config')
if llm_config and len(llm_config) >= 4:
gpt_provider = llm_config[0] if llm_config[0] else gpt_provider
model = llm_config[1] if llm_config[1] else model
temperature = llm_config[2] if llm_config[2] else temperature
max_tokens = llm_config[3] if llm_config[3] else max_tokens
# Handle additional parameters with defaults if they're missing
if len(llm_config) > 4:
top_p = llm_config[4] if llm_config[4] else top_p
if len(llm_config) > 5:
# Try to get n parameter (could be either 'N' or 'n' in config)
n = llm_config[5] if llm_config[5] else n
if len(llm_config) > 6:
frequency_penalty = llm_config[6] if llm_config[6] else frequency_penalty
logger.debug(f"[llm_text_gen] LLM Config loaded: Provider={gpt_provider}, Model={model}, Temp={temperature}")
except Exception as err:
logger.warning(f"[llm_text_gen] Couldn't load LLM config completely, using defaults where needed: {err}")
try:
# Read blog characteristics
# Indexed positionally as well: tone, demographic, type, language, output format, length.
blog_chars = read_return_config_section('blog_characteristics')
if blog_chars and len(blog_chars) >= 6:
blog_tone = blog_chars[0] if blog_chars[0] else blog_tone
blog_demographic = blog_chars[1] if blog_chars[1] else blog_demographic
blog_type = blog_chars[2] if blog_chars[2] else blog_type
blog_language = blog_chars[3] if blog_chars[3] else blog_language
blog_output_format = blog_chars[4] if blog_chars[4] else blog_output_format
blog_length = blog_chars[5] if blog_chars[5] else blog_length
logger.debug(f"[llm_text_gen] Blog characteristics loaded: Tone={blog_tone}, Type={blog_type}")
except Exception as err:
logger.warning(f"[llm_text_gen] Couldn't load blog characteristics completely, using defaults where needed: {err}")
except Exception as err:
logger.warning(f"[llm_text_gen] Using default settings due to config read error: {err}")
# Construct the system prompt with the sidebar config params if no custom system_prompt is provided
if system_prompt is None:
system_instructions = f"""You are a highly skilled content writer with a knack for creating engaging and informative content.
Your expertise spans various writing styles and formats.
Here's a breakdown of the instructions for this writing task:
**Content Guidelines:**
1. **Language:** Your response must be in **{blog_language}** language.
2. **Tone and Brand Alignment:** Adjust your tone, voice, and personality to be appropriate for a **{blog_tone}** audience.
3. **Content Length:** Ensure your response is approximately **{blog_length}** words in length.
4. **Blog Type:** The type of blog is **{blog_type}**. Write accordingly, adhering to the conventions and expectations of this type of content.
5. **Target Audience:** The demographic for this content is **{blog_demographic}**. Keep their interests and needs in mind.
6. **Output Format:** Your response should be in **{blog_output_format}** format. This could be Markdown, HTML, or a specific structured format, depending on the user's preference.
**Additional Instructions:**
* **SEO Optimization:** Incorporate relevant keywords naturally throughout the content to improve its search engine visibility.
* **Call to Action:** Include a call to action if appropriate for the blog type and target audience.
* **Factual Accuracy:** Ensure your content is accurate and reliable. Back up any claims with credible sources.
* **Unique Voice and Style:** Inject your unique voice and writing style to make the content engaging and memorable. """
else:
system_instructions = system_prompt
logger.info("[llm_text_gen] Using custom system prompt")
# Check if API key is provided for the given gpt_provider
# get_api_key raises ValueError when no key is found; its return value is not used here.
# NOTE(review): get_api_key compares the provider name exactly, while the branches below use substring matching - confirm these are meant to agree.
get_api_key(gpt_provider)
# Perform text generation using the specified LLM parameters and prompt
if 'google' in gpt_provider.lower():
try:
logger.info("Using Google Gemini Pro text generation model.")
# NOTE(review): `n` is passed in the argument position the Gemini helpers treat as top_k, and `model` is not forwarded on this branch - confirm intent.
if json_struct:
response = gemini_structured_json_response(prompt, json_struct, temperature, top_p, n, max_tokens, system_instructions)
else:
response = gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_instructions)
return response
except Exception as err:
logger.error(f"Failed to get response from gemini: {err}")
raise err
elif 'openai' in gpt_provider.lower():
try:
logger.info(f"Using OpenAI Model: {model} for text Generation.")
response = openai_chatgpt(prompt, model, temperature, max_tokens, top_p, n, fp, system_instructions)
return response
except Exception as err:
logger.error(f"Failed to get response from Openai: {err}")
raise err
elif 'anthropic' in gpt_provider.lower():
try:
logger.info(f"Using Anthropic Model: {model} for text Generation.")
response = anthropic_text_response(prompt, model, temperature, max_tokens, top_p, n, system_instructions)
return response
except Exception as err:
logger.error(f"Failed to get response from Anthropic: {err}")
raise err
elif 'deepseek' in gpt_provider.lower():
try:
logger.info(f"Using DeepSeek Model: {model} for text Generation.")
response = deepseek_text_response(prompt, model, temperature, max_tokens, top_p, n, system_instructions)
return response
except Exception as err:
logger.error(f"Failed to get response from DeepSeek: {err}")
raise err
else:
# NOTE(review): this fallback looks unreachable - get_api_key above raises ValueError for any provider name other than the four handled ones. Confirm.
# json_struct is ignored on this path.
logger.warning(f"Unknown provider '{gpt_provider}', falling back to Google Gemini")
response = gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_instructions)
return response
except Exception as err:
logger.error(f"Failed to generate text: {err}")
raise
def check_gpt_provider(gpt_provider):
    """
    Reconcile the configured GPT provider with the GPT_PROVIDER environment variable.

    - If GPT_PROVIDER is unset, the configured value is exported to the
      environment and returned.
    - If both are set and differ (case-insensitively), a warning is logged and
      the environment value wins.
    - If only the environment value is set, it is returned.

    Args:
        gpt_provider (str): The specified GPT provider (from the config file).

    Returns:
        str: The provider to use.

    Raises:
        ValueError: If both the specified GPT provider and environment variable GPT_PROVIDER are missing.
    """
    env_gpt_provider = os.getenv('GPT_PROVIDER')

    if not env_gpt_provider:
        # Previously this path called .lower() on None and crashed with
        # AttributeError whenever the environment variable was not set.
        if not gpt_provider:
            raise ValueError("GPT provider is missing from both the config and the 'GPT_PROVIDER' environment variable")
        os.environ['GPT_PROVIDER'] = gpt_provider
        return gpt_provider

    if not gpt_provider:
        # Nothing configured: fall back to the environment value.
        return env_gpt_provider

    if gpt_provider.lower() != env_gpt_provider.lower():
        logger.warning(f"Config: '{gpt_provider}' different to environment variable 'GPT_PROVIDER' '{env_gpt_provider}'")
        gpt_provider = env_gpt_provider
    return gpt_provider
def get_api_key(gpt_provider):
    """
    Look up the API key for a GPT provider from the environment.

    Args:
        gpt_provider (str): The specified GPT provider. Compared
            case-insensitively against 'google', 'openai', 'anthropic'
            and 'deepseek'.

    Returns:
        str: The API key for the specified GPT provider.

    Raises:
        ValueError: If no API key is found for the specified GPT provider
            (unknown provider name, or its environment variable is unset/empty).
    """
    # Provider name -> environment variable holding its key.
    env_var_by_provider = {
        'google': 'GEMINI_API_KEY',
        'openai': 'OPENAI_API_KEY',
        'anthropic': 'ANTHROPIC_API_KEY',
        'deepseek': 'DEEPSEEK_API_KEY',
    }

    env_var = env_var_by_provider.get(gpt_provider.lower())
    api_key = os.getenv(env_var) if env_var is not None else None

    if not api_key:
        raise ValueError(f"No API key found for the specified GPT provider: '{gpt_provider}'")

    logger.info(f"Using API key for {gpt_provider}")
    return api_key

View File

@@ -1,144 +0,0 @@
import os
import logging
from pathlib import Path
from mistralai import Mistral
import asyncio
from loguru import logger
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
from dotenv import load_dotenv
load_dotenv(Path('../../.env'))
# Configure standard logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
async def test_mistral_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check whether a Mistral API key is accepted by the service.

    Args:
        api_key (str): The Mistral API key to test

    Returns:
        tuple[bool, str]: A tuple containing (is_valid, message)
    """
    probe_messages = [{
        "role": "user",
        "content": "Hello"
    }]
    try:
        async with Mistral(api_key=api_key) as client:
            # A tiny completion request serves as the probe.
            response = await client.chat.complete_async(
                model="mistral-small-latest",
                messages=probe_messages,
                max_tokens=10
            )
            # A reply without choices is treated as an invalid response.
            if not (response and response.choices):
                return False, "Invalid response from Mistral API"
            return True, "Mistral API key is valid"
    except Exception as e:
        return False, f"Error testing Mistral API key: {str(e)}"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
async def mistral_chat_completion_async(
    prompt: str,
    model: str = "mistral-small-latest",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: str = "You are a helpful AI assistant."
) -> str:
    """
    Request a chat completion from Mistral and return the first choice's text.

    The API key is read from the ``MISTRAL_API_KEY`` environment variable.

    Args:
        prompt (str): User message to send.
        model (str, optional): Model name. Defaults to "mistral-small-latest".
        temperature (float, optional): Sampling temperature. Defaults to 0.7.
        max_tokens (int, optional): Maximum tokens to generate. Defaults to 2048.
        top_p (float, optional): Nucleus-sampling cutoff. Defaults to 0.9.
        system_prompt (str, optional): System message placed before the user
            message; omitted when empty.

    Returns:
        str: Content of the first returned choice.

    Raises:
        SystemExit: On any error, including an empty response.
    """
    try:
        async with Mistral(api_key=os.getenv('MISTRAL_API_KEY')) as client:
            # Optional system message first, then the user prompt.
            conversation = (
                [{"role": "system", "content": system_prompt}] if system_prompt else []
            )
            conversation.append({"role": "user", "content": prompt})

            completion = await client.chat.complete_async(
                model=model,
                messages=conversation,
                temperature=temperature,
                max_tokens=max_tokens,
                top_p=top_p
            )
            if not (completion and completion.choices):
                raise Exception("No response generated")
            return completion.choices[0].message.content
    except Exception as e:
        logger.error(f"Error in Mistral chat completion: {e}")
        # NOTE(review): SystemExit derives from BaseException, so the @retry
        # decorator presumably never retries it and `except Exception` in
        # callers will not catch it -- confirm this is intended.
        raise SystemExit from e
# Synchronous wrapper for compatibility
def mistral_chat_completion(
    prompt: str,
    model: str = "mistral-small-latest",
    temperature: float = 0.7,
    max_tokens: int = 2048,
    top_p: float = 0.9,
    system_prompt: str = "You are a helpful AI assistant."
) -> str:
    """
    Blocking wrapper around ``mistral_chat_completion_async``.

    Runs the coroutine with ``asyncio.run`` and forwards every argument
    unchanged.

    Returns:
        str: The generated text, or the text of the exception if one was
        raised (the error is logged, not propagated).
    """
    request = dict(
        prompt=prompt,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        system_prompt=system_prompt,
    )
    try:
        return asyncio.run(mistral_chat_completion_async(**request))
    except Exception as e:
        logger.error(f"Error in Mistral chat completion: {e}")
        return str(e)
# For backward compatibility
def mistral_text_response(prompt, model="mistral-small-latest", temperature=0.7, max_tokens=2048):
    """
    Legacy entry point kept for backward compatibility.

    Delegates to ``mistral_chat_completion``, leaving ``top_p`` and
    ``system_prompt`` at that function's defaults.
    """
    legacy_args = {
        "prompt": prompt,
        "model": model,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    return mistral_chat_completion(**legacy_args)

View File

@@ -1,109 +0,0 @@
import os
import time #IWish
import openai
import asyncio
# Configure standard logging
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
logger = logging.getLogger(__name__)
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
async def test_openai_api_key(api_key: str) -> tuple[bool, str]:
    """
    Check an OpenAI API key by listing the available models.

    Args:
        api_key (str): The OpenAI API key to validate.

    Returns:
        tuple[bool, str]: ``(is_valid, message)``. Authentication failures,
        rate limiting and any other exception are each mapped to
        ``(False, <explanation>)``.
    """
    try:
        # Listing models is a simple authenticated call; only success matters,
        # the returned list itself is not used.
        openai.OpenAI(api_key=api_key).models.list()
    except openai.AuthenticationError:
        return False, "Invalid OpenAI API key"
    except openai.RateLimitError:
        return False, "Rate limit exceeded. Please try again later."
    except Exception as e:
        return False, f"Error testing OpenAI API key: {str(e)}"
    return True, "OpenAI API key is valid"
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def openai_chatgpt(prompt, model, temperature, max_tokens, top_p, n, fp, system_prompt):
    """
    Stream a chat completion from OpenAI and return the concatenated text.

    The reply is echoed to stdout as it streams. The API key is read from the
    ``OPENAI_API_KEY`` environment variable.

    Args:
        prompt (str): User message to send.
        model (str): Model to use for the completion.
        temperature (float): Sampling temperature; lower values make
            responses more deterministic.
        max_tokens (int): Maximum number of tokens to generate.
        top_p (float): Nucleus-sampling cutoff.
        n (int): Number of completions to request. Only the first choice of
            each streamed chunk is collected.
        fp (float): Frequency penalty.
        system_prompt (str): System message placed before the user message.

    Returns:
        str: The generated text.

    Raises:
        SystemExit: If a rate-limit, connection, API or any other error occurs.
    """
    # Fixed 5-second pause before every request as crude rate limiting.
    time.sleep(5)
    try:
        client = openai.OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "system", "content": system_prompt},
                      {"role": "user", "content": prompt}],
            # Previously accepted by this function but never forwarded.
            temperature=temperature,
            max_tokens=max_tokens,
            n=n,
            top_p=top_p,
            stream=True,
            frequency_penalty=fp
        )

        collected_messages = []
        for chunk in response:
            # Some chunks carry no choices or no text (e.g. role/finish
            # markers); skip them instead of printing "None" or indexing
            # an empty list.
            if not chunk.choices:
                continue
            chunk_message = chunk.choices[0].delta.content
            if chunk_message is None:
                continue
            collected_messages.append(chunk_message)
            print(chunk_message, end="", flush=True)

        return ''.join(collected_messages)

    # Most specific handlers first: RateLimitError and APIConnectionError are
    # subclasses of APIError and would otherwise never be reached.
    except openai.RateLimitError as e:
        logger.error(f"Rate limit exceeded on OpenAI API request: {e}")
        raise SystemExit from e
    except openai.APIConnectionError as e:
        logger.error(f"Failed to connect to OpenAI API: {e}")
        raise SystemExit from e
    except openai.APIError as e:
        logger.error(f"OpenAI API Error: {e}")
        raise SystemExit from e
    except Exception as err:
        logger.error(f"OpenAI error: {err}")
        # Chain from the exception bound in this handler; the original
        # referenced an unbound name here.
        raise SystemExit from err

View File

@@ -1,56 +0,0 @@
from openai import OpenAI
from loguru import logger
import sys
from .save_image import save_generated_image
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generate an image with DALL-E 3 from a text prompt and save it locally.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory passed to ``save_generated_image``.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        str | None: Value returned by ``save_generated_image``, or None when
        the image was generated but saving it failed (the failure is logged).

    Raises:
        SystemExit: If image generation fails.
    """
    # Imported locally: this module only imports the `OpenAI` client class,
    # so the bare name `openai` used by the previous handler was undefined.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()
        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )
        # Save the generated image locally; a save failure is logged but
        # deliberately does not abort the process.
        try:
            img_path = save_generated_image(img_generation_response, image_dir)
            return img_path
        except Exception as err:
            logger.error(f"Failed to Save generated image: {err}")
            return None
    except OpenAIError as e:
        # Not every OpenAIError carries an HTTP status, so read it defensively.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")
    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")

View File

@@ -1,53 +0,0 @@
from openai import OpenAI
from loguru import logger
import sys
from tenacity import (
retry,
stop_after_attempt,
wait_random_exponential,
) # for exponential backoff
from .save_image import save_generated_image
@retry(wait=wait_random_exponential(min=1, max=120), stop=stop_after_attempt(6))
def generate_dalle3_images(img_prompt, image_dir, size="1024x1024", quality="hd", n=1):
    """
    Generate an image with DALL-E 3 from a text prompt and save it locally.

    Args:
        img_prompt (str): Text prompt to generate the image.
        image_dir (str): Directory passed to ``save_generated_image``.
        size (str, optional): Size of the generated images. Defaults to "1024x1024".
        quality (str, optional): Quality of the generated images. Defaults to "hd".
        n (int, optional): Number of images to generate. Defaults to 1.

    Returns:
        str: Value returned by ``save_generated_image`` (path of the saved image).

    Raises:
        SystemExit: If an error occurs in image generation or saving.
    """
    # Imported locally: this module only imports the `OpenAI` client class,
    # so the bare name `openai` used by the previous handler was undefined.
    from openai import OpenAIError

    try:
        logger.info("Generating Dall-e-3 image for the blog.")
        client = OpenAI()
        img_generation_response = client.images.generate(
            model="dall-e-3",
            prompt=img_prompt,
            size=size,
            quality=quality,
            n=n
        )
        return save_generated_image(img_generation_response, image_dir)
    except OpenAIError as e:
        # Not every OpenAIError carries an HTTP status, so read it defensively.
        status = getattr(e, "status_code", "n/a")
        logger.error(f"Dalle-3 image generation error: HTTP Status {status}, Error: {e}")
        sys.exit("Exiting due to Dalle-3 image generation error.")
    except Exception as e:
        logger.error(f"Failed to generate images with Dalle3: {e}")
        sys.exit("Exiting due to a general error in image generation.")

View File

@@ -1,423 +0,0 @@
import os
from PIL import Image
from io import BytesIO
import PIL
import streamlit as st
from google import genai
from google.genai import types
import logging
import datetime
import base64
import random
import time
from .save_image import save_generated_image
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('gemini_image_generator')
# With image generation in Gemini, your imagination is the limit.
# If what you see doesn't quite match what you had in mind, try adding more details to the prompt.
# The more specific you are, the better Gemini can create images that reflect your vision.
# Generate images using Gemini
# Gemini 2.0 Flash Experimental supports the ability to output text and inline images.
# This lets you use Gemini to conversationally edit images or generate outputs with interwoven text (for example, generating a blog post with text and images in a single turn).
# Note: Make sure to include responseModalities: ["Text", "Image"] in your generation configuration for text and image output with gemini-2.0-flash-exp-image-generation. Image only is not allowed.
class AIPromptGenerator:
    """
    Generates enhanced AI image prompts based on user keywords,
    following the guidelines of the Imagen documentation.

    Each generator method starts from the keywords and randomly appends
    modifiers drawn from the vocabularies set up in ``__init__``.
    """

    def __init__(self):
        self.photography_styles = ["photo", "photograph"]
        self.art_styles = ["painting", "sketch", "drawing", "illustration", "digital art", "render"]
        self.art_techniques = ["technical pencil drawing", "charcoal drawing", "color pencil drawing", "pastel painting", "digital art", "art deco (poster)", "impressionist painting", "renaissance painting", "pop art"]
        self.camera_proximity = ["close-up", "zoomed out", "taken from far away"]
        self.camera_position = ["aerial", "from below"]
        self.lighting = ["natural lighting", "dramatic lighting", "warm lighting", "cold lighting", "studio lighting", "golden hour lighting"]
        self.camera_settings = ["motion blur", "soft focus", "bokeh", "portrait"]
        self.lens_types = ["35mm lens", "50mm lens", "fisheye lens", "wide angle lens", "macro lens", "telephoto lens"]
        self.film_types = ["black and white film", "polaroid"]
        self.materials = ["made of cheese", "made of paper", "made of neon tubes", "metallic", "glass", "wooden", "stone"]
        self.shapes = ["in the shape of a bird", "angular", "curved", "geometric"]
        self.quality_modifiers_general = ["high-quality", "beautiful", "stylized", "detailed", "epic", "grand"]
        self.quality_modifiers_photo = ["4K", "HDR", "studio photo", "professional photo", "photorealistic"]
        self.quality_modifiers_art = ["by a professional artist", "intricate details", "masterpiece"]
        self.aspect_ratios = ["1:1 aspect ratio", "4:3 aspect ratio", "3:4 aspect ratio", "16:9 aspect ratio", "9:16 aspect ratio"]
        # Extra modifiers for photorealistic prompts, keyed by focus.
        self.photorealistic_modifiers = {
            "portraits": ["prime lens", "zoom lens", "24-35mm", "black and white film", "film noir", "shallow depth of field", "duotone (mention two colors)"],
            "objects": ["macro lens", "60-105mm", "high detail", "precise focusing", "controlled lighting"],
            "motion": ["telephoto zoom lens", "100-400mm", "fast shutter speed", "action shot", "movement tracking"],
            "wide-angle": ["wide-angle lens", "10-24mm", "long exposure", "sharp focus", "smooth water or clouds", "astro photography"]
        }

    def generate_prompt(self, keywords):
        """
        Generates an enhanced AI image prompt based on user-provided keywords.

        Args:
            keywords (list): A list of keywords describing the desired image.

        Returns:
            str: Comma-separated prompt, or "A beautiful image." when
            ``keywords`` is empty.
        """
        if not keywords:
            return "A beautiful image."

        prompt_parts = [" ".join(keywords)]

        # Add context and background (optional)
        context_options = ["in a detailed background", "outdoors", "indoors", "in a studio", "with a blurred background"]
        if random.random() < 0.6:
            prompt_parts.append(random.choice(context_options))

        # Add style (optional); the style goes in front of the subject.
        style_options = self.photography_styles + [f"{art} of" for art in self.art_styles]
        if random.random() < 0.7:
            prompt_parts.insert(0, random.choice(style_options))
            if prompt_parts[0].startswith(("painting of", "sketch of", "drawing of")):
                if random.random() < 0.5:
                    prompt_parts.append(f"in the style of {random.choice(self.art_techniques)}")

        # Add photography modifiers (if the leading part names a photo style)
        if any(style in prompt_parts[0] for style in self.photography_styles):
            if random.random() < 0.4:
                prompt_parts.append(random.choice(self.camera_proximity))
            if random.random() < 0.3:
                prompt_parts.append(random.choice(self.camera_position))
            if random.random() < 0.5:
                prompt_parts.append(random.choice(self.lighting))
            if random.random() < 0.3:
                prompt_parts.append(random.choice(self.camera_settings))
            if random.random() < 0.2:
                prompt_parts.append(random.choice(self.lens_types))
            if random.random() < 0.1:
                prompt_parts.append(random.choice(self.film_types))

        # Add shapes and materials (optional)
        if random.random() < 0.3:
            prompt_parts.append(random.choice(self.materials))
        if random.random() < 0.2:
            prompt_parts.append(random.choice(self.shapes))

        # Add quality modifiers (optional)
        if random.random() < 0.6:
            if any(style in prompt_parts[0] for style in self.photography_styles):
                extra_quality = self.quality_modifiers_photo
            else:
                extra_quality = self.quality_modifiers_art
            # Build a NEW list: the previous `+=` on an alias extended
            # self.quality_modifiers_general in place, so every call leaked
            # photo/art modifiers into the shared general list.
            quality_options = self.quality_modifiers_general + extra_quality
            prompt_parts.append(random.choice(quality_options))

        # Add aspect ratio (optional)
        if random.random() < 0.2:
            prompt_parts.append(random.choice(self.aspect_ratios))

        return ", ".join(prompt_parts)

    def generate_photorealistic_prompt(self, keywords, focus=""):
        """
        Generates an enhanced AI image prompt specifically for photorealistic images.

        Args:
            keywords (list): A list of keywords describing the desired image.
            focus (str, optional): One of the keys of
                ``self.photorealistic_modifiers`` ("portraits", "objects",
                "motion", "wide-angle"). Other values add no focus-specific
                modifiers. Defaults to "".

        Returns:
            str: Comma-separated prompt, or "A photorealistic image." when
            ``keywords`` is empty.
        """
        if not keywords:
            return "A photorealistic image."

        prompt_parts = ["A photo of", "photorealistic", " ".join(keywords)]

        if focus and focus in self.photorealistic_modifiers:
            modifiers = self.photorealistic_modifiers[focus]
            if modifiers:
                # Pick between one and three distinct focus modifiers.
                num_modifiers = random.randint(1, min(3, len(modifiers)))
                prompt_parts.extend(random.sample(modifiers, num_modifiers))

        # Add general quality modifiers
        if random.random() < 0.5:
            prompt_parts.append(random.choice(self.quality_modifiers_photo))

        # Add lighting
        if random.random() < 0.4:
            prompt_parts.append(random.choice(self.lighting))

        return ", ".join(prompt_parts)
def _fit_image_to_aspect_ratio(image, aspect_ratio):
    """
    Centre `image` on a white RGB canvas shaped for `aspect_ratio`.

    "16:9" and "4:3" keep the width and derive the height; "9:16" and "3:4"
    keep the height and derive the width; "1:1" uses the shorter side. Any
    other value keeps the original dimensions. The image is not scaled: it is
    pasted at a centred offset onto the new canvas.
    """
    current_width, current_height = image.size
    target_width = current_width
    target_height = current_height

    if aspect_ratio == "16:9":
        target_height = int(current_width * 9/16)
    elif aspect_ratio == "9:16":
        target_width = int(current_height * 9/16)
    elif aspect_ratio == "4:3":
        target_height = int(current_width * 3/4)
    elif aspect_ratio == "3:4":
        target_width = int(current_height * 3/4)
    elif aspect_ratio == "1:1":
        target_size = min(current_width, current_height)
        target_width = target_size
        target_height = target_size

    logger.info(f"Resizing image from {current_width}x{current_height} to {target_width}x{target_height}")

    canvas = Image.new('RGB', (target_width, target_height), (255, 255, 255))
    # Offsets are negative when the source is larger than the canvas.
    paste_x = (target_width - current_width) // 2
    paste_y = (target_height - current_height) // 2
    canvas.paste(image, (paste_x, paste_y))
    return canvas


def generate_gemini_image(prompt, keywords=None, style=None, focus=None, enhance_prompt=True, max_retries=3, initial_retry_delay=2, aspect_ratio="16:9"):
    """
    Generate an image with Gemini and save it to disk.

    The model is asked for both text and image output. Text parts of the
    response are logged and printed; each image part is fitted to the
    requested aspect ratio, written to a timestamped PNG in the current
    working directory and also handed to ``save_generated_image``.

    Image generation may not always trigger:
    - The model may output text only. Try asking for image outputs explicitly
      (e.g. "generate an image", "provide images as you go along").
    - The model may stop generating partway through. Try again or try a
      different prompt.

    Args:
        prompt (str): The prompt to generate the image from.
        keywords (list, optional): Keywords used to build an enhanced prompt. Defaults to None.
        style (str, optional): "photorealistic" selects the photorealistic
            prompt generator when ``focus`` is also given. Defaults to None.
        focus (str, optional): Focus of the image (e.g., "portraits", "objects", "motion", "wide-angle"). Defaults to None.
        enhance_prompt (bool, optional): Whether to enhance the prompt using AIPromptGenerator. Defaults to True.
        max_retries (int, optional): Maximum number of retries after a "503 UNAVAILABLE" error. Defaults to 3.
        initial_retry_delay (int, optional): Delay in seconds before the first retry; doubled after each retry. Defaults to 2.
        aspect_ratio (str, optional): One of "16:9", "9:16", "4:3", "3:4", or "1:1". Defaults to "16:9".

    Returns:
        str | None: File name of the last image written, or None when the API
        key is missing, the response contained no image, or the request failed.
    """
    logger.info(f"Generating image with prompt: '{prompt[:100]}...'")

    # Check if the GEMINI_API_KEY is available
    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        error_msg = "GEMINI_API_KEY is missing. Please set it in your environment variables."
        logger.error(error_msg)
        st.error(f"🔑 {error_msg}")
        return None

    # Enhance the prompt if requested
    if enhance_prompt and keywords:
        prompt_generator = AIPromptGenerator()
        if style == "photorealistic" and focus:
            logger.info(f"Generating photorealistic prompt with focus: {focus}")
            enhanced_prompt = prompt_generator.generate_photorealistic_prompt(keywords, focus)
        else:
            logger.info("Generating enhanced prompt")
            enhanced_prompt = prompt_generator.generate_prompt(keywords)
        # Combine the enhanced prompt with the original prompt
        prompt = f"{prompt}\n\nEnhanced prompt: {enhanced_prompt}"
        logger.info(f"Final prompt: '{prompt[:100]}...'")

    # Add aspect ratio to the prompt
    if aspect_ratio:
        prompt += f"\n\nPlease generate the image with {aspect_ratio} aspect ratio."

    retry_count = 0
    retry_delay = initial_retry_delay

    while retry_count <= max_retries:
        try:
            client = genai.Client(api_key=api_key)
            logger.info("Sending request to Gemini API")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=prompt,
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )
            logger.info("Received response from Gemini API")

            img_name = None
            for part in response.candidates[0].content.parts:
                if part.text is not None:
                    logger.info(f"Received text response: '{part.text[:100]}...'")
                    print(part.text)
                elif part.inline_data is not None:
                    logger.info("Received image data from Gemini")
                    image = Image.open(BytesIO(part.inline_data.data))

                    if aspect_ratio:
                        image = _fit_image_to_aspect_ratio(image, aspect_ratio)

                    # part.text is always None in this branch, so the name is
                    # always timestamp-based.
                    img_name = f'gemini-native-image-{datetime.datetime.now().strftime("%Y%m%d-%H%M%S")}.png'

                    try:
                        logger.info(f"Saving image to: {img_name}")
                        image.save(img_name)
                        # Re-read the saved file; the context manager closes
                        # the handle that was previously leaked.
                        with open(img_name, "rb") as saved_file:
                            encoded_image = base64.b64encode(saved_file.read()).decode('utf-8')
                        # Dictionary in the format save_generated_image expects
                        img_response = {
                            "artifacts": [
                                {
                                    "base64": encoded_image
                                }
                            ]
                        }
                        save_generated_image(img_response)
                    except Exception as err:
                        logger.error(f"Failed to save image: {err}")
                        st.error(f"Failed to save image: {err}")

            logger.info(f"Image generation completed. Image name: {img_name}")
            return img_name

        except Exception as err:
            error_message = str(err)
            logger.error(f"Error in generate_gemini_image: {err}")

            # Only a 503 UNAVAILABLE error is retried, with exponential backoff.
            if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
                retry_count += 1
                logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
                st.warning(f"The image generation service is currently busy. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2
            else:
                st.error(f"Error generating image: {err}")
                return None

    # If we've exhausted all retries
    st.error("The image generation service is currently unavailable. Please try again later.")
    return None
def edit_image(image_path, prompt, max_retries=3, initial_retry_delay=2):
    """
    Edit an existing image with Gemini (text and image to image).

    Example prompt: "Edit this image to make it look like a cartoon"

    The image at ``image_path`` is sent with the prompt. Text parts of the
    response are written to the Streamlit page; each returned image is shown
    with the default viewer, saved as ``edited-<original file name>`` in the
    current working directory and also handed to ``save_generated_image``.

    Args:
        image_path (str): The path to the image to edit.
        prompt (str): The prompt to edit the image with.
        max_retries (int, optional): Maximum number of retries after a "503 UNAVAILABLE" error. Defaults to 3.
        initial_retry_delay (int, optional): Delay in seconds before the first retry; doubled after each retry. Defaults to 2.

    Returns:
        str | None: File name of the edited image, or None when the response
        contained no image or the request failed.
    """
    image = Image.open(image_path)

    retry_count = 0
    retry_delay = initial_retry_delay

    while retry_count <= max_retries:
        try:
            # Pass the same key generate_gemini_image uses; when the variable
            # is unset this is None and the client falls back to its own
            # environment lookup, as before.
            client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))

            logger.info("Sending request to Gemini API for image editing")
            response = client.models.generate_content(
                model="gemini-2.0-flash-exp-image-generation",
                contents=[prompt, image],
                config=types.GenerateContentConfig(
                    response_modalities=['Text', 'Image']
                )
            )
            logger.info("Received response from Gemini API for image editing")

            edited_img_name = None
            for part in response.candidates[0].content.parts:
                if part.text is not None:
                    logger.info(f"Received text response: '{part.text[:100]}...'")
                    st.write(part.text)
                elif part.inline_data is not None:
                    logger.info("Received edited image data from Gemini")
                    edited_image = Image.open(BytesIO(part.inline_data.data))
                    edited_image.show()

                    # Save the edited image
                    edited_img_name = f'edited-{os.path.basename(image_path)}'
                    try:
                        logger.info(f"Saving edited image to: {edited_img_name}")
                        edited_image.save(edited_img_name)
                        # Re-read the saved file; the context manager closes
                        # the handle that was previously leaked.
                        with open(edited_img_name, "rb") as saved_file:
                            encoded_image = base64.b64encode(saved_file.read()).decode('utf-8')
                        # Dictionary in the format save_generated_image expects
                        img_response = {
                            "artifacts": [
                                {
                                    "base64": encoded_image
                                }
                            ]
                        }
                        save_generated_image(img_response)
                    except Exception as err:
                        logger.error(f"Failed to save edited image: {err}")
                        st.error(f"Failed to save edited image: {err}")

            logger.info(f"Image editing completed. Edited image name: {edited_img_name}")
            return edited_img_name

        except Exception as err:
            error_message = str(err)
            logger.error(f"Error in edit_image: {err}")

            # Only a 503 UNAVAILABLE error is retried, with exponential backoff.
            if "503 UNAVAILABLE" in error_message and retry_count < max_retries:
                retry_count += 1
                logger.info(f"Model is overloaded. Retrying in {retry_delay} seconds (attempt {retry_count}/{max_retries})")
                st.warning(f"The image editing service is currently busy. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2
            else:
                st.error(f"Error editing image: {err}")
                return None

    # If we've exhausted all retries
    st.error("The image editing service is currently unavailable. Please try again later.")
    return None

View File

@@ -1,59 +0,0 @@
# Ensure you sign up for an account to obtain an API key:
# https://platform.stability.ai/
# Your API key can be found here after account creation:
# https://platform.stability.ai/account/keys
import base64
import os
import requests
from PIL import Image
from io import BytesIO
import streamlit as st
from .save_image import save_generated_image
def generate_stable_diffusion_image(prompt):
    """
    Generate one 1024x1024 image with Stability AI's SDXL text-to-image API.

    The API host is read from ``API_HOST`` (default
    https://api.stability.ai) and the key from ``STABILITY_API_KEY``. The
    JSON response is handed to ``save_generated_image`` and every returned
    artifact is also opened in the default image viewer.

    Args:
        prompt (str): Text prompt describing the desired image.

    Returns:
        Value returned by ``save_generated_image``, or None when the API key
        is missing.

    Raises:
        Exception: If the API answers with a non-200 status.
    """
    engine_id = "stable-diffusion-xl-1024-v1-0"
    api_host = os.getenv('API_HOST', 'https://api.stability.ai')
    api_key = os.getenv("STABILITY_API_KEY")

    if api_key is None:
        st.warning("Missing Stability API key.")
        # Without a key the request can only be rejected; stop here instead
        # of sending "Bearer None".
        return None

    response = requests.post(
        f"{api_host}/v1/generation/{engine_id}/text-to-image",
        headers={
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}"
        },
        json={
            "text_prompts": [
                {
                    "text": prompt
                }
            ],
            "cfg_scale": 7,
            "height": 1024,
            "width": 1024,
            "samples": 1,
            "steps": 30,
        },
        # requests never times out by default; bound the wait so a stalled
        # connection cannot hang the caller forever.
        timeout=120,
    )

    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))

    data = response.json()
    img_path = save_generated_image(data)

    for image in data["artifacts"]:
        # Decode the base64 payload and display it with PIL.
        img_data = base64.b64decode(image["base64"])
        img = Image.open(BytesIO(img_data))
        img.show()

    return img_path

View File

@@ -1,51 +0,0 @@
from loguru import logger
import sys
from PIL import Image
from openai import OpenAI
def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024", response_format="url"):
    """
    Generate variations of an image using OpenAI's image variation API.

    The source image is first flattened onto a white background and
    re-saved as PNG at ``img_path`` (overwriting the original file), then
    uploaded to the variation endpoint.

    Args:
        img_path (str): Path to the original image file; it is rewritten in place.
        image_dir (str): Directory passed to ``save_generated_image``.
        num_img (int, optional): Number of image variations to generate. Defaults to 1.
        img_size (str, optional): Size of the generated images. Defaults to "1024x1024".
        response_format (str, optional): Format in which the generated images are returned. Defaults to "url".

    Returns:
        str: Value returned by ``save_generated_image`` (path of the saved variation).

    Raises:
        SystemExit: If any step fails.
    """
    try:
        logger.info(f"Starting image variation generation for: {img_path}")

        # Convert and prepare the image: composite over white so the
        # transparent areas become opaque.
        png = Image.open(img_path).convert('RGBA')
        background = Image.new('RGBA', png.size, (255, 255, 255))
        alpha_composite = Image.alpha_composite(background, png)
        alpha_composite.save(img_path, 'PNG', quality=80)
        logger.info("Image prepared for variation generation.")

        client = OpenAI()
        # Binary mode must not be given an encoding (open() raises
        # ValueError); the context manager also closes the upload handle.
        with open(img_path, "rb") as image_file:
            variation_response = client.images.create_variation(
                image=image_file,
                n=num_img,
                size=img_size,
                response_format=response_format
            )

        # Saving the generated image
        # NOTE(review): save_generated_image is not among the imports visible
        # for this module -- confirm it is brought into scope.
        generated_image_path = save_generated_image(variation_response, image_dir)
        logger.info(f"Image variation generated and saved to: {generated_image_path}")
        return generated_image_path

    except Exception as e:
        logger.error(f"Error occurred during image variation generation: {e}")
        sys.exit(f"Exiting due to critical error: {e}")

View File

@@ -1,163 +0,0 @@
#########################################################
#
# This module will generate images for the blogs using APIs
# from Dall-E and other free resources. Given a prompt, the
# images will be stored in local directory.
# Required: openai API key.
#
#########################################################
# imports
import os
import sys
import datetime
import streamlit as st
import openai # OpenAI Python library to make API calls
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
#from .gen_dali2_images
from .gen_dali3_images import generate_dalle3_images
from .gen_stabl_diff_img import generate_stable_diffusion_image
from ..text_generation.main_text_generation import llm_text_gen
from .gen_gemini_images import generate_gemini_image
def generate_image(user_prompt, title=None, description=None, tags=None, content=None, aspect_ratio="16:9"):
    """
    Create an image for the given prompt using the configured image engine.

    An enhanced prompt is first built from the user prompt and the optional
    context, then passed to the engine named by ``image_engine`` (currently
    hard-coded to 'Gemini-AI').

    Args:
        user_prompt (str): Text description of the desired image.
        title (str, optional): Title forwarded to the prompt enhancer.
        description (str, optional): Description forwarded to the prompt enhancer.
        tags (list, optional): Tags forwarded to the prompt enhancer.
        content (str, optional): Content forwarded to the prompt enhancer.
        aspect_ratio (str, optional): Aspect ratio appended to the prompt and
            passed to the Gemini generator. Defaults to "16:9".

    Returns:
        Whatever the selected generator returns, or None when no prompt was
        given or generation failed (failures are logged and shown as a
        Streamlit warning).
    """
    # FIXME: Need to remove default value to match sidebar input.
    image_engine = 'Gemini-AI'

    if not user_prompt:
        logger.error("Skipping Image creation, No prompt provided.")
        return None

    try:
        # Use enhanced prompt generator with all available parameters
        img_prompt = generate_enhanced_img_prompt(user_prompt, title, description, tags, content)
        if aspect_ratio:
            img_prompt += f"\n\nAspect ratio: {aspect_ratio}"

        image_stored_at = None
        if 'Dalle3' in image_engine:
            logger.info(f"Calling Dalle3 text-to-image with prompt: {img_prompt}")
            # NOTE(review): generate_dalle3_images also requires image_dir;
            # this call would fail if the branch were ever selected -- confirm.
            image_stored_at = generate_dalle3_images(img_prompt)
        elif 'Stability-AI' in image_engine:
            logger.info(f"Calling Stable diffusion text-to-image with prompt: \n{img_prompt}")
            image_stored_at = generate_stable_diffusion_image(img_prompt)
        elif 'Gemini-AI' in image_engine:
            logger.info(f"Calling Gemini text-to-image with prompt: \n{img_prompt}")
            image_stored_at = generate_gemini_image(img_prompt, aspect_ratio=aspect_ratio)
        return image_stored_at
    except Exception as err:
        logger.error(f"Failed to generate Image: {err}")
        st.warning(f"Failed to generate Image: {err}")
    return None
def generate_img_prompt(user_prompt):
    """
    Ask the text LLM to turn free-form user text into an image-generation prompt.

    Args:
        user_prompt (str): Text the image should be relevant to.

    Returns:
        The value returned by ``llm_text_gen`` for the instruction prompt.
    """
    instruction = f"""
As an expert prompt generator for AI text to image models and artist, I will provide you with 'user text' for creating images.
Your task is to create a prompt for a highly relevant image from given 'user text'.
\n
Choose from various art styles, utilize light & shadow effects etc.
Make sure to avoid common image generation mistakes.
Reply with only one answer, no descrition and in plaintext.
Make sure your prompt is detailed and creative descriptions that will inspire unique and interesting images from the AI.
\n\nuser text:
'''{user_prompt}'''"""
    return llm_text_gen(instruction)
def generate_enhanced_img_prompt(user_prompt, title=None, description=None, tags=None, content=None):
    """Build a context-rich image-generation prompt with the help of the LLM.

    The user prompt is combined with any available title, description, tags
    and a short content excerpt, and the LLM is asked to produce the final
    image prompt from that context.

    Args:
        user_prompt (str): Base prompt from the user.
        title (str, optional): Blog title or content title.
        description (str, optional): Blog or content description/summary.
        tags (list | str, optional): Tags related to the content. A single
            string is treated as one tag; only the first five tags are used.
        content (str, optional): Actual content or excerpt; only the first
            300 characters are included.

    Returns:
        The enhanced prompt from ``llm_text_gen``. If that call raises or
        returns an empty result, the result of ``generate_img_prompt`` for
        ``user_prompt`` is returned instead.
    """
    # Start with the base prompt and append whatever context is available.
    context_parts = [user_prompt]
    if title:
        context_parts.append(f"Title: {title}")
    if description:
        context_parts.append(f"Description: {description}")
    if tags:
        # A bare string would otherwise be sliced and joined character by
        # character; non-string tags would make str.join raise.
        tag_list = [tags] if isinstance(tags, str) else list(tags)
        tag_text = ", ".join(str(tag) for tag in tag_list[:5])  # cap the noise
        context_parts.append(f"Tags: {tag_text}")
    combined_context = "\n".join(context_parts)

    # Only a short excerpt is included to keep the request small.
    content_excerpt = ""
    if content:
        content_excerpt = content[:300] + "..." if len(content) > 300 else content

    # Template body is flush-left: its text is sent to the model verbatim.
    prompt = f"""
As an expert prompt engineer for AI image generation models, create a detailed, creative prompt
for generating a high-quality, relevant image based on the following context:
{combined_context}
Additional content excerpt:
{content_excerpt}
Your task is to:
1. Analyze the context and content to understand the main theme and subject
2. Create a rich, detailed prompt for image generation (50-75 words)
3. Include specific visual details, art style, mood, lighting, composition
4. Make sure the prompt is highly relevant to the original context
5. Avoid prohibited content or anything that violates image generation guidelines
Reply with ONLY the final prompt. No explanations or other text.
"""
    try:
        enhanced_prompt = llm_text_gen(prompt)
        if not enhanced_prompt:
            # Treat an empty/None reply as a failure instead of passing it on.
            raise ValueError("LLM returned an empty image prompt")
        logger.info(f"Generated enhanced image prompt: {enhanced_prompt[:100]}...")
        return enhanced_prompt
    except Exception as e:
        logger.error(f"Error generating enhanced prompt: {e}")
        # Fall back to the simple prompt generation if enhanced fails
        return generate_img_prompt(user_prompt)

View File

@@ -1,39 +0,0 @@
import base64
import datetime
import os
import requests
from PIL import Image
import logging
def save_generated_image(img_generation_response):
    """Decode the base64 image artifacts of a response and save them to disk.

    Files are written to the directory named by the ``IMG_SAVE_DIR``
    environment variable (``generated_images`` when unset or empty). The first
    artifact is saved as ``generated_image_<timestamp>.webp``; any further
    artifacts get an ``_<index>`` suffix so they no longer overwrite each
    other.

    Args:
        img_generation_response (dict): Response holding an ``"artifacts"``
            list whose items carry the image bytes under ``"base64"``.

    Returns:
        str | None: Path of the first saved image, or ``None`` when the
        response has no artifacts or decoding/writing fails.
    """
    # basicConfig does nothing when the root logger already has handlers.
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Fall back to a local directory; an empty value would break makedirs.
    image_save_dir = os.getenv('IMG_SAVE_DIR') or 'generated_images'
    if not os.path.exists(image_save_dir):
        logger.info(f"Creating image save directory: {image_save_dir}")
        os.makedirs(image_save_dir, exist_ok=True)

    timestamp = f"{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}"
    saved_paths = []
    try:
        for index, image in enumerate(img_generation_response["artifacts"]):
            # Index 0 keeps the historical file name; later ones are suffixed.
            suffix = "" if index == 0 else f"_{index}"
            image_path = os.path.join(
                image_save_dir, f"generated_image_{timestamp}{suffix}.webp"
            )
            with open(image_path, "wb") as f:
                f.write(base64.b64decode(image["base64"]))
            saved_paths.append(image_path)
    except Exception as e:
        # No network call happens here, so one generic handler is enough.
        logger.error(f"Error saving image: {e}")
        return None

    if not saved_paths:
        logger.error("Error saving image: response contained no artifacts")
        return None

    logger.info(f"Saved image at path: {saved_paths[0]}")
    return saved_paths[0]

View File

@@ -1,88 +0,0 @@
# Content Style Analyzer Guide
## What is the Content Style Analyzer?
The Content Style Analyzer is an AI-powered tool that helps you understand and improve your writing style. It analyzes your content to provide detailed insights about your writing approach, helping you create more consistent and engaging content.
## What Can It Do?
### 1. Writing Style Analysis
The analyzer examines your content to identify:
- **Tone**: Whether your writing is formal, casual, technical, or conversational
- **Voice**: If you're using active or passive voice
- **Complexity**: How complex your writing is (simple, moderate, or complex)
- **Engagement Level**: How engaging your content is (low, medium, or high)
### 2. Content Characteristics
It provides insights about:
- **Sentence Structure**: How your sentences are organized
- **Vocabulary Level**: Whether you're using basic, intermediate, or advanced vocabulary
- **Paragraph Organization**: How your paragraphs flow together
- **Content Flow**: How well your ideas progress throughout the content
### 3. Target Audience Analysis
The tool helps you understand:
- **Demographics**: Who your content appeals to
- **Expertise Level**: Whether it's suitable for beginners, intermediate, or advanced readers
- **Industry Focus**: Which industry your content is targeting
- **Geographic Focus**: Which regions your content is most relevant for
### 4. Content Type Assessment
It identifies:
- **Primary Type**: Whether it's a blog post, article, product description, etc.
- **Secondary Types**: Other content categories it might fit into
- **Purpose**: Whether it's meant to inform, entertain, persuade, etc.
- **Call to Action**: How effectively you're guiding readers to take action
### 5. Style Pattern Analysis
The analyzer also looks for specific patterns in your writing:
- **Sentence Patterns**: How your sentences are structured
- **Word Patterns**: Your vocabulary choices and frequency
- **Rhetorical Devices**: Literary techniques you're using
## How to Use It
1. **Input Your Content**: Provide your content, including:
   - Main content text
   - Title
   - Description
2. **Get Analysis**: The tool will analyze your content and provide detailed insights
3. **Review Recommendations**: Receive suggestions for:
   - Writing tone
   - Target audience
   - Content type
   - Creativity level
   - Geographic focus
## Benefits for Content Creators
1. **Consistency**: Maintain a consistent writing style across your content
2. **Audience Alignment**: Ensure your content matches your target audience's expectations
3. **Quality Improvement**: Identify areas where your writing can be enhanced
4. **Style Optimization**: Get recommendations for better engagement
5. **Content Strategy**: Make data-driven decisions about your content approach
## Tips for Best Results
1. **Provide Complete Content**: Include all relevant sections (title, description, main content)
2. **Keep Content Length Reasonable**: Only the first 4,000 characters of the main content are sent for analysis, so make sure the most representative text comes first
3. **Review All Sections**: Pay attention to all aspects of the analysis for comprehensive insights
4. **Use Recommendations**: Apply the suggested improvements to enhance your content
## Understanding the Results
The analysis results are presented in a clear, structured format that helps you:
- Identify your current writing style
- Understand your content's strengths
- Spot areas for improvement
- Make informed decisions about future content
## Need Help?
If you encounter any issues or have questions about the analysis results, please refer to your content team or technical support for assistance.
---
*Note: This tool is designed to help content creators improve their writing style and content quality. It uses advanced AI technology to provide detailed insights and recommendations.*

View File

@@ -1,203 +0,0 @@
"""Style analyzer module for analyzing content style using LLM."""
from typing import Dict, List, Optional
from loguru import logger
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
import json
import re
class StyleAnalyzer:
    """Analyze the writing style of content by asking an LLM for JSON output.

    Both public methods build a prompt, send it through ``llm_text_gen`` and
    parse the reply as JSON. On failure they return a dict with an ``"error"``
    key (and ``"success": False``) instead of raising.
    """

    # Number of leading characters of the main content embedded in a prompt.
    MAX_CONTENT_CHARS = 4000

    def __init__(self):
        """Initialize the style analyzer."""
        logger.info("[StyleAnalyzer.__init__] Initializing style analyzer")

    @staticmethod
    def _extract_json_text(text) -> str:
        """Return the JSON portion of a raw LLM reply, without any logging.

        Markdown code-fence markers are removed. When the remaining text
        contains a ``{ ... }`` span, only that span is returned so that any
        commentary around the object does not break parsing. Non-string or
        empty input yields an empty string.
        """
        if not isinstance(text, str) or not text:
            return ""
        cleaned = text.replace("```json", "").replace("```", "").strip()
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start != -1 and end > start:
            cleaned = cleaned[start:end + 1]
        return cleaned

    def _clean_json_response(self, text: str) -> str:
        """
        Clean the LLM response to extract valid JSON.

        Args:
            text (str): Raw response from LLM

        Returns:
            str: Cleaned JSON string, or "" when nothing usable was found
        """
        try:
            cleaned_string = self._extract_json_text(text)
            # Log the cleaned JSON for debugging
            logger.debug(f"[StyleAnalyzer._clean_json_response] Cleaned JSON: {cleaned_string}")
            return cleaned_string
        except Exception as e:
            logger.error(f"[StyleAnalyzer._clean_json_response] Error cleaning response: {str(e)}")
            return ""

    def _parse_json_response(self, analysis_text: str, caller: str) -> Dict:
        """Parse an LLM reply as JSON, retrying once after a whitespace repair.

        Args:
            analysis_text (str): Raw response from the LLM.
            caller (str): Name of the calling method, used in log prefixes.

        Returns:
            Dict: The decoded JSON value.

        Raises:
            ValueError: If no JSON text could be extracted from the reply.
            json.JSONDecodeError: If parsing still fails after the repair.
        """
        cleaned_json = self._clean_json_response(analysis_text)
        if not cleaned_json:
            raise ValueError("No valid JSON found in response")
        try:
            return json.loads(cleaned_json)
        except json.JSONDecodeError as e:
            logger.warning(f"[StyleAnalyzer.{caller}] Initial JSON parsing failed: {e}")
            # Replace raw newlines (except right after a quote or backslash)
            # and literal "\n" escape sequences with spaces, then retry.
            repaired = re.sub(r'([^"\\])\n', r'\1 ', cleaned_json)
            repaired = re.sub(r'\\n', ' ', repaired)
            return json.loads(repaired)

    def analyze_content_style(self, content: Dict) -> Dict:
        """
        Analyze the style of the provided content.

        Args:
            content (Dict): Content to analyze; the keys ``main_content``,
                ``title`` and ``description`` are read (missing or ``None``
                values are treated as empty strings).

        Returns:
            Dict: Parsed analysis on success; otherwise a dict with ``error``
            (plus ``raw_response`` when the reply could not be parsed).
        """
        try:
            logger.info("[StyleAnalyzer.analyze_content_style] Starting content style analysis")
            # `or ""` also covers keys that are present but set to None.
            main_content = content.get("main_content", "") or ""
            title = content.get("title", "") or ""
            description = content.get("description", "") or ""
            # Limit content length for the API. This note must stay outside
            # the prompt text, otherwise it is sent to the model.
            excerpt = main_content[:self.MAX_CONTENT_CHARS]
            prompt = f"""Analyze the following content and provide a comprehensive writing style analysis.
Focus on identifying the writing style, tone, and characteristics that make this content unique.
Title: {title}
Description: {description}
Content: {excerpt}
IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
{{
"writing_style": {{
"tone": "formal/casual/technical/etc",
"voice": "active/passive",
"complexity": "simple/moderate/complex",
"engagement_level": "low/medium/high"
}},
"content_characteristics": {{
"sentence_structure": "description",
"vocabulary_level": "basic/intermediate/advanced",
"paragraph_organization": "description",
"content_flow": "description"
}},
"target_audience": {{
"demographics": ["list"],
"expertise_level": "beginner/intermediate/advanced",
"industry_focus": "primary industry",
"geographic_focus": "primary region"
}},
"content_type": {{
"primary_type": "blog/article/product/etc",
"secondary_types": ["list"],
"purpose": "inform/entertain/persuade/etc",
"call_to_action": "type and frequency"
}},
"recommended_settings": {{
"writing_tone": "recommended tone",
"target_audience": "recommended audience",
"content_type": "recommended type",
"creativity_level": "low/medium/high",
"geographic_location": "recommended location"
}}
}}"""
            logger.debug("[StyleAnalyzer.analyze_content_style] Sending prompt to LLM")
            analysis_text = llm_text_gen(prompt)
            try:
                analysis = self._parse_json_response(analysis_text, "analyze_content_style")
                logger.info("[StyleAnalyzer.analyze_content_style] Successfully parsed analysis results")
                return analysis
            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_content_style] Failed to parse JSON response: {e}")
                logger.debug(f"[StyleAnalyzer.analyze_content_style] Raw response: {analysis_text}")
                return {
                    "error": "Failed to parse analysis results",
                    "raw_response": analysis_text,
                    "success": False
                }
        except Exception as e:
            logger.error(f"[StyleAnalyzer.analyze_content_style] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False
            }

    def analyze_style_patterns(self, content: Dict) -> Dict:
        """
        Analyze specific writing style patterns in the content.

        Args:
            content (Dict): Content to analyze; only ``main_content`` is read.

        Returns:
            Dict: Parsed pattern analysis on success; otherwise a dict with
            ``error`` (plus ``raw_response`` when the reply could not be
            parsed).
        """
        try:
            main_content = content.get("main_content", "") or ""
            excerpt = main_content[:self.MAX_CONTENT_CHARS]
            prompt = f"""Analyze the following content for specific writing style patterns.
Focus on identifying recurring patterns in sentence structure, word choice, and rhetorical devices.
Content: {excerpt}
IMPORTANT: Respond ONLY with a JSON object in the following format. Do not include any additional text, explanations, or markdown formatting:
{{
"sentence_patterns": {{
"structure": ["list of patterns"],
"length": "short/medium/long",
"complexity": "simple/moderate/complex"
}},
"word_patterns": {{
"vocabulary": ["list of patterns"],
"frequency": "low/medium/high",
"diversity": "low/medium/high"
}},
"rhetorical_devices": {{
"types": ["list of devices"],
"frequency": "low/medium/high",
"effectiveness": "low/medium/high"
}}
}}"""
            analysis_text = llm_text_gen(prompt)
            try:
                return self._parse_json_response(analysis_text, "analyze_style_patterns")
            except json.JSONDecodeError as e:
                logger.error(f"[StyleAnalyzer.analyze_style_patterns] Failed to parse JSON response: {e}")
                return {
                    "error": "Failed to parse pattern analysis results",
                    "raw_response": analysis_text,
                    "success": False
                }
        except Exception as e:
            logger.error(f"[StyleAnalyzer.analyze_style_patterns] Error during analysis: {str(e)}")
            return {
                "error": str(e),
                "success": False
            }

View File

@@ -1,159 +0,0 @@
# ALwrity Setup Guide: API Key Manager
## What is the API Key Manager?
The API Key Manager is a crucial component of ALwrity that helps you set up and configure all the necessary API keys and settings for your content creation workflow. It provides a user-friendly wizard interface to guide you through the setup process step by step.
## Setup Wizard Steps
### 1. Website Setup
- **Purpose**: Configure your website's basic information
- **Features**:
- Website URL configuration
- Site structure setup
- Basic SEO settings
- Content organization preferences
### 2. AI Research Setup
- **Purpose**: Set up AI-powered research capabilities
- **Features**:
- Research parameters configuration
- Data collection preferences
- Analysis settings
- Research depth options
### 3. AI Providers Configuration
- **Purpose**: Configure AI service providers
- **Supported Providers**:
  - OpenAI (GPT models)
  - Google (Gemini Pro)
  - Anthropic (Claude)
  - DeepSeek
- **Features**:
  - API key management
  - Model selection
  - Usage preferences
  - Cost optimization settings
### 4. Personalization Setup
- **Purpose**: Customize your content creation experience
- **Features**:
- Writing style preferences
- Tone settings
- Content structure templates
- Brand voice configuration
### 5. ALwrity Integrations
- **Purpose**: Set up additional tools and services
- **Features**:
- Third-party service connections
- Plugin configurations
- API integrations
- Workflow automation settings
### 6. Final Setup
- **Purpose**: Complete and verify your configuration
- **Features**:
- Configuration review
- Settings verification
- Test connections
- Setup completion
## How to Use the Setup Wizard
### 1. Starting the Setup
1. Launch ALwrity
2. Navigate to the Setup section
3. Begin the wizard process
### 2. Navigation
- Use the step indicator to track progress
- Navigate between steps using buttons
- Progress is saved automatically
- Return to previous steps if needed
### 3. Configuration Process
1. **Enter Information**: Fill in required details
2. **Verify Settings**: Review your inputs
3. **Test Connections**: Ensure everything works
4. **Complete Setup**: Finalize your configuration
## Managing API Keys
### 1. Key Storage
- Secure storage of API keys
- Environment variable management
- Key rotation support
- Access control
### 2. Key Validation
- Automatic key verification
- Usage monitoring
- Error handling
- Expiration tracking
### 3. Security Features
- Encrypted storage
- Access logging
- Permission management
- Secure transmission
## Progress Tracking
### 1. Setup Progress
- Visual progress indicator
- Step completion tracking
- Overall setup status
- Remaining tasks
### 2. Status Monitoring
- API key status
- Connection status
- Configuration status
- Error reporting
## Best Practices
### 1. Before Setup
- Gather all necessary API keys
- Review provider documentation
- Plan your configuration
- Backup existing settings
### 2. During Setup
- Follow the wizard steps
- Verify each configuration
- Test connections
- Save progress regularly
### 3. After Setup
- Review all settings
- Test functionality
- Document configurations
- Monitor usage
## Troubleshooting
### 1. Common Issues
- Invalid API keys
- Connection problems
- Configuration errors
- Setup interruptions
### 2. Solutions
- Key verification
- Connection testing
- Error logging
- Support resources
## Need Help?
If you encounter any issues during setup:
1. Check the error messages
2. Review the documentation
3. Verify your API keys
4. Contact ALwrity support
---
*Note: Keep your API keys secure and never share them. The API Key Manager helps you manage these keys safely while setting up ALwrity for optimal content creation.*

View File

@@ -1,37 +1,54 @@
"""API key manager package."""
"""API Key Manager package for ALwrity."""
from .manager import APIKeyManager
from .api_key_manager import (
initialize_wizard_state,
update_progress,
check_all_api_keys,
render,
render_navigation
from .api_key_manager import render, check_onboarding_completion, get_onboarding_status, reset_onboarding
from .onboarding_progress import (
OnboardingProgress,
get_onboarding_progress,
render_progress_indicator,
render_resume_message,
StepStatus,
StepData
)
from .components import (
render_website_setup,
render_ai_research_setup,
render_ai_providers,
render_final_setup,
render_personalization_setup,
render_alwrity_integrations,
from .validation import check_all_api_keys
from .components.base import (
render_step_indicator,
render_navigation_buttons,
render_step_indicator
render_step_validation,
render_resume_options
)
# Export all public components
__all__ = [
# Main classes
'APIKeyManager',
'initialize_wizard_state',
'update_progress',
'check_all_api_keys',
'OnboardingProgress',
'StepStatus',
'StepData',
# Main functions
'render',
'render_navigation',
'render_website_setup',
'render_ai_research_setup',
'render_ai_providers',
'render_final_setup',
'render_personalization_setup',
'render_alwrity_integrations',
'check_onboarding_completion',
'get_onboarding_status',
'reset_onboarding',
'get_onboarding_progress',
# UI components
'render_progress_indicator',
'render_resume_message',
'render_step_indicator',
'render_navigation_buttons',
'render_step_indicator'
]
'render_step_validation',
'render_resume_options',
# Validation
'check_all_api_keys'
]
# Version information
__version__ = "2.0.0"
__author__ = "ALwrity Team"
__description__ = "Comprehensive API key management and onboarding system for ALwrity"
# Note: FastAPI endpoints have been moved to the backend/ directory
# for better separation of concerns and enterprise architecture.

View File

@@ -1,165 +0,0 @@
"""API key manager for handling various API keys."""
from typing import Dict, Any, Optional
from loguru import logger
import streamlit as st
import os
import json
import sys
from datetime import datetime
from dotenv import load_dotenv
from .components.website_setup import render_website_setup
from .components.ai_research_setup import render_ai_research_setup
from .components.ai_providers import render_ai_providers
from .components.final_setup import render_final_setup
from .components.personalization_setup import render_personalization_setup
from .components.alwrity_integrations import render_alwrity_integrations
from .components.base import render_navigation_buttons, render_step_indicator
from .wizard_state import initialize_wizard_state, get_current_step, next_step, previous_step
from .manager import APIKeyManager
from .validation import check_all_api_keys
# Configure logger to output to both file and stdout.
# NOTE: this runs at import time and reconfigures the `logger` object imported
# from loguru above, so importing this module changes logging for every module
# that shares that logger.
logger.remove() # Remove default handler
# File sink: records at DEBUG level and above go to logs/api_key_manager.log.
logger.add("logs/api_key_manager.log",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="DEBUG")
# Console sink: same record format, but only INFO level and above.
logger.add(sys.stdout,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="INFO")
def initialize_wizard_state():
    """Ensure ``st.session_state.wizard_state`` exists.

    An existing state is left untouched; otherwise a fresh one with zeroed
    counters and empty collections is stored in the session.
    """
    logger.debug("Initializing wizard state")
    # Guard clause: nothing to do when a state is already present.
    if 'wizard_state' in st.session_state:
        return

    fresh_state = {
        'current_step': 0,
        'total_steps': 0,
        'completed_steps': set(),
        'api_keys_status': {},
        'setup_progress': 0
    }
    st.session_state.wizard_state = fresh_state
    logger.info("Created new wizard state")
def update_progress():
    """Recompute the setup progress and store it in the wizard state.

    Progress is the percentage of API keys whose status entry has
    ``configured`` set, relative to the total number of keys across
    ``api_key_manager.api_key_groups``. The result is written to
    ``wizard_state['setup_progress']``. Nothing is changed when the API key
    manager or the wizard state is missing from the session.
    """
    logger.debug("Updating setup progress")
    try:
        # Get the API key manager instance from session state
        api_key_manager = st.session_state.get('api_key_manager')
        if not api_key_manager:
            logger.warning("API key manager not found in session state")
            return

        wizard_state = st.session_state.get('wizard_state')
        if wizard_state is None:
            logger.warning("Wizard state not found in session state")
            return

        total_keys = sum(len(keys) for keys in api_key_manager.api_key_groups.values())
        # The state may have been created without an 'api_keys_status' entry.
        key_statuses = wizard_state.get('api_keys_status', {})
        configured_keys = sum(
            1 for status in key_statuses.values() if status.get('configured', False)
        )
        # No keys to configure means no progress; also avoids dividing by zero.
        progress = (configured_keys / total_keys) * 100 if total_keys else 0
        wizard_state['setup_progress'] = progress
        logger.info(f"Updated progress to {progress:.1f}%")
    except Exception as e:
        # logger.exception attaches the traceback; loguru has no `exc_info`
        # option, and passing it as a keyword makes loguru format the message.
        logger.exception(f"Error updating progress: {str(e)}")
def render(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """
    Render the API key manager interface for the current wizard step.

    ``st.session_state.current_step`` is initialised to 1 when missing, the
    step indicator is drawn for a six-step wizard, and the renderer for the
    current step is called with ``api_key_manager``.

    Args:
        api_key_manager (APIKeyManager): Manager passed on to the step renderer.

    Returns:
        Dict[str, Any]: Whatever the step renderer returns, or a dict with
        ``current_step`` and ``error`` when the step is unknown or rendering
        raises.
    """
    try:
        logger.info("[render] Rendering API key manager interface")
        # Initialize session state for current step if not exists
        if "current_step" not in st.session_state:
            st.session_state.current_step = 1
            logger.info("[render] Initialized current_step to 1")

        # Step number -> (log label, renderer)
        step_renderers = {
            1: ("AI providers setup", render_ai_providers),
            2: ("website setup", render_website_setup),
            3: ("AI Research setup", render_ai_research_setup),
            4: ("personalization setup", render_personalization_setup),
            5: ("ALwrity integrations setup", render_alwrity_integrations),
            6: ("final setup", render_final_setup),
        }

        current_step = st.session_state.current_step
        # Display step indicator
        render_step_indicator(current_step, len(step_renderers))

        entry = step_renderers.get(current_step)
        if entry is None:
            # Previously this fell through and returned None.
            error_msg = f"Error in API key manager: unknown step {current_step!r}"
            logger.error(f"[render] {error_msg}")
            st.error(error_msg)
            return {"current_step": current_step, "error": error_msg}

        label, renderer = entry
        logger.info(f"[render] Rendering {label}")
        return renderer(api_key_manager)
    except Exception as e:
        error_msg = f"Error in API key manager: {str(e)}"
        logger.error(f"[render] {error_msg}")
        st.error(error_msg)
        # .get() so the error path cannot fail on a missing step.
        return {"current_step": st.session_state.get("current_step"), "error": error_msg}
def render_navigation(self):
    """Draw the Back / Continue buttons and advance ``self.current_step``.

    Continuing from step 1 requires ``self.validate_providers()`` to pass and
    stores every API key in ``st.session_state['api_keys']``; continuing from
    step 2 requires ``self.validate_wordpress_credentials()`` to pass and
    stores the WordPress credentials in
    ``st.session_state['wordpress_credentials']``. A failed validation shows
    an error and returns before the closing ``</div>`` is written.
    """

    def wordpress_settings():
        # Built on demand so the attributes are read at the same point as before.
        return {
            'url': self.wordpress_url,
            'username': self.wordpress_username,
            'password': self.wordpress_password,
            'app_password': self.wordpress_app_password
        }

    st.markdown('\n<div class="nav-buttons">\n', unsafe_allow_html=True)

    # Back button
    if self.current_step > 1 and st.button("← Back", key="back_button"):
        self.current_step -= 1
        st.rerun()

    # Next/Continue button
    if self.current_step < 3 and st.button("Continue →", key="next_button"):
        if self.current_step == 1:
            # At least one provider must be configured before moving on.
            if not self.validate_providers():
                st.error("Please configure at least one AI provider to continue.")
                return
            st.session_state['api_keys'] = {
                'openai': self.openai_key,
                'google': self.google_key,
                'anthropic': self.anthropic_key,
                'mistral': self.mistral_key,
                'serpapi': self.serpapi_key,
                'google_search': self.google_search_key,
                'google_search_cx': self.google_search_cx,
                'bing_search': self.bing_search_key,
                'tavily': self.tavily_key,
                'metaphor': self.metaphor_key,
                'wordpress': wordpress_settings()
            }
            self.current_step = 2
            st.rerun()
        elif self.current_step == 2:
            # WordPress credentials must validate before moving on.
            if not self.validate_wordpress_credentials():
                st.error("Please configure valid WordPress credentials to continue.")
                return
            st.session_state['wordpress_credentials'] = wordpress_settings()
            self.current_step = 3
            st.rerun()

    st.markdown("</div>", unsafe_allow_html=True)

View File

@@ -1,76 +0,0 @@
"""API key manager components."""
import asyncio
import streamlit as st
import os
from loguru import logger
from .styles import API_KEY_MANAGER_STYLES
from .config import FEATURE_PREVIEWS, API_KEY_CONFIGS
from .wizard_state import (
get_current_step,
next_step,
previous_step,
set_selected_providers,
get_selected_providers,
set_website_url,
get_website_url,
set_api_key,
get_api_key,
can_proceed_to_next_step,
get_api_keys
)
from .health_monitor import APIKeyHealthMonitor
from .key_rotation import KeyRotationManager
from ...utils.website_analyzer import analyze_website
from .api_key_tests import (
test_openai_api_key,
test_gemini_api_key,
test_anthropic_api_key,
test_deepseek_api_key,
test_mistral_api_key
)
from .components.base import render_step_indicator, render_navigation_buttons, render_success_message
from .components import (
render_ai_providers,
render_website_setup,
render_health_monitoring,
render_ai_research_setup,
render_final_setup
)
def render_wizard():
    """Draw the wizard page: title, step indicator, current step, navigation.

    The step number from ``get_current_step()`` selects which step view is
    rendered; a number outside 1-5 renders no step content.
    """
    st.title("API Key Setup Wizard")
    active_step = get_current_step()
    render_step_indicator()

    # Step number -> view function
    step_views = {
        1: render_ai_providers,
        2: render_website_setup,
        3: render_ai_research_setup,
        4: render_final_setup,
        5: render_health_monitoring,
    }
    view = step_views.get(active_step)
    if view is not None:
        view()

    render_navigation_buttons()
# Names exported by `from <this module> import *`: the wizard entry point
# defined above plus the step/base render helpers imported at the top of the
# module.
__all__ = [
'render_wizard',
'render_step_indicator',
'render_navigation_buttons',
'render_success_message',
'render_ai_providers',
'render_website_setup',
'render_ai_research_setup',
'render_health_monitoring',
'render_final_setup'
]

View File

@@ -1,281 +0,0 @@
"""AI providers setup component."""
import streamlit as st
from loguru import logger
from typing import Dict, Any
from ..manager import APIKeyManager
from .base import render_navigation_buttons, render_step_indicator, render_tab_style
from ..wizard_state import next_step, update_progress
from datetime import datetime
import os
from dotenv import load_dotenv
def validate_api_key(key: str) -> bool:
    """Return True when *key* is a string with at least one non-blank character.

    This is only a presence check; the key's format is not inspected and it is
    not verified against the provider. ``None``, empty or whitespace-only
    strings, and non-string values all yield ``False``.
    """
    return isinstance(key, str) and bool(key.strip())
def save_to_env_file(key_name: str, key_value: str) -> bool:
    """Save API key to .env file.

    The ``.env`` file is looked up five directory levels above this module.
    An existing ``KEY=`` line is replaced in place; otherwise the entry is
    appended. The environment is then reloaded with ``load_dotenv``.

    Args:
        key_name (str): Environment variable name.
        key_value (str): Value to store.

    Returns:
        bool: True on success, False if reading, writing or reloading failed
        (the error is logged).
    """
    try:
        # Walk up five directory levels from this file to reach the .env location.
        env_dir = __file__
        for _ in range(5):
            env_dir = os.path.dirname(env_dir)
        env_path = os.path.join(env_dir, '.env')

        # Read existing .env file
        env_contents = []
        if os.path.exists(env_path):
            with open(env_path, 'r', encoding='utf-8') as f:
                env_contents = f.readlines()

        new_entry = f"{key_name}={key_value}\n"
        for i, line in enumerate(env_contents):
            if line.startswith(f"{key_name}="):
                # Key already present: replace it in place.
                env_contents[i] = new_entry
                break
        else:
            # Appending: make sure the previous last line is terminated,
            # otherwise the new entry would be glued onto it.
            if env_contents and not env_contents[-1].endswith("\n"):
                env_contents[-1] += "\n"
            env_contents.append(new_entry)

        # Write back to .env file
        with open(env_path, 'w', encoding='utf-8') as f:
            f.writelines(env_contents)

        # Reload environment variables to ensure consistency
        load_dotenv(override=True)
        logger.info(f"[save_to_env_file] Successfully saved {key_name} to .env file")
        return True
    except Exception as e:
        logger.error(f"[save_to_env_file] Error saving to .env file: {str(e)}")
        return False
# Markup and help texts used by render_ai_providers. They are module-level and
# flush-left so no source indentation ends up in the rendered markdown.
_SETUP_HEADER_HTML = """
<div class='setup-header'><h2>🤖 AI LLM Providers Setup</h2></div>
"""

_KEY_STATUS_VALID_HTML = """
<div class="ai-provider-status status-valid">
✓ API key configured
</div>
"""

_KEY_STATUS_INVALID_HTML = """
<div class="ai-provider-status status-invalid">
⚠️ Invalid API key format
</div>
"""

_OPENAI_KEY_GUIDE = """
**Step-by-step guide:**
1. Go to [OpenAI's website](https://platform.openai.com)
2. Sign up or log in to your account
3. Navigate to the API section
4. Click "Create new secret key"
5. Copy the generated key and paste it here
**Note:** Keep your API key secure and never share it publicly.
"""

_GEMINI_KEY_GUIDE = """
**Step-by-step guide:**
1. Visit [Google AI Studio](https://makersuite.google.com/app/apikey)
2. Sign in with your Google account
3. Click "Create API key"
4. Copy the generated key and paste it here
**Note:** Make sure to enable the Gemini API in your Google Cloud Console.
"""

_COMING_SOON_CARD_HTML = """
<div class="ai-provider-card disabled">
<div class="ai-provider-header">
<div class="ai-provider-icon">{icon}</div>
<div class="ai-provider-title">{name} <span class="coming-soon-badge">Coming Soon</span></div>
</div>
<div class="ai-provider-content">
<p>{blurb}</p>
</div>
</div>
"""


def _render_key_status(provider_label, env_key, entered_key):
    """Show whether a provider key came from the environment or was typed in."""
    if env_key:
        st.success(f"✅ {provider_label} API key found in environment")
    elif entered_key:
        status_html = (
            _KEY_STATUS_VALID_HTML
            if validate_api_key(entered_key)
            else _KEY_STATUS_INVALID_HTML
        )
        st.markdown(status_html, unsafe_allow_html=True)


def _render_coming_soon_card(icon, name, blurb):
    """Render a disabled provider card followed by its availability notice."""
    with st.container():
        st.markdown(
            _COMING_SOON_CARD_HTML.format(icon=icon, name=name, blurb=blurb),
            unsafe_allow_html=True,
        )
        st.info(f"{name} integration will be available in the next update")


def render_ai_providers(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI providers setup step.

    Shows key inputs for OpenAI and Google Gemini (pre-filled from the
    environment), placeholder cards for providers that are not available yet,
    and the navigation buttons. When the user continues with at least one
    non-blank key, the keys are saved to the .env file, copied into
    ``st.session_state['api_keys']``, ``st.session_state['current_step']`` is
    set to 2 and the script is rerun.

    Args:
        api_key_manager (APIKeyManager): Stored in the session under
            ``'api_key_manager'`` so ``update_progress`` can find it.

    Returns:
        Dict[str, Any]: ``{"current_step": 1, "changes_made": bool}`` normally,
        or ``{"current_step": 1, "error": str}`` when saving or rendering fails.
    """
    logger.info("[render_ai_providers] Starting AI providers setup")
    try:
        # Load environment variables
        load_dotenv(override=True)
        # Get existing API keys from .env
        openai_key = os.getenv('OPENAI_API_KEY', '')
        gemini_key = os.getenv('GEMINI_API_KEY', '')

        # Initialize wizard state if not already initialized
        if 'wizard_state' not in st.session_state:
            st.session_state.wizard_state = {
                'current_step': 1,
                'total_steps': 6,
                'progress': 0,
                'completed_steps': set(),
                'last_updated': datetime.now()
            }
            logger.info("[render_ai_providers] Initialized wizard state")

        # Store API key manager in session state for update_progress
        st.session_state['api_key_manager'] = api_key_manager

        # Main content
        st.markdown(_SETUP_HEADER_HTML, unsafe_allow_html=True)
        tabs = st.tabs(["Primary Providers", "Additional Providers"])

        with tabs[0]:
            st.markdown("### Primary AI Providers")
            col1, col2 = st.columns(2)
            with col1:
                # OpenAI Card
                with st.container():
                    openai_input = st.text_input(
                        "OpenAI API Key",
                        value=openai_key,
                        type="password",
                        key="openai_key",
                        help="Enter your OpenAI API key",
                        placeholder="Power your content generation with GPT-4 AI models"
                    )
                    _render_key_status("OpenAI", openai_key, openai_input)
                    with st.expander("📋 How to get your OpenAI API key", expanded=False):
                        st.markdown(_OPENAI_KEY_GUIDE)
            with col2:
                # Google Card
                with st.container():
                    gemini_input = st.text_input(
                        "Google Gemini API Key",
                        value=gemini_key,
                        type="password",
                        key="google_key",
                        help="Enter your Google API key",
                        placeholder="Power your content generation with Gemini AI models"
                    )
                    _render_key_status("Gemini", gemini_key, gemini_input)
                    with st.expander("📋 How to get your Google API key", expanded=False):
                        st.markdown(_GEMINI_KEY_GUIDE)

        with tabs[1]:
            st.markdown("### Additional AI Providers")
            st.markdown("Configure additional AI providers for enhanced capabilities")
            col1, col2 = st.columns(2)
            with col1:
                # Anthropic Card (Coming Soon)
                _render_coming_soon_card(
                    "🧠", "Anthropic", "Access Claude for advanced content generation"
                )
            with col2:
                # Mistral Card (Coming Soon)
                _render_coming_soon_card(
                    "⚡", "Mistral", "Use Mistral's efficient language models"
                )

        # Track changes and validate keys
        changes_made = any([openai_input, gemini_input])
        has_valid_key = validate_api_key(openai_input) or validate_api_key(gemini_input)
        if not changes_made:
            validation_message = "⚠️ Please configure at least one AI provider to continue"
        elif has_valid_key:
            validation_message = "✅ At least one AI provider configured successfully"
        else:
            validation_message = "⚠️ Please provide at least one valid API key"

        # Pick the banner from the validation result itself. The previous
        # substring test used an empty string, which matches every message,
        # so warnings were rendered as success banners.
        if has_valid_key:
            st.success(validation_message)
        else:
            st.warning(validation_message)

        # Navigation buttons
        if render_navigation_buttons(1, 6, changes_made):
            if has_valid_key:
                # Save every provided key to the .env file.
                # (log name, name used in error texts, env variable, value)
                keys_to_save = (
                    ("OpenAI", "OpenAI", "OPENAI_API_KEY", openai_input),
                    ("Google Gemini", "Gemini", "GEMINI_API_KEY", gemini_input),
                )
                for log_name, error_name, env_var, entered_key in keys_to_save:
                    if not validate_api_key(entered_key):
                        continue
                    if save_to_env_file(env_var, entered_key):
                        logger.info(f"[render_ai_providers] {log_name} API key saved to .env file")
                    else:
                        st.error(f"Failed to save {error_name} API key to .env file")
                        return {"current_step": 1, "error": f"Failed to save {error_name} API key"}

                # Reload environment variables to ensure consistency
                load_dotenv(override=True)
                # Store the reloaded API keys in session state
                st.session_state['api_keys'] = {
                    'openai': os.getenv('OPENAI_API_KEY', ''),
                    'google': os.getenv('GEMINI_API_KEY', '')
                }
                # Update progress and move to next step
                st.session_state['current_step'] = 2  # Set the next step explicitly
                update_progress()
                st.rerun()  # Rerun to apply the changes
            else:
                st.error("Please configure at least one valid AI provider to continue")

        return {"current_step": 1, "changes_made": changes_made}
    except Exception as e:
        error_msg = f"Error in AI providers setup: {str(e)}"
        logger.error(f"[render_ai_providers] {error_msg}")
        st.error(error_msg)
        return {"current_step": 1, "error": error_msg}

View File

@@ -1,400 +0,0 @@
"""AI research setup component for the API key manager."""
import streamlit as st
from loguru import logger
from typing import Dict, Any
from ..manager import APIKeyManager
from .base import render_navigation_buttons
import os
from dotenv import load_dotenv
import sys
# Configure loguru for this module: a DEBUG-level log file (rotated at 500 MB,
# retained for 10 days) plus an INFO-level, colourised sink on stdout.
# NOTE(review): logger.remove() is called without a handler id; per the loguru
# documentation that removes every handler registered so far, not only the
# default one -- confirm that dropping sinks added by other modules is intended.
logger.remove() # Remove previously registered handlers (including the default)
logger.add(
"logs/ai_research_setup.log",
rotation="500 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
def get_existing_api_key(key_name: str) -> str:
    """Get existing API key from environment or .env file.

    The process environment is consulted first. Only when it holds no
    (non-empty) value is the ``.env`` file in the current working directory
    scanned for the first line that starts with ``<key_name>=``.

    Args:
        key_name (str): Name of the API key to retrieve

    Returns:
        str: The API key value if found, empty string otherwise
    """
    # First try to get from environment
    api_key = os.getenv(key_name)
    # If not in environment, try to get from .env file
    if not api_key and os.path.exists('.env'):
        prefix = f"{key_name}="
        try:
            with open('.env', 'r') as f:
                for line in f:
                    entry = line.strip()
                    if entry.startswith(prefix):
                        # Split on the first '=' only: the value itself may
                        # contain '=' (e.g. base64 padding) and must not be
                        # truncated.
                        api_key = entry.split('=', 1)[1]
                        break
        except Exception as e:
            # Best effort: a read failure is logged and treated as "not found".
            logger.error(f"[get_existing_api_key] Failed to read {key_name} from .env: {str(e)}")
    return api_key if api_key else ""
def update_env_file(api_keys: Dict[str, str]) -> None:
    """Update the .env file with new API keys, avoiding duplicates.

    Existing ``KEY=value`` entries are rewritten in place (keeping their
    position) and keys that are not yet present are appended. Comments,
    blank lines and any other non-assignment lines are preserved verbatim
    instead of being dropped. When a key occurs several times in the file,
    only its first position is kept, carrying the last value found in the
    file or the new value from ``api_keys``.

    Args:
        api_keys (Dict[str, str]): Dictionary of API keys to update

    Raises:
        Exception: Any error hit while reading or writing the file is logged
            and re-raised.
    """
    def parse_assignment(line):
        """Return (key, value) for a KEY=value line, or None for anything else."""
        stripped = line.strip()
        if not stripped or stripped.startswith('#') or '=' not in stripped:
            return None
        key, value = stripped.split('=', 1)
        return key.strip(), value.strip()

    try:
        # Read existing .env file content
        raw_lines = []
        if os.path.exists('.env'):
            with open('.env', 'r') as f:
                raw_lines = [line.rstrip('\r\n') for line in f]

        # Create a dictionary of existing variables (last occurrence wins),
        # then overlay the new values.
        env_dict = {}
        for line in raw_lines:
            parsed = parse_assignment(line)
            if parsed is not None:
                env_dict[parsed[0]] = parsed[1]
        env_dict.update(api_keys)

        # Rebuild the file: untouched lines stay where they were, each key is
        # emitted once at its first position, new keys go to the end.
        output_lines = []
        written = set()
        for line in raw_lines:
            parsed = parse_assignment(line)
            if parsed is None:
                output_lines.append(line)
            elif parsed[0] not in written:
                written.add(parsed[0])
                output_lines.append(f"{parsed[0]}={env_dict[parsed[0]]}")
        for key, value in env_dict.items():
            if key not in written:
                written.add(key)
                output_lines.append(f"{key}={value}")

        # Write back to .env file
        with open('.env', 'w') as f:
            f.writelines(f"{line}\n" for line in output_lines)
        logger.info("[update_env_file] Successfully updated .env file with API keys")
    except Exception as e:
        logger.error(f"[update_env_file] Error updating .env file: {str(e)}")
        raise
def render_ai_research_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI research setup step.

    Shows password inputs for the SerpAPI, Firecrawl, Tavily and Metaphor/Exa
    keys, pre-filled from the environment / .env file. When the Continue
    button is pressed and at least one key is present, the non-empty keys are
    written to .env, exported to ``os.environ`` and merged into
    ``st.session_state['api_keys']``; ``current_step`` is then set to 4 and
    the script is rerun.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance. It is
            not referenced by this step.

    Returns:
        Dict[str, Any]: ``{"current_step": 3, "changes_made": <bool>}``.
    """
    logger.info("[render_ai_research_setup] Rendering AI research setup component")
    st.markdown("""
<div class='setup-header'><h2>🔍 AI Web Research API Setup</h2></div>
""", unsafe_allow_html=True)
    # Create two columns for different search types
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("### The Usual")
        # Get existing API keys
        existing_serpapi_key = get_existing_api_key("SERPAPI_KEY")
        existing_firecrawl_key = get_existing_api_key("FIRECRAWL_API_KEY")
        serpapi_key = st.text_input(
            "## Enter 🔎 SerpAPI",
            value=existing_serpapi_key,
            type="password",
            key="serpapi_key",
            help="Enter your SerpAPI key",
            placeholder="Access search engine results for research"
        )
        if serpapi_key or existing_serpapi_key:
            st.markdown("""
<div class="ai-provider-status status-valid">
✓ API key configured
</div>
""", unsafe_allow_html=True)
        st.markdown("""
<div class="api-info-section">
<details>
<summary>📋 How to get your SerpAPI key</summary>
<div class="api-info-content">
<p><strong>Step-by-step guide:</strong></p>
<ol>
<li>Visit <a href="https://serpapi.com" target="_blank">SerpAPI</a></li>
<li>Create an account</li>
<li>Go to your dashboard</li>
<li>Copy your API key</li>
<li>Paste it here</li>
</ol>
<p><strong>Note:</strong> SerpAPI provides real-time search results from multiple engines.</p>
</div>
</details>
</div>
""", unsafe_allow_html=True)
        st.markdown("</div></div>", unsafe_allow_html=True)
        firecrawl_key = st.text_input(
            "Enter 🕷️ Firecrawl API Key",
            value=existing_firecrawl_key,
            type="password",
            key="firecrawl_key",
            help="Enter your Firecrawl API key",
            placeholder="Web content extraction and analysis"
        )
        if firecrawl_key or existing_firecrawl_key:
            st.markdown("""
<div class="ai-provider-status status-valid">
✓ Firecrawl API key configured
</div>
""", unsafe_allow_html=True)
        st.markdown("""
<div class="api-info-section">
<details>
<summary>📋 How to get your Firecrawl API key</summary>
<div class="api-info-content">
<p><strong>Step-by-step guide:</strong></p>
<ol>
<li>Visit <a href="https://www.firecrawl.dev/account" target="_blank">Firecrawl</a></li>
<li>Create an account</li>
<li>Go to your dashboard</li>
<li>Generate your API key</li>
<li>Copy and paste it here</li>
</ol>
<p><strong>Note:</strong> Firecrawl provides powerful web content extraction and analysis capabilities.</p>
</div>
</details>
</div>
""", unsafe_allow_html=True)
        st.markdown("</div></div>", unsafe_allow_html=True)
    with col2:
        st.markdown("### AI Deep Research")
        # Get existing API keys
        existing_tavily_key = get_existing_api_key("TAVILY_API_KEY")
        existing_metaphor_key = get_existing_api_key("METAPHOR_API_KEY")
        tavily_key = st.text_input(
            "Enter 🤖 Tavily API Key",
            value=existing_tavily_key,
            type="password",
            key="tavily_key",
            help="Enter your Tavily API key",
            placeholder="AI-powered search with semantic understanding"
        )
        if tavily_key or existing_tavily_key:
            st.markdown("""
<div class="ai-provider-status status-valid">
✓ Tavily API key configured
</div>
""", unsafe_allow_html=True)
        st.markdown("""
<div class="api-info-section">
<details>
<summary>📋 How to get your Tavily API key</summary>
<div class="api-info-content">
<p><strong>Step-by-step guide:</strong></p>
<ol>
<li>Visit <a href="https://tavily.com" target="_blank">Tavily</a></li>
<li>Create an account</li>
<li>Go to API settings</li>
<li>Generate a new API key</li>
<li>Copy and paste it here</li>
</ol>
<p><strong>Note:</strong> Tavily provides AI-powered semantic search capabilities.</p>
</div>
</details>
</div>
""", unsafe_allow_html=True)
        st.markdown("</div></div>", unsafe_allow_html=True)
        metaphor_key = st.text_input(
            "Enter 🧠 Metaphor/Exa API Key",
            value=existing_metaphor_key,
            type="password",
            key="metaphor_key",
            help="Enter your Metaphor/Exa API key",
            placeholder="Neural search engine for deep research"
        )
        if metaphor_key or existing_metaphor_key:
            st.markdown("""
<div class="ai-provider-status status-valid">
✓ API key configured
</div>
""", unsafe_allow_html=True)
        st.markdown("""
<div class="api-info-section">
<details>
<summary>📋 How to get your Metaphor/Exa API key</summary>
<div class="api-info-content">
<p><strong>Step-by-step guide:</strong></p>
<ol>
<li>Visit <a href="https://metaphor.systems" target="_blank">Metaphor/Exa</a></li>
<li>Create an account</li>
<li>Navigate to API settings</li>
<li>Generate your API key</li>
<li>Copy and paste it here</li>
</ol>
<p><strong>Note:</strong> Metaphor/Exa provides neural search capabilities for deep research.</p>
</div>
</details>
</div>
""", unsafe_allow_html=True)
        st.markdown("</div></div>", unsafe_allow_html=True)
    # Track changes: the Continue button is enabled once any key is present
    changes_made = bool(serpapi_key or tavily_key or metaphor_key or firecrawl_key)
    # Navigation buttons with correct arguments
    if render_navigation_buttons(3, 5, changes_made):
        if changes_made:
            try:
                # Prepare API keys dictionary with only non-empty values
                api_keys = {}
                if serpapi_key:
                    api_keys['SERPAPI_KEY'] = serpapi_key
                if tavily_key:
                    api_keys['TAVILY_API_KEY'] = tavily_key
                if metaphor_key:
                    api_keys['METAPHOR_API_KEY'] = metaphor_key
                if firecrawl_key:
                    api_keys['FIRECRAWL_API_KEY'] = firecrawl_key
                # Update .env file with new API keys
                update_env_file(api_keys)
                # Update environment variables
                for key, value in api_keys.items():
                    os.environ[key] = value
                # Merge the research keys into session state rather than
                # replacing the whole mapping, so keys stored by earlier
                # wizard steps (e.g. 'openai' / 'google') are not discarded.
                stored_keys = {}
                if 'api_keys' in st.session_state and st.session_state['api_keys']:
                    stored_keys = dict(st.session_state['api_keys'])
                stored_keys.update({
                    'serpapi': serpapi_key,
                    'tavily': tavily_key,
                    'metaphor': metaphor_key,
                    'firecrawl': firecrawl_key
                })
                st.session_state['api_keys'] = stored_keys
                # Update progress and move to next step
                st.session_state['current_step'] = 4
                st.rerun()
            except Exception as e:
                error_msg = f"Error saving API keys: {str(e)}"
                logger.error(f"[render_ai_research_setup] {error_msg}")
                st.error(error_msg)
        else:
            st.error("Please configure at least one research provider to continue")
    # Detailed Information Section
    st.markdown("---")
    st.markdown("### Understanding Your Research Options")
    # Create four columns for the information popovers
    info_col1, info_col2, info_col3, info_col4 = st.columns(4)
    # The Usual: Traditional Search Popover
    with info_col1:
        with st.popover("#### The Usual: Traditional Search"):
            st.markdown("""
**SerpAPI**
- Real-time search results from multiple search engines
- Access to structured data from search results
- Great for gathering general information and market research
- Includes features like:
- Web search results
- News articles
- Knowledge graphs
- Related questions
""")
    # AI Deep Research Popover
    with info_col2:
        with st.popover("#### AI Deep Research: Advanced Search Capabilities"):
            st.markdown("""
**Tavily AI**
- AI-powered search with semantic understanding
- Automatically summarizes and analyzes search results
- Perfect for:
- Deep research tasks
- Academic research
- Fact-checking
- Real-time information gathering
**Metaphor/Exa**
- Neural search engine that understands context and meaning
- Specialized in finding highly relevant content
- Ideal for:
- Technical research
- Finding similar content
- Discovering patterns in research
- Understanding topic landscapes
""")
    # Choosing the Right Tool Popover
    with info_col3:
        with st.popover("#### Choosing the Right Tool"):
            st.markdown("""
1. **For General Research:**
- Start with SerpAPI for broad coverage and structured data
2. **For Deep Analysis:**
- Use Tavily AI when you need AI-powered insights
- Choose Metaphor/Exa for neural search and pattern discovery
3. **For Comprehensive Research:**
- Combine multiple tools to get the most complete picture
- Use SerpAPI for initial research
- Follow up with AI tools for deeper insights
> **Pro Tip:** Configure multiple providers to ensure you have backup options and can cross-reference results for better accuracy.
""")
    # Coming Soon Popover
    with info_col4:
        with st.popover("#### 🔜 Coming Soon - More Search Options"):
            st.markdown("""
**Bing Search API**
- Microsoft's powerful search API with comprehensive capabilities
- Features include:
- Web search with advanced filtering
- News articles with sentiment analysis
- Image search with visual recognition
- Video search with content understanding
- Custom search parameters for targeted results
**Google Search API**
- Google's programmable search engine with extensive features
- Capabilities include:
- Custom search engine creation
- Site-specific search
- Image and video search
- News search with time-based filtering
- Knowledge graph integration
**Additional Planned Integrations:**
- **DuckDuckGo API**: Privacy-focused search with no tracking
- **Brave Search API**: Independent search engine with unique features
- **Perplexity API**: AI-powered research assistant with real-time data
> **Note:** These integrations are under active development and will be available in future updates.
""")
    return {"current_step": 3, "changes_made": changes_made}

View File

@@ -1,226 +0,0 @@
"""ALwrity integrations setup component."""
import streamlit as st
from loguru import logger
import os
from typing import Dict, Any
from ..manager import APIKeyManager
from .base import render_navigation_buttons, render_step_indicator, render_tab_style
def update_env_file(env_vars: Dict[str, str]) -> None:
    """Update the .env file with new environment variables, avoiding duplicates.

    Existing ``KEY=value`` entries are rewritten in place (keeping their
    position) and variables that are not yet present are appended. Comments,
    blank lines and any other non-assignment lines are preserved verbatim
    instead of being dropped. When a variable occurs several times in the
    file, only its first position is kept, carrying the last value found in
    the file or the new value from ``env_vars``.

    Args:
        env_vars (Dict[str, str]): Dictionary of environment variables to update

    Raises:
        Exception: Any error hit while reading or writing the file is logged
            and re-raised.
    """
    def parse_assignment(line):
        """Return (key, value) for a KEY=value line, or None for anything else."""
        stripped = line.strip()
        if not stripped or stripped.startswith('#') or '=' not in stripped:
            return None
        key, value = stripped.split('=', 1)
        return key.strip(), value.strip()

    try:
        # Read existing .env file content
        raw_lines = []
        if os.path.exists('.env'):
            with open('.env', 'r') as f:
                raw_lines = [line.rstrip('\r\n') for line in f]

        # Create a dictionary of existing variables (last occurrence wins),
        # then overlay the new values.
        env_dict = {}
        for line in raw_lines:
            parsed = parse_assignment(line)
            if parsed is not None:
                env_dict[parsed[0]] = parsed[1]
        env_dict.update(env_vars)

        # Rebuild the file: untouched lines stay where they were, each
        # variable is emitted once at its first position, new ones go last.
        output_lines = []
        written = set()
        for line in raw_lines:
            parsed = parse_assignment(line)
            if parsed is None:
                output_lines.append(line)
            elif parsed[0] not in written:
                written.add(parsed[0])
                output_lines.append(f"{parsed[0]}={env_dict[parsed[0]]}")
        for key, value in env_dict.items():
            if key not in written:
                written.add(key)
                output_lines.append(f"{key}={value}")

        # Write back to .env file
        with open('.env', 'w') as f:
            f.writelines(f"{line}\n" for line in output_lines)
        logger.info("[update_env_file] Successfully updated .env file")
    except Exception as e:
        logger.error(f"[update_env_file] Error updating .env file: {str(e)}")
        raise
def _render_coming_soon_card(icon: str, title: str, description: str) -> None:
    """Render one disabled "Coming Soon" integration card followed by its info notice."""
    with st.container():
        st.markdown(f"""
<div class="integration-card disabled">
<div class="integration-header">
<div class="integration-icon">{icon}</div>
<div class="integration-title">{title} <span class="coming-soon-badge">Coming Soon</span></div>
</div>
<div class="integration-content">
<p>{description}</p>
</div>
</div>
""", unsafe_allow_html=True)
        st.info(f"{title} integration will be available in the next update")


def render_alwrity_integrations(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the ALwrity integrations setup step.

    All integrations are currently placeholders ("Coming Soon"), so the step
    can always be continued. Pressing Continue records the placeholder
    integrations in ``st.session_state['integrations']``, writes
    ``INTEGRATION_DONE=True`` to .env and ``os.environ``, sets
    ``current_step`` to 6 and reruns the script.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance. It is
            not referenced by this step.

    Returns:
        Dict[str, Any]: ``{"current_step": 5, "changes_made": <bool>}`` on the
        normal path, or ``{"current_step": 5, "error": <message>}`` when
        rendering raised.
    """
    try:
        # Apply enhanced tab styling
        render_tab_style()
        st.markdown("""
<div class='setup-header'>
<h2>🔄 ALwrity Integrations</h2>
<p>Connect your content platforms and tools</p>
</div>
""", unsafe_allow_html=True)
        # Create tabs for different integration types
        tabs = st.tabs(["Website Platforms", "Social Media", "Analytics Tools"])
        with tabs[0]:
            st.markdown("""
<div class="tab-content">
<h3>Website Platforms</h3>
<p>Connect your website platforms for seamless content publishing</p>
</div>
""", unsafe_allow_html=True)
            # Website Platforms Grid
            col1, col2 = st.columns(2)
            with col1:
                _render_coming_soon_card(
                    "🌐", "WordPress",
                    "Connect your WordPress site for direct content publishing."
                )
            with col2:
                _render_coming_soon_card(
                    "🎨", "Wix",
                    "Connect your Wix site for direct content publishing."
                )
        with tabs[1]:
            st.markdown("""
<div class="tab-content">
<h3>Social Media</h3>
<p>Connect your social media accounts for content distribution</p>
</div>
""", unsafe_allow_html=True)
            # Social Media Grid
            col1, col2 = st.columns(2)
            with col1:
                _render_coming_soon_card(
                    "📘", "Facebook",
                    "Connect your Facebook account for content sharing."
                )
            with col2:
                _render_coming_soon_card(
                    "📸", "Instagram",
                    "Connect your Instagram account for content sharing."
                )
        with tabs[2]:
            st.markdown("""
<div class="tab-content">
<h3>Analytics Tools</h3>
<p>Connect your analytics tools for content performance tracking</p>
</div>
""", unsafe_allow_html=True)
            _render_coming_soon_card(
                "📊", "Google Search Console",
                "Connect your Google Search Console for SEO insights."
            )
        # Validate integrations
        changes_made = True  # Always allow proceeding since integrations are coming soon
        has_valid_integrations = True
        validation_message = "✅ Website platform integrations will be available in the next update"
        # Display validation message. Success messages are recognised by their
        # leading check mark; testing for the empty string would match every
        # message and a warning could never be shown.
        if validation_message:
            if validation_message.startswith("✅"):
                st.success(validation_message)
            else:
                st.warning(validation_message)
        # Navigation buttons
        if render_navigation_buttons(5, 6, changes_made):
            if has_valid_integrations:
                try:
                    # Store integration settings in session state
                    st.session_state['integrations'] = {
                        'coming_soon': {
                            'wordpress': True,
                            'wix': True,
                            'facebook': True,
                            'instagram': True,
                            'google_search_console': True
                        }
                    }
                    # Update INTEGRATION_DONE in .env file and environment
                    env_vars = {'INTEGRATION_DONE': 'True'}
                    update_env_file(env_vars)
                    # Update environment variable
                    os.environ['INTEGRATION_DONE'] = 'True'
                    logger.info("Updated INTEGRATION_DONE status")
                    # Update progress and move to next step
                    st.session_state['current_step'] = 6
                    st.rerun()
                except Exception as e:
                    error_msg = f"Failed to update integration status: {str(e)}"
                    logger.error(error_msg)
                    st.error(error_msg)
            else:
                st.error("Please configure at least one integration to continue")
        return {"current_step": 5, "changes_made": changes_made}
    except Exception as e:
        error_msg = f"Error in ALwrity integrations setup: {str(e)}"
        logger.error(f"[render_alwrity_integrations] {error_msg}")
        st.error(error_msg)
        return {"current_step": 5, "error": error_msg}

View File

@@ -1,181 +0,0 @@
"""Base components for the API key manager."""
import streamlit as st
from typing import Dict, Any
from loguru import logger
from ..styles import API_KEY_MANAGER_STYLES
def render_step_indicator(current_step: int, total_steps: int) -> None:
    """Draw the horizontal wizard progress bar.

    Emits the indicator's CSS, then one pill per wizard step joined by
    connector lines. The step equal to ``current_step`` is marked "active",
    lower-numbered steps "completed", and higher-numbered steps get no state
    class. Any exception is logged and reported through ``st.error``.

    Args:
        current_step (int): Number of the step currently shown.
        total_steps (int): Total number of steps. The body does not read it;
            the list of steps is fixed inside the function.
    """
    try:
        indicator_css = """
<style>
.step-indicator {
display: flex;
justify-content: space-between;
margin-bottom: 2rem;
padding: 1rem;
background: #f0f2f6;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.step {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.5rem 1rem;
border-radius: 20px;
background: #ffffff;
transition: all 0.3s ease;
}
.step.active {
background: #1f77b4;
color: white;
}
.step.completed {
background: #2ecc71;
color: white;
}
.step-icon {
font-size: 1.2rem;
}
.step-number {
font-weight: bold;
}
.step-title {
font-size: 0.9rem;
}
.step-line {
flex: 1;
height: 2px;
background: #e0e0e0;
margin: 0 1rem;
}
.step-line.active {
background: #1f77b4;
}
.step-line.completed {
background: #2ecc71;
}
</style>
"""
        st.markdown(indicator_css, unsafe_allow_html=True)

        # (icon, title, step number) for every pill, in display order.
        wizard_steps = [
            ("🔑", "AI LLM", 1),
            ("🔍", "Website Analysis", 2),
            ("👤", "AI Research", 3),
            ("🎨", "Personalization", 4),
            ("🔄", "Integrations", 5),
            ("", "Complete", 6)
        ]

        def state_of(number):
            """Map a step number to its CSS state class."""
            if number == current_step:
                return "active"
            if number < current_step:
                return "completed"
            return ""

        fragments = ['<div class="step-indicator">']
        final_index = len(wizard_steps) - 1
        for index, (icon, title, number) in enumerate(wizard_steps):
            # The pill and the connector that follows it share one state class.
            state = state_of(number)
            fragments.append(f'''
<div class="step {state}">
<span class="step-icon">{icon}</span>
<span class="step-number">{number}</span>
<span class="step-title">{title}</span>
</div>
''')
            # No connector after the last pill.
            if index < final_index:
                fragments.append(f'<div class="step-line {state}"></div>')
        fragments.append('</div>')
        st.markdown("".join(fragments), unsafe_allow_html=True)
    except Exception as e:
        logger.error(f"Error rendering step indicator: {str(e)}")
        st.error("Error displaying step indicator")
def render_navigation_buttons(current_step: int, total_steps: int, changes_made: bool = True) -> bool:
    """Draw the Back / Continue / Complete buttons for a wizard step.

    A Back button appears on every step after the first; clicking it calls
    ``previous_step()`` and reruns the script. On the right, steps before the
    last show a Continue button (disabled while ``changes_made`` is false) and
    the last step shows a Complete button that also displays a success
    message. Advancing the wizard is left to the calling component.

    Args:
        current_step (int): Current step number
        total_steps (int): Total number of steps
        changes_made (bool): Whether changes were made in the current step

    Returns:
        bool: True if next/complete button was clicked, False otherwise
    """
    left_col, _middle_col, right_col = st.columns([1, 2, 1])

    with left_col:
        if current_step > 1 and st.button("**← Back**", use_container_width=True, key="back_button"):
            # Imported lazily, only when the Back button is actually clicked.
            from ..wizard_state import previous_step
            previous_step()
            st.rerun()

    advance_clicked = False
    with right_col:
        if current_step < total_steps:
            # The component that called us performs the actual step change.
            advance_clicked = bool(
                st.button(
                    "**Continue →**",
                    use_container_width=True,
                    disabled=not changes_made,
                    key="next_button",
                )
            )
        elif st.button("**Complete Setup ✓**", use_container_width=True, type="primary", key="complete_button"):
            st.success("✅ Setup completed successfully!")
            advance_clicked = True
    return advance_clicked
def render_tab_style() -> None:
    """Inject the CSS that restyles Streamlit tabs and ``.tab-content`` panels."""
    tab_css = """
<style>
.stTabs [data-baseweb="tab-list"] {
gap: 2rem;
background: #f8f9fa;
padding: 0.5rem;
border-radius: 10px;
margin-bottom: 1rem;
}
.stTabs [data-baseweb="tab"] {
padding: 0.75rem 1.5rem;
border-radius: 25px;
transition: all 0.3s ease;
background: transparent;
color: #495057;
font-weight: 500;
}
.stTabs [data-baseweb="tab"]:hover {
background: #e9ecef;
color: #1f77b4;
}
.stTabs [aria-selected="true"] {
background: #1f77b4 !important;
color: white !important;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.stTabs [data-baseweb="tab-list"] button:nth-child(1) {
margin-left: 0.5rem;
}
.stTabs [data-baseweb="tab-list"] button:nth-child(3) {
margin-right: 0.5rem;
}
.tab-content {
background: white;
padding: 1.5rem;
border-radius: 10px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
margin-top: 1rem;
}
</style>
"""
    # Raw HTML must be explicitly allowed for the <style> element to be emitted.
    st.markdown(tab_css, unsafe_allow_html=True)
def render_success_message():
    """Show the "API keys saved" banner that asks the user to restart the application."""
    banner_html = """
<div class="success-message">
<h3 style='color: white; margin-bottom: 12px; font-size: 1.4em;'>✅ API keys saved successfully!</h3>
<p style='color: rgba(255,255,255,0.95); font-size: 1.1em;'>
Please restart the application for the changes to take effect.
</p>
</div>
"""
    st.markdown(banner_html, unsafe_allow_html=True)

View File

@@ -1,272 +0,0 @@
"""Final setup component for the API key manager."""
import streamlit as st
from loguru import logger
import sys
import json
import os
from typing import Dict, Any
from ..manager import APIKeyManager
from ..validation import check_all_api_keys
# Configure loguru to output to both a file and stdout: a DEBUG-level log file
# (rotated at 500 MB, retained for 10 days) plus an INFO-level, colourised
# stdout sink.
# NOTE(review): logger.remove() is called without a handler id; per the loguru
# documentation that removes every handler registered so far, not only the
# default one -- confirm that dropping sinks added by other modules is intended.
logger.remove() # Remove previously registered handlers (including the default)
logger.add(
"logs/final_setup.log",
rotation="500 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
def load_main_config() -> Dict[str, Any]:
    """Load the main configuration file.

    Reads ``lib/workspace/alwrity_config/main_config.json`` relative to the
    current working directory.

    Returns:
        Dict[str, Any]: The parsed configuration. An empty dict is returned,
        and the problem logged, when the file cannot be read or parsed or
        when its top-level value is not a JSON object.
    """
    config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except Exception as e:
        # Best effort: callers fall back to their own defaults on an empty dict.
        logger.error(f"Error loading main_config.json: {str(e)}")
        return {}
    if not isinstance(config, dict):
        # Callers use .get() on the result, so anything but an object is unusable.
        logger.error("Error loading main_config.json: top-level JSON value is not an object")
        return {}
    return config
def _mask_api_key(value: str) -> str:
    """Return a display-safe form of an API key, or "Not configured" when it is unset."""
    if not value or value == 'Not configured':
        return 'Not configured'
    return f"{'*' * 8}{value[-4:]}"


def _set_final_setup_flag(value: str) -> None:
    """Rewrite .env so it holds exactly one FINAL_SETUP_COMPLETE entry and mirror it in os.environ."""
    env_lines = []
    if os.path.exists('.env'):
        with open('.env', 'r') as f:
            env_lines = f.readlines()
    # Remove any existing FINAL_SETUP_COMPLETE entries
    env_lines = [line for line in env_lines if not line.startswith('FINAL_SETUP_COMPLETE=')]
    # A last line without a trailing newline would otherwise be fused with
    # the entry appended below, corrupting both variables.
    if env_lines and not env_lines[-1].endswith('\n'):
        env_lines[-1] += '\n'
    env_lines.append(f"FINAL_SETUP_COMPLETE={value}\n")
    with open('.env', 'w') as f:
        f.writelines(env_lines)
    os.environ['FINAL_SETUP_COMPLETE'] = value


def render_final_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the final setup step.

    Shows a read-only, tabbed summary of the five previous wizard steps
    (API keys are masked) and two buttons. "Back" returns to step 4.
    "Complete Setup" sets FINAL_SETUP_COMPLETE=True in .env, validates the
    configuration with ``check_all_api_keys`` (reverting the flag to False on
    failure), writes the main configuration back to disk, flags completion in
    the session state and reruns the script.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance

    Returns:
        Dict[str, Any]: Current state. ``changes_made`` is False when
        completing the setup failed, True otherwise.
    """
    logger.info("[render_final_setup] Rendering final setup component")
    st.markdown("### Step 6: Final Setup & Validation")
    # Load main config
    main_config = load_main_config()
    # Create tabs for each step
    tabs = st.tabs([
        "Step 1: AI LLM Setup",
        "Step 2: Website Analysis",
        "Step 3: AI Research",
        "Step 4: Personalization",
        "Step 5: Integrations"
    ])
    # Step 1: AI LLM Setup
    with tabs[0]:
        st.markdown("#### AI LLM Configuration")
        # Get API keys from environment
        openai_key = os.getenv('OPENAI_API_KEY', 'Not configured')
        gemini_key = os.getenv('GEMINI_API_KEY', 'Not configured')
        anthropic_key = os.getenv('ANTHROPIC_API_KEY', 'Not configured')
        mistral_key = os.getenv('MISTRAL_API_KEY', 'Not configured')
        # Display API keys (masked); unset keys read "Not configured"
        st.markdown("##### API Keys")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown(f"**OpenAI API Key:** {_mask_api_key(openai_key)}")
            st.markdown(f"**Google Gemini API Key:** {_mask_api_key(gemini_key)}")
        with col2:
            st.markdown(f"**Anthropic API Key:** {_mask_api_key(anthropic_key)}")
            st.markdown(f"**Mistral API Key:** {_mask_api_key(mistral_key)}")
    # Step 2: Website Analysis
    with tabs[1]:
        st.markdown("#### Website Analysis Configuration")
        # Get website URL from environment
        website_url = os.getenv('WEBSITE_URL', 'Not configured')
        # Display website URL
        st.markdown("##### Website URL")
        st.markdown(f"**Website URL:** {website_url}")
        # Display website analysis settings
        st.markdown("##### Analysis Settings")
        st.markdown("Website analysis settings will be used to understand your content style and preferences.")
    # Step 3: AI Research
    with tabs[2]:
        st.markdown("#### AI Research Configuration")
        # Get research API keys from environment
        serpapi_key = os.getenv('SERPAPI_KEY', 'Not configured')
        tavily_key = os.getenv('TAVILY_API_KEY', 'Not configured')
        metaphor_key = os.getenv('METAPHOR_API_KEY', 'Not configured')
        firecrawl_key = os.getenv('FIRECRAWL_API_KEY', 'Not configured')
        # Display API keys (masked); unset keys read "Not configured"
        st.markdown("##### Research API Keys")
        col1, col2 = st.columns(2)
        with col1:
            st.markdown(f"**SerpAPI Key:** {_mask_api_key(serpapi_key)}")
            st.markdown(f"**Tavily API Key:** {_mask_api_key(tavily_key)}")
        with col2:
            st.markdown(f"**Metaphor API Key:** {_mask_api_key(metaphor_key)}")
            st.markdown(f"**Firecrawl API Key:** {_mask_api_key(firecrawl_key)}")
    # Step 4: Personalization
    with tabs[3]:
        st.markdown("#### Personalization Configuration")
        # Display personalization settings from main config
        with st.popover("Blog Content Characteristics", help="Click to see details about blog content settings"):
            st.markdown("##### Blog Content Characteristics")
            blog_settings = main_config.get("Blog Content Characteristics", {})
            st.write(f"- Blog Length: {blog_settings.get('Blog Length', '2000')}")
            st.write(f"- Blog Tone: {blog_settings.get('Blog Tone', 'Professional')}")
            st.write(f"- Blog Demographic: {blog_settings.get('Blog Demographic', 'Professional')}")
            st.write(f"- Blog Type: {blog_settings.get('Blog Type', 'Informational')}")
            st.write(f"- Blog Language: {blog_settings.get('Blog Language', 'English')}")
            st.write(f"- Blog Output Format: {blog_settings.get('Blog Output Format', 'markdown')}")
            st.markdown("These settings define the overall structure and style of your blog content.")
        with st.popover("Blog Images Details", help="Click to see details about image generation settings"):
            st.markdown("##### Blog Images Details")
            image_settings = main_config.get("Blog Images Details", {})
            st.write(f"- Image Generation Model: {image_settings.get('Image Generation Model', 'stable-diffusion')}")
            st.write(f"- Number of Blog Images: {image_settings.get('Number of Blog Images', 1)}")
            st.markdown("These settings control how images are generated for your blog posts.")
        with st.popover("LLM Options", help="Click to see details about language model settings"):
            st.markdown("##### LLM Options")
            llm_settings = main_config.get("LLM Options", {})
            st.write(f"- GPT Provider: {llm_settings.get('GPT Provider', 'google')}")
            st.write(f"- Model: {llm_settings.get('Model', 'gemini-1.5-flash-latest')}")
            st.write(f"- Temperature: {llm_settings.get('Temperature', 0.7)}")
            st.write(f"- Top-p: {llm_settings.get('Top-p', 0.9)}")
            st.write(f"- Max Tokens: {llm_settings.get('Max Tokens', 4000)}")
            st.write(f"- Frequency Penalty: {llm_settings.get('Frequency Penalty', 1.0)}")
            st.write(f"- Presence Penalty: {llm_settings.get('Presence Penalty', 1.0)}")
            st.markdown("These settings control the behavior of the language model used for content generation.")
        with st.popover("Search Engine Parameters", help="Click to see details about search engine settings"):
            st.markdown("##### Search Engine Parameters")
            search_settings = main_config.get("Search Engine Parameters", {})
            st.write(f"- Geographic Location: {search_settings.get('Geographic Location', 'us')}")
            st.write(f"- Search Language: {search_settings.get('Search Language', 'en')}")
            st.write(f"- Number of Results: {search_settings.get('Number of Results', 10)}")
            st.write(f"- Time Range: {search_settings.get('Time Range', 'anytime')}")
            st.markdown("These settings control how search engines are used for research and content creation.")
    # Step 5: Integrations
    with tabs[4]:
        st.markdown("#### ALwrity Integrations Configuration")
        # Display integrations settings
        st.markdown("##### Website Platforms")
        st.info("WordPress integration will be available in the next update")
        st.info("Wix integration will be available in the next update")
        st.markdown("##### Social Media")
        st.info("Facebook integration will be available in the next update")
        st.info("Instagram integration will be available in the next update")
        st.markdown("##### Analytics Tools")
        st.info("Google Search Console integration will be available in the next update")
    # Navigation buttons
    col1, col2 = st.columns(2)
    with col1:
        if st.button("← Back to Personalization"):
            logger.info("[render_final_setup] User clicked back to personalization")
            st.session_state.current_step = 4
            st.session_state.next_step = "personalization_setup"
            st.rerun()
    with col2:
        if st.button("Complete Setup →"):
            logger.info("[render_final_setup] User clicked complete setup")
            try:
                # First set FINAL_SETUP_COMPLETE to True
                try:
                    _set_final_setup_flag("True")
                    logger.info("[render_final_setup] Set FINAL_SETUP_COMPLETE=True")
                except Exception as e:
                    logger.error(f"[render_final_setup] Error setting FINAL_SETUP_COMPLETE: {str(e)}")
                    st.error("Error updating setup status. Please try again.")
                    return {"current_step": 6, "changes_made": False}
                # Now validate all steps
                validation_result = check_all_api_keys(api_key_manager)
                if not validation_result:
                    # If validation fails, revert FINAL_SETUP_COMPLETE
                    try:
                        _set_final_setup_flag("False")
                    except Exception as e:
                        # Reverting is best effort, but leave a trace in the log.
                        logger.warning(f"[render_final_setup] Could not revert FINAL_SETUP_COMPLETE: {str(e)}")
                    st.error("Setup validation failed. Please ensure all required steps are completed.")
                    logger.error("[render_final_setup] Validation failed")
                    return {"current_step": 6, "changes_made": False}
                # Log the current API keys in the manager
                logger.info("[render_final_setup] Current API keys in manager:")
                for key, value in api_key_manager.api_keys.items():
                    if value:
                        logger.info(f" - {key}: {'*' * 8}{value[-4:]}")
                    else:
                        logger.info(f" - {key}: Not set")
                # Save main configuration
                # NOTE(review): main_config is {} when loading failed, in which
                # case this writes an empty object to disk -- confirm intended.
                config_path = os.path.join("lib", "workspace", "alwrity_config", "main_config.json")
                with open(config_path, 'w') as f:
                    json.dump(main_config, f, indent=4)
                logger.info("[render_final_setup] Saved main configuration")
                # Show success message
                st.success("✅ Setup completed successfully! Redirecting to main application...")
                # Set setup completion flag in session state
                st.session_state['setup_completed'] = True
                st.session_state['redirect_to_main'] = True
                # Clear the current step to ensure proper redirection
                if 'current_step' in st.session_state:
                    del st.session_state['current_step']
                # Rerun to trigger redirection
                st.rerun()
            except Exception as e:
                error_msg = f"Error completing setup: {str(e)}"
                logger.error(f"[render_final_setup] {error_msg}")
                st.error(error_msg)
                return {"current_step": 6, "changes_made": False}
    return {"current_step": 6, "changes_made": True}

View File

@@ -1,39 +0,0 @@
"""Health monitoring component for the API key manager."""
import streamlit as st
from loguru import logger
from ..health_monitor import APIKeyHealthMonitor
from ..key_rotation import KeyRotationManager
from ..wizard_state import get_api_keys
def render_health_monitoring():
    """Render the API key health monitoring dashboard.

    Builds two tabs: one delegating to ``APIKeyHealthMonitor`` for the health
    view and one delegating to ``KeyRotationManager`` for the rotation view,
    followed by manual controls that trigger a rotation check for a chosen
    key type.
    """
    st.header("API Key Health & Rotation")

    # Initialize managers
    health_monitor = APIKeyHealthMonitor()
    rotation_manager = KeyRotationManager()

    # Create tabs for different views
    health_tab, rotation_tab = st.tabs(["Health Monitor", "Key Rotation"])

    with health_tab:
        health_monitor.get_health_dashboard()

    with rotation_tab:
        rotation_manager.display_rotation_dashboard()

        # Manual rotation controls
        st.subheader("Manual Controls")
        # Several key names can share a prefix; dict.fromkeys removes the
        # duplicates while keeping first-seen order so each type is listed once.
        key_types = list(dict.fromkeys(k.split('_')[0] for k in get_api_keys()))
        key_type = st.selectbox("Select Key Type", options=key_types)

        if key_type and st.button("Force Rotation"):
            # rotate_if_needed() returns the *current* key when nothing changes,
            # so compare against the previous value before claiming a rotation.
            previous_key = rotation_manager.get_active_key(key_type)
            new_key = rotation_manager.rotate_if_needed(key_type)
            if new_key and new_key != previous_key:
                st.success(f"Rotated to new key: {new_key}")
            elif new_key:
                st.info(f"No rotation performed; active key remains: {new_key}")
            else:
                st.warning("No suitable key available for rotation")

View File

@@ -1,487 +0,0 @@
"""Personalization setup component for the API key manager."""
import streamlit as st
from loguru import logger
import sys
import json
import os
from typing import Dict, Any
from ..manager import APIKeyManager
from ....web_crawlers.async_web_crawler import AsyncWebCrawlerService
from ....personalization.style_analyzer import StyleAnalyzer
from lib.utils.style_utils import (
get_test_config_styles,
get_glass_container,
get_info_section,
get_example_box,
get_analysis_section,
get_style_guide_html
)
from .base import render_navigation_buttons
from .alwrity_integrations import render_alwrity_integrations
import asyncio
import os
from pathlib import Path
import yaml
# Configure logger to output to both file and stdout
# NOTE(review): `logger` is the object imported from loguru above; a bare
# logger.remove() drops every handler registered so far, so running this module
# presumably also discards sinks set up by other modules — confirm intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, rotation at 500 MB, retention of 10 days.
logger.add(
"logs/personalization_setup.log",
rotation="500 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, written to stdout with colour markup tags.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
def load_main_config() -> Dict[str, Any]:
    """Read ``lib/workspace/alwrity_config/main_config.json`` and return its content.

    Returns:
        The parsed JSON document, or an empty dict when the file cannot be
        opened or parsed (the failure is logged, not raised).
    """
    path_parts = ("lib", "workspace", "alwrity_config", "main_config.json")
    try:
        with open(os.path.join(*path_parts), 'r') as handle:
            parsed = json.load(handle)
    except Exception as exc:
        logger.error(f"Error loading main_config.json: {str(exc)}")
        return {}
    return parsed
def save_main_config(config: Dict[str, Any]) -> bool:
    """Write *config* as indented JSON to ``lib/workspace/alwrity_config/main_config.json``.

    The parent directory is created when missing.

    Returns:
        True when the file was written, False when any step failed (the
        failure is logged, not raised).
    """
    succeeded = False
    try:
        target_dir = os.path.join("lib", "workspace", "alwrity_config")
        target_file = os.path.join(target_dir, "main_config.json")
        os.makedirs(target_dir, exist_ok=True)
        with open(target_file, 'w') as handle:
            json.dump(config, handle, indent=4)
        succeeded = True
    except Exception as exc:
        logger.error(f"Error saving main_config.json: {str(exc)}")
    return succeeded
def display_style_analysis(analysis_results: dict):
    """Display the style analysis results in a structured format.

    Renders four HTML sections (writing style, target audience, content type,
    recommended settings) through ``get_analysis_section`` and ``st.markdown``.
    Missing fields are shown as ``N/A``.

    Args:
        analysis_results: Mapping with optional ``writing_style``,
            ``target_audience``, ``content_type`` and ``recommended_settings``
            sub-mappings.

    Any error while rendering is logged and surfaced with ``st.error`` instead
    of being raised.
    """
    # Local import keeps this block self-contained.
    import html

    def _cell(value) -> str:
        """Return *value* as HTML-escaped text."""
        # The markup is rendered with unsafe_allow_html=True, so analysis
        # values must never be able to inject tags.
        return html.escape(str(value))

    def _cell_list(values) -> str:
        """Return a comma-separated, HTML-escaped rendering of *values*."""
        if values is None:
            values = ["N/A"]
        elif isinstance(values, (str, bytes)) or not hasattr(values, '__iter__'):
            # A bare string would otherwise be joined character by character.
            values = [values]
        return ', '.join(_cell(item) for item in values)

    def _show(title: str, rows) -> None:
        """Render one titled section from (label, already-escaped value) pairs."""
        items = "\n".join(
            f"<li><strong>{label}:</strong> {value}</li>" for label, value in rows
        )
        st.markdown(
            get_analysis_section(title, f"\n<ul>\n{items}\n</ul>\n"),
            unsafe_allow_html=True
        )

    try:
        # Writing Style Section
        writing_style = analysis_results.get("writing_style", {})
        _show("Writing Style", [
            ("Tone", _cell(writing_style.get("tone", "N/A"))),
            ("Voice", _cell(writing_style.get("voice", "N/A"))),
            ("Complexity", _cell(writing_style.get("complexity", "N/A"))),
            ("Formality", _cell(writing_style.get("formality", "N/A"))),
        ])

        # Target Audience Section
        target_audience = analysis_results.get("target_audience", {})
        _show("Target Audience", [
            ("Demographics", _cell_list(target_audience.get("demographics", ["N/A"]))),
            ("Expertise Level", _cell(target_audience.get("expertise_level", "N/A"))),
            ("Industry Focus", _cell(target_audience.get("industry_focus", "N/A"))),
            ("Geographic Focus", _cell(target_audience.get("geographic_focus", "N/A"))),
        ])

        # Content Type Section
        content_type = analysis_results.get("content_type", {})
        _show("Content Type", [
            ("Primary Type", _cell(content_type.get("primary_type", "N/A"))),
            ("Secondary Types", _cell_list(content_type.get("secondary_types", ["N/A"]))),
            ("Purpose", _cell(content_type.get("purpose", "N/A"))),
            ("Call to Action", _cell(content_type.get("call_to_action", "N/A"))),
        ])

        # Recommended Settings Section
        recommended = analysis_results.get("recommended_settings", {})
        _show("Recommended Settings", [
            ("Writing Tone", _cell(recommended.get("writing_tone", "N/A"))),
            ("Target Audience", _cell(recommended.get("target_audience", "N/A"))),
            ("Content Type", _cell(recommended.get("content_type", "N/A"))),
            ("Creativity Level", _cell(recommended.get("creativity_level", "N/A"))),
            ("Geographic Location", _cell(recommended.get("geographic_location", "N/A"))),
        ])
    except Exception as e:
        logger.error(f"Error displaying style analysis: {str(e)}")
        st.error(f"Error displaying analysis results: {str(e)}")
def _write_personalization_flag(done: bool) -> None:
    """Persist ``PERSONALIZATION_DONE`` to ``.env`` and to ``os.environ``.

    Existing ``PERSONALIZATION_DONE=`` lines are dropped and a single fresh
    entry is appended. Errors propagate to the caller.
    """
    flag = "True" if done else "False"

    # Read existing .env file content
    env_lines = []
    if os.path.exists('.env'):
        with open('.env', 'r') as f:
            env_lines = f.readlines()

    # Remove any existing PERSONALIZATION_DONE entries
    env_lines = [line for line in env_lines if not line.startswith('PERSONALIZATION_DONE=')]

    # A final line without a newline would otherwise be fused with the new entry.
    if env_lines and not env_lines[-1].endswith('\n'):
        env_lines[-1] += '\n'
    env_lines.append(f"PERSONALIZATION_DONE={flag}\n")

    # Write back to .env file
    with open('.env', 'w') as f:
        f.writelines(env_lines)

    # Update environment variable
    os.environ['PERSONALIZATION_DONE'] = flag


def render_personalization_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the personalization setup step.

    Offers two modes: "Manual Settings" (four tabs of widgets whose values are
    written to ``main_config.json`` when the save button is pressed) and
    "ALwrity Personalization" (URL / sample inputs; the analysis action is a
    stub here). When the navigation button fires without a prior save,
    ``PERSONALIZATION_DONE=False`` is recorded before moving to step 5.

    Args:
        api_key_manager: The API key manager instance (not used by this step).

    Returns:
        ``{"current_step": 4, "changes_made": True}``.
    """
    logger.info("[render_personalization_setup] Rendering personalization setup component")
    st.markdown("""
# ✨ Personalization Setup
Configure your content generation preferences and writing style
""")

    # Main section selection using radio buttons
    setup_mode = st.radio(
        "Choose Setup Mode",
        ["Manual Settings", "ALwrity Personalization"],
        horizontal=True,
        label_visibility="collapsed"
    )

    if setup_mode == "Manual Settings":
        # Create tabs for different settings categories
        tabs = st.tabs([
            "Blog Content Characteristics",
            "Blog Images",
            "AI Generation Settings",
            "Search Settings"
        ])

        # Blog Content Characteristics Tab
        with tabs[0]:
            col1, col2 = st.columns([1, 1])
            with col1:
                st.markdown("#### Blog Content Characteristics")
                blog_length = st.text_input(
                    "Blog Length",
                    value="2000",
                    placeholder="e.g., 2000",
                    help="Target word count for your blog posts"
                )
                blog_tone = st.selectbox(
                    "Blog Tone",
                    ["Professional", "Casual", "Technical", "Conversational"],
                    help="The overall tone of your content"
                )
                blog_demographic = st.selectbox(
                    "Target Demographic",
                    ["Professional", "General", "Technical", "Academic"],
                    help="Your primary audience demographic"
                )
                blog_type = st.selectbox(
                    "Content Type",
                    ["Informational", "Educational", "Entertainment", "Technical"],
                    help="The primary type of content you create"
                )
                blog_language = st.selectbox(
                    "Content Language",
                    ["English", "Spanish", "French", "German", "Other"],
                    help="Primary language for your content"
                )
                blog_format = st.selectbox(
                    "Output Format",
                    ["markdown", "html", "plain text"],
                    help="Format of the generated content"
                )
            with col2:
                st.markdown("### Blog Content Settings Guide")
                st.markdown("""
#### Blog Length
- Determines word count target
- Affects content depth
- Impacts SEO performance
#### Blog Tone
- Professional: Business-oriented
- Casual: Friendly, approachable
- Technical: Detailed, precise
#### Best Practices
- Match tone to audience
- Consider SEO requirements
- Maintain consistency
""")

        # Blog Images Tab
        with tabs[1]:
            col1, col2 = st.columns([1, 1])
            with col1:
                st.markdown("#### Blog Images Settings")
                image_model = st.selectbox(
                    "Image Generation Model",
                    ["stable-diffusion", "dall-e", "midjourney"],
                    help="AI model for generating images"
                )
                num_images = st.number_input(
                    "Number of Images",
                    min_value=1,
                    max_value=5,
                    value=1,
                    help="Number of images per blog post"
                )
                image_style = st.selectbox(
                    "Image Style",
                    ["Realistic", "Artistic", "Professional", "Creative"],
                    help="Style of generated images"
                )
            with col2:
                st.markdown("### Image Generation Guide")
                st.markdown("""
#### Model Selection
- Stable Diffusion: Versatile, fast
- DALL-E: High quality, creative
- Midjourney: Artistic, detailed
#### Best Practices
- Consider content type
- Balance quality vs. speed
- Optimize for platforms
""")

        # AI Generation Settings Tab
        with tabs[2]:
            col1, col2 = st.columns([1, 1])
            with col1:
                st.markdown("#### AI Generation Settings")
                gpt_provider = st.selectbox(
                    "AI Provider",
                    ["google", "openai", "anthropic"],
                    help="Choose your preferred AI provider"
                )
                model = st.text_input(
                    "Model",
                    value="gemini-1.5-flash-latest",
                    help="The specific AI model to use"
                )
                temperature = st.slider(
                    "Creativity Level",
                    min_value=0.0,
                    max_value=1.0,
                    value=0.7,
                    help="Higher = more creative, lower = more focused"
                )
                max_tokens = st.number_input(
                    "Maximum Length",
                    min_value=100,
                    max_value=8000,
                    value=4000,
                    help="Maximum length of generated content"
                )
            with col2:
                st.markdown("### AI Settings Guide")
                st.markdown("""
#### Provider Selection
- Google: Balanced, reliable
- OpenAI: Creative, versatile
- Anthropic: Precise, ethical
#### Temperature Guide
- 0.0-0.3: Focused, consistent
- 0.4-0.7: Balanced creativity
- 0.8-1.0: Highly creative
""")

        # Search Settings Tab
        with tabs[3]:
            col1, col2 = st.columns([1, 1])
            with col1:
                st.markdown("#### Search Settings")
                geo_location = st.text_input(
                    "Geographic Location",
                    value="us",
                    help="Target geographic location for search"
                )
                search_language = st.selectbox(
                    "Search Language",
                    ["en", "es", "fr", "de", "other"],
                    help="Language for search results"
                )
                num_results = st.number_input(
                    "Number of Results",
                    min_value=1,
                    max_value=50,
                    value=10,
                    help="Number of search results to analyze"
                )
                time_range = st.selectbox(
                    "Time Range",
                    ["anytime", "day", "week", "month", "year"],
                    help="Time range for search results"
                )
            with col2:
                st.markdown("### Search Settings Guide")
                st.markdown("""
#### Location & Language
- Affects result relevance
- Impacts local SEO
- Consider target market
#### Search Optimization
- Balance quantity vs. quality
- Consider time sensitivity
- Optimize for accuracy
""")

        # Save button for manual settings
        if st.button("Save Manual Settings", type="primary", use_container_width=True):
            try:
                # Save to main_config.json
                config = {
                    "Blog Content Characteristics": {
                        "Blog Length": blog_length,
                        "Blog Tone": blog_tone,
                        "Blog Demographic": blog_demographic,
                        "Blog Type": blog_type,
                        "Blog Language": blog_language,
                        "Blog Output Format": blog_format
                    },
                    "Blog Images Details": {
                        "Image Generation Model": image_model,
                        "Number of Blog Images": num_images,
                        "Image Style": image_style
                    },
                    "LLM Options": {
                        "GPT Provider": gpt_provider,
                        "Model": model,
                        "Temperature": temperature,
                        "Max Tokens": max_tokens
                    },
                    "Search Engine Parameters": {
                        "Geographic Location": geo_location,
                        "Search Language": search_language,
                        "Number of Results": num_results,
                        "Time Range": time_range
                    }
                }
                if save_main_config(config):
                    try:
                        _write_personalization_flag(True)
                        # Remember the save so the navigation handler below
                        # does not overwrite the flag with False.
                        st.session_state['personalization_saved'] = True
                        logger.info("Successfully set PERSONALIZATION_DONE=True in .env and environment")
                        st.success("✅ Your personalization settings have been saved successfully!")
                    except Exception as e:
                        logger.error(f"Error updating PERSONALIZATION_DONE: {str(e)}")
                        st.error("Settings saved but failed to update environment. Please try again.")
                else:
                    st.error("Unable to save settings. Please try again.")
            except Exception as e:
                logger.error(f"Error saving settings: {str(e)}")
                st.error(f"Failed to save settings: {str(e)}")

    else:  # ALwrity Personalization
        col1, col2 = st.columns([2, 1])
        with col1:
            st.markdown("### Website URL")
            url = st.text_input(
                "Enter your website URL",
                placeholder="https://example.com",
                help="Provide your website URL to analyze your content style"
            )
            if not url:
                st.markdown("### Written Samples")
                st.info("No website URL? No problem! Provide written samples instead.")
                # The widget is still shown; its value is not consumed yet
                # because the analysis action below is a stub.
                st.text_area(
                    "Paste your content samples here",
                    help="Paste 2-3 samples of your best content"
                )
            if st.button("🎨 Analyze Style", use_container_width=True):
                # Existing style analysis code...
                pass
        with col2:
            st.markdown("### How ALwrity Discovers Your Style")
            st.markdown("""
#### AI-Powered Analysis
ALwrity analyzes your content to understand:
- Writing tone and voice
- Content structure
- Target audience
- Engagement style
#### Personalized Recommendations
We provide:
- Writing guidelines
- Content templates
- Style recommendations
- Audience insights
""")

    # Navigation buttons
    if render_navigation_buttons(4, 6, changes_made=True):
        try:
            # If user hasn't saved settings manually, mark as skipped
            if not st.session_state.get('personalization_saved'):
                _write_personalization_flag(False)
                logger.info("User skipped personalization. Set PERSONALIZATION_DONE=False")
        except Exception as e:
            logger.error(f"Error updating PERSONALIZATION_DONE on skip: {str(e)}")
            st.error("Error updating environment. You may need to configure personalization later.")
        st.session_state.current_step = 5
        st.rerun()

    return {"current_step": 4, "changes_made": True}

View File

@@ -1,312 +0,0 @@
"""Website setup component for the API key manager."""
import streamlit as st
from loguru import logger
from ...website_analyzer import analyze_website
from ...website_analyzer.analyzer import WebsiteAnalyzer
import asyncio
import sys
from typing import Dict, Any
from ..manager import APIKeyManager
from .base import render_navigation_buttons
import os
# Configure logger to output to both file and stdout
# NOTE(review): `logger` is the object imported from loguru above; a bare
# logger.remove() drops every handler registered so far, so running this module
# presumably also discards sinks set up by other modules — confirm intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, rotation at 50 MB, retention of 10 days.
logger.add(
"logs/website_setup.log",
rotation="50 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, written to stdout with colour markup tags.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Ensure logs directory exists
# NOTE(review): this runs after the file sink above was registered — verify the
# sink does not need the directory to exist at registration time.
os.makedirs("logs", exist_ok=True)
def _persist_website_url(url: str) -> None:
    """Write ``WEBSITE_URL`` to ``.env`` and ``os.environ``.

    An empty *url* is stored as the sentinel ``no_website_provided``. Existing
    ``WEBSITE_URL=`` lines are replaced in place; otherwise one is appended.
    Errors propagate to the caller.
    """
    stored_value = url if url else "no_website_provided"
    new_entry = f"WEBSITE_URL={stored_value}\n"

    website_url_exists = False
    env_lines = []
    if os.path.exists('.env'):
        with open('.env', 'r') as f:
            for line in f:
                if line.strip().startswith('WEBSITE_URL='):
                    website_url_exists = True
                    env_lines.append(new_entry)
                else:
                    env_lines.append(line)

    # If WEBSITE_URL doesn't exist, add it
    if not website_url_exists:
        # A final line without a newline would otherwise be fused with the new entry.
        if env_lines and not env_lines[-1].endswith('\n'):
            env_lines[-1] += '\n'
        env_lines.append(new_entry)

    # Write all lines back to the .env file
    with open('.env', 'w') as f:
        f.writelines(env_lines)

    # Set environment variable
    os.environ['WEBSITE_URL'] = stored_value
    if url:
        logger.info(f"[render_website_setup] Saved website URL to .env: {url}")
    else:
        logger.info("[render_website_setup] Set default website URL: no_website_provided")


def render_website_setup(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the website setup step.

    The left column collects the URL (persisted to ``.env`` on every render),
    the analysis type and the "Analyze Website" action; the right column shows
    the results stored in ``st.session_state.website_analysis``.

    Args:
        api_key_manager (APIKeyManager): The API key manager instance

    Returns:
        Dict[str, Any]: Current state
    """
    # Single source of truth for the radio labels: the SEO tabs were previously
    # gated on a string that did not match any radio option and never rendered.
    basic_mode = "Basic Website Analysis"
    full_mode = "Full Website Analysis with SEO"

    logger.info("[render_website_setup] Rendering website setup component")
    st.markdown("### Step 2: Enter Your Website URL for Analysis (Optional)")

    # Create two columns for input and results
    col1, col2 = st.columns([1, 1])

    with col1:
        # Get existing website URL from environment or .env file
        existing_url = os.getenv('WEBSITE_URL', None)
        if not existing_url and os.path.exists('.env'):
            try:
                with open('.env', 'r') as f:
                    for line in f:
                        if line.strip().startswith('WEBSITE_URL='):
                            # Split once only: URLs may contain '=' in a query string.
                            existing_url = line.strip().split('=', 1)[1]
                            break
            except Exception as e:
                logger.error(f"[render_website_setup] Failed to read existing URL from .env: {str(e)}")

        # If existing_url is 'no_website_provided', set it to empty for better UX
        if existing_url == 'no_website_provided':
            existing_url = ''

        url = st.text_input(
            "Enter your website URL, if you own one",
            value=existing_url if existing_url else "",
            placeholder="https://example.com"
        )
        logger.info(f"[render_website_setup] URL input value: {url}")

        # Save URL to .env file
        try:
            _persist_website_url(url)
        except Exception as e:
            logger.error(f"[render_website_setup] Failed to save website URL: {str(e)}")

        analyze_type = st.radio(
            "Analysis Type",
            [basic_mode, full_mode],
            horizontal=True,
            label_visibility="hidden",
            help="Choose between basic website analysis or comprehensive SEO analysis"
        )

        if st.button("Analyze Website"):
            if url:
                with st.spinner("Analyzing website..."):
                    try:
                        logger.info(f"[render_website_setup] Starting website analysis for URL: {url}")
                        # Call the analyze_website function
                        results = analyze_website(url)

                        # Replace the old SEO analysis code with the new analyzer
                        analyzer = WebsiteAnalyzer()
                        seo_results = analyzer.analyze_website(url)

                        # A failed basic analysis may come back without 'data';
                        # make sure there is a dict to attach the SEO part to.
                        if not isinstance(results.get('data'), dict):
                            results['data'] = {}
                        if seo_results.get('success', False):
                            results['data']['seo_analysis'] = seo_results['data']['analysis']['seo_info']
                        else:
                            results['data']['seo_analysis'] = {
                                'error': seo_results.get('error', 'Unknown error in SEO analysis'),
                                'overall_score': 0,
                                'meta_tags': {},
                                'content': {},
                                'recommendations': []
                            }
                        logger.debug(f"[render_website_setup] Analysis results received: {results.get('success', False)}")

                        # Store results in session state
                        st.session_state.website_analysis = results
                        logger.info("[render_website_setup] Results stored in session state")

                        if not results.get('success', False):
                            error_msg = results.get('error', 'Analysis failed')
                            logger.error(f"[render_website_setup] Analysis failed: {error_msg}")
                            st.error(error_msg)
                        else:
                            logger.info("[render_website_setup] Analysis completed successfully")
                            st.success("✅ Website analysis completed successfully!")
                    except Exception as e:
                        error_msg = f"Analysis failed: {str(e)}"
                        logger.error(f"[render_website_setup] {error_msg}")
                        st.error(error_msg)
            else:
                logger.warning("[render_website_setup] No URL provided")
                st.warning("Please enter a valid URL")

    with col2:
        # Check if we have analysis results
        if 'website_analysis' in st.session_state:
            results = st.session_state.website_analysis
            if results.get('success', False):
                data = results.get('data', {})
                analysis = data.get('analysis', {})

                # Create tabs for different sections
                if analyze_type == full_mode:
                    tab1, tab2, tab3, tab4, tab5 = st.tabs([
                        "Basic Metrics",
                        "Content Analysis",
                        "SEO Analysis",
                        "Technical SEO",
                        "Strategy"
                    ])
                    strategy_tab = tab5
                else:
                    tab1, tab2, tab3, tab4 = st.tabs([
                        "Basic Metrics",
                        "Content Analysis",
                        "Technical Info",
                        "Strategy"
                    ])
                    strategy_tab = tab4

                with tab1:
                    st.markdown("##### Basic Metrics")
                    basic_info = analysis.get('basic_info', {})
                    st.write(f"Status Code: {basic_info.get('status_code')}")
                    st.write(f"Content Type: {basic_info.get('content_type')}")
                    st.write(f"Title: {basic_info.get('title')}")
                    st.write(f"Meta Description: {basic_info.get('meta_description')}")

                    # SSL Info
                    ssl_info = analysis.get('ssl_info', {})
                    if ssl_info.get('has_ssl'):
                        st.success("SSL Certificate is valid")
                        st.write(f"Expiry: {ssl_info.get('expiry')}")
                    else:
                        st.error("No valid SSL certificate found")

                with tab2:
                    st.markdown("##### Content Analysis")
                    content_info = analysis.get('content_info', {})

                    # Content Overview
                    st.markdown("###### 📊 Content Overview")
                    # Distinct names so the outer layout columns are not shadowed.
                    words_col, headings_col, images_col, links_col = st.columns(4)
                    with words_col:
                        st.metric("Word Count", content_info.get('word_count', 0))
                    with headings_col:
                        st.metric("Headings", content_info.get('heading_count', 0))
                    with images_col:
                        st.metric("Images", content_info.get('image_count', 0))
                    with links_col:
                        st.metric("Links", content_info.get('link_count', 0))

                if analyze_type == full_mode:
                    seo_data = data.get('seo_analysis', {})

                    with tab3:
                        st.markdown("##### SEO Analysis")

                        # Display SEO Score
                        seo_score = seo_data.get('overall_score', 0)
                        st.markdown(f"### SEO Score: {seo_score}/100")
                        # st.progress rejects values outside 0.0-1.0, so clamp.
                        st.progress(min(max(seo_score / 100, 0.0), 1.0))

                        # Meta Tags Analysis
                        st.markdown("#### Meta Tags Analysis")
                        meta_analysis = seo_data.get('meta_tags', {})
                        for key, value in meta_analysis.items():
                            if isinstance(value, bool):
                                # The pass/fail markers were identical empty
                                # strings; use distinct symbols.
                                st.write(f"{'✅' if value else '❌'} {key.replace('_', ' ').title()}")
                            elif isinstance(value, dict):
                                st.write(f"**{key.replace('_', ' ').title()}:**")
                                st.write(f"Status: {value.get('status', 'N/A')}")
                                st.write(f"Value: {value.get('value', 'N/A')}")
                                if value.get('recommendation'):
                                    st.write(f"Recommendation: {value['recommendation']}")
                            else:
                                st.write(f"**{key.replace('_', ' ').title()}:** {value}")

                        # Content Analysis
                        st.markdown("#### AI Content Analysis")
                        content_analysis = seo_data.get('content', {})
                        st.write(f"**Word Count:** {content_analysis.get('word_count', 0)}")
                        st.write(f"**Readability Score:** {content_analysis.get('readability_score', 0)}/100")
                        st.write(f"**Content Quality Score:** {content_analysis.get('content_quality_score', 0)}/100")

                        # Recommendations
                        st.markdown("#### SEO Recommendations")
                        recommendations = seo_data.get('recommendations', [])
                        for rec in recommendations:
                            st.write(f"**{rec.get('priority', '').upper()} Priority - {rec.get('category', '')}**")
                            st.write(f"Issue: {rec.get('issue', '')}")
                            st.write(f"Recommendation: {rec.get('recommendation', '')}")
                            st.write(f"Impact: {rec.get('impact', '')}")
                            st.write("---")

                    with tab4:
                        st.markdown("##### Technical SEO")
                        technical_seo = seo_data.get('technical_analysis', {})

                        # Mobile Friendliness
                        st.markdown("#### Mobile Friendliness")
                        mobile_friendly = technical_seo.get('mobile_friendly', False)
                        st.write(f"{'✅' if mobile_friendly else '❌'} Mobile Friendly")

                        # Page Speed
                        st.markdown("#### Page Speed")
                        speed_metrics = technical_seo.get('speed_metrics', {})
                        for metric, value in speed_metrics.items():
                            st.write(f"**{metric.replace('_', ' ').title()}:** {value}")

                        # Technical Issues
                        st.markdown("#### Technical Issues")
                        issues = technical_seo.get('issues', [])
                        for issue in issues:
                            st.write(f"{issue}")

                with strategy_tab:
                    st.markdown("##### Strategy Recommendations")
                    strategy_info = analysis.get('strategy', {})
                    if strategy_info:
                        for category, recommendations in strategy_info.items():
                            st.markdown(f"###### {category.replace('_', ' ').title()}")
                            for rec in recommendations:
                                st.write(f"{rec}")
                    else:
                        st.info("No strategy recommendations available")
            else:
                error_msg = results.get('error', 'Analysis failed')
                logger.error(f"[render_website_setup] Displaying error: {error_msg}")
                st.error(error_msg)
        else:
            logger.debug("[render_website_setup] No analysis results in session state")
            st.info("Enter a URL and click 'Analyze Website' to see results")

    # Navigation buttons
    if render_navigation_buttons(2, 5, True):
        # Move to next step (AI Research Setup)
        st.session_state.current_step = 3
        st.session_state.next_step = "ai_research_setup"
        st.rerun()

    return {"current_step": 2, "changes_made": True}

View File

@@ -1,121 +0,0 @@
"""API Key Rotation Manager."""
from datetime import datetime
from typing import Dict, Optional, List
import streamlit as st
from .health_monitor import APIKeyHealthMonitor
from .wizard_state import get_api_keys, set_api_key
class KeyRotationManager:
    """Manages automatic rotation of API keys based on health metrics.

    The active key per key type is kept in ``st.session_state.active_keys``;
    health decisions are delegated to ``APIKeyHealthMonitor``.
    """

    def __init__(self):
        """Initialize the key rotation manager."""
        self.health_monitor = APIKeyHealthMonitor()
        if 'active_keys' not in st.session_state:
            st.session_state.active_keys = {}

    @staticmethod
    def _key_type(key_name: str) -> str:
        """Return the key type prefix, e.g. ``OPENAI`` from ``OPENAI_API_KEY``."""
        return key_name.split('_')[0]

    @staticmethod
    def _minutes_remaining(cooldown_until: datetime, now: Optional[datetime] = None) -> int:
        """Return whole minutes from *now* until *cooldown_until*, never negative.

        Args:
            cooldown_until: Moment the cooldown ends.
            now: Reference time; defaults to ``datetime.now()``.
        """
        reference = datetime.now() if now is None else now
        seconds_left = (cooldown_until - reference).total_seconds()
        # An already-expired cooldown would otherwise display a negative count.
        return max(0, int(seconds_left / 60))

    def get_active_key(self, key_type: str) -> Optional[str]:
        """Get the currently active key for a given type, or None if unset."""
        return st.session_state.active_keys.get(key_type)

    def set_active_key(self, key_type: str, key_name: str) -> None:
        """Set the active key for a given type."""
        st.session_state.active_keys[key_type] = key_name

    def rotate_if_needed(self, key_type: str) -> Optional[str]:
        """Check and rotate key if needed based on health metrics.

        Returns:
            The newly activated key when a rotation happened, otherwise the
            unchanged current key (which may be None).
        """
        current_key = self.get_active_key(key_type)

        # If no current key or current key needs rotation
        if not current_key or self.health_monitor.should_rotate_key(current_key):
            new_key = self.health_monitor.get_best_available_key(key_type)
            if new_key and new_key != current_key:
                # Set cooldown on the old key if it exists
                if current_key:
                    self.health_monitor.set_cooldown(current_key, duration_minutes=30)
                # Update the active key
                self.set_active_key(key_type, new_key)
                return new_key
        return current_key

    def get_rotation_status(self) -> Dict[str, Dict]:
        """Get rotation status for all key types.

        Returns:
            Mapping of key type to a dict with ``active_key``,
            ``available_keys`` and ``cooldown_keys``.
        """
        status = {}
        for key_name in get_api_keys():
            key_type = self._key_type(key_name)
            health = self.health_monitor.get_key_health(key_name)

            if key_type not in status:
                status[key_type] = {
                    'active_key': self.get_active_key(key_type),
                    'available_keys': [],
                    'cooldown_keys': []
                }

            if health and health['in_cooldown']:
                status[key_type]['cooldown_keys'].append(key_name)
            else:
                status[key_type]['available_keys'].append(key_name)
        return status

    def display_rotation_dashboard(self) -> None:
        """Display the key rotation dashboard."""
        st.subheader("🔄 API Key Rotation Status")
        rotation_status = self.get_rotation_status()

        if not rotation_status:
            st.info("No API keys configured for rotation.")
            return

        for key_type, status in rotation_status.items():
            with st.expander(f"{key_type} Rotation Status"):
                # Active Key
                st.write("**Active Key:**")
                if status['active_key']:
                    st.success(status['active_key'])
                else:
                    st.warning("No active key")

                # Available Keys
                st.write("**Available Keys:**")
                if status['available_keys']:
                    for key in status['available_keys']:
                        st.write(f"- {key}")
                else:
                    st.warning("No available keys")

                # Cooldown Keys
                if status['cooldown_keys']:
                    st.write("**Keys in Cooldown:**")
                    for key in status['cooldown_keys']:
                        health = self.health_monitor.get_key_health(key)
                        if health and health['cooldown_until']:
                            minutes_left = self._minutes_remaining(health['cooldown_until'])
                            st.info(f"- {key} (Cooldown: {minutes_left} minutes remaining)")

    def initialize_rotation(self) -> None:
        """Initialize key rotation for all API key types."""
        # Get unique key types
        key_types = {self._key_type(key_name) for key_name in get_api_keys()}

        # Initialize rotation for each key type
        for key_type in key_types:
            if not self.get_active_key(key_type):
                best_key = self.health_monitor.get_best_available_key(key_type)
                if best_key:
                    self.set_active_key(key_type, best_key)

View File

@@ -1,238 +0,0 @@
"""API key manager class."""
from typing import Dict, Any, Optional
from loguru import logger
import streamlit as st
import os
import json
import sys
from datetime import datetime
from dotenv import load_dotenv
# Configure logger to output to both file and stdout
# NOTE(review): `logger` is the object imported from loguru above; a bare
# logger.remove() drops every handler registered so far, so running this module
# presumably also discards sinks set up by other modules — confirm intended.
logger.remove() # Remove default handler
# File sink: DEBUG and above, including module name, function and line number.
logger.add("logs/api_key_manager.log",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="DEBUG")
# Console sink: INFO and above, same record format as the file sink.
logger.add(sys.stdout,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="INFO")
class APIKeyManager:
"""Manager for handling API keys."""
def __init__(self):
"""Initialize the API key manager."""
logger.info("[APIKeyManager.__init__] Initializing API key manager")
self.api_keys = {}
self.load_api_keys()
self.api_key_groups = {
"Create": {
"GEMINI_API_KEY": {
"url": "https://makersuite.google.com/app/apikey",
"description": "Google's Gemini AI for content generation",
"setup_steps": [
"Visit Google AI Studio",
"Create a Google Cloud account",
"Enable Gemini API",
"Generate API key"
]
},
"OPENAI_API_KEY": {
"url": "https://platform.openai.com/api-keys",
"description": "OpenAI's GPT models for content creation",
"setup_steps": [
"Go to OpenAI platform",
"Create an account",
"Navigate to API keys",
"Create new API key"
]
},
"MISTRAL_API_KEY": {
"url": "https://console.mistral.ai/api-keys/",
"description": "Mistral AI for efficient content generation",
"setup_steps": [
"Visit Mistral AI website",
"Sign up for an account",
"Access API section",
"Generate API key"
]
}
},
"Research": {
"TAVILY_API_KEY": {
"url": "https://tavily.com/#api",
"description": "Powers intelligent web research features",
"setup_steps": [
"Go to Tavily's website",
"Create an account",
"Access your API dashboard",
"Generate a new API key"
]
},
"SERPER_API_KEY": {
"url": "https://serper.dev/signup",
"description": "Enables Google search functionality",
"setup_steps": [
"Visit Serper.dev",
"Sign up for an account",
"Go to API section",
"Create your API key"
]
}
},
"Deep Search": {
"METAPHOR_API_KEY": {
"url": "https://dashboard.exa.ai/login",
"description": "Enables advanced web search capabilities",
"setup_steps": [
"Visit the Exa AI dashboard",
"Sign up for a free account",
"Navigate to API Keys section",
"Create a new API key"
]
},
"FIRECRAWL_API_KEY": {
"url": "https://www.firecrawl.dev/account",
"description": "Enables web content extraction",
"setup_steps": [
"Visit Firecrawl website",
"Sign up for an account",
"Access API dashboard",
"Create your API key"
]
}
},
"Integrations": {
"STABILITY_API_KEY": {
"url": "https://platform.stability.ai/",
"description": "Enables AI image generation",
"setup_steps": [
"Access Stability AI platform",
"Create an account",
"Navigate to API settings",
"Generate your API key"
]
}
}
}
def load_api_keys(self):
"""Load API keys from environment variables."""
try:
logger.info("[APIKeyManager.load_api_keys] Loading API keys from environment")
# Get the current working directory and .env file path
current_dir = os.getcwd()
env_path = os.path.join(current_dir, '.env')
logger.info(f"[APIKeyManager.load_api_keys] Looking for .env file at: {env_path}")
# Check if .env file exists
if not os.path.exists(env_path):
logger.warning(f"[APIKeyManager.load_api_keys] .env file not found at {env_path}")
return
# Load environment variables
load_dotenv(env_path, override=True)
logger.debug("[APIKeyManager.load_api_keys] Environment variables loaded")
# Define all possible API key providers
all_providers = [
# AI Providers
'OPENAI_API_KEY',
'GEMINI_API_KEY',
'ANTHROPIC_API_KEY',
'MISTRAL_API_KEY',
# Research Providers
'SERPER_API_KEY',
'TAVILY_API_KEY',
'METAPHOR_API_KEY',
'FIRECRAWL_API_KEY'
]
# Load API keys from environment variables
for provider in all_providers:
value = os.getenv(provider)
if value:
self.api_keys[provider] = value
logger.info(f"[APIKeyManager.load_api_keys] Loaded {provider} from environment")
else:
logger.debug(f"[APIKeyManager.load_api_keys] {provider} not found in environment")
logger.info(f"[APIKeyManager.load_api_keys] Loaded {len(self.api_keys)} API keys")
except Exception as e:
logger.error(f"[APIKeyManager.load_api_keys] Error loading API keys: {str(e)}")
def save_api_key(self, provider: str, api_key: str) -> bool:
    """
    Save an API key for a provider.

    The key is stored in ``self.api_keys``, exported through ``os.environ``
    and persisted to the ``.env`` file located four directory levels above
    this module. An existing ``<ENV_VAR>=`` line in that file is replaced;
    otherwise a new line is appended. All other lines are kept as they are.

    Args:
        provider: The provider name (e.g., 'openai', 'gemini')
        api_key: The API key value
    Returns:
        bool: True if successful, False otherwise (unknown provider or any
        error while updating the file; errors are logged, not raised)
    """
    try:
        logger.info(f"[APIKeyManager] Saving API key for {provider}")
        # Map provider to environment variable name
        env_var_map = {
            'openai': 'OPENAI_API_KEY',
            'gemini': 'GEMINI_API_KEY',
            'mistral': 'MISTRAL_API_KEY',
            'anthropic': 'ANTHROPIC_API_KEY',
            'serpapi': 'SERPAPI_API_KEY',
            'tavily': 'TAVILY_API_KEY',
            'metaphor': 'METAPHOR_API_KEY',
            'firecrawl': 'FIRECRAWL_API_KEY'
        }
        env_var = env_var_map.get(provider)
        if not env_var:
            logger.error(f"[APIKeyManager] Unknown provider: {provider}")
            return False
        # Update the in-memory dictionary. The short provider name is kept
        # for existing callers; the env-var name is refreshed too so an
        # entry stored under that name does not go stale.
        self.api_keys[provider] = api_key
        self.api_keys[env_var] = api_key
        # Update environment variable
        os.environ[env_var] = api_key
        # Read existing .env file content
        env_path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))), '.env')
        try:
            with open(env_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except FileNotFoundError:
            lines = []
        # Update or add the API key
        new_entry = f"{env_var}={api_key}\n"
        key_found = False
        updated_lines = []
        for line in lines:
            if line.startswith(f"{env_var}="):
                updated_lines.append(new_entry)
                key_found = True
            else:
                updated_lines.append(line)
        if not key_found:
            # A file whose last line lacks a trailing newline would otherwise
            # have the new entry glued onto that line, corrupting both values.
            if updated_lines and not updated_lines[-1].endswith("\n"):
                updated_lines[-1] += "\n"
            updated_lines.append(new_entry)
        # Write back to .env file
        with open(env_path, 'w', encoding='utf-8') as f:
            f.writelines(updated_lines)
        logger.info(f"[APIKeyManager] Successfully saved API key for {provider}")
        return True
    except Exception as e:
        logger.error(f"[APIKeyManager] Error saving API key for {provider}: {str(e)}")
        return False
def get_api_key(self, provider: str) -> Optional[str]:
    """Return the stored API key for ``provider``, or ``None`` if there is none.

    Entries in ``self.api_keys`` may be keyed either by environment variable
    name (e.g. ``'OPENAI_API_KEY'``) or by short provider name
    (e.g. ``'openai'``). An exact match always wins; if there is none, the
    other spelling of the same provider is tried so that both naming schemes
    resolve to the same key.

    Args:
        provider: Short provider name or environment variable name.
    Returns:
        The API key string, or ``None`` when no entry matches.
    """
    stored = self.api_keys
    if provider in stored:
        return stored[provider]
    if not isinstance(provider, str):
        return None
    suffix = "_API_KEY"
    # 'openai' -> 'OPENAI_API_KEY'
    env_style = f"{provider.upper()}{suffix}"
    if env_style in stored:
        return stored[env_style]
    # 'OPENAI_API_KEY' -> 'openai'
    if provider.upper().endswith(suffix):
        short_style = provider[:-len(suffix)].lower()
        if short_style in stored:
            return stored[short_style]
    return None

View File

@@ -1,37 +0,0 @@
"""State management for the API key manager."""
import streamlit as st
from datetime import datetime
def initialize_wizard_state():
    """Create the wizard state in the Streamlit session if it is missing.

    An existing ``wizard_state`` entry is left untouched. Nothing is returned.
    """
    if 'wizard_state' in st.session_state:
        return
    fresh_state = {
        'current_step': 0,
        'total_steps': 0,
        'completed_steps': set(),
        'api_keys_status': {},
        'setup_progress': 0,
    }
    st.session_state.wizard_state = fresh_state
def update_progress(api_keys_config):
    """Recompute the overall setup progress and store it in the wizard state.

    Progress is the number of entries in ``api_keys_status`` flagged as
    configured, divided by the total number of keys in ``api_keys_config``,
    expressed as a percentage. When the configuration contains no keys the
    progress is set to 0 instead of dividing by zero.

    Args:
        api_keys_config: Mapping whose values are sized collections of API
            key definitions; the lengths of all values are summed.
    """
    total_keys = sum(len(keys) for keys in api_keys_config.values())
    wizard_state = st.session_state.wizard_state
    configured_keys = sum(
        1
        for status in wizard_state['api_keys_status'].values()
        if status.get('configured', False)
    )
    if total_keys == 0:
        # Nothing to configure: avoid ZeroDivisionError.
        wizard_state['setup_progress'] = 0
        return
    wizard_state['setup_progress'] = (configured_keys / total_keys) * 100
def update_key_status(key):
    """Record ``key`` as configured in the wizard state.

    The entry holds ``configured=True`` and an ISO-8601 timestamp taken from
    ``datetime.now()`` at the time of the call.
    """
    status_entry = {
        'configured': True,
        'timestamp': datetime.now().isoformat(),
    }
    st.session_state.wizard_state['api_keys_status'][key] = status_entry
def get_key_status(key):
    """Return the status dict recorded for ``key``, or ``{}`` if none exists."""
    recorded_statuses = st.session_state.wizard_state['api_keys_status']
    return recorded_statuses.get(key, {})
def get_progress():
    """Return the ``setup_progress`` value stored in the wizard state."""
    wizard_state = st.session_state.wizard_state
    return wizard_state['setup_progress']

Some files were not shown because too many files have changed in this diff Show More