ALwrity Version 0.5.1 (Fastapi + React)
This commit is contained in:
99
ToBeMigrated/utils/ai_research.py
Normal file
99
ToBeMigrated/utils/ai_research.py
Normal file
@@ -0,0 +1,99 @@
|
||||
"""AI research module for topic analysis and research."""
|
||||
|
||||
import asyncio
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
import sys
|
||||
from ..web_crawlers.async_web_crawler import AsyncWebCrawlerService
|
||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
# Configure logging for the research module.
# NOTE(review): logger.remove() is called without a handler id, which per the
# loguru docs removes every handler added so far on the shared logger — so
# importing this module reconfigures logging for the whole process. Confirm
# that this is intended.
_FILE_LOG_FORMAT = "{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
_CONSOLE_LOG_FORMAT = (
    "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
    "<level>{level: <8}</level> | "
    "<cyan>{message}</cyan>"
)

logger.remove()

# File sink: DEBUG and above, rotated at 500 MB, kept for 10 days.
logger.add(
    "logs/ai_research.log",
    level="DEBUG",
    format=_FILE_LOG_FORMAT,
    rotation="500 MB",
    retention="10 days",
)

# Console sink: INFO and above, with colour markup.
logger.add(sys.stdout, level="INFO", format=_CONSOLE_LOG_FORMAT)
|
||||
|
||||
def _build_research_payload(topic: str, analysis: Dict[str, Any]) -> Dict[str, Any]:
    """Shape the LLM analysis dict into the success payload of research_topic."""
    # Use `or` fallbacks rather than .get() defaults: a key that is present
    # but set to None must not break the comparison / len() below.
    seo_score = analysis.get('seo_score') or 0
    key_insights = analysis.get('key_insights') or []

    return {
        'success': True,
        'data': {
            'research': {
                'overview': {
                    'topic': topic,
                    'scope': analysis.get('topics', []),
                    'methodology': 'Web crawling and AI analysis'
                },
                'data_quality': {
                    'is_reliable': bool(seo_score > 0.7)
                },
                'analysis_quality': {
                    'is_thorough': bool(len(key_insights) > 5)
                },
                'recommendations': analysis.get('recommendations', []),
                'next_steps': analysis.get('priority_areas', [])
            }
        }
    }


async def _crawl_and_analyze(topic: str) -> Dict[str, Any]:
    """Crawl for ``topic`` and run the LLM analysis on the crawled content."""
    async with AsyncWebCrawlerService() as crawler:
        # Perform web research
        search_results = await crawler.crawl_website(topic)
        if not search_results.get('success'):
            return {
                'success': False,
                'error': search_results.get('error', 'Research failed')
            }

        # Analyze content with LLM
        analysis = await crawler.analyze_content_with_llm(
            search_results['content'],
            api_key=None,  # Should be passed from config
            gpt_provider="google"  # Should be configurable
        )
        return _build_research_payload(topic, analysis)


def _run_coroutine_blocking(coro_factory):
    """Run the coroutine produced by ``coro_factory`` to completion and return its result.

    ``asyncio.run`` refuses to start when the calling thread already has a
    running event loop. In that case the coroutine is executed on a fresh
    loop in a short-lived worker thread while the caller waits for it.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread: the plain path works.
        return asyncio.run(coro_factory())

    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(coro_factory())).result()


def research_topic(topic: str) -> Dict[str, Any]:
    """
    Research a topic using web crawling and AI analysis.

    Blocking entry point. It can be called from plain synchronous code and
    also from a thread that already runs an asyncio event loop (the calling
    thread is blocked until the research finishes).

    Args:
        topic (str): The topic to research

    Returns:
        Dict[str, Any]: On success ``{'success': True, 'data': {'research': ...}}``
        with overview, data/analysis quality flags, recommendations and next
        steps. On any failure ``{'success': False, 'error': <message>}``;
        exceptions are logged and reported this way rather than raised.
    """
    try:
        logger.info(f"[research_topic] Starting research for topic: {topic}")

        # Run the async analysis
        results = _run_coroutine_blocking(lambda: _crawl_and_analyze(topic))

        if not results.get('success'):
            error_msg = results.get('error', 'Research failed')
            logger.error(f"[research_topic] Research failed: {error_msg}")
            return {
                'success': False,
                'error': error_msg
            }

        logger.info("[research_topic] Research completed successfully")
        return results

    except Exception as e:
        error_msg = f"Research failed: {str(e)}"
        logger.error(f"[research_topic] {error_msg}")
        return {
            'success': False,
            'error': str(e)
        }
|
||||
232
ToBeMigrated/utils/alwrity_utils.py
Normal file
232
ToBeMigrated/utils/alwrity_utils.py
Normal file
@@ -0,0 +1,232 @@
|
||||
import re
|
||||
import os
|
||||
import PyPDF2
|
||||
import openai
|
||||
import streamlit as st
|
||||
import tempfile
|
||||
from loguru import logger
|
||||
|
||||
|
||||
from lib.ai_writers.ai_news_article_writer import ai_news_generation
|
||||
from lib.ai_writers.ai_finance_report_generator.ai_financial_dashboard import get_dashboard
|
||||
from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
|
||||
from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
|
||||
from lib.ai_writers.twitter_writers.twitter_dashboard import run_dashboard
|
||||
from lib.ai_writers.insta_ai_writer import insta_writer
|
||||
from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
|
||||
from lib.ai_writers.ai_essay_writer import ai_essay_generator
|
||||
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
||||
#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
|
||||
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
|
||||
|
||||
def ai_agents_team():
    """Render the "AI Agents Teams" page and route to the selected team.

    Shows a radio selector with two teams. The planning team asks for
    keywords and, for now, only shows a "coming soon" message; the content
    creation team is delegated to ``content_agents()``.
    """
    planning_team = "AI Planning Team"
    creation_team = "AI Content Creation Team"

    st.title("🐲 Your AI Agents Teams")
    st.markdown(
        "Alwrity offers AI agents team for content creators to easily modify them for their needs.\n"
        "Abstracting tech & plumbing, easily define role, goal, task. Use different AI agents framework."
    )

    # Radio button for choosing an AI Content Team
    selected_team = st.radio("**Choose AI Agents Team:**", [planning_team, creation_team])

    if selected_team == creation_team:
        content_agents()
        return
    if selected_team != planning_team:
        return

    st.title("AI Agents for Content Ideation")
    plan_keywords = st.text_input(
        "Enter Keywords to get 2 months content calendar:",
        placeholder="Enter keywords to generate AI content calendar:",
        help="Enter at least two words for better results."
    )

    if not st.button("Get calendar"):
        return

    # At least two whitespace-separated words are required.
    if not plan_keywords or len(plan_keywords.split()) < 2:
        st.error("🚫 Single keywords are just too vague. Try again.")
        return

    with st.spinner("Get Content Plan..."):
        try:
            # The content planner call is disabled in this file (its import
            # is commented out), so only a placeholder message is shown.
            st.success(f"Coming soon: Content plan for: {plan_keywords}")
        except Exception as err:
            st.error(f"Failed to generate content plan: {err}")
|
||||
|
||||
|
||||
|
||||
def content_agents():
    """Render the content-writing agents form.

    Collects the business domain keywords and, on "Start Writing", validates
    that at least two words were entered. The writer itself is not wired in
    yet, so a "not implemented" message is displayed instead of content.
    """
    st.markdown("AI Agents Team for Content Writing")

    content_keywords = st.text_input(
        "Enter Main Domain Keywords of your business:",
        placeholder="Better keywords, Better content. Get keywords from Google search",
        help="These keywords define your main business sector, blogging niche, Industry, domain etc"
    )

    if not st.button("Start Writing"):
        return

    # At least two whitespace-separated words are required.
    if not content_keywords or len(content_keywords.split()) < 2:
        st.error("🚫 Single keywords are just too vague. Try again.")
        return

    with st.spinner("Generating Content..."):
        try:
            # No writer is called here yet; only a placeholder is shown.
            st.success(f"🚫 Not implemented yet: {content_keywords}")
        except Exception as err:
            st.error(f"🚫 Failed to generate content with AI Agents: {err}")
|
||||
|
||||
|
||||
|
||||
def essay_writer():
    """Render the AI essay writer form and trigger essay generation.

    The user picks a title, essay type, education level and length; on
    "Generate Essay" the selections are passed unchanged to
    ``ai_essay_generator``. An empty title is rejected with an error.
    """
    st.title("AI Essay Writer 📝")
    st.write("Select your essay type, education level, and desired length, then let AI generate an essay for you. ✨")

    # Choices offered by the three select boxes.
    essay_types = [
        "📖 Argumentative - Forming an opinion via research. Building an evidence-based argument.",
        "📚 Expository - Knowledge of a topic. Communicating information clearly.",
        "✒️ Narrative - Creative language use. Presenting a compelling narrative.",
        "🎨 Descriptive - Creative language use. Describing sensory details."
    ]
    education_levels = [
        "🏫 Primary School",
        "🏫 High School",
        "🎓 College",
        "🎓 Graduate School"
    ]
    num_pages_options = [
        "📄 Short Form (1-2 pages)",
        "📄📄 Medium Form (3-5 pages)",
        "📄📄📄 Long Form (6+ pages)"
    ]

    # Two-column layout: title/type on the left, level/length on the right.
    left_col, right_col = st.columns(2)

    with left_col:
        essay_title = st.text_input(
            "📝 Essay Title",
            placeholder="Enter the title of your essay",
            help="Provide a clear and concise title for your essay."
        )
        selected_essay_type = st.selectbox(
            "📚 Type of Essay",
            options=essay_types,
            help="Choose the type of essay you want to write."
        )

    with right_col:
        selected_education_level = st.selectbox(
            "🎓 Level of Education",
            options=education_levels,
            help="Choose your level of education."
        )
        selected_num_pages = st.selectbox(
            "📄 Number of Pages",
            options=num_pages_options,
            help="Select the length of your essay."
        )

    if not st.button("🚀 Generate Essay"):
        return

    if not essay_title:
        st.error("Please enter a valid title for your essay. 🚫")
        return

    st.success("Generating your essay... ✨")
    ai_essay_generator(essay_title, selected_essay_type, selected_education_level, selected_num_pages)
|
||||
|
||||
|
||||
def ai_news_writer():
    """Render the AI news writer form and show the generated news report.

    Collects keywords (at least two words), an origin country and a search
    language, then calls ``ai_news_generation`` and renders its result as
    markdown. Failures of the generator are shown as an error message.
    """
    st.markdown("<h1>📰 AI News Writer 🗞️ </h1>", unsafe_allow_html=True)

    # Input for news keywords
    news_keywords = st.text_input(
        "**🔑 Enter Keywords from News Headlines:**",
        placeholder="Describe the News article in 3-5 words. Enter main keywords describing the News Event:",
        help="Enter at least two words for better results."
    )

    has_enough_keywords = bool(news_keywords) and len(news_keywords.split()) >= 2

    # Warn immediately when something was typed but it is a single word.
    if news_keywords and not has_enough_keywords:
        st.error("🚫 News keywords should be at least two words long. Least, you can do..")

    # (code, display name) pairs; only the display name is shown in the UI.
    countries = [
        ("es", "Spain"),
        ("vn", "Vietnam"),
        ("pk", "Pakistan"),
        ("in", "India"),
        ("de", "Germany"),
        ("cn", "China")
    ]
    languages = [
        ("en", "English"),
        ("es", "Spanish"),
        ("vi", "Vietnamese"),
        ("ar", "Arabic"),
        ("hi", "Hindi"),
        ("de", "German"),
        ("zh-cn", "Chinese")
    ]

    def show_label(option):
        return option[1]

    country_col, language_col = st.columns(2)
    with country_col:
        news_country = st.selectbox(
            "**🌍 Select Origin Country of News Event:**",
            countries,
            format_func=show_label,
            help="Which country did the NEWS originate from ?"
        )
    with language_col:
        news_language = st.selectbox(
            "**🗣️ Select News Article Language to Search For:**",
            languages,
            format_func=show_label,
            help="Language to output News Article in ?"
        )

    if not st.button("📰 Generate News Report"):
        return

    if not has_enough_keywords:
        st.error("Please enter valid keywords for the news report. 🚫")
        return

    with st.spinner("Generating News Report... ⏳"):
        try:
            # NOTE(review): news_country / news_language are the selected
            # (code, name) tuples, not just the codes — confirm that
            # ai_news_generation expects tuples.
            news_report = ai_news_generation(news_keywords, news_country, news_language)
            st.success(f"Successfully generated news report on: {news_keywords} 🎉")
            st.markdown(news_report)
        except Exception as err:
            st.error(f"Failed to generate news report: {err} ❌")
|
||||
|
||||
|
||||
def ai_finance_ta_writer():
    """Render the financial technical-analysis form and show the report.

    Reads a ticker symbol, and on "Generate TA Report" asks the dashboard
    returned by ``get_dashboard()`` for a technical analysis of that ticker,
    rendering the result as markdown. Surrounding whitespace in the input is
    ignored; an empty (or whitespace-only) ticker is rejected up front, and
    any failure of the analysis is shown as an error message.
    """
    st.markdown("<div class='sub-header'>AI Financial Technical Analysis Writer</div>", unsafe_allow_html=True)

    ticker_symbol = st.text_input(
        "Enter Ticker Symbol for TA:",
        placeholder="Enter a valid Ticker Symbol (Examples: IBM, BABA, HDFCBANK.NS, TATAMOTORS.NS etc)",
        help="Be sure of the ticker symbol. Double-check it! Examples: IBM, BABA, HDFCBANK.NS, TATAMOTORS.NS"
    )

    if not st.button("Generate TA Report"):
        return

    # Trim the input so that "   " counts as missing and " IBM " is looked
    # up as "IBM" instead of being forwarded with the padding.
    ticker = (ticker_symbol or "").strip()
    if not ticker:
        st.error("🚫 Provide a valid Ticker Symbol. Don't waste my time.")
        return

    with st.spinner("Generating TA Report..."):
        try:
            # Get dashboard instance and generate technical analysis
            dashboard = get_dashboard()
            ta_report = dashboard.generate_technical_analysis(ticker)
            st.success(f"Successfully generated TA report for: {ticker}")
            st.markdown(ta_report)
        except Exception as err:
            st.error(f"🚫 Check ticker symbol: Failed to write Financial Technical Analysis. Error: {err}")
|
||||
|
||||
def ai_social_writer():
    """Let the user pick a social media platform and open its writer UI.

    The radio widget offers (key, label) pairs and displays only the label;
    the key of the selected pair decides which platform entry point is run.
    """
    # Platform key -> (label shown in the radio, entry point to run).
    platforms = {
        "facebook": ("Facebook", facebook_main_menu),
        "linkedin": ("LinkedIn", linkedin_main_menu),
        "twitter": ("Twitter", run_dashboard),
        "instagram": ("Instagram", insta_writer),
        "youtube": ("YouTube", youtube_main_menu),
    }
    social_media_options = [(key, label) for key, (label, _) in platforms.items()]

    selected_platform = st.radio(
        "Choose a Social Media Platform:",
        social_media_options,
        format_func=lambda option: option[1]
    )

    platform_key = selected_platform[0]
    if platform_key in platforms:
        _, open_writer = platforms[platform_key]
        open_writer()
|
||||
54
ToBeMigrated/utils/api_key_manager/__init__.py
Normal file
54
ToBeMigrated/utils/api_key_manager/__init__.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""API Key Manager package for ALwrity."""
|
||||
|
||||
from .manager import APIKeyManager
|
||||
from .api_key_manager import render, check_onboarding_completion, get_onboarding_status, reset_onboarding
|
||||
from .onboarding_progress import (
|
||||
OnboardingProgress,
|
||||
get_onboarding_progress,
|
||||
render_progress_indicator,
|
||||
render_resume_message,
|
||||
StepStatus,
|
||||
StepData
|
||||
)
|
||||
from .validation import check_all_api_keys
|
||||
from .components.base import (
|
||||
render_step_indicator,
|
||||
render_navigation_buttons,
|
||||
render_step_validation,
|
||||
render_resume_options
|
||||
)
|
||||
|
||||
# Public API of the package: every name listed here is imported above.
__all__ = [
    # Main classes
    'APIKeyManager', 'OnboardingProgress', 'StepStatus', 'StepData',

    # Main functions
    'render', 'check_onboarding_completion', 'get_onboarding_status',
    'reset_onboarding', 'get_onboarding_progress',

    # UI components
    'render_progress_indicator', 'render_resume_message',
    'render_step_indicator', 'render_navigation_buttons',
    'render_step_validation', 'render_resume_options',

    # Validation
    'check_all_api_keys',
]

# Package metadata
__version__ = "2.0.0"
__author__ = "ALwrity Team"
__description__ = "Comprehensive API key management and onboarding system for ALwrity"

# Note: the FastAPI endpoints live in the backend/ directory, not in this
# package, for better separation of concerns and enterprise architecture.
|
||||
42
ToBeMigrated/utils/api_key_manager/ai_research.py
Normal file
42
ToBeMigrated/utils/api_key_manager/ai_research.py
Normal file
@@ -0,0 +1,42 @@
|
||||
"""AI research functionality for API key manager."""
|
||||
|
||||
from loguru import logger
|
||||
import asyncio
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
async def research_topic(topic: str, api_keys: Dict[str, str]) -> Dict[str, Any]:
    """
    Research a topic using available AI services (placeholder).

    The current body performs no real research: it returns fixed sample
    data built around ``topic``, and ``api_keys`` is not read.

    Args:
        topic (str): The topic to research
        api_keys (Dict[str, str]): Dictionary of API keys for different services

    Returns:
        Dict[str, Any]: ``{"topic", "status": "success", "data": {...}}`` on
        success, or ``{"topic", "status": "error", "error": <message>}`` if
        an exception was raised; exceptions are logged, not propagated.
    """
    try:
        logger.info(f"Starting research on topic: {topic}")

        # TODO: Implement actual research functionality using available API keys
        placeholder_data = {
            "summary": f"Research summary for {topic}",
            "key_points": ["Point 1", "Point 2", "Point 3"],
            "sources": ["Source 1", "Source 2"]
        }
        outcome = {
            "topic": topic,
            "status": "success",
            "data": placeholder_data
        }

        logger.info("Research completed successfully")
        return outcome

    except Exception as exc:
        logger.error(f"Error during research: {str(exc)}")
        return {
            "topic": topic,
            "status": "error",
            "error": str(exc)
        }
|
||||
178
ToBeMigrated/utils/api_key_manager/components/README.md
Normal file
178
ToBeMigrated/utils/api_key_manager/components/README.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# ALwrity Setup Components Guide
|
||||
|
||||
## Overview
|
||||
|
||||
The ALwrity Setup Components are the building blocks that guide you through setting up your content creation environment. Each component is designed to help you configure specific aspects of ALwrity for optimal content creation.
|
||||
|
||||
## Core Components
|
||||
|
||||
### 1. Website Setup (`website_setup.py`)
|
||||
**Purpose**: Configure your website's basic information and analyze its current state
|
||||
|
||||
**Features**:
|
||||
- **URL Configuration**: Set up your website's URL
|
||||
- **Analysis Options**:
|
||||
- Basic Analysis: Quick overview of your website
|
||||
- Full Analysis with SEO: Comprehensive website and SEO analysis
|
||||
- **Analysis Results**:
|
||||
- Basic Metrics: Status, content type, title, meta description
|
||||
- Content Analysis: Word count, headings, images, links
|
||||
- SEO Analysis: SEO score, meta tags, content quality
|
||||
- Technical SEO: Mobile friendliness, page speed, technical issues
|
||||
- Strategy Recommendations: Actionable improvements
|
||||
|
||||
### 2. AI Research Setup (`ai_research_setup.py`)
|
||||
**Purpose**: Configure AI-powered research tools for content creation
|
||||
|
||||
**Features**:
|
||||
- **Traditional Search**:
|
||||
- SerpAPI integration for real-time search results
|
||||
- Access to structured data and knowledge graphs
|
||||
- News articles and related questions
|
||||
|
||||
- **AI Deep Research**:
|
||||
- Tavily AI for semantic understanding
|
||||
- Metaphor/Exa for neural search capabilities
|
||||
- Advanced research features
|
||||
|
||||
### 3. AI Providers (`ai_providers.py`)
|
||||
**Purpose**: Set up your preferred AI content generation services
|
||||
|
||||
**Supported Providers**:
|
||||
- **OpenAI (GPT models)**
|
||||
- Advanced language models
|
||||
- Creative content generation
|
||||
- Context-aware responses
|
||||
|
||||
- **Google (Gemini Pro)**
|
||||
- Balanced content creation
|
||||
- Factual accuracy
|
||||
- Multilingual support
|
||||
|
||||
- **Anthropic (Claude)**
|
||||
- Professional writing
|
||||
- Detailed analysis
|
||||
- Ethical considerations
|
||||
|
||||
- **DeepSeek**
|
||||
- Technical content
|
||||
- Specialized knowledge
|
||||
- Efficient processing
|
||||
|
||||
### 4. Personalization Setup (`personalization_setup.py`)
|
||||
**Purpose**: Customize your content creation experience
|
||||
|
||||
**Features**:
|
||||
- **Writing Style**:
|
||||
- Tone preferences
|
||||
- Voice settings
|
||||
- Content structure
|
||||
|
||||
- **Brand Configuration**:
|
||||
- Brand voice
|
||||
- Style guidelines
|
||||
- Content templates
|
||||
|
||||
### 5. ALwrity Integrations (`alwrity_integrations.py`)
|
||||
**Purpose**: Connect additional tools and services
|
||||
|
||||
**Features**:
|
||||
- **Third-party Services**:
|
||||
- Analytics integration
|
||||
- Social media tools
|
||||
- Content management systems
|
||||
|
||||
- **Workflow Automation**:
|
||||
- Publishing tools
|
||||
- Content scheduling
|
||||
- Distribution channels
|
||||
|
||||
### 6. Final Setup (`final_setup.py`)
|
||||
**Purpose**: Complete and verify your configuration
|
||||
|
||||
**Features**:
|
||||
- **Configuration Review**:
|
||||
- Settings verification
|
||||
- Connection testing
|
||||
- Setup completion
|
||||
|
||||
- **Validation**:
|
||||
- API key verification
|
||||
- Service connectivity
|
||||
- System readiness
|
||||
|
||||
## Base Components
|
||||
|
||||
### 1. Navigation (`base.py`)
|
||||
**Purpose**: Provide consistent navigation throughout the setup process
|
||||
|
||||
**Features**:
|
||||
- Step indicators
|
||||
- Navigation buttons
|
||||
- Progress tracking
|
||||
- Back/forward controls
|
||||
|
||||
## How to Use the Components
|
||||
|
||||
### 1. Starting the Setup
|
||||
1. Launch ALwrity
|
||||
2. Navigate to the Setup section
|
||||
3. Follow the guided wizard process
|
||||
|
||||
### 2. Component Navigation
|
||||
- Use the step indicator to track progress
|
||||
- Navigate between components using buttons
|
||||
- Save progress automatically
|
||||
- Return to previous steps if needed
|
||||
|
||||
### 3. Configuration Process
|
||||
1. **Enter Information**: Fill in required details
|
||||
2. **Verify Settings**: Review your inputs
|
||||
3. **Test Connections**: Ensure everything works
|
||||
4. **Complete Setup**: Finalize your configuration
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Before Setup
|
||||
- Gather all necessary API keys
|
||||
- Review provider documentation
|
||||
- Plan your configuration
|
||||
- Backup existing settings
|
||||
|
||||
### 2. During Setup
|
||||
- Follow the wizard steps
|
||||
- Verify each configuration
|
||||
- Test connections
|
||||
- Save progress regularly
|
||||
|
||||
### 3. After Setup
|
||||
- Review all settings
|
||||
- Test functionality
|
||||
- Document configurations
|
||||
- Monitor usage
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### 1. Common Issues
|
||||
- Invalid API keys
|
||||
- Connection problems
|
||||
- Configuration errors
|
||||
- Setup interruptions
|
||||
|
||||
### 2. Solutions
|
||||
- Key verification
|
||||
- Connection testing
|
||||
- Error logging
|
||||
- Support resources
|
||||
|
||||
## Need Help?
|
||||
|
||||
If you encounter any issues during setup:
|
||||
1. Check the error messages
|
||||
2. Review the documentation
|
||||
3. Verify your API keys
|
||||
4. Contact ALwrity support
|
||||
|
||||
---
|
||||
|
||||
*Note: Each component is designed to help you set up a specific aspect of ALwrity. Follow the setup wizard in order to ensure all components are properly configured for optimal content creation.*
|
||||
22
ToBeMigrated/utils/api_key_manager/components/__init__.py
Normal file
22
ToBeMigrated/utils/api_key_manager/components/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""API key manager components package."""
|
||||
|
||||
from .ai_research_setup import render_ai_research_setup
|
||||
from .ai_research import render_ai_research
|
||||
from .ai_providers import render_ai_providers
|
||||
from .final_setup import render_final_setup
|
||||
from .personalization_setup import render_personalization_setup
|
||||
from .alwrity_integrations import render_alwrity_integrations
|
||||
from .base import render_navigation_buttons, render_step_indicator
|
||||
from .website_setup import render_website_setup
|
||||
|
||||
# Public API of the components package: the render functions imported above.
__all__ = [
    # Setup steps
    'render_ai_research_setup',
    'render_ai_research',
    'render_ai_providers',
    'render_final_setup',
    'render_personalization_setup',
    'render_alwrity_integrations',
    # Shared navigation helpers from .base
    'render_navigation_buttons',
    'render_step_indicator',
    # Website setup step
    'render_website_setup',
]
|
||||
137
ToBeMigrated/utils/api_key_manager/components/ai_research.py
Normal file
137
ToBeMigrated/utils/api_key_manager/components/ai_research.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""AI Research setup component."""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons, render_step_indicator
|
||||
|
||||
def render_ai_research(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI Research setup step.

    Shows two tabs — user information and research preferences — validates
    that name, email and company are filled in (whitespace-only input counts
    as empty), and on navigation stores the trimmed values together with the
    research preferences in ``st.session_state['user_info']`` before moving
    to step 4.

    Args:
        api_key_manager: The API key manager instance; it is not read by
            this step.

    Returns:
        ``{"current_step": 3, "changes_made": bool}`` normally, or
        ``{"current_step": 3, "error": str}`` if rendering raised an
        exception (which is logged and shown in the UI).
    """
    try:
        header_html = (
            "\n"
            "<div class='setup-header'>\n"
            "<h2>🔍 AI Research Configuration</h2>\n"
            "<p>Configure your research preferences and provide user information</p>\n"
            "</div>\n"
        )
        st.markdown(header_html, unsafe_allow_html=True)

        # Create tabs for different sections
        user_tab, prefs_tab = st.tabs(["User Information", "Research Preferences"])

        with user_tab:
            st.markdown("### User Information")
            st.markdown("Please provide your details for personalized research experience")

            # User Information Card
            with st.container():
                user_card_html = (
                    "\n"
                    '<div class="user-info-card">\n'
                    '<div class="user-info-header">\n'
                    '<div class="user-info-icon">👤</div>\n'
                    '<div class="user-info-title">Personal Details</div>\n'
                    "</div>\n"
                    '<div class="user-info-content">\n'
                    "<p>Your information helps us customize the research experience.</p>\n"
                    "</div>\n"
                    "</div>\n"
                )
                st.markdown(user_card_html, unsafe_allow_html=True)

                # User Input Fields with Streamlit Components
                full_name = st.text_input("Full Name", key="full_name",
                                          help="Enter your full name as you'd like it to appear")
                email = st.text_input("Email Address", key="email",
                                      help="Enter your business email address")
                company = st.text_input("Company/Organization", key="company",
                                        help="Enter your company or organization name")
                role = st.selectbox("Role",
                                    ["Content Creator", "Marketing Manager", "Business Owner", "Other"],
                                    help="Select your primary role")

        with prefs_tab:
            st.markdown("### Research Preferences")
            st.markdown("Configure how AI assists with your research")

            # Research Preferences Card
            with st.container():
                prefs_card_html = (
                    "\n"
                    '<div class="research-prefs-card">\n'
                    '<div class="research-prefs-header">\n'
                    '<div class="research-prefs-icon">🎯</div>\n'
                    '<div class="research-prefs-title">Research Settings</div>\n'
                    "</div>\n"
                    "</div>\n"
                )
                st.markdown(prefs_card_html, unsafe_allow_html=True)

                # Research Preferences Settings
                research_depth = st.select_slider(
                    "Research Depth",
                    options=["Basic", "Standard", "Deep", "Comprehensive"],
                    value="Standard",
                    help="Choose how detailed you want the AI research to be"
                )

                st.markdown("#### Content Types")
                content_types = st.multiselect(
                    "Select content types to focus on",
                    ["Blog Posts", "Social Media", "Technical Articles", "News", "Academic Papers"],
                    default=["Blog Posts", "Social Media"],
                    help="Choose what types of content you want to research"
                )

                auto_research = st.toggle(
                    "Enable Automated Research",
                    help="Automatically start research when content topics are added"
                )

        # Validate inputs on trimmed values so that a field containing only
        # spaces is treated as missing.
        full_name = (full_name or "").strip()
        email = (email or "").strip()
        company = (company or "").strip()

        has_valid_info = all([full_name, email, company])
        changes_made = has_valid_info

        # Display validation message
        if has_valid_info:
            st.success("✅ User information completed successfully")
        else:
            st.warning("⚠️ Please fill in all required fields to continue")

        # Navigation buttons
        if render_navigation_buttons(3, 6, changes_made):
            if has_valid_info:
                # Store user information in session state
                st.session_state['user_info'] = {
                    'full_name': full_name,
                    'email': email,
                    'company': company,
                    'role': role,
                    'research_preferences': {
                        'depth': research_depth,
                        'content_types': content_types,
                        'auto_research': auto_research
                    }
                }

                # Update progress and move to next step
                st.session_state['current_step'] = 4
                st.rerun()
            else:
                st.error("Please complete all required fields to continue")

        return {"current_step": 3, "changes_made": changes_made}

    except Exception as e:
        error_msg = f"Error in AI research setup: {str(e)}"
        logger.error(f"[render_ai_research] {error_msg}")
        st.error(error_msg)
        return {"current_step": 3, "error": error_msg}
|
||||
188
ToBeMigrated/utils/api_key_manager/components/personalization.py
Normal file
188
ToBeMigrated/utils/api_key_manager/components/personalization.py
Normal file
@@ -0,0 +1,188 @@
|
||||
"""Personalization setup component."""
|
||||
|
||||
import streamlit as st
|
||||
from typing import Dict, Any
|
||||
from loguru import logger
|
||||
from ..manager import APIKeyManager
|
||||
from .base import render_navigation_buttons, render_step_indicator
|
||||
|
||||
def render_personalization(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the personalization setup step.

    Shows three tabs (content style, brand voice, advanced settings). The
    step counts as complete when writing style, tone, content length and at
    least one brand personality trait are set; on navigation the selections
    are stored in ``st.session_state['personalization']`` and the wizard
    moves to step 5.

    Args:
        api_key_manager: The API key manager instance; it is not read by
            this step.

    Returns:
        ``{"current_step": 4, "changes_made": bool}`` normally, or
        ``{"current_step": 4, "error": str}`` if rendering raised an
        exception (which is logged and shown in the UI).
    """
    try:
        header_html = (
            "\n"
            "<div class='setup-header'>\n"
            "<h2>🎨 Personalization Settings</h2>\n"
            "<p>Customize your content generation experience</p>\n"
            "</div>\n"
        )
        st.markdown(header_html, unsafe_allow_html=True)

        # Create tabs for different sections
        style_tab, brand_tab, advanced_tab = st.tabs(
            ["Content Style", "Brand Voice", "Advanced Settings"]
        )

        with style_tab:
            st.markdown("### Content Style")
            st.markdown("Define your preferred content style and tone")

            # Content Style Card
            with st.container():
                style_card_html = (
                    "\n"
                    '<div class="style-card">\n'
                    '<div class="style-header">\n'
                    '<div class="style-icon">✨</div>\n'
                    '<div class="style-title">Writing Style</div>\n'
                    "</div>\n"
                    '<div class="style-content">\n'
                    "<p>Choose how you want your content to be written.</p>\n"
                    "</div>\n"
                    "</div>\n"
                )
                st.markdown(style_card_html, unsafe_allow_html=True)

                # Style Settings
                writing_style = st.selectbox(
                    "Writing Style",
                    ["Professional", "Casual", "Technical", "Conversational", "Academic"],
                    help="Select your preferred writing style"
                )
                tone = st.select_slider(
                    "Content Tone",
                    options=["Formal", "Semi-Formal", "Neutral", "Friendly", "Humorous"],
                    value="Neutral",
                    help="Choose the tone for your content"
                )
                content_length = st.select_slider(
                    "Content Length",
                    options=["Concise", "Standard", "Detailed", "Comprehensive"],
                    value="Standard",
                    help="Select your preferred content length"
                )

        with brand_tab:
            st.markdown("### Brand Voice")
            st.markdown("Configure your brand's unique voice and personality")

            # Brand Voice Card
            with st.container():
                brand_card_html = (
                    "\n"
                    '<div class="brand-card">\n'
                    '<div class="brand-header">\n'
                    '<div class="brand-icon">🎯</div>\n'
                    '<div class="brand-title">Brand Identity</div>\n'
                    "</div>\n"
                    '<div class="brand-content">\n'
                    "<p>Define your brand's personality and voice.</p>\n"
                    "</div>\n"
                    "</div>\n"
                )
                st.markdown(brand_card_html, unsafe_allow_html=True)

                # Brand Settings
                brand_personality = st.multiselect(
                    "Brand Personality Traits",
                    ["Professional", "Innovative", "Friendly", "Trustworthy", "Creative", "Expert"],
                    default=["Professional", "Trustworthy"],
                    help="Select traits that best describe your brand"
                )
                brand_voice = st.text_area(
                    "Brand Voice Description",
                    help="Describe how your brand should sound in content"
                )
                keywords = st.text_input(
                    "Brand Keywords",
                    help="Enter key terms that should be used in your content"
                )

        with advanced_tab:
            st.markdown("### Advanced Settings")
            st.markdown("Fine-tune your content generation preferences")

            # Advanced Settings Card
            with st.container():
                advanced_card_html = (
                    "\n"
                    '<div class="advanced-card">\n'
                    '<div class="advanced-header">\n'
                    '<div class="advanced-icon">⚙️</div>\n'
                    '<div class="advanced-title">Advanced Options</div>\n'
                    "</div>\n"
                    '<div class="advanced-content">\n'
                    "<p>Configure advanced content generation settings.</p>\n"
                    "</div>\n"
                    "</div>\n"
                )
                st.markdown(advanced_card_html, unsafe_allow_html=True)

                # Advanced Settings
                seo_optimization = st.toggle(
                    "Enable SEO Optimization",
                    help="Automatically optimize content for search engines"
                )
                readability_level = st.select_slider(
                    "Readability Level",
                    options=["Simple", "Standard", "Advanced", "Expert"],
                    value="Standard",
                    help="Choose the complexity level of your content"
                )
                content_structure = st.multiselect(
                    "Content Structure",
                    ["Introduction", "Key Points", "Examples", "Conclusion", "Call-to-Action"],
                    default=["Introduction", "Key Points", "Conclusion"],
                    help="Select required content sections"
                )

        # Validate settings: the three style choices plus at least one
        # brand personality trait are required.
        has_valid_settings = all([writing_style, tone, content_length, brand_personality])
        changes_made = has_valid_settings

        # Display validation message
        if has_valid_settings:
            st.success("✅ Personalization settings completed successfully")
        else:
            st.warning("⚠️ Please complete all required settings to continue")

        # Navigation buttons
        if render_navigation_buttons(4, 6, changes_made):
            if has_valid_settings:
                # Store personalization settings in session state
                st.session_state['personalization'] = {
                    'content_style': {
                        'writing_style': writing_style,
                        'tone': tone,
                        'content_length': content_length
                    },
                    'brand_voice': {
                        'personality': brand_personality,
                        'voice_description': brand_voice,
                        'keywords': keywords
                    },
                    'advanced_settings': {
                        'seo_optimization': seo_optimization,
                        'readability_level': readability_level,
                        'content_structure': content_structure
                    }
                }

                # Update progress and move to next step
                st.session_state['current_step'] = 5
                st.rerun()
            else:
                st.error("Please complete all required settings to continue")

        return {"current_step": 4, "changes_made": changes_made}

    except Exception as exc:
        error_msg = f"Error in personalization setup: {str(exc)}"
        logger.error(f"[render_personalization] {error_msg}")
        st.error(error_msg)
        return {"current_step": 4, "error": error_msg}
|
||||
79
ToBeMigrated/utils/content_generators.py
Normal file
79
ToBeMigrated/utils/content_generators.py
Normal file
@@ -0,0 +1,79 @@
|
||||
import streamlit as st
|
||||
|
||||
from lib.alwrity_ui.similar_analysis import competitor_analysis
|
||||
from lib.alwrity_ui.keyword_web_researcher import do_web_research
|
||||
|
||||
|
||||
def content_planning_tools():
    """Render the content ideation & planning page.

    Injects a compact-layout stylesheet and a short description, then builds
    three tabs: keyword research, competitor analysis, and the content
    calendar dashboard.
    """
    # Stylesheet that tightens Streamlit's default spacing.
    compact_layout_css = """
        <style>
        /* Reduce top padding of main container */
        .main .block-container {
        padding-top: 0rem !important;
        padding-bottom: 1rem !important;
        }

        /* Reduce spacing between elements */
        .stTabs {
        margin-top: 0.5rem !important;
        }

        /* Make markdown text more compact */
        .element-container {
        margin-bottom: 0.5rem !important;
        }

        /* Adjust subheader margins */
        .stMarkdown h3 {
        margin-top: 0 !important;
        margin-bottom: 0.5rem !important;
        }
        </style>
    """
    # Short page description rendered in a slightly smaller font.
    intro_html = """
        <div style='font-size: 0.9em; margin-bottom: 0.5rem;'>
        <strong>Alwrity content Ideation & Planning</strong>: Provide few keywords to do comprehensive web research.
        Provide few keywords to get Google, Neural, pytrends analysis. Know keywords, blog titles to target.
        Generate months long content calendar around given keywords.
        </div>
    """
    # Feature summary shown above the calendar dashboard.
    calendar_overview_html = """
        <div style='background-color: #f0f2f6; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
        <h3 style='margin-top: 0;'>📅 Content Calendar & Planning Dashboard</h3>
        <p>The Content Calendar Dashboard provides:</p>
        <ul>
        <li>AI-powered content planning and generation</li>
        <li>Multi-platform content scheduling</li>
        <li>Content optimization tools</li>
        <li>A/B testing capabilities</li>
        <li>Performance analytics</li>
        </ul>
        </div>
    """

    st.markdown(compact_layout_css, unsafe_allow_html=True)
    st.markdown(intro_html, unsafe_allow_html=True)

    tab_labels = [
        "🔍 Keywords Researcher",
        "📊 Competitor Analysis",
        "📅 Content Calendar Ideator",
    ]
    tab_keywords, tab_competitor, tab_calendar = st.tabs(tab_labels)

    with tab_keywords:
        do_web_research()

    with tab_competitor:
        competitor_analysis()

    with tab_calendar:
        st.info("🚧 **Content Calendar & Planning Dashboard**")
        st.markdown(calendar_overview_html, unsafe_allow_html=True)

        # Imported here rather than at module level, as in the original;
        # the dashboard module is only loaded when this function runs.
        from lib.ai_seo_tools.content_calendar.ui.dashboard import ContentCalendarDashboard

        ContentCalendarDashboard().render()
|
||||
113
ToBeMigrated/utils/take_url_screenshot.py
Normal file
113
ToBeMigrated/utils/take_url_screenshot.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import subprocess
|
||||
|
||||
from time import sleep
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from PIL import Image
|
||||
|
||||
from selenium import webdriver
|
||||
from PIL import Image
|
||||
import shutil
|
||||
from screenshotone import Client, TakeOptions
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def screenshot_api(url, generated_image_filepath):
    """Take a webpage screenshot through the screenshotone API.

    Falls back to the local Selenium-based ``take_screenshot`` when anything
    in the API path fails.

    Args:
        url: Address of the page to capture.
        generated_image_filepath: Destination path for the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    try:
        # create API client
        client = Client(os.getenv('SCREENSHOTONE_ACCESS_KEY'), os.getenv('SCREENSHOTONE_SECRET_KEY'))

        # set up options
        options = (TakeOptions.url(url)
                   .format("png")
                   .viewport_width(1024)
                   .viewport_height(768)
                   .block_cookie_banners(True)
                   .block_chats(True))

        # render a screenshot and download the image as stream
        image_stream = client.take(options)

        # Binary mode must not be given an encoding: open() raises ValueError
        # otherwise, which previously forced every call into the fallback.
        with open(generated_image_filepath, 'wb') as result_file:
            shutil.copyfileobj(image_stream, result_file)

        # Show the stored screenshot briefly, then release the file handle.
        with Image.open(generated_image_filepath) as preview:
            preview.show()
            # Wait for 2 seconds (adjust the delay as needed)
            sleep(2)

    except Exception as err:
        # Best-effort boundary: any API failure triggers the local fallback.
        logger.error(f"Failed in screenshotone api: {err}")
        generated_image_filepath = take_screenshot(url, generated_image_filepath)

    return generated_image_filepath
|
||||
|
||||
|
||||
def take_screenshot(url, generated_image_filepath):
    """Capture a webpage screenshot with headless Chrome via Selenium.

    Args:
        url: Address of the page to capture.
        generated_image_filepath: Destination path for the PNG file.

    Returns:
        The path of the stored screenshot.

    Raises:
        Any Selenium or I/O error raised while loading the page or writing
        the file; the webdriver is always shut down first.
    """
    # Create a webdriver instance in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    logger.debug(f"Taking screenshot of url: {url}")

    try:
        # Navigate to the given url
        driver.get(url)

        # Optionally, increase the delay to ensure all content is loaded
        sleep(2)

        # Explicitly wait for the page to load (adjust timeout as needed)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))

        # Set a larger window size
        driver.set_window_size(1200, 800)

        # Take a screenshot of the webpage
        screenshot = driver.get_screenshot_as_png()

        # Binary mode must not be given an encoding: open() raises ValueError
        # otherwise, so the screenshot could never be written.
        with open(generated_image_filepath, "wb") as f:
            f.write(screenshot)

        # Show the stored screenshot briefly, then release the file handle.
        with Image.open(generated_image_filepath) as preview:
            preview.show()
            # Wait for 2 seconds (adjust the delay as needed)
            sleep(2)

        # Close the image window using subprocess (platform-dependent).
        # NOTE(review): `pkill -f display` matches every process whose command
        # line contains "display", not just the image viewer - confirm this is
        # intended before relying on it.
        # If using macOS, you can use the following:
        # subprocess.run(["osascript", "-e", 'tell application "Preview" to close every window'])
        # If using Windows, you can use the following:
        # subprocess.run(["taskkill", "/F", "/IM", "Microsoft.Photos.exe"])
        try:
            subprocess.run(["pkill", "-f", "display"])
        except OSError as err:
            # pkill is missing on this platform; the screenshot is already
            # saved, so closing the viewer is only a courtesy.
            logger.debug(f"Could not close image viewer: {err}")

        logger.debug(f"Screenshot successfully stored at: {generated_image_filepath}")
        return generated_image_filepath
    finally:
        # Close the webdriver instance
        driver.quit()
|
||||
310
ToBeMigrated/utils/test_config_settings.py
Normal file
310
ToBeMigrated/utils/test_config_settings.py
Normal file
@@ -0,0 +1,310 @@
|
||||
"""Test configuration settings page for ALwrity."""
|
||||
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
import asyncio
|
||||
from lib.web_crawlers.async_web_crawler import AsyncWebCrawlerService
|
||||
from pages.style_utils import (
|
||||
get_test_config_styles,
|
||||
get_glass_container,
|
||||
get_info_section,
|
||||
get_example_box,
|
||||
get_analysis_section,
|
||||
get_style_guide_html
|
||||
)
|
||||
import sys
|
||||
from lib.personalization.style_analyzer import StyleAnalyzer
|
||||
|
||||
# Set page config - must be the first Streamlit command
|
||||
st.set_page_config(
|
||||
layout="wide",
|
||||
initial_sidebar_state="collapsed",
|
||||
menu_items={
|
||||
'Get Help': None,
|
||||
'Report a bug': None,
|
||||
'About': None
|
||||
}
|
||||
)
|
||||
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import os
|
||||
from loguru import logger
|
||||
from lib.utils.read_main_config_params import get_personalization_settings
|
||||
from lib.web_crawlers.crawl4ai_web_crawler import analyze_style
|
||||
|
||||
# Configure logger
|
||||
logger.remove() # Remove default handler
|
||||
logger.add(
|
||||
"logs/test_config_settings.log",
|
||||
rotation="500 MB",
|
||||
retention="10 days",
|
||||
level="DEBUG",
|
||||
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
|
||||
backtrace=True,
|
||||
diagnose=True
|
||||
)
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
level="INFO",
|
||||
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
|
||||
)
|
||||
|
||||
# Apply CSS styles
|
||||
st.markdown(get_test_config_styles(), unsafe_allow_html=True)
|
||||
|
||||
def load_website_url():
    """Load website URL from config file.

    Reads the YAML file whose path is stored in the ``ALWRITY_CONFIG``
    environment variable and returns the value at ``website.url``.

    Returns:
        str: The configured URL, or ``''`` when the variable, the file, or
        the key is missing or the file cannot be parsed.
    """
    try:
        logger.debug("Loading website URL from config file")
        config_path = Path(os.environ["ALWRITY_CONFIG"])
        # An empty YAML document parses to None; treat it as an empty mapping.
        config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
        # `website:` or `url:` with no value parses to None as well.
        url = (config.get('website') or {}).get('url') or ''
        logger.info(f"Loaded website URL: {url}")
        return url
    except Exception as e:
        # loguru has no `exc_info` keyword: extra kwargs are fed to
        # str.format on the message, which raises when the error text
        # contains braces. opt(exception=True) attaches the traceback, and the
        # `{}` placeholder keeps the error text out of the format template.
        logger.opt(exception=True).error("Error loading website URL: {}", e)
        return ''
|
||||
|
||||
def display_style_analysis(analysis_results: dict):
    """Display the style analysis results in a structured format.

    Renders five sections (writing style, content characteristics, target
    audience, content type, recommended settings) as HTML lists through
    ``get_analysis_section`` and ``st.markdown``.

    Args:
        analysis_results: Mapping of section name to a mapping of field
            values. Missing sections or fields are shown as "N/A".
    """
    import html  # local import: only this function needs it

    def section(name):
        """Return the named section, or an empty dict when absent or null."""
        return analysis_results.get(name) or {}

    def field(data, key):
        """Return one field as HTML-safe text, or "N/A" when it is empty."""
        value = data.get(key)
        if isinstance(value, (list, tuple)):
            # Join real sequences only; joining a plain string would split it
            # into single characters.
            text = ', '.join(str(item) for item in value)
        elif value is None:
            text = ""
        else:
            text = str(value)
        # The values come from analysed website content and are rendered with
        # unsafe_allow_html=True, so they must be escaped.
        return html.escape(text) if text else "N/A"

    try:
        # Writing Style Section
        st.markdown("### 🎨 Writing Style Analysis")
        writing_style = section("writing_style")
        writing_style_content = f"""
            <ul>
                <li><strong>Tone:</strong> {field(writing_style, "tone")}</li>
                <li><strong>Voice:</strong> {field(writing_style, "voice")}</li>
                <li><strong>Complexity:</strong> {field(writing_style, "complexity")}</li>
                <li><strong>Engagement Level:</strong> {field(writing_style, "engagement_level")}</li>
            </ul>
        """
        st.markdown(get_analysis_section("Writing Style", writing_style_content), unsafe_allow_html=True)

        # Content Characteristics Section
        content_chars = section("content_characteristics")
        content_chars_content = f"""
            <ul>
                <li><strong>Sentence Structure:</strong> {field(content_chars, "sentence_structure")}</li>
                <li><strong>Vocabulary Level:</strong> {field(content_chars, "vocabulary_level")}</li>
                <li><strong>Paragraph Organization:</strong> {field(content_chars, "paragraph_organization")}</li>
                <li><strong>Content Flow:</strong> {field(content_chars, "content_flow")}</li>
            </ul>
        """
        st.markdown(get_analysis_section("Content Characteristics", content_chars_content), unsafe_allow_html=True)

        # Target Audience Section
        target_audience = section("target_audience")
        target_audience_content = f"""
            <ul>
                <li><strong>Demographics:</strong> {field(target_audience, "demographics")}</li>
                <li><strong>Expertise Level:</strong> {field(target_audience, "expertise_level")}</li>
                <li><strong>Industry Focus:</strong> {field(target_audience, "industry_focus")}</li>
                <li><strong>Geographic Focus:</strong> {field(target_audience, "geographic_focus")}</li>
            </ul>
        """
        st.markdown(get_analysis_section("Target Audience", target_audience_content), unsafe_allow_html=True)

        # Content Type Section
        content_type = section("content_type")
        content_type_content = f"""
            <ul>
                <li><strong>Primary Type:</strong> {field(content_type, "primary_type")}</li>
                <li><strong>Secondary Types:</strong> {field(content_type, "secondary_types")}</li>
                <li><strong>Purpose:</strong> {field(content_type, "purpose")}</li>
                <li><strong>Call to Action:</strong> {field(content_type, "call_to_action")}</li>
            </ul>
        """
        st.markdown(get_analysis_section("Content Type", content_type_content), unsafe_allow_html=True)

        # Recommended Settings Section
        recommended = section("recommended_settings")
        recommended_content = f"""
            <ul>
                <li><strong>Writing Tone:</strong> {field(recommended, "writing_tone")}</li>
                <li><strong>Target Audience:</strong> {field(recommended, "target_audience")}</li>
                <li><strong>Content Type:</strong> {field(recommended, "content_type")}</li>
                <li><strong>Creativity Level:</strong> {field(recommended, "creativity_level")}</li>
                <li><strong>Geographic Location:</strong> {field(recommended, "geographic_location")}</li>
            </ul>
        """
        st.markdown(get_analysis_section("Recommended Settings", recommended_content), unsafe_allow_html=True)

    except Exception as e:
        # UI boundary: report the problem on the page instead of crashing it.
        logger.error(f"Error displaying style analysis: {str(e)}")
        st.error(f"Error displaying analysis results: {str(e)}")
|
||||
|
||||
def render_test_config_settings():
    """Render the test configuration settings page.

    The page offers a back button to the personalization step, a website URL
    input (or a text area for written samples when no URL is given), and an
    "ALwrity Style" button that crawls the URL, runs the style analyzer and
    displays the results. A static explanation is rendered in the right-hand
    column. Any unexpected error is logged and shown with ``st.error``.
    """
    try:
        logger.info("Starting to render test configuration settings")

        # Add back button at the top
        col1, col2 = st.columns([1, 3])
        with col1:
            if st.button("← Back to Personalization Setup"):
                logger.info("User clicked back to personalization setup")
                # Set session state for navigation
                st.session_state.current_step = 4
                st.session_state.next_step = "personalization_setup"
                # Navigate back to the main page where personalization setup is rendered
                st.switch_page("alwrity.py")

        # Title and description
        st.title("🎨 Find Your Style with ALwrity")
        st.markdown(get_glass_container(
            "<p>Enter a website URL or provide content samples to analyze your writing style and get personalized recommendations.</p>"
        ), unsafe_allow_html=True)

        # Create two columns for the layout
        col1, col2 = st.columns([2, 1])

        with col1:
            # Website URL input
            st.markdown("### Website URL")
            url = st.text_input(
                "Enter your website URL",
                placeholder="https://example.com",
                help="Provide your website URL to analyze your content style. Leave empty if you want to provide written samples instead."
            )
            logger.debug(f"Website URL input value: {url}")

            # Always bind `samples` so the button handler below never depends
            # on which branch ran.
            samples = None

            # Alternative: Written samples
            if not url:
                st.markdown("### Written Samples")
                st.markdown(get_info_section("""
                    <p>No website URL? No problem! You can provide written samples of your content instead.</p>
                    <p>Share your best articles, blog posts, or any content that represents your writing style.</p>
                """), unsafe_allow_html=True)
                samples = st.text_area(
                    "Paste your content samples here",
                    help="Paste 2-3 samples of your best content. This helps ALwrity understand your writing style."
                )
                logger.debug(f"Sample text length: {len(samples) if samples else 0}")

            # NOTE(review): no matching opening <div> is emitted in this
            # function - confirm whether this closing tag is still needed.
            st.markdown('</div>', unsafe_allow_html=True)

            # ALwrity Style button
            st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
            if st.button("🎨 ALwrity Style", use_container_width=True):
                if url:
                    with st.status("Starting style analysis...", expanded=True) as status:
                        try:
                            logger.info(f"Starting style analysis for URL: {url}")

                            # Step 1: Initialize crawler
                            status.update(label="Step 1/4: Initializing web crawler...", state="running")
                            crawler_service = AsyncWebCrawlerService()

                            # Step 2: Crawl website
                            status.update(label="Step 2/4: Crawling website content...", state="running")
                            loop = asyncio.new_event_loop()
                            asyncio.set_event_loop(loop)
                            try:
                                result = loop.run_until_complete(crawler_service.crawl_website(url))
                            finally:
                                # Close the loop even when the crawl raises, so
                                # it is not leaked on every failed attempt.
                                loop.close()

                            if result.get('success', False):
                                content = result.get('content', {})

                                # Step 3: Initialize style analyzer
                                status.update(label="Step 3/4: Analyzing content style...", state="running")
                                style_analyzer = StyleAnalyzer()

                                # Step 4: Perform style analysis
                                status.update(label="Step 4/4: Generating style recommendations...", state="running")
                                style_analysis = style_analyzer.analyze_content_style(content)

                                if style_analysis.get('error'):
                                    status.update(label="Analysis failed", state="error")
                                    st.error(f"Style analysis failed: {style_analysis['error']}")
                                else:
                                    status.update(label="Analysis complete!", state="complete")
                                    # Display style analysis results
                                    display_style_analysis(style_analysis)

                                    # Display original content in tabs
                                    tab1, tab2, tab3 = st.tabs(["Content", "Metadata", "Links"])

                                    with tab1:
                                        st.markdown("### Main Content")
                                        st.markdown(content.get('main_content', 'No content found'))

                                    with tab2:
                                        st.markdown("### Metadata")
                                        st.markdown(f"""
                                        **Title:** {content.get('title', 'No title found')}

                                        **Description:** {content.get('description', 'No description found')}

                                        **Meta Tags:**
                                        {content.get('meta_tags', {})}
                                        """)

                                    with tab3:
                                        st.markdown("### Links")
                                        for link in content.get('links', []):
                                            st.markdown(f"- [{link.get('text', '')}]({link.get('href', '')})")

                            else:
                                status.update(label="Crawling failed", state="error")
                                st.error(f"Failed to analyze website: {result.get('error', 'Unknown error')}")

                        except Exception as e:
                            logger.error(f"Error during style analysis: {str(e)}")
                            # Without this the status widget keeps showing the
                            # last "running" step after a failure.
                            status.update(label="Analysis failed", state="error")
                            st.error(f"Analysis failed: {str(e)}")
                elif samples:
                    with st.spinner("Analyzing content samples..."):
                        try:
                            # TODO: Implement sample text analysis
                            st.info("Sample text analysis coming soon!")
                        except Exception as e:
                            logger.error(f"Error analyzing samples: {str(e)}")
                            st.error(f"Analysis failed: {str(e)}")
                else:
                    st.warning("Please provide either a website URL or content samples")

        with col2:
            st.markdown("""
            ### How ALwrity Discovers Your Style

            **AI-Powered Style Analysis**

            ALwrity AI analyzes your existing content to understand your unique writing style and preferences. This helps us generate content that matches your voice perfectly.

            **Step 1: Content Analysis**

            We'll analyze your website content or written samples to understand:

            - Writing tone and voice
            - Vocabulary and language style
            - Content structure and formatting
            - Target audience and engagement style

            **Step 2: Style Recommendations**

            Based on the analysis, we'll provide:

            - Personalized writing guidelines
            - Content structure templates
            - Tone and voice recommendations
            - Audience engagement strategies

            **Step 3: Content Generation**

            Finally, we'll use these insights to:

            - Generate content that matches your style
            - Maintain consistency across all content
            - Optimize for your target audience
            - Ensure brand voice alignment
            """)

    except Exception as e:
        # Page-level boundary: log and surface anything not handled above.
        logger.error(f"Error in render_test_config_settings: {str(e)}")
        st.error(f"An error occurred: {str(e)}")
|
||||
|
||||
# Script entry point: render the page when this file is executed directly.
if __name__ == "__main__":
    logger.info("Starting test config settings page")
    render_test_config_settings()
    # render_test_config_settings() catches and reports its own exceptions, so
    # this line is logged whenever the call returns, including after a handled
    # error.
    logger.info("Test config settings page rendered successfully")
|
||||
23
ToBeMigrated/utils/voice_processing.py
Normal file
23
ToBeMigrated/utils/voice_processing.py
Normal file
@@ -0,0 +1,23 @@
|
||||
import streamlit as st
|
||||
from streamlit_mic_recorder import speech_to_text
|
||||
|
||||
def record_voice(language="en"):
    """Record speech through the microphone widget and return it as text.

    Args:
        language: Language code forwarded to ``speech_to_text``.

    Returns:
        The recognised text, or ``None`` when nothing was captured.
    """
    # https://github.com/B4PT0R/streamlit-mic-recorder?tab=readme-ov-file#example
    session = st.session_state
    if "text_received" not in session:
        session.text_received = []

    spoken = speech_to_text(
        start_prompt="🎙️Press & Speak🔊",
        stop_prompt="🔇Stop Recording🚨",
        language=language,
        use_container_width=True,
        just_once=False,
    )
    if spoken:
        session.text_received.append(spoken)

    # Concatenate everything buffered so far, then empty the buffer.
    transcript = "".join(session.text_received)
    session.text_received = []
    return transcript or None
|
||||
181
ToBeMigrated/utils/website_analyzer/README.md
Normal file
181
ToBeMigrated/utils/website_analyzer/README.md
Normal file
@@ -0,0 +1,181 @@
|
||||
# Website Analyzer Module
|
||||
|
||||
A comprehensive website analysis toolkit that provides detailed insights into website performance, SEO metrics, and content quality. This module combines traditional web analysis techniques with AI-powered content evaluation to deliver actionable recommendations.
|
||||
|
||||
## Features
|
||||
|
||||
### 1. Comprehensive Website Analysis
|
||||
- Basic website information extraction
|
||||
- SSL/TLS certificate validation
|
||||
- DNS record analysis
|
||||
- WHOIS information retrieval
|
||||
- Content analysis and structure evaluation
|
||||
- Performance metrics assessment
|
||||
|
||||
### 2. Advanced SEO Analysis
|
||||
- Meta tag optimization analysis
|
||||
- Content quality evaluation
|
||||
- Keyword density analysis
|
||||
- Readability scoring
|
||||
- Heading structure analysis
|
||||
- AI-powered content recommendations
|
||||
|
||||
### 3. Technical Infrastructure
|
||||
- Asynchronous web crawling
|
||||
- Multi-threaded analysis
|
||||
- Robust error handling
|
||||
- Comprehensive logging
|
||||
- Type-safe data models
|
||||
|
||||
## Module Structure
|
||||
|
||||
### 1. `analyzer.py`
|
||||
The main analysis engine that provides comprehensive website analysis.
|
||||
|
||||
#### Key Components:
|
||||
- `WebsiteAnalyzer` class
|
||||
- URL validation
|
||||
- Basic website information extraction
|
||||
- SSL/TLS certificate checking
|
||||
- DNS record analysis
|
||||
- WHOIS information retrieval
|
||||
- Content analysis
|
||||
- Performance metrics assessment
|
||||
|
||||
#### Features:
|
||||
- Concurrent analysis using ThreadPoolExecutor
|
||||
- Robust error handling and logging
|
||||
- User-agent simulation for reliable scraping
|
||||
- Timeout handling for requests
|
||||
- Comprehensive result formatting
|
||||
|
||||
### 2. `seo_analyzer.py`
|
||||
Specialized SEO analysis module with AI integration.
|
||||
|
||||
#### Key Components:
|
||||
- `extract_content()`: Fetches and parses webpage content
|
||||
- `analyze_meta_tags()`: Evaluates meta tags and SEO elements
|
||||
- `analyze_content_with_ai()`: AI-powered content analysis
|
||||
- `analyze_seo()`: Main SEO analysis function
|
||||
|
||||
#### Features:
|
||||
- Meta tag optimization analysis
|
||||
- Content quality scoring
|
||||
- Keyword density analysis
|
||||
- Readability evaluation
|
||||
- AI-powered recommendations
|
||||
- Weighted scoring system
|
||||
|
||||
### 3. `models.py`
|
||||
Data models for structured analysis results.
|
||||
|
||||
#### Key Components:
|
||||
- `SEORecommendation`: Individual SEO recommendations
|
||||
- `MetaTagAnalysis`: Meta tag analysis results
|
||||
- `ContentAnalysis`: Content analysis metrics
|
||||
- `SEOAnalysisResult`: Complete analysis results
|
||||
|
||||
#### Features:
|
||||
- Type-safe data structures
|
||||
- Clear data organization
|
||||
- Easy serialization/deserialization
|
||||
- Comprehensive documentation
|
||||
|
||||
## Usage Examples
|
||||
|
||||
### Basic Website Analysis
|
||||
```python
|
||||
from website_analyzer import analyze_website
|
||||
|
||||
# Analyze a website
|
||||
results = analyze_website("https://example.com")
|
||||
|
||||
# Access analysis results
|
||||
if results["success"]:
|
||||
data = results["data"]
|
||||
print(f"Domain: {data['domain']}")
|
||||
print(f"SSL Info: {data['analysis']['ssl_info']}")
|
||||
print(f"Content Info: {data['analysis']['content_info']}")
|
||||
```
|
||||
|
||||
### SEO Analysis
|
||||
```python
|
||||
from website_analyzer.seo_analyzer import analyze_seo
|
||||
|
||||
# Perform SEO analysis
|
||||
seo_results = analyze_seo("https://example.com", "your-openai-api-key")
|
||||
|
||||
# Access SEO results
|
||||
if seo_results.success:
|
||||
print(f"Overall Score: {seo_results.overall_score}")
|
||||
print(f"Meta Tags: {seo_results.meta_tags}")
|
||||
print(f"Content Analysis: {seo_results.content}")
|
||||
print(f"Recommendations: {seo_results.recommendations}")
|
||||
```
|
||||
|
||||
## Dependencies
|
||||
|
||||
- `requests`: HTTP requests
|
||||
- `beautifulsoup4`: HTML parsing
|
||||
- `python-whois`: WHOIS information
|
||||
- `dnspython`: DNS record analysis
|
||||
- `openai`: AI-powered analysis
|
||||
- `loguru`: Logging
|
||||
- `typing`: Type hints (Python standard library; no installation needed)
- `dataclasses`: Data models (Python standard library; no installation needed)
|
||||
|
||||
## Error Handling
|
||||
|
||||
The module implements comprehensive error handling:
|
||||
- URL validation
|
||||
- Request timeouts
|
||||
- Connection errors
|
||||
- Parsing errors
|
||||
- API errors
|
||||
- DNS resolution errors
|
||||
- SSL/TLS errors
|
||||
|
||||
All errors are logged and returned in a structured format for easy handling.
|
||||
|
||||
## Logging
|
||||
|
||||
The module uses `loguru` for logging with the following features:
|
||||
- File rotation (500 MB)
|
||||
- 10-day retention
|
||||
- Debug level logging
|
||||
- Structured log format
|
||||
- Both file and stdout output
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **API Key Management**
|
||||
- Store API keys securely
|
||||
- Use environment variables
|
||||
- Implement rate limiting
|
||||
|
||||
2. **Error Handling**
|
||||
- Always check success status
|
||||
- Handle errors gracefully
|
||||
- Log errors appropriately
|
||||
|
||||
3. **Performance**
|
||||
- Use concurrent analysis
|
||||
- Implement timeouts
|
||||
- Cache results when possible
|
||||
|
||||
4. **Rate Limiting**
|
||||
- Respect website robots.txt
|
||||
- Implement delays between requests
|
||||
- Use appropriate user agents
|
||||
|
||||
## Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Commit your changes
|
||||
4. Push to the branch
|
||||
5. Create a Pull Request
|
||||
|
||||
## License
|
||||
|
||||
This module is part of the ALwrity project and is licensed under the MIT License.
|
||||
6
ToBeMigrated/utils/website_analyzer/__init__.py
Normal file
6
ToBeMigrated/utils/website_analyzer/__init__.py
Normal file
@@ -0,0 +1,6 @@
|
||||
"""Website analyzer module for AI-powered website analysis."""
|
||||
|
||||
from .analyzer import analyze_website, WebsiteAnalyzer
|
||||
from .models import SEOAnalysisResult
|
||||
|
||||
__all__ = ['analyze_website', 'WebsiteAnalyzer', 'SEOAnalysisResult']
|
||||
697
ToBeMigrated/utils/website_analyzer/analyzer.py
Normal file
697
ToBeMigrated/utils/website_analyzer/analyzer.py
Normal file
@@ -0,0 +1,697 @@
|
||||
"""Website and SEO analysis module."""
|
||||
|
||||
import asyncio
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.parse import urljoin, urlparse
|
||||
import streamlit as st
|
||||
import re
|
||||
from loguru import logger
|
||||
from ...web_crawlers.async_web_crawler import AsyncWebCrawlerService
|
||||
from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||
import os
|
||||
import sys
|
||||
import logging
|
||||
import json
|
||||
from datetime import datetime
|
||||
import requests
|
||||
import ssl
|
||||
import socket
|
||||
import whois
|
||||
import dns.resolver
|
||||
from requests.exceptions import RequestException
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from .models import (
|
||||
SEOAnalysisResult,
|
||||
MetaTagAnalysis,
|
||||
ContentAnalysis,
|
||||
SEORecommendation
|
||||
)
|
||||
|
||||
# Configure logging
# logging.FileHandler opens its target as soon as it is constructed and raises
# FileNotFoundError when the directory is missing, so create it first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('logs/website_analyzer.log')
    ]
)

# Create a logger for the website analyzer.
# NOTE: this rebinds the name `logger`, replacing the loguru logger imported
# above; everything below uses the standard-library logger.
logger = logging.getLogger(__name__)

# Create a separate logger for scraping operations; only warnings and above
# are emitted for it.
scraping_logger = logging.getLogger('website_analyzer.scraping')
scraping_logger.setLevel(logging.WARNING)
|
||||
|
||||
class WebsiteAnalyzer:
|
||||
def __init__(self):
    """Create the shared HTTP session used by the analysis methods."""
    http_session = requests.Session()
    # Send a desktop-browser User-Agent with every request on this session.
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    http_session.headers.update(browser_headers)
    self.session = http_session
    logger.info("WebsiteAnalyzer initialized")
|
||||
|
||||
def analyze_website(self, url: str) -> Dict:
    """
    Perform comprehensive analysis of a website.

    The basic-info request runs first and acts as a gate: when it fails, no
    further work is started. The remaining checks (SSL, DNS, WHOIS, content,
    performance, SEO) are then submitted to the thread pool together and
    collected in a fixed order.

    Args:
        url (str): The URL to analyze

    Returns:
        Dict: ``{"success": True, "data": {...}}`` with the per-check results
        under ``data["analysis"]``, or ``{"success": False, "error": ...,
        "error_details": {...}}`` for an invalid URL, a failed basic-info,
        content, performance or SEO stage, or an unexpected exception.
    """
    # Local import: only used to describe unexpected failures.
    import traceback

    logger.info(f"Starting analysis for URL: {url}")
    try:
        # Validate URL
        if not self._validate_url(url):
            error_msg = f"Invalid URL format: {url}"
            logger.error(error_msg)
            return {
                "success": False,
                "error": error_msg,
                "error_details": {"stage": "url_validation"}
            }

        # Basic URL parsing
        domain = urlparse(url).netloc

        # Initialize results dictionary
        results = {
            "url": url,
            "domain": domain,
            "timestamp": datetime.now().isoformat(),
            "analysis": {}
        }

        def stage_failure(label, stage, info):
            """Build the failure response for a stage that reported an error."""
            error_msg = f"{label} analysis failed: {info['error']}"
            logger.error(error_msg)
            return {
                "success": False,
                "error": error_msg,
                "error_details": {
                    "stage": stage,
                    "details": info.get("error_details", {})
                }
            }

        # (result key, log label, task, argument, fatal-stage info or None).
        # A stage with fatal info aborts the analysis when it reports an
        # "error"; the others are stored as returned.
        follow_up = [
            ("ssl_info", "SSL", self._check_ssl, domain, None),
            ("dns_info", "DNS", self._check_dns, domain, None),
            ("whois_info", "WHOIS", self._get_whois_info, domain, None),
            ("content_info", "content", self._analyze_content, url,
             ("Content", "content_analysis")),
            ("performance", "performance", self._check_performance, url,
             ("Performance", "performance_analysis")),
            ("seo_info", "SEO", self._analyze_seo, url,
             ("SEO", "seo_analysis")),
        ]

        with ThreadPoolExecutor(max_workers=4) as executor:
            # Basic website info
            logger.info("Starting basic info analysis")
            basic_info = executor.submit(self._get_basic_info, url).result()
            if "error" in basic_info:
                return stage_failure("Basic info", "basic_info", basic_info)
            results["analysis"]["basic_info"] = basic_info

            # Submit everything before waiting on anything; calling
            # .result() right after each submit() would run the tasks one
            # after another.
            logger.info("Starting parallel analysis tasks")
            pending = []
            for key, label, task, argument, fatal in follow_up:
                logger.info(f"Starting {label} analysis")
                pending.append((key, fatal, executor.submit(task, argument)))

            # Collect in submission order so the result layout and the
            # "first failing stage wins" rule stay deterministic.
            for key, fatal, future in pending:
                info = future.result()
                if fatal is not None and "error" in info:
                    # Drop tasks that have not started yet; running ones
                    # finish when the executor shuts down.
                    for _, _, other in pending:
                        other.cancel()
                    return stage_failure(fatal[0], fatal[1], info)
                results["analysis"][key] = info

        logger.info(f"Analysis completed successfully for {url}")
        if logger.isEnabledFor(logging.DEBUG):
            # default=str keeps values that are not JSON-serialisable (for
            # example datetimes) from failing an otherwise successful run.
            logger.debug(f"Final results: {json.dumps(results, indent=2, default=str)}")
        return {
            "success": True,
            "data": results
        }

    except Exception as e:
        # Top-level boundary: callers always get a structured response.
        error_msg = f"Error during website analysis: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "success": False,
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # The formatted traceback text, not the traceback object's repr.
                "traceback": traceback.format_exc()
            }
        }
|
||||
|
||||
def _validate_url(self, url: str) -> bool:
|
||||
"""Validate URL format."""
|
||||
try:
|
||||
result = urlparse(url)
|
||||
return all([result.scheme, result.netloc])
|
||||
except Exception as e:
|
||||
logger.error(f"URL validation error: {str(e)}")
|
||||
return False
|
||||
|
||||
def _get_basic_info(self, url: str) -> Dict:
    """Fetch *url* and collect basic facts about the response and page.

    Args:
        url (str): Page to request (10 second timeout).

    Returns:
        Dict: On success, the keys ``status_code``, ``content_type``,
        ``title``, ``meta_description``, ``headers``, ``robots_txt`` and
        ``sitemap``. On failure, ``error`` (message) and ``error_details``.
    """
    import traceback  # local import: only needed on the failure path

    scraping_logger.debug(f"Getting basic info for {url}")
    try:
        response = self.session.get(url, timeout=10)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')
        # <title>.string is None when the tag is empty or wraps child tags;
        # fall back to '' so "title" is always a string.
        title = (soup.title.string or '') if soup.title else ''

        return {
            "status_code": response.status_code,
            "content_type": response.headers.get('content-type', ''),
            "title": title,
            "meta_description": self._get_meta_description(soup),
            "headers": dict(response.headers),
            "robots_txt": self._get_robots_txt(url),
            "sitemap": self._get_sitemap(url)
        }
    except requests.exceptions.RequestException as e:
        error_msg = f"Request error in basic info: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": "RequestException",
                # e.response is None when no response was received at all.
                "status_code": getattr(getattr(e, 'response', None), 'status_code', None),
                "url": url
            }
        }
    except Exception as e:
        error_msg = f"Error getting basic info: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # str(e.__traceback__) is only "<traceback object at 0x...>";
                # format_exc() yields the readable stack trace.
                "traceback": traceback.format_exc()
            }
        }
|
||||
|
||||
def _check_ssl(self, domain: str) -> Dict:
    """Open a TLS connection to *domain*:443 and summarise its certificate.

    Args:
        domain (str): Host name to connect to.

    Returns:
        Dict: ``has_ssl`` True plus ``issuer``, ``expiry`` (ISO 8601),
        ``version`` and ``subject`` on success; otherwise ``has_ssl`` False
        and an ``error`` message.
    """
    scraping_logger.debug(f"Checking SSL for {domain}")
    try:
        context = ssl.create_default_context()
        # Without a timeout an unresponsive host would block this call
        # indefinitely; 10 s matches the HTTP timeouts used in this class.
        with socket.create_connection((domain, 443), timeout=10) as sock:
            with context.wrap_socket(sock, server_hostname=domain) as ssock:
                cert = ssock.getpeercert()

        return {
            "has_ssl": True,
            # issuer/subject are tuples of RDNs; take the first pair of each.
            "issuer": dict(x[0] for x in cert['issuer']),
            "expiry": datetime.strptime(cert['notAfter'], '%b %d %H:%M:%S %Y %Z').isoformat(),
            "version": cert['version'],
            "subject": dict(x[0] for x in cert['subject'])
        }
    except Exception as e:
        logger.error(f"SSL check error: {str(e)}", exc_info=True)
        return {"has_ssl": False, "error": str(e)}
|
||||
|
||||
def _check_dns(self, domain: str) -> Dict:
    """Resolve the common DNS record types for *domain*.

    Args:
        domain (str): Domain name to query.

    Returns:
        Dict: Maps each of ``A``, ``AAAA``, ``MX``, ``NS`` and ``TXT`` to a
        list of stringified answers (empty when the lookup yields nothing
        or fails), or ``{"error": ...}`` on an unexpected failure.
    """
    scraping_logger.debug(f"Checking DNS for {domain}")
    try:
        dns_records = {}
        for rtype in ('A', 'AAAA', 'MX', 'NS', 'TXT'):
            resolved = []
            try:
                resolved = [str(item) for item in dns.resolver.resolve(domain, rtype)]
            except dns.resolver.NoAnswer:
                # No record of this type: leave the list empty, no warning.
                pass
            except Exception as e:
                scraping_logger.warning(f"Error resolving {rtype} record: {str(e)}")
            dns_records[rtype] = resolved
        return dns_records
    except Exception as e:
        logger.error(f"DNS check error: {str(e)}", exc_info=True)
        return {"error": str(e)}
|
||||
|
||||
def _get_whois_info(self, domain: str) -> Dict:
    """Look up WHOIS data for *domain* and normalise it to plain values.

    Args:
        domain (str): Domain name to query.

    Returns:
        Dict: ``registrar``, ``creation_date``, ``expiration_date``,
        ``updated_date``, ``name_servers``, ``domain_name`` and ``text``.
        Missing values are reported as ``'Unknown'`` (or an empty
        list/string); if the lookup itself fails every field takes its
        placeholder value.
    """
    scraping_logger.debug(f"Getting WHOIS info for {domain}")

    def format_date(date_value):
        """Render a WHOIS date (or list of dates) as text, else 'Unknown'."""
        if isinstance(date_value, (list, tuple)):
            # Use the first usable entry rather than blindly taking [0].
            date_value = next((item for item in date_value if item), None)
        if not date_value:
            return 'Unknown'
        if hasattr(date_value, 'isoformat'):
            return date_value.isoformat()
        # Unparsed dates arrive as plain strings; pass them through instead
        # of raising and discarding the whole WHOIS result.
        return str(date_value)

    try:
        w = whois.whois(domain)

        return {
            'registrar': getattr(w, 'registrar', 'Unknown'),
            'creation_date': format_date(getattr(w, 'creation_date', None)),
            'expiration_date': format_date(getattr(w, 'expiration_date', None)),
            'updated_date': format_date(getattr(w, 'updated_date', None)),
            'name_servers': getattr(w, 'name_servers', None) or [],
            'domain_name': getattr(w, 'domain_name', domain),
            'text': getattr(w, 'text', '')
        }
    except Exception as e:
        logger.error(f"WHOIS check error: {str(e)}")
        return {
            'registrar': 'Unknown',
            'creation_date': 'Unknown',
            'expiration_date': 'Unknown',
            'updated_date': 'Unknown',
            'name_servers': [],
            'domain_name': domain,
            'text': ''
        }
|
||||
|
||||
def _analyze_content(self, url: str) -> Dict:
    """Download *url* and count its structural elements.

    Args:
        url (str): Page to fetch (10 second timeout).

    Returns:
        Dict: Word, heading, image, link and paragraph counts, the
        per-level heading breakdown, and flags for meta description,
        robots.txt and sitemap. On failure the same keys carry zero/False
        values and an ``error`` message is added.
    """
    scraping_logger.debug(f"Analyzing content for {url}")

    def _failure(exc):
        # Zeroed result shape shared by both error paths.
        return {
            "word_count": 0,
            "heading_count": 0,
            "heading_structure": {'h1': 0, 'h2': 0, 'h3': 0, 'h4': 0, 'h5': 0, 'h6': 0},
            "image_count": 0,
            "link_count": 0,
            "paragraph_count": 0,
            "has_meta_description": False,
            "has_robots_txt": False,
            "has_sitemap": False,
            "error": str(exc)
        }

    try:
        page = self.session.get(url, timeout=10)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'html.parser')

        # Words are runs of \w characters in the lower-cased page text.
        tokens = re.findall(r'\w+', soup.get_text().lower())

        heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
        all_headings = soup.find_all(heading_tags)
        per_level = {tag: len(soup.find_all(tag)) for tag in heading_tags}

        img_tags = soup.find_all('img')
        anchor_tags = soup.find_all('a')
        p_tags = soup.find_all('p')

        return {
            "word_count": len(tokens),
            "heading_count": len(all_headings),
            "heading_structure": per_level,
            "image_count": len(img_tags),
            "link_count": len(anchor_tags),
            "paragraph_count": len(p_tags),
            "has_meta_description": bool(self._get_meta_description(soup)),
            "has_robots_txt": bool(self._get_robots_txt(url)),
            "has_sitemap": bool(self._get_sitemap(url))
        }
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error in content analysis: {str(e)}", exc_info=True)
        return _failure(e)
    except Exception as e:
        logger.error(f"Content analysis error: {str(e)}", exc_info=True)
        return _failure(e)
|
||||
|
||||
def _check_performance(self, url: str) -> Dict:
    """Time a single GET of *url* and report basic response metrics.

    Args:
        url (str): Page to fetch (10 second timeout).

    Returns:
        Dict: ``load_time`` (seconds spent in the GET call),
        ``status_code``, ``content_length`` (bytes of body), ``headers``
        and ``response_time`` (``response.elapsed`` in seconds). On failure
        the same keys carry zero/empty values plus an ``error`` message.
    """
    import time  # local import: perf_counter is only used here

    scraping_logger.debug(f"Checking performance for {url}")

    def _failure(exc):
        # Zeroed result shape shared by both error paths.
        return {
            "load_time": 0,
            "status_code": 0,
            "content_length": 0,
            "headers": {},
            "response_time": 0,
            "error": str(exc)
        }

    try:
        # perf_counter is monotonic; datetime.now() follows the wall clock
        # and can jump (NTP/DST), producing wrong or negative durations.
        started = time.perf_counter()
        response = self.session.get(url, timeout=10)
        load_time = time.perf_counter() - started

        return {
            "load_time": load_time,
            "status_code": response.status_code,
            "content_length": len(response.content),
            "headers": dict(response.headers),
            "response_time": response.elapsed.total_seconds()
        }
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error in performance check: {str(e)}", exc_info=True)
        return _failure(e)
    except Exception as e:
        logger.error(f"Performance check error: {str(e)}", exc_info=True)
        return _failure(e)
|
||||
|
||||
def _get_meta_description(self, soup: BeautifulSoup) -> Optional[str]:
    """Return the ``content`` of the page's description meta tag.

    Args:
        soup (BeautifulSoup): Parsed document to search.

    Returns:
        Optional[str]: The tag's ``content`` attribute, or None when there
        is no ``<meta name="description">`` tag or it has no ``content``.
    """
    description_tag = soup.find('meta', attrs={'name': 'description'})
    if not description_tag:
        return None
    return description_tag.get('content')
|
||||
|
||||
def _get_robots_txt(self, url: str) -> Optional[str]:
|
||||
"""Get robots.txt content."""
|
||||
try:
|
||||
robots_url = f"{url.rstrip('/')}/robots.txt"
|
||||
response = self.session.get(robots_url, timeout=5)
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
except Exception as e:
|
||||
scraping_logger.warning(f"Error fetching robots.txt: {str(e)}")
|
||||
return None
|
||||
|
||||
def _get_sitemap(self, url: str) -> Optional[str]:
|
||||
"""Get sitemap.xml content."""
|
||||
try:
|
||||
sitemap_url = f"{url.rstrip('/')}/sitemap.xml"
|
||||
response = self.session.get(sitemap_url, timeout=5)
|
||||
if response.status_code == 200:
|
||||
return response.text
|
||||
except Exception as e:
|
||||
scraping_logger.warning(f"Error fetching sitemap.xml: {str(e)}")
|
||||
return None
|
||||
|
||||
def _analyze_seo(self, url: str) -> Dict:
    """Run the SEO analysis for *url* and combine it into one score.

    The overall score is a weighted sum: 30% meta tags, 30% readability
    and 40% content quality.

    Args:
        url (str): Page to analyse.

    Returns:
        Dict: ``overall_score``, ``meta_tags``, ``content`` and
        ``recommendations`` on success; ``error`` and ``error_details``
        when content extraction or any later step fails.
    """
    import traceback  # local import: only needed on the failure path

    try:
        content, soup, extract_errors = self._extract_content(url)
        if not content or not soup:
            return {
                "error": "Failed to extract content",
                "error_details": {"errors": extract_errors}
            }

        meta_analysis = self._analyze_meta_tags(soup)
        content_analysis, recommendations = self._analyze_content_with_ai(content)

        # Five pass/fail meta checks worth 20 points each (max 100).
        meta_checks = [
            meta_analysis.title['status'] == 'good',
            meta_analysis.description['status'] == 'good',
            meta_analysis.keywords['status'] == 'good',
            bool(meta_analysis.has_robots),
            bool(meta_analysis.has_sitemap),
        ]
        meta_score = sum(meta_checks) * 20

        overall_score = (
            meta_score * 0.3 +  # 30% weight for meta tags
            content_analysis.readability_score * 0.3 +  # 30% weight for readability
            content_analysis.content_quality_score * 0.4  # 40% weight for content quality
        )

        return {
            "overall_score": overall_score,
            "meta_tags": meta_analysis.__dict__,
            "content": content_analysis.__dict__,
            "recommendations": [rec.__dict__ for rec in recommendations]
        }

    except Exception as e:
        error_msg = f"Error in SEO analysis: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # str(e.__traceback__) is only "<traceback object at 0x...>";
                # format_exc() yields the readable stack trace.
                "traceback": traceback.format_exc()
            }
        }
|
||||
|
||||
def _extract_content(self, url: str) -> Tuple[Optional[str], Optional[BeautifulSoup], List[str]]:
    """Download *url* and parse it.

    Args:
        url (str): Page to fetch (10 second timeout).

    Returns:
        Tuple: ``(html, soup, errors)``. On a request failure ``html`` and
        ``soup`` are None and ``errors`` holds the logged message;
        otherwise ``errors`` is empty.
    """
    problems = []
    try:
        page = self.session.get(url, timeout=10)
        page.raise_for_status()
        parsed = BeautifulSoup(page.text, 'html.parser')
        return page.text, parsed, problems
    except requests.RequestException as e:
        error_msg = f"Error fetching URL: {str(e)}"
        logger.error(error_msg)
        problems.append(error_msg)
        return None, None, problems
|
||||
|
||||
def _analyze_meta_tags(self, soup: BeautifulSoup) -> MetaTagAnalysis:
    """Grade the title, description and keywords meta data of a page.

    A title is 'good' at 30-60 characters, a description at 120-160
    characters, and keywords when the tag has any content.

    Args:
        soup (BeautifulSoup): Parsed document to inspect.

    Returns:
        MetaTagAnalysis: Per-tag ``status``/``value``/``recommendation``
        dicts plus flags for a robots meta tag and a sitemap link tag.
    """
    # <title>.string is None when the tag is empty or wraps child tags;
    # normalise to "" so 'value' is always a string.
    title = (soup.title.string or "") if soup.title else ""
    title_ok = bool(title) and 30 <= len(title) <= 60
    title_analysis = {
        'status': 'good' if title_ok else 'needs_improvement',
        'value': title,
        'recommendation': '' if title_ok else 'Title should be between 30-60 characters'
    }

    meta_desc = soup.find('meta', attrs={'name': 'description'})
    desc = meta_desc.get('content', '') if meta_desc else ""
    desc_ok = bool(desc) and 120 <= len(desc) <= 160
    desc_analysis = {
        'status': 'good' if desc_ok else 'needs_improvement',
        'value': desc,
        'recommendation': '' if desc_ok else 'Description should be between 120-160 characters'
    }

    meta_keywords = soup.find('meta', attrs={'name': 'keywords'})
    keywords = meta_keywords.get('content', '') if meta_keywords else ""
    keywords_analysis = {
        'status': 'good' if keywords else 'needs_improvement',
        'value': keywords,
        'recommendation': '' if keywords else 'Add relevant keywords meta tag'
    }

    return MetaTagAnalysis(
        title=title_analysis,
        description=desc_analysis,
        keywords=keywords_analysis,
        # These flags look only at tags inside the page itself; no
        # robots.txt or sitemap.xml file is fetched here.
        has_robots=bool(soup.find('meta', attrs={'name': 'robots'})),
        has_sitemap=bool(soup.find('link', attrs={'rel': 'sitemap'}))
    )
|
||||
|
||||
def _analyze_content_with_ai(self, content: str) -> Tuple[ContentAnalysis, List[SEORecommendation]]:
    """Ask the LLM for an SEO assessment of *content*.

    Only the first 4000 characters are sent, to stay within token limits.
    Any failure (prompt construction, the LLM call, an empty or malformed
    reply) falls back to ``_get_fallback_analysis``.

    Args:
        content (str): Raw page content to assess.

    Returns:
        Tuple[ContentAnalysis, List[SEORecommendation]]: The analysis and
        its recommendations.
    """

    def _as_score(value) -> float:
        """Coerce a model-supplied score to a float clamped to 0-100."""
        try:
            return min(100.0, max(0.0, float(value)))
        except (TypeError, ValueError):
            return 0.0

    try:
        # The JSON keys are spelled out because the parsing code below
        # looks them up by exactly these names.
        prompt = (
            "Analyze the following webpage content for SEO and provide a structured analysis:\n"
            f"Content: {content[:4000]}...\n"
            "\n"
            "Provide analysis in the following format:\n"
            "1. Word count\n"
            "2. Heading structure analysis\n"
            "3. Keyword density for main topics\n"
            "4. Readability score (0-100)\n"
            "5. Content quality score (0-100)\n"
            "6. List of SEO recommendations with priority (high/medium/low), "
            "category, issue, recommendation, and impact\n"
            "\n"
            "Format the response as a JSON object with the keys "
            "\"word_count\", \"heading_structure\", \"keyword_density\", "
            "\"readability_score\", \"content_quality_score\" and "
            "\"recommendations\" (a list of objects with the keys "
            "\"priority\", \"category\", \"issue\", \"recommendation\", \"impact\")."
        )
    except Exception as e:
        logger.error(f"Error in AI analysis setup: {str(e)}")
        return self._get_fallback_analysis(content)

    try:
        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert analyzing website content.",
            response_format="json_object"
        )

        # NOTE(review): the return type of llm_text_gen is not visible
        # here; accept either an already-parsed dict or a JSON string.
        if isinstance(analysis, str):
            analysis = json.loads(analysis) if analysis.strip() else None

        if not analysis:
            logger.error("Empty response from AI analysis")
            return self._get_fallback_analysis(content)

        if not isinstance(analysis, dict):
            raise TypeError(f"Expected a JSON object, got {type(analysis).__name__}")

        content_analysis = ContentAnalysis(
            word_count=len(content.split()),
            headings_structure=analysis.get('heading_structure') or {},
            keyword_density=analysis.get('keyword_density') or {},
            # Scores feed arithmetic in the caller, so force them numeric.
            readability_score=_as_score(analysis.get('readability_score', 0)),
            content_quality_score=_as_score(analysis.get('content_quality_score', 0))
        )

        # Tolerate partially filled recommendations instead of discarding
        # the whole analysis because one key is missing.
        recommendations = [
            SEORecommendation(
                priority=str(rec.get('priority', 'medium')),
                category=str(rec.get('category', '')),
                issue=str(rec.get('issue', '')),
                recommendation=str(rec.get('recommendation', '')),
                impact=str(rec.get('impact', ''))
            )
            for rec in (analysis.get('recommendations') or [])
            if isinstance(rec, dict)
        ]

        return content_analysis, recommendations

    except Exception as e:
        logger.error(f"Error in AI analysis: {str(e)}")
        return self._get_fallback_analysis(content)
|
||||
|
||||
def _get_fallback_analysis(self, content: str) -> Tuple[ContentAnalysis, List[SEORecommendation]]:
    """Build a word-count-based analysis for when the AI path is unusable.

    Readability is ``word_count / 10`` and content quality is
    ``word_count / 20``, each clamped to 0-100. A single high-priority
    recommendation states that AI analysis was unavailable.

    Args:
        content (str): Raw page content.

    Returns:
        Tuple[ContentAnalysis, List[SEORecommendation]]: The heuristic
        analysis and its one recommendation; an all-zero analysis with no
        recommendations if even this computation fails.
    """
    try:
        total_words = len(content.split())

        # Crude stand-ins for real scores, both clamped to the 0-100 range.
        readability = min(100, max(0, total_words / 10))
        quality = min(100, max(0, total_words / 20))

        notice = SEORecommendation(
            priority="high",
            category="content",
            issue="AI analysis unavailable",
            recommendation="Consider running the analysis again with a valid API key for more detailed insights",
            impact="Limited analysis capabilities"
        )
        heuristic = ContentAnalysis(
            word_count=total_words,
            headings_structure={},
            keyword_density={},
            readability_score=readability,
            content_quality_score=quality
        )
        return heuristic, [notice]

    except Exception as e:
        logger.error(f"Error in fallback analysis: {str(e)}")
        empty = ContentAnalysis(
            word_count=0,
            headings_structure={},
            keyword_density={},
            readability_score=0,
            content_quality_score=0
        )
        return empty, []
|
||||
|
||||
def analyze_website(url: str) -> Dict:
    """
    Analyze a website and return comprehensive results.

    Creates a ``WebsiteAnalyzer``, runs its ``analyze_website`` method and
    wraps the outcome in a success/error envelope.

    Args:
        url (str): The URL to analyze

    Returns:
        Dict: ``{"success": True, "data": results}`` on success, otherwise
        ``{"success": False, "error": ..., "error_details": ...}``
    """
    import traceback  # local import: only needed on the failure path

    logger.info(f"Starting website analysis for URL: {url}")
    try:
        analyzer = WebsiteAnalyzer()

        results = analyzer.analyze_website(url)

        # The analyzer reports failures through an "error" key.
        if "error" in results:
            error_msg = f"Error in base analysis: {results['error']}"
            logger.error(error_msg)
            logger.error(f"Error details: {json.dumps(results.get('error_details', {}), indent=2)}")
            return {
                "success": False,
                "error": error_msg,
                "error_details": results.get("error_details", {})
            }

        # NOTE(review): the analyzer method presumably already returns a
        # {"success": True, "data": ...} envelope, so this wraps it a second
        # time - confirm what callers expect before flattening.
        logger.info("Analysis completed successfully")
        logger.debug(f"Analysis results: {json.dumps(results, indent=2)}")
        return {
            "success": True,
            "data": results
        }
    except Exception as e:
        error_msg = f"Error in analyze_website: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "success": False,
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # str(e.__traceback__) is only "<traceback object at 0x...>";
                # format_exc() yields the readable stack trace.
                "traceback": traceback.format_exc()
            }
        }
|
||||
134
ToBeMigrated/utils/website_analyzer/content_gap_analyzer.py
Normal file
134
ToBeMigrated/utils/website_analyzer/content_gap_analyzer.py
Normal file
@@ -0,0 +1,134 @@
|
||||
import json
from typing import Dict

from loguru import logger
|
||||
|
||||
class ContentGapAnalyzer:
    """Derive content gaps and recommendations from a base website analysis.

    NOTE(review): ``analyze`` calls ``self._analyze_content_gaps`` and
    ``self._generate_recommendations``, neither of which is defined in this
    class as shown; until they exist (here or in a subclass) ``analyze``
    reports a failure at the "gap_analysis" stage.
    """

    def __init__(self, analyzer):
        """Store the analyzer whose ``analyze_website(url)`` supplies the base data."""
        self.analyzer = analyzer

    @staticmethod
    def _failure(stage: str, error_msg: str, error_details: Dict) -> Dict:
        """Build the failure result shared by every error path."""
        return {
            "success": False,
            "error": error_msg,
            "error_details": error_details,
            "stage": stage
        }

    @staticmethod
    def _exception_details(exc: BaseException) -> Dict:
        """Describe *exc* with its type name and formatted traceback."""
        import traceback  # local import: only needed on failure paths

        return {
            "type": type(exc).__name__,
            # str(exc.__traceback__) would only give the object repr.
            "traceback": "".join(
                traceback.format_exception(type(exc), exc, exc.__traceback__)
            )
        }

    def analyze(self, url: str) -> Dict:
        """
        Analyze content gaps for a given URL.

        Args:
            url (str): The URL to analyze

        Returns:
            Dict: ``{"success": True, "data": {...}}`` with ``content_gaps``,
            ``recommendations`` and ``metrics``; or a failure dict with
            ``error``, ``error_details`` and the ``stage`` that failed
        """
        try:
            logger.info(f"Starting content gap analysis for URL: {url}")
            base_analysis = self.analyzer.analyze_website(url)

            # Propagate a failed base analysis unchanged.
            if not base_analysis.get("success", False):
                error_msg = base_analysis.get("error", "Unknown error in website analysis")
                error_details = base_analysis.get("error_details", {})
                logger.error(f"Base analysis failed: {error_msg}")
                logger.error(f"Error details: {json.dumps(error_details, indent=2)}")
                return self._failure("base_analysis", error_msg, error_details)

            # All three sections are needed for the gap analysis.
            analysis_data = base_analysis.get("data", {}).get("analysis", {})
            required_sections = ["content_info", "basic_info", "performance"]
            missing_sections = [section for section in required_sections if section not in analysis_data]

            if missing_sections:
                error_msg = f"Missing required analysis sections: {', '.join(missing_sections)}"
                logger.error(error_msg)
                logger.error(f"Available sections: {list(analysis_data.keys())}")
                return self._failure(
                    "section_validation",
                    error_msg,
                    {
                        "missing_sections": missing_sections,
                        "available_sections": list(analysis_data.keys())
                    }
                )

            try:
                content_info = analysis_data["content_info"]
                basic_info = analysis_data["basic_info"]
                performance = analysis_data["performance"]
            except KeyError as e:
                error_msg = f"Error extracting analysis section: {str(e)}"
                logger.error(error_msg)
                return self._failure(
                    "data_extraction",
                    error_msg,
                    {
                        "type": "KeyError",
                        "missing_key": str(e),
                        "available_keys": list(analysis_data.keys())
                    }
                )

            try:
                gaps = self._analyze_content_gaps(content_info, basic_info, performance)
            except Exception as e:
                error_msg = f"Error analyzing content gaps: {str(e)}"
                logger.error(error_msg, exc_info=True)
                return self._failure("gap_analysis", error_msg, self._exception_details(e))

            try:
                recommendations = self._generate_recommendations(gaps)
            except Exception as e:
                error_msg = f"Error generating recommendations: {str(e)}"
                logger.error(error_msg, exc_info=True)
                return self._failure("recommendation_generation", error_msg, self._exception_details(e))

            return {
                "success": True,
                "data": {
                    "content_gaps": gaps,
                    "recommendations": recommendations,
                    "metrics": {
                        "word_count": content_info.get("word_count", 0),
                        "heading_count": content_info.get("heading_count", 0),
                        "image_count": content_info.get("image_count", 0),
                        "link_count": content_info.get("link_count", 0),
                        "paragraph_count": content_info.get("paragraph_count", 0),
                        "load_time": performance.get("load_time", 0),
                        "response_time": performance.get("response_time", 0)
                    }
                }
            }

        except Exception as e:
            error_msg = f"Error in content gap analysis: {str(e)}"
            logger.error(error_msg, exc_info=True)
            return self._failure("general", error_msg, self._exception_details(e))
|
||||
45
ToBeMigrated/utils/website_analyzer/models.py
Normal file
45
ToBeMigrated/utils/website_analyzer/models.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""Data models for website analysis results."""
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Optional
|
||||
from datetime import datetime
|
||||
|
||||
@dataclass
class SEORecommendation:
    """One actionable SEO finding.

    Attributes:
        priority: Urgency label - 'high', 'medium' or 'low'.
        category: Area concerned - 'content', 'technical', 'meta', etc.
        issue: What was found.
        recommendation: What to do about it.
        impact: Effect attributed to the issue.
    """

    priority: str
    category: str
    issue: str
    recommendation: str
    impact: str
|
||||
|
||||
@dataclass
class MetaTagAnalysis:
    """Assessment of a page's meta tags.

    ``title``, ``description`` and ``keywords`` each hold a dict shaped like
    ``{'status': 'good', 'value': 'actual title', 'recommendation': 'suggestion'}``.

    Attributes:
        title: Assessment of the page title.
        description: Assessment of the meta description.
        keywords: Assessment of the meta keywords.
        has_robots: Whether robots information was found.
        has_sitemap: Whether sitemap information was found.
    """

    title: Dict[str, str]
    description: Dict[str, str]
    keywords: Dict[str, str]
    has_robots: bool
    has_sitemap: bool
|
||||
|
||||
@dataclass
class ContentAnalysis:
    """Assessment of a page's body content.

    Attributes:
        word_count: Number of words counted in the content.
        headings_structure: Heading counts per level, e.g. ``{'h1': 1, 'h2': 3}``.
        keyword_density: Density value per keyword.
        readability_score: Readability rating.
        content_quality_score: Content quality rating.
    """

    word_count: int
    headings_structure: Dict[str, int]
    keyword_density: Dict[str, float]
    readability_score: float
    content_quality_score: float
|
||||
|
||||
@dataclass
class SEOAnalysisResult:
    """Everything produced by one SEO analysis run.

    Attributes:
        url: The analysed URL.
        analyzed_at: When the analysis was performed.
        overall_score: Combined score on a 0-100 scale.
        meta_tags: Meta tag assessment.
        content: Content assessment.
        recommendations: Recommendations produced by the analysis.
        errors: Error messages collected during the run.
        warnings: Warning messages collected during the run.
        success: Whether the analysis succeeded.
    """

    url: str
    analyzed_at: datetime
    overall_score: float
    meta_tags: MetaTagAnalysis
    content: ContentAnalysis
    recommendations: List[SEORecommendation]
    errors: List[str]
    warnings: List[str]
    success: bool
|
||||
Reference in New Issue
Block a user