ALwrity Version 0.5.1 (Fastapi + React)

This commit is contained in:
ajaysi
2025-08-06 16:29:49 +05:30
parent dbf761c31f
commit 2579c12ba4
331 changed files with 0 additions and 22 deletions

View File

@@ -0,0 +1,99 @@
"""AI research module for topic analysis and research."""
import asyncio
from typing import Dict, Any
from loguru import logger
import sys
from ..web_crawlers.async_web_crawler import AsyncWebCrawlerService
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
# Configure logging for this module.
# logger.remove() is called without a handler id, which (per loguru) removes
# the handlers registered so far before the two sinks below are added.
# NOTE(review): loguru's logger is shared process-wide, so this presumably
# also resets handlers configured by other modules - confirm that is intended.
logger.remove()
# File sink: DEBUG and above, rotated at 500 MB, rotated files kept 10 days.
logger.add(
"logs/ai_research.log",
rotation="500 MB",
retention="10 days",
level="DEBUG",
format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)
# Console sink: INFO and above, using loguru colour markup tags.
logger.add(
sys.stdout,
level="INFO",
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
def research_topic(topic: str) -> Dict[str, Any]:
    """Crawl the web for *topic* and condense the findings with an LLM.

    The asynchronous crawler is driven to completion with ``asyncio.run`` so
    that callers can use this function synchronously.

    Args:
        topic (str): The topic to research.

    Returns:
        Dict[str, Any]: ``{'success': True, 'data': {'research': {...}}}`` when
        the crawl and analysis succeed, otherwise
        ``{'success': False, 'error': <message>}``. Exceptions are logged and
        reported through the same error shape instead of propagating.
    """
    try:
        logger.info(f"[research_topic] Starting research for topic: {topic}")

        async def _crawl_and_analyze():
            async with AsyncWebCrawlerService() as crawler:
                # Step 1: fetch raw material for the topic.
                crawl_outcome = await crawler.crawl_website(topic)
                if not crawl_outcome.get('success'):
                    return {
                        'success': False,
                        'error': crawl_outcome.get('error', 'Research failed')
                    }

                # Step 2: let the LLM analyse what was crawled.
                llm_report = await crawler.analyze_content_with_llm(
                    crawl_outcome['content'],
                    api_key=None,  # Should be passed from config
                    gpt_provider="google"  # Should be configurable
                )

                # Step 3: reshape the analysis into the public response format.
                overview = {
                    'topic': topic,
                    'scope': llm_report.get('topics', []),
                    'methodology': 'Web crawling and AI analysis'
                }
                is_reliable = bool(llm_report.get('seo_score', 0) > 0.7)
                is_thorough = bool(len(llm_report.get('key_insights', [])) > 5)
                research = {
                    'overview': overview,
                    'data_quality': {'is_reliable': is_reliable},
                    'analysis_quality': {'is_thorough': is_thorough},
                    'recommendations': llm_report.get('recommendations', []),
                    'next_steps': llm_report.get('priority_areas', [])
                }
                return {'success': True, 'data': {'research': research}}

        outcome = asyncio.run(_crawl_and_analyze())
        if outcome.get('success'):
            logger.info("[research_topic] Research completed successfully")
            return outcome

        failure_reason = outcome.get('error', 'Research failed')
        logger.error(f"[research_topic] Research failed: {failure_reason}")
        return {'success': False, 'error': failure_reason}
    except Exception as e:
        logger.error(f"[research_topic] Research failed: {str(e)}")
        return {'success': False, 'error': str(e)}

View File

@@ -0,0 +1,232 @@
import re
import os
import PyPDF2
import openai
import streamlit as st
import tempfile
from loguru import logger
from lib.ai_writers.ai_news_article_writer import ai_news_generation
from lib.ai_writers.ai_finance_report_generator.ai_financial_dashboard import get_dashboard
from lib.ai_writers.ai_facebook_writer.facebook_ai_writer import facebook_main_menu
from lib.ai_writers.linkedin_writer.linkedin_ai_writer import linkedin_main_menu
from lib.ai_writers.twitter_writers.twitter_dashboard import run_dashboard
from lib.ai_writers.insta_ai_writer import insta_writer
from lib.ai_writers.youtube_writers.youtube_ai_writer import youtube_main_menu
from lib.ai_writers.ai_essay_writer import ai_essay_generator
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
#from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_content_planner
from lib.gpt_providers.text_generation.main_text_generation import llm_text_gen
def ai_agents_team():
    """Streamlit page that lets the user pick an AI agents team.

    "AI Planning Team" shows a keyword form for a content calendar (the
    planner itself is not wired in yet, so only a "coming soon" notice is
    shown); "AI Content Creation Team" delegates to ``content_agents``.
    """
    st.title("🐲 Your AI Agents Teams")
    st.markdown("""Alwrity offers AI agents team for content creators to easily modify them for their needs.
Abstracting tech & plumbing, easily define role, goal, task. Use different AI agents framework.""")

    team_choices = [
        "AI Planning Team",
        "AI Content Creation Team"
    ]
    selected_team = st.radio("**Choose AI Agents Team:**", team_choices)

    if selected_team == "AI Content Creation Team":
        content_agents()
        return
    if selected_team != "AI Planning Team":
        return

    st.title("AI Agents for Content Ideation")
    plan_keywords = st.text_input(
        "Enter Keywords to get 2 months content calendar:",
        placeholder="Enter keywords to generate AI content calendar:",
        help="Enter at least two words for better results."
    )
    if not st.button("Get calendar"):
        return

    # Require at least two whitespace-separated words.
    if not plan_keywords or len(plan_keywords.split()) < 2:
        st.error("🚫 Single keywords are just too vague. Try again.")
        return

    with st.spinner("Get Content Plan..."):
        try:
            # TODO: call ai_agents_content_planner(plan_keywords) and render its output.
            st.success(f"Coming soon: Content plan for: {plan_keywords}")
        except Exception as err:
            st.error(f"Failed to generate content plan: {err}")
def content_agents():
    """Streamlit form for the content-writing agents team.

    Collects the business domain keywords and, on "Start Writing", validates
    that at least two words were given. The writer agents are not implemented
    yet, so a notice is shown instead of generated content.
    """
    st.markdown("AI Agents Team for Content Writing")
    content_keywords = st.text_input(
        "Enter Main Domain Keywords of your business:",
        placeholder="Better keywords, Better content. Get keywords from Google search",
        help="These keywords define your main business sector, blogging niche, Industry, domain etc"
    )
    if not st.button("Start Writing"):
        return

    # Require at least two whitespace-separated words.
    if not content_keywords or len(content_keywords.split()) < 2:
        st.error("🚫 Single keywords are just too vague. Try again.")
        return

    with st.spinner("Generating Content..."):
        try:
            # TODO: call ai_agents_writers(content_keywords) and render its output.
            st.success(f"🚫 Not implemented yet: {content_keywords}")
        except Exception as err:
            st.error(f"🚫 Failed to generate content with AI Agents: {err}")
def essay_writer():
    """Streamlit page for the AI essay writer.

    Gathers title, essay type, education level and length in a two-column
    form and passes them to ``ai_essay_generator`` when the button is pressed
    and a title is present.
    """
    st.title("AI Essay Writer 📝")
    st.write("Select your essay type, education level, and desired length, then let AI generate an essay for you. ✨")

    # Choices offered by the three select boxes.
    essay_types = [
        "📖 Argumentative - Forming an opinion via research. Building an evidence-based argument.",
        "📚 Expository - Knowledge of a topic. Communicating information clearly.",
        "✒️ Narrative - Creative language use. Presenting a compelling narrative.",
        "🎨 Descriptive - Creative language use. Describing sensory details."
    ]
    education_levels = [
        "🏫 Primary School",
        "🏫 High School",
        "🎓 College",
        "🎓 Graduate School"
    ]
    num_pages_options = [
        "📄 Short Form (1-2 pages)",
        "📄📄 Medium Form (3-5 pages)",
        "📄📄📄 Long Form (6+ pages)"
    ]

    left_col, right_col = st.columns(2)
    with left_col:
        essay_title = st.text_input(
            "📝 Essay Title",
            placeholder="Enter the title of your essay",
            help="Provide a clear and concise title for your essay."
        )
        selected_essay_type = st.selectbox(
            "📚 Type of Essay",
            options=essay_types,
            help="Choose the type of essay you want to write."
        )
    with right_col:
        selected_education_level = st.selectbox(
            "🎓 Level of Education",
            options=education_levels,
            help="Choose your level of education."
        )
        selected_num_pages = st.selectbox(
            "📄 Number of Pages",
            options=num_pages_options,
            help="Select the length of your essay."
        )

    if not st.button("🚀 Generate Essay"):
        return
    if not essay_title:
        st.error("Please enter a valid title for your essay. 🚫")
        return

    st.success("Generating your essay... ✨")
    ai_essay_generator(essay_title, selected_essay_type, selected_education_level, selected_num_pages)
def ai_news_writer():
    """Streamlit page for the AI news writer.

    Collects headline keywords plus an origin country and a language, then
    calls ``ai_news_generation`` and renders the returned report as markdown.
    """
    st.markdown("<h1>📰 AI News Writer 🗞️ </h1>", unsafe_allow_html=True)

    news_keywords = st.text_input(
        "**🔑 Enter Keywords from News Headlines:**",
        placeholder="Describe the News article in 3-5 words. Enter main keywords describing the News Event:",
        help="Enter at least two words for better results."
    )
    # True only when at least two whitespace-separated words were entered.
    has_enough_words = bool(news_keywords) and len(news_keywords.split()) >= 2
    if news_keywords and not has_enough_words:
        st.error("🚫 News keywords should be at least two words long. Least, you can do..")

    # (code, label) pairs; only the label is shown in the select boxes.
    countries = [
        ("es", "Spain"),
        ("vn", "Vietnam"),
        ("pk", "Pakistan"),
        ("in", "India"),
        ("de", "Germany"),
        ("cn", "China")
    ]
    languages = [
        ("en", "English"),
        ("es", "Spanish"),
        ("vi", "Vietnamese"),
        ("ar", "Arabic"),
        ("hi", "Hindi"),
        ("de", "German"),
        ("zh-cn", "Chinese")
    ]

    def _label(entry):
        return entry[1]

    country_col, language_col = st.columns(2)
    with country_col:
        news_country = st.selectbox(
            "**🌍 Select Origin Country of News Event:**",
            countries,
            format_func=_label,
            help="Which country did the NEWS originate from ?"
        )
    with language_col:
        news_language = st.selectbox(
            "**🗣️ Select News Article Language to Search For:**",
            languages,
            format_func=_label,
            help="Language to output News Article in ?"
        )

    if not st.button("📰 Generate News Report"):
        return
    if not has_enough_words:
        st.error("Please enter valid keywords for the news report. 🚫")
        return

    with st.spinner("Generating News Report... ⏳"):
        try:
            # NOTE(review): news_country and news_language are the selected
            # (code, label) tuples, not bare codes - confirm this is what
            # ai_news_generation expects.
            news_report = ai_news_generation(news_keywords, news_country, news_language)
            st.success(f"Successfully generated news report on: {news_keywords} 🎉")
            st.markdown(news_report)
        except Exception as err:
            st.error(f"Failed to generate news report: {err}")
def ai_finance_ta_writer():
    """Streamlit page that writes a technical-analysis report for a ticker.

    Reads a ticker symbol and, on button press, asks the dashboard returned
    by ``get_dashboard()`` for a technical analysis and renders it as markdown.
    """
    st.markdown("<div class='sub-header'>AI Financial Technical Analysis Writer</div>", unsafe_allow_html=True)
    ticker_symbol = st.text_input(
        "Enter Ticker Symbol for TA:",
        placeholder="Enter a valid Ticker Symbol (Examples: IBM, BABA, HDFCBANK.NS, TATAMOTORS.NS etc)",
        help="Be sure of the ticker symbol. Double-check it! Examples: IBM, BABA, HDFCBANK.NS, TATAMOTORS.NS"
    )
    if not st.button("Generate TA Report"):
        return
    if not ticker_symbol:
        st.error("🚫 Provide a valid Ticker Symbol. Don't waste my time.")
        return

    with st.spinner("Generating TA Report..."):
        try:
            ta_report = get_dashboard().generate_technical_analysis(ticker_symbol)
            st.success(f"Successfully generated TA report for: {ticker_symbol}")
            st.markdown(ta_report)
        except Exception as err:
            st.error(f"🚫 Check ticker symbol: Failed to write Financial Technical Analysis. Error: {err}")
def ai_social_writer():
    """Streamlit page that routes to the writer for the chosen social platform."""
    # (key, label) pairs; the radio shows the label and returns the whole pair.
    social_media_options = [
        ("facebook", "Facebook"),
        ("linkedin", "LinkedIn"),
        ("twitter", "Twitter"),
        ("instagram", "Instagram"),
        ("youtube", "YouTube")
    ]
    selected_platform = st.radio(
        "Choose a Social Media Platform:",
        social_media_options,
        format_func=lambda option: option[1]
    )

    # Platform key -> entry point of that platform's writer UI, checked in order.
    writer_entry_points = (
        ("facebook", facebook_main_menu),
        ("linkedin", linkedin_main_menu),
        ("twitter", run_dashboard),
        ("instagram", insta_writer),
        ("youtube", youtube_main_menu),
    )
    for platform_key, open_writer in writer_entry_points:
        if platform_key in selected_platform:
            open_writer()
            break

View File

@@ -0,0 +1,54 @@
"""API Key Manager package for ALwrity."""
from .manager import APIKeyManager
from .api_key_manager import render, check_onboarding_completion, get_onboarding_status, reset_onboarding
from .onboarding_progress import (
OnboardingProgress,
get_onboarding_progress,
render_progress_indicator,
render_resume_message,
StepStatus,
StepData
)
from .validation import check_all_api_keys
from .components.base import (
render_step_indicator,
render_navigation_buttons,
render_step_validation,
render_resume_options
)
# Public API of the package: only the names listed here are exported by
# `from <package> import *`. Every entry is imported at the top of this file.
__all__ = [
# Main classes
'APIKeyManager',
'OnboardingProgress',
'StepStatus',
'StepData',
# Main functions
'render',
'check_onboarding_completion',
'get_onboarding_status',
'reset_onboarding',
'get_onboarding_progress',
# UI components
'render_progress_indicator',
'render_resume_message',
'render_step_indicator',
'render_navigation_buttons',
'render_step_validation',
'render_resume_options',
# Validation
'check_all_api_keys'
]
# Package metadata exposed as module-level dunder attributes.
__version__ = "2.0.0"
__author__ = "ALwrity Team"
__description__ = "Comprehensive API key management and onboarding system for ALwrity"
# Note: FastAPI endpoints have been moved to the backend/ directory
# for better separation of concerns and enterprise architecture.

View File

@@ -0,0 +1,42 @@
"""AI research functionality for API key manager."""
from loguru import logger
import asyncio
from typing import Dict, Any, Optional
async def research_topic(topic: str, api_keys: Dict[str, str]) -> Dict[str, Any]:
    """Return research results for *topic* (currently placeholder data).

    Args:
        topic (str): The topic to research.
        api_keys (Dict[str, str]): API keys for the available services.
            Accepted for the future implementation; not read yet.

    Returns:
        Dict[str, Any]: ``{"topic", "status": "success", "data": {...}}`` on
        success, or ``{"topic", "status": "error", "error": <message>}`` if an
        exception was raised.
    """
    try:
        logger.info(f"Starting research on topic: {topic}")
        # TODO: Implement actual research functionality using available API keys.
        # Until then a fixed placeholder payload is returned.
        placeholder_data = {
            "summary": f"Research summary for {topic}",
            "key_points": ["Point 1", "Point 2", "Point 3"],
            "sources": ["Source 1", "Source 2"]
        }
        logger.info("Research completed successfully")
        return {"topic": topic, "status": "success", "data": placeholder_data}
    except Exception as e:
        failure = str(e)
        logger.error(f"Error during research: {failure}")
        return {"topic": topic, "status": "error", "error": failure}

View File

@@ -0,0 +1,178 @@
# ALwrity Setup Components Guide
## Overview
The ALwrity Setup Components are the building blocks that guide you through setting up your content creation environment. Each component is designed to help you configure specific aspects of ALwrity for optimal content creation.
## Core Components
### 1. Website Setup (`website_setup.py`)
**Purpose**: Configure your website's basic information and analyze its current state
**Features**:
- **URL Configuration**: Set up your website's URL
- **Analysis Options**:
- Basic Analysis: Quick overview of your website
- Full Analysis with SEO: Comprehensive website and SEO analysis
- **Analysis Results**:
- Basic Metrics: Status, content type, title, meta description
- Content Analysis: Word count, headings, images, links
- SEO Analysis: SEO score, meta tags, content quality
- Technical SEO: Mobile friendliness, page speed, technical issues
- Strategy Recommendations: Actionable improvements
### 2. AI Research Setup (`ai_research_setup.py`)
**Purpose**: Configure AI-powered research tools for content creation
**Features**:
- **Traditional Search**:
- SerpAPI integration for real-time search results
- Access to structured data and knowledge graphs
- News articles and related questions
- **AI Deep Research**:
- Tavily AI for semantic understanding
- Metaphor/Exa for neural search capabilities
- Advanced research features
### 3. AI Providers (`ai_providers.py`)
**Purpose**: Set up your preferred AI content generation services
**Supported Providers**:
- **OpenAI (GPT models)**
- Advanced language models
- Creative content generation
- Context-aware responses
- **Google (Gemini Pro)**
- Balanced content creation
- Factual accuracy
- Multilingual support
- **Anthropic (Claude)**
- Professional writing
- Detailed analysis
- Ethical considerations
- **DeepSeek**
- Technical content
- Specialized knowledge
- Efficient processing
### 4. Personalization Setup (`personalization_setup.py`)
**Purpose**: Customize your content creation experience
**Features**:
- **Writing Style**:
- Tone preferences
- Voice settings
- Content structure
- **Brand Configuration**:
- Brand voice
- Style guidelines
- Content templates
### 5. ALwrity Integrations (`alwrity_integrations.py`)
**Purpose**: Connect additional tools and services
**Features**:
- **Third-party Services**:
- Analytics integration
- Social media tools
- Content management systems
- **Workflow Automation**:
- Publishing tools
- Content scheduling
- Distribution channels
### 6. Final Setup (`final_setup.py`)
**Purpose**: Complete and verify your configuration
**Features**:
- **Configuration Review**:
- Settings verification
- Connection testing
- Setup completion
- **Validation**:
- API key verification
- Service connectivity
- System readiness
## Base Components
### 1. Navigation (`base.py`)
**Purpose**: Provide consistent navigation throughout the setup process
**Features**:
- Step indicators
- Navigation buttons
- Progress tracking
- Back/forward controls
## How to Use the Components
### 1. Starting the Setup
1. Launch ALwrity
2. Navigate to the Setup section
3. Follow the guided wizard process
### 2. Component Navigation
- Use the step indicator to track progress
- Navigate between components using buttons
- Progress is saved automatically as you move between steps
- Return to previous steps if needed
### 3. Configuration Process
1. **Enter Information**: Fill in required details
2. **Verify Settings**: Review your inputs
3. **Test Connections**: Ensure everything works
4. **Complete Setup**: Finalize your configuration
## Best Practices
### 1. Before Setup
- Gather all necessary API keys
- Review provider documentation
- Plan your configuration
- Backup existing settings
### 2. During Setup
- Follow the wizard steps
- Verify each configuration
- Test connections
- Save progress regularly
### 3. After Setup
- Review all settings
- Test functionality
- Document configurations
- Monitor usage
## Troubleshooting
### 1. Common Issues
- Invalid API keys
- Connection problems
- Configuration errors
- Setup interruptions
### 2. Solutions
- Verify that each API key is valid and entered correctly
- Test the connection to each configured service
- Check the error logs for details
- Consult the support resources
## Need Help?
If you encounter any issues during setup:
1. Check the error messages
2. Review the documentation
3. Verify your API keys
4. Contact ALwrity support
---
*Note: Each component configures one specific aspect of ALwrity. Work through the setup wizard steps in sequence so that every component is properly configured for optimal content creation.*

View File

@@ -0,0 +1,22 @@
"""API key manager components package."""
from .ai_research_setup import render_ai_research_setup
from .ai_research import render_ai_research
from .ai_providers import render_ai_providers
from .final_setup import render_final_setup
from .personalization_setup import render_personalization_setup
from .alwrity_integrations import render_alwrity_integrations
from .base import render_navigation_buttons, render_step_indicator
from .website_setup import render_website_setup
# Public API of the components package: the render functions imported above,
# re-exported so that `from <package> import *` exposes exactly these names.
__all__ = [
'render_ai_research_setup',
'render_ai_research',
'render_ai_providers',
'render_final_setup',
'render_personalization_setup',
'render_alwrity_integrations',
'render_navigation_buttons',
'render_step_indicator',
'render_website_setup'
]

View File

@@ -0,0 +1,137 @@
"""AI Research setup component."""
import streamlit as st
from typing import Dict, Any
from loguru import logger
from ..manager import APIKeyManager
from .base import render_navigation_buttons, render_step_indicator
def render_ai_research(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the AI Research setup step (step 3 of 6).

    Draws a "User Information" tab and a "Research Preferences" tab, checks
    that full name, email and company are all filled in, and shows the
    validation outcome. When ``render_navigation_buttons(3, 6, ...)`` returns
    a truthy value and the input is valid, the collected values are stored in
    ``st.session_state['user_info']``, ``current_step`` is set to 4 and the
    script is rerun.

    Args:
        api_key_manager: Manager instance supplied by the caller; this step
            does not read it.

    Returns:
        Dict[str, Any]: ``{"current_step": 3, "changes_made": bool}``, or
        ``{"current_step": 3, "error": str}`` if rendering raised.
    """
    try:
        st.markdown("""
<div class='setup-header'>
<h2>🔍 AI Research Configuration</h2>
<p>Configure your research preferences and provide user information</p>
</div>
""", unsafe_allow_html=True)

        # Create tabs for different sections
        tabs = st.tabs(["User Information", "Research Preferences"])
        changes_made = False
        has_valid_info = False

        with tabs[0]:
            st.markdown("### User Information")
            st.markdown("Please provide your details for personalized research experience")
            # User Information Card
            with st.container():
                st.markdown("""
<div class="user-info-card">
<div class="user-info-header">
<div class="user-info-icon">👤</div>
<div class="user-info-title">Personal Details</div>
</div>
<div class="user-info-content">
<p>Your information helps us customize the research experience.</p>
</div>
</div>
""", unsafe_allow_html=True)
                # User Input Fields with Streamlit Components
                full_name = st.text_input("Full Name", key="full_name",
                                          help="Enter your full name as you'd like it to appear")
                email = st.text_input("Email Address", key="email",
                                      help="Enter your business email address")
                company = st.text_input("Company/Organization", key="company",
                                        help="Enter your company or organization name")
                role = st.selectbox("Role",
                                    ["Content Creator", "Marketing Manager", "Business Owner", "Other"],
                                    help="Select your primary role")

        with tabs[1]:
            st.markdown("### Research Preferences")
            st.markdown("Configure how AI assists with your research")
            # Research Preferences Card
            with st.container():
                st.markdown("""
<div class="research-prefs-card">
<div class="research-prefs-header">
<div class="research-prefs-icon">🎯</div>
<div class="research-prefs-title">Research Settings</div>
</div>
</div>
""", unsafe_allow_html=True)
                # Research Preferences Settings
                research_depth = st.select_slider(
                    "Research Depth",
                    options=["Basic", "Standard", "Deep", "Comprehensive"],
                    value="Standard",
                    help="Choose how detailed you want the AI research to be"
                )
                st.markdown("#### Content Types")
                content_types = st.multiselect(
                    "Select content types to focus on",
                    ["Blog Posts", "Social Media", "Technical Articles", "News", "Academic Papers"],
                    default=["Blog Posts", "Social Media"],
                    help="Choose what types of content you want to research"
                )
                auto_research = st.toggle(
                    "Enable Automated Research",
                    help="Automatically start research when content topics are added"
                )

        # Validate inputs: name, email and company are the required fields.
        if all([full_name, email, company]):
            changes_made = True
            has_valid_info = True
            validation_message = "✅ User information completed successfully"
        else:
            validation_message = "⚠️ Please fill in all required fields to continue"

        # Pick the banner from the validation result itself. The previous
        # check (`"" in validation_message`) is true for every string, so the
        # warning could never be shown.
        if has_valid_info:
            st.success(validation_message)
        else:
            st.warning(validation_message)

        # Navigation buttons
        if render_navigation_buttons(3, 6, changes_made):
            if has_valid_info:
                # Store user information in session state
                st.session_state['user_info'] = {
                    'full_name': full_name,
                    'email': email,
                    'company': company,
                    'role': role,
                    'research_preferences': {
                        'depth': research_depth,
                        'content_types': content_types,
                        'auto_research': auto_research
                    }
                }
                # Update progress and move to next step
                st.session_state['current_step'] = 4
                st.rerun()
            else:
                st.error("Please complete all required fields to continue")

        return {"current_step": 3, "changes_made": changes_made}
    except Exception as e:
        error_msg = f"Error in AI research setup: {str(e)}"
        logger.error(f"[render_ai_research] {error_msg}")
        st.error(error_msg)
        return {"current_step": 3, "error": error_msg}

View File

@@ -0,0 +1,188 @@
"""Personalization setup component."""
import streamlit as st
from typing import Dict, Any
from loguru import logger
from ..manager import APIKeyManager
from .base import render_navigation_buttons, render_step_indicator
def render_personalization(api_key_manager: APIKeyManager) -> Dict[str, Any]:
    """Render the personalization setup step (step 4 of 6).

    Draws "Content Style", "Brand Voice" and "Advanced Settings" tabs, checks
    that writing style, tone, content length and at least one brand
    personality trait are set, and shows the validation outcome. When
    ``render_navigation_buttons(4, 6, ...)`` returns a truthy value and the
    settings are valid, they are stored in
    ``st.session_state['personalization']``, ``current_step`` is set to 5 and
    the script is rerun.

    Args:
        api_key_manager: Manager instance supplied by the caller; this step
            does not read it.

    Returns:
        Dict[str, Any]: ``{"current_step": 4, "changes_made": bool}``, or
        ``{"current_step": 4, "error": str}`` if rendering raised.
    """
    try:
        st.markdown("""
<div class='setup-header'>
<h2>🎨 Personalization Settings</h2>
<p>Customize your content generation experience</p>
</div>
""", unsafe_allow_html=True)

        # Create tabs for different sections
        tabs = st.tabs(["Content Style", "Brand Voice", "Advanced Settings"])
        changes_made = False
        has_valid_settings = False

        with tabs[0]:
            st.markdown("### Content Style")
            st.markdown("Define your preferred content style and tone")
            # Content Style Card
            with st.container():
                st.markdown("""
<div class="style-card">
<div class="style-header">
<div class="style-icon">✨</div>
<div class="style-title">Writing Style</div>
</div>
<div class="style-content">
<p>Choose how you want your content to be written.</p>
</div>
</div>
""", unsafe_allow_html=True)
                # Style Settings
                writing_style = st.selectbox(
                    "Writing Style",
                    ["Professional", "Casual", "Technical", "Conversational", "Academic"],
                    help="Select your preferred writing style"
                )
                tone = st.select_slider(
                    "Content Tone",
                    options=["Formal", "Semi-Formal", "Neutral", "Friendly", "Humorous"],
                    value="Neutral",
                    help="Choose the tone for your content"
                )
                content_length = st.select_slider(
                    "Content Length",
                    options=["Concise", "Standard", "Detailed", "Comprehensive"],
                    value="Standard",
                    help="Select your preferred content length"
                )

        with tabs[1]:
            st.markdown("### Brand Voice")
            st.markdown("Configure your brand's unique voice and personality")
            # Brand Voice Card
            with st.container():
                st.markdown("""
<div class="brand-card">
<div class="brand-header">
<div class="brand-icon">🎯</div>
<div class="brand-title">Brand Identity</div>
</div>
<div class="brand-content">
<p>Define your brand's personality and voice.</p>
</div>
</div>
""", unsafe_allow_html=True)
                # Brand Settings
                brand_personality = st.multiselect(
                    "Brand Personality Traits",
                    ["Professional", "Innovative", "Friendly", "Trustworthy", "Creative", "Expert"],
                    default=["Professional", "Trustworthy"],
                    help="Select traits that best describe your brand"
                )
                brand_voice = st.text_area(
                    "Brand Voice Description",
                    help="Describe how your brand should sound in content"
                )
                keywords = st.text_input(
                    "Brand Keywords",
                    help="Enter key terms that should be used in your content"
                )

        with tabs[2]:
            st.markdown("### Advanced Settings")
            st.markdown("Fine-tune your content generation preferences")
            # Advanced Settings Card
            with st.container():
                st.markdown("""
<div class="advanced-card">
<div class="advanced-header">
<div class="advanced-icon">⚙️</div>
<div class="advanced-title">Advanced Options</div>
</div>
<div class="advanced-content">
<p>Configure advanced content generation settings.</p>
</div>
</div>
""", unsafe_allow_html=True)
                # Advanced Settings
                seo_optimization = st.toggle(
                    "Enable SEO Optimization",
                    help="Automatically optimize content for search engines"
                )
                readability_level = st.select_slider(
                    "Readability Level",
                    options=["Simple", "Standard", "Advanced", "Expert"],
                    value="Standard",
                    help="Choose the complexity level of your content"
                )
                content_structure = st.multiselect(
                    "Content Structure",
                    ["Introduction", "Key Points", "Examples", "Conclusion", "Call-to-Action"],
                    default=["Introduction", "Key Points", "Conclusion"],
                    help="Select required content sections"
                )

        # Validate settings: style, tone, length and at least one brand trait
        # are required; the remaining fields are optional.
        if all([writing_style, tone, content_length, brand_personality]):
            changes_made = True
            has_valid_settings = True
            validation_message = "✅ Personalization settings completed successfully"
        else:
            validation_message = "⚠️ Please complete all required settings to continue"

        # Pick the banner from the validation result itself. The previous
        # check (`"" in validation_message`) is true for every string, so the
        # warning could never be shown.
        if has_valid_settings:
            st.success(validation_message)
        else:
            st.warning(validation_message)

        # Navigation buttons
        if render_navigation_buttons(4, 6, changes_made):
            if has_valid_settings:
                # Store personalization settings in session state
                st.session_state['personalization'] = {
                    'content_style': {
                        'writing_style': writing_style,
                        'tone': tone,
                        'content_length': content_length
                    },
                    'brand_voice': {
                        'personality': brand_personality,
                        'voice_description': brand_voice,
                        'keywords': keywords
                    },
                    'advanced_settings': {
                        'seo_optimization': seo_optimization,
                        'readability_level': readability_level,
                        'content_structure': content_structure
                    }
                }
                # Update progress and move to next step
                st.session_state['current_step'] = 5
                st.rerun()
            else:
                st.error("Please complete all required settings to continue")

        return {"current_step": 4, "changes_made": changes_made}
    except Exception as e:
        error_msg = f"Error in personalization setup: {str(e)}"
        logger.error(f"[render_personalization] {error_msg}")
        st.error(error_msg)
        return {"current_step": 4, "error": error_msg}

View File

@@ -0,0 +1,79 @@
import streamlit as st
from lib.alwrity_ui.similar_analysis import competitor_analysis
from lib.alwrity_ui.keyword_web_researcher import do_web_research
def content_planning_tools():
    """Render the content ideation and planning page.

    Injects CSS for a compact layout, shows a short description, and lays out
    three tabs: keyword research, competitor analysis and the content
    calendar dashboard.
    """
    # CSS overrides that tighten Streamlit's default spacing.
    compact_css = """
<style>
/* Reduce top padding of main container */
.main .block-container {
padding-top: 0rem !important;
padding-bottom: 1rem !important;
}
/* Reduce spacing between elements */
.stTabs {
margin-top: 0.5rem !important;
}
/* Make markdown text more compact */
.element-container {
margin-bottom: 0.5rem !important;
}
/* Adjust subheader margins */
.stMarkdown h3 {
margin-top: 0 !important;
margin-bottom: 0.5rem !important;
}
</style>
"""
    st.markdown(compact_css, unsafe_allow_html=True)

    # Page description, rendered in a slightly smaller font.
    intro_html = """
<div style='font-size: 0.9em; margin-bottom: 0.5rem;'>
<strong>Alwrity content Ideation & Planning</strong>: Provide few keywords to do comprehensive web research.
Provide few keywords to get Google, Neural, pytrends analysis. Know keywords, blog titles to target.
Generate months long content calendar around given keywords.
</div>
"""
    st.markdown(intro_html, unsafe_allow_html=True)

    tab_labels = [
        "🔍 Keywords Researcher",
        "📊 Competitor Analysis",
        "📅 Content Calendar Ideator"
    ]
    keywords_tab, competitor_tab, calendar_tab = st.tabs(tab_labels)

    with keywords_tab:
        do_web_research()

    with competitor_tab:
        competitor_analysis()

    with calendar_tab:
        st.info("🚧 **Content Calendar & Planning Dashboard**")
        calendar_html = """
<div style='background-color: #f0f2f6; padding: 15px; border-radius: 5px; margin-bottom: 20px;'>
<h3 style='margin-top: 0;'>📅 Content Calendar & Planning Dashboard</h3>
<p>The Content Calendar Dashboard provides:</p>
<ul>
<li>AI-powered content planning and generation</li>
<li>Multi-platform content scheduling</li>
<li>Content optimization tools</li>
<li>A/B testing capabilities</li>
<li>Performance analytics</li>
</ul>
</div>
"""
        st.markdown(calendar_html, unsafe_allow_html=True)
        # The dashboard module is imported only when this point is reached.
        from lib.ai_seo_tools.content_calendar.ui.dashboard import ContentCalendarDashboard
        ContentCalendarDashboard().render()

View File

@@ -0,0 +1,113 @@
import os
import sys
import datetime
import subprocess
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from PIL import Image
from selenium import webdriver
from PIL import Image
import shutil
from screenshotone import Client, TakeOptions
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from '../.env'. The path is relative, so it is
# resolved against the current working directory, not this file's location.
load_dotenv(Path('../.env'))
from loguru import logger
# Replace the handlers registered so far with a single colourised stdout sink
# that prefixes each message with level and file:line:function.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
def screenshot_api(url, generated_image_filepath):
    """Take a webpage screenshot with the ScreenshotOne API.

    The PNG is written to ``generated_image_filepath`` and briefly shown in
    the default image viewer. If anything in the API path fails, the function
    falls back to the Selenium-based ``take_screenshot``.

    Args:
        url: Address of the page to capture.
        generated_image_filepath: Destination path for the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    try:
        # Credentials are read from the environment.
        client = Client(os.getenv('SCREENSHOTONE_ACCESS_KEY'), os.getenv('SCREENSHOTONE_SECRET_KEY'))
        options = (TakeOptions.url(url)
                   .format("png")
                   .viewport_width(1024)
                   .viewport_height(768)
                   .block_cookie_banners(True)
                   .block_chats(True))
        # Render the screenshot and receive the image as a stream.
        image_stream = client.take(options)
        # Binary mode must not be given an encoding: open(..., 'wb', encoding=...)
        # raises ValueError, which previously forced the Selenium fallback on
        # every call.
        with open(generated_image_filepath, 'wb') as result_file:
            shutil.copyfileobj(image_stream, result_file)
        # Show the screenshot in the default viewer for a moment.
        with Image.open(generated_image_filepath) as preview:
            preview.show()
            # Wait for 2 seconds (adjust the delay as needed)
            sleep(2)
        # Leaving the `with` block closes the image file; it does not close
        # the external viewer window.
    except Exception as err:
        logger.error(f"Failed in screenshotone api: {err}")
        generated_image_filepath = take_screenshot(url, generated_image_filepath)
    return generated_image_filepath
def take_screenshot(url, generated_image_filepath):
    """Take a webpage screenshot with headless Chrome via Selenium.

    Loads ``url``, waits for the ``<body>`` element, captures a 1200x800
    window as PNG, writes it to ``generated_image_filepath`` and briefly
    previews it. The webdriver is always shut down, even on failure.

    Args:
        url: Address of the page to capture.
        generated_image_filepath: Destination path for the PNG file.

    Returns:
        The path of the stored screenshot.

    Raises:
        Exceptions from Selenium or file I/O are not caught here and
        propagate to the caller.
    """
    # Create a webdriver instance in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    logger.debug(f"Taking screenshot of url: {url}")
    try:
        driver.get(url)
        # Give late-loading content a moment before checking for <body>.
        sleep(2)
        # Explicitly wait for the page to load (adjust timeout as needed)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        # Set a larger window size
        driver.set_window_size(1200, 800)
        screenshot = driver.get_screenshot_as_png()
        # Binary mode must not be given an encoding: open(..., "wb", encoding=...)
        # raises ValueError, so the screenshot could never be saved.
        with open(generated_image_filepath, "wb") as f:
            f.write(screenshot)
        # Display the screenshot using Image.show
        image = Image.open(generated_image_filepath)
        image.show()
        # Wait for 2 seconds (adjust the delay as needed)
        sleep(2)
        # Close the image window using subprocess (platform-dependent)
        subprocess.run(["pkill", "-f", "display"])  # Adjust based on your platform and viewer
        # If using macOS, you can use the following:
        # subprocess.run(["osascript", "-e", 'tell application "Preview" to close every window'])
        # If using Windows, you can use the following:
        # subprocess.run(["taskkill", "/F", "/IM", "Microsoft.Photos.exe"])
        logger.debug(f"Screenshot successfully stored at: {generated_image_filepath}")
        return generated_image_filepath
    finally:
        # Close the webdriver instance
        driver.quit()

View File

@@ -0,0 +1,310 @@
"""Test configuration settings page for ALwrity."""
import streamlit as st
from loguru import logger
import asyncio
from lib.web_crawlers.async_web_crawler import AsyncWebCrawlerService
from pages.style_utils import (
get_test_config_styles,
get_glass_container,
get_info_section,
get_example_box,
get_analysis_section,
get_style_guide_html
)
import sys
from lib.personalization.style_analyzer import StyleAnalyzer
# Set page config - must be the first Streamlit command
# Uses the wide layout with the sidebar initially collapsed, and sets the
# "Get Help", "Report a bug" and "About" menu entries to None.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed",
    menu_items={
        'Get Help': None,
        'Report a bug': None,
        'About': None
    }
)
import yaml
from pathlib import Path
import os
from loguru import logger
from lib.utils.read_main_config_params import get_personalization_settings
from lib.web_crawlers.crawl4ai_web_crawler import analyze_style
# Configure logger
logger.remove()  # Remove default handler
# File sink: DEBUG and above, rotated at 500 MB, kept for 10 days, with
# backtrace/diagnose enabled for richer exception reports.
logger.add(
    "logs/test_config_settings.log",
    rotation="500 MB",
    retention="10 days",
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    backtrace=True,
    diagnose=True
)
# Console sink: INFO and above, using colour markup in the format string.
logger.add(
    sys.stdout,
    level="INFO",
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{message}</cyan>"
)
# Apply CSS styles
# The stylesheet returned by get_test_config_styles() is injected as raw HTML.
st.markdown(get_test_config_styles(), unsafe_allow_html=True)
def load_website_url():
    """Load website URL from config file.

    Reads the YAML file whose path is stored in the ``ALWRITY_CONFIG``
    environment variable and returns its ``website.url`` entry.

    Returns:
        The configured URL, or ``''`` when the entry is absent or the file
        cannot be read or parsed (the error is logged, never raised).
    """
    try:
        logger.debug("Loading website URL from config file")
        config_path = Path(os.environ["ALWRITY_CONFIG"])
        # An empty YAML document parses to None; treat it as an empty mapping.
        config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
        # A ``website:`` key with no value is also None.
        url = (config.get('website') or {}).get('url', '')
        logger.info(f"Loaded website URL: {url}")
        return url
    except Exception as e:
        # loguru has no ``exc_info`` flag: extra keyword arguments make it run
        # str.format() on the message, which itself raises when the exception
        # text contains braces (common in YAML errors). Pass the exception as
        # a format argument and request the traceback through opt().
        logger.opt(exception=True).error("Error loading website URL: {}", e)
        return ''
def display_style_analysis(analysis_results: dict):
    """Display the style analysis results in a structured format.

    Renders five sections (writing style, content characteristics, target
    audience, content type, recommended settings) as HTML bullet lists.

    Args:
        analysis_results: Mapping of section name to a mapping of field values,
            as produced by the style analyzer. Missing sections or fields are
            shown as "N/A".

    Any rendering error is logged and shown with ``st.error`` instead of being
    raised.
    """
    import html

    def _text(value):
        """Return an HTML-safe display string for a scalar or list value."""
        # Lists are joined item by item; a plain string must not be joined,
        # otherwise it would be split into comma-separated characters.
        if isinstance(value, (list, tuple, set)):
            value = ', '.join(str(item) for item in value)
        if value is None or value == "":
            return "N/A"
        # Values originate from crawled pages / LLM output and are rendered
        # with unsafe_allow_html=True, so they must be escaped.
        return html.escape(str(value))

    def _render(title, section_key, fields):
        """Render one section as a <ul> of "label: value" items."""
        section = analysis_results.get(section_key) or {}
        items = "".join(
            f"<li><strong>{label}:</strong> {_text(section.get(key))}</li>\n"
            for label, key in fields
        )
        st.markdown(
            get_analysis_section(title, f"\n<ul>\n{items}</ul>\n"),
            unsafe_allow_html=True,
        )

    try:
        # Writing Style Section
        st.markdown("### 🎨 Writing Style Analysis")
        _render("Writing Style", "writing_style", [
            ("Tone", "tone"),
            ("Voice", "voice"),
            ("Complexity", "complexity"),
            ("Engagement Level", "engagement_level"),
        ])
        # Content Characteristics Section
        _render("Content Characteristics", "content_characteristics", [
            ("Sentence Structure", "sentence_structure"),
            ("Vocabulary Level", "vocabulary_level"),
            ("Paragraph Organization", "paragraph_organization"),
            ("Content Flow", "content_flow"),
        ])
        # Target Audience Section
        _render("Target Audience", "target_audience", [
            ("Demographics", "demographics"),
            ("Expertise Level", "expertise_level"),
            ("Industry Focus", "industry_focus"),
            ("Geographic Focus", "geographic_focus"),
        ])
        # Content Type Section
        _render("Content Type", "content_type", [
            ("Primary Type", "primary_type"),
            ("Secondary Types", "secondary_types"),
            ("Purpose", "purpose"),
            ("Call to Action", "call_to_action"),
        ])
        # Recommended Settings Section
        _render("Recommended Settings", "recommended_settings", [
            ("Writing Tone", "writing_tone"),
            ("Target Audience", "target_audience"),
            ("Content Type", "content_type"),
            ("Creativity Level", "creativity_level"),
            ("Geographic Location", "geographic_location"),
        ])
    except Exception as e:
        logger.error(f"Error displaying style analysis: {str(e)}")
        st.error(f"Error displaying analysis results: {str(e)}")
def render_test_config_settings():
    """Render the test configuration settings page.

    Layout: a back button, a title, then two columns. The left column takes a
    website URL (or, when none is given, pasted writing samples) and runs the
    crawl + style analysis when the "ALwrity Style" button is pressed; the
    right column shows static explanatory text.

    Errors are logged and reported through ``st.error``; nothing is raised to
    the caller.
    """
    try:
        logger.info("Starting to render test configuration settings")
        # Add back button at the top
        col1, col2 = st.columns([1, 3])
        with col1:
            if st.button("← Back to Personalization Setup"):
                logger.info("User clicked back to personalization setup")
                # Set session state for navigation
                st.session_state.current_step = 4
                st.session_state.next_step = "personalization_setup"
                # Navigate back to the main page where personalization setup is rendered
                st.switch_page("alwrity.py")
        # Title and description
        st.title("🎨 Find Your Style with ALwrity")
        st.markdown(get_glass_container(
            "<p>Enter a website URL or provide content samples to analyze your writing style and get personalized recommendations.</p>"
        ), unsafe_allow_html=True)
        # Create two columns for the layout
        col1, col2 = st.columns([2, 1])
        with col1:
            # Website URL input
            st.markdown("### Website URL")
            url = st.text_input(
                "Enter your website URL",
                placeholder="https://example.com",
                help="Provide your website URL to analyze your content style. Leave empty if you want to provide written samples instead."
            )
            logger.debug(f"Website URL input value: {url}")
            # The samples box is only shown when no URL was entered; give the
            # name a value on both paths so later checks never hit a NameError.
            samples = ""
            # Alternative: Written samples
            if not url:
                st.markdown("### Written Samples")
                st.markdown(get_info_section("""
<p>No website URL? No problem! You can provide written samples of your content instead.</p>
<p>Share your best articles, blog posts, or any content that represents your writing style.</p>
"""), unsafe_allow_html=True)
                samples = st.text_area(
                    "Paste your content samples here",
                    help="Paste 2-3 samples of your best content. This helps ALwrity understand your writing style."
                )
                logger.debug(f"Sample text length: {len(samples) if samples else 0}")
            st.markdown('</div>', unsafe_allow_html=True)
            # ALwrity Style button
            st.markdown("<div style='height: 20px'></div>", unsafe_allow_html=True)
            if st.button("🎨 ALwrity Style", use_container_width=True):
                if url:
                    with st.status("Starting style analysis...", expanded=True) as status:
                        try:
                            logger.info(f"Starting style analysis for URL: {url}")
                            # Step 1: Initialize crawler
                            status.update(label="Step 1/4: Initializing web crawler...", state="running")
                            crawler_service = AsyncWebCrawlerService()
                            # Step 2: Crawl website
                            status.update(label="Step 2/4: Crawling website content...", state="running")
                            loop = asyncio.new_event_loop()
                            try:
                                asyncio.set_event_loop(loop)
                                result = loop.run_until_complete(crawler_service.crawl_website(url))
                            finally:
                                # Release the loop even when the crawl raises.
                                loop.close()
                            if result.get('success', False):
                                content = result.get('content', {})
                                # Step 3: Initialize style analyzer
                                status.update(label="Step 3/4: Analyzing content style...", state="running")
                                style_analyzer = StyleAnalyzer()
                                # Step 4: Perform style analysis
                                status.update(label="Step 4/4: Generating style recommendations...", state="running")
                                style_analysis = style_analyzer.analyze_content_style(content)
                                if style_analysis.get('error'):
                                    status.update(label="Analysis failed", state="error")
                                    st.error(f"Style analysis failed: {style_analysis['error']}")
                                else:
                                    status.update(label="Analysis complete!", state="complete")
                                    # Display style analysis results
                                    display_style_analysis(style_analysis)
                                    # Display original content in tabs
                                    tab1, tab2, tab3 = st.tabs(["Content", "Metadata", "Links"])
                                    with tab1:
                                        st.markdown("### Main Content")
                                        st.markdown(content.get('main_content', 'No content found'))
                                    with tab2:
                                        st.markdown("### Metadata")
                                        st.markdown(f"""
**Title:** {content.get('title', 'No title found')}
**Description:** {content.get('description', 'No description found')}
**Meta Tags:**
{content.get('meta_tags', {})}
""")
                                    with tab3:
                                        st.markdown("### Links")
                                        for link in content.get('links', []):
                                            st.markdown(f"- [{link.get('text', '')}]({link.get('href', '')})")
                            else:
                                status.update(label="Crawling failed", state="error")
                                st.error(f"Failed to analyze website: {result.get('error', 'Unknown error')}")
                        except Exception as e:
                            logger.error(f"Error during style analysis: {str(e)}")
                            # Without this the status widget stays on its last
                            # "running" step after an unexpected error.
                            status.update(label="Analysis failed", state="error")
                            st.error(f"Analysis failed: {str(e)}")
                elif samples:
                    with st.spinner("Analyzing content samples..."):
                        try:
                            # TODO: Implement sample text analysis
                            st.info("Sample text analysis coming soon!")
                        except Exception as e:
                            logger.error(f"Error analyzing samples: {str(e)}")
                            st.error(f"Analysis failed: {str(e)}")
                else:
                    st.warning("Please provide either a website URL or content samples")
        with col2:
            st.markdown("""
### How ALwrity Discovers Your Style
**AI-Powered Style Analysis**
ALwrity AI analyzes your existing content to understand your unique writing style and preferences. This helps us generate content that matches your voice perfectly.
**Step 1: Content Analysis**
We'll analyze your website content or written samples to understand:
- Writing tone and voice
- Vocabulary and language style
- Content structure and formatting
- Target audience and engagement style
**Step 2: Style Recommendations**
Based on the analysis, we'll provide:
- Personalized writing guidelines
- Content structure templates
- Tone and voice recommendations
- Audience engagement strategies
**Step 3: Content Generation**
Finally, we'll use these insights to:
- Generate content that matches your style
- Maintain consistency across all content
- Optimize for your target audience
- Ensure brand voice alignment
""")
    except Exception as e:
        logger.error(f"Error in render_test_config_settings: {str(e)}")
        st.error(f"An error occurred: {str(e)}")
# Entry point: render the page when this file is executed as the main script,
# logging before and after the render call.
if __name__ == "__main__":
    logger.info("Starting test config settings page")
    render_test_config_settings()
    logger.info("Test config settings page rendered successfully")

View File

@@ -0,0 +1,23 @@
import streamlit as st
from streamlit_mic_recorder import speech_to_text
def record_voice(language="en"):
    """Show the microphone widget and return whatever speech was transcribed.

    Args:
        language: Language code forwarded to ``speech_to_text``.

    Returns:
        The concatenation of the transcripts buffered in
        ``st.session_state.text_received`` (the buffer is emptied afterwards),
        or ``None`` when nothing was transcribed.
    """
    # https://github.com/B4PT0R/streamlit-mic-recorder?tab=readme-ov-file#example
    session = st.session_state
    if "text_received" not in session:
        session.text_received = []
    transcript = speech_to_text(
        start_prompt="🎙Press & Speak🔊",
        stop_prompt="🔇Stop Recording🚨",
        language=language,
        use_container_width=True,
        just_once=False,
    )
    if transcript:
        session.text_received.append(transcript)
    # Drain the buffer into a single string, then reset it for the next run.
    combined = "".join(session.text_received)
    session.text_received = []
    return combined or None

View File

@@ -0,0 +1,181 @@
# Website Analyzer Module
A comprehensive website analysis toolkit that provides detailed insights into website performance, SEO metrics, and content quality. This module combines traditional web analysis techniques with AI-powered content evaluation to deliver actionable recommendations.
## Features
### 1. Comprehensive Website Analysis
- Basic website information extraction
- SSL/TLS certificate validation
- DNS record analysis
- WHOIS information retrieval
- Content analysis and structure evaluation
- Performance metrics assessment
### 2. Advanced SEO Analysis
- Meta tag optimization analysis
- Content quality evaluation
- Keyword density analysis
- Readability scoring
- Heading structure analysis
- AI-powered content recommendations
### 3. Technical Infrastructure
- Asynchronous web crawling
- Multi-threaded analysis
- Robust error handling
- Comprehensive logging
- Type-safe data models
## Module Structure
### 1. `analyzer.py`
The main analysis engine that provides comprehensive website analysis.
#### Key Components:
- `WebsiteAnalyzer` class
- URL validation
- Basic website information extraction
- SSL/TLS certificate checking
- DNS record analysis
- WHOIS information retrieval
- Content analysis
- Performance metrics assessment
#### Features:
- Concurrent analysis using ThreadPoolExecutor
- Robust error handling and logging
- User-agent simulation for reliable scraping
- Timeout handling for requests
- Comprehensive result formatting
### 2. `seo_analyzer.py`
Specialized SEO analysis module with AI integration.
#### Key Components:
- `extract_content()`: Fetches and parses webpage content
- `analyze_meta_tags()`: Evaluates meta tags and SEO elements
- `analyze_content_with_ai()`: AI-powered content analysis
- `analyze_seo()`: Main SEO analysis function
#### Features:
- Meta tag optimization analysis
- Content quality scoring
- Keyword density analysis
- Readability evaluation
- AI-powered recommendations
- Weighted scoring system
### 3. `models.py`
Data models for structured analysis results.
#### Key Components:
- `SEORecommendation`: Individual SEO recommendations
- `MetaTagAnalysis`: Meta tag analysis results
- `ContentAnalysis`: Content analysis metrics
- `SEOAnalysisResult`: Complete analysis results
#### Features:
- Type-safe data structures
- Clear data organization
- Easy serialization/deserialization
- Comprehensive documentation
## Usage Examples
### Basic Website Analysis
```python
from website_analyzer import analyze_website
# Analyze a website
results = analyze_website("https://example.com")
# Access analysis results
if results["success"]:
data = results["data"]
print(f"Domain: {data['domain']}")
print(f"SSL Info: {data['analysis']['ssl_info']}")
print(f"Content Info: {data['analysis']['content_info']}")
```
### SEO Analysis
```python
from website_analyzer.seo_analyzer import analyze_seo
# Perform SEO analysis
seo_results = analyze_seo("https://example.com", "your-openai-api-key")
# Access SEO results
if seo_results.success:
print(f"Overall Score: {seo_results.overall_score}")
print(f"Meta Tags: {seo_results.meta_tags}")
print(f"Content Analysis: {seo_results.content}")
print(f"Recommendations: {seo_results.recommendations}")
```
## Dependencies
- `requests`: HTTP requests
- `beautifulsoup4`: HTML parsing
- `python-whois`: WHOIS information
- `dnspython`: DNS record analysis
- `openai`: AI-powered analysis
- `loguru`: Logging
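- `typing`: Type hints (Python standard library; no installation required)
- `dataclasses`: Data models (Python standard library; no installation required)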
## Error Handling
The module implements comprehensive error handling:
- URL validation
- Request timeouts
- Connection errors
- Parsing errors
- API errors
- DNS resolution errors
- SSL/TLS errors
All errors are logged and returned in a structured format for easy handling.
## Logging
The module uses `loguru` for logging with the following features:
- File rotation (500 MB)
- 10-day retention
- Debug level logging
- Structured log format
- Both file and stdout output
## Best Practices
1. **API Key Management**
- Store API keys securely
- Use environment variables
- Implement rate limiting
2. **Error Handling**
- Always check success status
- Handle errors gracefully
- Log errors appropriately
3. **Performance**
- Use concurrent analysis
- Implement timeouts
- Cache results when possible
4. **Rate Limiting**
- Respect website robots.txt
- Implement delays between requests
- Use appropriate user agents
## Contributing
1. Fork the repository
2. Create a feature branch
3. Commit your changes
4. Push to the branch
5. Create a Pull Request
## License
This module is part of the ALwrity project and is licensed under the MIT License.

View File

@@ -0,0 +1,6 @@
"""Website analyzer module for AI-powered website analysis."""
from .analyzer import analyze_website, WebsiteAnalyzer
from .models import SEOAnalysisResult
__all__ = ['analyze_website', 'WebsiteAnalyzer', 'SEOAnalysisResult']

View File

@@ -0,0 +1,697 @@
"""Website and SEO analysis module."""
import asyncio
from typing import Dict, List, Optional, Tuple
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import streamlit as st
import re
from loguru import logger
from ...web_crawlers.async_web_crawler import AsyncWebCrawlerService
from ...gpt_providers.text_generation.main_text_generation import llm_text_gen
import os
import sys
import logging
import json
from datetime import datetime
import requests
import ssl
import socket
import whois
import dns.resolver
from requests.exceptions import RequestException
from concurrent.futures import ThreadPoolExecutor
from .models import (
SEOAnalysisResult,
MetaTagAnalysis,
ContentAnalysis,
SEORecommendation
)
# Configure logging
# logging.FileHandler opens its file as soon as it is constructed and raises
# FileNotFoundError when the directory is missing, so create it first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('logs/website_analyzer.log')
    ]
)
# Create a logger for the website analyzer
# NOTE: this rebinds the module-level name ``logger``; from here on it is a
# standard-library logger, not the loguru logger imported at the top.
logger = logging.getLogger(__name__)
# Create a separate logger for scraping operations
# Its level is WARNING, so the scraping debug messages are not emitted.
scraping_logger = logging.getLogger('website_analyzer.scraping')
scraping_logger.setLevel(logging.WARNING)
class WebsiteAnalyzer:
def __init__(self):
    """Create the HTTP session shared by all requests made by this analyzer.

    The session sends a desktop-browser ``User-Agent`` header with every
    request.
    """
    browser_user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    http_session = requests.Session()
    http_session.headers.update({'User-Agent': browser_user_agent})
    self.session = http_session
    logger.info("WebsiteAnalyzer initialized")
def analyze_website(self, url: str) -> Dict:
    """
    Perform comprehensive analysis of a website.

    The analysis steps run one after another: basic info, SSL, DNS, WHOIS,
    content, performance and SEO. An ``"error"`` key in the basic-info,
    content, performance or SEO result aborts the analysis; SSL, DNS and WHOIS
    results are stored as returned, errors included.

    Args:
        url (str): The URL to analyze

    Returns:
        Dict: ``{"success": True, "data": {...}}`` with ``url``, ``domain``,
        ``timestamp`` and an ``analysis`` mapping, or
        ``{"success": False, "error": str, "error_details": {...}}``.
        Exceptions are caught and reported in the failure form.
    """
    import traceback

    def failure(error_msg: str, details: Dict) -> Dict:
        """Log ``error_msg`` and build the standard failure payload."""
        logger.error(error_msg)
        return {"success": False, "error": error_msg, "error_details": details}

    logger.info(f"Starting analysis for URL: {url}")
    try:
        # Validate URL
        if not self._validate_url(url):
            return failure(f"Invalid URL format: {url}", {"stage": "url_validation"})
        # Basic URL parsing
        domain = urlparse(url).netloc
        # Initialize results dictionary
        results = {
            "url": url,
            "domain": domain,
            "timestamp": datetime.now().isoformat(),
            "analysis": {}
        }
        # Each step: (result key, log label, callable, argument, fatal).
        # ``fatal`` is None for best-effort steps, otherwise the
        # (message prefix, stage name) reported when the step returns an error.
        # The previous implementation pushed every step through a thread pool
        # but waited on each future immediately, so execution was already
        # sequential; the steps are simply called in order here.
        steps = [
            ("basic_info", "basic info", self._get_basic_info, url, ("Basic info", "basic_info")),
            ("ssl_info", "SSL", self._check_ssl, domain, None),
            ("dns_info", "DNS", self._check_dns, domain, None),
            ("whois_info", "WHOIS", self._get_whois_info, domain, None),
            ("content_info", "content", self._analyze_content, url, ("Content", "content_analysis")),
            ("performance", "performance", self._check_performance, url, ("Performance", "performance_analysis")),
            ("seo_info", "SEO", self._analyze_seo, url, ("SEO", "seo_analysis")),
        ]
        logger.info("Starting analysis tasks")
        for result_key, label, run_step, target, fatal in steps:
            logger.info(f"Starting {label} analysis")
            outcome = run_step(target)
            if fatal is not None and "error" in outcome:
                prefix, stage = fatal
                return failure(
                    f"{prefix} analysis failed: {outcome['error']}",
                    {"stage": stage, "details": outcome.get("error_details", {})},
                )
            results["analysis"][result_key] = outcome
        logger.info(f"Analysis completed successfully for {url}")
        # default=str keeps this debug dump from raising on values json cannot
        # encode (datetimes, sets, ...); a logging statement must never turn a
        # successful analysis into a failure.
        logger.debug(f"Final results: {json.dumps(results, indent=2, default=str)}")
        return {
            "success": True,
            "data": results
        }
    except Exception as e:
        error_msg = f"Error during website analysis: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "success": False,
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # str(e.__traceback__) is only "<traceback object at 0x...>";
                # return the formatted stack instead.
                "traceback": traceback.format_exc()
            }
        }
def _validate_url(self, url: str) -> bool:
    """Return True when ``url`` parses with both a scheme and a network location.

    Parsing errors are logged and reported as ``False``.
    """
    try:
        parts = urlparse(url)
    except Exception as e:
        logger.error(f"URL validation error: {str(e)}")
        return False
    return bool(parts.scheme) and bool(parts.netloc)
def _get_basic_info(self, url: str) -> Dict:
    """Get basic website information.

    Fetches ``url`` and returns its status code, content type, title, meta
    description, response headers, robots.txt and sitemap contents.

    Args:
        url: Page to fetch.

    Returns:
        Dict: The fields above on success; on failure a dict with ``error``
        and ``error_details`` keys (nothing is raised).
    """
    import traceback

    scraping_logger.debug(f"Getting basic info for {url}")
    try:
        response = self.session.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # ``.string`` can be None (for example an empty <title>); always
        # report a string.
        title = (soup.title.string if soup.title else None) or ''
        return {
            "status_code": response.status_code,
            "content_type": response.headers.get('content-type', ''),
            "title": title,
            "meta_description": self._get_meta_description(soup),
            "headers": dict(response.headers),
            "robots_txt": self._get_robots_txt(url),
            "sitemap": self._get_sitemap(url)
        }
    except requests.exceptions.RequestException as e:
        error_msg = f"Request error in basic info: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": "RequestException",
                # ``response`` is absent or None when no response was received.
                "status_code": getattr(getattr(e, 'response', None), 'status_code', None),
                "url": url
            }
        }
    except Exception as e:
        error_msg = f"Error getting basic info: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # A formatted stack rather than the traceback object's repr.
                "traceback": traceback.format_exc()
            }
        }
def _check_ssl(self, domain: str, timeout: float = 10.0) -> Dict:
    """Check SSL/TLS certificate information.

    Args:
        domain: Host to inspect. A URL network location is accepted, so
            ``user@host:port`` forms are reduced to host and port.
        timeout: Seconds to wait for the TCP connection and handshake.

    Returns:
        Dict: ``has_ssl`` True plus ``issuer``, ``expiry`` (ISO 8601),
        ``version`` and ``subject`` on success; otherwise
        ``{"has_ssl": False, "error": str}``.
    """
    scraping_logger.debug(f"Checking SSL for {domain}")
    try:
        # Callers pass urlparse(url).netloc, which may carry credentials or a
        # port; resolving that string as a host name fails.
        netloc = urlparse(f"//{domain}")
        host = netloc.hostname or domain
        port = netloc.port or 443
        context = ssl.create_default_context()
        # Without a timeout an unresponsive host blocks the analysis forever.
        with socket.create_connection((host, port), timeout=timeout) as sock:
            with context.wrap_socket(sock, server_hostname=host) as ssock:
                cert = ssock.getpeercert()
                return {
                    "has_ssl": True,
                    "issuer": dict(x[0] for x in cert['issuer']),
                    "expiry": datetime.strptime(cert['notAfter'], '%b %d %H:%M:%S %Y %Z').isoformat(),
                    "version": cert['version'],
                    "subject": dict(x[0] for x in cert['subject'])
                }
    except Exception as e:
        logger.error(f"SSL check error: {str(e)}", exc_info=True)
        return {"has_ssl": False, "error": str(e)}
def _check_dns(self, domain: str) -> Dict:
    """Look up the A, AAAA, MX, NS and TXT records of ``domain``.

    Returns:
        Dict: Record type mapped to a list of record strings; a type that has
        no answer or fails to resolve maps to an empty list. An unexpected
        failure yields ``{"error": str}``.
    """
    scraping_logger.debug(f"Checking DNS for {domain}")
    try:
        dns_records = {}
        for rtype in ('A', 'AAAA', 'MX', 'NS', 'TXT'):
            found = []
            try:
                found = [str(rdata) for rdata in dns.resolver.resolve(domain, rtype)]
            except dns.resolver.NoAnswer:
                # The name exists but has no record of this type.
                pass
            except Exception as e:
                scraping_logger.warning(f"Error resolving {rtype} record: {str(e)}")
            dns_records[rtype] = found
        return dns_records
    except Exception as e:
        logger.error(f"DNS check error: {str(e)}", exc_info=True)
        return {"error": str(e)}
def _get_whois_info(self, domain: str) -> Dict:
    """Get WHOIS information for a domain.

    Returns:
        Dict: ``registrar``, ``creation_date``, ``expiration_date``,
        ``updated_date``, ``name_servers``, ``domain_name`` and ``text``.
        When the lookup fails, the same keys are returned with
        ``'Unknown'`` / empty placeholder values.
    """
    scraping_logger.debug(f"Getting WHOIS info for {domain}")
    try:
        w = whois.whois(domain)

        def format_date(date_value):
            """Return the (first) date as an ISO string, or 'Unknown'."""
            if isinstance(date_value, (list, tuple)):
                date_value = date_value[0] if date_value else None
            if not date_value:
                return 'Unknown'
            # A value without isoformat() (e.g. a plain string) is passed
            # through as text instead of raising AttributeError, which would
            # discard every other WHOIS field as well.
            if hasattr(date_value, 'isoformat'):
                return date_value.isoformat()
            return str(date_value)

        return {
            'registrar': w.registrar if hasattr(w, 'registrar') else 'Unknown',
            'creation_date': format_date(w.creation_date),
            'expiration_date': format_date(w.expiration_date),
            'updated_date': format_date(w.updated_date) if hasattr(w, 'updated_date') else 'Unknown',
            'name_servers': w.name_servers if hasattr(w, 'name_servers') else [],
            'domain_name': w.domain_name if hasattr(w, 'domain_name') else domain,
            'text': w.text if hasattr(w, 'text') else ''
        }
    except Exception as e:
        logger.error(f"WHOIS check error: {str(e)}")
        return {
            'registrar': 'Unknown',
            'creation_date': 'Unknown',
            'expiration_date': 'Unknown',
            'updated_date': 'Unknown',
            'name_servers': [],
            'domain_name': domain,
            'text': ''
        }
def _analyze_content(self, url: str) -> Dict:
    """Fetch ``url`` and count its words, headings, images, links and paragraphs.

    Returns:
        Dict: The counts plus ``has_meta_description``, ``has_robots_txt`` and
        ``has_sitemap`` flags. On any failure the same keys are returned with
        zero/False values and an additional ``error`` message.
    """
    scraping_logger.debug(f"Analyzing content for {url}")
    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

    def empty_result(problem: Exception) -> Dict:
        """Build the zeroed result returned when the page cannot be analysed."""
        return {
            "word_count": 0,
            "heading_count": 0,
            "heading_structure": {'h1': 0, 'h2': 0, 'h3': 0, 'h4': 0, 'h5': 0, 'h6': 0},
            "image_count": 0,
            "link_count": 0,
            "paragraph_count": 0,
            "has_meta_description": False,
            "has_robots_txt": False,
            "has_sitemap": False,
            "error": str(problem)
        }

    try:
        response = self.session.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Words are runs of word characters in the page's visible text.
        word_total = len(re.findall(r'\w+', soup.get_text().lower()))
        all_headings = soup.find_all(heading_tags)
        per_level = {tag: len(soup.find_all(tag)) for tag in heading_tags}
        image_total = len(soup.find_all('img'))
        link_total = len(soup.find_all('a'))
        paragraph_total = len(soup.find_all('p'))
        return {
            "word_count": word_total,
            "heading_count": len(all_headings),
            "heading_structure": per_level,
            "image_count": image_total,
            "link_count": link_total,
            "paragraph_count": paragraph_total,
            "has_meta_description": bool(self._get_meta_description(soup)),
            "has_robots_txt": bool(self._get_robots_txt(url)),
            "has_sitemap": bool(self._get_sitemap(url))
        }
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error in content analysis: {str(e)}", exc_info=True)
        return empty_result(e)
    except Exception as e:
        logger.error(f"Content analysis error: {str(e)}", exc_info=True)
        return empty_result(e)
def _check_performance(self, url: str) -> Dict:
    """Check website performance metrics.

    Returns:
        Dict: ``load_time`` (seconds measured around the request),
        ``status_code``, ``content_length`` (bytes of the body), ``headers``
        and ``response_time`` (the response's ``elapsed`` value in seconds).
        On failure the same keys hold zero/empty values and ``error`` is
        added.
    """
    import time

    scraping_logger.debug(f"Checking performance for {url}")

    def empty_result(problem: Exception) -> Dict:
        """Build the zeroed result returned when the request fails."""
        return {
            "load_time": 0,
            "status_code": 0,
            "content_length": 0,
            "headers": {},
            "response_time": 0,
            "error": str(problem)
        }

    try:
        # A monotonic clock is used for the duration: datetime.now() follows
        # the wall clock, which can jump (NTP, DST) and yield wrong or
        # negative load times.
        started = time.perf_counter()
        response = self.session.get(url, timeout=10)
        load_time = time.perf_counter() - started
        return {
            "load_time": load_time,
            "status_code": response.status_code,
            "content_length": len(response.content),
            "headers": dict(response.headers),
            "response_time": response.elapsed.total_seconds()
        }
    except requests.exceptions.RequestException as e:
        logger.error(f"Request error in performance check: {str(e)}", exc_info=True)
        return empty_result(e)
    except Exception as e:
        logger.error(f"Performance check error: {str(e)}", exc_info=True)
        return empty_result(e)
def _get_meta_description(self, soup: BeautifulSoup) -> Optional[str]:
    """Return the ``content`` of the page's ``<meta name="description">`` tag.

    Returns ``None`` when the tag is not found.
    """
    description_tag = soup.find('meta', attrs={'name': 'description'})
    if not description_tag:
        return None
    return description_tag.get('content')
def _get_robots_txt(self, url: str) -> Optional[str]:
    """Get robots.txt content.

    Args:
        url: Any URL on the site; only its scheme and host are used.

    Returns:
        The body of ``/robots.txt`` when the server answers 200, otherwise
        ``None`` (request errors are logged as warnings, not raised).
    """
    try:
        # robots.txt lives at the site root. Appending to the page URL would
        # request e.g. https://host/blog/post/robots.txt for a deep link.
        robots_url = urljoin(url, '/robots.txt')
        response = self.session.get(robots_url, timeout=5)
        if response.status_code == 200:
            return response.text
    except Exception as e:
        scraping_logger.warning(f"Error fetching robots.txt: {str(e)}")
    return None
def _get_sitemap(self, url: str) -> Optional[str]:
    """Get sitemap.xml content.

    Args:
        url: Any URL on the site; only its scheme and host are used.

    Returns:
        The body of ``/sitemap.xml`` when the server answers 200, otherwise
        ``None`` (request errors are logged as warnings, not raised).
    """
    try:
        # Look at the conventional root location. Appending to the page URL
        # would request e.g. https://host/blog/post/sitemap.xml for a deep link.
        sitemap_url = urljoin(url, '/sitemap.xml')
        response = self.session.get(sitemap_url, timeout=5)
        if response.status_code == 200:
            return response.text
    except Exception as e:
        scraping_logger.warning(f"Error fetching sitemap.xml: {str(e)}")
    return None
def _analyze_seo(self, url: str) -> Dict:
    """Analyze website SEO.

    Combines the meta-tag checks with the AI content analysis into a weighted
    score: 30% meta tags, 30% readability, 40% content quality.

    Returns:
        Dict: ``overall_score``, ``meta_tags``, ``content`` and
        ``recommendations`` on success; otherwise a dict with ``error`` and
        ``error_details`` (nothing is raised).
    """
    import traceback

    try:
        # Extract content
        content, soup, extract_errors = self._extract_content(url)
        if not content or not soup:
            return {
                "error": "Failed to extract content",
                "error_details": {"errors": extract_errors}
            }
        # Analyze meta tags
        meta_analysis = self._analyze_meta_tags(soup)
        # Analyze content with AI
        content_analysis, recommendations = self._analyze_content_with_ai(content)
        # Five pass/fail checks worth 20 points each (scale to 100)
        passed_checks = sum(
            1 if check else 0
            for check in (
                meta_analysis.title['status'] == 'good',
                meta_analysis.description['status'] == 'good',
                meta_analysis.keywords['status'] == 'good',
                meta_analysis.has_robots,
                meta_analysis.has_sitemap,
            )
        )
        meta_score = passed_checks * 20
        overall_score = (
            meta_score * 0.3 +  # 30% weight for meta tags
            content_analysis.readability_score * 0.3 +  # 30% weight for readability
            content_analysis.content_quality_score * 0.4  # 40% weight for content quality
        )
        return {
            "overall_score": overall_score,
            "meta_tags": meta_analysis.__dict__,
            "content": content_analysis.__dict__,
            "recommendations": [rec.__dict__ for rec in recommendations]
        }
    except Exception as e:
        error_msg = f"Error in SEO analysis: {str(e)}"
        logger.error(error_msg, exc_info=True)
        return {
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # A formatted stack rather than the traceback object's repr.
                "traceback": traceback.format_exc()
            }
        }
def _extract_content(self, url: str) -> Tuple[Optional[str], Optional[BeautifulSoup], List[str]]:
    """Download ``url`` and parse it.

    Returns:
        ``(html_text, soup, errors)``. When the request fails, the first two
        items are ``None`` and ``errors`` holds the failure message.
    """
    problems = []
    try:
        page = self.session.get(url, timeout=10)
        page.raise_for_status()
        html_text = page.text
        parsed = BeautifulSoup(html_text, 'html.parser')
    except requests.RequestException as e:
        error_msg = f"Error fetching URL: {str(e)}"
        logger.error(error_msg)
        problems.append(error_msg)
        return None, None, problems
    return html_text, parsed, problems
def _analyze_meta_tags(self, soup: BeautifulSoup) -> MetaTagAnalysis:
    """Grade the page's title, meta description and meta keywords.

    A title of 30-60 characters and a description of 120-160 characters are
    graded 'good'; keywords are 'good' when present. Also records whether a
    robots meta tag and a sitemap link tag exist.
    """
    def meta_content(name):
        """Return the content of <meta name=...>, or "" when absent."""
        tag = soup.find('meta', attrs={'name': name})
        return tag.get('content', '') if tag else ""

    def graded(value, is_good, advice):
        """Build the status/value/recommendation dict for one element."""
        if is_good:
            return {'status': 'good', 'value': value, 'recommendation': ''}
        return {'status': 'needs_improvement', 'value': value, 'recommendation': advice}

    # Title analysis
    title = soup.title.string if soup.title else ""
    title_analysis = graded(
        title,
        title and 30 <= len(title) <= 60,
        'Title should be between 30-60 characters',
    )
    # Meta description analysis
    desc = meta_content('description')
    desc_analysis = graded(
        desc,
        desc and 120 <= len(desc) <= 160,
        'Description should be between 120-160 characters',
    )
    # Keywords analysis
    keywords = meta_content('keywords')
    keywords_analysis = graded(keywords, keywords, 'Add relevant keywords meta tag')
    robots_tag = soup.find('meta', attrs={'name': 'robots'})
    sitemap_link = soup.find('link', attrs={'rel': 'sitemap'})
    return MetaTagAnalysis(
        title=title_analysis,
        description=desc_analysis,
        keywords=keywords_analysis,
        has_robots=bool(robots_tag),
        has_sitemap=bool(sitemap_link)
    )
def _analyze_content_with_ai(self, content: str) -> Tuple[ContentAnalysis, List[SEORecommendation]]:
    """Analyze page content with the LLM, falling back to basic metrics.

    Args:
        content: Page text. Only the first 4000 characters are sent to the
            model; ``word_count`` is always computed locally from the full
            text.

    Returns:
        ``(ContentAnalysis, recommendations)``. If building the prompt,
        calling the model, or reading its reply fails, or the reply is
        empty, the result of ``self._get_fallback_analysis(content)`` is
        returned instead.
    """
    import json  # local import: only needed to decode a string reply

    required_fields = ('priority', 'category', 'issue', 'recommendation', 'impact')

    try:
        # Truncate to avoid token limits. This note used to live inside the
        # prompt string and was therefore sent to the model.
        excerpt = content[:4000]
        prompt = "\n".join([
            "Analyze the following webpage content for SEO and provide a structured analysis:",
            f"Content: {excerpt}...",
            "Provide analysis in the following format:",
            "1. Word count",
            "2. Heading structure analysis",
            "3. Keyword density for main topics",
            "4. Readability score (0-100)",
            "5. Content quality score (0-100)",
            "6. List of SEO recommendations with priority (high/medium/low), category, issue, recommendation, and impact",
            "Format the response as JSON.",
            # Name the keys read below so the reply can actually be consumed.
            "Use exactly these top-level keys: word_count, heading_structure, keyword_density, "
            "readability_score, content_quality_score, recommendations. "
            "Each item in recommendations must have the keys: "
            + ", ".join(required_fields) + ".",
        ])
    except Exception as e:
        logger.error(f"Error in AI analysis setup: {str(e)}")
        return self._get_fallback_analysis(content)

    try:
        # NOTE(review): llm_text_gen is defined elsewhere; confirm it accepts
        # response_format and whether it returns a dict or JSON text.
        analysis = llm_text_gen(
            prompt=prompt,
            system_prompt="You are an SEO expert analyzing website content.",
            response_format="json_object"
        )
        if not analysis:
            logger.error("Empty response from AI analysis")
            return self._get_fallback_analysis(content)

        # Accept JSON text as well as an already-decoded mapping.
        if isinstance(analysis, str):
            analysis = json.loads(analysis)

        content_analysis = ContentAnalysis(
            word_count=len(content.split()),
            headings_structure=analysis.get('heading_structure', {}),
            keyword_density=analysis.get('keyword_density', {}),
            readability_score=analysis.get('readability_score', 0),
            content_quality_score=analysis.get('content_quality_score', 0)
        )

        # Drop malformed entries individually instead of discarding the
        # whole analysis because one recommendation lacks a field.
        recommendations = []
        for rec in analysis.get('recommendations') or []:
            if not isinstance(rec, dict) or any(key not in rec for key in required_fields):
                logger.warning(f"Skipping malformed AI recommendation: {rec!r}")
                continue
            recommendations.append(
                SEORecommendation(**{key: rec[key] for key in required_fields})
            )
        return content_analysis, recommendations
    except Exception as e:
        logger.error(f"Error in AI analysis: {str(e)}")
        return self._get_fallback_analysis(content)
def _get_fallback_analysis(self, content: str) -> Tuple[ContentAnalysis, List[SEORecommendation]]:
    """Build a word-count-only analysis for when the AI path is unavailable.

    Args:
        content: Page text; split on whitespace to count words.

    Returns:
        ``(ContentAnalysis, recommendations)``. Both scores are derived from
        the word count alone (count / 10 and count / 20, clamped to 0-100),
        and the single recommendation tells the caller that AI analysis was
        unavailable. If anything here raises, an all-zero ContentAnalysis
        and an empty recommendation list are returned.
    """
    try:
        total_words = len(content.split())

        notice = SEORecommendation(
            priority="high",
            category="content",
            issue="AI analysis unavailable",
            recommendation="Consider running the analysis again with a valid API key for more detailed insights",
            impact="Limited analysis capabilities"
        )
        summary = ContentAnalysis(
            word_count=total_words,
            headings_structure={},
            keyword_density={},
            # Crude proxies: longer pages score higher, capped at 100.
            readability_score=min(100, max(0, total_words / 10)),
            content_quality_score=min(100, max(0, total_words / 20))
        )
        return summary, [notice]
    except Exception as e:
        logger.error(f"Error in fallback analysis: {str(e)}")
        empty = ContentAnalysis(
            word_count=0,
            headings_structure={},
            keyword_density={},
            readability_score=0,
            content_quality_score=0
        )
        return empty, []
def analyze_website(url: str) -> Dict:
    """
    Analyze a website and wrap the outcome in a success/error envelope.

    Args:
        url (str): The URL to analyze

    Returns:
        Dict: ``{"success": True, "data": <results>}`` when the analyzer's
        result has no "error" key. Otherwise
        ``{"success": False, "error": ..., "error_details": ...}``; when an
        exception was raised here, ``error_details`` holds the exception
        type name and the formatted traceback text.
    """
    # Local import keeps this function self-contained.
    import traceback

    logger.info(f"Starting website analysis for URL: {url}")
    try:
        analyzer = WebsiteAnalyzer()
        results = analyzer.analyze_website(url)

        if "error" in results:
            error_msg = f"Error in base analysis: {results['error']}"
            error_details = results.get("error_details", {})
            logger.error(error_msg)
            # default=str: logging must never raise on values json cannot encode.
            logger.error(f"Error details: {json.dumps(error_details, indent=2, default=str)}")
            return {
                "success": False,
                "error": error_msg,
                "error_details": error_details
            }

        logger.info("Analysis completed successfully")
        # default=str: without it a non-serialisable value in the results
        # would raise here and turn a successful analysis into a failure.
        logger.debug(f"Analysis results: {json.dumps(results, indent=2, default=str)}")
        return {
            "success": True,
            "data": results
        }
    except Exception as e:
        error_msg = f"Error in analyze_website: {str(e)}"
        # logger.exception attaches the active traceback on both stdlib
        # logging and loguru; the exc_info keyword is stdlib-only.
        logger.exception(error_msg)
        return {
            "success": False,
            "error": error_msg,
            "error_details": {
                "type": type(e).__name__,
                # str(e.__traceback__) only yields "<traceback object at 0x...>".
                "traceback": traceback.format_exc()
            }
        }

View File

@@ -0,0 +1,134 @@
import json
import traceback
from typing import Dict

from loguru import logger
class ContentGapAnalyzer:
    """Derive content gaps and recommendations from a base website analysis.

    NOTE(review): ``analyze`` calls ``self._analyze_content_gaps`` and
    ``self._generate_recommendations``, which are not defined in this class
    as written. Until a subclass or a later change supplies them, ``analyze``
    returns a failure at stage "gap_analysis".
    """

    # Sections that must exist under data.analysis of the base result.
    _REQUIRED_SECTIONS = ("content_info", "basic_info", "performance")

    def __init__(self, analyzer):
        # analyzer must expose analyze_website(url) returning a dict shaped
        # like {"success": bool, "data": {"analysis": {...}}, ...}; that is
        # the shape analyze() reads below.
        self.analyzer = analyzer

    @staticmethod
    def _failure(stage: str, error: str, details: Dict) -> Dict:
        """Build the failure envelope shared by every error path."""
        return {
            "success": False,
            "error": error,
            "error_details": details,
            "stage": stage
        }

    @staticmethod
    def _exception_details(exc: BaseException) -> Dict:
        """Describe an exception with its type name and formatted traceback."""
        return {
            "type": type(exc).__name__,
            # str(exc.__traceback__) would only give "<traceback object at 0x...>".
            "traceback": "".join(
                traceback.format_exception(type(exc), exc, exc.__traceback__)
            )
        }

    def analyze(self, url: str) -> Dict:
        """
        Analyze content gaps for a given URL.

        Args:
            url (str): The URL to analyze

        Returns:
            Dict: On success ``{"success": True, "data": {"content_gaps",
            "recommendations", "metrics"}}``. On failure ``{"success": False,
            "error", "error_details", "stage"}``, where ``stage`` is one of
            "base_analysis", "section_validation", "gap_analysis",
            "recommendation_generation" or "general".
        """
        try:
            # Get base analysis
            logger.info(f"Starting content gap analysis for URL: {url}")
            base_analysis = self.analyzer.analyze_website(url)

            # Propagate a failed base analysis unchanged.
            if not base_analysis.get("success", False):
                error_msg = base_analysis.get("error", "Unknown error in website analysis")
                error_details = base_analysis.get("error_details", {})
                logger.error(f"Base analysis failed: {error_msg}")
                # default=str keeps the log line from raising on values json
                # cannot encode, which would misreport the stage as "general".
                logger.error(f"Error details: {json.dumps(error_details, indent=2, default=str)}")
                return self._failure("base_analysis", error_msg, error_details)

            # Validate that every required section is present.
            analysis_data = base_analysis.get("data", {}).get("analysis", {})
            missing_sections = [
                section for section in self._REQUIRED_SECTIONS
                if section not in analysis_data
            ]
            if missing_sections:
                error_msg = f"Missing required analysis sections: {', '.join(missing_sections)}"
                logger.error(error_msg)
                logger.error(f"Available sections: {list(analysis_data.keys())}")
                return self._failure(
                    "section_validation",
                    error_msg,
                    {
                        "missing_sections": missing_sections,
                        "available_sections": list(analysis_data.keys())
                    },
                )

            # The validation above guarantees these keys exist, so no
            # KeyError handling is needed here.
            content_info = analysis_data["content_info"]
            basic_info = analysis_data["basic_info"]
            performance = analysis_data["performance"]

            # Analyze content gaps
            try:
                gaps = self._analyze_content_gaps(content_info, basic_info, performance)
            except Exception as e:
                error_msg = f"Error analyzing content gaps: {str(e)}"
                logger.exception(error_msg)
                return self._failure("gap_analysis", error_msg, self._exception_details(e))

            # Generate recommendations
            try:
                recommendations = self._generate_recommendations(gaps)
            except Exception as e:
                error_msg = f"Error generating recommendations: {str(e)}"
                logger.exception(error_msg)
                return self._failure(
                    "recommendation_generation", error_msg, self._exception_details(e)
                )

            return {
                "success": True,
                "data": {
                    "content_gaps": gaps,
                    "recommendations": recommendations,
                    "metrics": {
                        "word_count": content_info.get("word_count", 0),
                        "heading_count": content_info.get("heading_count", 0),
                        "image_count": content_info.get("image_count", 0),
                        "link_count": content_info.get("link_count", 0),
                        "paragraph_count": content_info.get("paragraph_count", 0),
                        "load_time": performance.get("load_time", 0),
                        "response_time": performance.get("response_time", 0)
                    }
                }
            }
        except Exception as e:
            error_msg = f"Error in content gap analysis: {str(e)}"
            # logger.exception records the active traceback on both stdlib
            # logging and loguru.
            logger.exception(error_msg)
            return self._failure("general", error_msg, self._exception_details(e))

View File

@@ -0,0 +1,45 @@
"""Data models for website analysis results."""
from dataclasses import dataclass
from typing import List, Dict, Optional
from datetime import datetime
@dataclass
class SEORecommendation:
    """One actionable SEO finding.

    Attributes:
        priority: Urgency label: 'high', 'medium' or 'low'.
        category: Area the finding belongs to, e.g. 'content', 'technical'
            or 'meta'.
        issue: Short description of the problem found.
        recommendation: Suggested action to address the issue.
        impact: Text describing the consequence of the issue.
    """
    priority: str
    category: str
    issue: str
    recommendation: str
    impact: str
@dataclass
class MetaTagAnalysis:
    """Findings for a page's meta tags.

    Attributes:
        title: Mapping of the form {'status': 'good', 'value': 'actual
            title', 'recommendation': 'suggestion'}.
        description: Same mapping shape, for the meta description.
        keywords: Same mapping shape, for the meta keywords.
        has_robots: Whether a robots meta tag was found.
        has_sitemap: Whether a sitemap link was found.
    """
    title: Dict[str, str]
    description: Dict[str, str]
    keywords: Dict[str, str]
    has_robots: bool
    has_sitemap: bool
@dataclass
class ContentAnalysis:
    """Findings for a page's body content.

    Attributes:
        word_count: Number of words in the content.
        headings_structure: Count per heading level, e.g. {'h1': 1, 'h2': 3}.
        keyword_density: Density value per keyword.
        readability_score: Readability rating.
        content_quality_score: Content quality rating.
    """
    word_count: int
    headings_structure: Dict[str, int]
    keyword_density: Dict[str, float]
    readability_score: float
    content_quality_score: float
@dataclass
class SEOAnalysisResult:
    """Top-level container for one complete SEO analysis.

    Attributes:
        url: Address that was analyzed.
        analyzed_at: Timestamp of the analysis.
        overall_score: Aggregate score on a 0-100 scale.
        meta_tags: Meta tag findings.
        content: Content findings.
        recommendations: SEO recommendations for the page.
        errors: Error messages.
        warnings: Warning messages.
        success: Success flag for the analysis.
    """
    url: str
    analyzed_at: datetime
    overall_score: float
    meta_tags: MetaTagAnalysis
    content: ContentAnalysis
    recommendations: List[SEORecommendation]
    errors: List[str]
    warnings: List[str]
    success: bool