Auto-sync from website-creator
This commit is contained in:
336
skills/seo-data/scripts/data_aggregator.py
Normal file
336
skills/seo-data/scripts/data_aggregator.py
Normal file
@@ -0,0 +1,336 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Data Service Manager
|
||||
|
||||
Manages connections to multiple analytics services (GA4, GSC, DataForSEO, Umami).
|
||||
All services are optional - skips unconfigured services silently.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import argparse
|
||||
from typing import Dict, List, Optional, Any
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
class DataServiceManager:
    """Manage optional analytics connections.

    Connectors (GA4, GSC, DataForSEO, Umami) are initialized only when
    enabled in ``data-services.json``. Anything unconfigured, missing, or
    broken is reported on the console and skipped — a connector failure
    never propagates to the caller.
    """

    def __init__(self, context_path: str):
        """Load configuration from *context_path* and initialize enabled services.

        Args:
            context_path: Folder expected to contain ``data-services.json``.
        """
        self.context_path = context_path
        self.config = self._load_config()
        # name -> connector instance; only successfully built services appear here
        self.services: Dict[str, Any] = {}
        self._initialize_services()

    def _load_config(self) -> Dict:
        """Load data-services.json from the context folder.

        Returns an empty dict when the file is absent, unreadable, or not
        valid JSON, so the manager degrades to "no services configured"
        instead of crashing at construction time.
        """
        config_file = os.path.join(self.context_path, 'data-services.json')

        if not os.path.exists(config_file):
            print(f"Warning: {config_file} not found. No services configured.")
            return {}

        try:
            with open(config_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            # A corrupt or unreadable config is treated like no config at all,
            # matching the module's "all services are optional" contract.
            print(f"Warning: could not read {config_file}: {e}")
            return {}

    def _init_service(self, key: str, label: str, builder) -> None:
        """Build one connector via *builder* and register it under *key*.

        *builder* receives the service's config dict and returns a tuple of
        ``(connector, detail)`` where *detail* is appended to the success
        message. ImportError (connector module not installed) and any other
        exception are reported but swallowed, so one broken service never
        blocks the others.
        """
        try:
            service, detail = builder(self.config[key])
        except ImportError as e:
            print(f"⚠ {label} skipped: {e}")
        except Exception as e:
            print(f"✗ {label} initialization failed: {e}")
        else:
            self.services[key] = service
            print(f"✓ {label} initialized{detail}")

    @staticmethod
    def _build_ga4(cfg: Dict):
        """Construct a GA4 connector (env vars fill in missing config keys)."""
        from ga4_connector import GA4Connector
        connector = GA4Connector(
            cfg.get('property_id', os.getenv('GA4_PROPERTY_ID')),
            cfg.get('credentials_path', os.getenv('GA4_CREDENTIALS_PATH')),
        )
        return connector, f": {cfg.get('property_id')}"

    @staticmethod
    def _build_gsc(cfg: Dict):
        """Construct a Google Search Console connector."""
        from gsc_connector import GSCConnector
        connector = GSCConnector(
            cfg.get('site_url', os.getenv('GSC_SITE_URL')),
            cfg.get('credentials_path', os.getenv('GSC_CREDENTIALS_PATH')),
        )
        return connector, f": {cfg.get('site_url')}"

    @staticmethod
    def _build_dataforseo(cfg: Dict):
        """Construct a DataForSEO client from login/password credentials."""
        from dataforseo_client import DataForSEOClient
        connector = DataForSEOClient(
            cfg.get('login', os.getenv('DATAFORSEO_LOGIN')),
            cfg.get('password', os.getenv('DATAFORSEO_PASSWORD')),
        )
        return connector, ""

    @staticmethod
    def _build_umami(cfg: Dict):
        """Construct an Umami connector (username/password authentication)."""
        from umami_connector import UmamiConnector
        connector = UmamiConnector(
            umami_url=cfg.get('api_url', os.getenv('UMAMI_URL')),
            username=cfg.get('username', os.getenv('UMAMI_USERNAME')),
            password=cfg.get('password', os.getenv('UMAMI_PASSWORD')),
            website_id=cfg.get('website_id', os.getenv('UMAMI_WEBSITE_ID')),
        )
        return connector, f": {cfg.get('api_url')}"

    def _initialize_services(self) -> None:
        """Initialize only configured and enabled services, in a fixed order."""
        builders = [
            ('ga4', 'GA4', self._build_ga4),
            ('gsc', 'GSC', self._build_gsc),
            ('dataforseo', 'DataForSEO', self._build_dataforseo),
            ('umami', 'Umami', self._build_umami),
        ]
        for key, label, builder in builders:
            if self.config.get(key, {}).get('enabled'):
                self._init_service(key, label, builder)

        if not self.services:
            print("No analytics services configured. All features will be skipped.")

    def get_page_performance(self, url: str, days: int = 30) -> Dict:
        """Aggregate per-page data from all available services.

        Args:
            url: Page URL passed through to each connector.
            days: Lookback window in days.

        Returns:
            Dict with ``url``, ``period``, ``generated_at`` and a
            ``services`` map of ``{name: {'success': bool, 'data'|'error': ...}}``.
            A failing service is recorded, never raised.
        """
        results = {
            'url': url,
            'period': f'last_{days}_days',
            'generated_at': datetime.now().isoformat(),
            'services': {}
        }

        for name, service in self.services.items():
            try:
                print(f"  Fetching data from {name}...")
                data = service.get_page_data(url, days)
                results['services'][name] = {
                    'success': True,
                    'data': data
                }
            except Exception as e:
                print(f"  ✗ {name} failed: {e}")
                results['services'][name] = {
                    'success': False,
                    'error': str(e)
                }

        return results

    def get_quick_wins(self, min_position: int = 11, max_position: int = 20) -> List[Dict]:
        """Find keywords ranking 11-20 (page 2 opportunities).

        Requires the GSC service; returns ``[]`` when GSC is unavailable or
        the fetch fails.
        """
        if 'gsc' not in self.services:
            print("GSC not configured. Cannot fetch quick wins.")
            return []

        try:
            return self.services['gsc'].get_quick_wins(min_position, max_position)
        except Exception as e:
            print(f"Quick wins fetch failed: {e}")
            return []

    def get_competitor_gap(self, your_domain: str, competitor_domain: str,
                           keywords: List[str]) -> Dict:
        """Find keywords the competitor ranks for but you don't.

        Requires DataForSEO; on any failure returns a dict with an empty
        ``gap_keywords`` list and an ``error`` message instead of raising.
        """
        if 'dataforseo' not in self.services:
            print("DataForSEO not configured. Cannot analyze competitor gap.")
            return {'gap_keywords': [], 'error': 'DataForSEO not configured'}

        try:
            return self.services['dataforseo'].analyze_competitor_gap(
                your_domain, competitor_domain, keywords
            )
        except Exception as e:
            print(f"Competitor analysis failed: {e}")
            return {'gap_keywords': [], 'error': str(e)}

    def get_all_rankings(self, days: int = 30) -> Dict:
        """Get all keyword rankings from all available services.

        Each ranking dict is tagged with a ``source`` key ('gsc' or
        'dataforseo'). A failing source is logged and skipped.
        """
        rankings = {
            'generated_at': datetime.now().isoformat(),
            'rankings': []
        }

        if 'gsc' in self.services:
            try:
                gsc_rankings = self.services['gsc'].get_keyword_positions(days)
                rankings['rankings'].extend(
                    {'source': 'gsc', **r} for r in gsc_rankings
                )
            except Exception as e:
                print(f"GSC rankings failed: {e}")

        if 'dataforseo' in self.services:
            try:
                # NOTE(review): the DataForSEO connector takes no day range
                # here, so *days* only affects the GSC portion — confirm
                # against the connector API.
                dfs_rankings = self.services['dataforseo'].get_all_rankings()
                rankings['rankings'].extend(
                    {'source': 'dataforseo', **r} for r in dfs_rankings
                )
            except Exception as e:
                print(f"DataForSEO rankings failed: {e}")

        return rankings
|
||||
|
||||
|
||||
def _fmt_count(value) -> str:
    """Render numbers with thousands separators; pass anything else through.

    Guards the text report against non-numeric placeholders: the original
    code applied the ``,`` format spec directly to ``kw.get('search_volume',
    'N/A')``, which raises ``ValueError`` whenever the value is the 'N/A'
    string fallback.
    """
    return f"{value:,}" if isinstance(value, (int, float)) else str(value)


def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser (flags unchanged from the original interface)."""
    parser = argparse.ArgumentParser(
        description='Aggregate data from multiple analytics services'
    )

    parser.add_argument(
        '--context', '-c',
        required=True,
        help='Path to context folder (contains data-services.json)'
    )
    parser.add_argument(
        '--action', '-a',
        choices=['performance', 'quick-wins', 'competitor-gap', 'rankings'],
        default='performance',
        help='Action to perform (default: performance)'
    )
    parser.add_argument(
        '--url', '-u',
        help='Page URL to analyze (for performance action)'
    )
    parser.add_argument(
        '--days', '-d',
        type=int,
        default=30,
        help='Number of days to analyze (default: 30)'
    )
    parser.add_argument(
        '--your-domain',
        help='Your domain (for competitor-gap action)'
    )
    parser.add_argument(
        '--competitor',
        help='Competitor domain (for competitor-gap action)'
    )
    parser.add_argument(
        '--keywords',
        help='Comma-separated keywords (for competitor-gap action)'
    )
    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )
    return parser


def _print_text_report(action: str, result: Dict) -> None:
    """Print a human-readable report for one action's result dict."""
    print(f"\n{'='*60}")
    print("RESULTS")
    print(f"{'='*60}\n")

    if action == 'performance':
        for service, data in result['services'].items():
            print(f"{service.upper()}:")
            if data['success']:
                for key, value in data['data'].items():
                    print(f"  • {key}: {_fmt_count(value)}")
            else:
                print(f"  ✗ Error: {data['error']}")
            print()

    elif action == 'quick-wins':
        print(f"Found {len(result['quick_wins'])} quick win opportunities:\n")
        # Show at most the top 10 opportunities.
        for i, kw in enumerate(result['quick_wins'][:10], 1):
            print(f"{i}. {kw['keyword']}")
            print(f"   Position: {kw['current_position']} | "
                  f"Volume: {_fmt_count(kw.get('search_volume', 'N/A'))} | "
                  f"URL: {kw['url']}")
            print()

    elif action == 'competitor-gap':
        print(f"Gap Keywords: {len(result.get('gap_keywords', []))}\n")
        for i, kw in enumerate(result.get('gap_keywords', [])[:10], 1):
            print(f"{i}. {kw['keyword']}")
            print(f"   Competitor Position: {kw['competitor_position']} | "
                  f"Search Volume: {_fmt_count(kw.get('search_volume', 'N/A'))}")
            print()

    elif action == 'rankings':
        print(f"Total Rankings: {len(result.get('rankings', []))}\n")
        for r in result.get('rankings', [])[:20]:
            print(f"• {r['keyword']}: Position {r['position']} "
                  f"({r['source']})")
        print()


def main():
    """Main entry point: parse args, run the chosen action, print the result."""
    args = _build_parser().parse_args()

    print("\n📊 Initializing Data Service Manager...")
    print(f"Context: {args.context}\n")

    manager = DataServiceManager(args.context)

    if not manager.services:
        print("\n⚠️ No services configured. Exiting.")
        return

    print(f"\n✅ Initialized {len(manager.services)} service(s)\n")

    # Dispatch on the requested action; argparse choices make this exhaustive.
    if args.action == 'performance':
        if not args.url:
            print("Error: --url required for performance action")
            return

        print(f"📈 Fetching performance for: {args.url}")
        result = manager.get_page_performance(args.url, args.days)

    elif args.action == 'quick-wins':
        print("🎯 Finding quick wins (position 11-20)...")
        quick_wins = manager.get_quick_wins()
        result = {
            'quick_wins': quick_wins,
            'total_opportunities': len(quick_wins)
        }

    elif args.action == 'competitor-gap':
        if not args.your_domain or not args.competitor or not args.keywords:
            print("Error: --your-domain, --competitor, and --keywords required")
            return

        keywords = [k.strip() for k in args.keywords.split(',')]
        print(f"🔍 Analyzing competitor gap: {args.your_domain} vs {args.competitor}")
        result = manager.get_competitor_gap(
            args.your_domain, args.competitor, keywords
        )

    else:  # 'rankings'
        print("📊 Fetching all rankings...")
        result = manager.get_all_rankings(args.days)

    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        _print_text_report(args.action, result)
|
||||
|
||||
|
||||
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user