Auto-sync from website-creator
skills/seo-data/scripts/.env.example (new file, 26 lines)
@@ -0,0 +1,26 @@
# SEO Data - Environment Variables

# ===========================================
# GOOGLE ANALYTICS 4 (Optional)
# ===========================================
GA4_PROPERTY_ID=G-XXXXXXXXXX
GA4_CREDENTIALS_PATH=path/to/ga4-credentials.json

# ===========================================
# GOOGLE SEARCH CONSOLE (Optional)
# ===========================================
GSC_SITE_URL=https://yoursite.com
GSC_CREDENTIALS_PATH=path/to/gsc-credentials.json

# ===========================================
# DATAFORSEO (Optional)
# ===========================================
DATAFORSEO_LOGIN=
DATAFORSEO_PASSWORD=
DATAFORSEO_BASE_URL=https://api.dataforseo.com

# ===========================================
# UMAMI ANALYTICS (Optional)
# ===========================================
UMAMI_API_URL=https://analytics.yoursite.com
UMAMI_API_KEY=
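Note: data_aggregator.py below reads its per-service switches from data-services.json in the context folder, with the environment variables above only as fallbacks. A hedged sketch of that file's layout, inferred from the config.get() calls in _initialize_services (every value is a placeholder):

import json

# Hypothetical data-services.json content; keys mirror what
# DataServiceManager._initialize_services looks up, values are placeholders.
data_services = {
    "ga4": {"enabled": True,
            "property_id": "G-XXXXXXXXXX",
            "credentials_path": "path/to/ga4-credentials.json"},
    "gsc": {"enabled": True,
            "site_url": "https://yoursite.com",
            "credentials_path": "path/to/gsc-credentials.json"},
    "dataforseo": {"enabled": False, "login": "", "password": ""},
    "umami": {"enabled": False,
              "api_url": "https://analytics.yoursite.com",
              "api_key": "", "website_id": ""},
}

with open("data-services.json", "w", encoding="utf-8") as f:
    json.dump(data_services, f, indent=2)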
skills/seo-data/scripts/data_aggregator.py (new file, 336 lines)
@@ -0,0 +1,336 @@
#!/usr/bin/env python3
"""
Data Service Manager

Manages connections to multiple analytics services (GA4, GSC, DataForSEO, Umami).
All services are optional - skips unconfigured services silently.
"""

import os
import json
import argparse
from typing import Dict, List, Optional, Any
from pathlib import Path
from datetime import datetime, timedelta


class DataServiceManager:
    """Manage optional analytics connections"""

    def __init__(self, context_path: str):
        self.context_path = context_path
        self.config = self._load_config()
        self.services = {}
        self._initialize_services()

    def _load_config(self) -> Dict:
        """Load data-services.json from context folder"""
        config_file = os.path.join(self.context_path, 'data-services.json')

        if not os.path.exists(config_file):
            print(f"Warning: {config_file} not found. No services configured.")
            return {}

        with open(config_file, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _initialize_services(self):
        """Initialize only configured and enabled services"""
        # GA4
        if self.config.get('ga4', {}).get('enabled'):
            try:
                from ga4_connector import GA4Connector
                ga4_config = self.config['ga4']
                self.services['ga4'] = GA4Connector(
                    ga4_config.get('property_id', os.getenv('GA4_PROPERTY_ID')),
                    ga4_config.get('credentials_path', os.getenv('GA4_CREDENTIALS_PATH'))
                )
                print(f"✓ GA4 initialized: {ga4_config.get('property_id')}")
            except ImportError as e:
                print(f"⚠ GA4 skipped: {e}")
            except Exception as e:
                print(f"✗ GA4 initialization failed: {e}")

        # GSC
        if self.config.get('gsc', {}).get('enabled'):
            try:
                from gsc_connector import GSCConnector
                gsc_config = self.config['gsc']
                self.services['gsc'] = GSCConnector(
                    gsc_config.get('site_url', os.getenv('GSC_SITE_URL')),
                    gsc_config.get('credentials_path', os.getenv('GSC_CREDENTIALS_PATH'))
                )
                print(f"✓ GSC initialized: {gsc_config.get('site_url')}")
            except ImportError as e:
                print(f"⚠ GSC skipped: {e}")
            except Exception as e:
                print(f"✗ GSC initialization failed: {e}")

        # DataForSEO
        if self.config.get('dataforseo', {}).get('enabled'):
            try:
                from dataforseo_client import DataForSEOClient
                dfs_config = self.config['dataforseo']
                self.services['dataforseo'] = DataForSEOClient(
                    dfs_config.get('login', os.getenv('DATAFORSEO_LOGIN')),
                    dfs_config.get('password', os.getenv('DATAFORSEO_PASSWORD'))
                )
                print(f"✓ DataForSEO initialized")
            except ImportError as e:
                print(f"⚠ DataForSEO skipped: {e}")
            except Exception as e:
                print(f"✗ DataForSEO initialization failed: {e}")

        # Umami (arguments match UmamiConnector's api_url/api_key/website_id signature)
        if self.config.get('umami', {}).get('enabled'):
            try:
                from umami_connector import UmamiConnector
                umami_config = self.config['umami']
                self.services['umami'] = UmamiConnector(
                    api_url=umami_config.get('api_url', os.getenv('UMAMI_API_URL')),
                    api_key=umami_config.get('api_key', os.getenv('UMAMI_API_KEY')),
                    website_id=umami_config.get('website_id', os.getenv('UMAMI_WEBSITE_ID'))
                )
                print(f"✓ Umami initialized: {umami_config.get('api_url')}")
            except ImportError as e:
                print(f"⚠ Umami skipped: {e}")
            except Exception as e:
                print(f"✗ Umami initialization failed: {e}")

        if not self.services:
            print("No analytics services configured. All features will be skipped.")

    def get_page_performance(self, url: str, days: int = 30) -> Dict:
        """Aggregate data from all available services"""
        results = {
            'url': url,
            'period': f'last_{days}_days',
            'generated_at': datetime.now().isoformat(),
            'services': {}
        }

        for name, service in self.services.items():
            try:
                print(f" Fetching data from {name}...")
                data = service.get_page_data(url, days)
                results['services'][name] = {
                    'success': True,
                    'data': data
                }
            except Exception as e:
                print(f" ✗ {name} failed: {e}")
                results['services'][name] = {
                    'success': False,
                    'error': str(e)
                }

        return results

    def get_quick_wins(self, min_position: int = 11, max_position: int = 20) -> List[Dict]:
        """Find keywords ranking 11-20 (page 2 opportunities)"""
        if 'gsc' not in self.services:
            print("GSC not configured. Cannot fetch quick wins.")
            return []

        try:
            return self.services['gsc'].get_quick_wins(min_position, max_position)
        except Exception as e:
            print(f"Quick wins fetch failed: {e}")
            return []

    def get_competitor_gap(self, your_domain: str, competitor_domain: str,
                           keywords: List[str]) -> Dict:
        """Find keywords competitor ranks for but you don't"""
        if 'dataforseo' not in self.services:
            print("DataForSEO not configured. Cannot analyze competitor gap.")
            return {'gap_keywords': [], 'error': 'DataForSEO not configured'}

        try:
            return self.services['dataforseo'].analyze_competitor_gap(
                your_domain, competitor_domain, keywords
            )
        except Exception as e:
            print(f"Competitor analysis failed: {e}")
            return {'gap_keywords': [], 'error': str(e)}

    def get_all_rankings(self, days: int = 30) -> Dict:
        """Get all keyword rankings from all available services"""
        rankings = {
            'generated_at': datetime.now().isoformat(),
            'rankings': []
        }

        # From GSC
        if 'gsc' in self.services:
            try:
                gsc_rankings = self.services['gsc'].get_keyword_positions(days)
                rankings['rankings'].extend([{
                    'source': 'gsc',
                    **r
                } for r in gsc_rankings])
            except Exception as e:
                print(f"GSC rankings failed: {e}")

        # From DataForSEO
        if 'dataforseo' in self.services:
            try:
                dfs_rankings = self.services['dataforseo'].get_all_rankings()
                rankings['rankings'].extend([{
                    'source': 'dataforseo',
                    **r
                } for r in dfs_rankings])
            except Exception as e:
                print(f"DataForSEO rankings failed: {e}")

        return rankings


def main():
    """Main entry point"""
    parser = argparse.ArgumentParser(
        description='Aggregate data from multiple analytics services'
    )

    parser.add_argument(
        '--context', '-c',
        required=True,
        help='Path to context folder (contains data-services.json)'
    )

    parser.add_argument(
        '--action', '-a',
        choices=['performance', 'quick-wins', 'competitor-gap', 'rankings'],
        default='performance',
        help='Action to perform (default: performance)'
    )

    parser.add_argument(
        '--url', '-u',
        help='Page URL to analyze (for performance action)'
    )

    parser.add_argument(
        '--days', '-d',
        type=int,
        default=30,
        help='Number of days to analyze (default: 30)'
    )

    parser.add_argument(
        '--your-domain',
        help='Your domain (for competitor-gap action)'
    )

    parser.add_argument(
        '--competitor',
        help='Competitor domain (for competitor-gap action)'
    )

    parser.add_argument(
        '--keywords',
        help='Comma-separated keywords (for competitor-gap action)'
    )

    parser.add_argument(
        '--output', '-o',
        choices=['json', 'text'],
        default='text',
        help='Output format (default: text)'
    )

    args = parser.parse_args()

    # Initialize manager
    print(f"\n📊 Initializing Data Service Manager...")
    print(f"Context: {args.context}\n")

    manager = DataServiceManager(args.context)

    if not manager.services:
        print("\n⚠️ No services configured. Exiting.")
        return

    print(f"\n✅ Initialized {len(manager.services)} service(s)\n")

    # Perform action
    if args.action == 'performance':
        if not args.url:
            print("Error: --url required for performance action")
            return

        print(f"📈 Fetching performance for: {args.url}")
        result = manager.get_page_performance(args.url, args.days)

    elif args.action == 'quick-wins':
        print(f"🎯 Finding quick wins (position 11-20)...")
        quick_wins = manager.get_quick_wins()
        result = {
            'quick_wins': quick_wins,
            'total_opportunities': len(quick_wins)
        }

    elif args.action == 'competitor-gap':
        if not args.your_domain or not args.competitor or not args.keywords:
            print("Error: --your-domain, --competitor, and --keywords required")
            return

        keywords = [k.strip() for k in args.keywords.split(',')]
        print(f"🔍 Analyzing competitor gap: {args.your_domain} vs {args.competitor}")
        result = manager.get_competitor_gap(
            args.your_domain, args.competitor, keywords
        )

    elif args.action == 'rankings':
        print(f"📊 Fetching all rankings...")
        result = manager.get_all_rankings(args.days)

    # Output
    if args.output == 'json':
        print(json.dumps(result, indent=2, ensure_ascii=False))
    else:
        print(f"\n{'='*60}")
        print("RESULTS")
        print(f"{'='*60}\n")

        if args.action == 'performance':
            for service, data in result['services'].items():
                print(f"{service.upper()}:")
                if data['success']:
                    for key, value in data['data'].items():
                        if isinstance(value, (int, float)):
                            print(f" • {key}: {value:,}")
                        else:
                            print(f" • {key}: {value}")
                else:
                    print(f" ✗ Error: {data['error']}")
                print()

        elif args.action == 'quick-wins':
            print(f"Found {len(result['quick_wins'])} quick win opportunities:\n")
            for i, kw in enumerate(result['quick_wins'][:10], 1):
                # search_volume may be missing or non-numeric; GSC quick wins carry no URL.
                volume = kw.get('search_volume')
                volume_str = f"{volume:,}" if isinstance(volume, (int, float)) else 'N/A'
                print(f"{i}. {kw['keyword']}")
                print(f" Position: {kw['current_position']} | "
                      f"Volume: {volume_str} | "
                      f"URL: {kw.get('url', 'N/A')}")
                print()

        elif args.action == 'competitor-gap':
            print(f"Gap Keywords: {len(result.get('gap_keywords', []))}\n")
            for i, kw in enumerate(result.get('gap_keywords', [])[:10], 1):
                volume = kw.get('search_volume')
                volume_str = f"{volume:,}" if isinstance(volume, (int, float)) else 'N/A'
                print(f"{i}. {kw['keyword']}")
                print(f" Competitor Position: {kw['competitor_position']} | "
                      f"Search Volume: {volume_str}")
                print()

        elif args.action == 'rankings':
            print(f"Total Rankings: {len(result.get('rankings', []))}\n")
            for r in result.get('rankings', [])[:20]:
                print(f"• {r['keyword']}: Position {r['position']} "
                      f"({r['source']})")

            print()


if __name__ == '__main__':
    main()
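Note: get_page_performance only assumes each registered service object exposes get_page_data(url, days), so extra sources can be plugged in without touching the manager. A minimal sketch with a made-up connector (StaticConnector and its numbers are hypothetical, not part of this commit):

from typing import Dict

from data_aggregator import DataServiceManager


class StaticConnector:
    """Hypothetical connector returning canned numbers; it only needs to
    match the get_page_data(url, days) interface the manager calls."""

    def get_page_data(self, url: str, days: int = 30) -> Dict:
        return {"pageviews": 123, "sessions": 45, "source": "static"}


manager = DataServiceManager("./context")        # path is a placeholder
manager.services["static"] = StaticConnector()   # registered alongside GA4/GSC/etc.
print(manager.get_page_performance("https://yoursite.com/blog/post", days=7))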
skills/seo-data/scripts/dataforseo_client.py (new file, 134 lines)
@@ -0,0 +1,134 @@
#!/usr/bin/env python3
"""
DataForSEO Client - Updated per official docs (2026-03-08)
Correct endpoints:
- Keyword suggestions: /v3/dataforseo_labs/google/keyword_suggestions/live
- SERP data: /v3/serp/google/organic/live/advanced
"""

import os
import sys
import base64
import requests
from typing import Dict, List, Optional


class DataForSEOClient:
    """DataForSEO API v3 client"""

    def __init__(self, login: str, password: str):
        self.login = login
        self.password = password
        self.base_url = "https://api.dataforseo.com/v3"
        # DataForSEO uses HTTP Basic Auth built from login:password.
        auth_bytes = f"{login}:{password}".encode('utf-8')
        self._auth_header = f"Basic {base64.b64encode(auth_bytes).decode('utf-8')}"

    def _make_request(self, endpoint: str, data: List[Dict]) -> Dict:
        url = f"{self.base_url}{endpoint}"
        headers = {'Authorization': self._auth_header, 'Content-Type': 'application/json'}
        response = requests.post(url, json=data, headers=headers, timeout=60)
        response.raise_for_status()
        return response.json()

    def get_keyword_suggestions(self, keyword: str, location: str = "Thailand", language: str = "Thai") -> List[Dict]:
        """Get keyword suggestions from DataForSEO Labs"""
        try:
            data = [{"keywords": [keyword], "location_name": location, "language_name": language, "include_serp_info": True}]
            endpoint = "/dataforseo_labs/google/keyword_suggestions/live"
            response = self._make_request(endpoint, data)

            if response.get('status_code') == 20000 and response.get('tasks'):
                task = response['tasks'][0]
                if task.get('result'):
                    keywords = []
                    for kw_item in task['result'][0].get('related_keywords', []):
                        keywords.append({
                            'keyword': kw_item.get('keyword', ''),
                            'search_volume': kw_item.get('search_volume', 0),
                            'cpc': kw_item.get('cpc', 0),
                            'competition': kw_item.get('competition', 0)
                        })
                    return keywords
            return []
        except Exception as e:
            print(f"Error: {e}")
            return []

    def get_serp_data(self, keyword: str, location: str = "Thailand", language: str = "English") -> Dict:
        """Get Google SERP data"""
        try:
            data = [{"keyword": keyword, "location_name": location, "language_name": language, "depth": 10}]
            endpoint = "/serp/google/organic/live/advanced"
            response = self._make_request(endpoint, data)

            if response.get('status_code') == 20000 and response.get('tasks'):
                task = response['tasks'][0]
                if task.get('result'):
                    result = task['result'][0]
                    return {
                        'keyword': keyword,
                        'total_results': result.get('total_count', 0),
                        'items_count': len(result.get('items', [])),
                        'items': result.get('items', [])
                    }
            return {'error': 'No data found'}
        except Exception as e:
            return {'error': str(e)}

    def analyze_competitor_gap(self, your_domain: str, competitor_domain: str, keywords: List[str]) -> Dict:
        """Find keywords competitor ranks for but you don't"""
        gap_keywords = []
        for keyword in keywords[:20]:
            try:
                serp_data = self.get_serp_data(keyword)
                if 'error' not in serp_data:
                    competitor_rank = None
                    your_rank = None
                    for i, item in enumerate(serp_data.get('items', [])[:20], 1):
                        domain = item.get('domain', '')
                        if competitor_domain in domain:
                            competitor_rank = i
                        if your_domain in domain:
                            your_rank = i
                    if competitor_rank and (not your_rank or competitor_rank < your_rank):
                        gap_keywords.append({
                            'keyword': keyword,
                            'your_position': your_rank,
                            'competitor_position': competitor_rank,
                            'gap': your_rank - competitor_rank if your_rank else competitor_rank
                        })
            except Exception:
                continue
        return {'gap_keywords': gap_keywords, 'total_gaps': len(gap_keywords), 'analyzed_keywords': len(keywords)}


def main():
    import argparse
    parser = argparse.ArgumentParser(description='Test DataForSEO Client')
    parser.add_argument('--login', required=True)
    parser.add_argument('--password', required=True)
    parser.add_argument('--keyword', default='podcast')
    parser.add_argument('--location', default='Thailand')
    parser.add_argument('--language', default='Thai')
    args = parser.parse_args()

    print(f"\n🔍 Testing DataForSEO API v3\n")

    try:
        client = DataForSEOClient(args.login, args.password)
        print("Getting keyword suggestions...")
        keywords = client.get_keyword_suggestions(args.keyword, args.location, args.language)

        if keywords:
            print(f" ✅ Found {len(keywords)} keywords\n")
            for kw in keywords[:10]:
                print(f" • {kw['keyword']}: {kw['search_volume']:,} searches")
            print(f"\n ✅ DataForSEO working!")
        else:
            print(" ⚠ No keywords returned")
    except Exception as e:
        print(f"\n❌ ERROR: {e}")


if __name__ == '__main__':
    main()
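Note: the gap rule in analyze_competitor_gap is simply "the competitor appears in the top 20 and either you don't, or you rank below them". A toy check of that same condition with hard-coded SERP items (domains and positions are invented):

# Stand-in for serp_data['items']: (position, domain) pairs.
toy_serp = [(1, "competitor.com"), (4, "example.org"), (7, "yoursite.com")]

your_domain, competitor_domain = "yoursite.com", "competitor.com"
your_rank = next((pos for pos, dom in toy_serp if your_domain in dom), None)
competitor_rank = next((pos for pos, dom in toy_serp if competitor_domain in dom), None)

# Same condition used in analyze_competitor_gap.
is_gap = bool(competitor_rank and (not your_rank or competitor_rank < your_rank))
print(is_gap)  # True: competitor ranks 1st, you rank 7th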
skills/seo-data/scripts/ga4_connector.py (new file, 214 lines)
@@ -0,0 +1,214 @@
#!/usr/bin/env python3
"""
Google Analytics 4 Connector

Fetch performance data from Google Analytics 4 API.
Requires service account credentials with GA4 read access.
"""

import os
import json
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from pathlib import Path


class GA4Connector:
    """Connect to Google Analytics 4 API"""

    def __init__(self, property_id: str, credentials_path: str):
        """
        Initialize GA4 connector

        Args:
            property_id: GA4 property ID (e.g., "G-XXXXXXXXXX")
            credentials_path: Path to service account JSON file
        """
        self.property_id = property_id
        self.credentials_path = credentials_path
        self.client = None
        self._authenticate()

    def _authenticate(self):
        """Authenticate with Google Analytics API"""
        try:
            from google.analytics.data_v1beta import BetaAnalyticsDataClient
            from google.analytics.data_v1beta.types import DateRange, Metric, Dimension, RunReportRequest
            from google.oauth2 import service_account

            # Load credentials
            if not os.path.exists(self.credentials_path):
                raise FileNotFoundError(f"Credentials not found: {self.credentials_path}")

            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=["https://www.googleapis.com/auth/analytics.readonly"]
            )

            self.client = BetaAnalyticsDataClient(credentials=credentials)
            self.types = {
                'DateRange': DateRange,
                'Metric': Metric,
                'Dimension': Dimension,
                'RunReportRequest': RunReportRequest
            }

        except ImportError as e:
            raise ImportError(
                "Google Analytics packages not installed. "
                "Install with: pip install google-analytics-data google-auth google-auth-oauthlib"
            ) from e
        except Exception as e:
            raise Exception(f"Authentication failed: {e}") from e

    def get_page_data(self, url: str, days: int = 30) -> Dict:
        """
        Get page performance data

        Args:
            url: Page URL to analyze
            days: Number of days to look back

        Returns:
            Dictionary with pageviews, sessions, engagement metrics
        """
        if not self.client:
            return {'error': 'Not authenticated'}

        try:
            # Calculate date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)

            # Build request
            request = self.types['RunReportRequest'](
                property=f"properties/{self.property_id.replace('G-', '')}",
                date_ranges=[self.types['DateRange'](
                    start_date=start_date.strftime("%Y-%m-%d"),
                    end_date=end_date.strftime("%Y-%m-%d")
                )],
                dimensions=[self.types['Dimension'](name="pagePath")],
                metrics=[
                    self.types['Metric'](name="screenPageViews"),
                    self.types['Metric'](name="sessions"),
                    self.types['Metric'](name="averageSessionDuration"),
                    self.types['Metric'](name="bounceRate"),
                    self.types['Metric'](name="conversions")
                ],
                dimension_filter={
                    'filter': {
                        'field_name': 'pagePath',
                        'string_filter': {
                            'match_type': 'CONTAINS',
                            'value': url
                        }
                    }
                }
            )

            # Execute request
            response = self.client.run_report(request)

            # Parse response
            if response.rows:
                row = response.rows[0]
                return {
                    'pageviews': int(row.metric_values[0].value),
                    'sessions': int(row.metric_values[1].value),
                    'avg_engagement_time': float(row.metric_values[2].value),
                    'bounce_rate': float(row.metric_values[3].value),
                    'conversions': int(row.metric_values[4].value)
                }
            else:
                return {
                    'pageviews': 0,
                    'sessions': 0,
                    'avg_engagement_time': 0,
                    'bounce_rate': 0,
                    'conversions': 0,
                    'note': 'No data found for this URL'
                }

        except Exception as e:
            return {'error': str(e)}

    def get_top_pages(self, days: int = 30, limit: int = 10) -> List[Dict]:
        """Get top performing pages"""
        if not self.client:
            return []

        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)

            request = self.types['RunReportRequest'](
                property=f"properties/{self.property_id.replace('G-', '')}",
                date_ranges=[self.types['DateRange'](
                    start_date=start_date.strftime("%Y-%m-%d"),
                    end_date=end_date.strftime("%Y-%m-%d")
                )],
                dimensions=[self.types['Dimension'](name="pagePath")],
                metrics=[
                    self.types['Metric'](name="screenPageViews"),
                    self.types['Metric'](name="sessions"),
                    self.types['Metric'](name="averageSessionDuration")
                ],
                order_bys=[{
                    'metric': {'metric_name': 'screenPageViews'},
                    'desc': True
                }],
                limit=limit
            )

            response = self.client.run_report(request)

            pages = []
            for row in response.rows:
                pages.append({
                    'page': row.dimension_values[0].value,
                    'pageviews': int(row.metric_values[0].value),
                    'sessions': int(row.metric_values[1].value),
                    'avg_engagement': float(row.metric_values[2].value)
                })

            return pages

        except Exception as e:
            print(f"Error getting top pages: {e}")
            return []


def main():
    """Test GA4 connector"""
    import argparse

    parser = argparse.ArgumentParser(description='Test GA4 Connector')
    parser.add_argument('--property-id', required=True, help='GA4 Property ID')
    parser.add_argument('--credentials', required=True, help='Path to credentials JSON')
    parser.add_argument('--url', help='Page URL to analyze')
    parser.add_argument('--days', type=int, default=30, help='Days to analyze')

    args = parser.parse_args()

    print(f"\n📊 Testing GA4 Connector")
    print(f"Property: {args.property_id}\n")

    try:
        connector = GA4Connector(args.property_id, args.credentials)

        if args.url:
            print(f"Analyzing: {args.url}")
            data = connector.get_page_data(args.url, args.days)
            print(f"\nResults: {json.dumps(data, indent=2)}")
        else:
            print("Getting top pages...")
            top_pages = connector.get_top_pages(args.days)
            for i, page in enumerate(top_pages[:5], 1):
                print(f"{i}. {page['page']}: {page['pageviews']:,} views")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == '__main__':
    main()
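Note: both Google connectors expect a service-account key file. A small, hedged pre-flight check that the file at least looks like one (the field names are the standard keys Google places in service-account JSON; the path is a placeholder):

import json


def looks_like_service_account(path: str) -> bool:
    """Rough sanity check for a Google service-account key file."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    required = {"type", "project_id", "private_key", "client_email"}
    return data.get("type") == "service_account" and required.issubset(data)


if __name__ == "__main__":
    print(looks_like_service_account("path/to/ga4-credentials.json"))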
skills/seo-data/scripts/gsc_connector.py (new file, 270 lines)
@@ -0,0 +1,270 @@
#!/usr/bin/env python3
"""
Google Search Console Connector

Fetch search performance data from Google Search Console API.
Requires service account credentials with GSC read access.
"""

import os
import json
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from pathlib import Path


class GSCConnector:
    """Connect to Google Search Console API"""

    def __init__(self, site_url: str, credentials_path: str):
        """
        Initialize GSC connector

        Args:
            site_url: Site URL (e.g., "https://yoursite.com")
            credentials_path: Path to service account JSON file
        """
        self.site_url = site_url
        self.credentials_path = credentials_path
        self.service = None
        self._authenticate()

    def _authenticate(self):
        """Authenticate with Google Search Console API"""
        try:
            from google.oauth2 import service_account
            from googleapiclient.discovery import build

            # Load credentials
            if not os.path.exists(self.credentials_path):
                raise FileNotFoundError(f"Credentials not found: {self.credentials_path}")

            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=["https://www.googleapis.com/auth/webmasters.readonly"]
            )

            self.service = build('webmasters', 'v3', credentials=credentials)

        except ImportError as e:
            raise ImportError(
                "Google API packages not installed. "
                "Install with: pip install google-api-python-client google-auth google-auth-oauthlib"
            ) from e
        except Exception as e:
            raise Exception(f"Authentication failed: {e}") from e

    def get_page_data(self, url: str, days: int = 30) -> Dict:
        """
        Get page search performance data

        Args:
            url: Page URL to analyze
            days: Number of days to look back

        Returns:
            Dictionary with impressions, clicks, position, CTR
        """
        if not self.service:
            return {'error': 'Not authenticated'}

        try:
            # Calculate date range
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)

            # Build request body
            request_body = {
                'startDate': start_date.strftime("%Y-%m-%d"),
                'endDate': end_date.strftime("%Y-%m-%d"),
                'dimensions': ['page', 'query'],
                'rowLimit': 1000
            }

            # Execute request
            response = self.service.searchanalytics().query(
                siteUrl=self.site_url,
                body=request_body
            ).execute()

            # Filter for specific URL
            if 'rows' in response:
                url_rows = [row for row in response['rows'] if url in row['keys'][0]]

                if url_rows:
                    # Aggregate data
                    total_impressions = sum(row.get('impressions', 0) for row in url_rows)
                    total_clicks = sum(row.get('clicks', 0) for row in url_rows)
                    avg_position = sum(row.get('position', 0) * row.get('impressions', 0) for row in url_rows) / total_impressions if total_impressions > 0 else 0

                    # Top keywords
                    keywords = sorted(url_rows, key=lambda x: x.get('clicks', 0), reverse=True)[:5]

                    return {
                        'impressions': int(total_impressions),
                        'clicks': int(total_clicks),
                        'avg_position': round(avg_position, 2),
                        'ctr': round(total_clicks / total_impressions * 100, 2) if total_impressions > 0 else 0,
                        'top_keywords': [
                            {
                                'keyword': row['keys'][1],
                                'position': round(row.get('position', 0), 2),
                                'clicks': int(row.get('clicks', 0))
                            }
                            for row in keywords
                        ]
                    }

            return {
                'impressions': 0,
                'clicks': 0,
                'avg_position': 0,
                'ctr': 0,
                'top_keywords': [],
                'note': 'No data found for this URL'
            }

        except Exception as e:
            return {'error': str(e)}

    def get_keyword_positions(self, days: int = 30) -> List[Dict]:
        """Get keyword rankings"""
        if not self.service:
            return []

        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)

            request_body = {
                'startDate': start_date.strftime("%Y-%m-%d"),
                'endDate': end_date.strftime("%Y-%m-%d"),
                'dimensions': ['query'],
                'rowLimit': 1000
            }

            response = self.service.searchanalytics().query(
                siteUrl=self.site_url,
                body=request_body
            ).execute()

            keywords = []
            if 'rows' in response:
                for row in response['rows']:
                    keywords.append({
                        'keyword': row['keys'][0],
                        'position': round(row.get('position', 0), 2),
                        'impressions': int(row.get('impressions', 0)),
                        'clicks': int(row.get('clicks', 0)),
                        'ctr': round(row.get('ctr', 0) * 100, 2)
                    })

            return sorted(keywords, key=lambda x: x['impressions'], reverse=True)

        except Exception as e:
            print(f"Error getting keyword positions: {e}")
            return []

    def get_quick_wins(self, min_position: int = 11, max_position: int = 20) -> List[Dict]:
        """
        Find keywords ranking 11-20 (page 2 opportunities)

        Args:
            min_position: Minimum position (default 11)
            max_position: Maximum position (default 20)

        Returns:
            List of keywords with optimization opportunities
        """
        keywords = self.get_keyword_positions(days=90)  # Last 90 days

        quick_wins = []
        for kw in keywords:
            if min_position <= kw['position'] <= max_position:
                quick_wins.append({
                    'keyword': kw['keyword'],
                    'current_position': kw['position'],
                    'search_volume': kw['impressions'],  # Approximation
                    'clicks': kw['clicks'],
                    'ctr': kw['ctr'],
                    'priority_score': self._calculate_priority(kw),
                    'recommendation': f"Optimize content for '{kw['keyword']}' to reach top 10"
                })

        return sorted(quick_wins, key=lambda x: x['priority_score'], reverse=True)

    def _calculate_priority(self, keyword_data: Dict) -> int:
        """Calculate priority score for keyword optimization"""
        score = 0

        # Higher impressions = more potential traffic
        if keyword_data['impressions'] > 1000:
            score += 40
        elif keyword_data['impressions'] > 500:
            score += 30
        elif keyword_data['impressions'] > 100:
            score += 20

        # Lower CTR = more room for improvement
        if keyword_data['ctr'] < 1:
            score += 30
        elif keyword_data['ctr'] < 3:
            score += 20

        # Position closer to top 10 = easier to rank
        if keyword_data['position'] <= 12:
            score += 30
        elif keyword_data['position'] <= 15:
            score += 20
        else:
            score += 10

        return score


def main():
    """Test GSC connector"""
    import argparse

    parser = argparse.ArgumentParser(description='Test GSC Connector')
    parser.add_argument('--site-url', required=True, help='Site URL')
    parser.add_argument('--credentials', required=True, help='Path to credentials JSON')
    parser.add_argument('--url', help='Page URL to analyze')
    parser.add_argument('--days', type=int, default=30, help='Days to analyze')
    parser.add_argument('--quick-wins', action='store_true', help='Find quick win keywords')

    args = parser.parse_args()

    print(f"\n🔍 Testing GSC Connector")
    print(f"Site: {args.site_url}\n")

    try:
        connector = GSCConnector(args.site_url, args.credentials)

        if args.quick_wins:
            print("Finding quick wins (position 11-20)...")
            quick_wins = connector.get_quick_wins()
            print(f"\nFound {len(quick_wins)} opportunities:\n")
            for i, kw in enumerate(quick_wins[:10], 1):
                print(f"{i}. {kw['keyword']}")
                print(f" Position: {kw['current_position']} | "
                      f"Impressions: {kw['search_volume']:,} | "
                      f"Priority: {kw['priority_score']}")
                print()
        elif args.url:
            print(f"Analyzing: {args.url}")
            data = connector.get_page_data(args.url, args.days)
            print(f"\nResults: {json.dumps(data, indent=2)}")
        else:
            print("Getting top keywords...")
            keywords = connector.get_keyword_positions(args.days)
            for i, kw in enumerate(keywords[:10], 1):
                print(f"{i}. {kw['keyword']}: Position {kw['position']} "
                      f"({kw['impressions']:,} impressions)")

    except Exception as e:
        print(f"Error: {e}")


if __name__ == '__main__':
    main()
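Note: _calculate_priority is a simple additive heuristic, so one worked example (numbers invented) makes the weighting concrete: 1,200 impressions scores +40, a 0.8% CTR scores +30, and position 11.5 scores +30, giving the maximum of 100. A standalone copy of that weighting, for illustration only:

def priority(impressions: int, ctr: float, position: float) -> int:
    """Same thresholds as GSCConnector._calculate_priority, copied for illustration."""
    score = 0
    if impressions > 1000:
        score += 40
    elif impressions > 500:
        score += 30
    elif impressions > 100:
        score += 20
    if ctr < 1:
        score += 30
    elif ctr < 3:
        score += 20
    if position <= 12:
        score += 30
    elif position <= 15:
        score += 20
    else:
        score += 10
    return score


print(priority(1200, 0.8, 11.5))  # 100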
skills/seo-data/scripts/requirements.txt (new file, 24 lines)
@@ -0,0 +1,24 @@
# SEO Data - Dependencies

# Google APIs
google-analytics-data>=0.18.0
google-auth>=2.23.0
google-auth-oauthlib>=1.1.0
google-auth-httplib2>=0.1.1
google-api-python-client>=2.100.0

# HTTP and API requests
requests>=2.31.0
aiohttp>=3.9.0

# Data handling
pandas>=2.1.0

# Configuration and environment
python-dotenv>=1.0.0

# Caching
diskcache>=5.6.0

# Date/time handling
python-dateutil>=2.8.2
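Note: the Google packages above are only exercised when GA4 or GSC is enabled; data_aggregator.py already degrades gracefully through its ImportError handling. A small, hedged check of which optional dependencies are importable in the current environment:

import importlib.util


def available(module: str) -> bool:
    """True if the module can be found without importing it."""
    try:
        return importlib.util.find_spec(module) is not None
    except ModuleNotFoundError:
        return False


for mod in ("google.analytics.data_v1beta", "googleapiclient", "requests", "pandas"):
    print(f"{mod}: {'available' if available(mod) else 'missing'}")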
skills/seo-data/scripts/umami_connector.py (new file, 63 lines)
@@ -0,0 +1,63 @@
#!/usr/bin/env python3
"""Umami Analytics Connector - Full Implementation"""
import requests
from typing import Dict, List, Optional
from datetime import datetime, timedelta


class UmamiConnector:
    def __init__(self, api_url: str, api_key: str, website_id: Optional[str] = None):
        self.api_url = api_url.rstrip('/')
        self.api_key = api_key
        self.website_id = website_id
        self.headers = {'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'}

    def _make_request(self, endpoint: str, params: Optional[Dict] = None) -> Dict:
        url = f"{self.api_url}{endpoint}"
        response = requests.get(url, headers=self.headers, params=params, timeout=30)
        response.raise_for_status()
        return response.json()

    def get_page_data(self, url: str, days: int = 30) -> Dict:
        # Note: the url argument is currently unused; this returns site-wide stats.
        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)
            params = {'startAt': int(start_date.timestamp() * 1000), 'endAt': int(end_date.timestamp() * 1000)}
            stats = self._make_request(f'/websites/{self.website_id}/stats', params)
            return {
                'pageviews': stats.get('pageviews', 0),
                'uniques': stats.get('uniques', 0),
                'bounce_rate': stats.get('bounces', 0) / max(stats.get('visits', 1), 1) * 100,
                'source': 'umami'
            }
        except Exception as e:
            return {'error': str(e)}

    def get_website_stats(self, days: int = 30) -> Dict:
        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)
            params = {'startAt': int(start_date.timestamp() * 1000), 'endAt': int(end_date.timestamp() * 1000)}
            stats = self._make_request(f'/websites/{self.website_id}/stats', params)
            return {'pageviews': stats.get('pageviews', 0), 'uniques': stats.get('uniques', 0)}
        except Exception as e:
            return {'error': str(e)}

    def get_top_pages(self, days: int = 30, limit: int = 10) -> List[Dict]:
        # Stub: top-pages reporting is not implemented yet.
        return []

    def test_connection(self) -> bool:
        try:
            self._make_request(f'/websites/{self.website_id}')
            return True
        except Exception:
            return False


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--api-url', required=True)
    parser.add_argument('--api-key', required=True)
    parser.add_argument('--website-id', required=True)
    args = parser.parse_args()
    connector = UmamiConnector(args.api_url, args.api_key, args.website_id)
    print("Connected:", connector.test_connection())