271 lines
9.9 KiB
Python
271 lines
9.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Google Search Console Connector
|
|
|
|
Fetch search performance data from Google Search Console API.
|
|
Requires service account credentials with GSC read access.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
from datetime import datetime, timedelta
|
|
from typing import Dict, List, Optional
|
|
from pathlib import Path
|
|
|
|
|
|
class GSCConnector:
    """Connect to the Google Search Console (GSC) Search Analytics API.

    Authentication uses a service account JSON key with the read-only
    webmasters scope; the service account must be granted access to the
    GSC property for queries to return data.
    """

    def __init__(self, site_url: str, credentials_path: str):
        """
        Initialize the connector and authenticate immediately.

        Args:
            site_url: Site URL as registered in GSC (e.g., "https://yoursite.com")
            credentials_path: Path to service account JSON file

        Raises:
            FileNotFoundError: If the credentials file does not exist.
            ImportError: If the Google API client packages are not installed.
            RuntimeError: If authentication fails for any other reason.
        """
        self.site_url = site_url
        self.credentials_path = credentials_path
        # googleapiclient resource object; stays None if authentication fails
        self.service = None
        self._authenticate()

    def _authenticate(self):
        """Build an authenticated 'webmasters' v3 service client."""
        try:
            # Imported lazily so this module can be imported (and its
            # ImportError message shown) without the Google libraries present.
            from google.oauth2 import service_account
            from googleapiclient.discovery import build

            if not os.path.exists(self.credentials_path):
                raise FileNotFoundError(f"Credentials not found: {self.credentials_path}")

            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=["https://www.googleapis.com/auth/webmasters.readonly"]
            )

            self.service = build('webmasters', 'v3', credentials=credentials)

        except ImportError as e:
            raise ImportError(
                "Google API packages not installed. "
                "Install with: pip install google-api-python-client google-auth google-auth-oauthlib"
            ) from e
        except FileNotFoundError:
            # Propagate the specific error unchanged instead of wrapping it
            # in a generic "Authentication failed" message.
            raise
        except Exception as e:
            # RuntimeError (an Exception subclass) rather than bare Exception,
            # so callers catching Exception still work.
            raise RuntimeError(f"Authentication failed: {e}") from e

    def _run_query(self, dimensions: List[str], days: int, row_limit: int = 1000) -> Dict:
        """Execute a Search Analytics query and return the raw API response.

        Shared by get_page_data and get_keyword_positions, which previously
        duplicated this request-building code.

        Args:
            dimensions: GSC dimensions to group by (e.g., ['page', 'query']).
            days: Number of days to look back from today.
            row_limit: Maximum number of rows to request.

        Returns:
            Raw response dict; has no 'rows' key when there is no data.
        """
        end_date = datetime.now()
        start_date = end_date - timedelta(days=days)

        request_body = {
            'startDate': start_date.strftime("%Y-%m-%d"),
            'endDate': end_date.strftime("%Y-%m-%d"),
            'dimensions': dimensions,
            'rowLimit': row_limit,
        }

        return self.service.searchanalytics().query(
            siteUrl=self.site_url,
            body=request_body
        ).execute()

    def _aggregate_page_rows(self, url_rows: List[Dict]) -> Dict:
        """Aggregate per-query rows for one page into a summary dict.

        Args:
            url_rows: Rows from a ['page', 'query'] query, already filtered
                to a single page.

        Returns:
            Dict with impressions, clicks, avg_position, ctr (percent),
            and the top 5 keywords by clicks.
        """
        total_impressions = sum(row.get('impressions', 0) for row in url_rows)
        total_clicks = sum(row.get('clicks', 0) for row in url_rows)
        # Impression-weighted average position (unweighted would overstate
        # rarely-shown queries).
        avg_position = (
            sum(row.get('position', 0) * row.get('impressions', 0) for row in url_rows)
            / total_impressions
        ) if total_impressions > 0 else 0

        # Top 5 keywords by clicks
        keywords = sorted(url_rows, key=lambda x: x.get('clicks', 0), reverse=True)[:5]

        return {
            'impressions': int(total_impressions),
            'clicks': int(total_clicks),
            'avg_position': round(avg_position, 2),
            'ctr': round(total_clicks / total_impressions * 100, 2) if total_impressions > 0 else 0,
            'top_keywords': [
                {
                    'keyword': row['keys'][1],
                    'position': round(row.get('position', 0), 2),
                    'clicks': int(row.get('clicks', 0))
                }
                for row in keywords
            ]
        }

    def get_page_data(self, url: str, days: int = 30) -> Dict:
        """
        Get page search performance data

        Args:
            url: Page URL to analyze
            days: Number of days to look back

        Returns:
            Dictionary with impressions, clicks, position, CTR, top keywords.
            On failure returns {'error': <message>} instead of raising.
        """
        if not self.service:
            return {'error': 'Not authenticated'}

        try:
            response = self._run_query(['page', 'query'], days)

            if 'rows' in response:
                # NOTE: substring match preserved from original behavior —
                # "/page" also matches "/page-2"; pass a full, unique URL.
                url_rows = [row for row in response['rows'] if url in row['keys'][0]]

                if url_rows:
                    return self._aggregate_page_rows(url_rows)

            return {
                'impressions': 0,
                'clicks': 0,
                'avg_position': 0,
                'ctr': 0,
                'top_keywords': [],
                'note': 'No data found for this URL'
            }

        except Exception as e:
            # Best-effort API: surface errors as data, never raise.
            return {'error': str(e)}

    def get_keyword_positions(self, days: int = 30) -> List[Dict]:
        """Get keyword rankings for the whole site, sorted by impressions.

        Args:
            days: Number of days to look back.

        Returns:
            List of dicts (keyword, position, impressions, clicks, ctr in
            percent); empty list when unauthenticated or on API error.
        """
        if not self.service:
            return []

        try:
            response = self._run_query(['query'], days)

            keywords = []
            if 'rows' in response:
                for row in response['rows']:
                    keywords.append({
                        'keyword': row['keys'][0],
                        'position': round(row.get('position', 0), 2),
                        'impressions': int(row.get('impressions', 0)),
                        'clicks': int(row.get('clicks', 0)),
                        # API reports CTR as a fraction; convert to percent
                        'ctr': round(row.get('ctr', 0) * 100, 2)
                    })

            return sorted(keywords, key=lambda x: x['impressions'], reverse=True)

        except Exception as e:
            print(f"Error getting keyword positions: {e}")
            return []

    def get_quick_wins(self, min_position: int = 11, max_position: int = 20) -> List[Dict]:
        """
        Find keywords ranking 11-20 (page 2 opportunities)

        Args:
            min_position: Minimum position (default 11)
            max_position: Maximum position (default 20)

        Returns:
            List of keywords with optimization opportunities, sorted by
            priority_score descending.
        """
        keywords = self.get_keyword_positions(days=90)  # Last 90 days

        quick_wins = []
        for kw in keywords:
            if min_position <= kw['position'] <= max_position:
                quick_wins.append({
                    'keyword': kw['keyword'],
                    'current_position': kw['position'],
                    'search_volume': kw['impressions'],  # Approximation
                    'clicks': kw['clicks'],
                    'ctr': kw['ctr'],
                    'priority_score': self._calculate_priority(kw),
                    'recommendation': f"Optimize content for '{kw['keyword']}' to reach top 10"
                })

        return sorted(quick_wins, key=lambda x: x['priority_score'], reverse=True)

    def _calculate_priority(self, keyword_data: Dict) -> int:
        """Calculate priority score (0-100) for keyword optimization.

        Scored on three axes: traffic potential (impressions), improvement
        headroom (low CTR), and ranking proximity to page 1 (position).
        """
        score = 0

        # Higher impressions = more potential traffic
        if keyword_data['impressions'] > 1000:
            score += 40
        elif keyword_data['impressions'] > 500:
            score += 30
        elif keyword_data['impressions'] > 100:
            score += 20

        # Lower CTR = more room for improvement (ctr is in percent)
        if keyword_data['ctr'] < 1:
            score += 30
        elif keyword_data['ctr'] < 3:
            score += 20

        # Position closer to top 10 = easier to rank
        if keyword_data['position'] <= 12:
            score += 30
        elif keyword_data['position'] <= 15:
            score += 20
        else:
            score += 10

        return score
|
|
|
|
|
|
def main():
    """CLI smoke test: authenticate against GSC and print sample data.

    Three modes (mutually exclusive in practice, checked in order):
      --quick-wins      list page-2 keyword opportunities
      --url <page>      analyze a single page's performance
      (neither)         list the site's top keywords

    Errors are printed rather than raised, since this is a manual test tool.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Test GSC Connector')
    parser.add_argument('--site-url', required=True, help='Site URL')
    parser.add_argument('--credentials', required=True, help='Path to credentials JSON')
    parser.add_argument('--url', help='Page URL to analyze')
    parser.add_argument('--days', type=int, default=30, help='Days to analyze')
    parser.add_argument('--quick-wins', action='store_true', help='Find quick win keywords')

    args = parser.parse_args()

    # No placeholders in the first message, so no f-prefix needed (F541).
    print("\n🔍 Testing GSC Connector")
    print(f"Site: {args.site_url}\n")

    try:
        connector = GSCConnector(args.site_url, args.credentials)

        if args.quick_wins:
            print("Finding quick wins (position 11-20)...")
            quick_wins = connector.get_quick_wins()
            print(f"\nFound {len(quick_wins)} opportunities:\n")
            for i, kw in enumerate(quick_wins[:10], 1):
                print(f"{i}. {kw['keyword']}")
                print(f"   Position: {kw['current_position']} | "
                      f"Impressions: {kw['search_volume']:,} | "
                      f"Priority: {kw['priority_score']}")
                print()
        elif args.url:
            print(f"Analyzing: {args.url}")
            data = connector.get_page_data(args.url, args.days)
            print(f"\nResults: {json.dumps(data, indent=2)}")
        else:
            print("Getting top keywords...")
            keywords = connector.get_keyword_positions(args.days)
            for i, kw in enumerate(keywords[:10], 1):
                print(f"{i}. {kw['keyword']}: Position {kw['position']} "
                      f"({kw['impressions']:,} impressions)")

    except Exception as e:
        print(f"Error: {e}")
|
|
|
|
|
|
# Script entry point: only run the CLI harness when executed directly,
# not when imported as a library module.
if __name__ == "__main__":
    main()
|