538 lines
23 KiB
Python
538 lines
23 KiB
Python
"""Google Search Console Service for ALwrity."""
|
|
|
|
import os
|
|
import json
|
|
import sqlite3
|
|
from typing import Dict, List, Optional, Any
|
|
from datetime import datetime, timedelta
|
|
from google.auth.transport.requests import Request as GoogleRequest
|
|
from google.oauth2.credentials import Credentials
|
|
from google_auth_oauthlib.flow import Flow
|
|
from googleapiclient.discovery import build
|
|
from loguru import logger
|
|
|
|
class GSCService:
|
|
"""Service for Google Search Console integration."""
|
|
|
|
def __init__(self, db_path: str = "alwrity.db"):
|
|
"""Initialize GSC service with database connection."""
|
|
self.db_path = db_path
|
|
# Resolve credentials file robustly: env override or project-relative default
|
|
env_credentials_path = os.getenv("GSC_CREDENTIALS_FILE")
|
|
if env_credentials_path:
|
|
self.credentials_file = env_credentials_path
|
|
else:
|
|
# Default to <backend>/gsc_credentials.json regardless of CWD
|
|
services_dir = os.path.dirname(__file__)
|
|
backend_dir = os.path.abspath(os.path.join(services_dir, os.pardir))
|
|
self.credentials_file = os.path.join(backend_dir, "gsc_credentials.json")
|
|
logger.info(f"GSC credentials file path set to: {self.credentials_file}")
|
|
self.scopes = ['https://www.googleapis.com/auth/webmasters.readonly']
|
|
self._init_gsc_tables()
|
|
logger.info("GSC Service initialized successfully")
|
|
|
|
def _init_gsc_tables(self):
|
|
"""Initialize GSC-related database tables."""
|
|
try:
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# GSC credentials table
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS gsc_credentials (
|
|
user_id TEXT PRIMARY KEY,
|
|
credentials_json TEXT NOT NULL,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
# GSC data cache table
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS gsc_data_cache (
|
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
user_id TEXT NOT NULL,
|
|
site_url TEXT NOT NULL,
|
|
data_type TEXT NOT NULL,
|
|
data_json TEXT NOT NULL,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
expires_at TIMESTAMP NOT NULL,
|
|
FOREIGN KEY (user_id) REFERENCES gsc_credentials (user_id)
|
|
)
|
|
''')
|
|
|
|
conn.commit()
|
|
logger.info("GSC database tables initialized successfully")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error initializing GSC tables: {e}")
|
|
raise
|
|
|
|
def save_user_credentials(self, user_id: str, credentials: Credentials) -> bool:
|
|
"""Save user's GSC credentials to database."""
|
|
try:
|
|
# Read client credentials from file to ensure we have all required fields
|
|
with open(self.credentials_file, 'r') as f:
|
|
client_config = json.load(f)
|
|
|
|
web_config = client_config.get('web', {})
|
|
|
|
credentials_json = json.dumps({
|
|
'token': credentials.token,
|
|
'refresh_token': credentials.refresh_token,
|
|
'token_uri': credentials.token_uri or web_config.get('token_uri'),
|
|
'client_id': credentials.client_id or web_config.get('client_id'),
|
|
'client_secret': credentials.client_secret or web_config.get('client_secret'),
|
|
'scopes': credentials.scopes
|
|
})
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT OR REPLACE INTO gsc_credentials
|
|
(user_id, credentials_json, updated_at)
|
|
VALUES (?, ?, CURRENT_TIMESTAMP)
|
|
''', (user_id, credentials_json))
|
|
conn.commit()
|
|
|
|
logger.info(f"GSC credentials saved for user: {user_id}")
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error saving GSC credentials for user {user_id}: {e}")
|
|
return False
|
|
|
|
def load_user_credentials(self, user_id: str) -> Optional[Credentials]:
|
|
"""Load user's GSC credentials from database."""
|
|
try:
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT credentials_json FROM gsc_credentials
|
|
WHERE user_id = ?
|
|
''', (user_id,))
|
|
|
|
result = cursor.fetchone()
|
|
if not result:
|
|
return None
|
|
|
|
credentials_data = json.loads(result[0])
|
|
|
|
# Check for required fields, but allow connection without refresh token
|
|
required_fields = ['token_uri', 'client_id', 'client_secret']
|
|
missing_fields = [field for field in required_fields if not credentials_data.get(field)]
|
|
|
|
if missing_fields:
|
|
logger.warning(f"GSC credentials for user {user_id} missing required fields: {missing_fields}")
|
|
return None
|
|
|
|
credentials = Credentials.from_authorized_user_info(credentials_data, self.scopes)
|
|
|
|
# Refresh token if needed and possible
|
|
if credentials.expired:
|
|
if credentials.refresh_token:
|
|
try:
|
|
credentials.refresh(GoogleRequest())
|
|
self.save_user_credentials(user_id, credentials)
|
|
except Exception as e:
|
|
logger.error(f"Failed to refresh GSC token for user {user_id}: {e}")
|
|
return None
|
|
else:
|
|
logger.warning(f"GSC token expired for user {user_id} but no refresh token available - user needs to re-authorize")
|
|
return None
|
|
|
|
return credentials
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error loading GSC credentials for user {user_id}: {e}")
|
|
return None
|
|
|
|
def get_oauth_url(self, user_id: str) -> str:
|
|
"""Get OAuth authorization URL for GSC."""
|
|
try:
|
|
logger.info(f"Generating OAuth URL for user: {user_id}")
|
|
|
|
if not os.path.exists(self.credentials_file):
|
|
raise FileNotFoundError(f"GSC credentials file not found: {self.credentials_file}")
|
|
|
|
redirect_uri = os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
|
|
flow = Flow.from_client_secrets_file(
|
|
self.credentials_file,
|
|
scopes=self.scopes,
|
|
redirect_uri=redirect_uri
|
|
)
|
|
|
|
authorization_url, state = flow.authorization_url(
|
|
access_type='offline',
|
|
include_granted_scopes='true',
|
|
prompt='consent' # Force consent screen to get refresh token
|
|
)
|
|
|
|
logger.info(f"OAuth URL generated for user: {user_id}")
|
|
|
|
# Store state for verification
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS gsc_oauth_states (
|
|
state TEXT PRIMARY KEY,
|
|
user_id TEXT NOT NULL,
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
|
|
)
|
|
''')
|
|
|
|
cursor.execute('''
|
|
INSERT OR REPLACE INTO gsc_oauth_states (state, user_id)
|
|
VALUES (?, ?)
|
|
''', (state, user_id))
|
|
conn.commit()
|
|
|
|
logger.info(f"OAuth URL generated successfully for user: {user_id}")
|
|
return authorization_url
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error generating OAuth URL for user {user_id}: {e}")
|
|
logger.error(f"Error type: {type(e).__name__}")
|
|
logger.error(f"Error details: {str(e)}")
|
|
raise
|
|
|
|
def handle_oauth_callback(self, authorization_code: str, state: str) -> bool:
|
|
"""Handle OAuth callback and save credentials."""
|
|
try:
|
|
logger.info(f"Handling OAuth callback with state: {state}")
|
|
|
|
# Verify state
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute('''
|
|
SELECT user_id FROM gsc_oauth_states WHERE state = ?
|
|
''', (state,))
|
|
|
|
result = cursor.fetchone()
|
|
|
|
if not result:
|
|
# Check if this is a duplicate callback by looking for recent credentials
|
|
cursor.execute('SELECT user_id, credentials_json FROM gsc_credentials ORDER BY updated_at DESC LIMIT 1')
|
|
recent_credentials = cursor.fetchone()
|
|
|
|
if recent_credentials:
|
|
logger.info("Duplicate callback detected - returning success")
|
|
return True
|
|
|
|
# If no recent credentials, try to find any recent state
|
|
cursor.execute('SELECT state, user_id FROM gsc_oauth_states ORDER BY created_at DESC LIMIT 1')
|
|
recent_state = cursor.fetchone()
|
|
if recent_state:
|
|
user_id = recent_state[1]
|
|
# Clean up the old state
|
|
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (recent_state[0],))
|
|
conn.commit()
|
|
else:
|
|
raise ValueError("Invalid OAuth state")
|
|
else:
|
|
user_id = result[0]
|
|
|
|
# Clean up state
|
|
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
|
|
conn.commit()
|
|
|
|
# Exchange code for credentials
|
|
flow = Flow.from_client_secrets_file(
|
|
self.credentials_file,
|
|
scopes=self.scopes,
|
|
redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
|
|
)
|
|
|
|
flow.fetch_token(code=authorization_code)
|
|
credentials = flow.credentials
|
|
|
|
# Save credentials
|
|
success = self.save_user_credentials(user_id, credentials)
|
|
|
|
if success:
|
|
logger.info(f"OAuth callback handled successfully for user: {user_id}")
|
|
else:
|
|
logger.error(f"Failed to save credentials for user: {user_id}")
|
|
|
|
return success
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error handling OAuth callback: {e}")
|
|
return False
|
|
|
|
def get_authenticated_service(self, user_id: str):
|
|
"""Get authenticated GSC service for user."""
|
|
try:
|
|
credentials = self.load_user_credentials(user_id)
|
|
if not credentials:
|
|
raise ValueError("No valid credentials found")
|
|
|
|
service = build('searchconsole', 'v1', credentials=credentials)
|
|
logger.info(f"Authenticated GSC service created for user: {user_id}")
|
|
return service
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error creating authenticated GSC service for user {user_id}: {e}")
|
|
raise
|
|
|
|
def get_site_list(self, user_id: str) -> List[Dict[str, Any]]:
|
|
"""Get list of sites from GSC."""
|
|
try:
|
|
service = self.get_authenticated_service(user_id)
|
|
sites = service.sites().list().execute()
|
|
|
|
site_list = []
|
|
for site in sites.get('siteEntry', []):
|
|
site_list.append({
|
|
'siteUrl': site.get('siteUrl'),
|
|
'permissionLevel': site.get('permissionLevel')
|
|
})
|
|
|
|
logger.info(f"Retrieved {len(site_list)} sites for user: {user_id}")
|
|
return site_list
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting site list for user {user_id}: {e}")
|
|
raise
|
|
|
|
def get_search_analytics(self, user_id: str, site_url: str,
|
|
start_date: str = None, end_date: str = None) -> Dict[str, Any]:
|
|
"""Get search analytics data from GSC."""
|
|
try:
|
|
# Set default date range (last 30 days)
|
|
if not end_date:
|
|
end_date = datetime.now().strftime('%Y-%m-%d')
|
|
if not start_date:
|
|
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
|
|
|
|
# Check cache first
|
|
cache_key = f"{user_id}_{site_url}_{start_date}_{end_date}"
|
|
cached_data = self._get_cached_data(user_id, site_url, 'analytics', cache_key)
|
|
if cached_data:
|
|
logger.info(f"Returning cached analytics data for user: {user_id}")
|
|
return cached_data
|
|
|
|
service = self.get_authenticated_service(user_id)
|
|
if not service:
|
|
logger.error(f"Failed to get authenticated GSC service for user: {user_id}")
|
|
return {'error': 'Authentication failed', 'rows': [], 'rowCount': 0}
|
|
|
|
# Step 1: Verify data presence first (as per GSC API documentation)
|
|
verification_request = {
|
|
'startDate': start_date,
|
|
'endDate': end_date,
|
|
'dimensions': ['date'] # Only date dimension for verification
|
|
}
|
|
|
|
logger.info(f"GSC Data verification request for user {user_id}: {verification_request}")
|
|
|
|
try:
|
|
verification_response = service.searchanalytics().query(
|
|
siteUrl=site_url,
|
|
body=verification_request
|
|
).execute()
|
|
|
|
logger.info(f"GSC Data verification response for user {user_id}: {verification_response}")
|
|
|
|
# Check if we have any data
|
|
verification_rows = verification_response.get('rows', [])
|
|
if not verification_rows:
|
|
logger.warning(f"No GSC data available for user {user_id} in date range {start_date} to {end_date}")
|
|
return {'error': 'No data available for this date range', 'rows': [], 'rowCount': 0}
|
|
|
|
logger.info(f"GSC Data verification successful - found {len(verification_rows)} days with data")
|
|
|
|
except Exception as verification_error:
|
|
logger.error(f"GSC Data verification failed for user {user_id}: {verification_error}")
|
|
return {'error': f'Data verification failed: {str(verification_error)}', 'rows': [], 'rowCount': 0}
|
|
|
|
# Step 2: Get overall metrics (no dimensions)
|
|
request = {
|
|
'startDate': start_date,
|
|
'endDate': end_date,
|
|
'dimensions': [], # No dimensions for overall metrics
|
|
'rowLimit': 1000
|
|
}
|
|
|
|
logger.info(f"GSC API request for user {user_id}: {request}")
|
|
|
|
try:
|
|
response = service.searchanalytics().query(
|
|
siteUrl=site_url,
|
|
body=request
|
|
).execute()
|
|
|
|
logger.info(f"GSC API response for user {user_id}: {response}")
|
|
except Exception as api_error:
|
|
logger.error(f"GSC API call failed for user {user_id}: {api_error}")
|
|
return {'error': str(api_error), 'rows': [], 'rowCount': 0}
|
|
|
|
# Step 3: Get query-level data for insights (as per documentation)
|
|
query_request = {
|
|
'startDate': start_date,
|
|
'endDate': end_date,
|
|
'dimensions': ['query'], # Get query-level data
|
|
'rowLimit': 1000
|
|
}
|
|
|
|
logger.info(f"GSC Query-level request for user {user_id}: {query_request}")
|
|
|
|
try:
|
|
query_response = service.searchanalytics().query(
|
|
siteUrl=site_url,
|
|
body=query_request
|
|
).execute()
|
|
|
|
logger.info(f"GSC Query-level response for user {user_id}: {query_response}")
|
|
|
|
# Combine overall metrics with query-level data
|
|
analytics_data = {
|
|
'overall_metrics': {
|
|
'rows': response.get('rows', []),
|
|
'rowCount': response.get('rowCount', 0)
|
|
},
|
|
'query_data': {
|
|
'rows': query_response.get('rows', []),
|
|
'rowCount': query_response.get('rowCount', 0)
|
|
},
|
|
'verification_data': {
|
|
'rows': verification_rows,
|
|
'rowCount': len(verification_rows)
|
|
},
|
|
'startDate': start_date,
|
|
'endDate': end_date,
|
|
'siteUrl': site_url
|
|
}
|
|
|
|
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
|
|
|
|
logger.info(f"Retrieved comprehensive analytics data for user: {user_id}, site: {site_url}")
|
|
return analytics_data
|
|
|
|
except Exception as query_error:
|
|
logger.error(f"GSC Query-level request failed for user {user_id}: {query_error}")
|
|
# Fall back to overall metrics only
|
|
analytics_data = {
|
|
'overall_metrics': {
|
|
'rows': response.get('rows', []),
|
|
'rowCount': response.get('rowCount', 0)
|
|
},
|
|
'query_data': {'rows': [], 'rowCount': 0},
|
|
'verification_data': {
|
|
'rows': verification_rows,
|
|
'rowCount': len(verification_rows)
|
|
},
|
|
'startDate': start_date,
|
|
'endDate': end_date,
|
|
'siteUrl': site_url,
|
|
'warning': f'Query-level data unavailable: {str(query_error)}'
|
|
}
|
|
|
|
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
|
|
return analytics_data
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting search analytics for user {user_id}: {e}")
|
|
raise
|
|
|
|
def get_sitemaps(self, user_id: str, site_url: str) -> List[Dict[str, Any]]:
|
|
"""Get sitemaps from GSC."""
|
|
try:
|
|
service = self.get_authenticated_service(user_id)
|
|
response = service.sitemaps().list(siteUrl=site_url).execute()
|
|
|
|
sitemaps = []
|
|
for sitemap in response.get('sitemap', []):
|
|
sitemaps.append({
|
|
'path': sitemap.get('path'),
|
|
'lastSubmitted': sitemap.get('lastSubmitted'),
|
|
'contents': sitemap.get('contents', [])
|
|
})
|
|
|
|
logger.info(f"Retrieved {len(sitemaps)} sitemaps for user: {user_id}, site: {site_url}")
|
|
return sitemaps
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting sitemaps for user {user_id}: {e}")
|
|
raise
|
|
|
|
def revoke_user_access(self, user_id: str) -> bool:
|
|
"""Revoke user's GSC access."""
|
|
try:
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
|
|
# Delete credentials
|
|
cursor.execute('DELETE FROM gsc_credentials WHERE user_id = ?', (user_id,))
|
|
|
|
# Delete cached data
|
|
cursor.execute('DELETE FROM gsc_data_cache WHERE user_id = ?', (user_id,))
|
|
|
|
# Delete OAuth states
|
|
cursor.execute('DELETE FROM gsc_oauth_states WHERE user_id = ?', (user_id,))
|
|
|
|
conn.commit()
|
|
|
|
logger.info(f"GSC access revoked for user: {user_id}")
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error revoking GSC access for user {user_id}: {e}")
|
|
return False
|
|
|
|
def clear_incomplete_credentials(self, user_id: str) -> bool:
|
|
"""Clear incomplete GSC credentials that are missing required fields."""
|
|
try:
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('DELETE FROM gsc_credentials WHERE user_id = ?', (user_id,))
|
|
conn.commit()
|
|
|
|
logger.info(f"Cleared incomplete GSC credentials for user: {user_id}")
|
|
return True
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error clearing incomplete credentials for user {user_id}: {e}")
|
|
return False
|
|
|
|
def _get_cached_data(self, user_id: str, site_url: str, data_type: str, cache_key: str) -> Optional[Dict]:
|
|
"""Get cached data if not expired."""
|
|
try:
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
SELECT data_json FROM gsc_data_cache
|
|
WHERE user_id = ? AND site_url = ? AND data_type = ?
|
|
AND expires_at > CURRENT_TIMESTAMP
|
|
''', (user_id, site_url, data_type))
|
|
|
|
result = cursor.fetchone()
|
|
if result:
|
|
return json.loads(result[0])
|
|
return None
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error getting cached data: {e}")
|
|
return None
|
|
|
|
def _cache_data(self, user_id: str, site_url: str, data_type: str, data: Dict, cache_key: str):
|
|
"""Cache data with expiration."""
|
|
try:
|
|
expires_at = datetime.now() + timedelta(hours=1) # Cache for 1 hour
|
|
|
|
with sqlite3.connect(self.db_path) as conn:
|
|
cursor = conn.cursor()
|
|
cursor.execute('''
|
|
INSERT OR REPLACE INTO gsc_data_cache
|
|
(user_id, site_url, data_type, data_json, expires_at)
|
|
VALUES (?, ?, ?, ?, ?)
|
|
''', (user_id, site_url, data_type, json.dumps(data), expires_at))
|
|
conn.commit()
|
|
|
|
logger.info(f"Data cached for user: {user_id}, type: {data_type}")
|
|
|
|
except Exception as e:
|
|
logger.error(f"Error caching data: {e}")
|