Base code

Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions


@@ -0,0 +1,537 @@
"""Google Search Console Service for ALwrity."""
import os
import json
import sqlite3
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta, timezone
from google.auth.transport.requests import Request as GoogleRequest
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
from loguru import logger
class GSCService:
"""Service for Google Search Console integration."""
def __init__(self, db_path: str = "alwrity.db"):
"""Initialize GSC service with database connection."""
self.db_path = db_path
# Resolve credentials file robustly: env override or project-relative default
env_credentials_path = os.getenv("GSC_CREDENTIALS_FILE")
if env_credentials_path:
self.credentials_file = env_credentials_path
else:
# Default to <backend>/gsc_credentials.json regardless of CWD
services_dir = os.path.dirname(__file__)
backend_dir = os.path.abspath(os.path.join(services_dir, os.pardir))
self.credentials_file = os.path.join(backend_dir, "gsc_credentials.json")
logger.info(f"GSC credentials file path set to: {self.credentials_file}")
self.scopes = ['https://www.googleapis.com/auth/webmasters.readonly']
self._init_gsc_tables()
logger.info("GSC Service initialized successfully")
def _init_gsc_tables(self):
"""Initialize GSC-related database tables."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
# GSC credentials table
cursor.execute('''
CREATE TABLE IF NOT EXISTS gsc_credentials (
user_id TEXT PRIMARY KEY,
credentials_json TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
# GSC data cache table
cursor.execute('''
CREATE TABLE IF NOT EXISTS gsc_data_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id TEXT NOT NULL,
site_url TEXT NOT NULL,
data_type TEXT NOT NULL,
data_json TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
expires_at TIMESTAMP NOT NULL,
FOREIGN KEY (user_id) REFERENCES gsc_credentials (user_id)
)
''')
conn.commit()
logger.info("GSC database tables initialized successfully")
except Exception as e:
logger.error(f"Error initializing GSC tables: {e}")
raise
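    # Note: SQLite enforces the FOREIGN KEY above only when "PRAGMA foreign_keys = ON" is set on
    # each connection; with the default connections used here it is documentary rather than enforced.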
def save_user_credentials(self, user_id: str, credentials: Credentials) -> bool:
"""Save user's GSC credentials to database."""
try:
# Read client credentials from file to ensure we have all required fields
with open(self.credentials_file, 'r') as f:
client_config = json.load(f)
web_config = client_config.get('web', {})
credentials_json = json.dumps({
'token': credentials.token,
'refresh_token': credentials.refresh_token,
'token_uri': credentials.token_uri or web_config.get('token_uri'),
'client_id': credentials.client_id or web_config.get('client_id'),
'client_secret': credentials.client_secret or web_config.get('client_secret'),
'scopes': credentials.scopes
})
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
INSERT OR REPLACE INTO gsc_credentials
(user_id, credentials_json, updated_at)
VALUES (?, ?, CURRENT_TIMESTAMP)
''', (user_id, credentials_json))
conn.commit()
logger.info(f"GSC credentials saved for user: {user_id}")
return True
except Exception as e:
logger.error(f"Error saving GSC credentials for user {user_id}: {e}")
return False
def load_user_credentials(self, user_id: str) -> Optional[Credentials]:
"""Load user's GSC credentials from database."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT credentials_json FROM gsc_credentials
WHERE user_id = ?
''', (user_id,))
result = cursor.fetchone()
if not result:
return None
credentials_data = json.loads(result[0])
# Check for required fields, but allow connection without refresh token
required_fields = ['token_uri', 'client_id', 'client_secret']
missing_fields = [field for field in required_fields if not credentials_data.get(field)]
if missing_fields:
logger.warning(f"GSC credentials for user {user_id} missing required fields: {missing_fields}")
return None
credentials = Credentials.from_authorized_user_info(credentials_data, self.scopes)
# Refresh token if needed and possible
if credentials.expired:
if credentials.refresh_token:
try:
credentials.refresh(GoogleRequest())
self.save_user_credentials(user_id, credentials)
except Exception as e:
logger.error(f"Failed to refresh GSC token for user {user_id}: {e}")
return None
else:
logger.warning(f"GSC token expired for user {user_id} but no refresh token available - user needs to re-authorize")
return None
return credentials
except Exception as e:
logger.error(f"Error loading GSC credentials for user {user_id}: {e}")
return None
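    # load_user_credentials() returns None when there is no stored row, when the row lacks
    # token_uri/client_id/client_secret, or when the token is expired and cannot be refreshed;
    # an expired token with a working refresh token is refreshed transparently and re-saved.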
def get_oauth_url(self, user_id: str) -> str:
"""Get OAuth authorization URL for GSC."""
try:
logger.info(f"Generating OAuth URL for user: {user_id}")
if not os.path.exists(self.credentials_file):
raise FileNotFoundError(f"GSC credentials file not found: {self.credentials_file}")
redirect_uri = os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
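            # GSC_REDIRECT_URI must match one of the authorized redirect URIs registered for the
            # OAuth client in Google Cloud Console; the localhost default only suits local development.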
flow = Flow.from_client_secrets_file(
self.credentials_file,
scopes=self.scopes,
redirect_uri=redirect_uri
)
authorization_url, state = flow.authorization_url(
access_type='offline',
include_granted_scopes='true',
prompt='consent' # Force consent screen to get refresh token
)
logger.info(f"OAuth URL generated for user: {user_id}")
# Store state for verification
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS gsc_oauth_states (
state TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
''')
cursor.execute('''
INSERT OR REPLACE INTO gsc_oauth_states (state, user_id)
VALUES (?, ?)
''', (state, user_id))
conn.commit()
logger.info(f"OAuth URL generated successfully for user: {user_id}")
return authorization_url
except Exception as e:
logger.error(f"Error generating OAuth URL for user {user_id}: {e}")
logger.error(f"Error type: {type(e).__name__}")
logger.error(f"Error details: {str(e)}")
raise
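    # The returned URL points at Google's consent screen; access_type='offline' plus
    # prompt='consent' is what makes the later token exchange include a refresh token.
    # Illustrative shape (parameter order and encoding may differ):
    #   https://accounts.google.com/o/oauth2/auth?client_id=...&redirect_uri=...&scope=...&state=...&access_type=offline&prompt=consent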
def handle_oauth_callback(self, authorization_code: str, state: str) -> bool:
"""Handle OAuth callback and save credentials."""
try:
logger.info(f"Handling OAuth callback with state: {state}")
# Verify state
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT user_id FROM gsc_oauth_states WHERE state = ?
''', (state,))
result = cursor.fetchone()
if not result:
# Check if this is a duplicate callback by looking for recent credentials
cursor.execute('SELECT user_id, credentials_json FROM gsc_credentials ORDER BY updated_at DESC LIMIT 1')
recent_credentials = cursor.fetchone()
if recent_credentials:
logger.info("Duplicate callback detected - returning success")
return True
# If no recent credentials, try to find any recent state
cursor.execute('SELECT state, user_id FROM gsc_oauth_states ORDER BY created_at DESC LIMIT 1')
recent_state = cursor.fetchone()
if recent_state:
user_id = recent_state[1]
# Clean up the old state
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (recent_state[0],))
conn.commit()
else:
raise ValueError("Invalid OAuth state")
else:
user_id = result[0]
# Clean up state
cursor.execute('DELETE FROM gsc_oauth_states WHERE state = ?', (state,))
conn.commit()
# Exchange code for credentials
flow = Flow.from_client_secrets_file(
self.credentials_file,
scopes=self.scopes,
redirect_uri=os.getenv('GSC_REDIRECT_URI', 'http://localhost:8000/gsc/callback')
)
flow.fetch_token(code=authorization_code)
credentials = flow.credentials
# Save credentials
success = self.save_user_credentials(user_id, credentials)
if success:
logger.info(f"OAuth callback handled successfully for user: {user_id}")
else:
logger.error(f"Failed to save credentials for user: {user_id}")
return success
except Exception as e:
logger.error(f"Error handling OAuth callback: {e}")
return False
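    # Example callback wiring (illustrative only - assumes a FastAPI-style route and a shared
    # `gsc_service` instance; the actual ALwrity route may differ):
    #   @router.get("/gsc/callback")
    #   async def gsc_callback(code: str, state: str):
    #       ok = gsc_service.handle_oauth_callback(code, state)
    #       return RedirectResponse("/settings?gsc=connected" if ok else "/settings?gsc=error")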
def get_authenticated_service(self, user_id: str):
"""Get authenticated GSC service for user."""
try:
credentials = self.load_user_credentials(user_id)
if not credentials:
raise ValueError("No valid credentials found")
service = build('searchconsole', 'v1', credentials=credentials)
logger.info(f"Authenticated GSC service created for user: {user_id}")
return service
except Exception as e:
logger.error(f"Error creating authenticated GSC service for user {user_id}: {e}")
raise
def get_site_list(self, user_id: str) -> List[Dict[str, Any]]:
"""Get list of sites from GSC."""
try:
service = self.get_authenticated_service(user_id)
sites = service.sites().list().execute()
site_list = []
for site in sites.get('siteEntry', []):
site_list.append({
'siteUrl': site.get('siteUrl'),
'permissionLevel': site.get('permissionLevel')
})
logger.info(f"Retrieved {len(site_list)} sites for user: {user_id}")
return site_list
except Exception as e:
logger.error(f"Error getting site list for user {user_id}: {e}")
raise
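    # Example return value (illustrative):
    #   [{'siteUrl': 'sc-domain:example.com', 'permissionLevel': 'siteOwner'},
    #    {'siteUrl': 'https://www.example.com/', 'permissionLevel': 'siteFullUser'}]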
    def get_search_analytics(self, user_id: str, site_url: str,
                             start_date: Optional[str] = None, end_date: Optional[str] = None) -> Dict[str, Any]:
"""Get search analytics data from GSC."""
try:
# Set default date range (last 30 days)
if not end_date:
end_date = datetime.now().strftime('%Y-%m-%d')
if not start_date:
start_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d')
# Check cache first
cache_key = f"{user_id}_{site_url}_{start_date}_{end_date}"
cached_data = self._get_cached_data(user_id, site_url, 'analytics', cache_key)
if cached_data:
logger.info(f"Returning cached analytics data for user: {user_id}")
return cached_data
service = self.get_authenticated_service(user_id)
if not service:
logger.error(f"Failed to get authenticated GSC service for user: {user_id}")
return {'error': 'Authentication failed', 'rows': [], 'rowCount': 0}
# Step 1: Verify data presence first (as per GSC API documentation)
verification_request = {
'startDate': start_date,
'endDate': end_date,
'dimensions': ['date'] # Only date dimension for verification
}
logger.info(f"GSC Data verification request for user {user_id}: {verification_request}")
try:
verification_response = service.searchanalytics().query(
siteUrl=site_url,
body=verification_request
).execute()
logger.info(f"GSC Data verification response for user {user_id}: {verification_response}")
# Check if we have any data
verification_rows = verification_response.get('rows', [])
if not verification_rows:
logger.warning(f"No GSC data available for user {user_id} in date range {start_date} to {end_date}")
return {'error': 'No data available for this date range', 'rows': [], 'rowCount': 0}
logger.info(f"GSC Data verification successful - found {len(verification_rows)} days with data")
except Exception as verification_error:
logger.error(f"GSC Data verification failed for user {user_id}: {verification_error}")
return {'error': f'Data verification failed: {str(verification_error)}', 'rows': [], 'rowCount': 0}
# Step 2: Get overall metrics (no dimensions)
request = {
'startDate': start_date,
'endDate': end_date,
'dimensions': [], # No dimensions for overall metrics
'rowLimit': 1000
}
logger.info(f"GSC API request for user {user_id}: {request}")
try:
response = service.searchanalytics().query(
siteUrl=site_url,
body=request
).execute()
logger.info(f"GSC API response for user {user_id}: {response}")
except Exception as api_error:
logger.error(f"GSC API call failed for user {user_id}: {api_error}")
return {'error': str(api_error), 'rows': [], 'rowCount': 0}
# Step 3: Get query-level data for insights (as per documentation)
query_request = {
'startDate': start_date,
'endDate': end_date,
'dimensions': ['query'], # Get query-level data
'rowLimit': 1000
}
logger.info(f"GSC Query-level request for user {user_id}: {query_request}")
try:
query_response = service.searchanalytics().query(
siteUrl=site_url,
body=query_request
).execute()
logger.info(f"GSC Query-level response for user {user_id}: {query_response}")
# Combine overall metrics with query-level data
analytics_data = {
'overall_metrics': {
'rows': response.get('rows', []),
'rowCount': response.get('rowCount', 0)
},
'query_data': {
'rows': query_response.get('rows', []),
'rowCount': query_response.get('rowCount', 0)
},
'verification_data': {
'rows': verification_rows,
'rowCount': len(verification_rows)
},
'startDate': start_date,
'endDate': end_date,
'siteUrl': site_url
}
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
logger.info(f"Retrieved comprehensive analytics data for user: {user_id}, site: {site_url}")
return analytics_data
except Exception as query_error:
logger.error(f"GSC Query-level request failed for user {user_id}: {query_error}")
# Fall back to overall metrics only
analytics_data = {
'overall_metrics': {
'rows': response.get('rows', []),
'rowCount': response.get('rowCount', 0)
},
'query_data': {'rows': [], 'rowCount': 0},
'verification_data': {
'rows': verification_rows,
'rowCount': len(verification_rows)
},
'startDate': start_date,
'endDate': end_date,
'siteUrl': site_url,
'warning': f'Query-level data unavailable: {str(query_error)}'
}
self._cache_data(user_id, site_url, 'analytics', analytics_data, cache_key)
return analytics_data
except Exception as e:
logger.error(f"Error getting search analytics for user {user_id}: {e}")
raise
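    # The returned dict mirrors the structure built above: 'overall_metrics', 'query_data' and
    # 'verification_data' (each with 'rows'/'rowCount'), plus 'startDate', 'endDate', 'siteUrl'
    # and, on partial failure, 'warning' or 'error'. An individual GSC row looks roughly like
    #   {'keys': ['example query'], 'clicks': 12, 'impressions': 340, 'ctr': 0.035, 'position': 8.2}
    # ('keys' is omitted when the request has no dimensions).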
def get_sitemaps(self, user_id: str, site_url: str) -> List[Dict[str, Any]]:
"""Get sitemaps from GSC."""
try:
service = self.get_authenticated_service(user_id)
response = service.sitemaps().list(siteUrl=site_url).execute()
sitemaps = []
for sitemap in response.get('sitemap', []):
sitemaps.append({
'path': sitemap.get('path'),
'lastSubmitted': sitemap.get('lastSubmitted'),
'contents': sitemap.get('contents', [])
})
logger.info(f"Retrieved {len(sitemaps)} sitemaps for user: {user_id}, site: {site_url}")
return sitemaps
except Exception as e:
logger.error(f"Error getting sitemaps for user {user_id}: {e}")
raise
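    # Example entry (illustrative): {'path': 'https://www.example.com/sitemap.xml',
    #   'lastSubmitted': '2024-01-01T00:00:00.000Z', 'contents': [{'type': 'web', 'submitted': '100'}]}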
def revoke_user_access(self, user_id: str) -> bool:
"""Revoke user's GSC access."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
# Delete credentials
cursor.execute('DELETE FROM gsc_credentials WHERE user_id = ?', (user_id,))
# Delete cached data
cursor.execute('DELETE FROM gsc_data_cache WHERE user_id = ?', (user_id,))
# Delete OAuth states
cursor.execute('DELETE FROM gsc_oauth_states WHERE user_id = ?', (user_id,))
conn.commit()
logger.info(f"GSC access revoked for user: {user_id}")
return True
except Exception as e:
logger.error(f"Error revoking GSC access for user {user_id}: {e}")
return False
def clear_incomplete_credentials(self, user_id: str) -> bool:
"""Clear incomplete GSC credentials that are missing required fields."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('DELETE FROM gsc_credentials WHERE user_id = ?', (user_id,))
conn.commit()
logger.info(f"Cleared incomplete GSC credentials for user: {user_id}")
return True
except Exception as e:
logger.error(f"Error clearing incomplete credentials for user {user_id}: {e}")
return False
def _get_cached_data(self, user_id: str, site_url: str, data_type: str, cache_key: str) -> Optional[Dict]:
"""Get cached data if not expired."""
try:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT data_json FROM gsc_data_cache
WHERE user_id = ? AND site_url = ? AND data_type = ?
AND expires_at > CURRENT_TIMESTAMP
''', (user_id, site_url, data_type))
result = cursor.fetchone()
if result:
return json.loads(result[0])
return None
except Exception as e:
logger.error(f"Error getting cached data: {e}")
return None
def _cache_data(self, user_id: str, site_url: str, data_type: str, data: Dict, cache_key: str):
"""Cache data with expiration."""
try:
            expires_at = (datetime.now(timezone.utc) + timedelta(hours=1)).strftime('%Y-%m-%d %H:%M:%S')  # cache for 1 hour; stored as UTC text so it compares correctly with SQLite's CURRENT_TIMESTAMP
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute('''
INSERT OR REPLACE INTO gsc_data_cache
(user_id, site_url, data_type, data_json, expires_at)
VALUES (?, ?, ?, ?, ?)
''', (user_id, site_url, data_type, json.dumps(data), expires_at))
conn.commit()
logger.info(f"Data cached for user: {user_id}, type: {data_type}")
except Exception as e:
logger.error(f"Error caching data: {e}")