183 lines
5.2 KiB
Python
183 lines
5.2 KiB
Python
"""
|
|
Data Parsing Utilities
|
|
Shared utilities for parsing and validating strategy data.
|
|
"""
|
|
|
|
import json
import math
import re
from typing import Any, Optional, Dict, List
|
|
|
|
|
|
def parse_float(value: Any) -> Optional[float]:
    """
    Parse a value to float, handling various formats.

    Supports:
    - Numbers (int, float)
    - Strings with numbers, including scientific notation (e.g., "1e3")
    - Percentages (e.g., "25%" -> 25.0; the value is NOT divided by 100)
    - Suffixes (e.g., "10k" -> 10000.0, "5m" -> 5000000.0)
    - Comma-separated numbers (commas are removed before parsing)

    String parsing is lenient: when the text is not a clean number, the
    first numeric token found anywhere in it is used.

    Args:
        value: Value to parse

    Returns:
        Parsed float value or None if parsing fails. Values that cannot be
        represented as a finite float (NaN, infinities, integers beyond the
        float range) are treated as parse failures.
    """
    if value is None:
        return None

    if isinstance(value, (int, float)):
        try:
            number = float(value)
        except OverflowError:
            # Integers larger than the float range cannot be converted.
            return None
        return number if math.isfinite(number) else None

    if not isinstance(value, str):
        return None

    s = value.strip().lower().replace(",", "")

    # Handle percentage: a clean "<number>%" is returned directly.
    if s.endswith('%'):
        try:
            number = float(s[:-1])
        except ValueError:
            # Not a clean number before '%'; fall through to the lenient
            # token search below.
            pass
        else:
            # float() accepts "nan"/"inf"; those are not usable numbers.
            return number if math.isfinite(number) else None

    # Handle k/m suffix
    mul = 1.0
    if s.endswith('k'):
        mul = 1_000.0
        s = s[:-1]
    elif s.endswith('m'):
        mul = 1_000_000.0
        s = s[:-1]

    # First numeric token, with an optional exponent so that "1e3" is read
    # as 1000.0 rather than being truncated to 1.0 (s is already lowercase).
    m = re.search(r"[-+]?\d*\.?\d+(?:e[-+]?\d+)?", s)
    if m is None:
        return None

    try:
        number = float(m.group(0)) * mul
    except ValueError:
        return None
    # Very large literals/exponents overflow to infinity instead of raising.
    return number if math.isfinite(number) else None
|
|
|
|
|
|
def parse_int(value: Any) -> Optional[int]:
    """
    Parse a value to integer.

    The value is first parsed with ``parse_float`` and the result is rounded
    with the built-in ``round`` (ties go to the nearest even integer, so
    2.5 -> 2 and 3.5 -> 4).

    Args:
        value: Value to parse

    Returns:
        Parsed integer value or None if parsing fails
    """
    number = parse_float(value)
    if number is None:
        return None

    try:
        return int(round(number))
    except (ValueError, OverflowError):
        # round() raises ValueError for NaN and OverflowError for infinities;
        # neither has an integer representation.
        return None
|
|
|
|
|
|
def parse_json(value: Any) -> Optional[Any]:
    """
    Decode a JSON column value, passing structured data through unchanged.

    Args:
        value: Value to parse

    Returns:
        - The same object when ``value`` is already a dict or list.
        - The decoded JSON when ``value`` is a string holding valid JSON.
        - The original string when it cannot be decoded as JSON.
        - None for None and for every other type.
    """
    # Already-structured containers need no decoding.
    if isinstance(value, (dict, list)):
        return value

    # None and non-string scalars are not accepted.
    if not isinstance(value, str):
        return None

    try:
        decoded = json.loads(value)
    except Exception:
        # Accept plain strings in JSON columns
        return value
    return decoded
|
|
|
|
|
|
def parse_array(value: Any) -> Optional[List]:
    """
    Convert a value into a list.

    Accepted inputs:
    - A list, which is handed back untouched
    - A string containing a JSON array
    - A comma-separated string (items are trimmed, empty items dropped)

    Args:
        value: Value to parse

    Returns:
        The resulting list, or None when the value is None, is neither a
        list nor a string, or yields no items.
    """
    if isinstance(value, list):
        return value

    if not isinstance(value, str):
        return None

    # JSON takes priority, but only when it decodes to a list.
    try:
        decoded = json.loads(value)
    except Exception:
        decoded = None
    if isinstance(decoded, list):
        return decoded

    # Otherwise treat the text as comma-separated items.
    items = []
    for chunk in value.split(','):
        trimmed = chunk.strip()
        if trimmed:
            items.append(trimmed)
    return items or None
|
|
|
|
|
|
def parse_strategy_data(strategy_data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, str]]:
    """
    Parse and validate strategy data, returning cleaned data and warnings.

    The input mapping is shallow-copied and left unmodified. Recognised
    fields are normalised in the copy; all other keys pass through as-is.

    - ``content_budget`` (float) and ``team_size`` (int) are always present
      in the result, set to None when missing or unparseable.
    - Array, JSON and boolean fields are only normalised when the key is
      present in the input.
    - A warning is recorded for every field whose non-null input ended up
      as None.

    Args:
        strategy_data: Raw strategy data dictionary

    Returns:
        Tuple of (cleaned_data, warnings_dict), where warnings_dict maps a
        field name to a human-readable message.
    """
    warnings: Dict[str, str] = {}
    cleaned = dict(strategy_data)

    # Numeric fields
    raw_budget = strategy_data.get('content_budget')
    content_budget = parse_float(raw_budget)
    if raw_budget is not None and content_budget is None:
        warnings['content_budget'] = 'Could not parse number; saved as null'
    cleaned['content_budget'] = content_budget

    raw_team_size = strategy_data.get('team_size')
    team_size = parse_int(raw_team_size)
    if raw_team_size is not None and team_size is None:
        warnings['team_size'] = 'Could not parse integer; saved as null'
    cleaned['team_size'] = team_size

    # Array fields
    array_fields = ['preferred_formats']
    for field in array_fields:
        if field in strategy_data:
            raw = strategy_data.get(field)
            parsed = parse_array(raw)
            if raw is not None and parsed is None:
                warnings[field] = 'Could not parse list; saved as null'
            cleaned[field] = parsed

    # JSON fields
    json_fields = [
        'business_objectives', 'target_metrics', 'performance_metrics', 'content_preferences',
        'consumption_patterns', 'audience_pain_points', 'buying_journey', 'seasonal_trends',
        'engagement_metrics', 'top_competitors', 'competitor_content_strategies', 'market_gaps',
        'industry_trends', 'emerging_trends', 'content_mix', 'optimal_timing', 'quality_metrics',
        'editorial_guidelines', 'brand_voice', 'traffic_sources', 'conversion_rates', 'content_roi_targets',
        'target_audience', 'content_pillars', 'ai_recommendations'
    ]
    for field in json_fields:
        if field in strategy_data:
            raw = strategy_data.get(field)
            parsed = parse_json(raw)
            # parse_json yields None for non-null values it does not accept
            # (e.g. bare numbers); report that like the other field kinds
            # instead of dropping the value silently.
            if raw is not None and parsed is None:
                warnings[field] = 'Could not parse JSON; saved as null'
            cleaned[field] = parsed

    # Boolean fields
    if 'ab_testing_capabilities' in strategy_data:
        cleaned['ab_testing_capabilities'] = _coerce_bool(
            strategy_data.get('ab_testing_capabilities')
        )

    return cleaned, warnings


# Textual spellings (after strip + lowercase) that mean "false".
_FALSE_STRINGS = frozenset({'', '0', 'false', 'f', 'no', 'n', 'off'})


def _coerce_bool(value: Any) -> bool:
    """Convert a loosely-typed flag to bool, honouring textual false values.

    Plain ``bool()`` treats every non-empty string as True, so "false" or
    "0" would be stored as True. Non-string values keep normal truthiness.
    """
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)
|