ALwrity version 0.5.4

This commit is contained in:
ajaysi
2025-08-09 23:14:16 +05:30
parent 01fe1e0a9c
commit 5c08b6e007
42 changed files with 3514 additions and 2148 deletions

View File

@@ -13,7 +13,13 @@ from enum import Enum
# Import AI providers
from llm_providers.main_text_generation import llm_text_gen
from llm_providers.gemini_provider import gemini_structured_json_response
# Prefer the extended gemini provider if available; fallback to base
try:
from services.llm_providers.gemini_provider import gemini_structured_json_response as _gemini_fn
_GEMINI_EXTENDED = True
except Exception:
from llm_providers.gemini_provider import gemini_structured_json_response as _gemini_fn
_GEMINI_EXTENDED = False
class AIServiceType(Enum):
"""AI service types for monitoring."""
@@ -54,14 +60,16 @@ class AIServiceManager:
def _load_ai_configuration(self) -> Dict[str, Any]:
"""Load AI configuration settings."""
return {
'max_retries': 3,
'timeout_seconds': 30,
'temperature': 0.7,
'max_tokens': 2048,
'max_retries': 2, # Reduced from 3
'timeout_seconds': 45, # increased from 15 to accommodate structured 30+ fields
'temperature': 0.3, # more deterministic for schema-constrained JSON
'top_p': 0.9,
'top_k': 40,
'max_tokens': 2048, # increased from 1024 for larger structured outputs
'enable_caching': True,
'cache_duration_minutes': 60,
'performance_monitoring': True,
'fallback_enabled': True
'fallback_enabled': False # Disabled fallback to prevent false positives
}
def _load_centralized_prompts(self) -> Dict[str, str]:
@@ -448,47 +456,120 @@ Format as structured JSON with detailed assessment and optimization guidance.
try:
logger.info(f"🤖 Executing AI call for {service_type.value}")
logger.debug(f"Using gemini provider extended={_GEMINI_EXTENDED}")
# Execute AI call with timeout
# Execute AI call with timeout (run sync provider in a thread)
response = await asyncio.wait_for(
gemini_structured_json_response(
prompt=prompt,
schema=schema,
temperature=self.config['temperature'],
max_tokens=self.config['max_tokens']
asyncio.to_thread(
self._call_gemini_structured,
prompt,
schema,
),
timeout=self.config['timeout_seconds']
)
# Parse response
result = json.loads(response)
if isinstance(response, dict):
result = response
elif isinstance(response, str):
try:
result = json.loads(response)
except json.JSONDecodeError:
# Return raw string if not valid JSON
result = {"raw_response": response}
else:
# Fallback to string conversion
result = {"raw_response": str(response)}
# Treat provider-reported errors or empty results as failures
if isinstance(result, dict) and ('error' in result or not result):
error_message = result.get('error', 'Empty AI response') if isinstance(result, dict) else 'Empty AI response'
# record metrics and raise
response_time = (datetime.utcnow() - start_time).total_seconds()
metrics = AIServiceMetrics(
service_type=service_type,
response_time=response_time,
success=False,
error_message=error_message
)
self.metrics.append(metrics)
raise Exception(error_message)
success = True
logger.info(f"✅ AI call for {service_type.value} completed successfully")
except asyncio.TimeoutError:
error_message = f"AI call timeout for {service_type.value}"
logger.error(error_message)
# record metrics and raise
response_time = (datetime.utcnow() - start_time).total_seconds()
metrics = AIServiceMetrics(
service_type=service_type,
response_time=response_time,
success=False,
error_message=error_message
)
self.metrics.append(metrics)
raise Exception(error_message)
except json.JSONDecodeError as e:
error_message = f"JSON decode error for {service_type.value}: {str(e)}"
logger.error(error_message)
response_time = (datetime.utcnow() - start_time).total_seconds()
metrics = AIServiceMetrics(
service_type=service_type,
response_time=response_time,
success=False,
error_message=error_message
)
self.metrics.append(metrics)
raise Exception(error_message)
except Exception as e:
error_message = f"AI call error for {service_type.value}: {str(e)}"
logger.error(error_message)
response_time = (datetime.utcnow() - start_time).total_seconds()
metrics = AIServiceMetrics(
service_type=service_type,
response_time=response_time,
success=False,
error_message=error_message
)
self.metrics.append(metrics)
raise
# Calculate response time
# Calculate response time and record metrics for successful calls
response_time = (datetime.utcnow() - start_time).total_seconds()
# Record metrics
metrics = AIServiceMetrics(
service_type=service_type,
response_time=response_time,
success=success,
error_message=error_message
error_message=None
)
self.metrics.append(metrics)
return result
def _call_gemini_structured(self, prompt: str, schema: Dict[str, Any]):
    """Call the Gemini structured-JSON provider with flexible signature support.

    The extended provider accepts
    ``(prompt, schema, temperature, top_p, top_k, max_tokens, system_prompt)``;
    the base provider may only accept ``(prompt, schema)``. Compatibility is
    decided by binding the arguments against the provider's signature
    *before* calling it, so a ``TypeError`` raised inside the provider is
    never mistaken for a signature mismatch and never triggers a second
    provider call.

    Args:
        prompt: Prompt text forwarded to the provider.
        schema: Response schema forwarded to the provider.

    Returns:
        Whatever the provider function returns.

    Raises:
        KeyError: If a required sampling key is missing from ``self.config``.
    """
    import inspect

    extended_args = (
        prompt,
        schema,
        self.config['temperature'],
        self.config['top_p'],
        self.config.get('top_k', 40),
        self.config['max_tokens'],
        None,  # system_prompt
    )

    try:
        # bind() raises TypeError exactly when the arguments do not fit the
        # callable's signature; the provider itself is not invoked here.
        inspect.signature(_gemini_fn).bind(*extended_args)
    except TypeError:
        logger.debug("Falling back to base gemini provider signature (prompt, schema)")
        return _gemini_fn(prompt, schema)
    except ValueError:
        # No introspectable signature: probe by calling, as a last resort.
        try:
            return _gemini_fn(*extended_args)
        except TypeError:
            logger.debug("Falling back to base gemini provider signature (prompt, schema)")
            return _gemini_fn(prompt, schema)

    return _gemini_fn(*extended_args)
async def execute_structured_json_call(self, service_type: AIServiceType, prompt: str, schema: Dict[str, Any]) -> Dict[str, Any]:
    """Run a structured JSON AI call against a caller-supplied schema.

    Public entry point that delegates to ``self._execute_ai_call`` and
    returns its result unchanged.
    """
    outcome = await self._execute_ai_call(service_type, prompt, schema)
    return outcome
async def generate_content_gap_analysis(self, analysis_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Generate content gap analysis using centralized AI service.
@@ -520,11 +601,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['content_gap_analysis']
)
return result if result else self._get_fallback_content_gap_analysis()
return result if result else {}
except Exception as e:
logger.error(f"Error in content gap analysis: {str(e)}")
return self._get_fallback_content_gap_analysis()
raise Exception(f"Failed to generate content gap analysis: {str(e)}")
async def generate_market_position_analysis(self, market_data: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -553,11 +634,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['market_position_analysis']
)
return result if result else self._get_fallback_market_position_analysis()
return result if result else {}
except Exception as e:
logger.error(f"Error in market position analysis: {str(e)}")
return self._get_fallback_market_position_analysis()
raise Exception(f"Failed to generate market position analysis: {str(e)}")
async def generate_keyword_analysis(self, keyword_data: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -586,11 +667,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['keyword_analysis']
)
return result if result else self._get_fallback_keyword_analysis()
return result if result else {}
except Exception as e:
logger.error(f"Error in keyword analysis: {str(e)}")
return self._get_fallback_keyword_analysis()
raise Exception(f"Failed to generate keyword analysis: {str(e)}")
async def generate_performance_prediction(self, content_data: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -618,11 +699,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['performance_prediction']
)
return result if result else self._get_fallback_performance_prediction()
return result if result else {}
except Exception as e:
logger.error(f"Error in performance prediction: {str(e)}")
return self._get_fallback_performance_prediction()
raise Exception(f"Failed to generate performance prediction: {str(e)}")
async def generate_strategic_intelligence(self, analysis_data: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -651,11 +732,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['strategic_intelligence']
)
return result if result else self._get_fallback_strategic_intelligence()
return result if result else {}
except Exception as e:
logger.error(f"Error in strategic intelligence: {str(e)}")
return self._get_fallback_strategic_intelligence()
raise Exception(f"Failed to generate strategic intelligence: {str(e)}")
async def generate_content_quality_assessment(self, content_data: Dict[str, Any]) -> Dict[str, Any]:
"""
@@ -684,11 +765,11 @@ Format as structured JSON with detailed assessment and optimization guidance.
self.schemas['content_quality_assessment']
)
return result if result else self._get_fallback_content_quality_assessment()
return result if result else {}
except Exception as e:
logger.error(f"Error in content quality assessment: {str(e)}")
return self._get_fallback_content_quality_assessment()
raise Exception(f"Failed to generate content quality assessment: {str(e)}")
async def generate_content_schedule(self, prompt: str) -> Dict[str, Any]:
"""
@@ -733,109 +814,6 @@ Format as structured JSON with detailed assessment and optimization guidance.
logger.error(f"Error generating content schedule: {str(e)}")
return {"schedule": []}
# Fallback methods
def _get_fallback_content_gap_analysis(self) -> Dict[str, Any]:
"""Fallback content gap analysis."""
return {
'strategic_insights': [
{
'type': 'content_strategy',
'insight': 'Focus on educational content to build authority',
'confidence': 0.85,
'priority': 'high',
'estimated_impact': 'Authority building',
'implementation_time': '3-6 months',
'risk_level': 'low'
}
],
'content_recommendations': [
{
'type': 'content_creation',
'recommendation': 'Create comprehensive guides for high-opportunity keywords',
'priority': 'high',
'estimated_traffic': '5K+ monthly',
'implementation_time': '2-3 weeks',
'roi_estimate': 'High ROI potential',
'success_metrics': ['Traffic increase', 'Authority building', 'Lead generation']
}
]
}
def _get_fallback_market_position_analysis(self) -> Dict[str, Any]:
"""Fallback market position analysis."""
return {
'market_leader': 'competitor1.com',
'content_leader': 'competitor2.com',
'quality_leader': 'competitor3.com',
'market_gaps': ['Video content', 'Interactive content', 'Expert interviews'],
'opportunities': ['Niche content development', 'Expert interviews', 'Industry reports'],
'competitive_advantages': ['Technical expertise', 'Comprehensive guides', 'Industry insights']
}
def _get_fallback_keyword_analysis(self) -> Dict[str, Any]:
"""Fallback keyword analysis."""
return {
'keyword_opportunities': [
{
'keyword': 'industry best practices',
'search_volume': 3000,
'competition_level': 'low',
'difficulty_score': 35,
'trend': 'rising',
'intent': 'informational',
'opportunity_score': 85,
'recommended_format': 'comprehensive_guide',
'estimated_traffic': '2K+ monthly',
'implementation_priority': 'high'
}
]
}
def _get_fallback_performance_prediction(self) -> Dict[str, Any]:
"""Fallback performance prediction."""
return {
"traffic_predictions": {
"estimated_monthly_traffic": "10K+",
"traffic_growth_rate": "10%",
"peak_traffic_month": "June",
"confidence_level": "high"
},
"engagement_predictions": {
"estimated_time_on_page": "5 min",
"estimated_bounce_rate": "20%",
"estimated_social_shares": "100+",
"estimated_comments": "50+",
"confidence_level": "medium"
}
}
def _get_fallback_strategic_intelligence(self) -> Dict[str, Any]:
"""Fallback strategic intelligence."""
return {
"strategic_insights": [
{
"type": "content_strategy",
"insight": "Focus on educational content to build authority",
"reasoning": "Educational content is highly shareable and can attract a targeted audience.",
"priority": "high",
"estimated_impact": "Authority building",
"implementation_time": "3-6 months",
"confidence_level": "high"
}
]
}
def _get_fallback_content_quality_assessment(self) -> Dict[str, Any]:
"""Fallback content quality assessment."""
return {
"overall_score": 88.0,
"readability_score": 92.0,
"seo_score": 95.0,
"engagement_potential": "High engagement and retention",
"improvement_suggestions": ["Add more internal links", "Optimize images for SEO"],
"timestamp": datetime.utcnow().isoformat()
}
def get_performance_metrics(self) -> Dict[str, Any]:
"""
Get AI service performance metrics.

View File

@@ -24,6 +24,8 @@ import asyncio
import json
import re
from typing import Optional, Dict, Any
# Configure standard logging
import logging
logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s')
@@ -170,63 +172,107 @@ def gemini_pro_text_gen(prompt, temperature=0.7, top_p=0.9, top_k=40, max_tokens
logger.error(f"Error in Gemini Pro text generation: {e}")
return str(e)
def _dict_to_types_schema(schema: Dict[str, Any]) -> types.Schema:
    """Convert a lightweight dict schema to google.genai.types.Schema.

    Supported node types (case-insensitive ``"type"`` key): OBJECT, ARRAY,
    STRING, NUMBER, INTEGER and BOOLEAN. A missing type is treated as
    OBJECT; an unrecognised type is treated as STRING. Non-dict property
    or item definitions are treated as plain STRING.

    Besides the structural type, these constraints are carried over when
    present so the schema sent to the model is not looser than the one the
    caller declared:

    * ``description`` (string) on any node,
    * ``required`` on OBJECT nodes, restricted to declared properties,
    * ``enum`` on STRING nodes (values are stringified).

    Args:
        schema: Root schema node as a plain dict.

    Returns:
        The equivalent ``types.Schema`` tree.

    Raises:
        ValueError: If ``schema`` is not a dict.
    """
    if not isinstance(schema, dict):
        raise ValueError("response_schema must be a dict compatible with types.Schema")

    scalar_types = {
        "STRING": types.Type.STRING,
        "NUMBER": types.Type.NUMBER,
        # Previously fell through to STRING, turning integer fields into text.
        "INTEGER": types.Type.INTEGER,
        "BOOLEAN": types.Type.BOOLEAN,
    }

    def _plain_string() -> types.Schema:
        # Default used wherever a child definition is not itself a dict.
        return types.Schema(type=types.Type.STRING)

    def _convert(node: Dict[str, Any]) -> types.Schema:
        node_type = (node.get("type") or "OBJECT").upper()

        extras: Dict[str, Any] = {}
        description = node.get("description")
        if isinstance(description, str) and description:
            extras["description"] = description

        if node_type == "OBJECT":
            props = node.get("properties") or {}
            props_types: Dict[str, types.Schema] = {
                key: _convert(prop) if isinstance(prop, dict) else _plain_string()
                for key, prop in props.items()
            }
            declared_required = node.get("required")
            if isinstance(declared_required, (list, tuple)):
                # Only keep names that are actually declared as properties.
                required = [name for name in declared_required if name in props_types]
                if required:
                    extras["required"] = required
            return types.Schema(
                type=types.Type.OBJECT,
                properties=props_types if props_types else None,
                **extras,
            )

        if node_type == "ARRAY":
            items_node = node.get("items")
            item_schema = _convert(items_node) if isinstance(items_node, dict) else _plain_string()
            return types.Schema(type=types.Type.ARRAY, items=item_schema, **extras)

        resolved = scalar_types.get(node_type, types.Type.STRING)
        allowed_values = node.get("enum")
        if resolved == types.Type.STRING and isinstance(allowed_values, (list, tuple)) and allowed_values:
            extras["enum"] = [str(value) for value in allowed_values]
        return types.Schema(type=resolved, **extras)

    return _convert(schema)
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def gemini_structured_json_response(prompt, schema, temperature=0.7, top_p=0.9, top_k=40, max_tokens=2048, system_prompt=None):
"""
Generate structured JSON response using Google's Gemini Pro model.
Args:
prompt (str): The input text to generate completion for
schema (dict): The JSON schema to follow for the response
temperature (float, optional): Controls randomness. Defaults to 0.7
top_p (float, optional): Controls diversity. Defaults to 0.9
top_k (int, optional): Controls vocabulary size. Defaults to 40
max_tokens (int, optional): Maximum number of tokens to generate. Defaults to 2048
system_prompt (str, optional): System instructions for the model
Returns:
dict: The generated structured JSON response
"""
try:
# Configure the model
client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))
# Set up generation config
generation_config = {
"temperature": temperature,
"top_p": top_p,
"top_k": top_k,
"max_output_tokens": max_tokens,
}
# Generate content with structured response
response = client.models.generate_content(
model='gemini-2.5-pro',
contents=prompt,
config=types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
response_mime_type='application/json',
response_schema=schema
),
)
# Parse the response
# Build config using official SDK schema type
try:
# First try to get the parsed response
if hasattr(response, 'parsed'):
return response.parsed
# If parsed is not available, try to parse the text
response_text = response.text
return json.loads(response_text)
except json.JSONDecodeError as e:
logger.error(f"Error parsing JSON response: {e}")
return {"error": f"Failed to parse JSON response: {e}", "raw_response": response_text}
types_schema = _dict_to_types_schema(schema) if isinstance(schema, dict) else schema
except Exception as conv_err:
logger.warning(f"Schema conversion warning, defaulting to OBJECT: {conv_err}")
types_schema = types.Schema(type=types.Type.OBJECT)
generation_config = types.GenerateContentConfig(
system_instruction=system_prompt,
max_output_tokens=max_tokens,
temperature=temperature,
top_p=top_p,
top_k=top_k,
response_mime_type='application/json',
response_schema=types_schema
)
response = client.models.generate_content(
model='gemini-2.5-flash',
contents=prompt,
config=generation_config,
)
# Prefer parsed if present and non-empty; otherwise parse text with fallbacks
try:
parsed = getattr(response, 'parsed', None)
if parsed:
return parsed if isinstance(parsed, dict) else json.loads(json.dumps(parsed))
text = (response.text or '').strip()
# Strip markdown code fences if present
if text.startswith('```'):
# remove leading ```json or ``` and trailing ```
if text.lower().startswith('```json'):
text = text[7:]
else:
text = text[3:]
if text.endswith('```'):
text = text[:-3]
text = text.strip()
try:
return json.loads(text)
except json.JSONDecodeError:
# Fallback: extract likely JSON object substring
first = text.find('{')
last = text.rfind('}')
if first != -1 and last != -1 and last > first:
candidate = text[first:last+1]
try:
return json.loads(candidate)
except json.JSONDecodeError:
pass
# Final fallback: regex any object
import re
match = re.search(r'\{[\s\S]*\}', text)
if match:
return json.loads(match.group(0))
raise
except Exception as e:
logger.error(f"Error parsing structured response: {e}")
return {"error": f"Failed to parse JSON response: {e}", "raw_response": (response.text or '')}
except Exception as e:
logger.error(f"Error in Gemini Pro structured JSON generation: {e}")
return {"error": str(e)}

View File

@@ -79,8 +79,8 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
elif gpt_provider == "deepseek":
model = "deepseek-chat"
else:
logger.warning("[llm_text_gen] No API keys found, using mock response")
return _get_mock_response(prompt)
logger.error("[llm_text_gen] No API keys found. Structured mock responses are disabled.")
raise RuntimeError("No LLM API keys configured. Configure provider API keys to enable AI responses.")
logger.debug(f"[llm_text_gen] Using provider: {gpt_provider}, model: {model}")
@@ -163,7 +163,7 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
)
else:
logger.error(f"[llm_text_gen] Unknown provider: {gpt_provider}")
return _get_mock_response(prompt)
raise RuntimeError("Unknown LLM provider.")
except Exception as provider_error:
logger.error(f"[llm_text_gen] Provider {gpt_provider} failed: {str(provider_error)}")
# Try to fallback to another provider
@@ -203,85 +203,13 @@ def llm_text_gen(prompt: str, system_prompt: Optional[str] = None, json_struct:
logger.error(f"[llm_text_gen] Fallback provider {fallback_provider} also failed: {str(fallback_error)}")
continue
# If all providers fail, return mock response
logger.warning("[llm_text_gen] All providers failed, using mock response")
return _get_mock_response(prompt)
# If all providers fail, raise an error (no mock)
logger.error("[llm_text_gen] All providers failed. Structured mock responses are disabled.")
raise RuntimeError("All LLM providers failed to generate a response.")
except Exception as e:
logger.error(f"[llm_text_gen] Error during text generation: {str(e)}")
return _get_mock_response(prompt)
def _get_mock_response(prompt: str) -> str:
    """Return a canned response chosen by keywords found in ``prompt``.

    The prompt is matched case-insensitively, in this order:

    1. "style analysis" / "writing style" -> style-detection JSON
    2. "pattern" / "recurring"            -> pattern-analysis JSON
    3. "guidelines" / "recommendations"   -> guidelines JSON
    4. anything else                       -> plain-text notice

    A warning is logged on every call.
    """
    logger.warning("[llm_text_gen] Using mock response - no API keys configured")

    lowered = prompt.lower()

    def mentions(*needles: str) -> bool:
        # True when any of the given substrings occurs in the lowered prompt.
        return any(needle in lowered for needle in needles)

    # Structured mock for style detection.
    if mentions("style analysis", "writing style"):
        style_payload = {
            "writing_style": {
                "tone": "professional",
                "voice": "active",
                "complexity": "moderate",
                "engagement_level": "high"
            },
            "content_characteristics": {
                "sentence_structure": "well-structured",
                "vocabulary_level": "intermediate",
                "paragraph_organization": "logical flow",
                "content_flow": "smooth transitions"
            },
            "target_audience": {
                "demographics": ["professionals", "business users"],
                "expertise_level": "intermediate",
                "industry_focus": "technology",
                "geographic_focus": "global"
            },
            "content_type": {
                "primary_type": "blog",
                "secondary_types": ["article", "guide"],
                "purpose": "inform",
                "call_to_action": "moderate"
            },
            "recommended_settings": {
                "writing_tone": "professional",
                "target_audience": "business professionals",
                "content_type": "blog",
                "creativity_level": "medium",
                "geographic_location": "global"
            }
        }
        return json.dumps(style_payload)

    # Pattern analysis requests.
    if mentions("pattern", "recurring"):
        pattern_payload = {
            "patterns": {
                "sentence_length": "medium",
                "vocabulary_patterns": ["technical terms", "professional language"],
                "rhetorical_devices": ["examples", "analogies"],
                "paragraph_structure": "topic sentence followed by supporting details",
                "transition_phrases": ["furthermore", "additionally", "however"]
            },
            "style_consistency": "high",
            "unique_elements": ["clear structure", "professional tone", "evidence-based content"]
        }
        return json.dumps(pattern_payload)

    # Guidelines generation requests.
    if mentions("guidelines", "recommendations"):
        guidelines_payload = {
            "guidelines": {
                "tone_recommendations": ["maintain professional tone", "use clear language"],
                "structure_guidelines": ["start with introduction", "use headings", "conclude with summary"],
                "vocabulary_suggestions": ["avoid jargon", "use industry-specific terms appropriately"],
                "engagement_tips": ["include examples", "use active voice", "ask questions"],
                "audience_considerations": ["consider technical level", "provide context"]
            },
            "best_practices": ["research thoroughly", "cite sources", "update regularly"],
            "avoid_elements": ["overly technical language", "long paragraphs", "passive voice"],
            "content_strategy": "focus on providing value while maintaining professional credibility"
        }
        return json.dumps(guidelines_payload)

    # Generic notice for every other kind of prompt.
    generic_notice = "This is a mock response. Please configure API keys for real content generation. To get started, visit the onboarding process and configure your AI provider API keys."
    return generic_notice
raise
def check_gpt_provider(gpt_provider: str) -> bool:
"""Check if the specified GPT provider is supported."""