ALwrity AI Blog Writer - Added Google Grounding UI Implementation
This commit is contained in:
@@ -11,11 +11,16 @@ from models.blog_models import (
|
||||
BlogResearchRequest,
|
||||
BlogResearchResponse,
|
||||
ResearchSource,
|
||||
GroundingMetadata,
|
||||
GroundingChunk,
|
||||
GroundingSupport,
|
||||
Citation,
|
||||
)
|
||||
|
||||
from .keyword_analyzer import KeywordAnalyzer
|
||||
from .competitor_analyzer import CompetitorAnalyzer
|
||||
from .content_angle_generator import ContentAngleGenerator
|
||||
from .data_filter import ResearchDataFilter
|
||||
|
||||
|
||||
class ResearchService:
|
||||
@@ -25,6 +30,7 @@ class ResearchService:
|
||||
self.keyword_analyzer = KeywordAnalyzer()
|
||||
self.competitor_analyzer = CompetitorAnalyzer()
|
||||
self.content_angle_generator = ContentAngleGenerator()
|
||||
self.data_filter = ResearchDataFilter()
|
||||
|
||||
async def research(self, request: BlogResearchRequest) -> BlogResearchResponse:
|
||||
"""
|
||||
@@ -85,6 +91,9 @@ class ResearchService:
|
||||
# Extract sources from grounding metadata
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
|
||||
# Extract grounding metadata for detailed UI display
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Extract search widget and queries for UI display
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
@@ -107,17 +116,31 @@ class ResearchService:
|
||||
# Add search widget and queries for UI display
|
||||
search_widget=search_widget if 'search_widget' in locals() else "",
|
||||
search_queries=search_queries if 'search_queries' in locals() else [],
|
||||
# Add grounding metadata for detailed UI display
|
||||
grounding_metadata=grounding_metadata,
|
||||
)
|
||||
|
||||
# Cache the successful result for future exact keyword matches
|
||||
# Filter and clean research data for optimal AI processing
|
||||
filtered_response = self.data_filter.filter_research_data(response)
|
||||
logger.info("Research data filtering completed successfully")
|
||||
|
||||
# Cache the successful result for future exact keyword matches (both caches)
|
||||
persistent_research_cache.cache_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
result=filtered_response.dict()
|
||||
)
|
||||
|
||||
# Also cache in memory for faster access
|
||||
research_cache.cache_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
result=response.dict()
|
||||
result=filtered_response.dict()
|
||||
)
|
||||
|
||||
return response
|
||||
return filtered_response
|
||||
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
@@ -142,27 +165,38 @@ class ResearchService:
|
||||
try:
|
||||
from services.llm_providers.gemini_grounded_provider import GeminiGroundedProvider
|
||||
from services.cache.research_cache import research_cache
|
||||
from api.blog_writer.router import _update_progress
|
||||
from services.cache.persistent_research_cache import persistent_research_cache
|
||||
from api.blog_writer.task_manager import task_manager
|
||||
|
||||
topic = request.topic or ", ".join(request.keywords)
|
||||
industry = request.industry or (request.persona.industry if request.persona and request.persona.industry else "General")
|
||||
target_audience = getattr(request.persona, 'target_audience', 'General') if request.persona else 'General'
|
||||
|
||||
# Check cache first for exact keyword match
|
||||
await _update_progress(task_id, "🔍 Checking cache for existing research...")
|
||||
cached_result = research_cache.get_cached_result(
|
||||
# Check cache first for exact keyword match (try both caches)
|
||||
await task_manager.update_progress(task_id, "🔍 Checking cache for existing research...")
|
||||
|
||||
# Try persistent cache first (survives restarts)
|
||||
cached_result = persistent_research_cache.get_cached_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience
|
||||
)
|
||||
|
||||
# Fallback to in-memory cache
|
||||
if not cached_result:
|
||||
cached_result = research_cache.get_cached_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience
|
||||
)
|
||||
|
||||
if cached_result:
|
||||
await _update_progress(task_id, "✅ Found cached research results! Returning instantly...")
|
||||
await task_manager.update_progress(task_id, "✅ Found cached research results! Returning instantly...")
|
||||
logger.info(f"Returning cached research result for keywords: {request.keywords}")
|
||||
return BlogResearchResponse(**cached_result)
|
||||
|
||||
# Cache miss - proceed with API call
|
||||
await _update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
|
||||
await task_manager.update_progress(task_id, "🌐 Cache miss - connecting to Google Search grounding...")
|
||||
logger.info(f"Cache miss - making API call for keywords: {request.keywords}")
|
||||
gemini = GeminiGroundedProvider()
|
||||
|
||||
@@ -185,7 +219,7 @@ class ResearchService:
|
||||
Structure your response with clear sections for each analysis area.
|
||||
"""
|
||||
|
||||
await _update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
|
||||
await task_manager.update_progress(task_id, "🤖 Making AI request to Gemini with Google Search grounding...")
|
||||
# Single Gemini call with native Google Search grounding - no fallbacks
|
||||
gemini_result = await gemini.generate_grounded_content(
|
||||
prompt=research_prompt,
|
||||
@@ -193,22 +227,25 @@ class ResearchService:
|
||||
max_tokens=2000
|
||||
)
|
||||
|
||||
await _update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
await task_manager.update_progress(task_id, "📊 Processing research results and extracting insights...")
|
||||
# Extract sources from grounding metadata
|
||||
sources = self._extract_sources_from_grounding(gemini_result)
|
||||
|
||||
# Extract grounding metadata for detailed UI display
|
||||
grounding_metadata = self._extract_grounding_metadata(gemini_result)
|
||||
|
||||
# Extract search widget and queries for UI display
|
||||
search_widget = gemini_result.get("search_widget", "") or ""
|
||||
search_queries = gemini_result.get("search_queries", []) or []
|
||||
|
||||
await _update_progress(task_id, "🔍 Analyzing keywords and content angles...")
|
||||
await task_manager.update_progress(task_id, "🔍 Analyzing keywords and content angles...")
|
||||
# Parse the comprehensive response for different analysis components
|
||||
content = gemini_result.get("content", "")
|
||||
keyword_analysis = self.keyword_analyzer.analyze(content, request.keywords)
|
||||
competitor_analysis = self.competitor_analyzer.analyze(content)
|
||||
suggested_angles = self.content_angle_generator.generate(content, topic, industry)
|
||||
|
||||
await _update_progress(task_id, "💾 Caching results for future use...")
|
||||
await task_manager.update_progress(task_id, "💾 Caching results for future use...")
|
||||
logger.info(f"Research completed successfully with {len(sources)} sources and {len(search_queries)} search queries")
|
||||
|
||||
# Create the response
|
||||
@@ -221,17 +258,34 @@ class ResearchService:
|
||||
# Add search widget and queries for UI display
|
||||
search_widget=search_widget if 'search_widget' in locals() else "",
|
||||
search_queries=search_queries if 'search_queries' in locals() else [],
|
||||
# Add grounding metadata for detailed UI display
|
||||
grounding_metadata=grounding_metadata,
|
||||
# Preserve original user keywords for caching
|
||||
original_keywords=request.keywords,
|
||||
)
|
||||
|
||||
# Cache the successful result for future exact keyword matches
|
||||
# Filter and clean research data for optimal AI processing
|
||||
await task_manager.update_progress(task_id, "🔍 Filtering and cleaning research data...")
|
||||
filtered_response = self.data_filter.filter_research_data(response)
|
||||
logger.info("Research data filtering completed successfully")
|
||||
|
||||
# Cache the successful result for future exact keyword matches (both caches)
|
||||
persistent_research_cache.cache_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
result=filtered_response.dict()
|
||||
)
|
||||
|
||||
# Also cache in memory for faster access
|
||||
research_cache.cache_result(
|
||||
keywords=request.keywords,
|
||||
industry=industry,
|
||||
target_audience=target_audience,
|
||||
result=response.dict()
|
||||
result=filtered_response.dict()
|
||||
)
|
||||
|
||||
return response
|
||||
return filtered_response
|
||||
|
||||
except Exception as e:
|
||||
error_message = str(e)
|
||||
@@ -261,8 +315,104 @@ class ResearchService:
|
||||
url=src.get("url", ""),
|
||||
excerpt=src.get("content", "")[:500] if src.get("content") else f"Source from {src.get('title', 'web')}",
|
||||
credibility_score=float(src.get("credibility_score", 0.8)),
|
||||
published_at=str(src.get("publication_date", "2024-01-01"))
|
||||
published_at=str(src.get("publication_date", "2024-01-01")),
|
||||
index=src.get("index"),
|
||||
source_type=src.get("type", "web")
|
||||
)
|
||||
sources.append(source)
|
||||
|
||||
return sources
|
||||
|
||||
def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> GroundingMetadata:
    """Extract detailed grounding metadata from a Gemini result.

    Reads ``gemini_result["grounding_metadata"]`` (either a mapping or an
    object exposing the same names as attributes) together with
    ``gemini_result["citations"]`` and converts them into the
    ``GroundingMetadata`` model used for UI display.

    Args:
        gemini_result: Result dictionary returned by the grounded Gemini call.

    Returns:
        A ``GroundingMetadata`` holding the web grounding chunks, the
        grounding supports, the citations, the rendered search entry point
        and the web search queries. Sections that are missing or ``None``
        produce empty values instead of raising.
    """

    def _read(container: Any, name: str, default: Any = None) -> Any:
        """Return ``name`` from an attribute or a mapping key.

        Attribute access is tried first, then ``.get``. A ``None`` value is
        treated as missing and replaced by ``default``.
        """
        if hasattr(container, name):
            value = getattr(container, name)
        elif hasattr(container, "get"):
            value = container.get(name, default)
        else:
            value = default
        return default if value is None else value

    # `or {}` covers the key being present but explicitly None.
    raw_grounding = gemini_result.get("grounding_metadata") or {}

    grounding_chunks = []
    # Maps a chunk's position in the RAW chunk list to the model built for it.
    # Supports address chunks by raw position, so indexing the filtered
    # `grounding_chunks` list would hit the wrong entry whenever a non-web
    # chunk has been skipped.
    chunk_by_raw_index = {}

    for raw_index, chunk in enumerate(_read(raw_grounding, "grounding_chunks") or []):
        web_data = _read(chunk, "web")
        if web_data is None:
            # Only web chunks are surfaced in the UI.
            continue
        grounding_chunk = GroundingChunk(
            title=_read(web_data, "title", "Untitled"),
            url=_read(web_data, "uri", ""),
            confidence_score=None,  # Filled in from the supports below
        )
        grounding_chunks.append(grounding_chunk)
        chunk_by_raw_index[raw_index] = grounding_chunk

    grounding_supports = []
    for support in _read(raw_grounding, "grounding_supports") or []:
        confidence_scores = _read(support, "confidence_scores") or []
        chunk_indices = _read(support, "grounding_chunk_indices") or []

        if hasattr(support, "confidence_scores"):
            # Object format: segment fields are read directly off the support.
            segment_text = getattr(support, "segment_text", "")
            start_index = getattr(support, "start_index", None)
            end_index = getattr(support, "end_index", None)
        else:
            # Dictionary format: segment fields are nested under "segment".
            segment = _read(support, "segment") or {}
            segment_text = _read(segment, "text", "")
            start_index = _read(segment, "start_index")
            end_index = _read(segment, "end_index")

        # NOTE(review): the indices stored here are still raw positions; if
        # the UI indexes `grounding_chunks` with them, confirm it accounts
        # for skipped non-web chunks.
        grounding_supports.append(
            GroundingSupport(
                confidence_scores=confidence_scores,
                grounding_chunk_indices=chunk_indices,
                segment_text=segment_text,
                start_index=start_index,
                end_index=end_index,
            )
        )

        # Propagate the support's average confidence to the chunks it cites.
        # A later support overwrites the score set by an earlier one.
        if confidence_scores and chunk_indices:
            avg_confidence = sum(confidence_scores) / len(confidence_scores)
            for idx in chunk_indices:
                cited_chunk = chunk_by_raw_index.get(idx)
                if cited_chunk is not None:
                    cited_chunk.confidence_score = avg_confidence

    # Citations live at the top level of the result, not inside the metadata.
    citations = [
        Citation(
            citation_type=citation.get("type", "inline"),
            start_index=citation.get("start_index", 0),
            end_index=citation.get("end_index", 0),
            text=citation.get("text", ""),
            source_indices=citation.get("source_indices", []),
            reference=citation.get("reference", ""),
        )
        for citation in (gemini_result.get("citations") or [])
    ]

    # Search entry point (rendered widget content) and the issued queries.
    entry_point = _read(raw_grounding, "search_entry_point")
    search_entry_point = _read(entry_point, "rendered_content", "") if entry_point else ""
    web_search_queries = _read(raw_grounding, "web_search_queries") or []

    return GroundingMetadata(
        grounding_chunks=grounding_chunks,
        grounding_supports=grounding_supports,
        citations=citations,
        search_entry_point=search_entry_point,
        web_search_queries=web_search_queries,
    )
|
||||
|
||||
Reference in New Issue
Block a user