feat: image generation overhaul (model-aware text, dim clamping, \.30 pricing), event-driven dashboard cache invalidation, SEO insights (AI visibility, GSC, keyword gap), YouTube OAuth/publish, blog writer & content planning improvements, scheduler monitoring updates
This commit is contained in:
@@ -220,12 +220,15 @@ class TxtaiIntelligenceService:
|
||||
return 0.0
|
||||
return dot_product / (norm_v1 * norm_v2)
|
||||
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]):
|
||||
async def index_content(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
|
||||
"""
|
||||
Index content for semantic search and clustering.
|
||||
Index content using incremental upsert — only processes new/changed documents.
|
||||
|
||||
Args:
|
||||
items: List of (id, text, metadata) tuples.
|
||||
|
||||
Returns:
|
||||
Number of items actually upserted.
|
||||
"""
|
||||
self._ensure_initialized()
|
||||
if not self._initialized:
|
||||
@@ -235,38 +238,28 @@ class TxtaiIntelligenceService:
|
||||
logger.warning(message)
|
||||
if self.fail_fast:
|
||||
raise RuntimeError(message)
|
||||
return
|
||||
return 0
|
||||
|
||||
try:
|
||||
logger.info(f"Starting content indexing for user {self.user_id}")
|
||||
logger.debug(f"Indexing {len(items)} items")
|
||||
|
||||
# Validate input items
|
||||
if not items:
|
||||
logger.warning("No items provided for indexing")
|
||||
return
|
||||
return 0
|
||||
|
||||
# Index items: [(id, text, metadata)] - metadata needs to be JSON string for txtai
|
||||
import json
|
||||
processed_items = []
|
||||
for item in items:
|
||||
id_val, text, metadata = item
|
||||
# Convert metadata dict to JSON string
|
||||
metadata_json = json.dumps(metadata) if metadata else "{}"
|
||||
processed_items.append((id_val, text, metadata_json))
|
||||
|
||||
self.embeddings.index(processed_items)
|
||||
|
||||
# Save the index
|
||||
self.embeddings.upsert(processed_items)
|
||||
self.embeddings.save(self.index_path)
|
||||
logger.info(f"Successfully indexed {len(items)} items for user {self.user_id}")
|
||||
logger.debug(f"Index saved to: {self.index_path}")
|
||||
count = len(processed_items)
|
||||
logger.info(f"Upserted {count} items for user {self.user_id}")
|
||||
return count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error indexing content for user {self.user_id}: {e}")
|
||||
logger.error(f"Full traceback: {traceback.format_exc()}")
|
||||
logger.error(f"Items count: {len(items) if items else 0}")
|
||||
|
||||
message = str(e)
|
||||
is_windows_lock_error = isinstance(e, PermissionError) or "WinError 32" in message
|
||||
if is_windows_lock_error:
|
||||
@@ -274,7 +267,62 @@ class TxtaiIntelligenceService:
|
||||
f"Txtai index save skipped for user {self.user_id} due to file lock. "
|
||||
f"The index will be retried on a future run."
|
||||
)
|
||||
return
|
||||
return 0
|
||||
raise
|
||||
|
||||
async def delete_content(self, doc_ids: List[str]) -> int:
|
||||
"""
|
||||
Delete specific documents from the index by ID.
|
||||
|
||||
Args:
|
||||
doc_ids: List of document IDs to remove.
|
||||
|
||||
Returns:
|
||||
Number of documents deleted.
|
||||
"""
|
||||
await self._ensure_initialized_async()
|
||||
if not self._initialized or not self.embeddings:
|
||||
return 0
|
||||
|
||||
try:
|
||||
self.embeddings.delete(doc_ids)
|
||||
self.embeddings.save(self.index_path)
|
||||
logger.info(f"Deleted {len(doc_ids)} documents for user {self.user_id}")
|
||||
return len(doc_ids)
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting documents for user {self.user_id}: {e}")
|
||||
return 0
|
||||
|
||||
async def reindex_all(self, items: List[Tuple[str, str, Dict[str, Any]]]) -> int:
|
||||
"""
|
||||
Full reindex — replaces all content. Use sparingly (e.g. schema migration).
|
||||
|
||||
Args:
|
||||
items: List of (id, text, metadata) tuples.
|
||||
|
||||
Returns:
|
||||
Number of items indexed.
|
||||
"""
|
||||
await self._ensure_initialized_async()
|
||||
if not self._initialized or not self.embeddings:
|
||||
return 0
|
||||
|
||||
try:
|
||||
import json
|
||||
processed_items = []
|
||||
for item in items:
|
||||
id_val, text, metadata = item
|
||||
metadata_json = json.dumps(metadata) if metadata else "{}"
|
||||
processed_items.append((id_val, text, metadata_json))
|
||||
|
||||
self.embeddings.index(processed_items, reindex=True)
|
||||
self.embeddings.save(self.index_path)
|
||||
count = len(processed_items)
|
||||
logger.info(f"Reindexed all {count} items for user {self.user_id}")
|
||||
return count
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reindexing all for user {self.user_id}: {e}")
|
||||
raise
|
||||
|
||||
async def search(self, query: str, limit: int = 5) -> List[Dict[str, Any]]:
|
||||
@@ -292,7 +340,8 @@ class TxtaiIntelligenceService:
|
||||
if self.enable_caching and self.cache_manager:
|
||||
cached_results = self.cache_manager.get_cached_query_results(
|
||||
query=query,
|
||||
relevance_threshold=0.5 # Lower threshold for search results
|
||||
relevance_threshold=0.5, # Lower threshold for search results
|
||||
user_id=self.user_id
|
||||
)
|
||||
if cached_results:
|
||||
logger.info(f"Cache hit for search query: '{query}'")
|
||||
@@ -309,7 +358,8 @@ class TxtaiIntelligenceService:
|
||||
self.cache_manager.cache_query_results(
|
||||
query=query,
|
||||
results=results,
|
||||
relevance_threshold=0.5
|
||||
relevance_threshold=0.5,
|
||||
user_id=self.user_id
|
||||
)
|
||||
logger.debug(f"Cached search results for query: '{query}'")
|
||||
|
||||
@@ -462,8 +512,7 @@ class TxtaiIntelligenceService:
|
||||
"""Fallback clustering method when graph clustering is not available."""
|
||||
logger.info(f"Using fallback clustering for user {self.user_id}")
|
||||
|
||||
# Simple clustering based on semantic similarity
|
||||
# This is a placeholder - in production, you'd implement a proper clustering algorithm
|
||||
# Simple clustering based on semantic similarity against sample queries
|
||||
try:
|
||||
# Get a sample of indexed items to analyze
|
||||
sample_queries = ["marketing", "SEO", "content", "social media", "email marketing"]
|
||||
|
||||
Reference in New Issue
Block a user