Compare commits

..

1 Commits

Author SHA1 Message Date
ي
23489fdc12 Add flat-context synthesis and mnemonic prompt injection 2026-05-18 16:00:56 +05:30
7 changed files with 122 additions and 159 deletions

View File

@@ -5,7 +5,7 @@ API endpoints for managing unified content assets across all modules.
from fastapi import APIRouter, Depends, HTTPException, Query, Body
from sqlalchemy.orm import Session
from typing import List, Optional, Dict, Any, Set
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, Field
from datetime import datetime
@@ -47,33 +47,6 @@ class AssetResponse(BaseModel):
from_attributes = True
def _parse_source_modules(source_module: Optional[List[str]]) -> Optional[List[AssetSource]]:
"""Parse source_module query values from repeated params and/or comma-separated values."""
if not source_module:
return None
parsed_values: List[AssetSource] = []
seen: Set[AssetSource] = set()
for raw_value in source_module:
for value in raw_value.split(","):
normalized = value.strip().lower()
if not normalized:
continue
try:
module = AssetSource(normalized)
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid source module: {value.strip()}")
if module not in seen:
seen.add(module)
parsed_values.append(module)
return parsed_values or None
class AssetListResponse(BaseModel):
"""Response model for asset list."""
assets: List[AssetResponse]
@@ -85,7 +58,7 @@ class AssetListResponse(BaseModel):
@router.get("/", response_model=AssetListResponse)
async def get_assets(
asset_type: Optional[str] = Query(None, description="Filter by asset type"),
source_module: Optional[List[str]] = Query(None, description="Filter by source module(s); supports repeated params and comma-separated values"),
source_module: Optional[str] = Query(None, description="Filter by source module"),
search: Optional[str] = Query(None, description="Search query"),
tags: Optional[str] = Query(None, description="Comma-separated tags"),
favorites_only: bool = Query(False, description="Only favorites"),
@@ -116,7 +89,12 @@ async def get_assets(
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid asset type: {asset_type}")
source_modules_enum = _parse_source_modules(source_module)
source_module_enum = None
if source_module:
try:
source_module_enum = AssetSource(source_module.lower())
except ValueError:
raise HTTPException(status_code=400, detail=f"Invalid source module: {source_module}")
tags_list = None
if tags:
@@ -148,7 +126,7 @@ async def get_assets(
assets, total = service.get_user_assets(
user_id=user_id,
asset_type=asset_type_enum,
source_modules=source_modules_enum,
source_module=source_module_enum,
search_query=search,
tags=tags_list,
favorites_only=favorites_only,
@@ -222,7 +200,7 @@ async def create_asset(
asset = service.create_asset(
user_id=user_id,
asset_type=asset_type_enum,
source_modules=source_modules_enum,
source_module=source_module_enum,
filename=asset_data.filename,
file_url=asset_data.file_url,
file_path=asset_data.file_path,

View File

@@ -107,7 +107,6 @@ class ContentAssetService:
user_id: str,
asset_type: Optional[AssetType] = None,
source_module: Optional[AssetSource] = None,
source_modules: Optional[List[AssetSource]] = None,
search_query: Optional[str] = None,
tags: Optional[List[str]] = None,
favorites_only: bool = False,
@@ -126,7 +125,6 @@ class ContentAssetService:
user_id: Clerk user ID
asset_type: Filter by asset type (optional)
source_module: Filter by source module (optional)
source_modules: Filter by multiple source modules (optional)
search_query: Search in title, description, prompt (optional)
tags: Filter by tags (optional)
favorites_only: Only return favorites (optional)
@@ -144,9 +142,7 @@ class ContentAssetService:
if asset_type:
query = query.filter(ContentAsset.asset_type == asset_type)
if source_modules:
query = query.filter(ContentAsset.source_module.in_(source_modules))
elif source_module:
if source_module:
query = query.filter(ContentAsset.source_module == source_module)
if favorites_only:

View File

@@ -101,6 +101,7 @@ class AgentContextVFS:
"/steps/integrations": AgentFlatContextStore.STEP5_FILENAME,
}
HIGH_SIGNAL_MARKERS = ("agent_summary", "high_signal_terms", "quick_facts", "context_type")
LOW_CONFIDENCE_MARKER = "low_confidence"
def __init__(self, user_id: str, project_id: Optional[str] = None):
self.user_id = user_id
@@ -294,6 +295,101 @@ class AgentContextVFS:
)
return ranked[: max(1, top_k)]
@staticmethod
def _mnemonic_token(result: Dict[str, Any], rank: int) -> str:
"""Create compressed mnemonic token with source reference."""
path = str(result.get("path") or "unknown")
reason = str(result.get("reason") or "match")
confidence = float(result.get("confidence") or 0.0)
low_flag = "!" if result.get(AgentContextVFS.LOW_CONFIDENCE_MARKER) else ""
src = path.replace(".json", "").replace("_", "-")[:28]
hint = reason.replace(" ", "-")[:20]
return f"M{rank}:{src}|{hint}|c{confidence:.2f}{low_flag}"
@staticmethod
def _detect_contradictions(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Detect contradictory learnings by path with conflicting reasons/relevance classes."""
by_path: Dict[str, List[Dict[str, Any]]] = {}
for item in results:
p = str(item.get("path") or "")
by_path.setdefault(p, []).append(item)
contradictions: List[Dict[str, Any]] = []
for path, rows in by_path.items():
reasons = {str(r.get("reason") or "").strip().lower() for r in rows}
relevance = {str(r.get("relevance") or "").strip().lower() for r in rows}
# contradictory if both high/supported or mixed summary/body signals in same source cluster
if len(reasons) > 1 and len(relevance) > 1:
contradictions.append(
{
"path": path,
"reason_variants": sorted([r for r in reasons if r]),
"relevance_variants": sorted([r for r in relevance if r]),
"count": len(rows),
}
)
return contradictions
def _run_synthesis_pipeline(
self, ranked_results: List[Dict[str, Any]], *, char_budget: int = 1200, top_k: int = 5
) -> Dict[str, Any]:
"""
Flat-context synthesis pipeline:
1) Compress telemetry into mnemonic tokens with source references
2) Detect contradictions and mark low-confidence heuristics
3) Select top-ranked, budget-fitting tokens for prompt injection
4) Persist synthesis + source lineage for explainability
"""
contradictions = self._detect_contradictions(ranked_results)
contradiction_paths = {c["path"] for c in contradictions}
normalized: List[Dict[str, Any]] = []
for idx, item in enumerate(ranked_results, start=1):
row = dict(item)
low_conf = bool(row.get("low_probability")) or (str(row.get("path") or "") in contradiction_paths)
row[self.LOW_CONFIDENCE_MARKER] = low_conf
if low_conf:
row["confidence"] = round(max(0.05, float(row.get("confidence", 0.0)) * 0.7), 3)
row["mnemonic_token"] = self._mnemonic_token(row, idx)
normalized.append(row)
chosen: List[Dict[str, Any]] = []
used = 0
for row in normalized[: max(1, top_k * 3)]:
token = str(row.get("mnemonic_token") or "")
cost = len(token) + 8
if chosen and used + cost > char_budget:
continue
chosen.append(row)
used += cost
if len(chosen) >= top_k:
break
synthesis = {
"created_at": datetime.now(timezone.utc).isoformat(),
"top_k": top_k,
"char_budget": char_budget,
"char_budget_used": used,
"selected_mnemonics": [c.get("mnemonic_token") for c in chosen],
"source_lineage": [
{
"mnemonic_token": c.get("mnemonic_token"),
"path": c.get("path"),
"reason": c.get("reason"),
"confidence": c.get("confidence"),
"low_confidence": c.get(self.LOW_CONFIDENCE_MARKER, False),
}
for c in chosen
],
"contradictions": contradictions,
}
self.append_activity_log(
event_type="flat_context_synthesis",
actor="agent_context_vfs",
details=synthesis,
)
return {"ranked_results": normalized, "synthesis": synthesis}
@staticmethod
def _resolve_json_path(data: Any, path_query: str) -> Any:
"""Resolve dot/bracket JSON path such as 'data.seo_audit.recommendations[0]'."""
@@ -518,15 +614,26 @@ class AgentContextVFS:
bounded_results.append(r)
used += cost
synthesis_bundle = self._run_synthesis_pipeline(
self._static_triage(bounded_results, normalized),
char_budget=1200,
top_k=5,
)
triaged_results = synthesis_bundle["ranked_results"]
synthesis = synthesis_bundle["synthesis"]
result = {
"query": normalized,
"attempted_queries": attempted_queries,
"matched_files_count": len(matched_files),
"results": self._static_triage(bounded_results, normalized),
"results": triaged_results,
"notice": notice,
"char_budget_used": used,
"can_answer": bool(bounded_results),
"synthesis": synthesis,
"prompt_context_mnemonics": synthesis.get("selected_mnemonics", []),
}
# Top-ranked, budget-fitting mnemonic tokens are the only ones intended for prompt context injection.
result["triage_top5"] = self._llm_router_stub(result["results"], top_k=5)
logger.info(
f"[vfs_audit] user={self.store.safe_user_id} action=search_context query={normalized!r} results={len(result['results'])}"

View File

@@ -1,31 +0,0 @@
import importlib.util
from pathlib import Path
from fastapi import HTTPException
ROOT = Path(__file__).resolve().parents[3]
ROUTER_PATH = ROOT / 'backend' / 'api' / 'content_assets' / 'router.py'
MODELS_PATH = ROOT / 'backend' / 'models' / 'content_asset_models.py'
models_spec = importlib.util.spec_from_file_location('content_asset_models', MODELS_PATH)
models = importlib.util.module_from_spec(models_spec)
models_spec.loader.exec_module(models)
AssetSource = models.AssetSource
router_spec = importlib.util.spec_from_file_location('content_assets_router', ROUTER_PATH)
router = importlib.util.module_from_spec(router_spec)
router_spec.loader.exec_module(router)
def test_parse_source_modules_supports_repeated_and_csv_values():
parsed = router._parse_source_modules(["blog_writer", "youtube,podcast"])
assert parsed == [AssetSource.BLOG_WRITER, AssetSource.YOUTUBE, AssetSource.PODCAST]
def test_parse_source_modules_raises_for_invalid_values():
try:
router._parse_source_modules(["blog_writer,unknown"])
except HTTPException as exc:
assert exc.status_code == 400
assert "Invalid source module" in exc.detail
else:
raise AssertionError("Expected HTTPException for invalid source module")

View File

@@ -1,50 +0,0 @@
import importlib.util
from pathlib import Path
ROOT = Path(__file__).resolve().parents[3]
SERVICE_PATH = ROOT / 'backend' / 'services' / 'content_asset_service.py'
MODELS_PATH = ROOT / 'backend' / 'models' / 'content_asset_models.py'
models_spec = importlib.util.spec_from_file_location('content_asset_models', MODELS_PATH)
models = importlib.util.module_from_spec(models_spec)
models_spec.loader.exec_module(models)
AssetSource = models.AssetSource
service_spec = importlib.util.spec_from_file_location('content_asset_service', SERVICE_PATH)
service_module = importlib.util.module_from_spec(service_spec)
service_spec.loader.exec_module(service_module)
ContentAssetService = service_module.ContentAssetService
class DummyQuery:
def __init__(self):
self.filters = []
def filter(self, expr):
self.filters.append(expr)
return self
def count(self): return 0
def order_by(self, *_args, **_kwargs): return self
def limit(self, *_args, **_kwargs): return self
def offset(self, *_args, **_kwargs): return self
def all(self): return []
class DummyDB:
def __init__(self): self.query_obj = DummyQuery()
def query(self, *_args, **_kwargs): return self.query_obj
def test_get_user_assets_accepts_multiple_source_modules_filter():
db = DummyDB()
service = ContentAssetService(db)
assets, total = service.get_user_assets(
user_id="user-1",
source_modules=[AssetSource.BLOG_WRITER, AssetSource.YOUTUBE],
)
assert assets == []
assert total == 0
assert len(db.query_obj.filters) >= 2

View File

@@ -1,35 +0,0 @@
import { renderHook, waitFor } from '@testing-library/react';
import { useContentAssets } from '../useContentAssets';
const getTokenMock = jest.fn();
jest.mock('@clerk/clerk-react', () => ({
useAuth: () => ({ getToken: getTokenMock }),
}));
describe('useContentAssets', () => {
beforeEach(() => {
getTokenMock.mockResolvedValue('test-token');
global.fetch = jest.fn().mockResolvedValue({
ok: true,
json: async () => ({ assets: [], total: 0, limit: 100, offset: 0 }),
} as Response);
});
afterEach(() => {
jest.clearAllMocks();
});
it('sends all source_module values as repeated query params', async () => {
renderHook(() =>
useContentAssets({ source_module: ['blog_writer', 'youtube'], limit: 50, offset: 0 })
);
await waitFor(() => expect(global.fetch).toHaveBeenCalled());
const calledUrl = (global.fetch as jest.Mock).mock.calls[0][0] as string;
const params = new URL(calledUrl).searchParams;
expect(params.getAll('source_module')).toEqual(['blog_writer', 'youtube']);
});
});

View File

@@ -29,7 +29,7 @@ export interface ContentAsset {
export interface AssetFilters {
asset_type?: 'text' | 'image' | 'video' | 'audio';
source_module?: string | string[]; // Supports single or multiple source modules
source_module?: string | string[]; // Support single or multiple source modules
search?: string;
tags?: string[];
favorites_only?: boolean;
@@ -146,10 +146,8 @@ export const useContentAssets = (filters: AssetFilters = {}) => {
if (currentFilters.source_module) {
// Handle both string and array cases
if (Array.isArray(currentFilters.source_module)) {
// Send every selected source module as repeated query params
currentFilters.source_module.forEach((module) => {
params.append('source_module', module);
});
// For arrays, use the first value (backend doesn't support multiple yet)
params.append('source_module', currentFilters.source_module[0]);
} else {
params.append('source_module', currentFilters.source_module);
}