first commit
This commit is contained in:
509
packages/plugins/ai-moderation/src/admin.tsx
Normal file
509
packages/plugins/ai-moderation/src/admin.tsx
Normal file
@@ -0,0 +1,509 @@
|
||||
/**
|
||||
* AI Moderation Plugin — Admin Components
|
||||
*
|
||||
* Exports widgets and pages for the admin UI.
|
||||
*/
|
||||
|
||||
import { Switch } from "@cloudflare/kumo";
|
||||
import {
|
||||
ShieldCheck,
|
||||
CheckCircle,
|
||||
WarningCircle,
|
||||
FloppyDisk,
|
||||
CircleNotch,
|
||||
Trash,
|
||||
PencilSimple,
|
||||
Plus,
|
||||
TestTube,
|
||||
X,
|
||||
} from "@phosphor-icons/react";
|
||||
import type { PluginAdminExports } from "emdash";
|
||||
import { apiFetch, isRecord, parseApiResponse } from "emdash/plugin-utils";
|
||||
import * as React from "react";
|
||||
|
||||
import type { Category } from "./categories.js";
|
||||
|
||||
const API_BASE = "/_emdash/api/plugins/ai-moderation";
|
||||
|
||||
// =============================================================================
|
||||
// Dashboard Widget
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Payload the dashboard widget reads from the plugin's `status` endpoint.
 */
interface PluginStatus {
	/** Declared by the endpoint contract; NOTE(review): exact server semantics are not visible in this file. */
	enabled: boolean;
	/** Rendered by the widget as the number of "active categories". */
	categoryCount: number;
	/** Rendered by the widget as the "Auto-approve clean" Yes/No row. */
	autoApproveClean: boolean;
}
|
||||
|
||||
/**
 * Dashboard widget summarising the AI moderation plugin state.
 *
 * Fetches `${API_BASE}/status` once on mount. While the request is in flight a
 * spinner is shown. Afterwards the widget reports one of three states:
 *
 * - active: a status payload was received and `enabled` is not `false`
 * - disabled: the payload explicitly reports `enabled: false`
 * - unavailable: the request failed or returned a non-OK response
 *
 * Previously the widget always claimed "AI Moderation Active" (with
 * "0 active categories") even when no status could be loaded.
 */
function StatusWidget() {
	const [status, setStatus] = React.useState<PluginStatus | null>(null);
	const [isLoading, setIsLoading] = React.useState(true);

	React.useEffect(() => {
		// Guards against state updates after the widget has been unmounted.
		let cancelled = false;

		async function fetchStatus() {
			try {
				const response = await apiFetch(`${API_BASE}/status`);
				if (!response.ok) return;
				const data = await parseApiResponse<PluginStatus>(response);
				if (!cancelled) setStatus(data);
			} catch {
				// Widget is non-critical: a failed fetch leaves `status` as null,
				// which is rendered as "status unavailable" below.
			} finally {
				if (!cancelled) setIsLoading(false);
			}
		}

		void fetchStatus();

		return () => {
			cancelled = true;
		};
	}, []);

	if (isLoading) {
		return (
			<div className="flex items-center justify-center py-8">
				<CircleNotch className="h-5 w-5 animate-spin text-muted-foreground" />
			</div>
		);
	}

	// Only an explicit `false` counts as disabled so that payloads without the
	// flag keep rendering as active, exactly as before.
	const isDisabled = status?.enabled === false;
	const isActive = status !== null && !isDisabled;
	const heading = isActive
		? "AI Moderation Active"
		: isDisabled
			? "AI Moderation Disabled"
			: "AI Moderation status unavailable";

	return (
		<div className="space-y-4">
			<div className="flex items-center gap-3">
				{isActive ? (
					<div className="p-2 rounded-full bg-green-100 dark:bg-green-900/30">
						<ShieldCheck className="h-5 w-5 text-green-600 dark:text-green-400" />
					</div>
				) : (
					<div className="p-2 rounded-full bg-muted">
						<WarningCircle className="h-5 w-5 text-muted-foreground" />
					</div>
				)}
				<div>
					<div className="font-medium">{heading}</div>
					<div className="text-xs text-muted-foreground">
						{status ? (
							<>{status.categoryCount ?? 0} active categories</>
						) : (
							"Could not load moderation status"
						)}
					</div>
				</div>
			</div>

			{status && (
				<div className="pt-2 border-t space-y-1">
					<div className="flex justify-between text-sm">
						<span className="text-muted-foreground">Auto-approve clean</span>
						<span>{status.autoApproveClean ? "Yes" : "No"}</span>
					</div>
				</div>
			)}

			<div className="pt-2">
				<a
					href="/_emdash/admin/plugins/ai-moderation/settings"
					className="text-xs text-primary hover:underline"
				>
					Configure moderation
				</a>
			</div>
		</div>
	);
}
|
||||
|
||||
// =============================================================================
|
||||
// Category Edit Dialog
|
||||
// =============================================================================
|
||||
|
||||
/** Props accepted by the category add/edit dialog. */
interface CategoryDialogProps {
	/** Category to edit, or `null` to start from an empty "add" form. */
	category: Category | null;
	/** Receives the form contents when the user confirms with Save/Add. */
	onSave: (category: Category) => void;
	/** Invoked by both the close (X) button and the Cancel button. */
	onClose: () => void;
}
|
||||
|
||||
/**
 * Modal dialog for adding a new category or editing an existing one.
 *
 * When `category` is provided the dialog is in edit mode: the form starts from
 * that category and the ID field is disabled. Otherwise it starts from an empty
 * form with action "hold" and `builtin: false`.
 *
 * `onSave` is called only when ID, name and description are all non-blank.
 * Name and description are trimmed before being handed to `onSave`; the ID is
 * trimmed only in add mode so that an existing category keeps its exact ID.
 */
function CategoryDialog({ category, onSave, onClose }: CategoryDialogProps) {
	const [form, setForm] = React.useState<Category>(
		category ?? {
			id: "",
			name: "",
			description: "",
			action: "hold",
			builtin: false,
		},
	);

	const isEditing = !!category;

	// Functional update so rapid successive edits never spread a stale snapshot.
	const updateForm = (patch: Partial<Category>) => {
		setForm((prev) => ({ ...prev, ...patch }));
	};

	// Value that would be saved: surrounding whitespace is not meaningful input.
	const pending: Category = {
		...form,
		id: isEditing ? form.id : form.id.trim(),
		name: form.name.trim(),
		description: form.description.trim(),
	};
	const canSave = pending.id !== "" && pending.name !== "" && pending.description !== "";

	return (
		<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/50">
			<div
				role="dialog"
				aria-modal="true"
				aria-labelledby="ai-moderation-category-dialog-title"
				className="bg-background border rounded-lg p-6 w-full max-w-md space-y-4"
			>
				<div className="flex items-center justify-between">
					<h3 id="ai-moderation-category-dialog-title" className="text-lg font-semibold">
						{isEditing ? "Edit Category" : "Add Category"}
					</h3>
					<button
						type="button"
						onClick={onClose}
						aria-label="Close"
						className="p-1 hover:bg-muted rounded"
					>
						<X className="h-4 w-4" />
					</button>
				</div>

				<div className="space-y-3">
					<div className="space-y-1">
						<label htmlFor="ai-moderation-category-id" className="text-sm font-medium">
							ID
						</label>
						<input
							id="ai-moderation-category-id"
							type="text"
							value={form.id}
							onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
								updateForm({ id: e.target.value })
							}
							disabled={isEditing}
							placeholder="e.g. S10"
							className="w-full px-3 py-2 border rounded-md bg-background text-sm disabled:opacity-50"
						/>
					</div>

					<div className="space-y-1">
						<label htmlFor="ai-moderation-category-name" className="text-sm font-medium">
							Name
						</label>
						<input
							id="ai-moderation-category-name"
							type="text"
							value={form.name}
							onChange={(e: React.ChangeEvent<HTMLInputElement>) =>
								updateForm({ name: e.target.value })
							}
							placeholder="e.g. Self-Promotion"
							className="w-full px-3 py-2 border rounded-md bg-background text-sm"
						/>
					</div>

					<div className="space-y-1">
						<label htmlFor="ai-moderation-category-description" className="text-sm font-medium">
							Description
						</label>
						<textarea
							id="ai-moderation-category-description"
							value={form.description}
							onChange={(e: React.ChangeEvent<HTMLTextAreaElement>) =>
								updateForm({ description: e.target.value })
							}
							rows={3}
							placeholder="Description for AI classification..."
							className="w-full px-3 py-2 border rounded-md bg-background text-sm resize-none"
						/>
					</div>

					<div className="space-y-1">
						<label htmlFor="ai-moderation-category-action" className="text-sm font-medium">
							Action
						</label>
						<select
							id="ai-moderation-category-action"
							value={form.action}
							onChange={(e: React.ChangeEvent<HTMLSelectElement>) => {
								const val = e.target.value;
								// Narrow the raw DOM string to the action union before storing it.
								if (val === "block" || val === "hold" || val === "ignore") {
									updateForm({ action: val });
								}
							}}
							className="w-full px-3 py-2 border rounded-md bg-background text-sm"
						>
							<option value="block">Block (mark as spam)</option>
							<option value="hold">Hold (pending review)</option>
							<option value="ignore">Ignore (no action)</option>
						</select>
					</div>
				</div>

				<div className="flex justify-end gap-2 pt-2">
					<button
						type="button"
						onClick={onClose}
						className="px-4 py-2 border rounded-md hover:bg-muted text-sm"
					>
						Cancel
					</button>
					<button
						type="button"
						onClick={() => {
							if (canSave) {
								onSave(pending);
							}
						}}
						disabled={!canSave}
						className="px-4 py-2 bg-primary text-primary-foreground rounded-md hover:bg-primary/90 disabled:opacity-50 text-sm"
					>
						{isEditing ? "Save" : "Add"}
					</button>
				</div>
			</div>
		</div>
	);
}
|
||||
|
||||
// =============================================================================
|
||||
// Settings Page
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Render the outcome of a test-panel analysis.
 *
 * Precedence: a `guard` record (safe/unsafe verdict), then `guardError`, then a
 * generic `error` string (set when the request itself failed), and finally the
 * "not available" notice when none of those are present.
 */
function renderTestResult(result: Record<string, unknown>) {
	const rawGuard = result.guard;
	const guard = rawGuard && isRecord(rawGuard) ? rawGuard : null;

	if (guard) {
		const isSafe = Boolean(guard.safe);
		const rawCategories = guard.categories;
		const flagged = Array.isArray(rawCategories) ? rawCategories.map(String) : null;

		return (
			<div className="flex items-center gap-2">
				{isSafe ? (
					<CheckCircle className="h-5 w-5 text-green-600" />
				) : (
					<WarningCircle className="h-5 w-5 text-red-600" />
				)}
				<span className="font-medium">{isSafe ? "Safe" : "Unsafe"}</span>
				{!isSafe && flagged && (
					<span className="text-sm text-muted-foreground">
						— Categories: {flagged.join(", ")}
					</span>
				)}
			</div>
		);
	}

	if (result.guardError) {
		return (
			<div className="text-sm text-red-600">
				AI Error:{" "}
				{typeof result.guardError === "string" ? result.guardError : "Unknown error"}
			</div>
		);
	}

	// Request-level failure reported by handleTest; previously this fell through
	// to the misleading "no active categories" notice.
	if (typeof result.error === "string" && result.error !== "") {
		return <div className="text-sm text-red-600">{result.error}</div>;
	}

	return (
		<div className="text-sm text-muted-foreground">
			AI analysis not available (no active categories)
		</div>
	);
}

/**
 * Plugin settings page: category list, behavior toggle and an analysis test panel.
 *
 * Settings are loaded from `${API_BASE}/settings` on mount and posted to
 * `${API_BASE}/settings/save` by the "Save Settings" button. Category edits and
 * the auto-approve toggle only change local state until that button is used.
 */
function SettingsPage() {
	const [categories, setCategories] = React.useState<Category[]>([]);
	const [autoApproveClean, setAutoApproveClean] = React.useState(true);
	const [isLoading, setIsLoading] = React.useState(true);
	const [isSaving, setIsSaving] = React.useState(false);
	const [saveMessage, setSaveMessage] = React.useState<string | null>(null);
	const [editingCategory, setEditingCategory] = React.useState<Category | null | "new">(null);

	// Test panel state
	const [testText, setTestText] = React.useState("");
	const [testResult, setTestResult] = React.useState<Record<string, unknown> | null>(null);
	const [isTesting, setIsTesting] = React.useState(false);

	// Pending auto-dismiss timer for the save message, so it can be cancelled
	// when a newer message replaces it or when the page unmounts.
	const saveMessageTimer = React.useRef<ReturnType<typeof setTimeout> | null>(null);

	const clearSaveMessageTimer = () => {
		if (saveMessageTimer.current !== null) {
			clearTimeout(saveMessageTimer.current);
			saveMessageTimer.current = null;
		}
	};

	React.useEffect(() => clearSaveMessageTimer, []);

	// Load settings on mount
	React.useEffect(() => {
		let cancelled = false;

		async function loadSettings() {
			try {
				const response = await apiFetch(`${API_BASE}/settings`);
				if (response.ok) {
					const data = await parseApiResponse<{
						categories?: Category[];
						behavior?: { autoApproveClean?: boolean };
					}>(response);
					if (cancelled) return;
					if (data.categories) setCategories(data.categories);
					if (data.behavior?.autoApproveClean !== undefined) {
						setAutoApproveClean(data.behavior.autoApproveClean);
					}
				}
			} catch {
				// Use defaults
			} finally {
				if (!cancelled) setIsLoading(false);
			}
		}

		void loadSettings();

		return () => {
			cancelled = true;
		};
	}, []);

	const handleSave = async () => {
		setIsSaving(true);
		clearSaveMessageTimer();
		setSaveMessage(null);

		let message = "Failed to save settings";
		try {
			const response = await apiFetch(`${API_BASE}/settings/save`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify({
					categories,
					behavior: { autoApproveClean },
				}),
			});
			if (response.ok) {
				message = "Settings saved";
			}
		} catch {
			// `message` already describes the failure.
		} finally {
			setIsSaving(false);
		}

		setSaveMessage(message);
		clearSaveMessageTimer();
		// eslint-disable-next-line e18e/prefer-timer-args -- conflicts with no-implied-eval
		saveMessageTimer.current = setTimeout(() => {
			saveMessageTimer.current = null;
			setSaveMessage(null);
		}, 3000);
	};

	const handleTest = async () => {
		if (!testText.trim()) return;
		setIsTesting(true);
		setTestResult(null);
		try {
			const response = await apiFetch(`${API_BASE}/settings/test`, {
				method: "POST",
				headers: { "Content-Type": "application/json" },
				body: JSON.stringify({ text: testText }),
			});
			// Same OK check as the other requests on this page; an error body must
			// not be presented as an analysis result.
			if (!response.ok) {
				setTestResult({ success: false, error: "Failed to run test" });
				return;
			}
			const data = await parseApiResponse<Record<string, unknown>>(response);
			setTestResult(data);
		} catch {
			setTestResult({ success: false, error: "Failed to run test" });
		} finally {
			setIsTesting(false);
		}
	};

	const handleCategorySave = (cat: Category) => {
		setCategories((prev) => {
			const existing = prev.find((c) => c.id === cat.id);
			if (!existing) return [...prev, cat];
			// An entry with this ID already exists (edit, or "add" reusing an ID):
			// update it in place but keep its `builtin` flag, so a built-in
			// category cannot be turned into a deletable one by re-adding its ID.
			const merged: Category = { ...cat, builtin: existing.builtin };
			return prev.map((c) => (c === existing ? merged : c));
		});
		setEditingCategory(null);
	};

	const handleCategoryDelete = (id: string) => {
		setCategories((prev) => prev.filter((c) => c.id !== id));
	};

	if (isLoading) {
		return (
			<div className="flex items-center justify-center py-16">
				<CircleNotch className="h-6 w-6 animate-spin text-muted-foreground" />
			</div>
		);
	}

	return (
		<div className="space-y-6">
			{/* Header */}
			<div className="flex items-center justify-between">
				<div>
					<h1 className="text-3xl font-bold">AI Moderation</h1>
					<p className="text-muted-foreground mt-1">Configure AI-powered comment moderation</p>
				</div>
				<div className="flex items-center gap-3">
					{saveMessage && <span className="text-sm text-muted-foreground">{saveMessage}</span>}
					<button
						type="button"
						onClick={handleSave}
						disabled={isSaving}
						className="inline-flex items-center gap-2 px-4 py-2 bg-primary text-primary-foreground rounded-md hover:bg-primary/90 disabled:opacity-50"
					>
						{isSaving ? (
							<CircleNotch className="h-4 w-4 animate-spin" />
						) : (
							<FloppyDisk className="h-4 w-4" />
						)}
						{isSaving ? "Saving..." : "Save Settings"}
					</button>
				</div>
			</div>

			<div className="grid gap-6 lg:grid-cols-2">
				{/* Categories */}
				<div className="border rounded-lg p-6 space-y-4 lg:col-span-2">
					<div className="flex items-center justify-between">
						<div className="flex items-center gap-2">
							<ShieldCheck className="h-5 w-5 text-muted-foreground" />
							<h2 className="text-lg font-semibold">Safety Categories</h2>
						</div>
						<button
							type="button"
							onClick={() => setEditingCategory("new")}
							className="inline-flex items-center gap-1 px-3 py-1.5 border rounded-md hover:bg-muted text-sm"
						>
							<Plus className="h-3.5 w-3.5" />
							Add Category
						</button>
					</div>

					<div className="divide-y">
						{categories.map((cat) => (
							<div key={cat.id} className="flex items-center justify-between py-3">
								<div className="flex-1 min-w-0">
									<div className="flex items-center gap-2">
										<span className="text-xs font-mono bg-muted px-1.5 py-0.5 rounded">
											{cat.id}
										</span>
										<span className="font-medium">{cat.name}</span>
										<span
											className={`text-xs px-2 py-0.5 rounded-full ${
												cat.action === "block"
													? "bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400"
													: cat.action === "hold"
														? "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400"
														: "bg-gray-100 text-gray-600 dark:bg-gray-800 dark:text-gray-400"
											}`}
										>
											{cat.action}
										</span>
									</div>
									<p className="text-sm text-muted-foreground mt-0.5 truncate">{cat.description}</p>
								</div>
								<div className="flex items-center gap-1 ml-4">
									<button
										type="button"
										onClick={() => setEditingCategory(cat)}
										className="p-1.5 hover:bg-muted rounded"
										title="Edit"
									>
										<PencilSimple className="h-4 w-4" />
									</button>
									{!cat.builtin && (
										<button
											type="button"
											onClick={() => handleCategoryDelete(cat.id)}
											className="p-1.5 hover:bg-muted rounded text-red-600"
											title="Delete"
										>
											<Trash className="h-4 w-4" />
										</button>
									)}
								</div>
							</div>
						))}
					</div>
				</div>

				{/* Behavior */}
				<div className="border rounded-lg p-6 space-y-4">
					<h2 className="text-lg font-semibold">Behavior</h2>

					<Switch
						checked={autoApproveClean}
						onCheckedChange={setAutoApproveClean}
						label="Auto-approve clean comments"
						labelTooltip="Automatically approve comments that pass AI checks. When off, falls back to collection moderation settings."
						controlFirst={false}
					/>
				</div>

				{/* Test Panel */}
				<div className="border rounded-lg p-6 space-y-4 lg:col-span-2">
					<div className="flex items-center gap-2">
						<TestTube className="h-5 w-5 text-muted-foreground" />
						<h2 className="text-lg font-semibold">Test Panel</h2>
					</div>

					<div className="space-y-3">
						<textarea
							value={testText}
							onChange={(e: React.ChangeEvent<HTMLTextAreaElement>) => setTestText(e.target.value)}
							rows={3}
							placeholder="Paste a comment to test AI analysis..."
							className="w-full px-3 py-2 border rounded-md bg-background text-sm resize-none"
						/>
						<button
							type="button"
							onClick={handleTest}
							disabled={isTesting || !testText.trim()}
							className="inline-flex items-center gap-2 px-4 py-2 border rounded-md hover:bg-muted disabled:opacity-50 text-sm"
						>
							{isTesting ? (
								<CircleNotch className="h-4 w-4 animate-spin" />
							) : (
								<TestTube className="h-4 w-4" />
							)}
							{isTesting ? "Analyzing..." : "Analyze"}
						</button>

						{testResult && (
							<div className="p-4 bg-muted/50 rounded-md space-y-2">
								{renderTestResult(testResult)}
							</div>
						)}
					</div>
				</div>
			</div>

			{/* Category Dialog */}
			{editingCategory !== null && (
				<CategoryDialog
					category={editingCategory === "new" ? null : editingCategory}
					onSave={handleCategorySave}
					onClose={() => setEditingCategory(null)}
				/>
			)}
		</div>
	);
}
|
||||
|
||||
// =============================================================================
|
||||
// Exports
|
||||
// =============================================================================
|
||||
|
||||
/** Dashboard widgets contributed by this plugin; the key is the widget id. */
export const widgets: PluginAdminExports["widgets"] = { status: StatusWidget };

/** Admin pages contributed by this plugin; the key is the page path. */
export const pages: PluginAdminExports["pages"] = { "/settings": SettingsPage };
|
||||
95
packages/plugins/ai-moderation/src/categories.ts
Normal file
95
packages/plugins/ai-moderation/src/categories.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
/**
|
||||
* AI Moderation Categories
|
||||
*
|
||||
* Defines the content taxonomy used by Llama Guard for comment classification.
|
||||
* Categories map to actions (block, hold, ignore) that feed into the moderation decision.
|
||||
*/
|
||||
|
||||
/** One entry of the moderation taxonomy. */
export interface Category {
	/** Short identifier, e.g. "C1" */
	id: string;
	/** Display name shown to humans */
	name: string;
	/** Text describing what falls under this category */
	description: string;
	/** What to do with a comment when this category is triggered */
	action: "block" | "hold" | "ignore";
	/** `true` for built-in categories, which cannot be deleted */
	builtin: boolean;
}
|
||||
|
||||
/**
 * Default categories tuned for comment moderation.
 *
 * Covers the most common problems a comment moderator faces: spam, toxicity,
 * trolling, harassment, hate speech, and misinformation. Child safety is
 * retained as a hard block since it applies universally.
 *
 * Every entry is marked `builtin: true`. Spam, harassment, hate speech and
 * child safety use the "block" action; toxic comments, trolling and
 * misinformation use "hold".
 */
export const DEFAULT_CATEGORIES: Category[] = [
	{
		id: "C1",
		name: "Spam",
		description:
			"Unsolicited commercial messages, repetitive posts, or comments that exist solely to promote a product, service, or link with no genuine contribution to the discussion",
		action: "block",
		builtin: true,
	},
	{
		id: "C2",
		name: "Toxic Comment",
		description:
			"Rude, disrespectful, or hostile language intended to upset or demean others, including insults, profanity directed at people, and gratuitously offensive content",
		action: "hold",
		builtin: true,
	},
	{
		id: "C3",
		name: "Trolling",
		description:
			"Comments designed to provoke arguments or emotional reactions rather than contribute to discussion — including deliberate bad-faith arguments, inflammatory bait, and intentional disruption",
		action: "hold",
		builtin: true,
	},
	{
		id: "C4",
		name: "Harassment",
		description:
			"Targeted abuse, threats, or intimidation directed at a specific person or group, including doxxing, personal attacks, and coordinated pile-ons",
		action: "block",
		builtin: true,
	},
	{
		id: "C5",
		name: "Hate Speech",
		description:
			"Content attacking individuals or groups based on protected characteristics such as race, religion, gender, sexual orientation, or disability",
		action: "block",
		builtin: true,
	},
	{
		id: "C6",
		name: "Misinformation",
		description:
			"Demonstrably false claims, conspiracy theories, or fabricated facts presented as true — especially on health, safety, or electoral topics",
		action: "hold",
		builtin: true,
	},
	{
		id: "C7",
		name: "Child Safety",
		description: "Any content that sexualizes minors or could facilitate harm to children",
		action: "block",
		builtin: true,
	},
];
|
||||
|
||||
/**
 * Build a Llama Guard taxonomy string from categories.
 *
 * Only includes categories whose action is not "ignore". Each included
 * category contributes exactly two lines: `<id>: <name>` followed by its
 * description. Runs of whitespace (including newlines) inside id, name and
 * description are collapsed to a single space so that a multi-line description
 * cannot break that two-line layout or imitate an additional category entry.
 *
 * @param categories - Categories to render; the array is not modified.
 * @returns The taxonomy text, or an empty string when no category is active.
 */
export function buildTaxonomy(categories: readonly Category[]): string {
	const toSingleLine = (value: string): string => value.replace(/\s+/g, " ").trim();

	return categories
		.filter((c) => c.action !== "ignore")
		.map((c) => `${toSingleLine(c.id)}: ${toSingleLine(c.name)}\n${toSingleLine(c.description)}`)
		.join("\n");
}
|
||||
100
packages/plugins/ai-moderation/src/decision.ts
Normal file
100
packages/plugins/ai-moderation/src/decision.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Moderation Decision Logic
|
||||
*/
|
||||
|
||||
import type { CollectionCommentSettings, ModerationDecision } from "emdash";
|
||||
|
||||
import type { Category } from "./categories.js";
|
||||
import type { GuardResult } from "./guard.js";
|
||||
/**
 * Compute the moderation decision for a comment.
 *
 * Decision flow (in priority order):
 * 1. Authenticated CMS user → approved
 * 2. AI flagged "block" category → spam
 * 3. AI flagged "hold" (or unknown) category, or reported the comment as
 *    unsafe without naming any category → pending
 * 4. AI error (fail-safe) → pending
 * 5. AI clean + autoApproveClean → approved
 * 6. Collection moderation fallback
 *
 * A comment counts as "AI clean" when the guard reported it safe, or when every
 * category it flagged is configured as "ignore".
 *
 * @param guard - Guard verdict, or `undefined` when the guard did not run.
 * @param guardError - Error description when the guard failed.
 * @param categories - Configured categories; supplies the action per category id.
 * @param settings - Behavior settings (`autoApproveClean`).
 * @param collectionSettings - Collection settings; `commentsModeration` drives the fallback.
 * @param priorApprovedCount - Count used by the "first_time" fallback; any value above 0 approves.
 * @param isAuthenticatedUser - Whether the author is an authenticated CMS user.
 * @returns The status and a human-readable reason.
 */
export function computeDecision(
	guard: GuardResult | undefined,
	guardError: string | undefined,
	categories: Category[],
	settings: { autoApproveClean: boolean },
	collectionSettings: CollectionCommentSettings,
	priorApprovedCount: number,
	isAuthenticatedUser: boolean,
): ModerationDecision {
	// 1. Auto-approve authenticated CMS users
	if (isAuthenticatedUser) {
		return { status: "approved", reason: "Authenticated CMS user" };
	}

	// Build category action lookup
	const categoryActions = new Map(categories.map((c) => [c.id, c.action]));

	// 2 & 3. Check AI guard results
	// Track whether AI ran and found only ignorable categories (treat as clean)
	let aiRanClean = guard?.safe === true;

	if (guard && !guard.safe) {
		// An unsafe verdict with no categories cannot be mapped to an action.
		// Hold it for review instead of letting it count as "all ignored".
		if (guard.categories.length === 0) {
			return {
				status: "pending",
				reason: "AI flagged for review: no categories reported",
			};
		}

		let shouldBlock = false;
		let shouldHold = false;
		const flaggedCategories: string[] = [];

		for (const catId of guard.categories) {
			const action = categoryActions.get(catId);
			if (action === "block") {
				shouldBlock = true;
				flaggedCategories.push(catId);
			} else if (action === "hold" || action === undefined) {
				// Unknown categories default to "hold" (fail-safe)
				shouldHold = true;
				flaggedCategories.push(catId);
			}
			// "ignore" categories are skipped
		}

		if (shouldBlock) {
			return {
				status: "spam",
				reason: `AI flagged: ${flaggedCategories.join(", ")}`,
			};
		}

		if (shouldHold) {
			return {
				status: "pending",
				reason: `AI flagged for review: ${flaggedCategories.join(", ")}`,
			};
		}

		// AI flagged categories but all were "ignore" — treat as clean
		aiRanClean = true;
	}

	// 4. AI error (fail-safe: hold for review)
	if (guardError) {
		return {
			status: "pending",
			reason: `AI error: ${guardError}`,
		};
	}

	// 5. Auto-approve clean comments when configured
	if (settings.autoApproveClean && aiRanClean) {
		return { status: "approved", reason: "AI verified clean" };
	}

	// 6. Fall back to collection moderation settings
	if (collectionSettings.commentsModeration === "none") {
		return { status: "approved", reason: "Moderation disabled" };
	}

	if (collectionSettings.commentsModeration === "first_time" && priorApprovedCount > 0) {
		return { status: "approved", reason: "Returning commenter" };
	}

	return { status: "pending", reason: "Held for review" };
}
|
||||
33
packages/plugins/ai-moderation/src/descriptor.ts
Normal file
33
packages/plugins/ai-moderation/src/descriptor.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
/**
|
||||
* AI Moderation Plugin Descriptor
|
||||
*/
|
||||
|
||||
import type { PluginDescriptor } from "emdash";
|
||||
|
||||
import type { Category } from "./categories.js";
|
||||
|
||||
/** Options accepted when registering the AI moderation plugin. */
export interface AIModerationOptions {
	/** Categories to use in place of the defaults */
	categories?: Category[];
	/** Whether comments that pass the AI checks are approved automatically (default: true) */
	autoApproveClean?: boolean;
	/** Name of the Workers AI binding to use (default: "AI") */
	aiBinding?: string;
}
|
||||
|
||||
/**
 * Build the descriptor that registers the AI moderation plugin.
 *
 * The descriptor carries the given options unchanged and points at the
 * package's `plugin` and `admin` entry modules. It declares one admin page
 * (`/settings`) and one dashboard widget (`status`).
 *
 * @param options - Plugin options; defaults to an empty object.
 */
export function aiModerationPlugin(
	options: AIModerationOptions = {},
): PluginDescriptor<AIModerationOptions> {
	const packageName = "@emdashcms/plugin-ai-moderation";
	const title = "AI Moderation";

	const descriptor: PluginDescriptor<AIModerationOptions> = {
		id: "ai-moderation",
		version: "0.1.0",
		entrypoint: `${packageName}/plugin`,
		options,
		adminEntry: `${packageName}/admin`,
		adminPages: [{ path: "/settings", label: title, icon: "shield" }],
		adminWidgets: [{ id: "status", title, size: "third" }],
	};

	return descriptor;
}
|
||||
100
packages/plugins/ai-moderation/src/guard.ts
Normal file
100
packages/plugins/ai-moderation/src/guard.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Llama Guard Integration
|
||||
*
|
||||
* Builds prompts for and parses responses from Llama Guard 3 8B via Workers AI.
|
||||
*/
|
||||
|
||||
/** Structured verdict produced from a Llama Guard response. */
export interface GuardResult {
	/** `true` when the content was judged safe */
	safe: boolean;
	/** Identifiers of the categories reported as violated; empty when safe */
	categories: string[];
}
|
||||
|
||||
/** Separator between category ids in a text-mode response: any run of commas and/or whitespace */
const CATEGORY_SPLIT_PATTERN = /[\s,]+/;
|
||||
|
||||
/**
 * Structural markers used by the Llama Guard prompt format.
 *
 * Matches `<BEGIN …>` / `<END …>` (optionally written with a leading slash)
 * for the "UNSAFE CONTENT CATEGORIES" and "CONVERSATION" sections,
 * case-insensitively and with any amount of whitespace between the words.
 */
const STRUCTURAL_MARKERS =
	/<\/?(?:BEGIN|END)\s+(?:UNSAFE\s+CONTENT\s+CATEGORIES|CONVERSATION)>/gi;

/**
 * Sanitize user text to prevent prompt injection via structural markers.
 *
 * Markers are removed repeatedly until none remain. A single pass is not
 * enough: removing the inner marker from `<END CONV<END CONVERSATION>ERSATION>`
 * leaves a fresh `<END CONVERSATION>` behind.
 *
 * @param text - Untrusted comment text.
 * @returns The text with every structural marker removed.
 */
function sanitizeUserText(text: string): string {
	let current = text;
	let previous: string;
	// Each changing pass strictly shortens the string, so this terminates.
	do {
		previous = current;
		current = current.replace(STRUCTURAL_MARKERS, "");
	} while (current !== previous);
	return current;
}
|
||||
|
||||
/**
 * Assemble the Llama Guard classification prompt for one comment.
 *
 * The comment text is passed through `sanitizeUserText` before being embedded;
 * the taxonomy is inserted as given.
 *
 * @param text - Comment body to classify.
 * @param taxonomy - Category taxonomy placed in the "unsafe content categories" section.
 * @returns The complete prompt text.
 */
export function buildGuardPrompt(text: string, taxonomy: string): string {
	const userMessage = sanitizeUserText(text);

	// One array element per prompt line; empty strings are the blank separator lines.
	const promptLines = [
		"Task: Check if there is unsafe content in the 'User' message according to our safety policy with the below categories.",
		"",
		"<BEGIN UNSAFE CONTENT CATEGORIES>",
		taxonomy,
		"<END UNSAFE CONTENT CATEGORIES>",
		"",
		"<BEGIN CONVERSATION>",
		"",
		`User: ${userMessage}`,
		"",
		"<END CONVERSATION>",
		"",
		"Provide your safety assessment for the User message. First line must be 'safe' or 'unsafe'. If unsafe, list the violated categories on the second line separated by commas.",
	];

	return promptLines.join("\n");
}
|
||||
|
||||
/**
 * Parse a Llama Guard response into a structured result.
 *
 * Handles both text mode ("safe"/"unsafe\nS1,S6") and structured JSON mode
 * ({ safe, categories }) as returned by Workers AI.
 *
 * - Structured mode: a missing `categories` becomes an empty list. A missing
 *   `safe` flag is derived from the categories: safe only when none are listed.
 * - Text mode: a first line of "unsafe" (case-insensitive) always yields
 *   `safe: false`. Categories are read from the second line, split on commas
 *   and whitespace, and the list is empty when that line is absent.
 * - Anything else, including an undefined response, is reported as safe.
 *
 * @param output - Raw model output.
 * @returns The verdict and the category ids it names.
 */
export function parseGuardResponse(output: Ai_Cf_Meta_Llama_Guard_3_8B_Output): GuardResult {
	const resp = output.response;

	// Structured JSON mode — Workers AI returns { safe, categories } directly
	if (typeof resp === "object" && resp !== null) {
		const categories = resp.categories ?? [];
		return {
			safe: resp.safe ?? (categories.length === 0),
			categories,
		};
	}

	// Text mode — "safe" or "unsafe\nS1,S6"
	if (typeof resp === "string") {
		const [verdictLine = "", categoryLine = ""] = resp.trim().split("\n");

		if (verdictLine.trim().toLowerCase() === "unsafe") {
			// An "unsafe" verdict stands even when the category line is missing.
			const categories = categoryLine
				.split(CATEGORY_SPLIT_PATTERN)
				.filter((c) => c.length > 0);
			return { safe: false, categories };
		}
	}

	// Default: safe (including undefined or unexpected responses)
	return { safe: true, categories: [] };
}
|
||||
|
||||
/**
 * Classify a piece of text with Llama Guard through a Workers AI binding.
 *
 * @param text - Text to classify.
 * @param taxonomy - Category taxonomy embedded in the prompt.
 * @param aiBinding - Name of the binding to look up on the Workers `env` (default "AI").
 * @returns The parsed guard verdict.
 * @throws Error when no binding with the given name exists on `env`.
 */
export async function runGuard(
	text: string,
	taxonomy: string,
	aiBinding = "AI",
): Promise<GuardResult> {
	const workers = await import("cloudflare:workers");
	const bindings = workers.env as Record<string, Ai>;
	const model = bindings[aiBinding];

	if (!model) {
		throw new Error(`Workers AI binding "${aiBinding}" not found in env`);
	}

	const content = buildGuardPrompt(text, taxonomy);
	const response = await model.run("@cf/meta/llama-guard-3-8b", {
		messages: [{ role: "user", content }],
		max_tokens: 100,
		temperature: 0.1,
	});

	return parseGuardResponse(response);
}
|
||||
235
packages/plugins/ai-moderation/src/index.ts
Normal file
235
packages/plugins/ai-moderation/src/index.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* AI Moderation Plugin
|
||||
*
|
||||
* Uses Cloudflare Workers AI (Llama Guard 3 8B) to moderate comments.
|
||||
* Registers as the exclusive comment:moderate provider, replacing the
|
||||
* built-in default moderator.
|
||||
*/
|
||||
|
||||
import type { ResolvedPlugin } from "emdash";
|
||||
import { definePlugin } from "emdash";
|
||||
|
||||
import { DEFAULT_CATEGORIES, buildTaxonomy } from "./categories.js";
|
||||
import type { Category } from "./categories.js";
|
||||
import { computeDecision } from "./decision.js";
|
||||
import type { AIModerationOptions } from "./descriptor.js";
|
||||
import { runGuard } from "./guard.js";
|
||||
import type { GuardResult } from "./guard.js";
|
||||
|
||||
/** KV key the configured category list is read from */
const KV_CATEGORIES = "config:categories";

/** KV key the behavior settings are read from */
const KV_BEHAVIOR = "config:behavior";
|
||||
|
||||
/**
 * Type guard: `true` for any non-null, non-array object value.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
	if (value === null || typeof value !== "object") {
		return false;
	}
	return !Array.isArray(value);
}
|
||||
|
||||
/** Minimal read-only view of the plugin KV store needed by the config loaders. */
interface KvReader {
	get: <T>(key: string) => Promise<T | null>;
}

/** Actions a category may carry; any other value is rejected when saving settings. */
const CATEGORY_ACTIONS: readonly string[] = ["block", "hold", "ignore"];

/** Upper bounds enforced on category string fields when saving settings. */
const MAX_CATEGORY_ID_LENGTH = 10;
const MAX_CATEGORY_NAME_LENGTH = 100;
const MAX_CATEGORY_DESCRIPTION_LENGTH = 500;

/**
 * Validate a category list submitted to the `settings/save` route.
 *
 * Entries are treated as untrusted: each one must be a plain record with
 * non-empty string `id`, `name` and `description` within the length limits,
 * an `action` from {@link CATEGORY_ACTIONS}, and an `id` not used by an
 * earlier entry.
 *
 * @param candidates - Raw array taken from the request input.
 * @returns The error message for the first offending entry, or `null` when
 *   every entry is valid.
 */
function findCategoryError(candidates: readonly unknown[]): string | null {
	const seenIds = new Set<string>();

	for (const candidate of candidates) {
		// Guard before touching fields: `null` or a primitive would otherwise
		// throw (null) or be silently mis-read when dereferenced.
		if (!isRecord(candidate)) {
			return "Invalid category: missing id";
		}

		const { id, name, description, action } = candidate;

		if (
			typeof id !== "string" ||
			typeof name !== "string" ||
			typeof description !== "string" ||
			!id ||
			!name ||
			!description ||
			id.length > MAX_CATEGORY_ID_LENGTH ||
			name.length > MAX_CATEGORY_NAME_LENGTH ||
			description.length > MAX_CATEGORY_DESCRIPTION_LENGTH ||
			typeof action !== "string" ||
			!CATEGORY_ACTIONS.includes(action)
		) {
			return `Invalid category: ${typeof id === "string" ? id : "missing id"}`;
		}

		if (seenIds.has(id)) {
			return `Duplicate category ID: ${id}`;
		}
		seenIds.add(id);
	}

	return null;
}

/**
 * Create the AI moderation plugin.
 *
 * The returned definition registers two comment hooks (`comment:beforeCreate`
 * to run the AI guard and record its result in the event metadata, and the
 * exclusive `comment:moderate` to turn those signals into a decision) plus
 * four routes (`settings`, `settings/save`, `settings/test`, `status`).
 *
 * @param options - Plugin options. `autoApproveClean` defaults to `true` and
 *   `aiBinding` defaults to `"AI"` when unset; `categories` is used only when
 *   no category list has been stored in KV.
 * @returns The plugin definition produced by `definePlugin`.
 */
export function createPlugin(options: AIModerationOptions = {}): ResolvedPlugin {
	const defaultAutoApprove = options.autoApproveClean ?? true;
	const aiBinding = options.aiBinding ?? "AI";

	/**
	 * Load categories, preferring the list stored in KV, then
	 * `options.categories`, then `DEFAULT_CATEGORIES`.
	 */
	async function loadCategories(kv: KvReader): Promise<Category[]> {
		const stored = await kv.get<Category[]>(KV_CATEGORIES);
		return stored ?? options.categories ?? DEFAULT_CATEGORIES;
	}

	/**
	 * Load behavior settings from KV, falling back to the `autoApproveClean`
	 * default derived from the plugin options.
	 */
	async function loadBehavior(kv: KvReader): Promise<{ autoApproveClean: boolean }> {
		const stored = await kv.get<{ autoApproveClean: boolean }>(KV_BEHAVIOR);
		return stored ?? { autoApproveClean: defaultAutoApprove };
	}

	return definePlugin({
		id: "ai-moderation",
		version: "0.1.0",
		capabilities: [],
		allowedHosts: [],

		admin: {
			entry: "@emdashcms/plugin-ai-moderation/admin",
			pages: [{ path: "/settings", label: "AI Moderation", icon: "shield" }],
			widgets: [{ id: "status", title: "AI Moderation", size: "third" }],
		},

		hooks: {
			// Enrichment hook — runs AI guard, writes signals to metadata
			"comment:beforeCreate": {
				priority: 10,
				errorPolicy: "continue",
				handler: async (event, ctx) => {
					const categories = await loadCategories(ctx.kv);

					// Run AI guard (try/catch — failure is non-fatal)
					let guard: GuardResult | undefined;
					let guardError: string | undefined;

					// The guard is skipped entirely when no taxonomy can be built
					// from the configured categories.
					const taxonomy = buildTaxonomy(categories);
					if (taxonomy) {
						try {
							guard = await runGuard(event.comment.body, taxonomy, aiBinding);
						} catch (error) {
							// Metadata carries a fixed message; the underlying error
							// detail only goes to the log.
							guardError = "AI classification failed";
							ctx.log.error("AI guard failed", {
								error: error instanceof Error ? error.message : String(error),
							});
						}
					}

					// Write signals to metadata for the moderator
					event.metadata.aiGuard = guard;
					event.metadata.aiGuardError = guardError;

					return event;
				},
			},

			// Exclusive moderator — reads metadata signals, computes decision
			"comment:moderate": {
				exclusive: true,
				handler: async (event, ctx) => {
					// The two KV reads are independent, so issue them together.
					const [categories, behavior] = await Promise.all([
						loadCategories(ctx.kv),
						loadBehavior(ctx.kv),
					]);

					// Read signals from metadata (written by beforeCreate hook)
					const guard = event.metadata.aiGuard as GuardResult | undefined;
					const guardError = event.metadata.aiGuardError as string | undefined;

					const isAuthenticated = !!event.comment.authorUserId;

					return computeDecision(
						guard,
						guardError,
						categories,
						behavior,
						event.collectionSettings,
						event.priorApprovedCount,
						isAuthenticated,
					);
				},
			},
		},

		routes: {
			// Get current settings
			settings: {
				handler: async (ctx) => {
					const [categories, behavior] = await Promise.all([
						loadCategories(ctx.kv),
						loadBehavior(ctx.kv),
					]);

					return { categories, behavior };
				},
			},

			// Save settings. `categories` and `behavior` are each optional;
			// only the parts present in the input are written.
			"settings/save": {
				handler: async (ctx) => {
					const input = isRecord(ctx.input) ? ctx.input : {};

					if (Array.isArray(input.categories)) {
						const submitted: unknown[] = input.categories;
						const problem = findCategoryError(submitted);
						if (problem !== null) {
							return { success: false, error: problem };
						}
						// Every entry was checked above, so the cast is backed by
						// a runtime validation rather than trust in the client.
						await ctx.kv.set(KV_CATEGORIES, submitted as Category[]);
					}

					if (isRecord(input.behavior)) {
						// A non-boolean value falls back to the options-derived default.
						const behavior = {
							autoApproveClean:
								typeof input.behavior.autoApproveClean === "boolean"
									? input.behavior.autoApproveClean
									: defaultAutoApprove,
						};
						await ctx.kv.set(KV_BEHAVIOR, behavior);
					}

					return { success: true };
				},
			},

			// Test AI analysis on sample text
			"settings/test": {
				handler: async (ctx) => {
					const input = isRecord(ctx.input) ? ctx.input : {};
					const text = typeof input.text === "string" ? input.text : "";

					if (!text.trim()) {
						return { success: false, error: "No text provided" };
					}

					const categories = await loadCategories(ctx.kv);

					// Run AI guard
					let guard: GuardResult | undefined;
					let guardError: string | undefined;
					const taxonomy = buildTaxonomy(categories);

					if (taxonomy) {
						try {
							guard = await runGuard(text, taxonomy, aiBinding);
						} catch (error) {
							// Unlike the comment hook, the raw error message is
							// returned to the caller of this test route.
							guardError = error instanceof Error ? error.message : String(error);
						}
					}

					return {
						success: true,
						guard: guard ?? null,
						guardError: guardError ?? null,
						taxonomy,
					};
				},
			},

			// Plugin status for dashboard widget
			status: {
				handler: async (ctx) => {
					const [categories, behavior] = await Promise.all([
						loadCategories(ctx.kv),
						loadBehavior(ctx.kv),
					]);

					return {
						enabled: true,
						// Categories whose action is "ignore" are not counted.
						categoryCount: categories.filter((c) => c.action !== "ignore").length,
						autoApproveClean: behavior.autoApproveClean,
					};
				},
			},
		},
	});
}
|
||||
|
||||
export default createPlugin;
|
||||
Reference in New Issue
Block a user