import type { Kysely } from "kysely";
import { describe, it, expect, beforeEach, afterEach } from "vitest";

import { ContentRepository } from "../../../src/database/repositories/content.js";
import type { Database } from "../../../src/database/types.js";
import { SchemaRegistry } from "../../../src/schema/registry.js";
import { FTSManager } from "../../../src/search/fts-manager.js";
import { searchWithDb } from "../../../src/search/query.js";
import { createPostFixture } from "../../utils/fixtures.js";
import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../utils/test-db.js";

/**
 * Snippets returned by FTS5 splice literal `<mark>` markers around matched
 * terms but never escape the surrounding text. If the indexed content
 * contains characters that mean something in HTML (`<`, `>`, `&`, `"`,
 * `'`) the resulting "snippet" is unsafe to render with set:html or
 * innerHTML — both for visual integrity (broken markup, mojibake) and
 * for security (a `<script>` tag in indexed content would be handed to
 * the HTML parser verbatim).
 *
 * The tests below pin the expected behaviour of the search call: indexed
 * text is entity-escaped, a NULL snippet does not throw, and the `<mark>`
 * highlight markers survive as real tags.
 */
// NOTE(review): the suite title, the `db` / `repo` declarations, both hooks
// and the opening of the first test case were missing from the text under
// review. They are reconstructed here from the file's imports and from the
// surviving test bodies — confirm against version control before merging.
describe("search snippet HTML escaping", () => {
  let db: Kysely<Database>;
  let repo: ContentRepository;

  beforeEach(async () => {
    db = await setupTestDatabaseWithCollections();
    repo = new ContentRepository(db);

    // NOTE(review): presumably the title column is made searchable and FTS
    // is enabled here, mirroring what the NULL-snippet test does for the
    // `content` column — verify the exact setup calls.
    const registry = new SchemaRegistry(db);
    await registry.updateField("post", "title", { searchable: true });

    const ftsManager = new FTSManager(db);
    await ftsManager.enableSearch("post");
  });

  afterEach(async () => {
    await teardownTestDatabase(db);
  });

  // NOTE(review): test title, slug and the leading part of the fixture title
  // are reconstructed; only the trailing ` world` and the "alert" query were
  // still legible.
  it("escapes HTML tags found in the indexed text", async () => {
    await repo.create(
      createPostFixture({
        slug: "script-tag",
        status: "published",
        data: { title: "Hello <script>alert(1)</script> world" },
      }),
    );

    const { items } = await searchWithDb(db, "alert", {
      collections: ["post"],
    });

    expect(items).toHaveLength(1);
    const snippet = items[0]!.snippet ?? "";

    // The dangerous `<script>` tag must never come back as live markup;
    // it has to appear in its entity-escaped form instead.
    expect(snippet).not.toContain("<script>");
    expect(snippet).toContain("&lt;script&gt;");
  });

  it("escapes ampersands so `<3` and `&` round-trip correctly", async () => {
    await repo.create(
      createPostFixture({
        slug: "ampersand",
        status: "published",
        data: { title: "Tom & Jerry: 2 < 3 forever" },
      }),
    );

    const { items } = await searchWithDb(db, "Jerry", {
      collections: ["post"],
    });

    expect(items).toHaveLength(1);
    const snippet = items[0]!.snippet ?? "";

    // Bare `&` must be escaped to `&amp;` — otherwise a downstream
    // HTML parser may interpret `& Jerry` as the start of an entity.
    expect(snippet).toContain("&amp;");
    // Every remaining `&` must be the start of one of the five entities
    // the escaper is allowed to emit.
    expect(snippet).not.toMatch(/&(?!amp;|lt;|gt;|quot;|#39;)/);

    // `<` from "2 < 3" must also be escaped, even though it's not
    // adjacent to a tag-like structure.
    expect(snippet).toContain("&lt;");
  });

  it("does not crash when the snippet column is NULL", async () => {
    // FTS triggers insert raw column values with no COALESCE, so any
    // row whose title (the column the snippet() call targets) is
    // NULL produces a NULL snippet from SQLite — even when the row
    // matched via a different searchable column. A regression that
    // drops the null-guard throws "Cannot read properties of null
    // (reading 'replace')" before these assertions can run.
    const registry = new SchemaRegistry(db);
    await registry.updateField("post", "content", { searchable: true });

    const ftsManager = new FTSManager(db);
    await ftsManager.enableSearch("post");

    await repo.create(
      createPostFixture({
        slug: "no-title",
        status: "published",
        data: {
          // Deliberately NULL title — matched via the content
          // column so this row still surfaces in results.
          title: null,
          content: [
            {
              _type: "block",
              style: "normal",
              children: [{ _type: "span", text: "Quokka spotted today" }],
            },
          ],
        },
      }),
    );

    const { items } = await searchWithDb(db, "Quokka", {
      collections: ["post"],
    });

    expect(items).toHaveLength(1);
    // Whether the snippet ends up as a string or undefined doesn't
    // matter — the contract is "the search call must not throw".
    expect(typeof items[0]!.snippet === "string" || items[0]!.snippet === undefined).toBe(true);
  });

  it("preserves `<mark>` highlight tags as live HTML", async () => {
    // The whole point of returning a snippet is highlighting matches.
    // Sanitization must not strip the markers we deliberately added.
    await repo.create(
      createPostFixture({
        slug: "highlight",
        status: "published",
        data: { title: "The quick brown fox jumps" },
      }),
    );

    const { items } = await searchWithDb(db, "fox", {
      collections: ["post"],
    });

    expect(items).toHaveLength(1);
    const snippet = items[0]!.snippet ?? "";

    expect(snippet).toContain("<mark>");
    expect(snippet).toContain("</mark>");
    // And the highlighted token should be the matched word.
    expect(snippet).toMatch(/<mark>fox<\/mark>/i);
  });
});