fix: use stable site hash for install telemetry deduplication (#298)

* fix: use stable site hash for install telemetry deduplication (#297)

generateSiteHash() used Date.now() as the hash seed, producing a different
hash on every call. Since the installs table uses PRIMARY KEY (plugin_id,
site_hash), the same site could insert unlimited rows, inflating install
counts and making "Most Popular" sorting meaningless.

Fix: use the site's request origin as a stable hash seed. The same origin
always produces the same hash, so the marketplace deduplicates correctly.

Also denormalizes install_count on the plugins table to avoid a COUNT(*)
subquery per row in searchPlugins(). The count is recalculated atomically
on each upsertInstall() call.

Fixes #297

* chore: add changeset for install telemetry fix

* fix: address review feedback on install telemetry

- Replace the fallback used when crypto.subtle is unavailable with an FNV-1a
  hash to avoid origin leakage and collisions from truncated seed strings
- Remove duplicate p.install_count from SELECT (p.* already includes it)
- Use explicit p.install_count in ORDER BY clause
- Use db.batch() for atomic upsert + count recomputation instead of
  separate statements with misleading meta.changes check
This commit is contained in:
Benjamin Price
2026-04-08 06:37:00 +09:00
committed by GitHub
parent 91e31fb2ca
commit f112ac4819
9 changed files with 94 additions and 36 deletions

View File

@@ -0,0 +1,5 @@
---
"emdash": patch
---
Fixes install telemetry, which used an unstable site hash and inflated install counts. The site's request origin is now used as a stable hash seed for accurate per-site deduplication. Also denormalizes install_count on the marketplace plugins table for query performance.

View File

@@ -83,9 +83,12 @@ function validateVersion(version: string): void {
}
}
function getClient(marketplaceUrl: string | undefined): MarketplaceClient | null {
function getClient(
marketplaceUrl: string | undefined,
siteOrigin?: string,
): MarketplaceClient | null {
if (!marketplaceUrl) return null;
return createMarketplaceClient(marketplaceUrl);
return createMarketplaceClient(marketplaceUrl, siteOrigin);
}
function diffCapabilities(
@@ -289,9 +292,9 @@ export async function handleMarketplaceInstall(
sandboxRunner: SandboxRunner | null,
marketplaceUrl: string | undefined,
pluginId: string,
opts?: { version?: string; configuredPluginIds?: Set<string> },
opts?: { version?: string; configuredPluginIds?: Set<string>; siteOrigin?: string },
): Promise<ApiResult<MarketplaceInstallResult>> {
const client = getClient(marketplaceUrl);
const client = getClient(marketplaceUrl, opts?.siteOrigin);
if (!client) {
return {
success: false,

View File

@@ -41,13 +41,15 @@ export const POST: APIRoute = async ({ params, request, locals }) => {
emdash.configuredPlugins.map((p: { id: string }) => p.id),
);
const siteOrigin = new URL(request.url).origin;
const result = await handleMarketplaceInstall(
emdash.db,
emdash.storage,
emdash.getSandboxRunner(),
emdash.config.marketplace,
id,
{ version: body.version, configuredPluginIds },
{ version: body.version, configuredPluginIds, siteOrigin },
);
if (!result.success) return unwrapResult(result);

View File

@@ -204,10 +204,12 @@ export class MarketplaceUnavailableError extends MarketplaceError {
class MarketplaceClientImpl implements MarketplaceClient {
private readonly baseUrl: string;
private readonly siteOrigin: string | undefined;
constructor(baseUrl: string) {
constructor(baseUrl: string, siteOrigin?: string) {
// Strip trailing slash
this.baseUrl = baseUrl.replace(TRAILING_SLASHES, "");
this.siteOrigin = siteOrigin;
}
async search(query?: string, opts?: MarketplaceSearchOpts): Promise<MarketplaceSearchResult> {
@@ -270,8 +272,8 @@ class MarketplaceClientImpl implements MarketplaceClient {
}
async reportInstall(id: string, version: string): Promise<void> {
// Generate a stable site hash (best-effort, non-identifying)
const siteHash = await generateSiteHash();
// Generate a stable site hash from the site origin (best-effort, non-identifying)
const siteHash = await generateSiteHash(this.siteOrigin);
const url = `${this.baseUrl}/api/v1/plugins/${encodeURIComponent(id)}/installs`;
try {
@@ -433,18 +435,27 @@ async function extractBundle(tarballBytes: Uint8Array): Promise<PluginBundle> {
// ── Helpers ────────────────────────────────────────────────────────
/** Generate a stable non-identifying site hash (best-effort) */
async function generateSiteHash(): Promise<string> {
// Use a timestamp-based approach since we can't reliably get the origin
// in all contexts (Workers, Node, etc.)
const seed = `emdash-${Date.now()}`;
/**
* Generate a stable non-identifying site hash from the site origin.
* The same origin always produces the same hash, so the marketplace
* installs table deduplicates correctly per (plugin_id, site_hash).
*/
async function generateSiteHash(siteOrigin?: string): Promise<string> {
const seed = siteOrigin ? `emdash-site:${siteOrigin}` : `emdash-anonymous`;
try {
const hash = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(seed));
const arr = new Uint8Array(hash);
return Array.from(arr.slice(0, 8), (b) => b.toString(16).padStart(2, "0")).join("");
} catch {
// Fallback for environments without crypto.subtle
return Math.random().toString(36).slice(2, 18);
// Fallback for environments without crypto.subtle: FNV-1a hash encoded as hex.
// Deterministic, uniform distribution, no origin leakage.
let h = 0x811c9dc5;
for (let i = 0; i < seed.length; i++) {
h ^= seed.charCodeAt(i);
h = Math.imul(h, 0x01000193);
}
const h2 = h ^ (h >>> 16);
return (h >>> 0).toString(16).padStart(8, "0") + (h2 >>> 0).toString(16).padStart(8, "0");
}
}
@@ -454,7 +465,9 @@ async function generateSiteHash(): Promise<string> {
* Create a MarketplaceClient for the given marketplace URL.
*
* @param baseUrl - The marketplace API base URL (e.g. "https://marketplace.emdashcms.com")
* @param siteOrigin - The origin of the EmDash site (e.g. "https://myblog.example.com").
* Used to generate a stable, non-identifying site hash for install deduplication.
*/
export function createMarketplaceClient(baseUrl: string): MarketplaceClient {
return new MarketplaceClientImpl(baseUrl);
export function createMarketplaceClient(baseUrl: string, siteOrigin?: string): MarketplaceClient {
return new MarketplaceClientImpl(baseUrl, siteOrigin);
}

View File

@@ -21,6 +21,7 @@ import {
} from "../../../src/plugins/marketplace.js";
const HEX_64_PATTERN = /^[a-f0-9]{64}$/;
const HEX_16_PATTERN = /^[a-f0-9]{16}$/;
// ── Helpers ──────────────────────────────────────────────────────────
@@ -432,6 +433,40 @@ describe("MarketplaceClient", () => {
// Should not throw
await client.reportInstall("test-seo", "1.0.0");
});
it("sends a stable site hash across multiple calls", async () => {
const clientWithOrigin = createMarketplaceClient(BASE_URL, "https://myblog.example.com");
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
const calls = fetchSpy.mock.calls;
expect(calls.length).toBe(2);
const body1 = JSON.parse(calls[0]![1]!.body as string);
const body2 = JSON.parse(calls[1]![1]!.body as string);
// Same origin produces the same hash every time
expect(body1.siteHash).toBe(body2.siteHash);
expect(body1.siteHash).toMatch(HEX_16_PATTERN);
});
it("produces different hashes for different site origins", async () => {
const client1 = createMarketplaceClient(BASE_URL, "https://site-a.example.com");
const client2 = createMarketplaceClient(BASE_URL, "https://site-b.example.com");
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
await client1.reportInstall("test-seo", "1.0.0");
await client2.reportInstall("test-seo", "1.0.0");
const body1 = JSON.parse(fetchSpy.mock.calls[0]![1]!.body as string);
const body2 = JSON.parse(fetchSpy.mock.calls[1]![1]!.body as string);
expect(body1.siteHash).not.toBe(body2.siteHash);
});
});
describe("trailing slash handling", () => {

View File

@@ -99,13 +99,12 @@ export async function searchPlugins(
break;
case "installs":
default:
orderBy = "install_count DESC, p.created_at DESC";
orderBy = "p.install_count DESC, p.created_at DESC";
break;
}
const query = `
SELECT p.*, a.name AS author_name, a.avatar_url AS author_avatar_url, a.verified AS author_verified,
(SELECT COUNT(*) FROM installs i WHERE i.plugin_id = p.id) AS install_count,
lv.version AS latest_version,
lv.status AS latest_status,
lv.audit_verdict AS latest_audit_verdict,
@@ -201,25 +200,25 @@ export async function getPluginVersion(
// ── Install queries ─────────────────────────────────────────────
export async function getInstallCount(db: D1Database, pluginId: string): Promise<number> {
const row = await db
.prepare("SELECT COUNT(*) AS count FROM installs WHERE plugin_id = ?")
.bind(pluginId)
.first<{ count: number }>();
return row?.count ?? 0;
}
export async function upsertInstall(
db: D1Database,
data: { pluginId: string; siteHash: string; version: string },
): Promise<void> {
await db
// Run the install upsert and install_count recomputation together so the
// plugin count stays consistent with the installs table.
await db.batch([
db
.prepare(
`INSERT INTO installs (plugin_id, site_hash, version) VALUES (?, ?, ?)
ON CONFLICT (plugin_id, site_hash) DO UPDATE SET version = excluded.version, installed_at = datetime('now')`,
)
.bind(data.pluginId, data.siteHash, data.version)
.run();
.bind(data.pluginId, data.siteHash, data.version),
db
.prepare(
`UPDATE plugins SET install_count = (SELECT COUNT(*) FROM installs WHERE plugin_id = ?) WHERE id = ?`,
)
.bind(data.pluginId, data.pluginId),
]);
}
// ── Write queries ───────────────────────────────────────────────

View File

@@ -19,6 +19,7 @@ CREATE TABLE IF NOT EXISTS plugins (
capabilities TEXT NOT NULL,
keywords TEXT,
has_icon INTEGER DEFAULT 0,
install_count INTEGER NOT NULL DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);

View File

@@ -19,6 +19,7 @@ export interface PluginRow {
capabilities: string;
keywords: string | null;
has_icon: number;
install_count: number;
created_at: string;
updated_at: string;
}

View File

@@ -1,7 +1,6 @@
import { Hono } from "hono";
import {
getInstallCount,
getLatestVersion,
getPluginVersion,
getPluginVersions,
@@ -100,7 +99,7 @@ publicRoutes.get("/plugins/:id", async (c) => {
if (!plugin) return c.json({ error: "Plugin not found" }, 404);
const latestVersion = await getLatestVersion(c.env.DB, id);
const installCount = await getInstallCount(c.env.DB, id);
const installCount = plugin.install_count ?? 0;
const capabilities = safeJsonParse<string[]>(plugin.capabilities, []);
const keywords = safeJsonParse<string[]>(plugin.keywords, []);