fix: use stable site hash for install telemetry deduplication (#298)

* fix: use stable site hash for install telemetry deduplication (#297)

generateSiteHash() used Date.now() as the hash seed, producing a different
hash on every call. Since the installs table uses PRIMARY KEY (plugin_id,
site_hash), the same site could insert unlimited rows, inflating install
counts and making "Most Popular" sorting meaningless.

Fix: use the site's request origin as a stable hash seed. The same origin
always produces the same hash, so the marketplace deduplicates correctly.

Also denormalizes install_count on the plugins table to avoid a COUNT(*)
subquery per row in searchPlugins(). The count is recalculated atomically
on each upsertInstall() call.

Fixes #297

* chore: add changeset for install telemetry fix

* fix: address review feedback on install telemetry

- Replace the fallback used when crypto.subtle is unavailable with an FNV-1a
  hash to avoid origin leakage and collisions from truncated seed strings
- Remove duplicate p.install_count from SELECT (p.* already includes it)
- Use explicit p.install_count in ORDER BY clause
- Use db.batch() for atomic upsert + count recomputation instead of
  separate statements with misleading meta.changes check
This commit is contained in:
Benjamin Price
2026-04-08 06:37:00 +09:00
committed by GitHub
parent 91e31fb2ca
commit f112ac4819
9 changed files with 94 additions and 36 deletions

View File

@@ -83,9 +83,12 @@ function validateVersion(version: string): void {
}
}
function getClient(marketplaceUrl: string | undefined): MarketplaceClient | null {
function getClient(
marketplaceUrl: string | undefined,
siteOrigin?: string,
): MarketplaceClient | null {
if (!marketplaceUrl) return null;
return createMarketplaceClient(marketplaceUrl);
return createMarketplaceClient(marketplaceUrl, siteOrigin);
}
function diffCapabilities(
@@ -289,9 +292,9 @@ export async function handleMarketplaceInstall(
sandboxRunner: SandboxRunner | null,
marketplaceUrl: string | undefined,
pluginId: string,
opts?: { version?: string; configuredPluginIds?: Set<string> },
opts?: { version?: string; configuredPluginIds?: Set<string>; siteOrigin?: string },
): Promise<ApiResult<MarketplaceInstallResult>> {
const client = getClient(marketplaceUrl);
const client = getClient(marketplaceUrl, opts?.siteOrigin);
if (!client) {
return {
success: false,

View File

@@ -41,13 +41,15 @@ export const POST: APIRoute = async ({ params, request, locals }) => {
emdash.configuredPlugins.map((p: { id: string }) => p.id),
);
const siteOrigin = new URL(request.url).origin;
const result = await handleMarketplaceInstall(
emdash.db,
emdash.storage,
emdash.getSandboxRunner(),
emdash.config.marketplace,
id,
{ version: body.version, configuredPluginIds },
{ version: body.version, configuredPluginIds, siteOrigin },
);
if (!result.success) return unwrapResult(result);

View File

@@ -204,10 +204,12 @@ export class MarketplaceUnavailableError extends MarketplaceError {
class MarketplaceClientImpl implements MarketplaceClient {
private readonly baseUrl: string;
private readonly siteOrigin: string | undefined;
constructor(baseUrl: string) {
constructor(baseUrl: string, siteOrigin?: string) {
// Strip trailing slash
this.baseUrl = baseUrl.replace(TRAILING_SLASHES, "");
this.siteOrigin = siteOrigin;
}
async search(query?: string, opts?: MarketplaceSearchOpts): Promise<MarketplaceSearchResult> {
@@ -270,8 +272,8 @@ class MarketplaceClientImpl implements MarketplaceClient {
}
async reportInstall(id: string, version: string): Promise<void> {
// Generate a stable site hash (best-effort, non-identifying)
const siteHash = await generateSiteHash();
// Generate a stable site hash from the site origin (best-effort, non-identifying)
const siteHash = await generateSiteHash(this.siteOrigin);
const url = `${this.baseUrl}/api/v1/plugins/${encodeURIComponent(id)}/installs`;
try {
@@ -433,18 +435,27 @@ async function extractBundle(tarballBytes: Uint8Array): Promise<PluginBundle> {
// ── Helpers ────────────────────────────────────────────────────────
/** Generate a stable non-identifying site hash (best-effort) */
async function generateSiteHash(): Promise<string> {
// Use a timestamp-based approach since we can't reliably get the origin
// in all contexts (Workers, Node, etc.)
const seed = `emdash-${Date.now()}`;
/**
* Generate a stable non-identifying site hash from the site origin.
* The same origin always produces the same hash, so the marketplace
* installs table deduplicates correctly per (plugin_id, site_hash).
*/
async function generateSiteHash(siteOrigin?: string): Promise<string> {
const seed = siteOrigin ? `emdash-site:${siteOrigin}` : `emdash-anonymous`;
try {
const hash = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(seed));
const arr = new Uint8Array(hash);
return Array.from(arr.slice(0, 8), (b) => b.toString(16).padStart(2, "0")).join("");
} catch {
// Fallback for environments without crypto.subtle
return Math.random().toString(36).slice(2, 18);
// Fallback for environments without crypto.subtle: FNV-1a hash encoded as hex.
// Deterministic, uniform distribution, no origin leakage.
let h = 0x811c9dc5;
for (let i = 0; i < seed.length; i++) {
h ^= seed.charCodeAt(i);
h = Math.imul(h, 0x01000193);
}
const h2 = h ^ (h >>> 16);
return (h >>> 0).toString(16).padStart(8, "0") + (h2 >>> 0).toString(16).padStart(8, "0");
}
}
@@ -454,7 +465,9 @@ async function generateSiteHash(): Promise<string> {
* Create a MarketplaceClient for the given marketplace URL.
*
* @param baseUrl - The marketplace API base URL (e.g. "https://marketplace.emdashcms.com")
* @param siteOrigin - The origin of the EmDash site (e.g. "https://myblog.example.com").
* Used to generate a stable, non-identifying site hash for install deduplication.
*/
export function createMarketplaceClient(baseUrl: string): MarketplaceClient {
return new MarketplaceClientImpl(baseUrl);
export function createMarketplaceClient(baseUrl: string, siteOrigin?: string): MarketplaceClient {
return new MarketplaceClientImpl(baseUrl, siteOrigin);
}

View File

@@ -21,6 +21,7 @@ import {
} from "../../../src/plugins/marketplace.js";
const HEX_64_PATTERN = /^[a-f0-9]{64}$/;
const HEX_16_PATTERN = /^[a-f0-9]{16}$/;
// ── Helpers ──────────────────────────────────────────────────────────
@@ -432,6 +433,40 @@ describe("MarketplaceClient", () => {
// Should not throw
await client.reportInstall("test-seo", "1.0.0");
});
it("sends a stable site hash across multiple calls", async () => {
const clientWithOrigin = createMarketplaceClient(BASE_URL, "https://myblog.example.com");
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
const calls = fetchSpy.mock.calls;
expect(calls.length).toBe(2);
const body1 = JSON.parse(calls[0]![1]!.body as string);
const body2 = JSON.parse(calls[1]![1]!.body as string);
// Same origin produces the same hash every time
expect(body1.siteHash).toBe(body2.siteHash);
expect(body1.siteHash).toMatch(HEX_16_PATTERN);
});
it("produces different hashes for different site origins", async () => {
const client1 = createMarketplaceClient(BASE_URL, "https://site-a.example.com");
const client2 = createMarketplaceClient(BASE_URL, "https://site-b.example.com");
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
await client1.reportInstall("test-seo", "1.0.0");
await client2.reportInstall("test-seo", "1.0.0");
const body1 = JSON.parse(fetchSpy.mock.calls[0]![1]!.body as string);
const body2 = JSON.parse(fetchSpy.mock.calls[1]![1]!.body as string);
expect(body1.siteHash).not.toBe(body2.siteHash);
});
});
describe("trailing slash handling", () => {