fix: use stable site hash for install telemetry deduplication (#298)
* fix: use stable site hash for install telemetry deduplication (#297) generateSiteHash() used Date.now() as the hash seed, producing a different hash on every call. Since the installs table uses PRIMARY KEY (plugin_id, site_hash), the same site could insert unlimited rows, inflating install counts and making "Most Popular" sorting meaningless. Fix: use the site's request origin as a stable hash seed. The same origin always produces the same hash, so the marketplace deduplicates correctly. Also denormalizes install_count on the plugins table to avoid a COUNT(*) subquery per row in searchPlugins(). The count is recalculated atomically on each upsertInstall() call. Fixes #297 * chore: add changeset for install telemetry fix * fix: address review feedback on install telemetry - Replace crypto.subtle fallback with FNV-1a hash to avoid origin leakage and collisions from truncated seed strings - Remove duplicate p.install_count from SELECT (p.* already includes it) - Use explicit p.install_count in ORDER BY clause - Use db.batch() for atomic upsert + count recomputation instead of separate statements with misleading meta.changes check
This commit is contained in:
5
.changeset/neat-islands-smoke.md
Normal file
5
.changeset/neat-islands-smoke.md
Normal file
@@ -0,0 +1,5 @@
|
||||
---
|
||||
"emdash": patch
|
||||
---
|
||||
|
||||
Fixes install telemetry using an unstable hash that inflated install counts. Uses the site's request origin as a stable hash seed for accurate per-site deduplication. Denormalizes install_count on the marketplace plugins table for query performance.
|
||||
@@ -83,9 +83,12 @@ function validateVersion(version: string): void {
|
||||
}
|
||||
}
|
||||
|
||||
function getClient(marketplaceUrl: string | undefined): MarketplaceClient | null {
|
||||
function getClient(
|
||||
marketplaceUrl: string | undefined,
|
||||
siteOrigin?: string,
|
||||
): MarketplaceClient | null {
|
||||
if (!marketplaceUrl) return null;
|
||||
return createMarketplaceClient(marketplaceUrl);
|
||||
return createMarketplaceClient(marketplaceUrl, siteOrigin);
|
||||
}
|
||||
|
||||
function diffCapabilities(
|
||||
@@ -289,9 +292,9 @@ export async function handleMarketplaceInstall(
|
||||
sandboxRunner: SandboxRunner | null,
|
||||
marketplaceUrl: string | undefined,
|
||||
pluginId: string,
|
||||
opts?: { version?: string; configuredPluginIds?: Set<string> },
|
||||
opts?: { version?: string; configuredPluginIds?: Set<string>; siteOrigin?: string },
|
||||
): Promise<ApiResult<MarketplaceInstallResult>> {
|
||||
const client = getClient(marketplaceUrl);
|
||||
const client = getClient(marketplaceUrl, opts?.siteOrigin);
|
||||
if (!client) {
|
||||
return {
|
||||
success: false,
|
||||
|
||||
@@ -41,13 +41,15 @@ export const POST: APIRoute = async ({ params, request, locals }) => {
|
||||
emdash.configuredPlugins.map((p: { id: string }) => p.id),
|
||||
);
|
||||
|
||||
const siteOrigin = new URL(request.url).origin;
|
||||
|
||||
const result = await handleMarketplaceInstall(
|
||||
emdash.db,
|
||||
emdash.storage,
|
||||
emdash.getSandboxRunner(),
|
||||
emdash.config.marketplace,
|
||||
id,
|
||||
{ version: body.version, configuredPluginIds },
|
||||
{ version: body.version, configuredPluginIds, siteOrigin },
|
||||
);
|
||||
|
||||
if (!result.success) return unwrapResult(result);
|
||||
|
||||
@@ -204,10 +204,12 @@ export class MarketplaceUnavailableError extends MarketplaceError {
|
||||
|
||||
class MarketplaceClientImpl implements MarketplaceClient {
|
||||
private readonly baseUrl: string;
|
||||
private readonly siteOrigin: string | undefined;
|
||||
|
||||
constructor(baseUrl: string) {
|
||||
constructor(baseUrl: string, siteOrigin?: string) {
|
||||
// Strip trailing slash
|
||||
this.baseUrl = baseUrl.replace(TRAILING_SLASHES, "");
|
||||
this.siteOrigin = siteOrigin;
|
||||
}
|
||||
|
||||
async search(query?: string, opts?: MarketplaceSearchOpts): Promise<MarketplaceSearchResult> {
|
||||
@@ -270,8 +272,8 @@ class MarketplaceClientImpl implements MarketplaceClient {
|
||||
}
|
||||
|
||||
async reportInstall(id: string, version: string): Promise<void> {
|
||||
// Generate a stable site hash (best-effort, non-identifying)
|
||||
const siteHash = await generateSiteHash();
|
||||
// Generate a stable site hash from the site origin (best-effort, non-identifying)
|
||||
const siteHash = await generateSiteHash(this.siteOrigin);
|
||||
const url = `${this.baseUrl}/api/v1/plugins/${encodeURIComponent(id)}/installs`;
|
||||
|
||||
try {
|
||||
@@ -433,18 +435,27 @@ async function extractBundle(tarballBytes: Uint8Array): Promise<PluginBundle> {
|
||||
|
||||
// ── Helpers ────────────────────────────────────────────────────────
|
||||
|
||||
/** Generate a stable non-identifying site hash (best-effort) */
|
||||
async function generateSiteHash(): Promise<string> {
|
||||
// Use a timestamp-based approach since we can't reliably get the origin
|
||||
// in all contexts (Workers, Node, etc.)
|
||||
const seed = `emdash-${Date.now()}`;
|
||||
/**
|
||||
* Generate a stable non-identifying site hash from the site origin.
|
||||
* The same origin always produces the same hash, so the marketplace
|
||||
* installs table deduplicates correctly per (plugin_id, site_hash).
|
||||
*/
|
||||
async function generateSiteHash(siteOrigin?: string): Promise<string> {
|
||||
const seed = siteOrigin ? `emdash-site:${siteOrigin}` : `emdash-anonymous`;
|
||||
try {
|
||||
const hash = await crypto.subtle.digest("SHA-256", new TextEncoder().encode(seed));
|
||||
const arr = new Uint8Array(hash);
|
||||
return Array.from(arr.slice(0, 8), (b) => b.toString(16).padStart(2, "0")).join("");
|
||||
} catch {
|
||||
// Fallback for environments without crypto.subtle
|
||||
return Math.random().toString(36).slice(2, 18);
|
||||
// Fallback for environments without crypto.subtle: FNV-1a hash encoded as hex.
|
||||
// Deterministic, uniform distribution, no origin leakage.
|
||||
let h = 0x811c9dc5;
|
||||
for (let i = 0; i < seed.length; i++) {
|
||||
h ^= seed.charCodeAt(i);
|
||||
h = Math.imul(h, 0x01000193);
|
||||
}
|
||||
const h2 = h ^ (h >>> 16);
|
||||
return (h >>> 0).toString(16).padStart(8, "0") + (h2 >>> 0).toString(16).padStart(8, "0");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -454,7 +465,9 @@ async function generateSiteHash(): Promise<string> {
|
||||
* Create a MarketplaceClient for the given marketplace URL.
|
||||
*
|
||||
* @param baseUrl - The marketplace API base URL (e.g. "https://marketplace.emdashcms.com")
|
||||
* @param siteOrigin - The origin of the EmDash site (e.g. "https://myblog.example.com").
|
||||
* Used to generate a stable, non-identifying site hash for install deduplication.
|
||||
*/
|
||||
export function createMarketplaceClient(baseUrl: string): MarketplaceClient {
|
||||
return new MarketplaceClientImpl(baseUrl);
|
||||
export function createMarketplaceClient(baseUrl: string, siteOrigin?: string): MarketplaceClient {
|
||||
return new MarketplaceClientImpl(baseUrl, siteOrigin);
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@ import {
|
||||
} from "../../../src/plugins/marketplace.js";
|
||||
|
||||
const HEX_64_PATTERN = /^[a-f0-9]{64}$/;
|
||||
const HEX_16_PATTERN = /^[a-f0-9]{16}$/;
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
@@ -432,6 +433,40 @@ describe("MarketplaceClient", () => {
|
||||
// Should not throw
|
||||
await client.reportInstall("test-seo", "1.0.0");
|
||||
});
|
||||
|
||||
it("sends a stable site hash across multiple calls", async () => {
|
||||
const clientWithOrigin = createMarketplaceClient(BASE_URL, "https://myblog.example.com");
|
||||
|
||||
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
|
||||
|
||||
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
|
||||
await clientWithOrigin.reportInstall("test-seo", "1.0.0");
|
||||
|
||||
const calls = fetchSpy.mock.calls;
|
||||
expect(calls.length).toBe(2);
|
||||
|
||||
const body1 = JSON.parse(calls[0]![1]!.body as string);
|
||||
const body2 = JSON.parse(calls[1]![1]!.body as string);
|
||||
|
||||
// Same origin produces the same hash every time
|
||||
expect(body1.siteHash).toBe(body2.siteHash);
|
||||
expect(body1.siteHash).toMatch(HEX_16_PATTERN);
|
||||
});
|
||||
|
||||
it("produces different hashes for different site origins", async () => {
|
||||
const client1 = createMarketplaceClient(BASE_URL, "https://site-a.example.com");
|
||||
const client2 = createMarketplaceClient(BASE_URL, "https://site-b.example.com");
|
||||
|
||||
fetchSpy.mockResolvedValue(new Response("OK", { status: 200 }));
|
||||
|
||||
await client1.reportInstall("test-seo", "1.0.0");
|
||||
await client2.reportInstall("test-seo", "1.0.0");
|
||||
|
||||
const body1 = JSON.parse(fetchSpy.mock.calls[0]![1]!.body as string);
|
||||
const body2 = JSON.parse(fetchSpy.mock.calls[1]![1]!.body as string);
|
||||
|
||||
expect(body1.siteHash).not.toBe(body2.siteHash);
|
||||
});
|
||||
});
|
||||
|
||||
describe("trailing slash handling", () => {
|
||||
|
||||
@@ -99,13 +99,12 @@ export async function searchPlugins(
|
||||
break;
|
||||
case "installs":
|
||||
default:
|
||||
orderBy = "install_count DESC, p.created_at DESC";
|
||||
orderBy = "p.install_count DESC, p.created_at DESC";
|
||||
break;
|
||||
}
|
||||
|
||||
const query = `
|
||||
SELECT p.*, a.name AS author_name, a.avatar_url AS author_avatar_url, a.verified AS author_verified,
|
||||
(SELECT COUNT(*) FROM installs i WHERE i.plugin_id = p.id) AS install_count,
|
||||
lv.version AS latest_version,
|
||||
lv.status AS latest_status,
|
||||
lv.audit_verdict AS latest_audit_verdict,
|
||||
@@ -201,25 +200,25 @@ export async function getPluginVersion(
|
||||
|
||||
// ── Install queries ─────────────────────────────────────────────
|
||||
|
||||
export async function getInstallCount(db: D1Database, pluginId: string): Promise<number> {
|
||||
const row = await db
|
||||
.prepare("SELECT COUNT(*) AS count FROM installs WHERE plugin_id = ?")
|
||||
.bind(pluginId)
|
||||
.first<{ count: number }>();
|
||||
return row?.count ?? 0;
|
||||
}
|
||||
|
||||
export async function upsertInstall(
|
||||
db: D1Database,
|
||||
data: { pluginId: string; siteHash: string; version: string },
|
||||
): Promise<void> {
|
||||
await db
|
||||
// Run the install upsert and install_count recomputation together so the
|
||||
// plugin count stays consistent with the installs table.
|
||||
await db.batch([
|
||||
db
|
||||
.prepare(
|
||||
`INSERT INTO installs (plugin_id, site_hash, version) VALUES (?, ?, ?)
|
||||
ON CONFLICT (plugin_id, site_hash) DO UPDATE SET version = excluded.version, installed_at = datetime('now')`,
|
||||
)
|
||||
.bind(data.pluginId, data.siteHash, data.version)
|
||||
.run();
|
||||
.bind(data.pluginId, data.siteHash, data.version),
|
||||
db
|
||||
.prepare(
|
||||
`UPDATE plugins SET install_count = (SELECT COUNT(*) FROM installs WHERE plugin_id = ?) WHERE id = ?`,
|
||||
)
|
||||
.bind(data.pluginId, data.pluginId),
|
||||
]);
|
||||
}
|
||||
|
||||
// ── Write queries ───────────────────────────────────────────────
|
||||
|
||||
@@ -19,6 +19,7 @@ CREATE TABLE IF NOT EXISTS plugins (
|
||||
capabilities TEXT NOT NULL,
|
||||
keywords TEXT,
|
||||
has_icon INTEGER DEFAULT 0,
|
||||
install_count INTEGER NOT NULL DEFAULT 0,
|
||||
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
||||
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||
);
|
||||
|
||||
@@ -19,6 +19,7 @@ export interface PluginRow {
|
||||
capabilities: string;
|
||||
keywords: string | null;
|
||||
has_icon: number;
|
||||
install_count: number;
|
||||
created_at: string;
|
||||
updated_at: string;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
import { Hono } from "hono";
|
||||
|
||||
import {
|
||||
getInstallCount,
|
||||
getLatestVersion,
|
||||
getPluginVersion,
|
||||
getPluginVersions,
|
||||
@@ -100,7 +99,7 @@ publicRoutes.get("/plugins/:id", async (c) => {
|
||||
if (!plugin) return c.json({ error: "Plugin not found" }, 404);
|
||||
|
||||
const latestVersion = await getLatestVersion(c.env.DB, id);
|
||||
const installCount = await getInstallCount(c.env.DB, id);
|
||||
const installCount = plugin.install_count ?? 0;
|
||||
|
||||
const capabilities = safeJsonParse<string[]>(plugin.capabilities, []);
|
||||
const keywords = safeJsonParse<string[]>(plugin.keywords, []);
|
||||
|
||||
Reference in New Issue
Block a user