Emdash source with visual editor image upload fix
Fixes: 1. media.ts: wrap placeholder generation in try-catch 2. toolbar.ts: check r.ok, display error message in popover
This commit is contained in:
264
infra/perf-monitor/src/api.ts
Normal file
264
infra/perf-monitor/src/api.ts
Normal file
@@ -0,0 +1,264 @@
|
||||
/** HTTP API router for the perf monitor. */
|
||||
|
||||
import { runMeasurements } from "./measure.js";
|
||||
import {
|
||||
DEFAULT_SITE_ID,
|
||||
getSite,
|
||||
REGIONS,
|
||||
REGION_LABELS,
|
||||
SITES,
|
||||
TARGET_ROUTES,
|
||||
} from "./routes.js";
|
||||
import {
|
||||
queryResults,
|
||||
getLatestResults,
|
||||
getRollingMedians,
|
||||
getDeployResults,
|
||||
insertResults,
|
||||
type Source,
|
||||
} from "./store.js";
|
||||
|
||||
/** Route the request to the correct handler. */
|
||||
export async function handleApi(request: Request, url: URL, env: Env): Promise<Response | null> {
|
||||
const path = url.pathname;
|
||||
|
||||
if (path === "/api/results" && request.method === "GET") {
|
||||
return handleResults(url, env);
|
||||
}
|
||||
if (path === "/api/summary" && request.method === "GET") {
|
||||
return handleSummary(url, env);
|
||||
}
|
||||
if (path === "/api/chart" && request.method === "GET") {
|
||||
return handleChart(url, env);
|
||||
}
|
||||
if (path === "/api/config" && request.method === "GET") {
|
||||
return handleConfig();
|
||||
}
|
||||
if (path === "/api/trigger" && request.method === "POST") {
|
||||
return handleTrigger(request, env);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Narrow a query string to the allowed source values without a cast. */
|
||||
function parseSource(raw: string | null): Source | undefined {
|
||||
if (raw === "deploy" || raw === "cron" || raw === "manual") return raw;
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the requested site param against the known SITES list. Falls back
|
||||
* to the default site when absent so existing clients (dashboard) keep
|
||||
* working unchanged.
|
||||
*/
|
||||
function parseSiteParam(raw: string | null): string {
|
||||
if (raw && getSite(raw)) return raw;
|
||||
return DEFAULT_SITE_ID;
|
||||
}
|
||||
|
||||
/** GET /api/results?route=X®ion=Y&source=Z&site=W&since=ISO&limit=N */
|
||||
async function handleResults(url: URL, env: Env): Promise<Response> {
|
||||
const source = parseSource(url.searchParams.get("source"));
|
||||
const siteParam = url.searchParams.get("site");
|
||||
// Results is intentionally loose: no site param = return across all sites
|
||||
// (for raw tabular inspection). Summary/chart default to a single site.
|
||||
const site = siteParam && getSite(siteParam) ? siteParam : undefined;
|
||||
|
||||
const results = await queryResults(env.DB, {
|
||||
route: url.searchParams.get("route") ?? undefined,
|
||||
region: url.searchParams.get("region") ?? undefined,
|
||||
source,
|
||||
site,
|
||||
since: url.searchParams.get("since") ?? undefined,
|
||||
limit: url.searchParams.has("limit") ? parseInt(url.searchParams.get("limit")!, 10) : undefined,
|
||||
});
|
||||
|
||||
return Response.json({ results });
|
||||
}
|
||||
|
||||
/** GET /api/summary?site=X -- latest per route+region, rolling averages */
|
||||
async function handleSummary(url: URL, env: Env): Promise<Response> {
|
||||
const site = parseSiteParam(url.searchParams.get("site"));
|
||||
|
||||
const [latest, medians] = await Promise.all([
|
||||
getLatestResults(env.DB, site),
|
||||
getRollingMedians(env.DB, site),
|
||||
]);
|
||||
|
||||
return Response.json({
|
||||
site,
|
||||
latest,
|
||||
medians,
|
||||
config: {
|
||||
sites: SITES.map((s) => ({ id: s.id, label: s.label, targetUrl: s.targetUrl })),
|
||||
routes: TARGET_ROUTES,
|
||||
regions: REGIONS.map((r) => ({ id: r, label: REGION_LABELS[r] })),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/** GET /api/chart?route=X®ion=Y&site=W&since=ISO&limit=N -- time series data */
|
||||
async function handleChart(url: URL, env: Env): Promise<Response> {
|
||||
const route = url.searchParams.get("route");
|
||||
const region = url.searchParams.get("region");
|
||||
|
||||
if (!route || !region) {
|
||||
return Response.json({ error: "route and region are required" }, { status: 400 });
|
||||
}
|
||||
|
||||
const site = parseSiteParam(url.searchParams.get("site"));
|
||||
const since = url.searchParams.get("since") ?? undefined;
|
||||
const limit = url.searchParams.has("limit") ? parseInt(url.searchParams.get("limit")!, 10) : 200;
|
||||
|
||||
const [results, deployResults] = await Promise.all([
|
||||
queryResults(env.DB, { route, region, site, since, limit }),
|
||||
getDeployResults(env.DB, site, since),
|
||||
]);
|
||||
|
||||
// Query returns DESC -- reverse to chronological. Manual (ad-hoc) runs are
|
||||
// stripped from the graph so they don't create visual noise; they still
|
||||
// appear in the /api/results table.
|
||||
const graphResults = results.filter((r) => r.source !== "manual").toReversed();
|
||||
|
||||
// Deduplicate deploy results by SHA — multiple route/region combos produce
|
||||
// duplicates, but we only want one marker per deploy on the chart.
|
||||
const seenShas = new Set<string>();
|
||||
const deployMarkers = deployResults
|
||||
.filter((r) => {
|
||||
if (!r.sha) return false;
|
||||
if (r.route !== route || r.region !== region) return false;
|
||||
if (seenShas.has(r.sha)) return false;
|
||||
seenShas.add(r.sha);
|
||||
return true;
|
||||
})
|
||||
.map((r) => ({
|
||||
timestamp: r.timestamp,
|
||||
prNumber: r.pr_number,
|
||||
sha: r.sha,
|
||||
coldTtfbMs: r.cold_ttfb_ms,
|
||||
}));
|
||||
|
||||
return Response.json({
|
||||
route,
|
||||
region,
|
||||
site,
|
||||
data: graphResults.map((r) => ({
|
||||
timestamp: r.timestamp,
|
||||
coldTtfbMs: r.cold_ttfb_ms,
|
||||
warmTtfbMs: r.warm_ttfb_ms,
|
||||
p95TtfbMs: r.p95_ttfb_ms,
|
||||
source: r.source,
|
||||
sha: r.sha,
|
||||
prNumber: r.pr_number,
|
||||
})),
|
||||
deployMarkers,
|
||||
});
|
||||
}
|
||||
|
||||
/** GET /api/config -- available sites, routes, and regions */
|
||||
async function handleConfig(): Promise<Response> {
|
||||
return Response.json({
|
||||
sites: SITES.map((s) => ({ id: s.id, label: s.label, targetUrl: s.targetUrl })),
|
||||
defaultSite: DEFAULT_SITE_ID,
|
||||
routes: TARGET_ROUTES,
|
||||
regions: REGIONS.map((r) => ({ id: r, label: REGION_LABELS[r] })),
|
||||
});
|
||||
}
|
||||
|
||||
/** Accept short abbreviated or full-length hex SHAs. */
|
||||
const SHA_RE = /^[a-f0-9]{7,40}$/i;
|
||||
|
||||
/**
|
||||
* POST /api/trigger -- run an ad-hoc measurement, optionally record it.
|
||||
*
|
||||
* Body (all optional):
|
||||
* {
|
||||
* "note"?: string,
|
||||
* "sha"?: string,
|
||||
* "prNumber"?: number,
|
||||
* "ephemeral"?: boolean, // if true, run the probes but don't persist
|
||||
* "site"?: string // site id; omit to measure every site
|
||||
* }
|
||||
*
|
||||
* No auth in-Worker: this endpoint is expected to be protected by a
|
||||
* Cloudflare Access policy at the edge. If Access misroutes or is
|
||||
* misconfigured, the request will still run measurements -- keep Access
|
||||
* scoped tightly to POST /api/trigger.
|
||||
*
|
||||
* Persisted runs are tagged source=manual and are excluded from the
|
||||
* dashboard graph and summary cards but appear in the results table with
|
||||
* a "manual" badge. Ephemeral runs run the probes for real but skip the
|
||||
* insert entirely -- useful for private/local checks that shouldn't
|
||||
* appear on the dashboard at all.
|
||||
*/
|
||||
async function handleTrigger(request: Request, env: Env): Promise<Response> {
  // Body fields arrive as `unknown` and are validated individually below;
  // malformed optional fields degrade to null/default rather than erroring.
  let body: {
    note?: unknown;
    sha?: unknown;
    prNumber?: unknown;
    ephemeral?: unknown;
    site?: unknown;
  } = {};
  // Only parse when the client declared a non-empty body. NOTE(review): a
  // chunked request without a content-length header would be treated as an
  // empty body here -- confirm callers always send content-length.
  const contentLength = request.headers.get("content-length");
  if (contentLength && contentLength !== "0") {
    try {
      body = await request.json();
    } catch {
      return Response.json({ error: "invalid JSON body" }, { status: 400 });
    }
  }

  // Sanitize: blank/whitespace notes, SHAs failing SHA_RE, and non-positive
  // or non-integer PR numbers all collapse to null (lenient, not a 400).
  const note = typeof body.note === "string" && body.note.trim() !== "" ? body.note.trim() : null;
  const sha = typeof body.sha === "string" && SHA_RE.test(body.sha) ? body.sha : null;
  const prNumber =
    typeof body.prNumber === "number" && Number.isInteger(body.prNumber) && body.prNumber > 0
      ? body.prNumber
      : null;
  const ephemeral = body.ephemeral === true;

  // Default to measuring every site; an explicit site id narrows to one.
  // Unlike the fields above, an unknown site id is a hard 400.
  let sites = SITES;
  if (typeof body.site === "string") {
    const match = getSite(body.site);
    if (!match) {
      return Response.json(
        { error: `unknown site "${body.site}"; valid: ${SITES.map((s) => s.id).join(", ")}` },
        { status: 400 },
      );
    }
    sites = [match];
  }

  const started = Date.now();
  const results = await runMeasurements(env, { source: "manual", sha, prNumber, note, sites });

  if (results.length === 0) {
    // Every probe failed or returned nothing -- surface as a bad gateway.
    return Response.json({ error: "no measurements returned from probes" }, { status: 502 });
  }

  // Ephemeral runs execute the probes for real but skip persistence.
  if (!ephemeral) {
    await insertResults(env.DB, results);
  }

  return Response.json({
    inserted: ephemeral ? 0 : results.length,
    ephemeral,
    durationMs: Date.now() - started,
    note,
    sha,
    prNumber,
    sites: sites.map((s) => s.id),
    // Echo the structured result so the CLI can print it without a follow-up query.
    results: results.map((r) => ({
      site: r.site,
      route: r.route,
      region: r.region,
      coldTtfbMs: r.coldTtfbMs,
      warmTtfbMs: r.warmTtfbMs,
      p95TtfbMs: r.p95TtfbMs,
      cfColo: r.cfColo,
      coldServerTimings: r.coldServerTimings,
      warmServerTimings: r.warmServerTimings,
    })),
  });
}
|
||||
59
infra/perf-monitor/src/events.ts
Normal file
59
infra/perf-monitor/src/events.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Type definitions for Cloudflare event subscription messages.
|
||||
* See: https://developers.cloudflare.com/queues/event-subscriptions/events-schemas/
|
||||
*/
|
||||
|
||||
/** Workers Builds `build.succeeded` event. */
|
||||
export interface BuildSucceededEvent {
|
||||
type: "cf.workersBuilds.worker.build.succeeded";
|
||||
source: {
|
||||
type: "workersBuilds.worker";
|
||||
workerName: string;
|
||||
};
|
||||
payload: {
|
||||
buildUuid: string;
|
||||
status: "success";
|
||||
buildOutcome: "success";
|
||||
createdAt: string;
|
||||
initializingAt: string;
|
||||
runningAt: string;
|
||||
stoppedAt: string;
|
||||
buildTriggerMetadata: {
|
||||
buildTriggerSource: string;
|
||||
branch: string;
|
||||
commitHash: string;
|
||||
commitMessage: string;
|
||||
author: string;
|
||||
buildCommand: string;
|
||||
deployCommand: string;
|
||||
rootDirectory: string;
|
||||
repoName: string;
|
||||
providerAccountName: string;
|
||||
providerType: string;
|
||||
};
|
||||
};
|
||||
metadata: {
|
||||
accountId: string;
|
||||
eventSubscriptionId: string;
|
||||
eventSchemaVersion: number;
|
||||
eventTimestamp: string;
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Other event types we may receive from the subscription but ignore.
 * Kept loose (string `type`) so we don't block on schema updates.
 */
export interface UnknownEvent {
  type: string;
  source?: unknown;
  payload?: unknown;
  metadata?: unknown;
}

/** Every message shape the queue consumer may receive. */
export type PerfQueueMessage = BuildSucceededEvent | UnknownEvent;
|
||||
|
||||
/** Type guard for the only event we actually act on. */
|
||||
export function isBuildSucceeded(event: PerfQueueMessage): event is BuildSucceededEvent {
|
||||
return event.type === "cf.workersBuilds.worker.build.succeeded";
|
||||
}
|
||||
77
infra/perf-monitor/src/github.ts
Normal file
77
infra/perf-monitor/src/github.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
/**
|
||||
* GitHub API helpers for resolving a commit SHA to a merged PR number.
|
||||
*
|
||||
* Uses the "list pull requests associated with a commit" endpoint:
|
||||
* https://docs.github.com/en/rest/commits/commits#list-pull-requests-associated-with-a-commit
|
||||
*
|
||||
* Called unauthenticated. The public repo endpoint has a 60 req/hr limit per IP,
|
||||
* which is far more than our deploy rate. If that ever changes, add a token:
|
||||
* `headers.authorization = "Bearer " + env.GITHUB_TOKEN`.
|
||||
*/
|
||||
|
||||
import { GITHUB_REPO } from "./routes.js";
|
||||
|
||||
interface AssociatedPR {
|
||||
number: number;
|
||||
state: string;
|
||||
merged_at: string | null;
|
||||
base: { ref: string };
|
||||
}
|
||||
const PR_NUMBER_REGEX = /\(#(\d+)\)\s*$/;
|
||||
/**
|
||||
* Parse a PR number from a commit message. GitHub squash merges append the PR
|
||||
* number in parentheses, e.g. "feat: add feature (#123)".
|
||||
*/
|
||||
function parsePrFromMessage(commitMessage: string): number | null {
|
||||
const match = commitMessage.match(PR_NUMBER_REGEX);
|
||||
if (!match?.[1]) return null;
|
||||
return parseInt(match[1], 10);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the merged PR for a given commit SHA, if any.
|
||||
*
|
||||
* Strategy:
|
||||
* 1. Parse the commit message for `(#N)` — works for squash merges (the common case).
|
||||
* 2. Fall back to the GitHub "list PRs for a commit" API — works for merge commits.
|
||||
*
|
||||
* Returns null if no PR exists (e.g. direct push to main) or the lookup fails.
|
||||
*/
|
||||
export async function resolvePrForSha(sha: string, commitMessage?: string): Promise<number | null> {
  // Fast path: squash-merge titles embed "(#N)" -- no network call needed.
  if (commitMessage) {
    const fromMessage = parsePrFromMessage(commitMessage);
    if (fromMessage) return fromMessage;
  }

  const url = `https://api.github.com/repos/${GITHUB_REPO}/commits/${sha}/pulls`;

  let response: Response;
  try {
    response = await fetch(url, {
      headers: {
        accept: "application/vnd.github+json",
        // GitHub's API requires a user-agent header on every request.
        "user-agent": "emdash-perf-monitor",
        "x-github-api-version": "2022-11-28",
      },
    });
  } catch (err) {
    // Network failure: attribution is best-effort -- log and continue without a PR.
    console.error("PR lookup failed:", err);
    return null;
  }

  if (!response.ok) {
    // Non-2xx (rate limit, unknown SHA, ...) -- same best-effort fallback.
    console.warn(`PR lookup for ${sha} returned ${response.status}`);
    return null;
  }

  const prs = await response.json<AssociatedPR[]>();

  // Prefer a merged PR targeting main. Fall back to any merged PR.
  const mainPr = prs.find((p) => p.merged_at && p.base.ref === "main");
  if (mainPr) return mainPr.number;

  const anyMerged = prs.find((p) => p.merged_at);
  if (anyMerged) return anyMerged.number;

  // No merged PR (e.g. direct push to main).
  return null;
}
|
||||
119
infra/perf-monitor/src/index.ts
Normal file
119
infra/perf-monitor/src/index.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
/**
|
||||
* Perf monitor coordinator Worker.
|
||||
*
|
||||
* Triggers:
|
||||
* - Queue consumer: fires on every `build.succeeded` event from Cloudflare's event
|
||||
* subscriptions. We filter for the demo Worker and run measurements tagged with
|
||||
* the deploy's commit SHA. This is the primary deploy-attribution path.
|
||||
* - Cron (every 30 min): ambient baseline. Runs untagged; fills gaps between deploys
|
||||
* and catches drift the queue might miss (subscription downtime, DLQ, etc).
|
||||
* - POST /api/trigger: ad-hoc manual measurement, tagged `source=manual`.
|
||||
* Expected to be protected by a Cloudflare Access policy at the edge.
|
||||
*
|
||||
* HTTP endpoints other than /api/trigger are read-only: JSON API at /api/* and
|
||||
* the static dashboard at /.
|
||||
*/
|
||||
|
||||
import { handleApi } from "./api.js";
|
||||
import type { PerfQueueMessage } from "./events.js";
|
||||
import { isBuildSucceeded } from "./events.js";
|
||||
import { resolvePrForSha } from "./github.js";
|
||||
import { runMeasurements } from "./measure.js";
|
||||
import { TRIGGER_WORKER_NAME } from "./routes.js";
|
||||
import { insertResults } from "./store.js";
|
||||
|
||||
/**
|
||||
* Handle a single build-succeeded event: filter for the demo Worker, resolve
|
||||
* the PR number via GitHub, run measurements, persist. Errors are swallowed
|
||||
* so one bad message doesn't poison the batch.
|
||||
*/
|
||||
async function handleBuildSucceeded(
  env: Env,
  // Extract<> re-derives BuildSucceededEvent from the union so this signature
  // stays correct if the union members are renamed.
  event: Extract<PerfQueueMessage, { type: "cf.workersBuilds.worker.build.succeeded" }>,
): Promise<void> {
  const workerName = event.source.workerName;
  if (workerName !== TRIGGER_WORKER_NAME) {
    // Not our trigger worker -- ignore. Both demos build from the same
    // commit, so one event covers both sites; measuring on every known
    // worker's event would double our load without adding signal.
    return;
  }

  const meta = event.payload.buildTriggerMetadata;
  if (meta.branch !== "main") {
    // Only measure main-branch deploys.
    return;
  }

  const sha = meta.commitHash;
  if (!sha) {
    // Without a SHA the run can't be attributed to a deploy -- skip it.
    console.warn("build.succeeded event missing commitHash; skipping");
    return;
  }

  console.log(`Running deploy-triggered measurement for ${workerName} @ ${sha.slice(0, 7)}`);

  // Best-effort PR attribution, then a deploy-tagged measurement run.
  const prNumber = await resolvePrForSha(sha, meta.commitMessage);
  const results = await runMeasurements(env, { source: "deploy", sha, prNumber });

  if (results.length > 0) {
    await insertResults(env.DB, results);
    console.log(
      `Stored ${results.length} deploy measurements for ${sha.slice(0, 7)}${prNumber ? ` (PR #${prNumber})` : ""}`,
    );
  } else {
    // Every probe failed -- nothing stored for this deploy.
    console.warn(`No measurements returned for ${sha.slice(0, 7)}`);
  }
}
|
||||
|
||||
export default {
  // HTTP entry point: API routes first, everything else 404s (static assets
  // are expected to be served ahead of this handler -- see comment below).
  async fetch(request: Request, env: Env): Promise<Response> {
    const url = new URL(request.url);

    // handleApi returns null for any path/method it doesn't own.
    const apiResponse = await handleApi(request, url, env);
    if (apiResponse) return apiResponse;

    // Anything else falls through to Workers Assets for the dashboard.
    return new Response("Not found", { status: 404 });
  },

  // Cron baseline: ambient measurement run tagged source=cron.
  async scheduled(
    controller: ScheduledController,
    env: Env,
    _ctx: ExecutionContext,
  ): Promise<void> {
    console.log(`Cron triggered at ${new Date(controller.scheduledTime).toISOString()}`);

    const results = await runMeasurements(env, { source: "cron" });

    if (results.length > 0) {
      await insertResults(env.DB, results);
      console.log(`Stored ${results.length} cron measurements`);
    } else {
      // All probes failed; nothing stored. The next tick retries naturally.
      console.warn("No measurements returned from probes");
    }
  },

  // Queue consumer: deploy events from Cloudflare event subscriptions.
  async queue(batch: MessageBatch<PerfQueueMessage>, env: Env): Promise<void> {
    // Messages are processed sequentially to avoid hammering the demo with
    // parallel measurement runs (each one issues N requests per region).
    // A batch of deploy events for different Workers is rare but possible.
    for (const message of batch.messages) {
      try {
        const event = message.body;
        if (!isBuildSucceeded(event)) {
          // Event type we don't care about (build.started, build.failed, etc).
          // Ack silently.
          message.ack();
          continue;
        }
        await handleBuildSucceeded(env, event);
        message.ack();
      } catch (err) {
        console.error("Failed to process queue message:", err);
        // Retry -- exhausted retries send to the DLQ configured in wrangler.jsonc.
        message.retry();
      }
    }
  },
} satisfies ExportedHandler<Env, PerfQueueMessage>;
|
||||
101
infra/perf-monitor/src/measure.ts
Normal file
101
infra/perf-monitor/src/measure.ts
Normal file
@@ -0,0 +1,101 @@
|
||||
/** Orchestrates a measurement run across all regional probes. */
|
||||
|
||||
import type { MeasureResponse } from "../probe/src/measure.js";
|
||||
import { REGIONS, SITES, TARGET_ROUTES, WARM_REQUESTS } from "./routes.js";
|
||||
import type { Region, Site } from "./routes.js";
|
||||
import type { InsertParams, Source } from "./store.js";
|
||||
|
||||
/**
 * Maps each region id to the service-binding property on `Env` used to reach
 * that region's probe Worker. The `keyof Pick<Env, ...>` value type keeps the
 * binding names compile-checked against the actual Env declaration.
 */
const PROBE_BINDINGS: Record<
  Region,
  keyof Pick<Env, "PROBE_USE" | "PROBE_EUW" | "PROBE_APE" | "PROBE_APS">
> = {
  use: "PROBE_USE",
  euw: "PROBE_EUW",
  ape: "PROBE_APE",
  aps: "PROBE_APS",
};
|
||||
|
||||
function generateId(): string {
|
||||
const bytes = new Uint8Array(16);
|
||||
crypto.getRandomValues(bytes);
|
||||
return Array.from(bytes, (b) => b.toString(16).padStart(2, "0")).join("");
|
||||
}
|
||||
|
||||
/** Options for {@link runMeasurements} beyond the source tag. */
|
||||
export interface RunOptions {
|
||||
source: Source;
|
||||
sha?: string | null;
|
||||
prNumber?: number | null;
|
||||
note?: string | null;
|
||||
/**
|
||||
* Sites to measure. Defaults to every site in {@link SITES}. Pass a subset
|
||||
* when a caller wants to target only one deployment (e.g. manual triggers).
|
||||
*/
|
||||
sites?: readonly Site[];
|
||||
}
|
||||
|
||||
/** Dispatch measurements to all regional probes in parallel, for every site. */
export async function runMeasurements(env: Env, opts: RunOptions): Promise<InsertParams[]> {
  // Run-level metadata defaults: untagged unless the caller supplies it.
  const { source, sha = null, prNumber = null, note = null, sites = SITES } = opts;

  // Fan out across (site × region). We run all probes in parallel -- each one
  // issues N requests per route on its own, so the measurement load on the
  // demos is bounded regardless of how many sites we have.
  const probePromises = sites.flatMap((site) =>
    REGIONS.map(async (region) => {
      const binding = PROBE_BINDINGS[region];
      const probe = env[binding];
      // Payload consumed by the probe Worker's /measure endpoint.
      const payload = {
        targetUrl: site.targetUrl,
        routes: TARGET_ROUTES.map((r) => ({ path: r.path, label: r.label })),
        warmRequests: WARM_REQUESTS,
        region,
      };

      try {
        // Fetch through the service binding; the literal host here is a
        // placeholder -- presumably routing happens via the binding itself.
        const response = await probe.fetch("https://probe/measure", {
          method: "POST",
          headers: { "Content-Type": "application/json" },
          body: JSON.stringify(payload),
        });

        if (!response.ok) {
          // One failed probe contributes nothing; the run continues.
          const errText = await response.text();
          console.error(
            `Probe ${region} failed for site=${site.id}: ${response.status} ${errText}`,
          );
          return [];
        }

        const data = await response.json<MeasureResponse>();

        // Tag every per-route probe result with the run-level metadata.
        return data.results.map(
          (r): InsertParams => ({
            id: generateId(),
            sha,
            prNumber,
            route: r.path,
            region,
            coldTtfbMs: r.coldTtfbMs,
            warmTtfbMs: r.warmTtfbMs,
            p95TtfbMs: r.p95TtfbMs,
            statusCode: r.statusCode,
            cfColo: r.cfColo,
            cfPlacement: r.cfPlacement,
            coldServerTimings: r.coldServerTimings,
            warmServerTimings: r.warmServerTimings,
            note,
            source,
            site: site.id,
          }),
        );
      } catch (err) {
        // Network/binding error: same policy as a non-OK response -- log and
        // let the rest of the fan-out proceed.
        console.error(`Probe ${region} error for site=${site.id}:`, err);
        return [];
      }
    }),
  );

  const allResults = await Promise.all(probePromises);
  return allResults.flat();
}
|
||||
111
infra/perf-monitor/src/routes.ts
Normal file
111
infra/perf-monitor/src/routes.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/** Target routes to measure and their thresholds. */
|
||||
|
||||
export interface TargetRoute {
|
||||
path: string;
|
||||
label: string;
|
||||
/** Cold TTFB threshold in ms -- CI fails if exceeded. */
|
||||
coldThresholdMs: number;
|
||||
/**
|
||||
* HTTP status codes considered valid for this route. If a measurement returns
|
||||
* something outside this set, the CI trigger marks it as a sanity-check failure.
|
||||
* Measuring a 404 or 500 response tells us nothing about real-world perf -- the
|
||||
* route is either broken or has drifted (e.g. a referenced post was deleted).
|
||||
*
|
||||
* Note: the probe follows redirects, so this describes the final response status.
|
||||
* `/_emdash/admin` 302s to the login page (200), so 200 covers it.
|
||||
*/
|
||||
expectedStatuses: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
* A deployed demo we measure. Sites share the same route set and are compared
|
||||
* head-to-head on the dashboard. `blog` is the baseline; `cache` runs with
|
||||
* Astro's experimental cache provider enabled.
|
||||
*/
|
||||
export interface Site {
|
||||
/** Stable slug stored in `perf_results.site`. */
|
||||
id: string;
|
||||
label: string;
|
||||
targetUrl: string;
|
||||
/** Cloudflare Worker name — matched against build.succeeded events. */
|
||||
workerName: string;
|
||||
}
|
||||
|
||||
export const SITES: readonly Site[] = [
|
||||
{
|
||||
id: "blog",
|
||||
label: "Baseline",
|
||||
targetUrl: "https://blog-demo.emdashcms.com",
|
||||
workerName: "emdash-demo-blog",
|
||||
},
|
||||
{
|
||||
id: "cache",
|
||||
label: "Astro cache",
|
||||
targetUrl: "https://cache-demo.emdashcms.com",
|
||||
workerName: "emdash-demo-cache",
|
||||
},
|
||||
] as const;
|
||||
|
||||
export const DEFAULT_SITE_ID = "blog";
|
||||
|
||||
export function getSite(id: string): Site | undefined {
|
||||
return SITES.find((s) => s.id === id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Worker name whose build.succeeded events drive deploy-attributed
|
||||
* measurements. Both sites build from the same repo on every main-branch
|
||||
* commit, so measuring on the baseline worker's event covers both (see
|
||||
* `handleBuildSucceeded`). If only cache-demo deploys (rare), the cron
|
||||
* job will catch it on the next tick.
|
||||
*/
|
||||
export const TRIGGER_WORKER_NAME = "emdash-demo-blog";
|
||||
|
||||
/**
|
||||
* GitHub repo used for PR number lookup. SHA -> merged PR resolution happens
|
||||
* via the GitHub API when a deploy event arrives.
|
||||
*/
|
||||
export const GITHUB_REPO = "emdash-cms/emdash";
|
||||
|
||||
/**
|
||||
* Routes we measure. Each exercises a different code path on the demo:
|
||||
* - "/" hits the homepage template and queries the latest posts
|
||||
* - "/posts/<slug>" renders a single post (different template + single-row fetch)
|
||||
* - "/_emdash/admin" returns a redirect from the admin root -- measures auth middleware latency
|
||||
*
|
||||
* We avoid `/_emdash/api/content/*` -- it requires auth and returns 401 immediately,
|
||||
* which doesn't reflect real query latency.
|
||||
*/
|
||||
export const TARGET_ROUTES: TargetRoute[] = [
|
||||
{
|
||||
path: "/",
|
||||
label: "Homepage",
|
||||
coldThresholdMs: 2000,
|
||||
expectedStatuses: [200],
|
||||
},
|
||||
{
|
||||
path: "/posts/marshland-birds-at-the-lake-havasu-national-wildlife-refuge",
|
||||
label: "Single Post",
|
||||
coldThresholdMs: 2000,
|
||||
expectedStatuses: [200],
|
||||
},
|
||||
{
|
||||
path: "/_emdash/admin",
|
||||
label: "Admin (login page)",
|
||||
coldThresholdMs: 1500,
|
||||
expectedStatuses: [200],
|
||||
},
|
||||
];
|
||||
|
||||
export const REGIONS = ["use", "euw", "ape", "aps"] as const;
|
||||
export type Region = (typeof REGIONS)[number];
|
||||
|
||||
export const REGION_LABELS: Record<Region, string> = {
|
||||
use: "US East",
|
||||
euw: "Europe West",
|
||||
ape: "Asia Pacific East",
|
||||
aps: "Asia Pacific South",
|
||||
};
|
||||
|
||||
/** Number of warm requests per route (we take the median). */
|
||||
export const WARM_REQUESTS = 5;
|
||||
245
infra/perf-monitor/src/store.ts
Normal file
245
infra/perf-monitor/src/store.ts
Normal file
@@ -0,0 +1,245 @@
|
||||
/** D1 storage layer for perf results. */
|
||||
|
||||
/** All valid values for the `source` column. */
|
||||
export type Source = "deploy" | "cron" | "manual";
|
||||
|
||||
export interface PerfResult {
|
||||
id: string;
|
||||
sha: string | null;
|
||||
pr_number: number | null;
|
||||
route: string;
|
||||
region: string;
|
||||
cold_ttfb_ms: number | null;
|
||||
warm_ttfb_ms: number | null;
|
||||
p95_ttfb_ms: number | null;
|
||||
status_code: number | null;
|
||||
cf_colo: string | null;
|
||||
cf_placement: string | null;
|
||||
/** Raw JSON string as stored. Use {@link parseColdServerTimings} to decode. */
|
||||
cold_server_timings: string | null;
|
||||
/**
|
||||
* Median duration per metric across warm requests, same JSON shape as
|
||||
* `cold_server_timings`. Null when the target didn't emit Server-Timing
|
||||
* on warm responses, or when no warm requests were issued.
|
||||
*/
|
||||
warm_server_timings: string | null;
|
||||
note: string | null;
|
||||
timestamp: string;
|
||||
source: string;
|
||||
site: string;
|
||||
}
|
||||
|
||||
export interface InsertParams {
|
||||
id: string;
|
||||
sha: string | null;
|
||||
prNumber: number | null;
|
||||
route: string;
|
||||
region: string;
|
||||
coldTtfbMs: number | null;
|
||||
warmTtfbMs: number | null;
|
||||
p95TtfbMs: number | null;
|
||||
statusCode: number | null;
|
||||
cfColo: string | null;
|
||||
cfPlacement: string | null;
|
||||
/** Will be JSON.stringify'd on the way in. Null if unavailable. */
|
||||
coldServerTimings: Record<string, { dur: number; desc?: string }> | null;
|
||||
/** Median-per-metric snapshot of warm Server-Timing. Null if unavailable. */
|
||||
warmServerTimings: Record<string, { dur: number; desc?: string }> | null;
|
||||
note: string | null;
|
||||
source: Source;
|
||||
site: string;
|
||||
}
|
||||
|
||||
/** Column list shared between insertResult and insertResults. */
const INSERT_COLUMNS =
  "id, sha, pr_number, route, region, cold_ttfb_ms, warm_ttfb_ms, p95_ttfb_ms, status_code, cf_colo, cf_placement, cold_server_timings, warm_server_timings, note, source, site";
// One placeholder per column above (16 total) -- keep the two in sync.
const INSERT_PLACEHOLDERS = "?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?";

/**
 * Bind one row to a prepared INSERT, in the exact order of INSERT_COLUMNS.
 * Server-timing objects are JSON-serialized here; every other value (nulls
 * included) is passed through unchanged.
 */
function bindInsert(stmt: D1PreparedStatement, p: InsertParams): D1PreparedStatement {
  return stmt.bind(
    p.id,
    p.sha,
    p.prNumber,
    p.route,
    p.region,
    p.coldTtfbMs,
    p.warmTtfbMs,
    p.p95TtfbMs,
    p.statusCode,
    p.cfColo,
    p.cfPlacement,
    p.coldServerTimings ? JSON.stringify(p.coldServerTimings) : null,
    p.warmServerTimings ? JSON.stringify(p.warmServerTimings) : null,
    p.note,
    p.source,
    p.site,
  );
}
|
||||
|
||||
/** Insert a single measurement result. */
|
||||
export async function insertResult(db: D1Database, params: InsertParams): Promise<void> {
|
||||
await bindInsert(
|
||||
db.prepare(`INSERT INTO perf_results (${INSERT_COLUMNS}) VALUES (${INSERT_PLACEHOLDERS})`),
|
||||
params,
|
||||
).run();
|
||||
}
|
||||
|
||||
/** Insert a batch of results in a single transaction. */
|
||||
export async function insertResults(db: D1Database, results: InsertParams[]): Promise<void> {
|
||||
const stmt = db.prepare(
|
||||
`INSERT INTO perf_results (${INSERT_COLUMNS}) VALUES (${INSERT_PLACEHOLDERS})`,
|
||||
);
|
||||
await db.batch(results.map((p) => bindInsert(stmt, p)));
|
||||
}
|
||||
|
||||
export interface QueryParams {
|
||||
route?: string;
|
||||
region?: string;
|
||||
source?: Source;
|
||||
site?: string;
|
||||
since?: string;
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize an ISO-8601 timestamp (e.g. "2026-04-20T05:00:00.000Z") to the
|
||||
* " "-separated form D1's `datetime('now')` writes ("2026-04-20 05:00:00").
|
||||
*
|
||||
* SQLite compares TEXT lexicographically: space (0x20) sorts before "T"
|
||||
* (0x54). If we pass the client's ISO string straight into `timestamp >= ?`,
|
||||
* any stored row whose calendar date matches the since-boundary compares
|
||||
* LESS than since regardless of its actual time, so same-day filters (1h,
|
||||
* and the "today" portion of 24h) silently return zero rows.
|
||||
*/
|
||||
const SINCE_TIMESTAMP_RE = /^(\d{4}-\d{2}-\d{2})[T ](\d{2}:\d{2}:\d{2})/;
|
||||
|
||||
function normalizeSince(since: string): string {
|
||||
const match = SINCE_TIMESTAMP_RE.exec(since);
|
||||
return match ? `${match[1]} ${match[2]}` : since;
|
||||
}
|
||||
|
||||
/** Query historical results with optional filters. */
|
||||
export async function queryResults(db: D1Database, params: QueryParams): Promise<PerfResult[]> {
|
||||
const conditions: string[] = [];
|
||||
const bindings: (string | number)[] = [];
|
||||
|
||||
if (params.route) {
|
||||
conditions.push("route = ?");
|
||||
bindings.push(params.route);
|
||||
}
|
||||
if (params.region) {
|
||||
conditions.push("region = ?");
|
||||
bindings.push(params.region);
|
||||
}
|
||||
if (params.source) {
|
||||
conditions.push("source = ?");
|
||||
bindings.push(params.source);
|
||||
}
|
||||
if (params.site) {
|
||||
conditions.push("site = ?");
|
||||
bindings.push(params.site);
|
||||
}
|
||||
if (params.since) {
|
||||
conditions.push("timestamp >= ?");
|
||||
bindings.push(normalizeSince(params.since));
|
||||
}
|
||||
|
||||
const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
|
||||
const limit = Math.min(params.limit ?? 500, 1000);
|
||||
|
||||
const query = `SELECT * FROM perf_results ${where} ORDER BY timestamp DESC LIMIT ?`;
|
||||
bindings.push(limit);
|
||||
|
||||
const result = await db
|
||||
.prepare(query)
|
||||
.bind(...bindings)
|
||||
.all<PerfResult>();
|
||||
return result.results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the latest result per route/region combo for a given site.
|
||||
* Manual runs are excluded -- they're ad-hoc probes and would otherwise
|
||||
* poison the dashboard's "current state" cards whenever one was the most
|
||||
* recent sample.
|
||||
*/
|
||||
export async function getLatestResults(db: D1Database, site: string): Promise<PerfResult[]> {
|
||||
const result = await db
|
||||
.prepare(
|
||||
`SELECT p.* FROM perf_results p
|
||||
INNER JOIN (
|
||||
SELECT route, region, MAX(timestamp) as max_ts
|
||||
FROM perf_results
|
||||
WHERE source != 'manual' AND site = ?
|
||||
GROUP BY route, region
|
||||
) latest ON p.route = latest.route AND p.region = latest.region AND p.timestamp = latest.max_ts
|
||||
WHERE p.source != 'manual' AND p.site = ?
|
||||
ORDER BY p.region, p.route`,
|
||||
)
|
||||
.bind(site, site)
|
||||
.all<PerfResult>();
|
||||
return result.results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get rolling medians for each route/region over the last N days for a given site.
|
||||
* Manual runs are excluded so ad-hoc probes don't pull the baseline around.
|
||||
*/
|
||||
export async function getRollingMedians(
|
||||
db: D1Database,
|
||||
site: string,
|
||||
days: number = 7,
|
||||
): Promise<
|
||||
Array<{ route: string; region: string; median_cold: number; median_warm: number; count: number }>
|
||||
> {
|
||||
const result = await db
|
||||
.prepare(
|
||||
`SELECT
|
||||
route,
|
||||
region,
|
||||
COUNT(*) as count,
|
||||
-- SQLite doesn't have PERCENTILE_CONT, so we approximate with AVG of middle values
|
||||
AVG(cold_ttfb_ms) as median_cold,
|
||||
AVG(warm_ttfb_ms) as median_warm
|
||||
FROM perf_results
|
||||
WHERE timestamp >= datetime('now', ?)
|
||||
AND cold_ttfb_ms IS NOT NULL
|
||||
AND source != 'manual'
|
||||
AND site = ?
|
||||
GROUP BY route, region
|
||||
ORDER BY region, route`,
|
||||
)
|
||||
.bind(`-${days} days`, site)
|
||||
.all<{
|
||||
route: string;
|
||||
region: string;
|
||||
median_cold: number;
|
||||
median_warm: number;
|
||||
count: number;
|
||||
}>();
|
||||
return result.results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all deploy-triggered results (with SHA and PR info) for chart markers.
|
||||
* Only 'deploy' source has SHA attribution -- 'cron' is untagged baseline.
|
||||
*/
|
||||
export async function getDeployResults(
|
||||
db: D1Database,
|
||||
site: string,
|
||||
since?: string,
|
||||
): Promise<PerfResult[]> {
|
||||
const sinceClause = since ? "AND timestamp >= ?" : "";
|
||||
const bindings: string[] = [site];
|
||||
if (since) bindings.push(normalizeSince(since));
|
||||
|
||||
const result = await db
|
||||
.prepare(
|
||||
`SELECT * FROM perf_results
|
||||
WHERE source = 'deploy' AND site = ? ${sinceClause}
|
||||
ORDER BY timestamp ASC`,
|
||||
)
|
||||
.bind(...bindings)
|
||||
.all<PerfResult>();
|
||||
return result.results;
|
||||
}
|
||||
Reference in New Issue
Block a user