From 47992f48dd332b537fe76800c605c72d9eab564e Mon Sep 17 00:00:00 2001
From: Will Chen <willchen90@gmail.com>
Date: Mon, 15 Dec 2025 23:24:08 -0800
Subject: [PATCH] Leave GitHub comment with playwright results (#1965)

<!-- CURSOR_SUMMARY -->
> [!NOTE]
> Posts a per-OS Playwright test summary as a PR comment by adding a
JSON reporter and a CI step to generate and publish the results.
>
> - **CI / Reporting**
>   - Add permissions and a GitHub Script step in
>     `.github/workflows/ci.yml` to generate and post/update a Playwright test
>     summary comment after merging shard reports.
>   - Link the comment to the full run, where the merged HTML report artifact is uploaded.
> - **Playwright config**
>   - Update `merge.config.ts` to add JSON reporter output to
>     `playwright-report/results.json` alongside the HTML report.
> - **New Script**
>   - Add `scripts/generate-playwright-summary.js` to parse the Playwright
>     JSON, compute per-OS (macOS/Windows) pass/fail/flaky/skipped counts,
>     list the top failures/flaky tests, and write both the PR comment and the
>     job summary.
>
> <sup>Written by [Cursor
Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit
d5ca6987f65e9a7063533960382516af89e67391. This will update automatically
on new commits. Configure
[here](https://cursor.com/dashboard?tab=bugbot).</sup>
<!-- /CURSOR_SUMMARY -->





<!-- This is an auto-generated description by cubic. -->
***
## Summary by cubic
Posts Playwright test results as a PR comment with a per-OS summary,
flaky test counts, and top failures, improving CI visibility across
macOS and Windows. Adds a JSON reporter and a GitHub Script step that
links to the full report.

- **New Features**
  - Adds JSON reporter output to playwright-report/results.json.
  - Adds a script to parse results, include flaky tests, post/update the
    PR comment, and write the job summary.
  - Updates CI workflow permissions and runs the summary script after
    the report merge.

<sup>Written for commit d5ca6987f65e9a7063533960382516af89e67391.
Summary will update automatically on new commits.</sup>

<!-- End of auto-generated description by cubic. -->
---
 .github/workflows/ci.yml               |  11 +
 merge.config.ts                        |   5 +-
 scripts/generate-playwright-summary.js | 281 +++++++++++++++++++++++++
 3 files changed, 296 insertions(+), 1 deletion(-)
 create mode 100644 scripts/generate-playwright-summary.js
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ee24284..f533288 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -107,6 +107,10 @@ jobs:
     # Merge reports after playwright-tests, even if some shards have failed
     if: ${{ !cancelled() }}
     needs: [test]
+    permissions:
+      contents: read
+      pull-requests: write
+      actions: read
 
     runs-on: ubuntu-latest
     steps:
@@ -147,3 +151,10 @@ jobs:
           name: html-report--attempt-${{ github.run_attempt }}
           path: playwright-report
           retention-days: 3
+
+      - name: Generate test summary comment
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const { run } = require('./scripts/generate-playwright-summary.js');
+            await run({ github, context, core });
diff --git a/merge.config.ts b/merge.config.ts
index 05f982f..6d99c67 100644
--- a/merge.config.ts
+++ b/merge.config.ts
@@ -1,4 +1,7 @@
 export default {
   testDir: "e2e-tests",
-  reporter: [["html", { open: "never" }]],
+  reporter: [
+    ["html", { open: "never" }],
+    ["json", { outputFile: "playwright-report/results.json" }],
+  ],
 };
diff --git a/scripts/generate-playwright-summary.js b/scripts/generate-playwright-summary.js
new file mode 100644
index 0000000..4f17284
--- /dev/null
+++ b/scripts/generate-playwright-summary.js
@@ -0,0 +1,281 @@
+// This script parses Playwright JSON results and generates a PR comment summary
+// Used by the CI workflow's merge-reports job
+
+const fs = require("fs");
+
+// Remove ANSI SGR (color/style) sequences such as ESC[31m; falsy input is returned unchanged
+function stripAnsi(str) {
+  if (!str) return str;
+  // eslint-disable-next-line no-control-regex
+  return str.replace(/\x1b\[[0-9;]*m/g, "");
+}
+
+/**
+ * Summarize the merged Playwright JSON report per OS and publish the result.
+ *
+ * On pull_request events the summary is posted as a PR comment (updating the
+ * bot's earlier one if present); it is always written to the job summary.
+ *
+ * @param {{ github: object, context: object, core: object }} args github-script globals
+ */
+async function run({ github, context, core }) {
+  // Nothing to report when the merged JSON output is missing
+  const reportPath = "playwright-report/results.json";
+  if (!fs.existsSync(reportPath)) {
+    console.log("No results.json found, skipping comment");
+    return;
+  }
+
+  const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));
+
+  // Only report on OSes whose blob reports are present (matched by file name)
+  const blobDir = "all-blob-reports";
+  const blobFiles = fs.existsSync(blobDir) ? fs.readdirSync(blobDir) : [];
+  const hasMacOS = blobFiles.some((f) => f.includes("darwin"));
+  const hasWindows = blobFiles.some((f) => f.includes("win32"));
+
+  // Initialize per-OS results; each OS gets its own fresh counters and lists
+  const emptyTally = () => ({
+    passed: 0,
+    failed: 0,
+    skipped: 0,
+    flaky: 0,
+    failures: [],
+    flakyTests: [],
+  });
+  const resultsByOs = {};
+  if (hasMacOS) resultsByOs["macOS"] = emptyTally();
+  if (hasWindows) resultsByOs["Windows"] = emptyTally();
+
+  // Traverse suites and collect test results
+  function traverseSuites(suites, parentTitle = "") {
+    for (const suite of suites || []) {
+      const suiteTitle = parentTitle
+        ? `${parentTitle} > ${suite.title}`
+        : suite.title;
+
+      for (const spec of suite.specs || []) {
+        for (const test of spec.tests || []) {
+          const results = test.results || [];
+          if (results.length === 0) continue;
+
+          // Use the final result (last retry attempt) to determine the test outcome
+          const finalResult = results[results.length - 1];
+
+          // Determine OS from attachments in any result (they contain platform paths)
+          let os = null;
+          for (const result of results) {
+            for (const att of result.attachments || []) {
+              const p = att.path || "";
+              if (p.includes("darwin") || p.includes("macos")) {
+                os = "macOS";
+                break;
+              }
+              if (p.includes("win32") || p.includes("windows")) {
+                os = "Windows";
+                break;
+              }
+            }
+            if (os) break;
+
+            // Fallback: check error stack for OS paths
+            if (result.error?.stack) {
+              if (result.error.stack.includes("/Users/")) {
+                os = "macOS";
+                break;
+              } else if (
+                result.error.stack.includes("C:\\") ||
+                result.error.stack.includes("D:\\")
+              ) {
+                os = "Windows";
+                break;
+              }
+            }
+          }
+
+          // If we still don't know, count the test under every reported OS
+          const osTargets = os ? [os] : Object.keys(resultsByOs);
+
+          // Check if this is a flaky test (passed eventually but had prior failures)
+          const hadPriorFailure = results
+            .slice(0, -1)
+            .some(
+              (r) =>
+                r.status === "failed" ||
+                r.status === "timedOut" ||
+                r.status === "interrupted",
+            );
+          const isFlaky = finalResult.status === "passed" && hadPriorFailure;
+
+          for (const targetOs of osTargets) {
+            if (!resultsByOs[targetOs]) continue;
+            const status = finalResult.status;
+
+            if (isFlaky) {
+              resultsByOs[targetOs].flaky++;
+              resultsByOs[targetOs].passed++;
+              resultsByOs[targetOs].flakyTests.push({
+                title: `${suiteTitle} > ${spec.title}`,
+                retries: results.length - 1,
+              });
+            } else if (status === "passed") {
+              resultsByOs[targetOs].passed++;
+            } else if (
+              status === "failed" ||
+              status === "timedOut" ||
+              status === "interrupted"
+            ) {
+              resultsByOs[targetOs].failed++;
+              const errorMsg =
+                finalResult.error?.message?.split("\n")[0] || "Test failed";
+              resultsByOs[targetOs].failures.push({
+                title: `${suiteTitle} > ${spec.title}`,
+                error: stripAnsi(errorMsg),
+              });
+            } else if (status === "skipped") {
+              resultsByOs[targetOs].skipped++;
+            }
+          }
+        }
+      }
+
+      // Recurse into nested suites
+      if (suite.suites) {
+        traverseSuites(suite.suites, suiteTitle);
+      }
+    }
+  }
+
+  traverseSuites(report.suites);
+
+  // Calculate totals
+  let totalPassed = 0,
+    totalFailed = 0,
+    totalSkipped = 0,
+    totalFlaky = 0;
+  for (const os of Object.keys(resultsByOs)) {
+    totalPassed += resultsByOs[os].passed;
+    totalFailed += resultsByOs[os].failed;
+    totalSkipped += resultsByOs[os].skipped;
+    totalFlaky += resultsByOs[os].flaky;
+  }
+
+  // Build the comment
+  let comment = "## 🎭 Playwright Test Results\n\n";
+  const allPassed = totalFailed === 0;
+
+  if (allPassed) {
+    comment += "### ✅ All tests passed!\n\n";
+    comment += "| OS | Passed | Flaky | Skipped |\n";
+    comment += "|:---|:---:|:---:|:---:|\n";
+    for (const [os, data] of Object.entries(resultsByOs)) {
+      const emoji = os === "macOS" ? "🍎" : "🪟";
+      comment += `| ${emoji} ${os} | ${data.passed} | ${data.flaky} | ${data.skipped} |\n`;
+    }
+    comment += `\n**Total: ${totalPassed} tests passed**`;
+    if (totalFlaky > 0) comment += ` (${totalFlaky} flaky)`;
+    if (totalSkipped > 0) comment += ` (${totalSkipped} skipped)`;
+
+    // List flaky tests even when all passed
+    if (totalFlaky > 0) {
+      comment += "\n\n### ⚠️ Flaky Tests\n\n";
+      for (const [os, data] of Object.entries(resultsByOs)) {
+        if (data.flakyTests.length === 0) continue;
+        const emoji = os === "macOS" ? "🍎" : "🪟";
+        comment += `#### ${emoji} ${os}\n\n`;
+        for (const f of data.flakyTests.slice(0, 10)) {
+          comment += `- \`${f.title}\` (passed after ${f.retries} ${f.retries === 1 ? "retry" : "retries"})\n`;
+        }
+        if (data.flakyTests.length > 10) {
+          comment += `- ... and ${data.flakyTests.length - 10} more\n`;
+        }
+        comment += "\n";
+      }
+    }
+  } else {
+    comment += "### ❌ Some tests failed\n\n";
+    comment += "| OS | Passed | Failed | Flaky | Skipped |\n";
+    comment += "|:---|:---:|:---:|:---:|:---:|\n";
+    for (const [os, data] of Object.entries(resultsByOs)) {
+      const emoji = os === "macOS" ? "🍎" : "🪟";
+      comment += `| ${emoji} ${os} | ${data.passed} | ${data.failed} | ${data.flaky} | ${data.skipped} |\n`;
+    }
+    comment += `\n**Summary: ${totalPassed} passed, ${totalFailed} failed**`;
+    if (totalFlaky > 0) comment += `, ${totalFlaky} flaky`;
+    if (totalSkipped > 0) comment += `, ${totalSkipped} skipped`;
+
+    comment += "\n\n### Failed Tests\n\n";
+
+    for (const [os, data] of Object.entries(resultsByOs)) {
+      if (data.failures.length === 0) continue;
+      const emoji = os === "macOS" ? "🍎" : "🪟";
+      comment += `#### ${emoji} ${os}\n\n`;
+      for (const f of data.failures.slice(0, 10)) {
+        const errorPreview =
+          f.error.length > 150 ? f.error.substring(0, 150) + "..." : f.error;
+        comment += `- \`${f.title}\`\n  - ${errorPreview}\n`;
+      }
+      if (data.failures.length > 10) {
+        comment += `- ... and ${data.failures.length - 10} more\n`;
+      }
+      comment += "\n";
+    }
+
+    // List flaky tests
+    if (totalFlaky > 0) {
+      comment += "### ⚠️ Flaky Tests\n\n";
+      for (const [os, data] of Object.entries(resultsByOs)) {
+        if (data.flakyTests.length === 0) continue;
+        const emoji = os === "macOS" ? "🍎" : "🪟";
+        comment += `#### ${emoji} ${os}\n\n`;
+        for (const f of data.flakyTests.slice(0, 10)) {
+          comment += `- \`${f.title}\` (passed after ${f.retries} ${f.retries === 1 ? "retry" : "retries"})\n`;
+        }
+        if (data.flakyTests.length > 10) {
+          comment += `- ... and ${data.flakyTests.length - 10} more\n`;
+        }
+        comment += "\n";
+      }
+    }
+  }
+
+  const repoUrl = `https://github.com/${process.env.GITHUB_REPOSITORY}`;
+  const runId = process.env.GITHUB_RUN_ID;
+  comment += `\n---\n📊 [View full report](${repoUrl}/actions/runs/${runId})`;
+
+  // Post or update comment on PR; paginate so an earlier bot comment beyond page one is still found
+  if (context.eventName === "pull_request") {
+    const comments = await github.paginate(github.rest.issues.listComments, {
+      owner: context.repo.owner,
+      repo: context.repo.repo,
+      issue_number: context.issue.number,
+    });
+
+    const botComment = comments.find(
+      (c) =>
+        c.user?.type === "Bot" &&
+        c.body?.includes("🎭 Playwright Test Results"),
+    );
+
+    if (botComment) {
+      await github.rest.issues.updateComment({
+        owner: context.repo.owner,
+        repo: context.repo.repo,
+        comment_id: botComment.id,
+        body: comment,
+      });
+    } else {
+      await github.rest.issues.createComment({
+        owner: context.repo.owner,
+        repo: context.repo.repo,
+        issue_number: context.issue.number,
+        body: comment,
+      });
+    }
+  }
+
+  // Always output to job summary
+  await core.summary.addRaw(comment).write();
+}
+
+module.exports = { run };