Leave GitHub comment with playwright results (#1965)

> [!NOTE]
> Posts a per-OS Playwright test summary as a PR comment by adding a JSON reporter and a CI step to generate and publish the results.
>
> - **CI / Reporting**
>   - Add permissions and a GitHub Script step in `.github/workflows/ci.yml` to generate and post/update a Playwright test summary comment after merging shard reports.
>   - Upload merged HTML report artifact and link to full run.
> - **Playwright config**
>   - Update `merge.config.ts` to add JSON reporter output to `playwright-report/results.json` alongside HTML.
> - **New Script**
>   - Add `scripts/generate-playwright-summary.js` to parse Playwright JSON, compute per-OS (macOS/Windows) pass/fail/flaky/skipped counts, list top failures/flaky tests, and write both PR comment and job summary.
>
> <sup>Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit d5ca6987f65e9a7063533960382516af89e67391. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot).</sup>

---

## Summary by cubic

Posts Playwright test results as a PR comment with a per-OS summary, flaky test counts, and top failures, improving CI visibility across macOS and Windows. Adds a JSON reporter and a GitHub Script step that links to the full report.

- **New Features**
  - Adds JSON reporter output to playwright-report/results.json.
  - Adds a script to parse results, include flaky tests, post/update the PR comment, and write the job summary.
  - Updates CI workflow permissions and runs the summary script after report merge.

<sup>Written for commit d5ca6987f65e9a7063533960382516af89e67391. Summary will update automatically on new commits.</sup>
2025-12-15 23:24:08 -08:00
parent 91cf1e97c3
commit 47992f48dd
3 changed files with 296 additions and 1 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -107,6 +107,10 @@ jobs:
    # Merge reports after playwright-tests, even if some shards have failed
    if: ${{ !cancelled() }}
    needs: [test]
    permissions:
      contents: read
      pull-requests: write
      actions: read
    runs-on: ubuntu-latest
    steps:
@@ -147,3 +151,10 @@ jobs:
          name: html-report--attempt-${{ github.run_attempt }}
          path: playwright-report
          retention-days: 3
      # Loads run() from scripts/generate-playwright-summary.js and calls it with the
      # `github`, `context` and `core` objects that actions/github-script provides.
      # The script posts or updates the Playwright summary comment on the PR and
      # writes the same text to the job summary.
      - name: Generate test summary comment
        uses: actions/github-script@v7
        with:
          script: |
            const { run } = require('./scripts/generate-playwright-summary.js');
            await run({ github, context, core });
--- a/merge.config.ts
+++ b/merge.config.ts
@@ -1,4 +1,7 @@
 export default {
  testDir: "e2e-tests",
-  reporter: [["html", { open: "never" }]],
+  reporter: [
    ["html", { open: "never" }],
    ["json", { outputFile: "playwright-report/results.json" }],
  ],
 };
--- a/scripts/generate-playwright-summary.js
+++ b/scripts/generate-playwright-summary.js
@@ -0,0 +1,281 @@
 // This script parses Playwright JSON results and generates a PR comment summary
 // Used by the CI workflow's merge-reports job
 const fs = require("fs");
 /**
  * Removes ANSI SGR (colour/style) escape sequences such as `ESC[31m` or
  * `ESC[1;32m` from terminal output.
  *
  * @param {string | null | undefined} str - Text that may contain escape codes.
  * @returns {string | null | undefined} The text without SGR sequences. Falsy
  *   input (empty string, null, undefined) is returned unchanged.
  */
 function stripAnsi(str) {
  if (!str) return str;
  // `\x1b` and `\u001b` denote the same ESC character, so one pass is enough.
  // eslint-disable-next-line no-control-regex
  return str.replace(/\x1b\[[0-9;]*m/g, "");
 }
 // Result statuses that mean an attempt did not pass.
 const FAILING_STATUSES = ["failed", "timedOut", "interrupted"];
 // Maximum number of tests listed per OS in each section of the comment.
 const MAX_LISTED_TESTS = 10;
 // Error messages longer than this are truncated in the comment.
 const MAX_ERROR_PREVIEW_LENGTH = 150;
 // Heading text used both to title the comment and to find a previous one.
 const COMMENT_MARKER = "🎭 Playwright Test Results";

 /** Creates an empty per-OS tally of counters and listed tests. */
 function createOsTally() {
  return {
    passed: 0,
    failed: 0,
    skipped: 0,
    flaky: 0,
    failures: [],
    flakyTests: [],
  };
 }

 /** Returns true when a result status counts as a failed attempt. */
 function isFailingStatus(status) {
  return FAILING_STATUSES.includes(status);
 }

 /** Returns the emoji shown next to an OS name in the comment. */
 function osEmoji(os) {
  return os === "macOS" ? "🍎" : "🪟";
 }

 /**
  * Guesses which OS a test ran on from its attempts, or returns null.
  * Attempts are checked in order; for each one the attachment paths are
  * inspected first, then the error stack is used as a fallback.
  */
 function detectOs(results) {
  for (const result of results) {
    for (const att of result.attachments || []) {
      const p = att.path || "";
      if (p.includes("darwin") || p.includes("macos")) return "macOS";
      if (p.includes("win32") || p.includes("windows")) return "Windows";
    }
    const stack = result.error?.stack;
    if (stack) {
      if (stack.includes("/Users/")) return "macOS";
      if (stack.includes("C:\\") || stack.includes("D:\\")) return "Windows";
    }
  }
  return null;
 }

 /** Walks the suite tree and adds every test's final outcome to resultsByOs. */
 function collectResults(suites, resultsByOs, parentTitle = "") {
  for (const suite of suites || []) {
    const suiteTitle = parentTitle
      ? `${parentTitle} > ${suite.title}`
      : suite.title;
    for (const spec of suite.specs || []) {
      for (const test of spec.tests || []) {
        const results = test.results || [];
        if (results.length === 0) continue;
        // The last attempt decides the outcome; earlier ones only reveal flakiness.
        const finalResult = results[results.length - 1];
        const status = finalResult.status;
        const isFlaky =
          status === "passed" &&
          results.slice(0, -1).some((r) => isFailingStatus(r.status));
        const title = `${suiteTitle} > ${spec.title}`;
        // When the OS cannot be determined the test is counted once under
        // EVERY known OS. NOTE(review): this can inflate the totals.
        const os = detectOs(results);
        const osTargets = os ? [os] : Object.keys(resultsByOs);
        for (const targetOs of osTargets) {
          const tally = resultsByOs[targetOs];
          if (!tally) continue;
          if (isFlaky) {
            // A flaky test eventually passed, so it counts as passed too.
            tally.flaky++;
            tally.passed++;
            tally.flakyTests.push({ title, retries: results.length - 1 });
          } else if (status === "passed") {
            tally.passed++;
          } else if (isFailingStatus(status)) {
            tally.failed++;
            const errorMsg =
              finalResult.error?.message?.split("\n")[0] || "Test failed";
            tally.failures.push({ title, error: stripAnsi(errorMsg) });
          } else if (status === "skipped") {
            tally.skipped++;
          }
        }
      }
    }
    // Recurse into nested suites
    if (suite.suites) {
      collectResults(suite.suites, resultsByOs, suiteTitle);
    }
  }
 }

 /** Renders the per-OS lists of flaky tests (without the section heading). */
 function renderFlakyLists(resultsByOs) {
  let section = "";
  for (const [os, data] of Object.entries(resultsByOs)) {
    if (data.flakyTests.length === 0) continue;
    section += `#### ${osEmoji(os)} ${os}\n\n`;
    for (const f of data.flakyTests.slice(0, MAX_LISTED_TESTS)) {
      section += `- \`${f.title}\` (passed after ${f.retries} ${f.retries === 1 ? "retry" : "retries"})\n`;
    }
    if (data.flakyTests.length > MAX_LISTED_TESTS) {
      section += `- ... and ${data.flakyTests.length - MAX_LISTED_TESTS} more\n`;
    }
    section += "\n";
  }
  return section;
 }

 /** Renders the per-OS lists of failed tests (without the section heading). */
 function renderFailureLists(resultsByOs) {
  let section = "";
  for (const [os, data] of Object.entries(resultsByOs)) {
    if (data.failures.length === 0) continue;
    section += `#### ${osEmoji(os)} ${os}\n\n`;
    for (const f of data.failures.slice(0, MAX_LISTED_TESTS)) {
      const errorPreview =
        f.error.length > MAX_ERROR_PREVIEW_LENGTH
          ? `${f.error.substring(0, MAX_ERROR_PREVIEW_LENGTH)}...`
          : f.error;
      section += `- \`${f.title}\`\n  - ${errorPreview}\n`;
    }
    if (data.failures.length > MAX_LISTED_TESTS) {
      section += `- ... and ${data.failures.length - MAX_LISTED_TESTS} more\n`;
    }
    section += "\n";
  }
  return section;
 }

 /** Builds the full Markdown comment body from the per-OS tallies. */
 function buildComment(resultsByOs) {
  let totalPassed = 0;
  let totalFailed = 0;
  let totalSkipped = 0;
  let totalFlaky = 0;
  for (const data of Object.values(resultsByOs)) {
    totalPassed += data.passed;
    totalFailed += data.failed;
    totalSkipped += data.skipped;
    totalFlaky += data.flaky;
  }

  let comment = `## ${COMMENT_MARKER}\n\n`;
  if (totalFailed === 0) {
    comment += "### ✅ All tests passed!\n\n";
    comment += "| OS | Passed | Flaky | Skipped |\n";
    comment += "|:---|:---:|:---:|:---:|\n";
    for (const [os, data] of Object.entries(resultsByOs)) {
      comment += `| ${osEmoji(os)} ${os} | ${data.passed} | ${data.flaky} | ${data.skipped} |\n`;
    }
    comment += `\n**Total: ${totalPassed} tests passed**`;
    if (totalFlaky > 0) comment += ` (${totalFlaky} flaky)`;
    if (totalSkipped > 0) comment += ` (${totalSkipped} skipped)`;
    // List flaky tests even when all passed
    if (totalFlaky > 0) {
      comment += "\n\n### ⚠️ Flaky Tests\n\n";
      comment += renderFlakyLists(resultsByOs);
    }
  } else {
    comment += "### ❌ Some tests failed\n\n";
    comment += "| OS | Passed | Failed | Flaky | Skipped |\n";
    comment += "|:---|:---:|:---:|:---:|:---:|\n";
    for (const [os, data] of Object.entries(resultsByOs)) {
      comment += `| ${osEmoji(os)} ${os} | ${data.passed} | ${data.failed} | ${data.flaky} | ${data.skipped} |\n`;
    }
    comment += `\n**Summary: ${totalPassed} passed, ${totalFailed} failed**`;
    if (totalFlaky > 0) comment += `, ${totalFlaky} flaky`;
    if (totalSkipped > 0) comment += `, ${totalSkipped} skipped`;
    comment += "\n\n### Failed Tests\n\n";
    comment += renderFailureLists(resultsByOs);
    if (totalFlaky > 0) {
      comment += "### ⚠️ Flaky Tests\n\n";
      comment += renderFlakyLists(resultsByOs);
    }
  }

  const repoUrl = `https://github.com/${process.env.GITHUB_REPOSITORY}`;
  const runId = process.env.GITHUB_RUN_ID;
  comment += `\n---\n📊 [View full report](${repoUrl}/actions/runs/${runId})`;
  return comment;
 }

 /** Updates the existing bot summary comment on the PR, or creates one. */
 async function upsertPrComment(github, context, body) {
  const { owner, repo } = context.repo;
  const issueNumber = context.issue.number;
  // Paginate: a single listComments call only returns the first page, so on a
  // busy PR the earlier summary would be missed and a duplicate posted.
  const comments = await github.paginate(github.rest.issues.listComments, {
    owner,
    repo,
    issue_number: issueNumber,
    per_page: 100,
  });
  const botComment = comments.find(
    (c) => c.user?.type === "Bot" && c.body?.includes(COMMENT_MARKER),
  );
  if (botComment) {
    await github.rest.issues.updateComment({
      owner,
      repo,
      comment_id: botComment.id,
      body,
    });
  } else {
    await github.rest.issues.createComment({
      owner,
      repo,
      issue_number: issueNumber,
      body,
    });
  }
 }

 /**
  * Entry point called from the workflow's github-script step.
  *
  * Reads `playwright-report/results.json`, tallies outcomes per OS (the OS set
  * is derived from file names in `all-blob-reports`), and publishes a Markdown
  * summary as a PR comment (on `pull_request` events) and as the job summary.
  * Does nothing except log a message when the JSON report is missing.
  *
  * @param {object} params
  * @param {object} params.github - Octokit client provided by github-script.
  * @param {object} params.context - Workflow run context provided by github-script.
  * @param {object} params.core - `@actions/core` instance provided by github-script.
  * @returns {Promise<void>}
  * @throws If the report is not valid JSON or a GitHub API call fails.
  */
 async function run({ github, context, core }) {
  // Read the JSON report
  const reportPath = "playwright-report/results.json";
  if (!fs.existsSync(reportPath)) {
    console.log("No results.json found, skipping comment");
    return;
  }
  const report = JSON.parse(fs.readFileSync(reportPath, "utf8"));

  // Identify which OSes contributed blob reports
  const blobDir = "all-blob-reports";
  const blobFiles = fs.existsSync(blobDir) ? fs.readdirSync(blobDir) : [];
  const resultsByOs = {};
  if (blobFiles.some((f) => f.includes("darwin"))) {
    resultsByOs["macOS"] = createOsTally();
  }
  if (blobFiles.some((f) => f.includes("win32"))) {
    resultsByOs["Windows"] = createOsTally();
  }

  collectResults(report.suites, resultsByOs);
  const comment = buildComment(resultsByOs);

  try {
    // Post or update comment on PR
    if (context.eventName === "pull_request") {
      await upsertPrComment(github, context, comment);
    }
  } finally {
    // Always output to job summary, even if commenting on the PR failed
    // (the original error still propagates after the summary is written).
    await core.summary.addRaw(comment).write();
  }
 }
 module.exports = { run };