From 47992f48dd332b537fe76800c605c72d9eab564e Mon Sep 17 00:00:00 2001 From: Will Chen Date: Mon, 15 Dec 2025 23:24:08 -0800 Subject: [PATCH] Leave GitHub comment with playwright results (#1965) > [!NOTE] > Posts a per-OS Playwright test summary as a PR comment by adding a JSON reporter and a CI step to generate and publish the results. > > - **CI / Reporting** > - Add permissions and a GitHub Script step in `.github/workflows/ci.yml` to generate and post/update a Playwright test summary comment after merging shard reports. > - Upload merged HTML report artifact and link to full run. > - **Playwright config** > - Update `merge.config.ts` to add JSON reporter output to `playwright-report/results.json` alongside HTML. > - **New Script** > - Add `scripts/generate-playwright-summary.js` to parse Playwright JSON, compute per-OS (macOS/Windows) pass/fail/flaky/skipped counts, list top failures/flaky tests, and write both PR comment and job summary. > > Written by [Cursor Bugbot](https://cursor.com/dashboard?tab=bugbot) for commit d5ca6987f65e9a7063533960382516af89e67391. This will update automatically on new commits. Configure [here](https://cursor.com/dashboard?tab=bugbot). --- ## Summary by cubic Posts Playwright test results as a PR comment with a per-OS summary, flaky test counts, and top failures, improving CI visibility across macOS and Windows. Adds a JSON reporter and a GitHub Script step that links to the full report. - **New Features** - Adds JSON reporter output to playwright-report/results.json. - Adds a script to parse results, include flaky tests, post/update the PR comment, and write the job summary. - Updates CI workflow permissions and runs the summary script after report merge. Written for commit d5ca6987f65e9a7063533960382516af89e67391. Summary will update automatically on new commits. 
// Parses the merged Playwright JSON report and publishes a per-OS summary, both
// as a pull-request comment and as the GitHub Actions job summary.
//
// Wiring introduced together with this script:
// - merge.config.ts lists the reporter
//     ["json", { outputFile: "playwright-report/results.json" }]
//   next to ["html", { open: "never" }]; that JSON file is the input read below.
// - The report-merging job in .github/workflows/ci.yml declares the permissions
//   `contents: read`, `pull-requests: write` and `actions: read`, and ends with
//   a "Generate test summary comment" step using actions/github-script@v7:
//     const { run } = require('./scripts/generate-playwright-summary.js');
//     await run({ github, context, core });

const fs = require("fs");

const REPORT_PATH = "playwright-report/results.json";
const BLOB_DIR = "all-blob-reports";
// Heading text; also used to recognise a comment posted by an earlier run.
const COMMENT_MARKER = "🎭 Playwright Test Results";
// Per-OS cap on listed failures / flaky tests, to keep the comment short.
const MAX_LISTED = 10;
const MAX_ERROR_LENGTH = 150;
const FAILING_STATUSES = new Set(["failed", "timedOut", "interrupted"]);
const OS_ICONS = { macOS: "🍎", Windows: "🪟" };
const UNKNOWN_OS = "Unknown";

/**
 * Strip ANSI colour/style (SGR) escape sequences from terminal output.
 *
 * @param {string} str - Text that may contain escape codes.
 * @returns {string} The text without SGR sequences; falsy input is returned as is.
 */
function stripAnsi(str) {
  if (!str) return str;
  // eslint-disable-next-line no-control-regex
  return str.replace(/\x1b\[[0-9;]*m/g, "");
}

/** Create an empty per-OS accumulator. */
function createBucket() {
  return {
    passed: 0,
    failed: 0,
    skipped: 0,
    flaky: 0,
    failures: [],
    flakyTests: [],
  };
}

/**
 * Guess which OS produced a test from its attempts: first from attachment
 * paths, then from absolute paths in the error stack.
 *
 * @param {object[]} results - All attempts (retries) of one test.
 * @returns {"macOS" | "Windows" | null} The detected OS, or null when unknown.
 */
function detectOs(results) {
  for (const result of results) {
    for (const attachment of result.attachments || []) {
      const attachmentPath = attachment.path || "";
      if (
        attachmentPath.includes("darwin") ||
        attachmentPath.includes("macos")
      ) {
        return "macOS";
      }
      if (
        attachmentPath.includes("win32") ||
        attachmentPath.includes("windows")
      ) {
        return "Windows";
      }
    }

    const stack = result.error?.stack;
    if (stack) {
      if (stack.includes("/Users/")) return "macOS";
      if (stack.includes("C:\\") || stack.includes("D:\\")) return "Windows";
    }
  }
  return null;
}

/**
 * Walk the report's suite tree and tally every test into per-OS buckets.
 *
 * @param {object} report - Parsed Playwright JSON report.
 * @param {string[]} knownOses - OS names inferred from the blob report files.
 * @returns {Object<string, object>} Buckets keyed by OS name.
 */
function collectResults(report, knownOses) {
  const resultsByOs = {};
  for (const name of knownOses) {
    resultsByOs[name] = createBucket();
  }

  // A test whose OS cannot be detected is counted once under EVERY known OS
  // (so totals can exceed the number of distinct tests). When no OS is known
  // at all it goes to an "Unknown" bucket instead of being dropped.
  const fallbackTargets = knownOses.length > 0 ? knownOses : [UNKNOWN_OS];

  const recordTest = (title, results) => {
    // The last attempt decides the outcome of the test.
    const finalResult = results[results.length - 1];
    const status = finalResult.status;
    const hadPriorFailure = results
      .slice(0, -1)
      .some((attempt) => FAILING_STATUSES.has(attempt.status));
    const isFlaky = status === "passed" && hadPriorFailure;

    const detectedOs = detectOs(results);
    const targets = detectedOs ? [detectedOs] : fallbackTargets;

    for (const target of targets) {
      // Create buckets on demand so a result is never silently discarded.
      if (!resultsByOs[target]) {
        resultsByOs[target] = createBucket();
      }
      const bucket = resultsByOs[target];

      if (isFlaky) {
        bucket.flaky++;
        bucket.passed++;
        bucket.flakyTests.push({ title, retries: results.length - 1 });
      } else if (status === "passed") {
        bucket.passed++;
      } else if (FAILING_STATUSES.has(status)) {
        bucket.failed++;
        const errorMsg =
          finalResult.error?.message?.split("\n")[0] || "Test failed";
        bucket.failures.push({ title, error: stripAnsi(errorMsg) });
      } else if (status === "skipped") {
        bucket.skipped++;
      }
    }
  };

  const visitSuites = (suites, parentTitle) => {
    for (const suite of suites || []) {
      const suiteTitle = parentTitle
        ? `${parentTitle} > ${suite.title}`
        : suite.title;

      for (const spec of suite.specs || []) {
        for (const test of spec.tests || []) {
          const results = test.results || [];
          if (results.length === 0) continue;
          recordTest(`${suiteTitle} > ${spec.title}`, results);
        }
      }

      visitSuites(suite.suites, suiteTitle);
    }
  };

  visitSuites(report.suites, "");
  return resultsByOs;
}

/** Icon + name used for an OS in tables and headings. */
function osLabel(os) {
  return `${OS_ICONS[os] ?? "❓"} ${os}`;
}

/** Render at most MAX_LISTED items, followed by an "... and N more" line. */
function renderLimitedList(items, renderItem) {
  let out = items.slice(0, MAX_LISTED).map(renderItem).join("");
  if (items.length > MAX_LISTED) {
    out += `- ... and ${items.length - MAX_LISTED} more\n`;
  }
  return out;
}

/** Render the "Flaky Tests" section, grouped by OS. */
function renderFlakySection(buckets) {
  let section = "### ⚠️ Flaky Tests\n\n";
  for (const [os, data] of buckets) {
    if (data.flakyTests.length === 0) continue;
    section += `#### ${osLabel(os)}\n\n`;
    section += renderLimitedList(
      data.flakyTests,
      (f) =>
        `- \`${f.title}\` (passed after ${f.retries} ${f.retries === 1 ? "retry" : "retries"})\n`,
    );
    section += "\n";
  }
  return section;
}

/** Render the "Failed Tests" section, grouped by OS. */
function renderFailedSection(buckets) {
  let section = "### Failed Tests\n\n";
  for (const [os, data] of buckets) {
    if (data.failures.length === 0) continue;
    section += `#### ${osLabel(os)}\n\n`;
    section += renderLimitedList(data.failures, (f) => {
      const errorPreview =
        f.error.length > MAX_ERROR_LENGTH
          ? `${f.error.substring(0, MAX_ERROR_LENGTH)}...`
          : f.error;
      return `- \`${f.title}\`\n  - ${errorPreview}\n`;
    });
    section += "\n";
  }
  return section;
}

/**
 * Build the Markdown body shared by the PR comment and the job summary.
 *
 * @param {Object<string, object>} resultsByOs - Buckets from collectResults.
 * @param {string} reportUrl - Link to the workflow run.
 * @returns {string} Markdown text.
 */
function buildComment(resultsByOs, reportUrl) {
  const buckets = Object.entries(resultsByOs);
  const totals = { passed: 0, failed: 0, skipped: 0, flaky: 0 };
  for (const [, data] of buckets) {
    totals.passed += data.passed;
    totals.failed += data.failed;
    totals.skipped += data.skipped;
    totals.flaky += data.flaky;
  }
  const flakySection = totals.flaky > 0 ? renderFlakySection(buckets) : "";

  let comment = `## ${COMMENT_MARKER}\n\n`;

  if (totals.failed === 0) {
    comment += "### ✅ All tests passed!\n\n";
    comment += "| OS | Passed | Flaky | Skipped |\n";
    comment += "|:---|:---:|:---:|:---:|\n";
    for (const [os, data] of buckets) {
      comment += `| ${osLabel(os)} | ${data.passed} | ${data.flaky} | ${data.skipped} |\n`;
    }
    comment += `\n**Total: ${totals.passed} tests passed**`;
    if (totals.flaky > 0) comment += ` (${totals.flaky} flaky)`;
    if (totals.skipped > 0) comment += ` (${totals.skipped} skipped)`;
    // Flaky tests are listed even when everything eventually passed.
    if (flakySection) comment += `\n\n${flakySection}`;
  } else {
    comment += "### ❌ Some tests failed\n\n";
    comment += "| OS | Passed | Failed | Flaky | Skipped |\n";
    comment += "|:---|:---:|:---:|:---:|:---:|\n";
    for (const [os, data] of buckets) {
      comment += `| ${osLabel(os)} | ${data.passed} | ${data.failed} | ${data.flaky} | ${data.skipped} |\n`;
    }
    comment += `\n**Summary: ${totals.passed} passed, ${totals.failed} failed**`;
    if (totals.flaky > 0) comment += `, ${totals.flaky} flaky`;
    if (totals.skipped > 0) comment += `, ${totals.skipped} skipped`;
    comment += `\n\n${renderFailedSection(buckets)}`;
    comment += flakySection;
  }

  comment += `\n---\n📊 [View full report](${reportUrl})`;
  return comment;
}

/**
 * Entry point called from the actions/github-script step.
 *
 * Reads playwright-report/results.json (does nothing but log when it is
 * missing), writes the summary to the job summary and, on `pull_request`
 * events, creates or updates the bot comment on the PR.
 *
 * @param {object} params
 * @param {object} params.github - Octokit client provided by github-script.
 * @param {object} params.context - Workflow run context (event name, repo, issue).
 * @param {object} params.core - @actions/core toolkit (used for `core.summary`).
 * @returns {Promise<void>}
 * @throws When the report is not valid JSON or a GitHub API call fails.
 */
async function run({ github, context, core }) {
  if (!fs.existsSync(REPORT_PATH)) {
    console.log("No results.json found, skipping comment");
    return;
  }

  const report = JSON.parse(fs.readFileSync(REPORT_PATH, "utf8"));

  // The blob report file names reveal which OS shards contributed results.
  const blobFiles = fs.existsSync(BLOB_DIR) ? fs.readdirSync(BLOB_DIR) : [];
  const knownOses = [];
  if (blobFiles.some((f) => f.includes("darwin"))) knownOses.push("macOS");
  if (blobFiles.some((f) => f.includes("win32"))) knownOses.push("Windows");

  const resultsByOs = collectResults(report, knownOses);

  const repoUrl = `https://github.com/${process.env.GITHUB_REPOSITORY}`;
  const runId = process.env.GITHUB_RUN_ID;
  const comment = buildComment(resultsByOs, `${repoUrl}/actions/runs/${runId}`);

  // Write the job summary first so it is available even when commenting on
  // the PR fails (e.g. a token without write access).
  await core.summary.addRaw(comment).write();

  if (context.eventName !== "pull_request") return;

  const issue = {
    owner: context.repo.owner,
    repo: context.repo.repo,
    issue_number: context.issue.number,
  };

  // Paginate: the previous summary may be beyond the first page of comments,
  // and missing it would post a duplicate on every run.
  const comments = await github.paginate(github.rest.issues.listComments, {
    ...issue,
    per_page: 100,
  });
  const botComment = comments.find(
    (c) => c.user?.type === "Bot" && c.body?.includes(COMMENT_MARKER),
  );

  if (botComment) {
    await github.rest.issues.updateComment({
      owner: issue.owner,
      repo: issue.repo,
      comment_id: botComment.id,
      body: comment,
    });
  } else {
    await github.rest.issues.createComment({ ...issue, body: comment });
  }
}

module.exports = { run };