From 8ef84285fcee8e63eb9ca25a08d094fd9593d8a2 Mon Sep 17 00:00:00 2001
From: Will Chen <willchen90@gmail.com>
Date: Thu, 14 Aug 2025 13:48:27 -0700
Subject: [PATCH] Shard E2E tests (#941)

---
 .github/workflows/ci.yml         | 80 +++++++++++++++++++++++++-------
 e2e-tests/delete_app.spec.ts     |  1 +
 e2e-tests/helpers/test_helper.ts | 66 ++++++++++++--------------
 e2e-tests/problems.spec.ts       | 14 +++---
 merge.config.ts                  |  4 ++
 package.json                     |  3 +-
 playwright.config.ts             | 18 ++++++-
 7 files changed, 124 insertions(+), 62 deletions(-)
 create mode 100644 merge.config.ts

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ebf182b..0d8b3c6 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,12 +24,14 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os:
-          [
-            { name: "windows-arm", image: "windows-11-arm" },
+        os: [
+            # windows-arm is disabled for now because npm install is very slow
+            # { name: "windows-arm", image: "windows-11-arm" },
             { name: "windows", image: "windows-latest" },
             { name: "macos", image: "macos-latest" },
           ]
+        shard: [1, 2, 3, 4]
+        shardTotal: [4]
     runs-on: ${{ matrix.os.image }}
     steps:
       - name: Checkout code
@@ -44,15 +46,18 @@ jobs:
         run: npm ci --no-audit --no-fund --progress=false
       - name: Presubmit check (e.g. lint, format)
         # do not run this on Windows (it fails and not necessary)
-        if: contains(matrix.os.name, 'macos')
+        # Only run on shard 1 to avoid redundant execution
+        if: contains(matrix.os.name, 'macos') && matrix.shard == 1
         run: npm run presubmit
       - name: Type-checking
         # do not run this on windows (it's redunant)
-        if: contains(matrix.os.name, 'macos')
+        # Only run on shard 1 to avoid redundant execution
+        if: contains(matrix.os.name, 'macos') && matrix.shard == 1
         run: npm run ts
       - name: Unit tests
         # do not run this on windows (it's redunant)
-        if: contains(matrix.os.name, 'macos')
+        # Only run on shard 1 to avoid redundant execution
+        if: contains(matrix.os.name, 'macos') && matrix.shard == 1
         run: npm run test
       - name: Setup pnpm
         uses: pnpm/action-setup@a7487c7e89a18df4991f7f222e4898a00d66ddda # v4.1.0
@@ -83,19 +88,60 @@ jobs:
         run: npm run pre:e2e
       - name: Prep test server
         run: cd testing/fake-llm-server && npm install && npm run build && cd -
-      - name: E2E tests
+      - name: E2E tests (Shard ${{ matrix.shard }}/${{ matrix.shardTotal }})
         # You can add debug logging to make it easier to see what's failing
         # by adding "DEBUG=pw:browser" in front.
-        run: DEBUG=pw:browser npm run e2e
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        if: failure()
+        # Each shard writes a blob report (see reporter in playwright.config.ts) so results can be merged later
+        run: DEBUG=pw:browser npx playwright test --shard=${{ matrix.shard }}/${{ matrix.shardTotal }}
+      - name: Upload shard results
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        if: ${{ !cancelled() }}
         with:
-          name: playwright-report-${{ matrix.os.name }}
-          path: playwright-report/
-          retention-days: 3
-      - uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
-        if: failure()
+          name: blob-report-${{ matrix.os.name }}-shard-${{ matrix.shard }}
+          path: blob-report
+          retention-days: 1
+
+  merge-reports:
+    # Merge reports after the test job finishes, even if some shards have failed
+    if: ${{ !cancelled() }}
+    needs: [test]
+
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
         with:
-          name: test-results-${{ matrix.os.name }}
-          path: test-results/
+          node-version: lts/*
+      - name: Install dependencies
+        run: npm ci --no-audit --no-fund --progress=false
+
+      - name: Download blob reports from GitHub Actions Artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: all-blob-reports
+          pattern: blob-report-*
+          merge-multiple: true
+
+      - name: Debug - List downloaded blob reports
+        run: |
+          echo "Contents of all-blob-reports directory:"
+          ls -la all-blob-reports/
+          echo "File sizes and details:"
+          find all-blob-reports/ -type f -exec ls -lh {} \; || echo "No files found"
+
+      - name: Merge into HTML Report
+        run: PLAYWRIGHT_HTML_OUTPUT_DIR=playwright-report npx playwright merge-reports --config=merge.config.ts ./all-blob-reports
+
+      - name: Debug - List playwright-report contents
+        run: |
+          echo "Contents of playwright-report directory:"
+          ls -la playwright-report/ || echo "playwright-report directory does not exist"
+          echo "Current directory contents:"
+          ls -la
+
+      - name: Upload HTML report
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: html-report--attempt-${{ github.run_attempt }}
+          path: playwright-report
           retention-days: 3
diff --git a/e2e-tests/delete_app.spec.ts b/e2e-tests/delete_app.spec.ts
index d4fe12d..1024868 100644
--- a/e2e-tests/delete_app.spec.ts
+++ b/e2e-tests/delete_app.spec.ts
@@ -1,6 +1,7 @@
 import fs from "fs";
 import { test } from "./helpers/test_helper";
 import { expect } from "@playwright/test";
+
 test("delete app", async ({ po }) => {
   await po.setUp();
   await po.sendPrompt("hi");
diff --git a/e2e-tests/helpers/test_helper.ts b/e2e-tests/helpers/test_helper.ts
index 1727c31..26b61bf 100644
--- a/e2e-tests/helpers/test_helper.ts
+++ b/e2e-tests/helpers/test_helper.ts
@@ -251,58 +251,52 @@ export class PageObject {
     await this.goToAppsTab();
   }
 
-  async runPnpmInstall() {
+  async ensurePnpmInstall() {
     const appPath = await this.getCurrentAppPath();
     if (!appPath) {
       throw new Error("No app selected");
     }
 
-    const maxRetries = 3;
-    let lastError: any;
+    const maxDurationMs = 180_000; // overall time budget for polling: 3 minutes
+    const retryIntervalMs = 15_000; // wait up to 15 seconds between checks
+    const startTime = Date.now();
+    let lastOutput = ""; // latest check output; included in the timeout error below
 
-    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+    const checkCommand = `node -e 'const pkg=require("./package.json");const{execSync}=require("child_process");try{const prodResult=JSON.parse(execSync("pnpm list --json --depth=0",{encoding:"utf8"}));const devResult=JSON.parse(execSync("pnpm list --json --depth=0 --dev",{encoding:"utf8"}));const installed={...(prodResult[0]||{}).dependencies||{},...(devResult[0]||{}).devDependencies||{}};const expected=Object.keys({...pkg.dependencies||{},...pkg.devDependencies||{}});const missing=expected.filter(dep=>!installed[dep]);console.log(missing.length?"MISSING: "+missing.join(", "):"All dependencies installed")}catch(e){console.log("Error:",e.message)}'`;
+
+    while (Date.now() - startTime < maxDurationMs) {
       try {
-        console.log(
-          `Running 'pnpm install' in ${appPath} (attempt ${attempt}/${maxRetries})`,
-        );
-        execSync("pnpm install", {
+        console.log(`Checking installed dependencies in ${appPath}...`);
+        const stdout = execSync(checkCommand, {
           cwd: appPath,
           stdio: "pipe",
           encoding: "utf8",
         });
-        console.log(`'pnpm install' succeeded on attempt ${attempt}`);
-        return; // Success, exit the function
+        lastOutput = (stdout || "").toString().trim();
+        console.log(`Dependency check output: ${lastOutput}`);
+        if (lastOutput.includes("All dependencies installed")) { // printed by checkCommand when no package.json dependency is missing
+          return;
+        }
       } catch (error: any) {
-        lastError = error;
-        console.error(
-          `Attempt ${attempt}/${maxRetries} failed to run 'pnpm install' in ${appPath}`,
-        );
-        console.error(`Exit code: ${error.status}`);
-        console.error(`Command: ${error.cmd || "pnpm install"}`);
-
-        if (error.stdout) {
-          console.error(`STDOUT:\n${error.stdout}`);
-        }
-
-        if (error.stderr) {
-          console.error(`STDERR:\n${error.stderr}`);
-        }
-
-        // If this wasn't the last attempt, wait a bit before retrying
-        if (attempt < maxRetries) {
-          const delayMs = 1000 * attempt; // Exponential backoff: 1s, 2s
-          console.log(`Waiting ${delayMs}ms before retry...`);
-          await new Promise((resolve) => setTimeout(resolve, delayMs));
-        }
+        // Capture any error output to include in the final error if we time out
+        const stdOut = error?.stdout ? error.stdout.toString() : "";
+        const stdErr = error?.stderr ? error.stderr.toString() : "";
+        lastOutput = [stdOut, stdErr, error?.message]
+          .filter(Boolean)
+          .join("\n");
+        console.error("Dependency check command failed:", lastOutput);
       }
+
+      const elapsed = Date.now() - startTime;
+      const remaining = Math.max(0, maxDurationMs - elapsed);
+      const waitMs = Math.min(retryIntervalMs, remaining);
+      if (waitMs <= 0) break; // time budget used up; fall through to the timeout error
+      console.log(`Waiting ${waitMs}ms before retry...`);
+      await new Promise((resolve) => setTimeout(resolve, waitMs));
     }
 
-    // All attempts failed, throw the last error with enhanced message
     throw new Error(
-      `pnpm install failed in ${appPath} after ${maxRetries} attempts. ` +
-        `Exit code: ${lastError.status}. ` +
-        `${lastError.stderr ? `Error: ${lastError.stderr}` : ""}` +
-        `${lastError.stdout ? ` Output: ${lastError.stdout}` : ""}`,
+      `Dependencies not fully installed in ${appPath} after 3 minutes. Last output: ${lastOutput}`,
     );
   }
 
diff --git a/e2e-tests/problems.spec.ts b/e2e-tests/problems.spec.ts
index 4e11ca4..fd22175 100644
--- a/e2e-tests/problems.spec.ts
+++ b/e2e-tests/problems.spec.ts
@@ -1,4 +1,4 @@
-import { test } from "./helpers/test_helper";
+import { test, testSkipIfWindows } from "./helpers/test_helper";
 import { expect } from "@playwright/test";
 import fs from "fs";
 import path from "path";
@@ -58,7 +58,7 @@ test("problems auto-fix - disabled", async ({ po }) => {
   await po.snapshotMessages();
 });
 
-test("problems - fix all", async ({ po }) => {
+testSkipIfWindows("problems - fix all", async ({ po }) => {
   await po.setUp({ enableAutoFixProblems: true });
   await po.importApp(MINIMAL_APP);
   const appPath = await po.getCurrentAppPath();
@@ -73,7 +73,7 @@ nonExistentFunction3();
 export default App;
 `,
   );
-  await po.runPnpmInstall();
+  await po.ensurePnpmInstall();
 
   await po.sendPrompt("tc=create-ts-errors");
   await po.selectPreviewMode("problems");
@@ -83,7 +83,7 @@ export default App;
   await po.snapshotMessages({ replaceDumpPath: true });
 });
 
-test("problems - manual edit (react/vite)", async ({ po }) => {
+testSkipIfWindows("problems - manual edit (react/vite)", async ({ po }) => {
   await po.setUp({ enableAutoFixProblems: true });
   await po.sendPrompt("tc=1");
 
@@ -97,7 +97,7 @@ nonExistentFunction();
 export default App;
 `,
   );
-  await po.runPnpmInstall();
+  await po.ensurePnpmInstall();
   await po.clickTogglePreviewPanel();
 
   await po.selectPreviewMode("problems");
@@ -110,7 +110,7 @@ export default App;
   await po.snapshotProblemsPane();
 });
 
-test("problems - manual edit (next.js)", async ({ po }) => {
+testSkipIfWindows("problems - manual edit (next.js)", async ({ po }) => {
   await po.setUp({ enableAutoFixProblems: true });
   await po.goToHubAndSelectTemplate("Next.js Template");
   await po.sendPrompt("tc=1");
@@ -125,7 +125,7 @@ test("problems - manual edit (next.js)", async ({ po }) => {
   export default App;
   `,
   );
-  await po.runPnpmInstall();
+  await po.ensurePnpmInstall();
   await po.clickTogglePreviewPanel();
 
   await po.selectPreviewMode("problems");
diff --git a/merge.config.ts b/merge.config.ts
new file mode 100644
index 0000000..05f982f
--- /dev/null
+++ b/merge.config.ts
@@ -0,0 +1,4 @@
+export default {
+  testDir: "e2e-tests",
+  reporter: [["html", { open: "never" }]],
+};
diff --git a/package.json b/package.json
index 702029c..b0a4874 100644
--- a/package.json
+++ b/package.json
@@ -38,7 +38,8 @@
     "extract-codebase": "ts-node scripts/extract-codebase.ts",
     "prepare": "husky install",
     "pre:e2e": "cross-env E2E_TEST_BUILD=true npm run package",
-    "e2e": "playwright test"
+    "e2e": "playwright test",
+    "e2e:shard": "playwright test --shard"
   },
   "keywords": [],
   "author": {
diff --git a/playwright.config.ts b/playwright.config.ts
index f6db8fa..510529d 100644
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -1,4 +1,7 @@
 import { PlaywrightTestConfig } from "@playwright/test";
+import os from "os";
+
+const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
 
 const config: PlaywrightTestConfig = {
   testDir: "./e2e-tests",
@@ -12,7 +15,20 @@ const config: PlaywrightTestConfig = {
     "{testDir}/{testFileDir}/snapshots/{testFileName}_{arg}{ext}",
 
   /* Reporter to use. See https://playwright.dev/docs/test-reporters */
-  reporter: "html",
+  // Why not use GitHub reporter? Because we're using matrix and it's discouraged:
+  // https://playwright.dev/docs/test-reporters#github-actions-annotations
+  reporter: process.env.CI
+    ? [
+        [
+          "blob",
+          {
+            // Speculatively fix https://github.com/actions/download-artifact/issues/298#issuecomment-2016075998
+            // by using a timestamp in the filename
+            outputFile: `./blob-report/report-${os.platform()}-${timestamp}.zip`,
+          },
+        ],
+      ]
+    : [["html"], ["line"]],
   /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
   use: {
     /* See https://playwright.dev/docs/trace-viewer */