diff --git a/apps/admin-ui/src/components/SiteScannerTab.tsx b/apps/admin-ui/src/components/SiteScannerTab.tsx
index e486f9f..ae83fdd 100644
--- a/apps/admin-ui/src/components/SiteScannerTab.tsx
+++ b/apps/admin-ui/src/components/SiteScannerTab.tsx
@@ -2,17 +2,34 @@ import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query';
 import { Fragment, useState } from 'react';
 
 import { getScan, getScanDiff, listScans, triggerScan } from '../api/scanner';
+import { getSiteConfig, updateSiteConfig } from '../api/sites';
 import { trackFeatureUsage } from '../services/analytics';
-import type { CookieDiffItem, ScanDiff, ScanJob, ScanJobDetail, ScanResult } from '../types/api';
+import type { CookieDiffItem, ScanDiff, ScanJob, ScanJobDetail, ScanResult, SiteConfig } from '../types/api';
 import { Alert } from './ui/alert';
 import { Badge } from './ui/badge';
 import { Button } from './ui/button';
+import { Card } from './ui/card';
 import { LoadingState } from './ui/loading-state';
+import { Select } from './ui/select';
 
 interface Props {
   siteId: string;
 }
 
+const SCHEDULE_OPTIONS: { value: string; label: string; cron: string | null }[] = [
+  { value: 'disabled', label: 'Disabled', cron: null },
+  { value: 'daily', label: 'Daily', cron: '0 3 * * *' },
+  { value: 'weekly', label: 'Weekly', cron: '0 3 * * 0' },
+  { value: 'fortnightly', label: 'Fortnightly', cron: '0 3 1,15 * *' },
+  { value: 'monthly', label: 'Monthly', cron: '0 3 1 * *' },
+];
+
+function cronToScheduleValue(cron: string | null | undefined): string {
+  if (!cron) return 'disabled';
+  const match = SCHEDULE_OPTIONS.find((o) => o.cron === cron);
+  return match?.value ?? 'custom';
+}
+
 function statusVariant(status: string): 'warning' | 'info' | 'success' | 'error' | 'neutral' {
   const map: Record<string, 'warning' | 'info' | 'success' | 'error' | 'neutral'> = {
     pending: 'warning',
@@ -183,6 +200,45 @@ export default function SiteScannerTab({ siteId }: Props) {
   const queryClient = useQueryClient();
   const [expandedScanId, setExpandedScanId] = useState<string | null>(null);
 
+  const { data: config } = useQuery({
+    queryKey: ['sites', siteId, 'config'],
+    queryFn: () => getSiteConfig(siteId),
+  });
+
+  const currentCron = config?.scan_schedule_cron ?? null;
+  const savedValue = cronToScheduleValue(currentCron);
+  const [selectedSchedule, setSelectedSchedule] = useState<string | null>(null);
+  const [customCron, setCustomCron] = useState('');
+
+  // Use local selection if the user has interacted, otherwise fall
+  // back to what's saved on the server.
+  const activeValue = selectedSchedule ?? savedValue;
+  const showCustomInput = activeValue === 'custom';
+
+  const scheduleMutation = useMutation({
+    mutationFn: (cron: string | null) => updateSiteConfig(siteId, { scan_schedule_cron: cron } as Partial<SiteConfig>),
+    onSuccess: () => {
+      queryClient.invalidateQueries({ queryKey: ['sites', siteId, 'config'] });
+      trackFeatureUsage('scan', 'schedule_change', { site_id: siteId });
+      setSelectedSchedule(null); // reset to server state
+    },
+  });
+
+  const handleScheduleChange = (value: string) => {
+    setSelectedSchedule(value);
+    if (value === 'custom') {
+      setCustomCron(currentCron ?? '');
+      return;
+    }
+    const option = SCHEDULE_OPTIONS.find((o) => o.value === value);
+    scheduleMutation.mutate(option?.cron ?? null);
+  };
+
+  const handleCustomSave = () => {
+    const trimmed = customCron.trim();
+    scheduleMutation.mutate(trimmed || null);
+  };
+
   const { data: scans, isLoading } = useQuery({
     queryKey: ['scans', siteId],
     queryFn: () => listScans(siteId),
@@ -202,6 +258,64 @@ export default function SiteScannerTab({ siteId }: Props) {
   return (
     <div>
+      {/* Scan schedule */}
+      <Card>
+        <h3>
+          Scan Schedule
+        </h3>
+        <p>
+          Scheduled scans run automatically and re-discover cookies so your inventory stays
+          current. Select a preset or enter a custom cron expression.
+        </p>
+        <div>
+          <Select
+            value={activeValue}
+            onChange={(e) => handleScheduleChange(e.target.value)}
+          >
+            {SCHEDULE_OPTIONS.map((o) => (
+              <option key={o.value} value={o.value}>
+                {o.label}
+              </option>
+            ))}
+            <option value="custom">
+              Custom
+            </option>
+          </Select>
+          {showCustomInput && (
+            <>
+              <input
+                type="text"
+                value={customCron}
+                onChange={(e) => setCustomCron(e.target.value)}
+              />
+              <Button onClick={handleCustomSave}>
+                Save
+              </Button>
+              <a
+                href="https://crontab.guru"
+                target="_blank"
+                rel="noreferrer"
+              >
+                Need help? Use crontab.guru →
+              </a>
+            </>
+          )}
+          {scheduleMutation.isPending && (
+            <span>
+              Saving…
+            </span>
+          )}
+        </div>
+        {currentCron && (
+          <p>
+            Current schedule: <code>{currentCron}</code>
+          </p>
+        )}
+      </Card>
+
+      {/* Header with trigger button */}
       <div>
         <h2>Cookie Scans</h2>
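The preset-to-cron mapping above is the whole contract with the API: the UI only ever stores one of those cron strings (or null). A quick way to sanity-check what each preset means in practice, assuming the third-party croniter package, which is not used anywhere in this change and appears here purely for preview:

```python
# Preview when each schedule preset would next fire. `croniter` is an
# assumption for illustration; the API may resolve cron differently.
from datetime import UTC, datetime

from croniter import croniter

PRESETS = {
    "daily": "0 3 * * *",           # 03:00 every day
    "weekly": "0 3 * * 0",          # 03:00 every Sunday
    "fortnightly": "0 3 1,15 * *",  # 03:00 on the 1st and 15th
    "monthly": "0 3 1 * *",         # 03:00 on the 1st
}

now = datetime.now(UTC)
for name, expr in PRESETS.items():
    nxt = croniter(expr, now).get_next(datetime)
    print(f"{name:12} {expr:15} next: {nxt:%Y-%m-%d %H:%M}")
```

Note that "Fortnightly" is really semi-monthly (the 1st and 15th); a true every-14-days cadence cannot be expressed in a single plain cron expression, so the preset is an approximation.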
diff --git a/apps/admin-ui/src/test/SiteCategoriesTab.test.tsx b/apps/admin-ui/src/test/SiteCategoriesTab.test.tsx
index 956c13c..3f098ba 100644
--- a/apps/admin-ui/src/test/SiteCategoriesTab.test.tsx
+++ b/apps/admin-ui/src/test/SiteCategoriesTab.test.tsx
@@ -43,6 +43,7 @@ const BASE_CONFIG: SiteConfig = {
   scan_enabled: true,
   scan_frequency_hours: 168,
   scan_max_pages: 50,
+  scan_schedule_cron: null,
   enabled_categories: null,
   created_at: '2025-01-01T00:00:00Z',
   updated_at: '2025-01-01T00:00:00Z',
diff --git a/apps/admin-ui/src/test/SiteConfigTab.test.tsx b/apps/admin-ui/src/test/SiteConfigTab.test.tsx
index fba00d2..62784e9 100644
--- a/apps/admin-ui/src/test/SiteConfigTab.test.tsx
+++ b/apps/admin-ui/src/test/SiteConfigTab.test.tsx
@@ -41,6 +41,7 @@ const BASE_CONFIG: SiteConfig = {
   scan_enabled: true,
   scan_frequency_hours: 168,
   scan_max_pages: 50,
+  scan_schedule_cron: null,
   enabled_categories: null,
   created_at: '2025-01-01T00:00:00Z',
   updated_at: '2025-01-01T00:00:00Z',
diff --git a/apps/admin-ui/src/types/api.ts b/apps/admin-ui/src/types/api.ts
index d0636b3..ef2026d 100644
--- a/apps/admin-ui/src/types/api.ts
+++ b/apps/admin-ui/src/types/api.ts
@@ -129,6 +129,7 @@ export interface SiteConfig {
   scan_enabled: boolean;
   scan_frequency_hours: number;
   scan_max_pages: number;
+  scan_schedule_cron: string | null;
   /**
    * Cookie categories the banner should display. ``null`` means
    * "inherit from the cascade" (group → org → system default of all
diff --git a/apps/api/data/open-cookie-database.csv b/apps/api/data/open-cookie-database.csv
index b44506a..c91fefa 100644
--- a/apps/api/data/open-cookie-database.csv
+++ b/apps/api/data/open-cookie-database.csv
@@ -2263,3 +2263,4 @@ c7d8e9f0-0012-4567-890a-000000000012,Plausible Analytics,Analytics,plausible_,,"
 c7d8e9f0-0013-4567-890a-000000000013,Fathom Analytics,Analytics,_fathom,,"Privacy-focused simple website analytics with minimal data collection.",Varies,Conva Ventures Inc,https://usefathom.com/privacy,1
 c7d8e9f0-0014-4567-890a-000000000014,Umami,Analytics,umami.,,"Open-source privacy-friendly web analytics alternative.",Varies,Website operator,https://umami.is/docs/about,1
 c7d8e9f0-0015-4567-890a-000000000015,Vercel,Functional,_vercel_,,"Vercel platform cookies for deployment previews and analytics.",Varies,Vercel Inc,https://vercel.com/legal/privacy-policy,1
+c7d8e9f0-0016-4567-890a-000000000016,Google Ads,Marketing,_gcl_ls,,"Google Click Identifier for localStorage-based ad conversion tracking.",90 Days,Google,https://business.safety.google/privacy/,0
diff --git a/apps/api/src/services/classification.py b/apps/api/src/services/classification.py
index ec7ecc3..9faeeec 100644
--- a/apps/api/src/services/classification.py
+++ b/apps/api/src/services/classification.py
@@ -174,6 +174,26 @@ def classify_cookie(
 
     This is a pure function — all data is passed in, no DB calls.
     """
+    # 0. ConsentOS's own cookies are always necessary. The banner's
+    #    blocker already treats ``_consentos_*`` as exempt; the
+    #    classifier must agree so the admin UI shows them in the
+    #    right category without requiring a known-cookies DB entry.
+    if cookie_name.startswith("_consentos_"):
+        necessary = next(
+            (cat for cat in category_map.values() if cat.slug == "necessary"),
+            None,
+        )
+        return ClassificationResult(
+            cookie_name=cookie_name,
+            cookie_domain=cookie_domain,
+            category_id=necessary.id if necessary else None,
+            category_slug="necessary",
+            vendor="ConsentOS",
+            description="ConsentOS consent management cookie.",
+            match_source=MatchSource.KNOWN_EXACT,
+            matched=True,
+        )
+
     # 1. Check allow-list first (site-specific overrides)
     allow_match = _match_allow_list(cookie_name, cookie_domain, allow_list)
     if allow_match:
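Because the `_consentos_` check runs before the allow-list and known-cookie lookups, site-level overrides can never re-categorise the CMP's own cookies. A sketch of a regression test pinning that down; `category_map` and `allow_list` appear in the hunk, while any further `classify_cookie` parameters are omitted here and assumed to default:

```python
# Sketch of a regression test for the _consentos_ short-circuit.
# The category_map fixture is hypothetical; the real fixtures live
# in the API test suite.
from src.services.classification import classify_cookie


def test_consentos_cookie_short_circuits_to_necessary(category_map):
    result = classify_cookie(
        cookie_name="_consentos_consent",
        cookie_domain="shop.example.com",
        category_map=category_map,
        allow_list=[],
    )
    assert result.matched is True
    assert result.category_slug == "necessary"
    assert result.vendor == "ConsentOS"
```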
diff --git a/apps/api/src/services/scanner.py b/apps/api/src/services/scanner.py
index fc56f47..81f8bb0 100644
--- a/apps/api/src/services/scanner.py
+++ b/apps/api/src/services/scanner.py
@@ -12,7 +12,7 @@ from datetime import UTC, datetime
 from sqlalchemy import select
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from src.models.cookie import Cookie
+from src.models.cookie import Cookie, CookieCategory
 from src.models.scan import ScanJob, ScanResult
 from src.models.site import Site
 from src.schemas.scanner import (
@@ -261,7 +261,13 @@ async def sync_scan_results_to_cookies(
     """Upsert scan results into the site's cookie inventory.
 
     Creates new Cookie records for newly discovered items or updates
-    last_seen_at for existing ones. Returns the number of new cookies.
+    ``last_seen_at`` for existing ones. When ``auto_category`` is set
+    on the scan result and the cookie doesn't already have a
+    manually-assigned category, the auto-classified category is
+    propagated to the cookie inventory so it shows up categorised in
+    the admin UI without requiring manual review.
+
+    Returns the number of new cookies.
     """
     results = await db.execute(select(ScanResult).where(ScanResult.scan_job_id == scan_job_id))
     items = list(results.scalars().all())
@@ -269,6 +275,10 @@ async def sync_scan_results_to_cookies(
     now_iso = datetime.now(UTC).isoformat()
     new_count = 0
 
+    # Pre-load the category slug → id mapping so we don't query per cookie.
+    cat_rows = await db.execute(select(CookieCategory))
+    slug_to_id: dict[str, uuid.UUID] = {cat.slug: cat.id for cat in cat_rows.scalars().all()}
+
     for item in items:
         existing = await db.execute(
             select(Cookie).where(
@@ -280,14 +290,21 @@ async def sync_scan_results_to_cookies(
         )
         cookie = existing.scalar_one_or_none()
 
+        # Resolve the auto-category slug to a category_id.
+        auto_cat_id = slug_to_id.get(item.auto_category) if item.auto_category else None
+
         if cookie:
             cookie.last_seen_at = now_iso
+            # Back-fill the category if not manually assigned yet.
+            if auto_cat_id and not cookie.category_id:
+                cookie.category_id = auto_cat_id
         else:
             cookie = Cookie(
                 site_id=site_id,
                 name=item.cookie_name,
                 domain=item.cookie_domain,
                 storage_type=item.storage_type,
+                category_id=auto_cat_id,
                 review_status="pending",
                 first_seen_at=now_iso,
                 last_seen_at=now_iso,
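The subtle part of the sync change is the precedence rule: auto-classification fills a missing category but never overwrites one a reviewer assigned by hand. The same rule in isolation, as a standalone sketch rather than code from the service:

```python
# Standalone sketch of the category-resolution rule used in
# sync_scan_results_to_cookies: manual assignments always win.
import uuid


def resolve_category(
    existing_id: uuid.UUID | None,
    auto_slug: str | None,
    slug_to_id: dict[str, uuid.UUID],
) -> uuid.UUID | None:
    auto_id = slug_to_id.get(auto_slug) if auto_slug else None
    if existing_id is not None:
        return existing_id  # a manually-assigned category wins
    return auto_id


analytics_id = uuid.uuid4()
marketing_id = uuid.uuid4()
assert resolve_category(None, "analytics", {"analytics": analytics_id}) == analytics_id
assert resolve_category(analytics_id, "marketing", {"marketing": marketing_id}) == analytics_id
```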
diff --git a/apps/scanner/src/crawler.py b/apps/scanner/src/crawler.py
index c87f5fb..bffd1c9 100644
--- a/apps/scanner/src/crawler.py
+++ b/apps/scanner/src/crawler.py
@@ -68,11 +68,13 @@ def _build_consent_cookie(url: str) -> dict:
         "bannerVersion": "scanner",
     }
     value = quote(json.dumps(state, separators=(",", ":")), safe="")
+    # Playwright's ``add_cookies`` accepts EITHER ``url`` (from which
+    # it derives domain/path/secure) OR explicit ``domain`` + ``path``
+    # — but not both. Using ``url`` is simplest.
     return {
         "name": _CONSENT_COOKIE_NAME,
         "value": value,
         "url": url,
-        "path": "/",
         "expires": time.time() + 365 * 86400,
         "sameSite": "Lax",
     }
@@ -201,6 +203,9 @@ class CookieCrawler:
         script_cookies: dict[str, str] = {}  # cookie name → script URL
         initiator_map: dict[str, str] = {}  # request URL → initiating URL
         initiator_chains: dict[str, list[str]] = {}  # cookie name → chain
+        # Cookies discovered directly from Set-Cookie response headers.
+        # Keyed by (name, domain) so they can be merged with CDP results.
+        header_cookies: dict[tuple[str, str], DiscoveredCookie] = {}
 
         context: BrowserContext | None = None
         try:
@@ -236,7 +241,9 @@ class CookieCrawler:
         page.on("request", _on_request)
 
-        # Track Set-Cookie headers from responses
+        # Track Set-Cookie headers from responses and create
+        # DiscoveredCookie entries directly — CDP's context.cookies()
+        # may not enumerate cross-domain cookies.
         async def _on_response(response: Response) -> None:
             try:
                 headers = await response.all_headers()
@@ -247,25 +254,67 @@ class CookieCrawler:
                     initiator = _get_script_initiator(request)
                     # Build the initiator chain for this request
                     chain = _build_initiator_chain(request.url, initiator_map)
+                    resp_domain = urlparse(response.url).hostname or ""
                     for cookie_str in set_cookie.split("\n"):
                         name = cookie_str.split("=")[0].strip()
                         if name:
                             if initiator:
                                 script_cookies[name] = initiator
                                 initiator_chains[name] = chain
+                            # Parse optional Domain attribute from
+                            # the Set-Cookie header; fall back to
+                            # the response hostname.
+                            domain = resp_domain
+                            for part in cookie_str.split(";")[1:]:
+                                part = part.strip()
+                                if part.lower().startswith("domain="):
+                                    domain = part.split("=", 1)[1].strip()
+                                    break
+                            key = (name, domain)
+                            if key not in header_cookies:
+                                header_cookies[key] = DiscoveredCookie(
+                                    name=name,
+                                    domain=domain,
+                                    storage_type="cookie",
+                                    script_source=initiator,
+                                    page_url=url,
+                                    initiator_chain=chain,
+                                )
             except Exception:
                 pass  # Non-critical — response may have been aborted
 
         page.on("response", _on_response)
 
-        # Navigate
-        await page.goto(url, wait_until="domcontentloaded", timeout=self._timeout_ms)
-        # Allow additional time for scripts to set cookies after DOM load.
-        await page.wait_for_timeout(3000)
+        # Navigate — networkidle waits until there have been no network
+        # connections for at least 500 ms, which catches the GA beacon
+        # round-trip that domcontentloaded misses.
+        await page.goto(url, wait_until="networkidle", timeout=self._timeout_ms)
+        # Safety margin for late-firing scripts (e.g. deferred GTM tags).
+        await page.wait_for_timeout(5000)
 
-        # Enumerate browser cookies via CDP
+        # First pass — enumerate browser cookies via CDP.
         cdp_cookies = await context.cookies()
+
+        # Second pass — wait a further 2 seconds for any delayed
+        # Set-Cookie headers, then merge newly appeared cookies.
+        await page.wait_for_timeout(2000)
+        delayed_cookies = await context.cookies()
+
+        # Merge: index first-pass cookies by (name, domain), then
+        # add any that only appeared in the second pass.
+        seen_keys: set[tuple[str, str]] = set()
+        all_cdp_cookies: list[dict] = []
         for c in cdp_cookies:
+            key = (c["name"], c["domain"])
+            seen_keys.add(key)
+            all_cdp_cookies.append(c)
+        for c in delayed_cookies:
+            key = (c["name"], c["domain"])
+            if key not in seen_keys:
+                seen_keys.add(key)
+                all_cdp_cookies.append(c)
+
+        for c in all_cdp_cookies:
             result.cookies.append(
                 DiscoveredCookie(
                     name=c["name"],
@@ -283,6 +332,13 @@ class CookieCrawler:
                 )
             )
 
+        # Merge cookies seen in Set-Cookie headers but NOT in the
+        # CDP cookie jar (e.g. cross-domain cookies that the browser
+        # scoped to a different origin).
+        for key, hc in header_cookies.items():
+            if key not in seen_keys:
+                result.cookies.append(hc)
+
         # Enumerate localStorage
         ls_items = await page.evaluate("""() => {
             const items = [];
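The crawler parses the Domain attribute by hand, which is fine for well-formed headers. For reference, the stdlib can do the same extraction; this is not what the crawler uses, and `http.cookies.SimpleCookie` is stricter, silently dropping morsels it cannot parse:

```python
# Reference sketch: Domain extraction via the stdlib instead of manual
# splitting. Assumes one cookie per line, matching Playwright's
# all_headers(), which joins multiple Set-Cookie values with "\n".
from http.cookies import SimpleCookie


def domain_from_set_cookie(line: str, fallback: str) -> str:
    jar = SimpleCookie()
    jar.load(line)
    for morsel in jar.values():
        return morsel["domain"] or fallback
    return fallback  # unparseable line, keep the response hostname


print(domain_from_set_cookie("_ga=GA1.2.1; Domain=.example.com; Path=/", "example.com"))
# -> .example.com
print(domain_from_set_cookie("session=abc; Path=/; HttpOnly", "example.com"))
# -> example.com
```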
diff --git a/apps/scanner/src/sitemap.py b/apps/scanner/src/sitemap.py
index 32e6c91..f65f4a3 100644
--- a/apps/scanner/src/sitemap.py
+++ b/apps/scanner/src/sitemap.py
@@ -75,6 +75,13 @@ async def _fetch_sitemap(
     if resp.status_code != 200:
         return []
 
+    # SPAs with catch-all nginx/Caddy rules return 200 + text/html
+    # for /sitemap.xml. Don't try to parse HTML as XML.
+    content_type = resp.headers.get("content-type", "")
+    if "html" in content_type and "xml" not in content_type:
+        logger.debug("Sitemap %s returned HTML, skipping", url)
+        return []
+
     root = ElementTree.fromstring(resp.text)
 
     # Check if it's a sitemap index
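Without the content-type guard, an SPA's HTML shell would reach `ElementTree.fromstring`, which raises rather than returning an empty URL list. A minimal reproduction of the failure mode the check prevents:

```python
# An HTML shell is not well-formed XML: expat rejects the lowercase
# doctype (and the unclosed <meta> would fail later anyway).
from xml.etree import ElementTree

html_shell = '<!doctype html><html><head><meta charset="utf-8"></head><body>app</body></html>'
try:
    ElementTree.fromstring(html_shell)
except ElementTree.ParseError as exc:
    print(f"ParseError as expected: {exc}")
```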
diff --git a/apps/scanner/tests/test_crawler.py b/apps/scanner/tests/test_crawler.py
index 797c08b..3c987b3 100644
--- a/apps/scanner/tests/test_crawler.py
+++ b/apps/scanner/tests/test_crawler.py
@@ -42,11 +42,35 @@ def _make_mock_page(
     return page
 
 
-def _make_mock_context(page, cookies: list[dict] | None = None):
-    """Build a mock BrowserContext."""
+def _make_mock_context(
+    page,
+    cookies: list[dict] | None = None,
+    delayed_cookies: list[dict] | None = None,
+):
+    """Build a mock BrowserContext.
+
+    *cookies* is returned on the first ``context.cookies()`` call (the
+    initial CDP enumeration). *delayed_cookies* is returned on the
+    second call (the delayed pass); defaults to the same list so
+    existing tests need no changes.
+    """
     context = AsyncMock()
     context.new_page = AsyncMock(return_value=page)
-    context.cookies = AsyncMock(return_value=cookies or [])
+    first = cookies or []
+    second = delayed_cookies if delayed_cookies is not None else first
+    # The crawler calls context.cookies() twice per page (initial +
+    # delayed pass). Use a cycling function instead of a fixed-length
+    # side_effect list so that multi-page tests don't exhaust the mock.
+    _cycle = [first, second]
+    _call_count = 0
+
+    async def _cycling_cookies(*_args, **_kwargs):
+        nonlocal _call_count
+        result = _cycle[_call_count % len(_cycle)]
+        _call_count += 1
+        return result
+
+    context.cookies = AsyncMock(side_effect=_cycling_cookies)
    context.clear_cookies = AsyncMock()
    context.close = AsyncMock()
    return context
@@ -373,6 +397,44 @@ class TestCrawlPage:
         call_kwargs = browser.new_context.call_args[1]
         assert call_kwargs["user_agent"] == "CMPBot/1.0"
 
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_two_pass_cookie_collection_merges_delayed(self):
+        """Cookies appearing only in the second CDP pass are still discovered."""
+        first_pass = [
+            {"name": "_ga", "domain": ".example.com", "value": "GA1.2.12345"},
+        ]
+        second_pass = [
+            {"name": "_ga", "domain": ".example.com", "value": "GA1.2.12345"},
+            {"name": "_gid", "domain": ".example.com", "value": "GID.99"},
+        ]
+
+        page = _make_mock_page()
+        context = _make_mock_context(page, cookies=first_pass, delayed_cookies=second_pass)
+        browser = _make_mock_browser(context)
+
+        crawler = CookieCrawler()
+        result = await crawler._crawl_page(browser, "https://example.com/")
+
+        cookie_names = [c.name for c in result.cookies if c.storage_type == "cookie"]
+        assert "_ga" in cookie_names
+        assert "_gid" in cookie_names
+        # _ga must not be duplicated
+        assert cookie_names.count("_ga") == 1
+
+    @pytest.mark.asyncio(loop_scope="session")
+    async def test_uses_networkidle_wait(self):
+        """page.goto must use wait_until='networkidle'."""
+        page = _make_mock_page()
+        context = _make_mock_context(page)
+        browser = _make_mock_browser(context)
+
+        crawler = CookieCrawler()
+        await crawler._crawl_page(browser, "https://example.com/")
+
+        page.goto.assert_awaited_once()
+        call_kwargs = page.goto.call_args[1]
+        assert call_kwargs.get("wait_until") == "networkidle"
+
 
 # ── CookieCrawler.crawl_site ───────────────────────────────────────────
 
@@ -457,7 +519,9 @@ class TestBuildConsentCookie:
         """``url`` lets Playwright derive domain / path / secure."""
         cookie = _build_consent_cookie("https://example.com/page")
         assert cookie["url"] == "https://example.com/page"
-        assert cookie["path"] == "/"
+        # ``path`` is NOT set explicitly — Playwright derives it from ``url``.
+        # Setting both would cause ``add_cookies`` to reject the cookie.
+        assert "path" not in cookie
 
     def test_cookie_value_decodes_to_consent_state_with_all_categories(self):
         import json as _json
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index b646144..7256d2f 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -27,7 +27,7 @@ services:
       command:
         - "sh"
         - "-c"
-        - "python -m alembic upgrade head && python -m src.cli.bootstrap_admin"
+        - "python -m alembic upgrade head && python -m src.cli.bootstrap_admin && python -m src.cli.seed_known_cookies"
       restart: "no"
       depends_on:
         postgres:
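Chaining the seed step with `&&` means it only runs after migrations and admin bootstrap succeed, and a non-zero exit from any step fails the one-shot migrate container. The seeding CLI itself is not shown in this diff; a hypothetical sketch of the CSV it consumes, based only on the rows visible above (re-running on every deploy implies the real command upserts by the leading ID column):

```python
# Hypothetical sketch; the real CLI lives in src/cli/seed_known_cookies
# and is not part of this diff. Column layout inferred from the rows
# added to apps/api/data/open-cookie-database.csv above.
import csv


def load_known_cookie_rows(path: str) -> list[list[str]]:
    with open(path, newline="", encoding="utf-8") as fh:
        reader = csv.reader(fh)
        next(reader, None)  # skip the header row
        return [row for row in reader if row]


rows = load_known_cookie_rows("apps/api/data/open-cookie-database.csv")
print(f"{len(rows)} known-cookie definitions; last added: {rows[-1][1]}")  # Google Ads
```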