Files
consentos/apps/scanner/tests/test_worker.py
James Cottrill fbf26453f2 feat: initial public release
ConsentOS — a privacy-first cookie consent management platform.

Self-hosted, source-available alternative to OneTrust, Cookiebot, and
CookieYes. Full standards coverage (IAB TCF v2.2, GPP v1, Google
Consent Mode v2, GPC, Shopify Customer Privacy API), multi-tenant
architecture with role-based access, configuration cascade
(system → org → group → site → region), dark-pattern detection in
the scanner, and a tamper-evident consent record audit trail.

This is the initial public release. Prior development history is
retained internally.

See README.md for the feature list, architecture overview, and
quick-start instructions. Licensed under the Elastic License 2.0 —
self-host freely; do not resell as a managed service.
2026-04-14 09:18:18 +00:00

123 lines
3.8 KiB
Python

"""Tests for the scanner HTTP service."""
from unittest.mock import AsyncMock, patch
import pytest
from fastapi.testclient import TestClient
from src.worker import create_app
@pytest.fixture
def client():
    """Provide a TestClient wired to a fresh scanner app instance."""
    return TestClient(create_app())
def test_health_endpoint(client):
    """GET /health responds 200 with a plain ok status body."""
    response = client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}
@patch("src.sitemap.discover_urls", new_callable=AsyncMock)
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_domain(mock_crawl, mock_discover, client):
    """POST /scan with just a domain discovers URLs and crawls."""
    from src.crawler import CrawlResult, DiscoveredCookie, SiteCrawlResult

    page_url = "https://example.com/"
    # Two representative cookies: a third-party analytics cookie and a
    # first-party session cookie with the hardened flags set.
    ga_cookie = DiscoveredCookie(
        name="_ga",
        domain=".example.com",
        storage_type="cookie",
        page_url=page_url,
        value_length=30,
    )
    session_cookie = DiscoveredCookie(
        name="session_id",
        domain="example.com",
        storage_type="cookie",
        page_url=page_url,
        value_length=36,
        http_only=True,
        secure=True,
    )
    mock_discover.return_value = [page_url]
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[CrawlResult(url=page_url, cookies=[ga_cookie, session_cookie])],
        total_cookies_found=2,
    )

    resp = client.post("/scan", json={"domain": "example.com", "max_pages": 5})

    assert resp.status_code == 200
    data = resp.json()
    assert data["domain"] == "example.com"
    assert data["pages_crawled"] == 1
    assert data["total_cookies"] == 2
    assert len(data["cookies"]) == 2
    # Cookies come back in crawl order with their attributes intact.
    assert [c["name"] for c in data["cookies"]] == ["_ga", "session_id"]
    assert data["cookies"][1]["secure"] is True
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_urls(mock_crawl, client):
    """POST /scan with explicit URLs skips URL discovery."""
    from src.crawler import CrawlResult, SiteCrawlResult

    about_url = "https://example.com/about"
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[CrawlResult(url=about_url, cookies=[])],
        total_cookies_found=0,
    )

    # Note: discover_urls is NOT patched here — supplying "urls" must
    # mean the service never calls it.
    payload = {"domain": "example.com", "urls": [about_url], "max_pages": 1}
    resp = client.post("/scan", json=payload)

    assert resp.status_code == 200
    body = resp.json()
    assert body["pages_crawled"] == 1
    assert body["cookies"] == []
@patch("src.sitemap.discover_urls", new_callable=AsyncMock)
@patch("src.crawler.CookieCrawler.crawl_site", new_callable=AsyncMock)
def test_scan_endpoint_with_errors(mock_crawl, mock_discover, client):
    """Scan results include page errors."""
    from src.crawler import CrawlResult, SiteCrawlResult

    root = "https://example.com/"
    mock_discover.return_value = [root]
    # A page that failed to load carries its error string instead of cookies.
    failed_page = CrawlResult(url=root, cookies=[], error="Timeout")
    mock_crawl.return_value = SiteCrawlResult(
        domain="example.com",
        pages=[failed_page],
        total_cookies_found=0,
    )

    resp = client.post("/scan", json={"domain": "example.com"})

    assert resp.status_code == 200
    assert resp.json()["errors"] == ["Timeout"]
def test_scan_request_validation(client):
    """An empty body (no "domain" field) is rejected with 422."""
    response = client.post("/scan", json={})
    assert response.status_code == 422