first commit

This commit is contained in:
Matt Kane
2026-04-01 10:44:22 +01:00
commit 43fcb9a131
1789 changed files with 395041 additions and 0 deletions

View File

@@ -0,0 +1,827 @@
/**
* Tests for the main Gutenberg to Portable Text converter
*/
import { describe, it, expect } from "vitest";
import { gutenbergToPortableText, htmlToPortableText, parseGutenbergBlocks } from "../src/index.js";
import type { PortableTextTextBlock, PortableTextImageBlock } from "../src/types.js";
// Matches any HTML tag: "<", one or more non-">" characters, then ">".
// Global flag so a single replace() call strips every tag in the string.
const HTML_TAG_PATTERN = /<[^>]+>/g;
describe("gutenbergToPortableText", () => {
// Degenerate inputs are expected to convert to an empty array.
describe("empty content", () => {
  it("returns empty array for empty string", () => {
    const converted = gutenbergToPortableText("");
    expect(converted).toEqual([]);
  });

  it("returns empty array for whitespace", () => {
    const blankInput = " \n\t ";
    expect(gutenbergToPortableText(blankInput)).toEqual([]);
  });

  it("returns empty array for null-ish values", () => {
    // The signature only accepts strings; the casts simulate untyped callers.
    const nullishInputs: unknown[] = [null, undefined];
    for (const input of nullishInputs) {
      expect(gutenbergToPortableText(input as string)).toEqual([]);
    }
  });
});
// wp:paragraph conversion: plain text, inline marks, links, empty and repeated paragraphs.
describe("paragraph blocks", () => {
  it("converts simple paragraph", () => {
    const markup = `<!-- wp:paragraph -->
<p>Hello world</p>
<!-- /wp:paragraph -->`;
    const blocks = gutenbergToPortableText(markup);

    expect(blocks).toHaveLength(1);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "normal" });

    const paragraph = blocks[0] as PortableTextTextBlock;
    const firstSpan = paragraph.children[0];
    expect(firstSpan?.text).toBe("Hello world");
  });

  it("converts paragraph with inline formatting", () => {
    const markup = `<!-- wp:paragraph -->
<p>Hello <strong>bold</strong> and <em>italic</em> world</p>
<!-- /wp:paragraph -->`;
    const paragraph = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
    const spans = paragraph.children;

    // Marked runs are expected in their own spans, so there must be more than one.
    expect(spans.length).toBeGreaterThan(1);

    const strongSpan = spans.find((span) => span.marks?.includes("strong"));
    const emSpan = spans.find((span) => span.marks?.includes("em"));
    expect(strongSpan?.text).toBe("bold");
    expect(emSpan?.text).toBe("italic");
  });

  it("converts paragraph with link", () => {
    const markup = `<!-- wp:paragraph -->
<p>Visit <a href="https://example.com">our site</a></p>
<!-- /wp:paragraph -->`;
    const paragraph = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
    const linkDefs = paragraph.markDefs;

    expect(linkDefs).toHaveLength(1);
    expect(linkDefs?.[0]).toMatchObject({
      _type: "link",
      href: "https://example.com",
    });
  });

  it("skips empty paragraphs", () => {
    const markup = `<!-- wp:paragraph -->
<p></p>
<!-- /wp:paragraph -->`;
    expect(gutenbergToPortableText(markup)).toHaveLength(0);
  });

  it("handles multiple paragraphs", () => {
    const markup = `<!-- wp:paragraph -->
<p>First paragraph</p>
<!-- /wp:paragraph -->
<!-- wp:paragraph -->
<p>Second paragraph</p>
<!-- /wp:paragraph -->`;
    expect(gutenbergToPortableText(markup)).toHaveLength(2);
  });
});
// wp:heading conversion: explicit levels, the no-attribute h2 case, and inline marks.
describe("heading blocks", () => {
  it("converts h1", () => {
    const markup = `<!-- wp:heading {"level":1} -->
<h1>Main Title</h1>
<!-- /wp:heading -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "h1" });
  });

  it("converts h2 (default level)", () => {
    // The block comment carries no "level" attribute here.
    const markup = `<!-- wp:heading -->
<h2>Subtitle</h2>
<!-- /wp:heading -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "h2" });
  });

  it("converts h3-h6", () => {
    const levels = [3, 4, 5, 6];
    for (const level of levels) {
      const markup = `<!-- wp:heading {"level":${level}} -->
<h${level}>Heading ${level}</h${level}>
<!-- /wp:heading -->`;
      const expectedStyle = `h${level}`;
      const blocks = gutenbergToPortableText(markup);
      expect(blocks[0]).toMatchObject({ _type: "block", style: expectedStyle });
    }
  });

  it("preserves formatting in headings", () => {
    const markup = `<!-- wp:heading {"level":2} -->
<h2>Title with <strong>bold</strong></h2>
<!-- /wp:heading -->`;
    const heading = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
    const strongSpan = heading.children.find((span) => span.marks?.includes("strong"));
    expect(strongSpan?.text).toBe("bold");
  });
});
// wp:list conversion: each <li> is expected to become its own block tagged with listItem/level.
describe("list blocks", () => {
  it("converts unordered list", () => {
    const markup = `<!-- wp:list -->
<ul>
<li>Item one</li>
<li>Item two</li>
<li>Item three</li>
</ul>
<!-- /wp:list -->`;
    const items = gutenbergToPortableText(markup);

    expect(items).toHaveLength(3);
    for (const item of items) {
      expect(item).toMatchObject({ _type: "block", listItem: "bullet", level: 1 });
    }
  });

  it("converts ordered list", () => {
    const markup = `<!-- wp:list {"ordered":true} -->
<ol>
<li>First</li>
<li>Second</li>
</ol>
<!-- /wp:list -->`;
    const items = gutenbergToPortableText(markup);

    expect(items).toHaveLength(2);
    for (const item of items) {
      expect(item).toMatchObject({ _type: "block", listItem: "number", level: 1 });
    }
  });

  it("preserves formatting in list items", () => {
    const markup = `<!-- wp:list -->
<ul>
<li>Item with <strong>bold</strong></li>
</ul>
<!-- /wp:list -->`;
    const item = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
    const strongSpan = item.children.find((span) => span.marks?.includes("strong"));
    expect(strongSpan?.text).toBe("bold");
  });

  it("handles nested lists", () => {
    const markup = `<!-- wp:list -->
<ul>
<li>Parent item
<ul>
<li>Nested item</li>
</ul>
</li>
</ul>
<!-- /wp:list -->`;
    const items = gutenbergToPortableText(markup);

    // Count the blocks emitted at a given nesting depth.
    const countAtLevel = (depth: number) =>
      items.filter((item) => (item as PortableTextTextBlock).level === depth).length;

    expect(countAtLevel(1)).toBeGreaterThanOrEqual(1);
    expect(countAtLevel(2)).toBeGreaterThanOrEqual(1);
  });
});
// wp:quote conversion: quote paragraphs are expected as blocks styled "blockquote".
describe("quote blocks", () => {
  it("converts simple quote", () => {
    const markup = `<!-- wp:quote -->
<blockquote class="wp-block-quote"><p>To be or not to be</p></blockquote>
<!-- /wp:quote -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "blockquote" });
  });

  it("handles quote with citation", () => {
    const markup = `<!-- wp:quote {"citation":"Shakespeare"} -->
<blockquote class="wp-block-quote"><p>To be or not to be</p></blockquote>
<!-- /wp:quote -->`;
    const blocks = gutenbergToPortableText(markup);

    // Only the leading quote block is pinned; the >= 1 check leaves open
    // whether a separate citation block follows it.
    expect(blocks.length).toBeGreaterThanOrEqual(1);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "blockquote" });
  });

  it("handles multi-paragraph quote", () => {
    const markup = `<!-- wp:quote -->
<blockquote class="wp-block-quote">
<p>First paragraph of quote</p>
<p>Second paragraph of quote</p>
</blockquote>
<!-- /wp:quote -->`;
    const blocks = gutenbergToPortableText(markup);
    const quoted = blocks.filter(
      (candidate) => (candidate as PortableTextTextBlock).style === "blockquote",
    );
    expect(quoted).toHaveLength(2);
  });
});
// wp:image conversion: URL/alt/caption extraction, media-map lookup, and alignment.
describe("image blocks", () => {
  it("converts image with URL in attrs", () => {
    const markup = `<!-- wp:image {"id":123,"sizeSlug":"large","url":"https://example.com/photo.jpg"} -->
<figure class="wp-block-image size-large"><img src="https://example.com/photo.jpg" alt="A photo" class="wp-image-123"/></figure>
<!-- /wp:image -->`;
    const blocks = gutenbergToPortableText(markup);

    expect(blocks[0]).toMatchObject({ _type: "image", alt: "A photo" });
    const image = blocks[0] as PortableTextImageBlock;
    expect(image.asset.url).toBe("https://example.com/photo.jpg");
  });

  it("extracts image from HTML when not in attrs", () => {
    // The attrs carry only the id, so URL and alt can only come from the <img> tag.
    const markup = `<!-- wp:image {"id":123} -->
<figure class="wp-block-image"><img src="https://example.com/photo.jpg" alt="Photo"/></figure>
<!-- /wp:image -->`;
    const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;
    expect(image.asset.url).toBe("https://example.com/photo.jpg");
    expect(image.alt).toBe("Photo");
  });

  it("extracts caption from figcaption", () => {
    const markup = `<!-- wp:image {"id":123} -->
<figure class="wp-block-image"><img src="photo.jpg"/><figcaption>My caption</figcaption></figure>
<!-- /wp:image -->`;
    const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;
    expect(image.caption).toBe("My caption");
  });

  it("uses media map when provided", () => {
    const markup = `<!-- wp:image {"id":123} -->
<figure><img src="photo.jpg"/></figure>
<!-- /wp:image -->`;
    // Keyed by the same id (123) that appears in the block attrs.
    const mediaMap = new Map([[123, "emdash-media-abc"]]);
    const blocks = gutenbergToPortableText(markup, { mediaMap });
    const image = blocks[0] as PortableTextImageBlock;
    expect(image.asset._ref).toBe("emdash-media-abc");
  });

  it("handles alignment", () => {
    const markup = `<!-- wp:image {"id":123,"align":"center"} -->
<figure class="wp-block-image aligncenter"><img src="photo.jpg"/></figure>
<!-- /wp:image -->`;
    const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;
    expect(image.alignment).toBe("center");
  });
});
// wp:code conversion: text, language attribute, entity decoding, and line breaks.
describe("code blocks", () => {
  it("converts code block", () => {
    const markup = `<!-- wp:code -->
<pre class="wp-block-code"><code>const x = 1;</code></pre>
<!-- /wp:code -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "code", code: "const x = 1;" });
  });

  it("preserves language attribute", () => {
    const markup = `<!-- wp:code {"language":"javascript"} -->
<pre class="wp-block-code"><code>const x = 1;</code></pre>
<!-- /wp:code -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "code", language: "javascript" });
  });

  it("decodes HTML entities in code", () => {
    const markup = `<!-- wp:code -->
<pre class="wp-block-code"><code>&lt;div&gt;Hello&lt;/div&gt;</code></pre>
<!-- /wp:code -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "code", code: "<div>Hello</div>" });
  });

  it("handles multiline code", () => {
    const markup = `<!-- wp:code -->
<pre class="wp-block-code"><code>function hello() {
return "world";
}</code></pre>
<!-- /wp:code -->`;
    const codeBlock = gutenbergToPortableText(markup)[0] as { code: string };
    expect(codeBlock.code).toContain("\n");
  });
});
// wp:embed conversion: URL is carried over and the provider is named explicitly or inferred.
describe("embed blocks", () => {
  it("converts YouTube embed", () => {
    const markup = `<!-- wp:embed {"url":"https://www.youtube.com/watch?v=abc123","type":"video","providerNameSlug":"youtube"} -->
<figure class="wp-block-embed is-type-video is-provider-youtube">
<div class="wp-block-embed__wrapper">
https://www.youtube.com/watch?v=abc123
</div>
</figure>
<!-- /wp:embed -->`;
    const blocks = gutenbergToPortableText(markup);
    const expected = {
      _type: "embed",
      url: "https://www.youtube.com/watch?v=abc123",
      provider: "youtube",
    };
    expect(blocks[0]).toMatchObject(expected);
  });

  it("converts Twitter embed", () => {
    const markup = `<!-- wp:embed {"url":"https://twitter.com/user/status/123","type":"rich","providerNameSlug":"twitter"} -->
<figure class="wp-block-embed is-provider-twitter">
<div class="wp-block-embed__wrapper">
https://twitter.com/user/status/123
</div>
</figure>
<!-- /wp:embed -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "embed", provider: "twitter" });
  });

  it("detects provider from URL when not specified", () => {
    // No providerNameSlug in the attrs: only the URL can identify the provider.
    const markup = `<!-- wp:embed {"url":"https://vimeo.com/123456"} -->
<figure class="wp-block-embed">
<div class="wp-block-embed__wrapper">
https://vimeo.com/123456
</div>
</figure>
<!-- /wp:embed -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "embed", provider: "vimeo" });
  });
});
// wp:separator and wp:spacer are both expected to come out as "break" blocks.
describe("separator/spacer blocks", () => {
  it("converts separator to break", () => {
    const markup = `<!-- wp:separator -->
<hr class="wp-block-separator"/>
<!-- /wp:separator -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "break", style: "lineBreak" });
  });

  it("converts spacer to break", () => {
    const markup = `<!-- wp:spacer {"height":"50px"} -->
<div style="height:50px" aria-hidden="true" class="wp-block-spacer"></div>
<!-- /wp:spacer -->`;
    const blocks = gutenbergToPortableText(markup);
    // Only the type is pinned for spacers; no style is asserted.
    expect(blocks[0]).toMatchObject({ _type: "break" });
  });
});
// wp:columns conversion: one "columns" block holding one entry per wp:column.
describe("columns blocks", () => {
  it("converts columns with content", () => {
    const markup = `<!-- wp:columns -->
<div class="wp-block-columns">
<!-- wp:column -->
<div class="wp-block-column">
<!-- wp:paragraph -->
<p>Column 1</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->
<!-- wp:column -->
<div class="wp-block-column">
<!-- wp:paragraph -->
<p>Column 2</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->
</div>
<!-- /wp:columns -->`;
    const blocks = gutenbergToPortableText(markup);
    expect(blocks[0]).toMatchObject({ _type: "columns" });

    // Structural view of just the fields inspected below.
    const layout = blocks[0] as { columns: Array<{ content: unknown[] }> };
    expect(layout.columns).toHaveLength(2);
    const firstColumn = layout.columns[0];
    expect(firstColumn?.content.length).toBeGreaterThan(0);
  });
});
// wp:group is a pure wrapper: its inner blocks are expected directly in the output.
describe("group blocks", () => {
  it("flattens group block content", () => {
    const markup = `<!-- wp:group -->
<div class="wp-block-group">
<!-- wp:paragraph -->
<p>Paragraph in group</p>
<!-- /wp:paragraph -->
<!-- wp:heading -->
<h2>Heading in group</h2>
<!-- /wp:heading -->
</div>
<!-- /wp:group -->`;
    const blocks = gutenbergToPortableText(markup);

    // True when any output block carries the given text style.
    const hasStyle = (style: string) =>
      blocks.some((candidate) => (candidate as PortableTextTextBlock).style === style);

    // Both the inner paragraph and the inner heading must be reachable at top level.
    expect(hasStyle("normal")).toBe(true);
    expect(hasStyle("h2")).toBe(true);
  });
});
// Unrecognised block names fall back to an "htmlBlock" keeping the original name, HTML and attrs.
describe("unknown blocks", () => {
  it("creates htmlBlock fallback for unknown blocks", () => {
    const markup = `<!-- wp:my-plugin/custom-block {"foo":"bar"} -->
<div class="custom-block">Custom content</div>
<!-- /wp:my-plugin/custom-block -->`;
    const blocks = gutenbergToPortableText(markup);

    expect(blocks[0]).toMatchObject({
      _type: "htmlBlock",
      originalBlockName: "my-plugin/custom-block",
    });
    const fallback = blocks[0] as { html: string };
    expect(fallback.html).toContain("Custom content");
  });

  it("preserves original attrs in fallback", () => {
    const markup = `<!-- wp:unknown/block {"setting":true,"count":5} -->
<div>Content</div>
<!-- /wp:unknown/block -->`;
    const fallback = gutenbergToPortableText(markup)[0] as {
      originalAttrs: Record<string, unknown>;
    };
    expect(fallback.originalAttrs).toMatchObject({ setting: true, count: 5 });
  });
});
// A transformer registered under a block name takes over conversion of that block.
describe("custom transformers", () => {
  it("uses custom transformer when provided", () => {
    const markup = `<!-- wp:my-plugin/testimonial {"rating":5} -->
<div class="testimonial">Great product!</div>
<!-- /wp:my-plugin/testimonial -->`;

    const blocks = gutenbergToPortableText(markup, {
      customTransformers: {
        "my-plugin/testimonial": (block, _opts, ctx) => {
          // Strip all tags from the block's inner HTML to get the bare text.
          const plainText = block.innerHTML.replace(HTML_TAG_PATTERN, "").trim();
          const testimonial = {
            _type: "testimonial" as const,
            _key: ctx.generateKey(),
            text: plainText,
            rating: block.attrs.rating as number,
          };
          // "testimonial" is not a built-in block type, hence the double cast.
          return [testimonial as unknown as import("../src/types.js").PortableTextBlock];
        },
      },
    });

    expect(blocks[0]).toMatchObject({
      _type: "testimonial",
      text: "Great product!",
      rating: 5,
    });
  });
});
// Smoke test: a document mixing several block types converts in one pass.
describe("mixed content", () => {
  it("handles complex document with multiple block types", () => {
    const markup = `<!-- wp:heading {"level":1} -->
<h1>Welcome</h1>
<!-- /wp:heading -->
<!-- wp:paragraph -->
<p>This is the <strong>introduction</strong>.</p>
<!-- /wp:paragraph -->
<!-- wp:image {"id":1} -->
<figure><img src="hero.jpg" alt="Hero"/></figure>
<!-- /wp:image -->
<!-- wp:list -->
<ul>
<li>Feature one</li>
<li>Feature two</li>
</ul>
<!-- /wp:list -->
<!-- wp:quote -->
<blockquote><p>A quote</p></blockquote>
<!-- /wp:quote -->`;
    const blocks = gutenbergToPortableText(markup);

    // The source holds h1 + p + image + 2 list items + quote (6 candidates);
    // the assertion deliberately only requires at least 5.
    expect(blocks.length).toBeGreaterThanOrEqual(5);

    const seenTypes = blocks.map((entry) => entry._type);
    for (const expectedType of ["block", "image"]) {
      expect(seenTypes).toContain(expectedType);
    }
  });
});
});
/**
 * Tests for htmlToPortableText: plain HTML (no Gutenberg block comments)
 * converted to Portable Text.
 */
describe("htmlToPortableText", () => {
  it("converts simple HTML paragraphs", () => {
    const blocks = htmlToPortableText("<p>Hello world</p>");
    expect(blocks).toHaveLength(1);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "normal" });
  });

  it("converts headings", () => {
    const blocks = htmlToPortableText("<h1>Title</h1><h2>Subtitle</h2>");
    expect(blocks).toHaveLength(2);
    const [title, subtitle] = blocks;
    expect(title).toMatchObject({ style: "h1" });
    expect(subtitle).toMatchObject({ style: "h2" });
  });

  it("converts lists", () => {
    const items = htmlToPortableText("<ul><li>One</li><li>Two</li></ul>");
    expect(items).toHaveLength(2);
    for (const item of items) {
      expect(item).toMatchObject({ listItem: "bullet" });
    }
  });

  it("converts blockquotes", () => {
    const blocks = htmlToPortableText("<blockquote>A quote</blockquote>");
    expect(blocks[0]).toMatchObject({ _type: "block", style: "blockquote" });
  });

  it("converts code blocks", () => {
    const blocks = htmlToPortableText("<pre><code>const x = 1;</code></pre>");
    expect(blocks[0]).toMatchObject({ _type: "code", code: "const x = 1;" });
  });

  it("converts horizontal rules", () => {
    const blocks = htmlToPortableText("<p>Before</p><hr><p>After</p>");
    // Position is not pinned; a break block just has to exist somewhere.
    const rule = blocks.find((entry) => entry._type === "break");
    expect(rule).toBeDefined();
  });

  it("handles inline formatting", () => {
    const source = "<p>Hello <strong>bold</strong> and <em>italic</em></p>";
    const paragraph = htmlToPortableText(source)[0] as PortableTextTextBlock;
    const spans = paragraph.children;
    expect(spans.some((span) => span.marks?.includes("strong"))).toBe(true);
    expect(spans.some((span) => span.marks?.includes("em"))).toBe(true);
  });
});
/**
 * Regression tests for classic-editor HTML (no Gutenberg block comments) run
 * through htmlToPortableText: linked images, surrounding text and links, and
 * HTML-entity-encoded ampersands in image URLs.
 *
 * Filtered results are cast to the concrete block types, as the other suites
 * in this file do, and indexed elements are read through `?.` so a missing
 * block fails an assertion instead of throwing a TypeError.
 */
describe("WordPress.com classic editor content", () => {
  // Test case from sparge.wordpress.com - classic editor with linked images
  // and HTML entities in URLs (&#038; instead of &)
  const spargePostContent = `<p><a href="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg"><img data-attachment-id="238" data-permalink="https://sparge.wordpress.com/2011/11/27/now-brewing-hip-hop-nelson-sauvin/hip-hop/" data-orig-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg" data-orig-size="384,560" data-comments-opened="1" data-image-meta="{&quot;aperture&quot;:&quot;0&quot;,&quot;credit&quot;:&quot;&quot;,&quot;camera&quot;:&quot;&quot;,&quot;caption&quot;:&quot;&quot;,&quot;created_timestamp&quot;:&quot;0&quot;,&quot;copyright&quot;:&quot;&quot;,&quot;focal_length&quot;:&quot;0&quot;,&quot;iso&quot;:&quot;0&quot;,&quot;shutter_speed&quot;:&quot;0&quot;,&quot;title&quot;:&quot;&quot;}" data-image-title="hip-hop" data-image-description="" data-image-caption="" data-medium-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205" data-large-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=384" class="alignright size-medium wp-image-238" title="hip-hop" src="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205&#038;h=300" alt="" width="205" height="300" /></a>Hip Hops Nelson Sauvin is the first of my Christmas brews.</p>
<p>It's inspired by <a href="http://www.brewdog.com/product/77-lager" target="_blank" rel="noopener">BrewDog 77</a>, which is a classic lager dry-hopped with Nelson Sauvin. OK, so Hip Hops is 6.3% rather than 4.9%, and uses ordinary Saaz in the boil, but the essence is the same: it's a delicious, crisp German-style lager, given a New Zealand accent with a big hit of Nelson Sauvin.</p>`;

  it("extracts linked images with decoded URLs", () => {
    const result = htmlToPortableText(spargePostContent);
    // Should have at least one image block
    const imageBlocks = result.filter((b) => b._type === "image") as PortableTextImageBlock[];
    expect(imageBlocks.length).toBeGreaterThanOrEqual(1);
    // Inspect the first image block found
    const img = imageBlocks[0];
    expect(img?._type).toBe("image");
    // URL should have decoded HTML entities (& not &#038;)
    expect(img?.asset.url).toBe(
      "https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205&h=300",
    );
    expect(img?.asset.url).not.toContain("&#038;");
    // Link should be preserved
    expect(img?.link).toBe("https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg");
  });

  it("preserves text content alongside images", () => {
    const result = htmlToPortableText(spargePostContent);
    // Should have text blocks with the paragraph content
    const textBlocks = result.filter((b) => b._type === "block") as PortableTextTextBlock[];
    expect(textBlocks.length).toBeGreaterThanOrEqual(1);
    // Check that text content is preserved
    const allText = textBlocks.flatMap((b) => b.children.map((c) => c.text)).join("");
    expect(allText).toContain("Hip Hops Nelson Sauvin");
    expect(allText).toContain("Christmas brews");
  });

  it("preserves links in text", () => {
    const result = htmlToPortableText(spargePostContent);
    // Should have text blocks with links
    const textBlocks = result.filter((b) => b._type === "block") as PortableTextTextBlock[];
    // Find block with BrewDog link
    const blockWithLink = textBlocks.find((b) => b.markDefs?.length);
    expect(blockWithLink).toBeDefined();
    expect(blockWithLink?.markDefs).toContainEqual(
      expect.objectContaining({
        _type: "link",
        href: "http://www.brewdog.com/product/77-lager",
      }),
    );
  });

  it("decodes HTML entities in standalone image src", () => {
    const html = `<img src="https://example.com/photo.jpg?w=200&#038;h=300" alt="test">`;
    const result = htmlToPortableText(html);
    expect(result).toHaveLength(1);
    const img = result[0] as PortableTextImageBlock;
    expect(img._type).toBe("image");
    expect(img.asset.url).toBe("https://example.com/photo.jpg?w=200&h=300");
  });

  it("decodes &#38; variant in URLs", () => {
    const html = `<p><img src="https://example.com/photo.jpg?a=1&#38;b=2" alt="test"></p>`;
    const result = htmlToPortableText(html);
    const img = result.find((b) => b._type === "image") as PortableTextImageBlock;
    expect(img.asset.url).toBe("https://example.com/photo.jpg?a=1&b=2");
  });

  it("decodes &amp; in URLs", () => {
    const html = `<p><img src="https://example.com/photo.jpg?a=1&amp;b=2" alt="test"></p>`;
    const result = htmlToPortableText(html);
    const img = result.find((b) => b._type === "image") as PortableTextImageBlock;
    expect(img.asset.url).toBe("https://example.com/photo.jpg?a=1&b=2");
  });

  // Test for figure with HTML entities
  it("decodes HTML entities in figure images", () => {
    const html = `<figure><img src="https://example.com/photo.jpg?w=200&#038;h=300" alt="test"><figcaption>Caption</figcaption></figure>`;
    const result = htmlToPortableText(html);
    const img = result[0] as PortableTextImageBlock;
    expect(img._type).toBe("image");
    expect(img.asset.url).toBe("https://example.com/photo.jpg?w=200&h=300");
    expect(img.caption).toBe("Caption");
  });
});
/**
 * Tests for parseGutenbergBlocks: the raw block tree (names, attrs, inner
 * HTML, inner blocks) without any Portable Text conversion.
 */
describe("parseGutenbergBlocks", () => {
  it("parses blocks without converting", () => {
    const markup = `<!-- wp:paragraph -->
<p>Hello</p>
<!-- /wp:paragraph -->`;
    const parsed = parseGutenbergBlocks(markup);

    expect(parsed).toHaveLength(1);
    const paragraph = parsed[0];
    // The bare "paragraph" name is reported with the "core/" prefix.
    expect(paragraph?.blockName).toBe("core/paragraph");
    expect(paragraph?.innerHTML).toContain("Hello");
  });

  it("returns empty array for empty content", () => {
    const parsed = parseGutenbergBlocks("");
    expect(parsed).toEqual([]);
  });

  it("preserves block attributes", () => {
    const markup = `<!-- wp:heading {"level":3,"align":"center"} -->
<h3>Title</h3>
<!-- /wp:heading -->`;
    const heading = parseGutenbergBlocks(markup)[0];
    expect(heading?.attrs).toMatchObject({ level: 3, align: "center" });
  });

  it("handles nested blocks", () => {
    const markup = `<!-- wp:columns -->
<div>
<!-- wp:column -->
<div>
<!-- wp:paragraph -->
<p>Nested</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->
</div>
<!-- /wp:columns -->`;
    const outer = parseGutenbergBlocks(markup)[0];
    expect(outer?.blockName).toBe("core/columns");
    expect(outer?.innerBlocks.length).toBeGreaterThan(0);
  });
});

View File

@@ -0,0 +1,379 @@
/**
* Tests for inline HTML parsing
*/
import { describe, it, expect } from "vitest";
import {
parseInlineContent,
extractText,
extractAlt,
extractCaption,
extractSrc,
} from "../src/inline.js";
// Running counter behind the generated keys; shared by every test in this file.
let keyCounter = 0;
// Key generator passed to parseInlineContent: yields "key-1", "key-2", ... in call order.
const generateKey = (): string => {
  keyCounter += 1;
  return `key-${keyCounter}`;
};
// Matches every line feed; global so String.prototype.match returns all occurrences.
const NEWLINE_PATTERN = /\n/g;
describe("parseInlineContent", () => {
// Input without markup: text must come back in spans, with no mark definitions.
describe("plain text", () => {
  it("parses plain text", () => {
    const { children, markDefs } = parseInlineContent("Hello world", generateKey);
    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({ _type: "span", text: "Hello world" });
    expect(markDefs).toHaveLength(0);
  });

  it("handles empty string", () => {
    const { children } = parseInlineContent("", generateKey);
    // Even empty input yields one (empty) span rather than no children at all.
    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({ _type: "span", text: "" });
  });

  it("handles whitespace-only string", () => {
    const { children } = parseInlineContent(" ", generateKey);
    expect(children).toHaveLength(1);
    const onlySpan = children[0];
    expect(onlySpan?.text).toBe(" ");
  });

  it("preserves newlines in text", () => {
    const { children } = parseInlineContent("line1\nline2", generateKey);
    // How the text is split into spans is not pinned; only the joined text is checked.
    expect(children.length).toBeGreaterThanOrEqual(1);
    const joined = children.map((span) => span.text).join("");
    for (const fragment of ["line1", "line2"]) {
      expect(joined).toContain(fragment);
    }
  });
});
// One test per inline tag: the tagged run must surface as a span carrying the mapped mark.
describe("basic formatting", () => {
  // Every fixture below places the tagged run second, after a leading plain-text span.
  const secondSpan = (html: string) => parseInlineContent(html, generateKey).children[1];

  it("parses <strong> tags", () => {
    const { children } = parseInlineContent("Hello <strong>bold</strong> world", generateKey);
    expect(children).toHaveLength(3);
    const [before, marked, after] = children;
    expect(before).toMatchObject({ text: "Hello " });
    expect(marked).toMatchObject({ text: "bold", marks: ["strong"] });
    expect(after).toMatchObject({ text: " world" });
  });

  it("parses <b> tags as strong", () => {
    const span = secondSpan("Hello <b>bold</b> world");
    expect(span).toMatchObject({ text: "bold", marks: ["strong"] });
  });

  it("parses <em> tags", () => {
    const span = secondSpan("Hello <em>italic</em> world");
    expect(span).toMatchObject({ text: "italic", marks: ["em"] });
  });

  it("parses <i> tags as em", () => {
    const span = secondSpan("Hello <i>italic</i> world");
    expect(span).toMatchObject({ text: "italic", marks: ["em"] });
  });

  it("parses <u> tags", () => {
    const span = secondSpan("Hello <u>underline</u> world");
    expect(span).toMatchObject({ text: "underline", marks: ["underline"] });
  });

  it("parses <s> tags as strike-through", () => {
    const span = secondSpan("Hello <s>strikethrough</s> world");
    expect(span).toMatchObject({ text: "strikethrough", marks: ["strike-through"] });
  });

  it("parses <del> tags as strike-through", () => {
    const span = secondSpan("Hello <del>deleted</del> world");
    expect(span).toMatchObject({ text: "deleted", marks: ["strike-through"] });
  });

  it("parses <code> tags", () => {
    const span = secondSpan("Use <code>const x = 1</code> for variables");
    expect(span).toMatchObject({ text: "const x = 1", marks: ["code"] });
  });

  it("parses <sup> tags", () => {
    const span = secondSpan("x<sup>2</sup>");
    expect(span).toMatchObject({ text: "2", marks: ["superscript"] });
  });

  it("parses <sub> tags", () => {
    const span = secondSpan("H<sub>2</sub>O");
    expect(span).toMatchObject({ text: "2", marks: ["subscript"] });
  });
});
// Tags nested inside each other must accumulate their marks on the innermost span.
describe("nested formatting", () => {
  it("handles nested strong and em", () => {
    const { children } = parseInlineContent("<strong><em>bold italic</em></strong>", generateKey);
    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({
      text: "bold italic",
      // Mark order is not pinned, only membership.
      marks: expect.arrayContaining(["strong", "em"]),
    });
  });

  it("handles deeply nested marks", () => {
    const html = "<strong><em><code>code</code></em></strong>";
    const innermost = parseInlineContent(html, generateKey).children[0];
    for (const mark of ["strong", "em", "code"]) {
      expect(innermost?.marks).toContain(mark);
    }
  });

  it("handles mixed content with nested marks", () => {
    const html = "Start <strong>bold <em>bold-italic</em> bold</strong> end";
    const { children } = parseInlineContent(html, generateKey);
    expect(children.length).toBeGreaterThanOrEqual(4);

    // Locate the span that is both strong and em.
    const doublyMarked = children.find(
      (span) => span.marks?.includes("strong") && span.marks?.includes("em"),
    );
    expect(doublyMarked?.text).toBe("bold-italic");
  });
});
// Anchors become "link" entries in markDefs; spans reference them through the entry's _key.
describe("links", () => {
  it("parses simple links", () => {
    const html = 'Visit <a href="https://example.com">our site</a>';
    const { children, markDefs } = parseInlineContent(html, generateKey);

    expect(markDefs).toHaveLength(1);
    expect(markDefs[0]).toMatchObject({ _type: "link", href: "https://example.com" });

    const linkKey = markDefs[0]?._key ?? "";
    const linkedSpan = children.find((span) => span.marks?.includes(linkKey));
    expect(linkedSpan?.text).toBe("our site");
  });

  it("handles links with target=_blank", () => {
    const html = '<a href="https://example.com" target="_blank">link</a>';
    const { markDefs } = parseInlineContent(html, generateKey);
    expect(markDefs[0]).toMatchObject({
      _type: "link",
      href: "https://example.com",
      blank: true,
    });
  });

  it("deduplicates identical links", () => {
    const html =
      '<a href="https://example.com">link1</a> and <a href="https://example.com">link2</a>';
    const { children, markDefs } = parseInlineContent(html, generateKey);

    // Two anchors with the same href share a single definition.
    expect(markDefs).toHaveLength(1);
    const sharedKey = markDefs[0]?._key ?? "";
    const linkedSpans = children.filter((span) => span.marks?.includes(sharedKey));
    expect(linkedSpans).toHaveLength(2);
  });

  it("creates separate markDefs for different links", () => {
    const html = '<a href="https://a.com">link1</a> and <a href="https://b.com">link2</a>';
    const { markDefs } = parseInlineContent(html, generateKey);

    expect(markDefs).toHaveLength(2);
    expect(markDefs.map((def) => def.href)).toContain("https://a.com");
    expect(markDefs.map((def) => def.href)).toContain("https://b.com");
  });

  it("handles links with formatting inside", () => {
    const html = '<a href="https://example.com"><strong>bold link</strong></a>';
    const { children } = parseInlineContent(html, generateKey);
    const linkedSpan = children.find((span) => span.text === "bold link");

    expect(linkedSpan?.marks).toContain("strong");
    // Exactly two marks: "strong" plus the link's key.
    expect(linkedSpan?.marks?.length).toBe(2);
  });
});
// <br> variants must surface as "\n" characters in the concatenated span text.
describe("line breaks", () => {
  // Parses the HTML and joins the text of all resulting spans.
  const joinedText = (html: string) =>
    parseInlineContent(html, generateKey)
      .children.map((span) => span.text)
      .join("");

  it("handles <br> tags", () => {
    const text = joinedText("line1<br>line2");
    for (const fragment of ["line1", "\n", "line2"]) {
      expect(text).toContain(fragment);
    }
  });

  it("handles self-closing <br /> tags", () => {
    expect(joinedText("line1<br />line2")).toContain("\n");
  });

  it("handles multiple consecutive <br> tags", () => {
    const newlines = joinedText("a<br><br>b").match(NEWLINE_PATTERN);
    // Two <br> tags must yield at least two line feeds.
    expect(newlines?.length).toBeGreaterThanOrEqual(2);
  });
});
// A single enclosing block-level tag must be dropped, leaving only its inline content.
describe("block wrapper stripping", () => {
  it("strips <p> wrapper", () => {
    const { children } = parseInlineContent("<p>content</p>", generateKey);
    expect(children).toHaveLength(1);
    expect(children[0]?.text).toBe("content");
  });

  it("strips heading wrappers", () => {
    const firstSpan = parseInlineContent("<h2>heading</h2>", generateKey).children[0];
    expect(firstSpan?.text).toBe("heading");
  });

  it("strips <li> wrapper", () => {
    const firstSpan = parseInlineContent("<li>list item</li>", generateKey).children[0];
    expect(firstSpan?.text).toBe("list item");
  });

  it("preserves content when wrapper has attributes", () => {
    const html = '<p class="intro">content</p>';
    const firstSpan = parseInlineContent(html, generateKey).children[0];
    expect(firstSpan?.text).toBe("content");
  });
});
});
// extractText: reduces an HTML fragment to its text content.
describe("extractText", () => {
  it("extracts plain text", () => {
    const text = extractText("Hello world");
    expect(text).toBe("Hello world");
  });

  it("strips HTML tags", () => {
    const text = extractText("<p>Hello <strong>world</strong></p>");
    expect(text).toBe("Hello world");
  });

  it("handles nested elements", () => {
    const text = extractText("<div><p>Nested <em>text</em></p></div>");
    expect(text).toBe("Nested text");
  });

  it("handles empty string", () => {
    const text = extractText("");
    expect(text).toBe("");
  });
});
// extractAlt: reads the alt attribute of an <img>; absent and empty are distinct results.
describe("extractAlt", () => {
  it("extracts alt from img tag", () => {
    const alt = extractAlt('<img src="photo.jpg" alt="A photo">');
    expect(alt).toBe("A photo");
  });

  it("handles missing alt", () => {
    const alt = extractAlt('<img src="photo.jpg">');
    expect(alt).toBeUndefined();
  });

  it("handles empty alt", () => {
    // An explicit empty alt is returned as "", not undefined.
    const alt = extractAlt('<img src="photo.jpg" alt="">');
    expect(alt).toBe("");
  });

  it("handles single quotes", () => {
    const alt = extractAlt("<img src='photo.jpg' alt='A photo'>");
    expect(alt).toBe("A photo");
  });
});
// extractCaption: returns the <figcaption> content as plain text, or undefined when absent.
describe("extractCaption", () => {
  it("extracts caption from figcaption", () => {
    const figure = "<figure><img><figcaption>My caption</figcaption></figure>";
    expect(extractCaption(figure)).toBe("My caption");
  });

  it("strips HTML from caption", () => {
    const figure =
      "<figure><figcaption>Caption with <em>formatting</em></figcaption></figure>";
    expect(extractCaption(figure)).toBe("Caption with formatting");
  });

  it("handles missing figcaption", () => {
    const figure = "<figure><img></figure>";
    expect(extractCaption(figure)).toBeUndefined();
  });
});
// extractSrc: reads the src attribute of an <img>, or undefined when there is none.
describe("extractSrc", () => {
  it("extracts src from img tag", () => {
    const src = extractSrc('<img src="https://example.com/photo.jpg">');
    expect(src).toBe("https://example.com/photo.jpg");
  });

  it("handles relative URLs", () => {
    const src = extractSrc('<img src="/uploads/photo.jpg">');
    expect(src).toBe("/uploads/photo.jpg");
  });

  it("handles missing src", () => {
    const src = extractSrc("<img alt='no source'>");
    expect(src).toBeUndefined();
  });
});