first commit
This commit is contained in:
53
packages/gutenberg-to-portable-text/package.json
Normal file
53
packages/gutenberg-to-portable-text/package.json
Normal file
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"name": "@emdashcms/gutenberg-to-portable-text",
|
||||
"version": "0.0.0",
|
||||
"description": "Convert WordPress Gutenberg blocks to Portable Text",
|
||||
"type": "module",
|
||||
"main": "dist/index.mjs",
|
||||
"types": "dist/index.d.mts",
|
||||
"exports": {
|
||||
".": {
|
||||
"types": "./dist/index.d.mts",
|
||||
"default": "./dist/index.mjs"
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
"dist"
|
||||
],
|
||||
"scripts": {
|
||||
"build": "tsdown src/index.ts --format esm --dts --clean",
|
||||
"dev": "tsdown src/index.ts --format esm --dts --watch",
|
||||
"test": "vitest",
|
||||
"prepublishOnly": "node --run build",
|
||||
"check": "publint && attw --pack --ignore-rules=cjs-resolves-to-esm",
|
||||
"typecheck": "tsgo --noEmit"
|
||||
},
|
||||
"dependencies": {
|
||||
"@wordpress/block-serialization-default-parser": "^5.13.0",
|
||||
"parse5": "^7.2.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@arethetypeswrong/cli": "catalog:",
|
||||
"publint": "catalog:",
|
||||
"tsdown": "catalog:",
|
||||
"typescript": "catalog:",
|
||||
"vitest": "catalog:"
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git+https://github.com/cloudflare/emdash.git",
|
||||
"directory": "packages/gutenberg-to-portable-text"
|
||||
},
|
||||
"homepage": "https://github.com/cloudflare/emdash",
|
||||
"keywords": [
|
||||
"wordpress",
|
||||
"gutenberg",
|
||||
"portable-text",
|
||||
"migration",
|
||||
"blocks"
|
||||
],
|
||||
"author": "Matt Kane",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {},
|
||||
"optionalDependencies": {}
|
||||
}
|
||||
467
packages/gutenberg-to-portable-text/src/index.ts
Normal file
467
packages/gutenberg-to-portable-text/src/index.ts
Normal file
@@ -0,0 +1,467 @@
|
||||
/**
|
||||
* Gutenberg to Portable Text Converter
|
||||
*
|
||||
* Converts WordPress Gutenberg block content to Portable Text format.
|
||||
* Uses @wordpress/block-serialization-default-parser to parse the hybrid
|
||||
* HTML+JSON format that WordPress uses.
|
||||
*/
|
||||
|
||||
import { parse } from "@wordpress/block-serialization-default-parser";
|
||||
|
||||
import { parseInlineContent } from "./inline.js";
|
||||
import { getTransformer, defaultTransformers, fallbackTransformer } from "./transformers/index.js";
|
||||
import type {
|
||||
GutenbergBlock,
|
||||
PortableTextBlock,
|
||||
ConvertOptions,
|
||||
TransformContext,
|
||||
} from "./types.js";
|
||||
|
||||
// Regex patterns for HTML parsing and conversion.
//
// In BLOCK_ELEMENT_PATTERN the tag name must be followed by whitespace, "/" or ">"
// so that a short name cannot match the prefix of a longer one: without the
// lookahead `<p` matched `<pre>` and paired it with a later `</p>`.
const BLOCK_ELEMENT_PATTERN =
	/<(p|h[1-6]|blockquote|pre|ul|ol|figure|div|hr)(?=[\s/>])[^>]*>([\s\S]*?)<\/\1>|<(hr|br)\s*\/?>|<img\s+[^>]+\/?>/gu;
const LINKED_IMAGE_PATTERN = /<a\s+[^>]*href=["']([^"']+)["'][^>]*>\s*<img\s+([^>]+)\/?>\s*<\/a>/gu;
const STANDALONE_IMAGE_PATTERN = /<img\s+[^>]+\/?>/gu;
const IMG_TAG_PATTERN = /<img[^>]+>/i;
const SRC_ATTR_PATTERN = /src=["']([^"']+)["']/i;
const ALT_ATTR_PATTERN = /alt=["']([^"']*)["']/i;
const LIST_ITEM_PATTERN = /<li[^>]*>([\s\S]*?)<\/li>/gu;
const CODE_TAG_PATTERN = /<code[^>]*>([\s\S]*?)<\/code>/i;
const HTML_TAG_PATTERN = /<[^>]+>/g;
const FIGCAPTION_TAG_PATTERN = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i;

// HTML entity patterns. Each one matches the *escaped* form; a pattern that
// matches the literal character would make the corresponding replace a no-op.
const AMP_ENTITY_PATTERN = /&amp;/g;
const LESS_THAN_ENTITY_PATTERN = /&lt;/g;
const GREATER_THAN_ENTITY_PATTERN = /&gt;/g;
const QUOTE_ENTITY_PATTERN = /&quot;/g;
const APOS_ENTITY_PATTERN = /&#0?39;|&apos;/g; // &#39;, &#039; or &apos;
const NUMERIC_AMP_ENTITY_PATTERN = /&#0?38;/g; // &#38; or &#038;
const HEX_AMP_ENTITY_PATTERN = /&#x26;/gi;
const NBSP_ENTITY_PATTERN = /&nbsp;/g;
|
||||
|
||||
// Re-export types
|
||||
export type {
|
||||
GutenbergBlock,
|
||||
PortableTextBlock,
|
||||
PortableTextTextBlock,
|
||||
PortableTextImageBlock,
|
||||
PortableTextCodeBlock,
|
||||
PortableTextEmbedBlock,
|
||||
PortableTextGalleryBlock,
|
||||
PortableTextColumnsBlock,
|
||||
PortableTextBreakBlock,
|
||||
PortableTextHtmlBlock,
|
||||
PortableTextButtonBlock,
|
||||
PortableTextButtonsBlock,
|
||||
PortableTextCoverBlock,
|
||||
PortableTextFileBlock,
|
||||
PortableTextPullquoteBlock,
|
||||
PortableTextSpan,
|
||||
PortableTextMarkDef,
|
||||
ConvertOptions,
|
||||
BlockTransformer,
|
||||
TransformContext,
|
||||
} from "./types.js";
|
||||
|
||||
// Re-export transformers for customization
|
||||
export { defaultTransformers, fallbackTransformer } from "./transformers/index.js";
|
||||
export * as coreTransformers from "./transformers/core.js";
|
||||
export * as embedTransformers from "./transformers/embed.js";
|
||||
|
||||
// Re-export inline utilities
|
||||
export {
|
||||
parseInlineContent,
|
||||
extractText,
|
||||
extractAlt,
|
||||
extractCaption,
|
||||
extractSrc,
|
||||
} from "./inline.js";
|
||||
|
||||
/**
 * Build the fallback key generator used when the caller supplies none.
 *
 * Every generator owns its own counter, so keys from one generator are numbered
 * 1, 2, 3, … and carry a short random base-36 suffix.
 *
 * @returns A function producing keys of the form `key-<n>-<suffix>`
 */
function createKeyGenerator(): () => string {
	let issued = 0;

	return function nextKey(): string {
		issued += 1;
		const suffix = Math.random().toString(36).substring(2, 7);
		return `key-${issued}-${suffix}`;
	};
}
|
||||
|
||||
/**
 * Convert the WP parser's output into this package's GutenbergBlock shape.
 *
 * The parser may report `attrs` as `null`; those become an empty object so
 * consumers never need a null check. Nested `innerBlocks` are normalized the
 * same way, recursively.
 *
 * @param blocks - Blocks as returned by the WP block parser
 * @returns A new array of normalized blocks
 */
function normalizeBlocks(blocks: ReturnType<typeof parse>): GutenbergBlock[] {
	const normalized: GutenbergBlock[] = [];

	for (const raw of blocks) {
		const attrs = (raw.attrs ?? {}) satisfies Record<string, unknown>;
		normalized.push({
			blockName: raw.blockName,
			attrs,
			innerHTML: raw.innerHTML,
			innerBlocks: normalizeBlocks(raw.innerBlocks),
			innerContent: raw.innerContent,
		});
	}

	return normalized;
}
|
||||
|
||||
/**
 * Convert WordPress post content to Portable Text.
 *
 * Blank content yields an empty array. Content without a `<!-- wp:` marker is
 * treated as classic-editor HTML and handed to `htmlToPortableText`; anything
 * else is parsed into Gutenberg blocks and run through the block transformers.
 *
 * @param content - WordPress post content (HTML with Gutenberg block comments)
 * @param options - Conversion options
 * @returns Array of Portable Text blocks
 *
 * @example
 * ```ts
 * const portableText = gutenbergToPortableText(`
 *   <!-- wp:paragraph -->
 *   <p>Hello <strong>world</strong>!</p>
 *   <!-- /wp:paragraph -->
 * `);
 * // → [{ _type: "block", style: "normal", children: [...] }]
 * ```
 */
export function gutenbergToPortableText(
	content: string,
	options: ConvertOptions = {},
): PortableTextBlock[] {
	// Nothing to convert.
	if (!content || !content.trim()) {
		return [];
	}

	// No block comments at all: classic editor content, handled as plain HTML.
	if (!content.includes("<!-- wp:")) {
		return htmlToPortableText(content, options);
	}

	const parsedBlocks = normalizeBlocks(parse(content));

	// One key generator and one context are shared by the whole conversion.
	const generateKey = options.keyGenerator || createKeyGenerator();
	const context = createTransformContext(options, generateKey);

	return parsedBlocks.flatMap((parsedBlock) => transformBlock(parsedBlock, options, context));
}
|
||||
|
||||
/**
 * Convert plain HTML (classic editor) to Portable Text.
 *
 * The markup is scanned with BLOCK_ELEMENT_PATTERN. Each matched element is
 * converted according to its tag; text found between matches (and after the
 * last one) becomes a "normal" paragraph when it contains non-whitespace text.
 *
 * For `<p>` / `<div>`, images are emitted before the element's text: linked
 * images first, then the remaining `<img>` tags, then the text with all images
 * stripped. Matched tags with no handler (e.g. `<br>`) only act as separators.
 *
 * Scanning uses `matchAll`, which iterates over a private copy of the shared
 * global regexes. The earlier `exec()` loops advanced `lastIndex` on the
 * module-level patterns themselves, so an exception thrown mid-scan (for
 * example by a custom `keyGenerator`) left a stale offset behind and the next
 * call silently skipped the start of its input.
 *
 * @param html - HTML without Gutenberg block comments
 * @param options - Conversion options; only `keyGenerator` is read here
 * @returns Portable Text blocks in the order they were produced
 */
export function htmlToPortableText(
	html: string,
	options: ConvertOptions = {},
): PortableTextBlock[] {
	const generateKey = options.keyGenerator || createKeyGenerator();
	const blocks: PortableTextBlock[] = [];

	// End offset of the previous match; text from here to the next match is loose text.
	let lastIndex = 0;

	for (const match of html.matchAll(BLOCK_ELEMENT_PATTERN)) {
		const fullMatch = match[0];
		const matchStart = match.index ?? 0;
		const tag = (match[1] || match[3] || "").toLowerCase();
		const content = match[2] || "";

		// Handle text between matches
		pushTextBlock(blocks, html.slice(lastIndex, matchStart).trim(), generateKey);
		lastIndex = matchStart + fullMatch.length;

		// Standalone <img> tag (not wrapped in figure/p)
		if (fullMatch.toLowerCase().startsWith("<img")) {
			pushImageBlock(blocks, fullMatch, generateKey);
			continue;
		}

		switch (tag) {
			case "p":
			case "div": {
				// Ranges covered by <a><img></a> so the same <img> is not emitted twice.
				const linkedRanges: Array<{ start: number; end: number }> = [];

				// First extract linked images
				for (const linked of content.matchAll(LINKED_IMAGE_PATTERN)) {
					const linkUrl = decodeUrlEntities(linked[1] ?? "");
					const imgAttrs = linked[2] ?? "";
					const srcMatch = imgAttrs.match(SRC_ATTR_PATTERN);
					const altMatch = imgAttrs.match(ALT_ATTR_PATTERN);
					if (srcMatch?.[1]) {
						const imgUrl = decodeUrlEntities(srcMatch[1]);
						blocks.push({
							_type: "image",
							_key: generateKey(),
							asset: {
								_type: "reference",
								_ref: imgUrl,
								url: imgUrl,
							},
							alt: altMatch?.[1],
							link: linkUrl,
						});
					}
					const start = linked.index ?? 0;
					linkedRanges.push({ start, end: start + linked[0].length });
				}

				// Then extract standalone images (not inside <a> tags)
				for (const img of content.matchAll(STANDALONE_IMAGE_PATTERN)) {
					const at = img.index ?? 0;
					const isLinked = linkedRanges.some((range) => at >= range.start && at < range.end);
					if (isLinked) continue;
					pushImageBlock(blocks, img[0], generateKey);
				}

				// Then handle the text content (with images and image links stripped)
				const textContent = content
					.replace(LINKED_IMAGE_PATTERN, "")
					.replace(STANDALONE_IMAGE_PATTERN, "")
					.trim();
				pushTextBlock(blocks, textContent, generateKey);
				break;
			}

			case "h1":
			case "h2":
			case "h3":
			case "h4":
			case "h5":
			case "h6": {
				const { children, markDefs } = parseInlineContent(content, generateKey);
				blocks.push({
					_type: "block",
					_key: generateKey(),
					style: tag,
					children,
					markDefs: markDefs.length > 0 ? markDefs : undefined,
				});
				break;
			}

			case "blockquote": {
				const { children, markDefs } = parseInlineContent(content, generateKey);
				blocks.push({
					_type: "block",
					_key: generateKey(),
					style: "blockquote",
					children,
					markDefs: markDefs.length > 0 ? markDefs : undefined,
				});
				break;
			}

			case "pre": {
				// Prefer the inner <code> element's content when there is one.
				const codeMatch = content.match(CODE_TAG_PATTERN);
				const code = codeMatch?.[1] || content;
				blocks.push({
					_type: "code",
					_key: generateKey(),
					code: decodeHtmlEntities(code),
				});
				break;
			}

			case "ul":
			case "ol": {
				const listItem = tag === "ol" ? "number" : "bullet";
				// Every <li> becomes its own level-1 list block.
				for (const li of content.matchAll(LIST_ITEM_PATTERN)) {
					const liContent = li[1] || "";
					const { children, markDefs } = parseInlineContent(liContent, generateKey);
					blocks.push({
						_type: "block",
						_key: generateKey(),
						style: "normal",
						listItem,
						level: 1,
						children,
						markDefs: markDefs.length > 0 ? markDefs : undefined,
					});
				}
				break;
			}

			case "hr": {
				blocks.push({
					_type: "break",
					_key: generateKey(),
					style: "lineBreak",
				});
				break;
			}

			case "figure": {
				// Only figures that contain an <img> produce a block.
				const imgMatch = content.match(IMG_TAG_PATTERN);
				if (imgMatch) {
					const srcMatch = imgMatch[0].match(SRC_ATTR_PATTERN);
					const altMatch = imgMatch[0].match(ALT_ATTR_PATTERN);
					const captionMatch = content.match(FIGCAPTION_TAG_PATTERN);
					const imgUrl = srcMatch?.[1] ? decodeUrlEntities(srcMatch[1]) : "";

					blocks.push({
						_type: "image",
						_key: generateKey(),
						asset: {
							_type: "reference",
							_ref: imgUrl,
							url: imgUrl || undefined,
						},
						alt: altMatch?.[1],
						caption: captionMatch?.[1]?.replace(HTML_TAG_PATTERN, "").trim(),
					});
				}
				break;
			}
		}
	}

	// Handle remaining text
	pushTextBlock(blocks, html.slice(lastIndex).trim(), generateKey);

	return blocks;
}

/**
 * Append a "normal" text block for `html`, unless `html` is empty or its parsed
 * spans contain nothing but whitespace.
 */
function pushTextBlock(
	blocks: PortableTextBlock[],
	html: string,
	generateKey: () => string,
): void {
	if (!html) return;

	const { children, markDefs } = parseInlineContent(html, generateKey);
	if (!children.some((c) => c.text.trim())) return;

	blocks.push({
		_type: "block",
		_key: generateKey(),
		style: "normal",
		children,
		markDefs: markDefs.length > 0 ? markDefs : undefined,
	});
}

/**
 * Append an image block built from a single `<img …>` tag. Tags without a
 * `src` attribute are ignored.
 */
function pushImageBlock(
	blocks: PortableTextBlock[],
	imgTag: string,
	generateKey: () => string,
): void {
	const srcMatch = imgTag.match(SRC_ATTR_PATTERN);
	const altMatch = imgTag.match(ALT_ATTR_PATTERN);
	if (!srcMatch?.[1]) return;

	const imgUrl = decodeUrlEntities(srcMatch[1]);
	blocks.push({
		_type: "image",
		_key: generateKey(),
		asset: {
			_type: "reference",
			_ref: imgUrl,
			url: imgUrl,
		},
		alt: altMatch?.[1],
	});
}
|
||||
|
||||
/**
 * Build the context object handed to every block transformer.
 *
 * The context exposes the key generator, an inline parser bound to that
 * generator, and `transformBlocks`, which transformers use to convert nested
 * blocks with the same options and the same context.
 *
 * @param options - Conversion options forwarded to nested transforms
 * @param generateKey - Key generator shared by the whole conversion
 * @returns The transform context
 */
function createTransformContext(
	options: ConvertOptions,
	generateKey: () => string,
): TransformContext {
	const parseInline = (html: string) => parseInlineContent(html, generateKey);

	// References `context` lazily: it is only read when a transformer calls it.
	const transformNested = (blocks: GutenbergBlock[]) =>
		blocks.flatMap((nested) => transformBlock(nested, options, context));

	const context: TransformContext = {
		generateKey,
		parseInlineContent: parseInline,
		transformBlocks: transformNested,
	};

	return context;
}
|
||||
|
||||
/**
 * Convert one Gutenberg block by looking up its transformer and invoking it.
 *
 * @param block - The block to convert
 * @param options - Conversion options; `customTransformers` is passed to the lookup
 * @param context - Shared transform context
 * @returns The Portable Text blocks produced by the transformer
 */
function transformBlock(
	block: GutenbergBlock,
	options: ConvertOptions,
	context: TransformContext,
): PortableTextBlock[] {
	const { blockName } = block;
	const { customTransformers } = options;
	const transform = getTransformer(blockName, customTransformers);
	return transform(block, options, context);
}
|
||||
|
||||
/**
 * Decode the handful of HTML entities handled by this module.
 *
 * Ampersand entities are decoded last. Decoding `&amp;` earlier turns escaped
 * text such as `&amp;quot;` into `&quot;`, which a later step would then decode
 * a second time into `"`.
 *
 * @param html - Text that may contain HTML entities
 * @returns The text with the supported entities replaced by their characters
 */
function decodeHtmlEntities(html: string): string {
	return html
		.replace(LESS_THAN_ENTITY_PATTERN, "<")
		.replace(GREATER_THAN_ENTITY_PATTERN, ">")
		.replace(QUOTE_ENTITY_PATTERN, '"')
		.replace(APOS_ENTITY_PATTERN, "'")
		.replace(NBSP_ENTITY_PATTERN, " ")
		.replace(NUMERIC_AMP_ENTITY_PATTERN, "&") // &#38; or &#038;
		.replace(HEX_AMP_ENTITY_PATTERN, "&") // &#x26;
		.replace(AMP_ENTITY_PATTERN, "&");
}
|
||||
|
||||
/**
 * Decode ampersand entities in a URL (used for image src and link href values).
 *
 * @param url - URL as it appears inside an HTML attribute
 * @returns The URL with each ampersand entity replaced by "&"
 */
function decodeUrlEntities(url: string): string {
	const ampersandForms = [AMP_ENTITY_PATTERN, NUMERIC_AMP_ENTITY_PATTERN, HEX_AMP_ENTITY_PATTERN];

	let decoded = url;
	for (const form of ampersandForms) {
		decoded = decoded.replace(form, "&");
	}
	return decoded;
}
|
||||
|
||||
/**
 * Parse Gutenberg content into normalized blocks without converting them to
 * Portable Text. Useful for inspection and debugging.
 *
 * @param content - WordPress post content
 * @returns The normalized blocks, or an empty array for blank content
 */
export function parseGutenbergBlocks(content: string): GutenbergBlock[] {
	const isBlank = !content || !content.trim();
	return isBlank ? [] : normalizeBlocks(parse(content));
}
|
||||
333
packages/gutenberg-to-portable-text/src/inline.ts
Normal file
333
packages/gutenberg-to-portable-text/src/inline.ts
Normal file
@@ -0,0 +1,333 @@
|
||||
/**
|
||||
* Inline HTML to Portable Text spans converter
|
||||
*
|
||||
* Parses inline HTML elements (strong, em, a, code, etc.) and converts
|
||||
* them to Portable Text spans with marks.
|
||||
*/
|
||||
|
||||
import { parseFragment, type DefaultTreeAdapterMap } from "parse5";
|
||||
|
||||
import type { PortableTextSpan, PortableTextMarkDef } from "./types.js";
|
||||
import { sanitizeHref } from "./url.js";
|
||||
|
||||
// Regex patterns for inline parsing

// Matches any non-whitespace character; input that fails this test is blank.
const WHITESPACE_PATTERN = /\S/;

// Pre-compiled block tag patterns: an opening tag anchored at the start of the
// string and the matching closing tag anchored at the end.
const BLOCK_TAG_PATTERNS: Record<string, { open: RegExp; close: RegExp }> = {
	p: { open: /^<p[^>]*>/i, close: /<\/p>$/i },
	h1: { open: /^<h1[^>]*>/i, close: /<\/h1>$/i },
	h2: { open: /^<h2[^>]*>/i, close: /<\/h2>$/i },
	h3: { open: /^<h3[^>]*>/i, close: /<\/h3>$/i },
	h4: { open: /^<h4[^>]*>/i, close: /<\/h4>$/i },
	h5: { open: /^<h5[^>]*>/i, close: /<\/h5>$/i },
	h6: { open: /^<h6[^>]*>/i, close: /<\/h6>$/i },
	li: { open: /^<li[^>]*>/i, close: /<\/li>$/i },
	blockquote: { open: /^<blockquote[^>]*>/i, close: /<\/blockquote>$/i },
	figcaption: { open: /^<figcaption[^>]*>/i, close: /<\/figcaption>$/i },
};

// Regex patterns for extracting attributes
const IMG_ALT_PATTERN = /<img[^>]+alt=["']([^"']*)["']/i;
const FIGCAPTION_PATTERN = /<figcaption[^>]*>([\s\S]*?)<\/figcaption>/i;
const IMG_SRC_PATTERN = /<img[^>]+src=["']([^"']*)["']/i;

// Ampersand entities as they appear in attribute values. Each pattern matches
// the *escaped* form; matching a literal "&" would make the replace a no-op.
const URL_AMP_ENTITY_PATTERN = /&amp;/g;
const URL_NUMERIC_AMP_ENTITY_PATTERN = /&#0?38;/g; // &#38; or &#038;
const URL_HEX_AMP_ENTITY_PATTERN = /&#x26;/gi;

// parse5 tree node types used by the walker below.
type Node = DefaultTreeAdapterMap["node"];
type TextNode = DefaultTreeAdapterMap["textNode"];
type Element = DefaultTreeAdapterMap["element"];

/** Spans and link definitions produced from one piece of inline HTML. */
interface ParseResult {
	children: PortableTextSpan[];
	markDefs: PortableTextMarkDef[];
}
|
||||
|
||||
/**
 * Parse inline HTML content into Portable Text spans.
 *
 * A single wrapping block tag (`<p>`, `<h1>`, …) is removed first, then the
 * fragment is parsed and walked to collect spans and link mark definitions.
 * The result always contains at least one span.
 *
 * @param html - Inline HTML, optionally wrapped in one block-level tag
 * @param generateKey - Key generator for spans and mark definitions
 * @returns The spans (`children`) and the link definitions (`markDefs`)
 */
export function parseInlineContent(html: string, generateKey: () => string): ParseResult {
	// Whitespace-only input is returned verbatim BEFORE stripping, because the
	// strip/parse steps below would normalize it away.
	const isWhitespaceOnly = html.length > 0 && !WHITESPACE_PATTERN.test(html);
	if (isWhitespaceOnly) {
		return {
			children: [{ _type: "span", _key: generateKey(), text: html }],
			markDefs: [],
		};
	}

	const spans: PortableTextSpan[] = [];
	const linkDefs: PortableTextMarkDef[] = [];
	const linkKeysByHref = new Map<string, string>();

	// Strip the wrapper, parse the rest, then walk the tree to build spans.
	const fragment = parseFragment(stripBlockTags(html));
	walkNodes(fragment.childNodes, [], spans, linkDefs, linkKeysByHref, generateKey);

	// Guarantee at least one (empty) span.
	if (spans.length === 0) {
		spans.push({ _type: "span", _key: generateKey(), text: "" });
	}

	return { children: spans, markDefs: linkDefs };
}
|
||||
|
||||
/**
 * Remove one wrapping block-level tag pair from trimmed HTML.
 *
 * Candidate tags are tried in a fixed order; the first whose opening tag starts
 * the string and whose closing tag ends it is stripped, and the search stops.
 *
 * @param html - HTML that may be wrapped in a block-level tag
 * @returns The trimmed HTML without that wrapper, or just trimmed when none matches
 */
function stripBlockTags(html: string): string {
	const trimmed = html.trim();

	const wrapperTags = ["p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "blockquote", "figcaption"];

	const wrapper = wrapperTags
		.map((tag) => BLOCK_TAG_PATTERNS[tag])
		.find(
			(candidate) =>
				candidate !== undefined && candidate.open.test(trimmed) && candidate.close.test(trimmed),
		);

	if (!wrapper) {
		return trimmed;
	}
	return trimmed.replace(wrapper.open, "").replace(wrapper.close, "").trim();
}
|
||||
|
||||
/**
 * Recursively walk parsed nodes and append Portable Text spans to `children`.
 *
 * Text nodes are split on "\n": each non-empty segment becomes a span carrying
 * the currently active marks, and each line break is appended to the most
 * recent span. `<br>` is treated as a line break. Any other element contributes
 * its mark (if it maps to one) to everything nested inside it.
 *
 * @param nodes - Nodes to visit, in document order
 * @param currentMarks - Marks active for these nodes
 * @param children - Output span list (mutated)
 * @param markDefs - Output link definitions (mutated)
 * @param markDefMap - Cache of already created link definitions (mutated)
 * @param generateKey - Key generator for spans and mark definitions
 */
function walkNodes(
	nodes: Node[],
	currentMarks: string[],
	children: PortableTextSpan[],
	markDefs: PortableTextMarkDef[],
	markDefMap: Map<string, string>,
	generateKey: () => string,
): void {
	// Append "\n" to the latest span, or start a bare "\n" span when there is none yet.
	const appendNewline = (): void => {
		const tail = children.at(-1);
		if (tail) {
			tail.text += "\n";
			return;
		}
		children.push({
			_type: "span",
			_key: generateKey(),
			text: "\n",
		});
	};

	for (const node of nodes) {
		if (isTextNode(node)) {
			const value = node.value;
			if (!value) continue;

			const segments = value.split("\n");
			const lastSegment = segments.length - 1;

			segments.forEach((segment, index) => {
				// A leading empty segment (text that starts with "\n") is skipped entirely.
				if (!segment && index === 0) return;

				if (segment) {
					children.push({
						_type: "span",
						_key: generateKey(),
						text: segment,
						marks: currentMarks.length > 0 ? [...currentMarks] : undefined,
					});
				}

				// Every segment except the last was followed by a line break.
				if (index < lastSegment) {
					appendNewline();
				}
			});
			continue;
		}

		if (!isElement(node)) continue;

		// Handle <br> as newline
		if (node.tagName.toLowerCase() === "br") {
			appendNewline();
			continue;
		}

		// Elements that map to a mark extend the active marks for their subtree.
		const mark = getMarkForElement(node, markDefs, markDefMap, generateKey);
		const marksForSubtree = mark ? [...currentMarks, mark] : currentMarks;

		walkNodes(node.childNodes, marksForSubtree, children, markDefs, markDefMap, generateKey);
	}
}
|
||||
|
||||
/**
 * Get the Portable Text mark for an HTML element.
 *
 * Formatting tags map to fixed decorator names. An `<a>` maps to the key of a
 * link mark definition, created on first use and reused afterwards.
 *
 * Link definitions are cached by href *and* by whether the link opens in a new
 * tab. Caching by href alone made a `target="_blank"` link silently reuse the
 * definition of an earlier plain link to the same URL (and vice versa).
 *
 * @param element - The element to map
 * @param markDefs - Link definitions collected so far (a new one may be appended)
 * @param markDefMap - Cache from link identity to mark definition key (mutated)
 * @param generateKey - Key generator for new mark definitions
 * @returns The mark name or link key, or `null` when the tag maps to no mark
 */
function getMarkForElement(
	element: Element,
	markDefs: PortableTextMarkDef[],
	markDefMap: Map<string, string>,
	generateKey: () => string,
): string | null {
	const tagName = element.tagName.toLowerCase();

	switch (tagName) {
		case "strong":
		case "b":
			return "strong";

		case "em":
		case "i":
			return "em";

		case "u":
			return "underline";

		case "s":
		case "strike":
		case "del":
			return "strike-through";

		case "code":
			return "code";

		case "sup":
			return "superscript";

		case "sub":
			return "subscript";

		case "a": {
			const href = sanitizeHref(getAttr(element, "href"));
			const opensInNewTab = getAttr(element, "target") === "_blank";

			// The prefix comes from a fixed vocabulary, so the key stays unambiguous
			// whatever characters the href contains.
			const cacheKey = `${opensInNewTab ? "blank" : "self"}|${href}`;

			// Reuse the definition created for an identical link, if any.
			const existingKey = markDefMap.get(cacheKey);
			if (existingKey) {
				return existingKey;
			}

			// Create new mark definition
			const key = generateKey();
			const markDef: PortableTextMarkDef = {
				_type: "link",
				_key: key,
				href,
			};
			if (opensInNewTab) {
				markDef.blank = true;
			}
			markDefs.push(markDef);
			markDefMap.set(cacheKey, key);
			return key;
		}

		default:
			// Unknown inline element - ignore the tag, process children
			return null;
	}
}
|
||||
|
||||
/**
 * Look up an attribute value on a parsed element.
 *
 * The stored attribute names are lower-cased before comparison; `name` is
 * compared as given.
 *
 * @param element - Parsed element
 * @param name - Attribute name to find
 * @returns The value of the first matching attribute, or `undefined`
 */
function getAttr(element: Element, name: string): string | undefined {
	for (const attribute of element.attrs) {
		if (attribute.name.toLowerCase() === name) {
			return attribute.value;
		}
	}
	return undefined;
}
|
||||
|
||||
/**
 * Type guard: true when the node's `nodeName` is "#text".
 */
function isTextNode(node: Node): node is TextNode {
	const { nodeName } = node;
	return nodeName === "#text";
}
|
||||
|
||||
/**
 * Type guard: true when the node has a `tagName` property.
 */
function isElement(node: Node): node is Element {
	const hasTagName = "tagName" in node;
	return hasTagName;
}
|
||||
|
||||
/**
 * Extract plain text from HTML (for alt text, captions).
 *
 * Markup is dropped and the text of all nodes is concatenated in document
 * order. Only the final result is trimmed, so whitespace at element boundaries
 * survives: `<em>Photo </em><a>credit</a>` yields "Photo credit", not
 * "Photocredit".
 *
 * @param html - HTML fragment
 * @returns The concatenated text with leading and trailing whitespace removed
 */
export function extractText(html: string): string {
	const fragment = parseFragment(html);
	return getTextContent(fragment.childNodes).trim();
}

/**
 * Concatenate the text of `nodes` and their descendants, untrimmed.
 *
 * Trimming is left to the caller: trimming at each level of the recursion would
 * remove the spaces that separate words across element boundaries.
 */
function getTextContent(nodes: Node[]): string {
	let text = "";
	for (const node of nodes) {
		if (isTextNode(node)) {
			text += node.value;
		} else if (isElement(node)) {
			text += getTextContent(node.childNodes);
		}
	}
	return text;
}
|
||||
|
||||
/**
 * Extract the alt text of the first `<img>` that has an `alt` attribute.
 *
 * @param html - HTML that may contain an image
 * @returns The alt text (possibly an empty string), or `undefined` when no alt attribute is found
 */
export function extractAlt(html: string): string | undefined {
	const found = html.match(IMG_ALT_PATTERN);
	// An empty alt ("") is a valid result and is returned as-is.
	return found ? found[1] : undefined;
}
|
||||
|
||||
/**
 * Extract the plain-text caption from the first `<figcaption>` element.
 *
 * @param html - HTML that may contain a figcaption
 * @returns The caption text, or `undefined` when there is no figcaption or it is empty
 */
export function extractCaption(html: string): string | undefined {
	const captionHtml = html.match(FIGCAPTION_PATTERN)?.[1];
	return captionHtml ? extractText(captionHtml) : undefined;
}
|
||||
|
||||
/**
 * Extract the `src` of the first `<img>` element, with ampersand entities decoded.
 *
 * @param html - HTML that may contain an image
 * @returns The decoded URL, or `undefined` when no non-empty src is found
 */
export function extractSrc(html: string): string | undefined {
	const rawSrc = html.match(IMG_SRC_PATTERN)?.[1];
	return rawSrc ? decodeUrlEntities(rawSrc) : undefined;
}
|
||||
|
||||
/**
 * Decode the ampersand entities commonly found in URLs taken from HTML attributes.
 *
 * @param url - URL as written in the markup
 * @returns The URL with each ampersand entity replaced by "&"
 */
function decodeUrlEntities(url: string): string {
	const ampersandForms = [
		URL_AMP_ENTITY_PATTERN,
		URL_NUMERIC_AMP_ENTITY_PATTERN,
		URL_HEX_AMP_ENTITY_PATTERN,
	];

	let decoded = url;
	for (const form of ampersandForms) {
		decoded = decoded.replace(form, "&");
	}
	return decoded;
}
|
||||
1034
packages/gutenberg-to-portable-text/src/transformers/core.ts
Normal file
1034
packages/gutenberg-to-portable-text/src/transformers/core.ts
Normal file
File diff suppressed because it is too large
Load Diff
142
packages/gutenberg-to-portable-text/src/transformers/embed.ts
Normal file
142
packages/gutenberg-to-portable-text/src/transformers/embed.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
/**
|
||||
* Transformers for WordPress embed blocks
|
||||
*/
|
||||
|
||||
import type { BlockTransformer } from "../types.js";
|
||||
import { attrString } from "../types.js";
|
||||
|
||||
// Regex patterns for embed parsing.
// Each captures the quoted `src` value of the first matching tag in group 1.

// <iframe … src="…">
const IFRAME_SRC_PATTERN = /<iframe[^>]+src=["']([^"']+)["']/i;

// <video … src="…">, or a <source … src="…"> child
const VIDEO_SRC_PATTERN = /<video[^>]+src=["']([^"']+)["']/i;
const VIDEO_SOURCE_PATTERN = /<source[^>]+src=["']([^"']+)["']/i;

// <audio … src="…">, or a <source … src="…"> child
const AUDIO_SRC_PATTERN = /<audio[^>]+src=["']([^"']+)["']/i;
const AUDIO_SOURCE_PATTERN = /<source[^>]+src=["']([^"']+)["']/i;
|
||||
|
||||
/**
 * core/embed and variants → embed block
 *
 * The URL comes from the block's `url` attribute, falling back to the `src` of
 * an `<iframe>` in the block's HTML. The provider comes from the
 * `providerNameSlug` attribute, falling back to detection from that URL.
 */
export const embed: BlockTransformer = (block, _options, context) => {
	const { attrs, innerHTML } = block;

	const url = attrString(attrs, "url");
	const providerSlug = attrString(attrs, "providerNameSlug");

	// Extract iframe src if present
	const iframeSrc = innerHTML.match(IFRAME_SRC_PATTERN)?.[1];
	const resolvedUrl = url || iframeSrc || "";

	return [
		{
			_type: "embed",
			_key: context.generateKey(),
			url: resolvedUrl,
			provider: providerSlug || detectProvider(resolvedUrl),
			html: innerHTML.trim() || undefined,
		},
	];
};
|
||||
|
||||
/**
 * core-embed/youtube → embed block (delegates to `embed`)
 */
export const youtube: BlockTransformer = (block, options, context) =>
	embed(block, options, context);
|
||||
|
||||
/**
 * core-embed/twitter → embed block (delegates to `embed`)
 */
export const twitter: BlockTransformer = (block, options, context) =>
	embed(block, options, context);
|
||||
|
||||
/**
 * core-embed/vimeo → embed block (delegates to `embed`)
 */
export const vimeo: BlockTransformer = (block, options, context) =>
	embed(block, options, context);
|
||||
|
||||
/**
 * core/video → embed block (self-hosted video)
 *
 * The URL is the block's `src` attribute, else the `src` of the `<video>` tag,
 * else the `src` of a `<source>` tag in the block's HTML.
 */
export const video: BlockTransformer = (block, _options, context) => {
	const src = attrString(block.attrs, "src");
	const markup = block.innerHTML;

	// Fallbacks taken from the markup when the attribute is missing.
	const fromVideoTag = markup.match(VIDEO_SRC_PATTERN)?.[1];
	const fromSourceTag = markup.match(VIDEO_SOURCE_PATTERN)?.[1];
	const videoSrc = src || fromVideoTag || fromSourceTag;

	return [
		{
			_type: "embed",
			_key: context.generateKey(),
			url: videoSrc || "",
			provider: "video",
			html: markup.trim() || undefined,
		},
	];
};
|
||||
|
||||
/**
 * core/audio → embed block (self-hosted audio)
 *
 * The URL is the block's `src` attribute, else the `src` of the `<audio>` tag,
 * else the `src` of a `<source>` tag in the block's HTML.
 */
export const audio: BlockTransformer = (block, _options, context) => {
	const src = attrString(block.attrs, "src");
	const markup = block.innerHTML;

	// Fallbacks taken from the markup when the attribute is missing.
	const fromAudioTag = markup.match(AUDIO_SRC_PATTERN)?.[1];
	const fromSourceTag = markup.match(AUDIO_SOURCE_PATTERN)?.[1];
	const audioSrc = src || fromAudioTag || fromSourceTag;

	return [
		{
			_type: "embed",
			_key: context.generateKey(),
			url: audioSrc || "",
			provider: "audio",
			html: markup.trim() || undefined,
		},
	];
};
|
||||
|
||||
// Captures the host of an absolute ("https://host/…"), protocol-relative
// ("//host/…") or scheme-less ("host/…") URL. Userinfo is skipped and the
// capture stops before any port, path, query or fragment.
const EMBED_HOST_PATTERN = /^(?:(?:[a-z][a-z\d+.-]*:)?\/\/)?(?:[^/?#@]*@)?([^/?#:]+)/i;

// Provider slugs and the domains that identify them, in lookup order.
const EMBED_PROVIDER_DOMAINS: ReadonlyArray<readonly [string, readonly string[]]> = [
	["youtube", ["youtube.com", "youtu.be"]],
	["vimeo", ["vimeo.com"]],
	["twitter", ["twitter.com", "x.com"]],
	["instagram", ["instagram.com"]],
	["facebook", ["facebook.com"]],
	["tiktok", ["tiktok.com"]],
	["spotify", ["spotify.com"]],
	["soundcloud", ["soundcloud.com"]],
	["codepen", ["codepen.io"]],
	["gist", ["gist.github.com"]],
];

/**
 * Detect embed provider from URL.
 *
 * Matching is done on the URL's host: the host must equal a provider domain or
 * be a subdomain of it. Searching the whole URL for a substring misclassified
 * unrelated sites (`"x.com"` is contained in "dropbox.com", "netflix.com",
 * "wix.com", …) and any URL that merely mentions a provider in its path or query.
 *
 * @param url - Embed URL; may be absolute, protocol-relative or scheme-less
 * @returns The provider slug, or `undefined` when the host is not recognized
 */
function detectProvider(url: string): string | undefined {
	if (!url) return undefined;

	const host = EMBED_HOST_PATTERN.exec(url.trim())?.[1]?.toLowerCase();
	if (!host) return undefined;

	for (const [provider, domains] of EMBED_PROVIDER_DOMAINS) {
		const matches = domains.some((domain) => host === domain || host.endsWith(`.${domain}`));
		if (matches) {
			return provider;
		}
	}

	return undefined;
}
|
||||
115
packages/gutenberg-to-portable-text/src/transformers/index.ts
Normal file
115
packages/gutenberg-to-portable-text/src/transformers/index.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
/**
|
||||
* Block transformers registry
|
||||
*/
|
||||
|
||||
import type { BlockTransformer, PortableTextBlock } from "../types.js";
|
||||
import * as core from "./core.js";
|
||||
import * as embed from "./embed.js";
|
||||
|
||||
/**
 * Built-in transformers, keyed by Gutenberg block name.
 *
 * Covers the core WordPress blocks plus the legacy `core-embed/*` names.
 */
export const defaultTransformers: Record<string, BlockTransformer> = {
	// ── Text ──────────────────────────────────────────────────────────
	"core/paragraph": core.paragraph,
	"core/heading": core.heading,
	"core/list": core.list,
	"core/quote": core.quote,
	"core/code": core.code,
	"core/preformatted": core.preformatted,
	"core/pullquote": core.pullquote,
	"core/verse": core.verse,

	// ── Media ─────────────────────────────────────────────────────────
	"core/image": core.image,
	"core/gallery": core.gallery,
	"core/file": core.file,
	"core/media-text": core.mediaText,
	"core/cover": core.cover,

	// ── Layout ────────────────────────────────────────────────────────
	"core/columns": core.columns,
	"core/group": core.group,
	"core/separator": core.separator,
	// Spacers reuse the separator transformer.
	"core/spacer": core.separator,
	"core/table": core.table,
	"core/buttons": core.buttons,
	"core/button": core.button,

	// ── Structural ────────────────────────────────────────────────────
	"core/more": core.more,
	"core/nextpage": core.nextpage,

	// ── Pass-through (kept as HTML) ───────────────────────────────────
	"core/html": core.html,
	"core/shortcode": core.shortcode,

	// ── Embeds ────────────────────────────────────────────────────────
	"core/embed": embed.embed,
	"core/video": embed.video,
	"core/audio": embed.audio,

	// ── Legacy embed block names (WP < 5.6) ───────────────────────────
	"core-embed/youtube": embed.youtube,
	"core-embed/twitter": embed.twitter,
	"core-embed/vimeo": embed.vimeo,
	"core-embed/facebook": embed.embed,
	"core-embed/instagram": embed.embed,
	"core-embed/soundcloud": embed.embed,
	"core-embed/spotify": embed.embed,
};
|
||||
|
||||
/**
 * Transformer of last resort for blocks without a registered transformer.
 *
 * - A block with inner blocks is replaced by the transformed inner blocks.
 * - A block with neither inner blocks nor non-blank HTML yields nothing.
 * - Anything else becomes an `htmlBlock` carrying the raw HTML, the original
 *   block name and (when non-empty) the original attributes, so it can be
 *   reviewed manually.
 */
export const fallbackTransformer: BlockTransformer = (
	block,
	_options,
	context,
): PortableTextBlock[] => {
	const { innerBlocks, innerHTML, blockName, attrs } = block;

	// Containers: recurse into the children instead of storing wrapper markup.
	if (innerBlocks.length > 0) {
		return context.transformBlocks(innerBlocks);
	}

	// No children and no markup: nothing worth keeping.
	if (!innerHTML.trim()) {
		return [];
	}

	const hasAttrs = Object.keys(attrs).length > 0;
	return [
		{
			_type: "htmlBlock",
			_key: context.generateKey(),
			html: innerHTML,
			originalBlockName: blockName,
			originalAttrs: hasAttrs ? attrs : undefined,
		},
	];
};
|
||||
|
||||
/**
 * Resolve the transformer for a block name.
 *
 * Lookup order: caller-supplied transformers, then the defaults, then the
 * fallback. A null or empty block name goes straight to the fallback.
 *
 * @param blockName - Gutenberg block name, or null.
 * @param customTransformers - Optional overrides keyed by block name.
 * @returns The transformer to use; never undefined.
 */
export function getTransformer(
	blockName: string | null,
	customTransformers?: Record<string, BlockTransformer>,
): BlockTransformer {
	const registered = blockName
		? customTransformers?.[blockName] || defaultTransformers[blockName]
		: undefined;

	return registered || fallbackTransformer;
}
|
||||
307
packages/gutenberg-to-portable-text/src/types.ts
Normal file
307
packages/gutenberg-to-portable-text/src/types.ts
Normal file
@@ -0,0 +1,307 @@
|
||||
/**
|
||||
* Types for Gutenberg to Portable Text conversion
|
||||
*/
|
||||
|
||||
/**
 * A Gutenberg block in the shape produced by
 * @wordpress/block-serialization-default-parser.
 */
export interface GutenbergBlock {
	/** Block name such as "core/paragraph"; null for freeform HTML. */
	blockName: string | null;
	/** Attributes taken from the JSON in the block comment. */
	attrs: Record<string, unknown>;
	/** The block's inner HTML. */
	innerHTML: string;
	/** Child blocks (columns, groups, etc.). */
	innerBlocks: GutenbergBlock[];
	/** Content parts between inner blocks. */
	innerContent: (string | null)[];
}
|
||||
|
||||
/**
 * Inline run of text inside a Portable Text block, optionally marked.
 */
export interface PortableTextSpan {
	_type: "span";
	_key: string;
	/** The text of this run. */
	text: string;
	/** Marks applied to this run. */
	marks?: string[];
}
|
||||
|
||||
/**
 * Definition of a Portable Text mark (links, annotations).
 */
export interface PortableTextMarkDef {
	/** Kind of mark, e.g. "link". */
	_type: string;
	_key: string;
	/** Any further mark-specific data, e.g. `href` on a link. */
	[key: string]: unknown;
}
|
||||
|
||||
/**
 * Portable Text block holding text: paragraphs, headings, quotes, list items.
 */
export interface PortableTextTextBlock {
	_type: "block";
	_key: string;
	/** Block style: "normal", a heading level, or "blockquote". */
	style?: "normal" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "blockquote";
	/** Set on list items: bulleted or numbered. */
	listItem?: "bullet" | "number";
	/** List nesting level. */
	level?: number;
	/** The inline spans making up the block's text. */
	children: PortableTextSpan[];
	/** Mark definitions for this block. */
	markDefs?: PortableTextMarkDef[];
}
|
||||
|
||||
/**
 * Portable Text block for a single image.
 */
export interface PortableTextImageBlock {
	_type: "image";
	_key: string;
	/** Reference to the image asset, with an optional URL. */
	asset: {
		_type: "reference";
		_ref: string;
		url?: string;
	};
	/** Alternative text. */
	alt?: string;
	/** Caption text. */
	caption?: string;
	/** Alignment of the image. */
	alignment?: "left" | "center" | "right" | "wide" | "full";
	/** Optional link. */
	link?: string;
}
|
||||
|
||||
/**
 * Portable Text block for a code listing.
 */
export interface PortableTextCodeBlock {
	_type: "code";
	_key: string;
	/** The source text of the listing. */
	code: string;
	/** Language of the listing, when known. */
	language?: string;
}
|
||||
|
||||
/**
 * Portable Text block for embedded content (YouTube, Twitter, etc.).
 */
export interface PortableTextEmbedBlock {
	_type: "embed";
	_key: string;
	/** URL of the embedded resource. */
	url: string;
	/** Provider slug such as "youtube" or "audio". */
	provider?: string;
	/** Optional HTML markup for the embed. */
	html?: string;
}
|
||||
|
||||
/**
 * Portable Text block for an image gallery.
 */
export interface PortableTextGalleryBlock {
	_type: "gallery";
	_key: string;
	/** The gallery's images. */
	images: {
		_type: "image";
		_key: string;
		asset: { _type: "reference"; _ref: string; url?: string };
		alt?: string;
		caption?: string;
	}[];
	/** Number of columns. */
	columns?: number;
}
|
||||
|
||||
/**
 * Portable Text block for a multi-column layout.
 */
export interface PortableTextColumnsBlock {
	_type: "columns";
	_key: string;
	/** The columns, each holding its own list of blocks. */
	columns: {
		_type: "column";
		_key: string;
		content: PortableTextBlock[];
	}[];
}
|
||||
|
||||
/**
 * Portable Text block for a break/divider.
 */
export interface PortableTextBreakBlock {
	_type: "break";
	_key: string;
	/** The only style currently defined. */
	style: "lineBreak";
}
|
||||
|
||||
/**
 * Portable Text block for a table.
 */
export interface PortableTextTableBlock {
	_type: "table";
	_key: string;
	/** The table's rows. */
	rows: {
		_type: "tableRow";
		_key: string;
		/** The cells of this row. */
		cells: {
			_type: "tableCell";
			_key: string;
			/** Inline content of the cell. */
			content: PortableTextSpan[];
			/** Mark definitions for the cell. */
			markDefs?: PortableTextMarkDef[];
			/** Whether the cell is a header cell. */
			isHeader?: boolean;
		}[];
	}[];
	/** Whether the table has a header row. */
	hasHeaderRow?: boolean;
}
|
||||
|
||||
/**
 * Fallback block holding raw HTML for content that could not be converted.
 */
export interface PortableTextHtmlBlock {
	_type: "htmlBlock";
	_key: string;
	/** The raw HTML. */
	html: string;
	/** Name of the source Gutenberg block, if any. */
	originalBlockName?: string | null;
	/** Attributes of the source Gutenberg block. */
	originalAttrs?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Portable Text block for a single button.
 */
export interface PortableTextButtonBlock {
	_type: "button";
	_key: string;
	/** Button text. */
	text: string;
	/** Button URL. */
	url?: string;
	/** Button style. */
	style?: "default" | "outline" | "fill";
}
|
||||
|
||||
/**
 * Portable Text container block grouping several buttons.
 */
export interface PortableTextButtonsBlock {
	_type: "buttons";
	_key: string;
	/** The contained buttons. */
	buttons: PortableTextButtonBlock[];
	/** Layout of the buttons. */
	layout?: "horizontal" | "vertical";
}
|
||||
|
||||
/**
 * Portable Text block for a cover: image/video with a text overlay.
 */
export interface PortableTextCoverBlock {
	_type: "cover";
	_key: string;
	/** Background image. */
	backgroundImage?: string;
	/** Background video. */
	backgroundVideo?: string;
	/** Overlay colour. */
	overlayColor?: string;
	/** Overlay opacity. */
	overlayOpacity?: number;
	/** Blocks contained in the cover. */
	content: PortableTextBlock[];
	/** Minimum height, as a string. */
	minHeight?: string;
	/** Alignment. */
	alignment?: "left" | "center" | "right";
}
|
||||
|
||||
/**
 * Portable Text block for a downloadable file.
 */
export interface PortableTextFileBlock {
	_type: "file";
	_key: string;
	/** URL of the file. */
	url: string;
	/** File name. */
	filename?: string;
	/** Whether a download button is shown. */
	showDownloadButton?: boolean;
}
|
||||
|
||||
/**
 * Portable Text block for a pullquote.
 */
export interface PortableTextPullquoteBlock {
	_type: "pullquote";
	_key: string;
	/** The quoted text. */
	text: string;
	/** Citation for the quote. */
	citation?: string;
}
|
||||
|
||||
/**
 * Every Portable Text block type defined here, as one union
 * discriminated by `_type`.
 */
export type PortableTextBlock =
	// Text and code
	| PortableTextTextBlock
	| PortableTextImageBlock
	| PortableTextCodeBlock
	| PortableTextEmbedBlock
	// Media collections and layout
	| PortableTextGalleryBlock
	| PortableTextColumnsBlock
	| PortableTextBreakBlock
	| PortableTextTableBlock
	// Raw HTML fallback
	| PortableTextHtmlBlock
	// Buttons, cover, file, pullquote
	| PortableTextButtonBlock
	| PortableTextButtonsBlock
	| PortableTextCoverBlock
	| PortableTextFileBlock
	| PortableTextPullquoteBlock;
|
||||
|
||||
/**
 * Options accepted by the conversion.
 */
export interface ConvertOptions {
	/** WordPress media ID → EmDash media ID/URL. */
	mediaMap?: Map<number, string>;
	/** Additional block transformers, keyed by block name. */
	customTransformers?: Record<string, BlockTransformer>;
	/** Whether keys are generated (default: true). */
	generateKeys?: boolean;
	/** Custom key generator. */
	keyGenerator?: () => string;
}
|
||||
|
||||
/**
 * Function that converts one Gutenberg block into zero or more
 * Portable Text blocks.
 *
 * @param block - The Gutenberg block to convert.
 * @param options - The conversion options.
 * @param context - Helpers for recursion, inline parsing and key generation.
 */
export type BlockTransformer = (
	block: GutenbergBlock,
	options: ConvertOptions,
	context: TransformContext,
) => PortableTextBlock[];
|
||||
|
||||
/**
 * Helpers handed to every block transformer.
 */
export interface TransformContext {
	/** Recursively transforms a list of child blocks. */
	transformBlocks: (blocks: GutenbergBlock[]) => PortableTextBlock[];
	/** Parses inline HTML into spans plus their mark definitions. */
	parseInlineContent: (html: string) => {
		children: PortableTextSpan[];
		markDefs: PortableTextMarkDef[];
	};
	/** Produces a unique key. */
	generateKey: () => string;
}
|
||||
|
||||
// ── Attribute accessor helpers ──────────────────────────────────────
|
||||
// Gutenberg attrs are Record<string, unknown>. These narrow safely
|
||||
// without `as` casts.
|
||||
|
||||
/**
 * Read `attrs[key]` as a string.
 *
 * @returns The value when it is a string (including ""), otherwise undefined.
 */
export function attrString(attrs: Record<string, unknown>, key: string): string | undefined {
	const candidate = attrs[key];
	if (typeof candidate !== "string") return undefined;
	return candidate;
}
|
||||
|
||||
/**
 * Read `attrs[key]` as a number.
 *
 * @returns The value when its type is number (including 0), otherwise undefined.
 */
export function attrNumber(attrs: Record<string, unknown>, key: string): number | undefined {
	const candidate = attrs[key];
	if (typeof candidate !== "number") return undefined;
	return candidate;
}
|
||||
|
||||
/**
 * Read `attrs[key]` as a boolean.
 *
 * @returns The value when it is true or false, otherwise undefined.
 */
export function attrBoolean(attrs: Record<string, unknown>, key: string): boolean | undefined {
	const candidate = attrs[key];
	if (typeof candidate !== "boolean") return undefined;
	return candidate;
}
|
||||
|
||||
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(v: unknown): v is Record<string, unknown> {
	if (v === null || Array.isArray(v)) return false;
	return typeof v === "object";
}

/**
 * Read `attrs[key]` as an object.
 *
 * @returns The value when it is a non-null, non-array object, otherwise undefined.
 */
export function attrObject(
	attrs: Record<string, unknown>,
	key: string,
): Record<string, unknown> | undefined {
	const candidate = attrs[key];
	if (!isRecord(candidate)) return undefined;
	return candidate;
}
|
||||
21
packages/gutenberg-to-portable-text/src/url.ts
Normal file
21
packages/gutenberg-to-portable-text/src/url.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* URL scheme validation for the converter pipeline (defense-in-depth).
|
||||
*
|
||||
* This mirrors the canonical sanitizeHref in packages/core/src/utils/url.ts.
|
||||
* The converter is a standalone zero-dependency package, so it carries its own
|
||||
* copy. The render layer in core is the primary defense; this is secondary.
|
||||
*/
|
||||
|
||||
// Allowed prefixes: http(s), mailto, tel, a single leading "/" (but not "//"), or "#".
const SAFE_URL_SCHEME_RE = /^(https?:|mailto:|tel:|\/(?!\/)|#)/i;

/**
 * Pass a URL through only when it starts with a safe scheme.
 *
 * Rejected or missing URLs yield "" rather than "#": the converter layer
 * strips bad URLs instead of substituting anchors, and leaves the "#"
 * fallback to the render layer.
 *
 * @param url - Candidate URL; may be undefined, null or empty.
 * @returns `url` unchanged when it is safe, otherwise "".
 */
export function sanitizeHref(url: string | undefined | null): string {
	if (url && SAFE_URL_SCHEME_RE.test(url)) {
		return url;
	}
	return "";
}
|
||||
827
packages/gutenberg-to-portable-text/tests/converter.test.ts
Normal file
827
packages/gutenberg-to-portable-text/tests/converter.test.ts
Normal file
@@ -0,0 +1,827 @@
|
||||
/**
|
||||
* Tests for the main Gutenberg to Portable Text converter
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
|
||||
import { gutenbergToPortableText, htmlToPortableText, parseGutenbergBlocks } from "../src/index.js";
|
||||
import type { PortableTextTextBlock, PortableTextImageBlock } from "../src/types.js";
|
||||
|
||||
const HTML_TAG_PATTERN = /<[^>]+>/g;
|
||||
|
||||
describe("gutenbergToPortableText", () => {
|
||||
describe("empty content", () => {
	// Blank or missing input must yield no blocks at all.
	it("returns empty array for empty string", () => {
		const blocks = gutenbergToPortableText("");
		expect(blocks).toEqual([]);
	});

	it("returns empty array for whitespace", () => {
		const blocks = gutenbergToPortableText(" \n\t ");
		expect(blocks).toEqual([]);
	});

	it("returns empty array for null-ish values", () => {
		// Deliberately bypass the string type to exercise the runtime guard.
		for (const missing of [null, undefined]) {
			expect(gutenbergToPortableText(missing as unknown as string)).toEqual([]);
		}
	});
});
|
||||
|
||||
describe("paragraph blocks", () => {
	it("converts simple paragraph", () => {
		const markup = `<!-- wp:paragraph -->
<p>Hello world</p>
<!-- /wp:paragraph -->`;

		const blocks = gutenbergToPortableText(markup);
		expect(blocks).toHaveLength(1);

		const first = blocks[0];
		expect(first).toMatchObject({ _type: "block", style: "normal" });
		expect((first as PortableTextTextBlock).children[0]?.text).toBe("Hello world");
	});

	it("converts paragraph with inline formatting", () => {
		const markup = `<!-- wp:paragraph -->
<p>Hello <strong>bold</strong> and <em>italic</em> world</p>
<!-- /wp:paragraph -->`;

		const paragraph = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
		// First span carrying the given mark, if any.
		const spanWith = (mark: string) => paragraph.children.find((c) => c.marks?.includes(mark));

		expect(paragraph.children.length).toBeGreaterThan(1);
		expect(spanWith("strong")?.text).toBe("bold");
		expect(spanWith("em")?.text).toBe("italic");
	});

	it("converts paragraph with link", () => {
		const markup = `<!-- wp:paragraph -->
<p>Visit <a href="https://example.com">our site</a></p>
<!-- /wp:paragraph -->`;

		const paragraph = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
		const { markDefs } = paragraph;

		expect(markDefs).toHaveLength(1);
		expect(markDefs?.[0]).toMatchObject({
			_type: "link",
			href: "https://example.com",
		});
	});

	it("skips empty paragraphs", () => {
		const markup = `<!-- wp:paragraph -->
<p></p>
<!-- /wp:paragraph -->`;

		expect(gutenbergToPortableText(markup)).toHaveLength(0);
	});

	it("handles multiple paragraphs", () => {
		const markup = `<!-- wp:paragraph -->
<p>First paragraph</p>
<!-- /wp:paragraph -->

<!-- wp:paragraph -->
<p>Second paragraph</p>
<!-- /wp:paragraph -->`;

		expect(gutenbergToPortableText(markup)).toHaveLength(2);
	});
});
|
||||
|
||||
describe("heading blocks", () => {
	it("converts h1", () => {
		const markup = `<!-- wp:heading {"level":1} -->
<h1>Main Title</h1>
<!-- /wp:heading -->`;

		const heading = gutenbergToPortableText(markup)[0];
		expect(heading).toMatchObject({ _type: "block", style: "h1" });
	});

	it("converts h2 (default level)", () => {
		// No `level` attribute: the heading is expected to come out as h2.
		const markup = `<!-- wp:heading -->
<h2>Subtitle</h2>
<!-- /wp:heading -->`;

		const heading = gutenbergToPortableText(markup)[0];
		expect(heading).toMatchObject({ _type: "block", style: "h2" });
	});

	it("converts h3-h6", () => {
		for (const level of [3, 4, 5, 6]) {
			const markup = `<!-- wp:heading {"level":${level}} -->
<h${level}>Heading ${level}</h${level}>
<!-- /wp:heading -->`;

			const heading = gutenbergToPortableText(markup)[0];
			expect(heading).toMatchObject({
				_type: "block",
				style: `h${level}`,
			});
		}
	});

	it("preserves formatting in headings", () => {
		const markup = `<!-- wp:heading {"level":2} -->
<h2>Title with <strong>bold</strong></h2>
<!-- /wp:heading -->`;

		const heading = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
		const strongSpan = heading.children.find((c) => c.marks?.includes("strong"));
		expect(strongSpan?.text).toBe("bold");
	});
});
|
||||
|
||||
describe("list blocks", () => {
	it("converts unordered list", () => {
		const markup = `<!-- wp:list -->
<ul>
<li>Item one</li>
<li>Item two</li>
<li>Item three</li>
</ul>
<!-- /wp:list -->`;

		const items = gutenbergToPortableText(markup);

		// One block per <li>.
		expect(items).toHaveLength(3);
		for (const item of items) {
			expect(item).toMatchObject({
				_type: "block",
				listItem: "bullet",
				level: 1,
			});
		}
	});

	it("converts ordered list", () => {
		const markup = `<!-- wp:list {"ordered":true} -->
<ol>
<li>First</li>
<li>Second</li>
</ol>
<!-- /wp:list -->`;

		const items = gutenbergToPortableText(markup);

		expect(items).toHaveLength(2);
		for (const item of items) {
			expect(item).toMatchObject({
				_type: "block",
				listItem: "number",
				level: 1,
			});
		}
	});

	it("preserves formatting in list items", () => {
		const markup = `<!-- wp:list -->
<ul>
<li>Item with <strong>bold</strong></li>
</ul>
<!-- /wp:list -->`;

		const item = gutenbergToPortableText(markup)[0] as PortableTextTextBlock;
		const strongSpan = item.children.find((c) => c.marks?.includes("strong"));
		expect(strongSpan?.text).toBe("bold");
	});

	it("handles nested lists", () => {
		const markup = `<!-- wp:list -->
<ul>
<li>Parent item
<ul>
<li>Nested item</li>
</ul>
</li>
</ul>
<!-- /wp:list -->`;

		const items = gutenbergToPortableText(markup);
		// Number of emitted blocks at a given nesting level.
		const countAtLevel = (level: number) =>
			items.filter((b) => (b as PortableTextTextBlock).level === level).length;

		expect(countAtLevel(1)).toBeGreaterThanOrEqual(1);
		expect(countAtLevel(2)).toBeGreaterThanOrEqual(1);
	});
});
|
||||
|
||||
describe("quote blocks", () => {
	it("converts simple quote", () => {
		const markup = `<!-- wp:quote -->
<blockquote class="wp-block-quote"><p>To be or not to be</p></blockquote>
<!-- /wp:quote -->`;

		const quote = gutenbergToPortableText(markup)[0];
		expect(quote).toMatchObject({ _type: "block", style: "blockquote" });
	});

	it("handles quote with citation", () => {
		const markup = `<!-- wp:quote {"citation":"Shakespeare"} -->
<blockquote class="wp-block-quote"><p>To be or not to be</p></blockquote>
<!-- /wp:quote -->`;

		const blocks = gutenbergToPortableText(markup);

		// Nominally a quote block plus a citation block; only the quote itself is asserted.
		expect(blocks.length).toBeGreaterThanOrEqual(1);
		expect(blocks[0]).toMatchObject({ _type: "block", style: "blockquote" });
	});

	it("handles multi-paragraph quote", () => {
		const markup = `<!-- wp:quote -->
<blockquote class="wp-block-quote">
<p>First paragraph of quote</p>
<p>Second paragraph of quote</p>
</blockquote>
<!-- /wp:quote -->`;

		const blocks = gutenbergToPortableText(markup);

		// Each <p> inside the quote becomes its own blockquote-styled block.
		const quoted = blocks.filter((b) => (b as PortableTextTextBlock).style === "blockquote");
		expect(quoted).toHaveLength(2);
	});
});
|
||||
|
||||
describe("image blocks", () => {
	it("converts image with URL in attrs", () => {
		const markup = `<!-- wp:image {"id":123,"sizeSlug":"large","url":"https://example.com/photo.jpg"} -->
<figure class="wp-block-image size-large"><img src="https://example.com/photo.jpg" alt="A photo" class="wp-image-123"/></figure>
<!-- /wp:image -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({ _type: "image", alt: "A photo" });
		expect((first as PortableTextImageBlock).asset.url).toBe("https://example.com/photo.jpg");
	});

	it("extracts image from HTML when not in attrs", () => {
		// Only the id is in the attrs; src and alt must come from the <img> tag.
		const markup = `<!-- wp:image {"id":123} -->
<figure class="wp-block-image"><img src="https://example.com/photo.jpg" alt="Photo"/></figure>
<!-- /wp:image -->`;

		const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;

		expect(image.asset.url).toBe("https://example.com/photo.jpg");
		expect(image.alt).toBe("Photo");
	});

	it("extracts caption from figcaption", () => {
		const markup = `<!-- wp:image {"id":123} -->
<figure class="wp-block-image"><img src="photo.jpg"/><figcaption>My caption</figcaption></figure>
<!-- /wp:image -->`;

		const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;
		expect(image.caption).toBe("My caption");
	});

	it("uses media map when provided", () => {
		const markup = `<!-- wp:image {"id":123} -->
<figure><img src="photo.jpg"/></figure>
<!-- /wp:image -->`;

		// WordPress attachment id 123 maps to this reference.
		const mediaMap = new Map([[123, "emdash-media-abc"]]);
		const image = gutenbergToPortableText(markup, { mediaMap })[0] as PortableTextImageBlock;

		expect(image.asset._ref).toBe("emdash-media-abc");
	});

	it("handles alignment", () => {
		const markup = `<!-- wp:image {"id":123,"align":"center"} -->
<figure class="wp-block-image aligncenter"><img src="photo.jpg"/></figure>
<!-- /wp:image -->`;

		const image = gutenbergToPortableText(markup)[0] as PortableTextImageBlock;
		expect(image.alignment).toBe("center");
	});
});
|
||||
|
||||
describe("code blocks", () => {
	it("converts code block", () => {
		const content = `<!-- wp:code -->
<pre class="wp-block-code"><code>const x = 1;</code></pre>
<!-- /wp:code -->`;

		const result = gutenbergToPortableText(content);

		expect(result[0]).toMatchObject({
			_type: "code",
			code: "const x = 1;",
		});
	});

	it("preserves language attribute", () => {
		const content = `<!-- wp:code {"language":"javascript"} -->
<pre class="wp-block-code"><code>const x = 1;</code></pre>
<!-- /wp:code -->`;

		const result = gutenbergToPortableText(content);

		expect(result[0]).toMatchObject({
			_type: "code",
			language: "javascript",
		});
	});

	it("decodes HTML entities in code", () => {
		// The input must contain escaped markup (&lt; / &gt;); with raw <div>
		// tags this test would not exercise entity decoding at all.
		const content = `<!-- wp:code -->
<pre class="wp-block-code"><code>&lt;div&gt;Hello&lt;/div&gt;</code></pre>
<!-- /wp:code -->`;

		const result = gutenbergToPortableText(content);

		expect(result[0]).toMatchObject({
			_type: "code",
			code: "<div>Hello</div>",
		});
	});

	it("handles multiline code", () => {
		const content = `<!-- wp:code -->
<pre class="wp-block-code"><code>function hello() {
return "world";
}</code></pre>
<!-- /wp:code -->`;

		const result = gutenbergToPortableText(content);

		// Line breaks inside <code> must survive the conversion.
		expect((result[0] as { code: string }).code).toContain("\n");
	});
});
|
||||
|
||||
describe("embed blocks", () => {
	it("converts YouTube embed", () => {
		const markup = `<!-- wp:embed {"url":"https://www.youtube.com/watch?v=abc123","type":"video","providerNameSlug":"youtube"} -->
<figure class="wp-block-embed is-type-video is-provider-youtube">
<div class="wp-block-embed__wrapper">
https://www.youtube.com/watch?v=abc123
</div>
</figure>
<!-- /wp:embed -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({
			_type: "embed",
			url: "https://www.youtube.com/watch?v=abc123",
			provider: "youtube",
		});
	});

	it("converts Twitter embed", () => {
		const markup = `<!-- wp:embed {"url":"https://twitter.com/user/status/123","type":"rich","providerNameSlug":"twitter"} -->
<figure class="wp-block-embed is-provider-twitter">
<div class="wp-block-embed__wrapper">
https://twitter.com/user/status/123
</div>
</figure>
<!-- /wp:embed -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({ _type: "embed", provider: "twitter" });
	});

	it("detects provider from URL when not specified", () => {
		// No providerNameSlug in the attrs: the provider has to come from the URL.
		const markup = `<!-- wp:embed {"url":"https://vimeo.com/123456"} -->
<figure class="wp-block-embed">
<div class="wp-block-embed__wrapper">
https://vimeo.com/123456
</div>
</figure>
<!-- /wp:embed -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({ _type: "embed", provider: "vimeo" });
	});
});
|
||||
|
||||
describe("separator/spacer blocks", () => {
	it("converts separator to break", () => {
		const markup = `<!-- wp:separator -->
<hr class="wp-block-separator"/>
<!-- /wp:separator -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({ _type: "break", style: "lineBreak" });
	});

	it("converts spacer to break", () => {
		const markup = `<!-- wp:spacer {"height":"50px"} -->
<div style="height:50px" aria-hidden="true" class="wp-block-spacer"></div>
<!-- /wp:spacer -->`;

		const first = gutenbergToPortableText(markup)[0];

		// Only the block type is checked for spacers.
		expect(first).toMatchObject({ _type: "break" });
	});
});
|
||||
|
||||
describe("columns blocks", () => {
	it("converts columns with content", () => {
		const markup = `<!-- wp:columns -->
<div class="wp-block-columns">
<!-- wp:column -->
<div class="wp-block-column">
<!-- wp:paragraph -->
<p>Column 1</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->

<!-- wp:column -->
<div class="wp-block-column">
<!-- wp:paragraph -->
<p>Column 2</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->
</div>
<!-- /wp:columns -->`;

		const first = gutenbergToPortableText(markup)[0];
		expect(first).toMatchObject({ _type: "columns" });

		// Two wp:column children, and the first one must carry converted content.
		const { columns } = first as { columns: Array<{ content: unknown[] }> };
		expect(columns).toHaveLength(2);
		expect(columns[0]?.content.length).toBeGreaterThan(0);
	});
});
|
||||
|
||||
describe("group blocks", () => {
	it("flattens group block content", () => {
		const markup = `<!-- wp:group -->
<div class="wp-block-group">
<!-- wp:paragraph -->
<p>Paragraph in group</p>
<!-- /wp:paragraph -->
<!-- wp:heading -->
<h2>Heading in group</h2>
<!-- /wp:heading -->
</div>
<!-- /wp:group -->`;

		const blocks = gutenbergToPortableText(markup);
		// True when some emitted block has the given text style.
		const hasStyle = (style: string) =>
			blocks.some((b) => (b as PortableTextTextBlock).style === style);

		// The group wrapper disappears; its children appear directly in the output.
		expect(hasStyle("normal")).toBe(true);
		expect(hasStyle("h2")).toBe(true);
	});
});
|
||||
|
||||
describe("unknown blocks", () => {
	it("creates htmlBlock fallback for unknown blocks", () => {
		const markup = `<!-- wp:my-plugin/custom-block {"foo":"bar"} -->
<div class="custom-block">Custom content</div>
<!-- /wp:my-plugin/custom-block -->`;

		const first = gutenbergToPortableText(markup)[0];

		expect(first).toMatchObject({
			_type: "htmlBlock",
			originalBlockName: "my-plugin/custom-block",
		});
		const { html } = first as { html: string };
		expect(html).toContain("Custom content");
	});

	it("preserves original attrs in fallback", () => {
		const markup = `<!-- wp:unknown/block {"setting":true,"count":5} -->
<div>Content</div>
<!-- /wp:unknown/block -->`;

		const first = gutenbergToPortableText(markup)[0];
		const { originalAttrs } = first as { originalAttrs: Record<string, unknown> };

		expect(originalAttrs).toMatchObject({
			setting: true,
			count: 5,
		});
	});
});
|
||||
|
||||
describe("custom transformers", () => {
	it("uses custom transformer when provided", () => {
		const markup = `<!-- wp:my-plugin/testimonial {"rating":5} -->
<div class="testimonial">Great product!</div>
<!-- /wp:my-plugin/testimonial -->`;

		const blocks = gutenbergToPortableText(markup, {
			customTransformers: {
				"my-plugin/testimonial": (block, _opts, ctx) => {
					const testimonial = {
						_type: "testimonial" as const,
						_key: ctx.generateKey(),
						text: block.innerHTML.replace(HTML_TAG_PATTERN, "").trim(),
						rating: block.attrs.rating as number,
					};
					// "testimonial" is not a member of the PortableTextBlock union, hence the cast.
					return [testimonial as unknown as import("../src/types.js").PortableTextBlock];
				},
			},
		});

		expect(blocks[0]).toMatchObject({
			_type: "testimonial",
			text: "Great product!",
			rating: 5,
		});
	});
});
|
||||
|
||||
describe("mixed content", () => {
  it("handles complex document with multiple block types", () => {
    const post = `<!-- wp:heading {"level":1} -->
<h1>Welcome</h1>
<!-- /wp:heading -->

<!-- wp:paragraph -->
<p>This is the <strong>introduction</strong>.</p>
<!-- /wp:paragraph -->

<!-- wp:image {"id":1} -->
<figure><img src="hero.jpg" alt="Hero"/></figure>
<!-- /wp:image -->

<!-- wp:list -->
<ul>
<li>Feature one</li>
<li>Feature two</li>
</ul>
<!-- /wp:list -->

<!-- wp:quote -->
<blockquote><p>A quote</p></blockquote>
<!-- /wp:quote -->`;

    const converted = gutenbergToPortableText(post);

    // The input holds h1 + p + image + a two-item list + quote; only a lower
    // bound on the number of resulting blocks is asserted.
    expect(converted.length).toBeGreaterThanOrEqual(5);

    const seenTypes = converted.map((entry) => entry._type);
    expect(seenTypes).toContain("block");
    expect(seenTypes).toContain("image");
  });
});
|
||||
});
|
||||
|
||||
// Plain HTML (no Gutenberg block comments) converted straight to Portable Text.
describe("htmlToPortableText", () => {
  it("converts simple HTML paragraphs", () => {
    const blocks = htmlToPortableText("<p>Hello world</p>");

    expect(blocks).toHaveLength(1);
    expect(blocks[0]).toMatchObject({ _type: "block", style: "normal" });
  });

  it("converts headings", () => {
    const blocks = htmlToPortableText("<h1>Title</h1><h2>Subtitle</h2>");

    expect(blocks).toHaveLength(2);
    const [title, subtitle] = blocks;
    expect(title).toMatchObject({ style: "h1" });
    expect(subtitle).toMatchObject({ style: "h2" });
  });

  it("converts lists", () => {
    const blocks = htmlToPortableText("<ul><li>One</li><li>Two</li></ul>");

    // One block per list item, each flagged as a bullet item.
    expect(blocks).toHaveLength(2);
    for (const item of blocks) {
      expect(item).toMatchObject({ listItem: "bullet" });
    }
  });

  it("converts blockquotes", () => {
    const [quote] = htmlToPortableText("<blockquote>A quote</blockquote>");

    expect(quote).toMatchObject({ _type: "block", style: "blockquote" });
  });

  it("converts code blocks", () => {
    const [snippet] = htmlToPortableText("<pre><code>const x = 1;</code></pre>");

    expect(snippet).toMatchObject({ _type: "code", code: "const x = 1;" });
  });

  it("converts horizontal rules", () => {
    const blocks = htmlToPortableText("<p>Before</p><hr><p>After</p>");

    const divider = blocks.find((entry) => entry._type === "break");
    expect(divider).toBeDefined();
  });

  it("handles inline formatting", () => {
    const [first] = htmlToPortableText("<p>Hello <strong>bold</strong> and <em>italic</em></p>");
    const { children } = first as PortableTextTextBlock;

    // True when at least one span carries the given decorator mark.
    const anySpanHas = (mark: string) => children.some((span) => span.marks?.includes(mark));

    expect(anySpanHas("strong")).toBe(true);
    expect(anySpanHas("em")).toBe(true);
  });
});
|
||||
|
||||
describe("WordPress.com classic editor content", () => {
  // Test case from sparge.wordpress.com - classic editor with linked images
  // and HTML entities in URLs (&#038; instead of &).
  //
  // The fixture must keep its character references encoded (&quot; inside
  // data-image-meta, &#038; in the image src): they are what the decoding
  // assertions below exercise, and raw double quotes inside the attribute
  // would make the markup malformed.
  const spargePostContent = `<p><a href="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg"><img data-attachment-id="238" data-permalink="https://sparge.wordpress.com/2011/11/27/now-brewing-hip-hop-nelson-sauvin/hip-hop/" data-orig-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg" data-orig-size="384,560" data-comments-opened="1" data-image-meta="{&quot;aperture&quot;:&quot;0&quot;,&quot;credit&quot;:&quot;&quot;,&quot;camera&quot;:&quot;&quot;,&quot;caption&quot;:&quot;&quot;,&quot;created_timestamp&quot;:&quot;0&quot;,&quot;copyright&quot;:&quot;&quot;,&quot;focal_length&quot;:&quot;0&quot;,&quot;iso&quot;:&quot;0&quot;,&quot;shutter_speed&quot;:&quot;0&quot;,&quot;title&quot;:&quot;&quot;}" data-image-title="hip-hop" data-image-description="" data-image-caption="" data-medium-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205" data-large-file="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=384" class="alignright size-medium wp-image-238" title="hip-hop" src="https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205&#038;h=300" alt="" width="205" height="300" /></a>Hip Hops Nelson Sauvin is the first of my Christmas brews.</p>
<p>It's inspired by <a href="http://www.brewdog.com/product/77-lager" target="_blank" rel="noopener">BrewDog 77</a>, which is a classic lager dry-hopped with Nelson Sauvin. OK, so Hip Hops is 6.3% rather than 4.9%, and uses ordinary Saaz in the boil, but the essence is the same: it's a delicious, crisp German-style lager, given a New Zealand accent with a big hit of Nelson Sauvin.</p>`;

  it("extracts linked images with decoded URLs", () => {
    const result = htmlToPortableText(spargePostContent);

    // Should have at least one image block
    const imageBlocks = result.filter((b) => b._type === "image");
    expect(imageBlocks.length).toBeGreaterThanOrEqual(1);

    // First block should be the image
    const img = imageBlocks[0];
    expect(img._type).toBe("image");

    // URL should have decoded HTML entities (& not &#038;)
    expect(img.asset.url).toBe(
      "https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg?w=205&h=300",
    );
    // Guard against the encoded form leaking through; asserting on a bare
    // "&" here would contradict the expected URL above.
    expect(img.asset.url).not.toContain("&#038;");

    // Link should be preserved
    expect(img.link).toBe("https://sparge.wordpress.com/wp-content/uploads/2011/11/hip-hop.jpg");
  });

  it("preserves text content alongside images", () => {
    const result = htmlToPortableText(spargePostContent);

    // Should have text blocks with the paragraph content
    const textBlocks = result.filter((b) => b._type === "block");
    expect(textBlocks.length).toBeGreaterThanOrEqual(1);

    // Check that text content is preserved
    const allText = textBlocks.flatMap((b) => b.children.map((c) => c.text)).join("");
    expect(allText).toContain("Hip Hops Nelson Sauvin");
    expect(allText).toContain("Christmas brews");
  });

  it("preserves links in text", () => {
    const result = htmlToPortableText(spargePostContent);

    // Should have text blocks with links
    const textBlocks = result.filter((b) => b._type === "block");

    // Find block with BrewDog link
    const blockWithLink = textBlocks.find((b) => b.markDefs?.length);
    expect(blockWithLink).toBeDefined();
    expect(blockWithLink?.markDefs).toContainEqual(
      expect.objectContaining({
        _type: "link",
        href: "http://www.brewdog.com/product/77-lager",
      }),
    );
  });

  // Named reference (&amp;) on an <img> that is not wrapped in any block element.
  it("decodes HTML entities in standalone image src", () => {
    const html = `<img src="https://example.com/photo.jpg?w=200&amp;h=300" alt="test">`;
    const result = htmlToPortableText(html);

    expect(result).toHaveLength(1);
    const img = result[0] as PortableTextImageBlock;
    expect(img._type).toBe("image");
    expect(img.asset.url).toBe("https://example.com/photo.jpg?w=200&h=300");
  });

  // Decimal numeric reference, the form WordPress emits for "&" in URLs.
  it("decodes &#038; variant in URLs", () => {
    const html = `<p><img src="https://example.com/photo.jpg?a=1&#038;b=2" alt="test"></p>`;
    const result = htmlToPortableText(html);

    const img = result.find((b) => b._type === "image") as PortableTextImageBlock;
    expect(img.asset.url).toBe("https://example.com/photo.jpg?a=1&b=2");
  });

  // Hexadecimal numeric reference for the same character.
  it("decodes &#x26; in URLs", () => {
    const html = `<p><img src="https://example.com/photo.jpg?a=1&#x26;b=2" alt="test"></p>`;
    const result = htmlToPortableText(html);

    const img = result.find((b) => b._type === "image") as PortableTextImageBlock;
    expect(img.asset.url).toBe("https://example.com/photo.jpg?a=1&b=2");
  });

  // Test for figure with HTML entities
  it("decodes HTML entities in figure images", () => {
    const html = `<figure><img src="https://example.com/photo.jpg?w=200&amp;h=300" alt="test"><figcaption>Caption</figcaption></figure>`;
    const result = htmlToPortableText(html);

    const img = result[0] as PortableTextImageBlock;
    expect(img._type).toBe("image");
    expect(img.asset.url).toBe("https://example.com/photo.jpg?w=200&h=300");
    expect(img.caption).toBe("Caption");
  });
});
|
||||
|
||||
// parseGutenbergBlocks returns the parsed block tree; these tests only look
// at blockName, innerHTML, attrs and innerBlocks of the first block.
describe("parseGutenbergBlocks", () => {
  it("parses blocks without converting", () => {
    const markup = `<!-- wp:paragraph -->
<p>Hello</p>
<!-- /wp:paragraph -->`;

    const parsed = parseGutenbergBlocks(markup);

    expect(parsed).toHaveLength(1);
    const paragraph = parsed[0];
    expect(paragraph?.blockName).toBe("core/paragraph");
    expect(paragraph?.innerHTML).toContain("Hello");
  });

  it("returns empty array for empty content", () => {
    const parsed = parseGutenbergBlocks("");

    expect(parsed).toEqual([]);
  });

  it("preserves block attributes", () => {
    const markup = `<!-- wp:heading {"level":3,"align":"center"} -->
<h3>Title</h3>
<!-- /wp:heading -->`;

    const [heading] = parseGutenbergBlocks(markup);

    expect(heading?.attrs).toMatchObject({
      level: 3,
      align: "center",
    });
  });

  it("handles nested blocks", () => {
    const markup = `<!-- wp:columns -->
<div>
<!-- wp:column -->
<div>
<!-- wp:paragraph -->
<p>Nested</p>
<!-- /wp:paragraph -->
</div>
<!-- /wp:column -->
</div>
<!-- /wp:columns -->`;

    const [columns] = parseGutenbergBlocks(markup);

    expect(columns?.blockName).toBe("core/columns");
    expect(columns?.innerBlocks.length).toBeGreaterThan(0);
  });
});
|
||||
379
packages/gutenberg-to-portable-text/tests/inline.test.ts
Normal file
379
packages/gutenberg-to-portable-text/tests/inline.test.ts
Normal file
@@ -0,0 +1,379 @@
|
||||
/**
|
||||
* Tests for inline HTML parsing
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
|
||||
import {
|
||||
parseInlineContent,
|
||||
extractText,
|
||||
extractAlt,
|
||||
extractCaption,
|
||||
extractSrc,
|
||||
} from "../src/inline.js";
|
||||
|
||||
// Counter backing the keys produced by generateKey; shared by every test in
// this file.
let keyCounter = 0;

/** Returns the next key in the sequence "key-1", "key-2", ... */
const generateKey = (): string => {
  keyCounter += 1;
  return `key-${keyCounter}`;
};

// Global matcher used to count newline characters in parsed text.
const NEWLINE_PATTERN = /\n/g;
|
||||
|
||||
describe("parseInlineContent", () => {
|
||||
describe("plain text", () => {
  it("parses plain text", () => {
    const { children, markDefs } = parseInlineContent("Hello world", generateKey);

    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({ _type: "span", text: "Hello world" });
    expect(markDefs).toHaveLength(0);
  });

  it("handles empty string", () => {
    const { children } = parseInlineContent("", generateKey);

    // Even empty input is expected to yield a single (empty) span.
    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({ _type: "span", text: "" });
  });

  it("handles whitespace-only string", () => {
    const { children } = parseInlineContent(" ", generateKey);

    expect(children).toHaveLength(1);
    expect(children[0]?.text).toBe(" ");
  });

  it("preserves newlines in text", () => {
    const { children } = parseInlineContent("line1\nline2", generateKey);

    // The text may be split over several spans, so the content checks run
    // against the concatenation of all span texts.
    expect(children.length).toBeGreaterThanOrEqual(1);
    const joined = children.map((span) => span.text).join("");
    expect(joined).toContain("line1");
    expect(joined).toContain("line2");
  });
});
|
||||
|
||||
describe("basic formatting", () => {
  it("parses <strong> tags", () => {
    const { children } = parseInlineContent("Hello <strong>bold</strong> world", generateKey);

    expect(children).toHaveLength(3);
    const [before, bold, after] = children;
    expect(before).toMatchObject({ text: "Hello " });
    expect(bold).toMatchObject({ text: "bold", marks: ["strong"] });
    expect(after).toMatchObject({ text: " world" });
  });

  // Each remaining case checks that the second span (index 1) carries the
  // tag's text together with exactly the listed mark.
  const singleMarkCases = [
    {
      title: "parses <b> tags as strong",
      html: "Hello <b>bold</b> world",
      text: "bold",
      mark: "strong",
    },
    {
      title: "parses <em> tags",
      html: "Hello <em>italic</em> world",
      text: "italic",
      mark: "em",
    },
    {
      title: "parses <i> tags as em",
      html: "Hello <i>italic</i> world",
      text: "italic",
      mark: "em",
    },
    {
      title: "parses <u> tags",
      html: "Hello <u>underline</u> world",
      text: "underline",
      mark: "underline",
    },
    {
      title: "parses <s> tags as strike-through",
      html: "Hello <s>strikethrough</s> world",
      text: "strikethrough",
      mark: "strike-through",
    },
    {
      title: "parses <del> tags as strike-through",
      html: "Hello <del>deleted</del> world",
      text: "deleted",
      mark: "strike-through",
    },
    {
      title: "parses <code> tags",
      html: "Use <code>const x = 1</code> for variables",
      text: "const x = 1",
      mark: "code",
    },
    {
      title: "parses <sup> tags",
      html: "x<sup>2</sup>",
      text: "2",
      mark: "superscript",
    },
    {
      title: "parses <sub> tags",
      html: "H<sub>2</sub>O",
      text: "2",
      mark: "subscript",
    },
  ];

  for (const { title, html, text, mark } of singleMarkCases) {
    it(title, () => {
      const { children } = parseInlineContent(html, generateKey);

      expect(children[1]).toMatchObject({ text, marks: [mark] });
    });
  }
});
|
||||
|
||||
describe("nested formatting", () => {
  it("handles nested strong and em", () => {
    const { children } = parseInlineContent("<strong><em>bold italic</em></strong>", generateKey);

    // Both marks land on one span; their order is not asserted.
    expect(children).toHaveLength(1);
    expect(children[0]).toMatchObject({
      text: "bold italic",
      marks: expect.arrayContaining(["strong", "em"]),
    });
  });

  it("handles deeply nested marks", () => {
    const { children } = parseInlineContent(
      "<strong><em><code>code</code></em></strong>",
      generateKey,
    );

    const marks = children[0]?.marks;
    for (const expectedMark of ["strong", "em", "code"]) {
      expect(marks).toContain(expectedMark);
    }
  });

  it("handles mixed content with nested marks", () => {
    const { children } = parseInlineContent(
      "Start <strong>bold <em>bold-italic</em> bold</strong> end",
      generateKey,
    );

    expect(children.length).toBeGreaterThanOrEqual(4);

    // Locate the span that is both bold and italic.
    const boldItalic = children.find(
      (span) => span.marks?.includes("strong") && span.marks?.includes("em"),
    );
    expect(boldItalic?.text).toBe("bold-italic");
  });
});
|
||||
|
||||
// Links are reported as markDefs entries; a span refers to its link by
// listing the markDef's _key among its marks.
describe("links", () => {
  it("parses simple links", () => {
    const { children, markDefs } = parseInlineContent(
      'Visit <a href="https://example.com">our site</a>',
      generateKey,
    );

    expect(markDefs).toHaveLength(1);
    expect(markDefs[0]).toMatchObject({
      _type: "link",
      href: "https://example.com",
    });

    const linkKey = markDefs[0]?._key ?? "";
    const linkSpan = children.find((span) => span.marks?.includes(linkKey));
    expect(linkSpan?.text).toBe("our site");
  });

  it("handles links with target=_blank", () => {
    const { markDefs } = parseInlineContent(
      '<a href="https://example.com" target="_blank">link</a>',
      generateKey,
    );

    expect(markDefs[0]).toMatchObject({
      _type: "link",
      href: "https://example.com",
      blank: true,
    });
  });

  it("deduplicates identical links", () => {
    const { children, markDefs } = parseInlineContent(
      '<a href="https://example.com">link1</a> and <a href="https://example.com">link2</a>',
      generateKey,
    );

    // Two anchors with the same href share a single markDef.
    expect(markDefs).toHaveLength(1);

    const linkKey = markDefs[0]?._key ?? "";
    const linkSpans = children.filter((span) => span.marks?.includes(linkKey));
    expect(linkSpans).toHaveLength(2);
  });

  it("creates separate markDefs for different links", () => {
    const { markDefs } = parseInlineContent(
      '<a href="https://a.com">link1</a> and <a href="https://b.com">link2</a>',
      generateKey,
    );

    expect(markDefs).toHaveLength(2);

    const hrefs = markDefs.map((def) => def.href);
    expect(hrefs).toContain("https://a.com");
    expect(hrefs).toContain("https://b.com");
  });

  it("handles links with formatting inside", () => {
    const { children } = parseInlineContent(
      '<a href="https://example.com"><strong>bold link</strong></a>',
      generateKey,
    );

    const boldLink = children.find((span) => span.text === "bold link");
    expect(boldLink?.marks).toContain("strong");
    // Two marks in total: "strong" plus the link's key.
    expect(boldLink?.marks?.length).toBe(2);
  });
});
|
||||
|
||||
// <br> elements are expected to show up as "\n" in the concatenated span text.
describe("line breaks", () => {
  it("handles <br> tags", () => {
    const { children } = parseInlineContent("line1<br>line2", generateKey);

    const joined = children.map((span) => span.text).join("");
    expect(joined).toContain("line1");
    expect(joined).toContain("\n");
    expect(joined).toContain("line2");
  });

  it("handles self-closing <br /> tags", () => {
    const { children } = parseInlineContent("line1<br />line2", generateKey);

    const joined = children.map((span) => span.text).join("");
    expect(joined).toContain("\n");
  });

  it("handles multiple consecutive <br> tags", () => {
    const { children } = parseInlineContent("a<br><br>b", generateKey);

    const joined = children.map((span) => span.text).join("");
    const newlineCount = joined.match(NEWLINE_PATTERN)?.length;
    expect(newlineCount).toBeGreaterThanOrEqual(2);
  });
});
|
||||
|
||||
// A single enclosing block-level element is dropped, leaving only the
// inline content in the first span.
describe("block wrapper stripping", () => {
  it("strips <p> wrapper", () => {
    const { children } = parseInlineContent("<p>content</p>", generateKey);

    expect(children).toHaveLength(1);
    expect(children[0]?.text).toBe("content");
  });

  it("strips heading wrappers", () => {
    const { children } = parseInlineContent("<h2>heading</h2>", generateKey);

    expect(children[0]?.text).toBe("heading");
  });

  it("strips <li> wrapper", () => {
    const { children } = parseInlineContent("<li>list item</li>", generateKey);

    expect(children[0]?.text).toBe("list item");
  });

  it("preserves content when wrapper has attributes", () => {
    const { children } = parseInlineContent('<p class="intro">content</p>', generateKey);

    expect(children[0]?.text).toBe("content");
  });
});
|
||||
});
|
||||
|
||||
// extractText reduces an HTML fragment to its text content.
describe("extractText", () => {
  it("extracts plain text", () => {
    const text = extractText("Hello world");

    expect(text).toBe("Hello world");
  });

  it("strips HTML tags", () => {
    const text = extractText("<p>Hello <strong>world</strong></p>");

    expect(text).toBe("Hello world");
  });

  it("handles nested elements", () => {
    const text = extractText("<div><p>Nested <em>text</em></p></div>");

    expect(text).toBe("Nested text");
  });

  it("handles empty string", () => {
    const text = extractText("");

    expect(text).toBe("");
  });
});
|
||||
|
||||
// extractAlt reads the alt attribute of an <img>; a missing attribute
// (undefined) is distinct from an empty one ("").
describe("extractAlt", () => {
  it("extracts alt from img tag", () => {
    const alt = extractAlt('<img src="photo.jpg" alt="A photo">');

    expect(alt).toBe("A photo");
  });

  it("handles missing alt", () => {
    const alt = extractAlt('<img src="photo.jpg">');

    expect(alt).toBeUndefined();
  });

  it("handles empty alt", () => {
    const alt = extractAlt('<img src="photo.jpg" alt="">');

    expect(alt).toBe("");
  });

  it("handles single quotes", () => {
    const alt = extractAlt("<img src='photo.jpg' alt='A photo'>");

    expect(alt).toBe("A photo");
  });
});
|
||||
|
||||
// extractCaption returns the plain text of a <figcaption>, or undefined
// when the figure has none.
describe("extractCaption", () => {
  it("extracts caption from figcaption", () => {
    const caption = extractCaption("<figure><img><figcaption>My caption</figcaption></figure>");

    expect(caption).toBe("My caption");
  });

  it("strips HTML from caption", () => {
    const caption = extractCaption(
      "<figure><figcaption>Caption with <em>formatting</em></figcaption></figure>",
    );

    expect(caption).toBe("Caption with formatting");
  });

  it("handles missing figcaption", () => {
    const caption = extractCaption("<figure><img></figure>");

    expect(caption).toBeUndefined();
  });
});
|
||||
|
||||
// extractSrc reads the src attribute of an <img>, returning undefined when
// the attribute is absent.
describe("extractSrc", () => {
  it("extracts src from img tag", () => {
    const src = extractSrc('<img src="https://example.com/photo.jpg">');

    expect(src).toBe("https://example.com/photo.jpg");
  });

  it("handles relative URLs", () => {
    const src = extractSrc('<img src="/uploads/photo.jpg">');

    expect(src).toBe("/uploads/photo.jpg");
  });

  it("handles missing src", () => {
    const src = extractSrc("<img alt='no source'>");

    expect(src).toBeUndefined();
  });
});
|
||||
20
packages/gutenberg-to-portable-text/tsconfig.json
Normal file
20
packages/gutenberg-to-portable-text/tsconfig.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ES2022",
|
||||
"module": "preserve",
|
||||
"moduleResolution": "bundler",
|
||||
"strict": true,
|
||||
"declaration": true,
|
||||
"declarationMap": true,
|
||||
"sourceMap": true,
|
||||
"outDir": "dist",
|
||||
"rootDir": "src",
|
||||
"esModuleInterop": true,
|
||||
"skipLibCheck": true,
|
||||
"noUncheckedIndexedAccess": true,
|
||||
"noImplicitOverride": true,
|
||||
"verbatimModuleSyntax": true
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist"]
|
||||
}
|
||||
Reference in New Issue
Block a user