Emdash source with visual editor image upload fix
Fixes: 1. media.ts: wrap placeholder generation in try-catch 2. toolbar.ts: check r.ok, display error message in popover
This commit is contained in:
90
packages/core/src/import/index.ts
Normal file
90
packages/core/src/import/index.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
/**
|
||||
* Import system
|
||||
*
|
||||
* Provides a pluggable system for importing content from various sources.
|
||||
*/
|
||||
|
||||
// Core types
|
||||
export type {
|
||||
ImportSource,
|
||||
ImportAnalysis,
|
||||
ImportContext,
|
||||
SourceInput,
|
||||
FileInput,
|
||||
UrlInput,
|
||||
OAuthInput,
|
||||
SourceProbeResult,
|
||||
ProbeResult,
|
||||
SourceAuth,
|
||||
SourceCapabilities,
|
||||
SuggestedAction,
|
||||
PostTypeAnalysis,
|
||||
ImportFieldDef,
|
||||
FieldCompatibility,
|
||||
CollectionSchemaStatus,
|
||||
AttachmentInfo,
|
||||
NormalizedItem,
|
||||
ImportConfig,
|
||||
ImportResult,
|
||||
FetchOptions,
|
||||
PostTypeMapping,
|
||||
NavMenuAnalysis,
|
||||
TaxonomyAnalysis,
|
||||
} from "./types.js";
|
||||
|
||||
// Menu import
|
||||
export {
|
||||
importMenusFromWxr,
|
||||
importMenusFromPlugin,
|
||||
type MenuImportResult,
|
||||
type PluginMenu,
|
||||
type PluginMenuItem,
|
||||
} from "./menus.js";
|
||||
|
||||
// Sections import
|
||||
export { importReusableBlocksAsSections, type SectionsImportResult } from "./sections.js";
|
||||
|
||||
// Site settings import
|
||||
export {
|
||||
importSiteSettings,
|
||||
parseSiteSettingsFromPlugin,
|
||||
type SiteSettingsAnalysis,
|
||||
type SettingsImportResult,
|
||||
type WidgetAreaAnalysis,
|
||||
} from "./settings.js";
|
||||
|
||||
// Registry
|
||||
export {
|
||||
registerSource,
|
||||
getSource,
|
||||
getAllSources,
|
||||
getFileSources,
|
||||
getUrlSources,
|
||||
probeUrl,
|
||||
clearSources,
|
||||
} from "./registry.js";
|
||||
|
||||
// SSRF protection
|
||||
export { validateExternalUrl, ssrfSafeFetch, SsrfError } from "./ssrf.js";
|
||||
|
||||
// Sources
|
||||
export { wxrSource, parseWxrDate } from "./sources/wxr.js";
|
||||
export { wordpressRestSource } from "./sources/wordpress-rest.js";
|
||||
export {
|
||||
wordpressPluginSource,
|
||||
createBasicAuthToken,
|
||||
fetchPluginMedia,
|
||||
fetchPluginTaxonomies,
|
||||
} from "./sources/wordpress-plugin.js";
|
||||
|
||||
// Auto-register built-in sources
|
||||
import { registerSource } from "./registry.js";
|
||||
import { wordpressPluginSource } from "./sources/wordpress-plugin.js";
|
||||
import { wordpressRestSource } from "./sources/wordpress-rest.js";
|
||||
import { wxrSource } from "./sources/wxr.js";
|
||||
|
||||
// Built-in sources are registered as a side effect of loading this module.
// Order is most specific first: the plugin source leads, so a site that runs
// our exporter plugin is registered ahead of the REST and WXR sources.
for (const builtInSource of [wordpressPluginSource, wordpressRestSource, wxrSource]) {
	registerSource(builtInSource);
}
|
||||
436
packages/core/src/import/menus.ts
Normal file
436
packages/core/src/import/menus.ts
Normal file
@@ -0,0 +1,436 @@
|
||||
/**
|
||||
* Menu import functions
|
||||
*
|
||||
* Import navigation menus from WordPress WXR exports or plugin API.
|
||||
*/
|
||||
|
||||
import type { Kysely } from "kysely";
|
||||
import { ulid } from "ulidx";
|
||||
|
||||
import type { WxrNavMenu, WxrNavMenuItem } from "../cli/wxr/parser.js";
|
||||
import type { Database } from "../database/types.js";
|
||||
import type { MenuItemType } from "../menus/types.js";
|
||||
|
||||
/**
 * Outcome of a menu import run.
 */
export interface MenuImportResult {
	/** How many menus were newly inserted */
	menusCreated: number;
	/** How many menu items were newly inserted */
	itemsCreated: number;
	/** WP menu slug -> EmDash menu ID (includes menus that already existed) */
	menuIdMap: Map<string, string>;
	/** Failures collected per menu while importing */
	errors: { menu: string; error: string }[];
}
|
||||
|
||||
/**
 * Menu in the plugin API format (mirrors the /emdash/v1/menus response).
 */
export interface PluginMenu {
	id: number;
	/** Menu slug */
	name: string;
	label: string;
	items: Array<PluginMenuItem>;
}
|
||||
|
||||
/**
 * Single menu entry in the plugin API format.
 */
export interface PluginMenuItem {
	id: number;
	/** Plugin-side ID of the parent item; the importer treats falsy values as "no parent" */
	parent_id: null | number;
	sort_order: number;
	type: "custom" | "post_type" | "taxonomy";
	/** WordPress object name, e.g. 'page', 'post', 'category' */
	object: null | string;
	object_id: null | number;
	url: string;
	title: string;
	target: null | string;
	classes: null | string;
}
|
||||
|
||||
/**
 * Create EmDash menus (and their items) from menus parsed out of a WXR export.
 *
 * A menu whose `name` already exists in `_emdash_menus` is not modified; its
 * existing ID is only recorded in the returned map. A failure while handling
 * one menu is recorded in `errors` and the loop moves on to the next menu.
 *
 * @param menus - Parsed navigation menus from WXR
 * @param db - Database connection
 * @param contentIdMap - WP post ID -> EmDash content ID, used to resolve item references
 * @returns Counts of created menus/items, the slug -> ID map and collected errors
 */
export async function importMenusFromWxr(
	menus: WxrNavMenu[],
	db: Kysely<Database>,
	contentIdMap: Map<number, string>,
): Promise<MenuImportResult> {
	const menuIdMap = new Map<string, string>();
	const errors: MenuImportResult["errors"] = [];
	let menusCreated = 0;
	let itemsCreated = 0;

	for (const menu of menus) {
		try {
			// Look the menu up by slug first; pre-existing menus are left untouched.
			const found = await db
				.selectFrom("_emdash_menus")
				.select("id")
				.where("name", "=", menu.name)
				.executeTakeFirst();

			if (found) {
				menuIdMap.set(menu.name, found.id);
				continue;
			}

			const newMenuId = ulid();
			await db
				.insertInto("_emdash_menus")
				.values({ id: newMenuId, name: menu.name, label: menu.label })
				.execute();

			menusCreated += 1;
			menuIdMap.set(menu.name, newMenuId);

			// Items are only imported for menus created in this run.
			const createdForMenu = await importWxrMenuItems(menu.items, newMenuId, db, contentIdMap);
			itemsCreated += createdForMenu;
		} catch (cause) {
			errors.push({
				menu: menu.name,
				error: cause instanceof Error ? cause.message : String(cause),
			});
		}
	}

	return { menusCreated, itemsCreated, menuIdMap, errors };
}
|
||||
|
||||
/**
 * Create EmDash menus (and their items) from menus delivered by the plugin API.
 *
 * A menu whose `name` already exists in `_emdash_menus` is not modified; its
 * existing ID is only recorded in the returned map. A failure while handling
 * one menu is recorded in `errors` and the loop moves on to the next menu.
 *
 * @param menus - Menus from plugin API
 * @param db - Database connection
 * @param contentIdMap - WP post ID -> EmDash content ID, used to resolve item references
 * @returns Counts of created menus/items, the slug -> ID map and collected errors
 */
export async function importMenusFromPlugin(
	menus: PluginMenu[],
	db: Kysely<Database>,
	contentIdMap: Map<number, string>,
): Promise<MenuImportResult> {
	const menuIdMap = new Map<string, string>();
	const errors: MenuImportResult["errors"] = [];
	let menusCreated = 0;
	let itemsCreated = 0;

	for (const menu of menus) {
		try {
			// Look the menu up by slug first; pre-existing menus are left untouched.
			const found = await db
				.selectFrom("_emdash_menus")
				.select("id")
				.where("name", "=", menu.name)
				.executeTakeFirst();

			if (found) {
				menuIdMap.set(menu.name, found.id);
				continue;
			}

			const newMenuId = ulid();
			await db
				.insertInto("_emdash_menus")
				.values({ id: newMenuId, name: menu.name, label: menu.label })
				.execute();

			menusCreated += 1;
			menuIdMap.set(menu.name, newMenuId);

			// Items are only imported for menus created in this run.
			const createdForMenu = await importPluginMenuItems(menu.items, newMenuId, db, contentIdMap);
			itemsCreated += createdForMenu;
		} catch (cause) {
			errors.push({
				menu: menu.name,
				error: cause instanceof Error ? cause.message : String(cause),
			});
		}
	}

	return { menusCreated, itemsCreated, menuIdMap, errors };
}
|
||||
|
||||
/**
 * Insert the items of one WXR menu into `_emdash_menu_items`.
 *
 * Runs in two passes: every item is first inserted without a parent, then
 * parent links are written once all new IDs are known. A parent link is only
 * written when both the item and its parent were part of this import.
 *
 * @returns Number of items inserted
 */
async function importWxrMenuItems(
	items: WxrNavMenuItem[],
	menuId: string,
	db: Kysely<Database>,
	contentIdMap: Map<number, string>,
): Promise<number> {
	// WP menu item ID -> freshly generated EmDash item ID
	const newIdByWpId = new Map<number, string>();
	const ordered = items.toSorted((left, right) => left.sortOrder - right.sortOrder);
	let inserted = 0;

	// Pass 1: insert every item with a null parent.
	for (const wpItem of ordered) {
		const newId = ulid();
		newIdByWpId.set(wpItem.id, newId);

		const mapped = mapWxrMenuItem(wpItem, contentIdMap);

		await db
			.insertInto("_emdash_menu_items")
			.values({
				id: newId,
				menu_id: menuId,
				parent_id: null, // filled in by pass 2
				sort_order: wpItem.sortOrder,
				type: mapped.type,
				reference_collection: mapped.collection,
				reference_id: mapped.referenceId,
				custom_url: mapped.customUrl,
				label: wpItem.title,
				title_attr: null,
				target: wpItem.target || null,
				css_classes: wpItem.classes || null,
			})
			.execute();

		inserted += 1;
	}

	// Pass 2: link children to parents now that every ID is known.
	for (const wpItem of ordered) {
		if (!wpItem.parentId) continue;

		const childId = newIdByWpId.get(wpItem.id);
		const parentId = newIdByWpId.get(wpItem.parentId);
		if (!childId || !parentId) continue;

		await db
			.updateTable("_emdash_menu_items")
			.set({ parent_id: parentId })
			.where("id", "=", childId)
			.execute();
	}

	return inserted;
}
|
||||
|
||||
/**
 * Insert the items of one plugin-API menu into `_emdash_menu_items`.
 *
 * Runs in two passes: every item is first inserted without a parent, then
 * parent links are written once all new IDs are known. A parent link is only
 * written when both the item and its parent were part of this import.
 *
 * @returns Number of items inserted
 */
async function importPluginMenuItems(
	items: PluginMenuItem[],
	menuId: string,
	db: Kysely<Database>,
	contentIdMap: Map<number, string>,
): Promise<number> {
	// Plugin menu item ID -> freshly generated EmDash item ID
	const newIdByPluginId = new Map<number, string>();
	const ordered = items.toSorted((left, right) => left.sort_order - right.sort_order);
	let inserted = 0;

	// Pass 1: insert every item with a null parent.
	for (const pluginItem of ordered) {
		const newId = ulid();
		newIdByPluginId.set(pluginItem.id, newId);

		const mapped = mapPluginMenuItem(pluginItem, contentIdMap);

		await db
			.insertInto("_emdash_menu_items")
			.values({
				id: newId,
				menu_id: menuId,
				parent_id: null, // filled in by pass 2
				sort_order: pluginItem.sort_order,
				type: mapped.type,
				reference_collection: mapped.collection,
				reference_id: mapped.referenceId,
				custom_url: mapped.customUrl,
				label: pluginItem.title,
				title_attr: null,
				target: pluginItem.target || null,
				css_classes: pluginItem.classes || null,
			})
			.execute();

		inserted += 1;
	}

	// Pass 2: link children to parents now that every ID is known.
	for (const pluginItem of ordered) {
		if (!pluginItem.parent_id) continue;

		const childId = newIdByPluginId.get(pluginItem.id);
		const parentId = newIdByPluginId.get(pluginItem.parent_id);
		if (!childId || !parentId) continue;

		await db
			.updateTable("_emdash_menu_items")
			.set({ parent_id: parentId })
			.where("id", "=", childId)
			.execute();
	}

	return inserted;
}
|
||||
|
||||
/**
 * Translate a WXR menu item into the fields stored for an EmDash menu item.
 *
 * Only "post_type" items can become content references. Every other item type
 * (including taxonomy items, which are not supported as references here)
 * becomes a custom link, with "#" as the URL when the item has none.
 */
function mapWxrMenuItem(
	item: WxrNavMenuItem,
	contentIdMap: Map<number, string>,
): {
	type: MenuItemType;
	collection: string | null;
	referenceId: string | null;
	customUrl: string | null;
} {
	if (item.type === "post_type") {
		const collection = mapObjectToCollection(item.objectType);
		const referenceId = item.objectId ? contentIdMap.get(item.objectId) || null : null;

		// Keep the reference form when the target resolved, or when there is no
		// URL to fall back to; otherwise drop through to the custom-link form.
		if (referenceId || !item.url) {
			return {
				type: collection === "pages" ? "page" : "post",
				collection,
				referenceId,
				customUrl: null,
			};
		}
	}

	return {
		type: "custom",
		collection: null,
		referenceId: null,
		customUrl: item.url || "#",
	};
}
|
||||
|
||||
/**
 * Translate a plugin-API menu item into the fields stored for an EmDash menu item.
 *
 * Only "post_type" items can become content references. Every other item type
 * becomes a custom link, with "#" as the URL when the item has none.
 */
function mapPluginMenuItem(
	item: PluginMenuItem,
	contentIdMap: Map<number, string>,
): {
	type: MenuItemType;
	collection: string | null;
	referenceId: string | null;
	customUrl: string | null;
} {
	if (item.type === "post_type") {
		const collection = mapObjectToCollection(item.object);
		const referenceId = item.object_id ? contentIdMap.get(item.object_id) || null : null;

		// Keep the reference form when the target resolved, or when there is no
		// URL to fall back to; otherwise drop through to the custom-link form.
		if (referenceId || !item.url) {
			return {
				type: collection === "pages" ? "page" : "post",
				collection,
				referenceId,
				customUrl: null,
			};
		}
	}

	return {
		type: "custom",
		collection: null,
		referenceId: null,
		customUrl: item.url || "#",
	};
}
|
||||
|
||||
/**
 * Known WordPress object types and the EmDash collection each one maps to.
 *
 * Held in a Map rather than an object literal so that a lookup can never
 * resolve to an inherited Object.prototype member (e.g. "constructor").
 */
const WP_OBJECT_TO_COLLECTION: ReadonlyMap<string, string> = new Map([
	["post", "posts"],
	["page", "pages"],
	["product", "products"],
	["portfolio", "portfolio"],
]);

/**
 * Map WordPress object type to EmDash collection name
 *
 * @param objectType - WordPress object/post type name; may be missing
 * @returns "posts" when `objectType` is empty, null or undefined; the mapped
 *   collection for a known type; otherwise `objectType` itself, unchanged
 */
function mapObjectToCollection(objectType: string | undefined | null): string {
	if (!objectType) return "posts";

	return WP_OBJECT_TO_COLLECTION.get(objectType) ?? objectType;
}
|
||||
112
packages/core/src/import/registry.ts
Normal file
112
packages/core/src/import/registry.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
/**
|
||||
* Import source registry
|
||||
*
|
||||
* Manages available import sources and provides URL probing.
|
||||
*/
|
||||
|
||||
import { resolveAndValidateExternalUrl } from "./ssrf.js";
|
||||
import type { ImportSource, ProbeResult, SourceProbeResult } from "./types.js";
|
||||
|
||||
// Matches a run of one or more slashes at the very end of a string
const TRAILING_SLASHES_PATTERN = /\/+$/;

/** Import sources registered so far, keyed by source id */
const sources: Map<string, ImportSource> = new Map();
|
||||
|
||||
/**
 * Add an import source to the registry.
 *
 * The source is stored under its `id`; registering another source with the
 * same id replaces the stored one.
 */
export function registerSource(source: ImportSource): void {
	const { id } = source;
	sources.set(id, source);
}
|
||||
|
||||
/**
 * Look up a registered source.
 *
 * @returns The source stored under `id`, or undefined when none is registered
 */
export function getSource(id: string): ImportSource | undefined {
	const registered = sources.get(id);
	return registered;
}
|
||||
|
||||
/**
 * List every registered source.
 *
 * @returns A new array on each call, so callers may mutate it freely
 */
export function getAllSources(): ImportSource[] {
	return Array.from(sources.values());
}
|
||||
|
||||
/**
 * List the registered sources whose `requiresFile` flag is truthy,
 * i.e. the ones that work from file uploads.
 */
export function getFileSources(): ImportSource[] {
	const fileSources: ImportSource[] = [];
	for (const candidate of getAllSources()) {
		if (candidate.requiresFile) {
			fileSources.push(candidate);
		}
	}
	return fileSources;
}
|
||||
|
||||
/**
 * List the registered sources whose `canProbe` flag is truthy,
 * i.e. the ones that can probe URLs.
 */
export function getUrlSources(): ImportSource[] {
	const probingSources: ImportSource[] = [];
	for (const candidate of getAllSources()) {
		if (candidate.canProbe) {
			probingSources.push(candidate);
		}
	}
	return probingSources;
}
|
||||
|
||||
// Matches an explicit http:// or https:// scheme prefix, in any letter case
const HTTP_SCHEME_PATTERN = /^https?:\/\//i;

/**
 * Probe a URL against all registered sources
 *
 * The input is trimmed, given an `https://` prefix when it has no http(s)
 * scheme, and stripped of trailing slashes. The normalized URL is validated
 * by `resolveAndValidateExternalUrl` before any source is contacted.
 *
 * @param url - Site URL or bare host name entered by the user
 * @returns Probe results sorted by confidence (definite > likely > possible);
 *   `bestMatch` is the first of them, or null when no source matched
 * @throws Whatever `resolveAndValidateExternalUrl` throws for a rejected URL
 *   (NOTE(review): presumably SsrfError - confirm against ssrf.ts). Failures
 *   of individual source probes are logged and do not reject.
 */
export async function probeUrl(url: string): Promise<ProbeResult> {
	// Normalize URL
	let normalizedUrl = url.trim();
	// Test for the full "http(s)://" prefix, case-insensitively. A plain
	// startsWith("http") would leave hosts such as "httpbin.org" without a
	// scheme and would turn "HTTP://host" into "https://HTTP://host".
	if (!HTTP_SCHEME_PATTERN.test(normalizedUrl)) {
		normalizedUrl = `https://${normalizedUrl}`;
	}

	// Remove trailing slash for consistency
	normalizedUrl = normalizedUrl.replace(TRAILING_SLASHES_PATTERN, "");

	// SSRF: reject internal/private network targets. DNS resolution
	// catches hostnames that resolve to private addresses.
	await resolveAndValidateExternalUrl(normalizedUrl);

	const results: SourceProbeResult[] = [];
	const urlSources = getUrlSources();

	// Probe all sources in parallel
	const probePromises = urlSources.map(async (source) => {
		try {
			const result = await source.probe?.(normalizedUrl);
			if (result) {
				return result;
			}
		} catch (error) {
			// Probe failed, skip this source
			console.debug(`Probe failed for ${source.id}:`, error);
		}
		return null;
	});

	const probeResults = await Promise.allSettled(probePromises);

	for (const result of probeResults) {
		if (result.status === "fulfilled" && result.value) {
			results.push(result.value);
		}
	}

	// Sort by confidence; lower rank sorts first
	const confidenceOrder = { definite: 0, likely: 1, possible: 2 };
	results.sort((a, b) => confidenceOrder[a.confidence] - confidenceOrder[b.confidence]);

	return {
		url: normalizedUrl,
		isWordPress: results.length > 0,
		bestMatch: results[0] ?? null,
		allMatches: results,
	};
}
|
||||
|
||||
/**
 * Empty the registry.
 *
 * Removes every registered source, including the built-in ones; intended
 * mainly for tests.
 */
export function clearSources(): void {
	sources.clear();
}
|
||||
103
packages/core/src/import/sections.ts
Normal file
103
packages/core/src/import/sections.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
/**
|
||||
* Sections import functions
|
||||
*
|
||||
* Import reusable blocks from WordPress WXR exports as EmDash sections.
|
||||
*/
|
||||
|
||||
import type { PortableTextBlock } from "@emdash-cms/gutenberg-to-portable-text";
|
||||
import { gutenbergToPortableText } from "@emdash-cms/gutenberg-to-portable-text";
|
||||
import type { Kysely } from "kysely";
|
||||
import { ulid } from "ulidx";
|
||||
|
||||
import type { WxrPost } from "../cli/wxr/parser.js";
|
||||
import type { Database } from "../database/types.js";
|
||||
import { slugify } from "../utils/slugify.js";
|
||||
|
||||
/**
 * Outcome of a sections import run.
 */
export interface SectionsImportResult {
	/** How many sections were newly inserted */
	sectionsCreated: number;
	/** How many blocks were skipped because a section with the same slug exists */
	sectionsSkipped: number;
	/** Failures collected per block while importing */
	errors: { title: string; error: string }[];
}
|
||||
|
||||
/**
 * Turn WordPress reusable blocks (posts of type `wp_block`) into EmDash sections.
 *
 * Posts of any other type are ignored. A block is skipped when a section with
 * the same slug already exists. A failure on one block is recorded in `errors`
 * and the remaining blocks are still processed.
 *
 * @param posts - All posts from WXR (only wp_block entries are used)
 * @param db - Database connection
 * @returns Counts of created/skipped sections plus collected errors
 */
export async function importReusableBlocksAsSections(
	posts: WxrPost[],
	db: Kysely<Database>,
): Promise<SectionsImportResult> {
	const result: SectionsImportResult = {
		sectionsCreated: 0,
		sectionsSkipped: 0,
		errors: [],
	};

	for (const block of posts) {
		if (block.postType !== "wp_block") continue;

		try {
			// Prefer the WP post name; otherwise derive a slug from the title,
			// and as a last resort from the post ID or the current time.
			const slug = block.postName || slugify(block.title || `block-${block.id || Date.now()}`);

			const alreadyThere = await db
				.selectFrom("_emdash_sections")
				.select("id")
				.where("slug", "=", slug)
				.executeTakeFirst();

			if (alreadyThere) {
				result.sectionsSkipped += 1;
				continue;
			}

			// Gutenberg markup -> Portable Text; empty content becomes an empty list.
			let content: PortableTextBlock[] = [];
			if (block.content) {
				content = gutenbergToPortableText(block.content);
			}

			const id = ulid();
			const timestamp = new Date().toISOString();

			await db
				.insertInto("_emdash_sections")
				.values({
					id,
					slug,
					title: block.title || "Untitled Block",
					description: null,
					keywords: null,
					content: JSON.stringify(content),
					preview_media_id: null,
					source: "import",
					theme_id: null,
					created_at: timestamp,
					updated_at: timestamp,
				})
				.execute();

			result.sectionsCreated += 1;
		} catch (cause) {
			result.errors.push({
				title: block.title || "Untitled Block",
				error: cause instanceof Error ? cause.message : String(cause),
			});
		}
	}

	return result;
}
|
||||
281
packages/core/src/import/settings.ts
Normal file
281
packages/core/src/import/settings.ts
Normal file
@@ -0,0 +1,281 @@
|
||||
/**
|
||||
* Site settings import functions
|
||||
*
|
||||
* Import site settings from WordPress (title, tagline, logo, favicon, etc.)
|
||||
*/
|
||||
|
||||
import type { Kysely } from "kysely";
|
||||
|
||||
import type { Database } from "../database/types.js";
|
||||
|
||||
/**
 * Site settings gathered from an import source. Every field is optional.
 */
export interface SiteSettingsAnalysis {
	/** Site title */
	title?: string;
	/** Site tagline/description */
	tagline?: string;
	/** Custom logo: its URL and, when known, the source-side attachment ID */
	logo?: {
		url: string;
		id?: number;
	};
	/** Favicon/site icon: its URL and, when known, the source-side attachment ID */
	favicon?: {
		url: string;
		id?: number;
	};
	/** What the front page shows: the posts listing or one specific page */
	frontPage?: {
		type: "page" | "posts";
		pageId?: number;
	};
	/** SEO settings (Yoast, RankMath, etc.), kept as opaque data */
	seo?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Summary of one widget area found in the import source.
 */
export interface WidgetAreaAnalysis {
	/** Widget area ID */
	id: string;
	/** Widget area name */
	name: string;
	/** Widget area label */
	label: string;
	/** Number of widgets */
	widgetCount: number;
	/** One summary entry per widget: its type and, optionally, its title */
	widgets: { type: string; title?: string }[];
}
|
||||
|
||||
/**
 * Outcome of a site settings import run.
 */
export interface SettingsImportResult {
	/** Option names that were written */
	applied: string[];
	/** Option names left alone because a value already existed */
	skipped: string[];
	/** Failures collected per setting */
	errors: { setting: string; error: string }[];
}
|
||||
|
||||
/**
 * Write analysed site settings into the options table.
 *
 * Settings are handled in a fixed order: title, tagline, logo URL, favicon
 * URL, front page, SEO. Empty or missing values are ignored. Each setting is
 * attempted independently, so a failure is recorded in `errors` without
 * stopping the remaining settings.
 *
 * @param settings - Site settings analysis
 * @param db - Database connection
 * @param overwrite - When false (default), options that already exist are reported as skipped
 * @returns Names of applied and skipped options plus collected errors
 */
export async function importSiteSettings(
	settings: SiteSettingsAnalysis,
	db: Kysely<Database>,
	overwrite = false,
): Promise<SettingsImportResult> {
	const result: SettingsImportResult = {
		applied: [],
		skipped: [],
		errors: [],
	};

	// Stores one option and files its name under applied or skipped.
	const store = async (optionName: string, optionValue: string): Promise<void> => {
		const written = await setOption(db, optionName, optionValue, overwrite);
		if (written) {
			result.applied.push(optionName);
		} else {
			result.skipped.push(optionName);
		}
	};

	// Records a failure under the given setting name.
	const noteFailure = (setting: string, cause: unknown): void => {
		result.errors.push({
			setting,
			error: cause instanceof Error ? cause.message : String(cause),
		});
	};

	// Plain string settings. Logo and favicon only store the URL here;
	// the actual media import is handled separately.
	const stringSettings: Array<[string, string | undefined]> = [
		["site_title", settings.title],
		["site_tagline", settings.tagline],
		["site_logo_url", settings.logo?.url],
		["site_favicon_url", settings.favicon?.url],
	];

	for (const [optionName, optionValue] of stringSettings) {
		if (!optionValue) continue;
		try {
			await store(optionName, optionValue);
		} catch (cause) {
			noteFailure(optionName, cause);
		}
	}

	// Front page: type and (optional) page ID share one error bucket.
	const { frontPage } = settings;
	if (frontPage) {
		try {
			await store("front_page_type", frontPage.type);
			if (frontPage.pageId) {
				await store("front_page_id", String(frontPage.pageId));
			}
		} catch (cause) {
			noteFailure("front_page", cause);
		}
	}

	// SEO settings are stored as a single JSON blob.
	if (settings.seo && Object.keys(settings.seo).length > 0) {
		try {
			await store("seo_settings", JSON.stringify(settings.seo));
		} catch (cause) {
			noteFailure("seo_settings", cause);
		}
	}

	return result;
}
|
||||
|
||||
/**
 * Store one option row, respecting the overwrite flag.
 *
 * Looks the option up by name first. A missing option is inserted; an
 * existing one is updated only when `overwrite` is true.
 *
 * @returns true if the option was written, false if it already existed and
 *   `overwrite` was false
 */
async function setOption(
	db: Kysely<Database>,
	key: string,
	value: string,
	overwrite: boolean,
): Promise<boolean> {
	const current = await db
		.selectFrom("options")
		.select("value")
		.where("name", "=", key)
		.executeTakeFirst();

	if (!current) {
		await db.insertInto("options").values({ name: key, value }).execute();
		return true;
	}

	if (!overwrite) {
		return false;
	}

	await db.updateTable("options").set({ value }).where("name", "=", key).execute();
	return true;
}
|
||||
|
||||
/**
 * Build a SiteSettingsAnalysis from the WordPress plugin's options response.
 *
 * Values of an unexpected type are ignored. `frontPage` is always set: it is
 * "page" only when `show_on_front` equals "page", and "posts" otherwise.
 * `seo` is only set when at least one Yoast option object is present.
 */
export function parseSiteSettingsFromPlugin(
	options: Record<string, unknown>,
): SiteSettingsAnalysis {
	// Attachment/page IDs are only accepted when they arrive as numbers.
	const numberOrUndefined = (candidate: unknown): number | undefined =>
		typeof candidate === "number" ? candidate : undefined;

	const parsed: SiteSettingsAnalysis = {};
	const {
		blogname,
		blogdescription,
		custom_logo_url: logoUrl,
		site_icon_url: iconUrl,
	} = options;

	// Basic settings
	if (typeof blogname === "string") {
		parsed.title = blogname;
	}
	if (typeof blogdescription === "string") {
		parsed.tagline = blogdescription;
	}

	// Logo and favicon
	if (typeof logoUrl === "string") {
		parsed.logo = { url: logoUrl, id: numberOrUndefined(options.custom_logo) };
	}
	if (typeof iconUrl === "string") {
		parsed.favicon = { url: iconUrl, id: numberOrUndefined(options.site_icon) };
	}

	// Front page settings
	parsed.frontPage =
		options.show_on_front === "page"
			? { type: "page", pageId: numberOrUndefined(options.page_on_front) }
			: { type: "posts" };

	// SEO settings (Yoast): copy each option object under its own key
	const yoastOptions: Array<[string, string]> = [
		["yoast", "wpseo"],
		["yoast_titles", "wpseo_titles"],
		["yoast_social", "wpseo_social"],
	];
	const seo: Record<string, unknown> = {};
	for (const [targetKey, optionKey] of yoastOptions) {
		const raw = options[optionKey];
		if (typeof raw === "object" && raw !== null) {
			seo[targetKey] = raw;
		}
	}
	if (Object.keys(seo).length > 0) {
		parsed.seo = seo;
	}

	return parsed;
}
|
||||
641
packages/core/src/import/sources/wordpress-plugin.ts
Normal file
641
packages/core/src/import/sources/wordpress-plugin.ts
Normal file
@@ -0,0 +1,641 @@
|
||||
/**
|
||||
* WordPress Plugin (EmDash Exporter) import source
|
||||
*
|
||||
* Connects to self-hosted WordPress sites running the EmDash Exporter plugin.
|
||||
* Provides full access to all content including drafts, custom post types, and ACF fields.
|
||||
*/
|
||||
|
||||
import { gutenbergToPortableText } from "@emdash-cms/gutenberg-to-portable-text";
|
||||
|
||||
import { encodeBase64 } from "../../utils/base64.js";
|
||||
import { ssrfSafeFetch, validateExternalUrl } from "../ssrf.js";
|
||||
import type {
|
||||
ImportSource,
|
||||
ImportAnalysis,
|
||||
ImportContext,
|
||||
SourceInput,
|
||||
SourceProbeResult,
|
||||
I18nDetection,
|
||||
FetchOptions,
|
||||
NormalizedItem,
|
||||
PostTypeAnalysis,
|
||||
AttachmentInfo,
|
||||
} from "../types.js";
|
||||
import {
|
||||
BASE_REQUIRED_FIELDS,
|
||||
FEATURED_IMAGE_FIELD,
|
||||
mapPostTypeToCollection,
|
||||
mapWpStatus,
|
||||
normalizeUrl,
|
||||
checkSchemaCompatibility,
|
||||
} from "../utils.js";
|
||||
|
||||
// =============================================================================
|
||||
// API Response Types
|
||||
// =============================================================================
|
||||
|
||||
/** Multilingual plugin details reported by the WordPress site */
interface PluginI18nInfo {
	/** The active multilingual plugin */
	plugin: "polylang" | "wpml";
	/** Default locale as a BCP 47 tag */
	default_locale: string;
	/** Every configured locale */
	locales: Array<string>;
}
|
||||
|
||||
/** Shape of the response returned by /emdash/v1/probe */
interface PluginProbeResponse {
	emdash_exporter: string;
	wordpress_version: string;
	site: {
		title: string;
		description: string;
		url: string;
		home: string;
		language: string;
		timezone: string;
	};
	capabilities: {
		application_passwords: boolean;
		acf: boolean;
		yoast: boolean;
		rankmath: boolean;
	};
	post_types: {
		name: string;
		label: string;
		count: number;
	}[];
	media_count: number;
	endpoints: Record<string, string>;
	auth_instructions: {
		method: string;
		instructions: string;
		url?: string;
	};
	/** Present only when a multilingual plugin (WPML or Polylang) is active */
	i18n?: PluginI18nInfo;
}
|
||||
|
||||
/**
 * JSON body returned by `/emdash/v1/analyze`.
 */
interface PluginAnalyzeResponse {
	/** Site identity copied into the resulting analysis */
	site: {
		title: string;
		url: string;
	};
	/** One entry per post type; `analyze()` ignores entries whose `total` is 0 */
	post_types: {
		name: string;
		label: string;
		label_singular: string;
		total: number;
		by_status: Record<string, number>;
		/** A `thumbnail` key here makes the featured-image field required */
		supports: Record<string, unknown>;
		taxonomies: string[];
		custom_fields: {
			key: string;
			count: number;
			inferred_type: string;
			sample: string | null;
		}[];
		hierarchical: boolean;
		has_archive: boolean;
	}[];
	/** Taxonomy summaries; `category` and `post_tag` supply the category/tag counts */
	taxonomies: {
		name: string;
		label: string;
		hierarchical: boolean;
		term_count: number;
		object_types: string[];
	}[];
	/** Authors with their post counts */
	authors: {
		id: number;
		login: string;
		email: string;
		display_name: string;
		post_count: number;
	}[];
	/** Attachment totals; a non-zero `count` triggers a media-list fetch */
	attachments: {
		count: number;
		by_type: Record<string, number>;
	};
	/** ACF field groups, when present in the response */
	acf?: {
		key: string;
		title: string;
		fields: {
			key: string;
			name: string;
			label: string;
			type: string;
			required: boolean;
		}[];
	}[];
	/** Detected multilingual plugin (WPML or Polylang). Absent when neither is active. */
	i18n?: PluginI18nInfo;
}
|
||||
|
||||
/**
 * One page of results from `/emdash/v1/content`.
 */
interface PluginContentResponse {
	/** Posts on this page */
	items: Array<PluginPost>;
	total: number;
	/** Total page count; `fetchContent` keeps requesting until it reaches this */
	pages: number;
	page: number;
	per_page: number;
}
|
||||
|
||||
/**
 * A single post as delivered by the plugin API.
 * Turned into a `NormalizedItem` by `pluginPostToNormalizedItem`.
 */
interface PluginPost {
	id: number;
	post_type: string;
	status: string;
	slug: string;
	title: string;
	/** Raw post content; converted with `gutenbergToPortableText` when non-empty */
	content: string;
	excerpt: string;
	date: string;
	/** Preferred over `date` when non-empty */
	date_gmt: string;
	modified: string;
	/** Preferred over `modified` when non-empty */
	modified_gmt: string;
	/** Post author; only `login` is carried over to the normalized item */
	author: {
		id: number;
		login: string;
		email: string;
		display_name: string;
	} | null;
	parent: number | null;
	menu_order: number;
	/** Terms keyed by taxonomy name */
	taxonomies: Record<string, { id: number; name: string; slug: string }[]>;
	/** Featured image details; only `url` is carried over to the normalized item */
	featured_image?: {
		id: number;
		url: string;
		filename: string;
		mime_type: string;
		alt: string;
		title: string;
		caption: string;
		width: number | null;
		height: number | null;
	};
	meta: Record<string, unknown>;
	/** Stored under `meta._acf` on the normalized item */
	acf?: Record<string, unknown>;
	/** Stored under `meta._yoast` on the normalized item */
	yoast?: Record<string, string>;
	/** Stored under `meta._rankmath` on the normalized item */
	rankmath?: Record<string, string>;
	/** BCP 47 locale from WPML/Polylang (when detected) */
	locale?: string;
	/** Translation group ID from WPML trid or Polylang (when detected) */
	translation_group?: string;
}
|
||||
|
||||
/**
 * One page of results from `/emdash/v1/media`.
 */
interface PluginMediaResponse {
	/** Media items on this page */
	items: Array<PluginMediaItem>;
	total: number;
	pages: number;
	page: number;
	per_page: number;
}
|
||||
|
||||
/**
 * A single media-library entry from the plugin API.
 */
interface PluginMediaItem {
	id: number;
	url: string;
	filename: string;
	mime_type: string;
	title: string;
	alt: string;
	caption: string;
	description: string;
	/** Optional: the API may omit the dimensions */
	width?: number;
	height?: number;
	/** Optional: the API may omit the file size */
	filesize?: number;
}
|
||||
|
||||
// =============================================================================
|
||||
// Constants
|
||||
// =============================================================================
|
||||
|
||||
/** Pattern to remove spaces from application passwords */
|
||||
// The `g` flag makes String.replace strip every whitespace character, not just the first match.
const SPACE_PATTERN = /\s/g;
|
||||
|
||||
// =============================================================================
|
||||
// Import Source
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Import source that talks to the EmDash Exporter plugin's REST endpoints
 * under `/wp-json/emdash/v1/`.
 */
export const wordpressPluginSource: ImportSource = {
	id: "wordpress-plugin",
	name: "WordPress (EmDash Exporter)",
	description: "Import from WordPress sites with the EmDash Exporter plugin installed",
	icon: "plug",
	requiresFile: false,
	canProbe: true,

	/**
	 * Check whether `url` hosts the exporter plugin.
	 *
	 * @returns A probe result, or `null` when the probe endpoint is missing,
	 * does not identify itself as the exporter, or any step throws.
	 */
	async probe(url: string): Promise<SourceProbeResult | null> {
		try {
			const siteUrl = normalizeUrl(url);

			// SSRF protection: validate URL before any outbound requests
			validateExternalUrl(siteUrl);

			const probeUrl = `${siteUrl}/wp-json/emdash/v1/probe`;

			const response = await ssrfSafeFetch(probeUrl, {
				headers: { Accept: "application/json" },
				signal: AbortSignal.timeout(10000),
			});

			if (!response.ok) {
				return null;
			}

			const data: PluginProbeResponse = await response.json();

			// Verify it's actually our plugin
			if (!data.emdash_exporter) {
				return null;
			}

			return {
				sourceId: "wordpress-plugin",
				confidence: "definite",
				detected: {
					platform: "wordpress",
					version: data.wordpress_version,
					siteTitle: data.site.title,
					siteUrl: data.site.url,
				},
				capabilities: {
					publicContent: true,
					privateContent: true, // Full access with auth
					customPostTypes: true,
					allMeta: true,
					mediaStream: true,
				},
				auth: data.capabilities.application_passwords
					? {
							type: "password",
							instructions: data.auth_instructions.instructions,
						}
					: undefined,
				preview: {
					posts: data.post_types.find((p) => p.name === "post")?.count,
					pages: data.post_types.find((p) => p.name === "page")?.count,
					media: data.media_count,
				},
				suggestedAction: {
					type: "proceed",
				},
				i18n: pluginI18nToDetection(data.i18n),
			};
		} catch {
			// Any failure (bad URL, network error, unexpected body) means "not detected"
			return null;
		}
	},

	/**
	 * Fetch the site analysis and map it onto an ImportAnalysis.
	 *
	 * @throws Error when the analyze endpoint answers with a non-OK status.
	 * A failure to fetch the media list is only logged.
	 */
	async analyze(input: SourceInput, context: ImportContext): Promise<ImportAnalysis> {
		const { siteUrl, headers } = getRequestConfig(input);

		const response = await ssrfSafeFetch(`${siteUrl}/wp-json/emdash/v1/analyze`, {
			headers,
			signal: AbortSignal.timeout(30000),
		});

		if (!response.ok) {
			// The error body may be missing, non-JSON, or literally `null`; only trust a string message
			const errorBody: unknown = await response.json().catch(() => undefined);
			const message = (errorBody as { message?: unknown } | null | undefined)?.message;
			throw new Error(
				(typeof message === "string" && message) ||
					`Failed to analyze site: ${response.statusText}`,
			);
		}

		const data: PluginAnalyzeResponse = await response.json();

		// Get existing collections for schema check
		const existingCollections = context.getExistingCollections
			? await context.getExistingCollections()
			: new Map();

		// Build post type analysis
		const postTypes: PostTypeAnalysis[] = data.post_types
			.filter((pt) => pt.total > 0)
			.map((pt) => {
				const suggestedCollection = mapPostTypeToCollection(pt.name);
				const existingCollection = existingCollections.get(suggestedCollection);

				// Include featured_image if post type supports thumbnails
				const supportsThumbnail = pt.supports && "thumbnail" in pt.supports;
				const requiredFields = supportsThumbnail
					? [...BASE_REQUIRED_FIELDS, FEATURED_IMAGE_FIELD]
					: [...BASE_REQUIRED_FIELDS];

				return {
					name: pt.name,
					count: pt.total,
					suggestedCollection,
					requiredFields,
					schemaStatus: checkSchemaCompatibility(requiredFields, existingCollection),
				};
			});

		// Fetch media list for attachment info
		const attachments: AttachmentInfo[] = [];
		if (data.attachments.count > 0) {
			try {
				// Only the first page (up to 500 items) is fetched to populate attachment info
				const mediaResponse = await ssrfSafeFetch(
					`${siteUrl}/wp-json/emdash/v1/media?per_page=500`,
					{
						headers,
						signal: AbortSignal.timeout(30000),
					},
				);
				if (mediaResponse.ok) {
					const mediaData: PluginMediaResponse = await mediaResponse.json();
					for (const item of mediaData.items) {
						attachments.push({
							id: item.id,
							url: item.url,
							filename: item.filename,
							mimeType: item.mime_type,
							title: item.title,
							alt: item.alt,
							caption: item.caption,
							width: item.width,
							height: item.height,
						});
					}
				}
			} catch (e) {
				// Best effort: the analysis is still useful without the item list
				console.warn("Failed to fetch media list:", e);
			}
		}

		// Count categories and tags
		const categoryTaxonomy = data.taxonomies.find((t) => t.name === "category");
		const tagTaxonomy = data.taxonomies.find((t) => t.name === "post_tag");

		return {
			sourceId: "wordpress-plugin",
			site: {
				title: data.site.title,
				url: data.site.url,
			},
			postTypes,
			attachments: {
				count: data.attachments.count,
				items: attachments,
			},
			categories: categoryTaxonomy?.term_count ?? 0,
			tags: tagTaxonomy?.term_count ?? 0,
			authors: data.authors.map((a) => ({
				id: a.id,
				login: a.login,
				email: a.email,
				displayName: a.display_name,
				postCount: a.post_count,
			})),
			i18n: pluginI18nToDetection(data.i18n),
		};
	},

	/**
	 * Stream posts for each requested post type, page by page.
	 *
	 * `options.limit`, when set, caps the total number of items yielded across
	 * all post types.
	 *
	 * @throws Error when a content page answers with a non-OK status.
	 */
	async *fetchContent(input: SourceInput, options: FetchOptions): AsyncGenerator<NormalizedItem> {
		const { siteUrl, headers } = getRequestConfig(input);
		const status = options.includeDrafts ? "any" : "publish";

		// Counted across all post types so that hitting the limit (which ends the
		// whole generator) really means "limit items in total".
		let yielded = 0;

		for (const postType of options.postTypes) {
			let page = 1;
			let totalPages = 1;

			while (page <= totalPages) {
				const url = `${siteUrl}/wp-json/emdash/v1/content?post_type=${encodeURIComponent(postType)}&status=${status}&per_page=100&page=${page}`;

				const response = await ssrfSafeFetch(url, {
					headers,
					signal: AbortSignal.timeout(60000),
				});

				if (!response.ok) {
					throw new Error(`Failed to fetch ${postType}: ${response.statusText}`);
				}

				const data: PluginContentResponse = await response.json();
				// The first response tells us how many pages to walk
				totalPages = data.pages;

				for (const post of data.items) {
					yield pluginPostToNormalizedItem(post);
					yielded++;

					if (options.limit && yielded >= options.limit) {
						return;
					}
				}

				page++;
			}
		}
	},

	/**
	 * Download a media file as a Blob.
	 *
	 * @throws Error when the response status is not OK.
	 */
	async fetchMedia(url: string, _input: SourceInput): Promise<Blob> {
		// SSRF protection: validate media URL before fetching
		validateExternalUrl(url);

		// Media URLs are publicly accessible on WP (ssrfSafeFetch validates redirects)
		const response = await ssrfSafeFetch(url);
		if (!response.ok) {
			throw new Error(`Failed to fetch media: ${response.statusText}`);
		}
		return response.blob();
	},
};
|
||||
|
||||
// =============================================================================
|
||||
// Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Map the plugin's snake_case i18n payload onto the shared I18nDetection shape.
 *
 * @param i18n - i18n info from a probe/analyze response, if any
 * @returns The converted detection, or `undefined` when `i18n` is absent
 */
function pluginI18nToDetection(i18n: PluginI18nInfo | undefined): I18nDetection | undefined {
	return i18n
		? { plugin: i18n.plugin, defaultLocale: i18n.default_locale, locales: i18n.locales }
		: undefined;
}
|
||||
|
||||
/**
 * Derive the validated site URL and request headers for a source input.
 *
 * - `url` inputs get an optional `Basic` Authorization header built from `input.token`.
 * - `oauth` inputs get a `Bearer` Authorization header built from `input.accessToken`.
 *
 * @throws Error for any other input type.
 */
function getRequestConfig(input: SourceInput): {
	siteUrl: string;
	headers: HeadersInit;
} {
	switch (input.type) {
		case "url": {
			const siteUrl = normalizeUrl(input.url);

			// SSRF protection: validate URL before any outbound requests
			validateExternalUrl(siteUrl);

			// Token format: "username:password" base64 encoded
			const headers: HeadersInit = input.token
				? { Accept: "application/json", Authorization: `Basic ${input.token}` }
				: { Accept: "application/json" };

			return { siteUrl, headers };
		}

		case "oauth": {
			const siteUrl = normalizeUrl(input.url);

			// SSRF protection: validate URL before any outbound requests
			validateExternalUrl(siteUrl);

			const headers: HeadersInit = {
				Accept: "application/json",
				Authorization: `Bearer ${input.accessToken}`,
			};

			return { siteUrl, headers };
		}

		default:
			throw new Error("WordPress plugin source requires URL or OAuth input");
	}
}
|
||||
|
||||
/**
 * Translate a plugin API post into the source-agnostic NormalizedItem shape.
 *
 * Categories and tags are reduced to slugs; ACF, Yoast and Rank Math payloads
 * are copied into `meta` under `_acf`, `_yoast` and `_rankmath`.
 */
function pluginPostToNormalizedItem(post: PluginPost): NormalizedItem {
	const content = post.content ? gutenbergToPortableText(post.content) : [];

	// Slugs of the terms stored under `primary`, falling back to the `fallback` key
	const termSlugs = (primary: string, fallback: string): string[] =>
		(post.taxonomies?.[primary] ?? post.taxonomies?.[fallback])?.map((term) => term.slug) ?? [];

	// Post meta first, then the optional plugin payloads in a fixed order
	const meta: Record<string, unknown> = {
		...post.meta,
		...(post.acf ? { _acf: post.acf } : {}),
		...(post.yoast ? { _yoast: post.yoast } : {}),
		...(post.rankmath ? { _rankmath: post.rankmath } : {}),
	};

	return {
		sourceId: post.id,
		postType: post.post_type,
		status: mapWpStatus(post.status),
		slug: post.slug,
		title: post.title,
		content,
		excerpt: post.excerpt || undefined,
		// GMT timestamps win whenever they are non-empty
		date: new Date(post.date_gmt || post.date),
		modified: new Date(post.modified_gmt || post.modified),
		author: post.author?.login,
		categories: termSlugs("category", "categories"),
		tags: termSlugs("post_tag", "tags"),
		meta,
		featuredImage: post.featured_image?.url,
		locale: post.locale,
		translationGroup: post.translation_group,
	};
}
|
||||
|
||||
// =============================================================================
|
||||
// Utility Functions for External Use
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Build the base64 payload for an HTTP Basic Authorization header.
 *
 * @param username - WordPress user name
 * @param password - Application password; all whitespace is removed before
 * encoding (WP formats them with spaces)
 * @returns `username:password` encoded with `encodeBase64`
 */
export function createBasicAuthToken(username: string, password: string): string {
	const credentials = [username, password.replace(SPACE_PATTERN, "")].join(":");
	return encodeBase64(credentials);
}
|
||||
|
||||
/**
 * Request one page of the media list from the plugin API using Basic auth.
 *
 * @param siteUrl - Site address; normalized and SSRF-validated before use
 * @param authToken - Base64 `username:password` token
 * @param page - Page number to request (defaults to 1)
 * @param perPage - Page size to request (defaults to 100)
 * @throws Error when the response status is not OK
 */
export async function fetchPluginMedia(
	siteUrl: string,
	authToken: string,
	page = 1,
	perPage = 100,
): Promise<PluginMediaResponse> {
	const baseUrl = normalizeUrl(siteUrl);

	// SSRF protection: validate URL before any outbound requests
	validateExternalUrl(baseUrl);

	const requestHeaders = {
		Accept: "application/json",
		Authorization: `Basic ${authToken}`,
	};
	const mediaResponse = await ssrfSafeFetch(
		`${baseUrl}/wp-json/emdash/v1/media?per_page=${perPage}&page=${page}`,
		{ headers: requestHeaders },
	);

	if (mediaResponse.ok) {
		return mediaResponse.json();
	}
	throw new Error(`Failed to fetch media: ${mediaResponse.statusText}`);
}
|
||||
|
||||
/**
 * Request the taxonomy list (with terms) from the plugin API using Basic auth.
 *
 * @param siteUrl - Site address; normalized and SSRF-validated before use
 * @param authToken - Base64 `username:password` token
 * @throws Error when the response status is not OK
 */
export async function fetchPluginTaxonomies(
	siteUrl: string,
	authToken: string,
): Promise<
	Array<{
		name: string;
		label: string;
		hierarchical: boolean;
		terms: Array<{
			id: number;
			name: string;
			slug: string;
			description: string;
			parent: number | null;
			count: number;
		}>;
	}>
> {
	const baseUrl = normalizeUrl(siteUrl);

	// SSRF protection: validate URL before any outbound requests
	validateExternalUrl(baseUrl);

	const requestHeaders = {
		Accept: "application/json",
		Authorization: `Basic ${authToken}`,
	};
	const taxonomyResponse = await ssrfSafeFetch(`${baseUrl}/wp-json/emdash/v1/taxonomies`, {
		headers: requestHeaders,
	});

	if (taxonomyResponse.ok) {
		return taxonomyResponse.json();
	}
	throw new Error(`Failed to fetch taxonomies: ${taxonomyResponse.statusText}`);
}
|
||||
191
packages/core/src/import/sources/wordpress-rest.ts
Normal file
191
packages/core/src/import/sources/wordpress-rest.ts
Normal file
@@ -0,0 +1,191 @@
|
||||
/**
|
||||
* WordPress REST API probe
|
||||
*
|
||||
* Probes self-hosted WordPress sites to detect capabilities.
|
||||
* This source is probe-only - it tells users what's available
|
||||
* and suggests next steps (usually: upload WXR file).
|
||||
*/
|
||||
|
||||
import { ssrfSafeFetch, validateExternalUrl } from "../ssrf.js";
|
||||
import type {
|
||||
ImportSource,
|
||||
ImportAnalysis,
|
||||
ImportContext,
|
||||
SourceInput,
|
||||
SourceProbeResult,
|
||||
FetchOptions,
|
||||
NormalizedItem,
|
||||
} from "../types.js";
|
||||
|
||||
// Matches one or more slashes at the very end of a string.
const TRAILING_SLASHES = /\/+$/;
|
||||
// Matches a final `/wp-json` path segment, with or without one trailing slash.
const WP_JSON_SUFFIX = /\/wp-json\/?$/;
|
||||
|
||||
/**
 * Fields read from the WordPress REST API root (discovery) document.
 * Everything is optional because the document comes from an arbitrary remote site.
 */
interface WpApiDiscovery {
	/** Reported as the detected site title */
	name?: string;
	description?: string;
	/** Preferred value for the detected site URL */
	url?: string;
	/** Used for the detected site URL when `url` is empty */
	home?: string;
	gmt_offset?: number;
	timezone_string?: string;
	/** Must contain `wp/v2` for the probe to treat the site as WordPress */
	namespaces?: Array<string>;
	/** An `application-passwords` entry enables the password auth hint */
	authentication?: Record<string, unknown>;
	routes?: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Probe-only source for self-hosted WordPress sites.
 *
 * It detects the REST API and reports what is available; `analyze` and
 * `fetchContent` always throw and direct the user to a WXR upload.
 */
export const wordpressRestSource: ImportSource = {
	id: "wordpress-rest",
	name: "WordPress Site",
	description: "Connect to a self-hosted WordPress site",
	icon: "globe",
	requiresFile: false,
	canProbe: true,

	/**
	 * Detect a WordPress REST API at `url`.
	 *
	 * Tries `/wp-json/` first and falls back to `/?rest_route=/`. The discovery
	 * document is read from whichever request succeeded.
	 *
	 * @returns A probe result, or `null` when neither endpoint answers OK, the
	 * document lacks the `wp/v2` namespace, or any step throws.
	 */
	async probe(url: string): Promise<SourceProbeResult | null> {
		try {
			const siteUrl = normalizeUrl(url);

			// SSRF protection: validate URL before any outbound requests
			validateExternalUrl(siteUrl);

			// Try to fetch the WP REST API root
			let response = await ssrfSafeFetch(`${siteUrl}/wp-json/`, {
				headers: { Accept: "application/json" },
				signal: AbortSignal.timeout(10000),
			});

			if (!response.ok) {
				// Try alternate location (some sites use different prefix).
				// A fresh timeout signal is created so this request gets its own 10s budget.
				response = await ssrfSafeFetch(`${siteUrl}/?rest_route=/`, {
					headers: { Accept: "application/json" },
					signal: AbortSignal.timeout(10000),
				});

				if (!response.ok) {
					return null;
				}
			}

			// Parse the body of the request that actually succeeded
			const data: WpApiDiscovery = await response.json();

			// Check if this looks like WordPress
			if (!data.namespaces?.includes("wp/v2")) {
				return null;
			}

			// Get content counts (unauthenticated - published only)
			const preview = await getPublicContentCounts(siteUrl);

			// Check for authentication methods
			const hasAppPasswords = !!data.authentication?.["application-passwords"];

			return {
				sourceId: "wordpress-rest",
				confidence: "definite",
				detected: {
					platform: "wordpress",
					siteTitle: data.name,
					siteUrl: data.url || data.home || siteUrl,
				},
				capabilities: {
					publicContent: true,
					privateContent: false, // Would need auth
					customPostTypes: false, // Only if show_in_rest: true
					allMeta: false, // Only if registered for REST
					mediaStream: true,
				},
				auth: hasAppPasswords
					? {
							type: "password",
							instructions:
								"To import drafts and private content, create an Application Password in WordPress → Users → Your Profile → Application Passwords",
						}
					: undefined,
				preview,
				suggestedAction: {
					type: "upload",
					instructions:
						"For a complete import including drafts, custom post types, and all metadata, export your content from WordPress (Tools → Export) and upload the file here.",
				},
			};
		} catch {
			// Probe failed - not a WordPress site or not accessible
			return null;
		}
	},

	/** Not supported: this source is probe-only. Always throws. */
	async analyze(_input: SourceInput, _context: ImportContext): Promise<ImportAnalysis> {
		// REST-only import not implemented - we use this for probe only
		// and suggest WXR upload for actual import
		throw new Error("Direct REST API import not implemented. Please upload a WXR export file.");
	},

	/** Not supported: this source is probe-only. Throws on first iteration. */
	// eslint-disable-next-line require-yield
	async *fetchContent(_input: SourceInput, _options: FetchOptions): AsyncGenerator<NormalizedItem> {
		throw new Error("Direct REST API import not implemented. Please upload a WXR export file.");
	},
};
|
||||
|
||||
/**
 * Normalize a user-supplied site address for API requests.
 *
 * - Trims surrounding whitespace.
 * - Prepends `https://` unless the value already starts with an `http://` or
 *   `https://` scheme (case-insensitive). Checking for the full scheme keeps
 *   hosts such as `httpbin.org` from being mistaken for complete URLs.
 * - Strips trailing slashes and a trailing `/wp-json` segment.
 *
 * @param url - Site address as typed by the user
 * @returns The normalized base URL without a trailing slash
 */
function normalizeUrl(url: string): string {
	let normalized = url.trim();

	// Add protocol if missing
	if (!/^https?:\/\//i.test(normalized)) {
		normalized = `https://${normalized}`;
	}

	// Remove trailing slash
	normalized = normalized.replace(TRAILING_SLASHES, "");

	// Remove /wp-json if included
	normalized = normalized.replace(WP_JSON_SUFFIX, "");

	return normalized;
}
|
||||
|
||||
/**
 * Read published post, page and media totals from the public REST API.
 *
 * Each collection is requested with `per_page=1` and the total is taken from
 * the `X-WP-Total` response header. A count is simply left out when its
 * request fails, answers non-OK, or carries no header.
 *
 * @param siteUrl - Normalized site base URL
 * @returns An object holding whichever of `posts`, `pages`, `media` were found
 */
async function getPublicContentCounts(
	siteUrl: string,
): Promise<{ posts?: number; pages?: number; media?: number }> {
	const counts: { posts?: number; pages?: number; media?: number } = {};

	// Request a single item of a collection; only the response headers are of interest
	const requestOne = (collection: string) =>
		ssrfSafeFetch(`${siteUrl}/wp-json/wp/v2/${collection}?per_page=1`, {
			signal: AbortSignal.timeout(5000),
		});

	// Pull the advertised total out of a settled request, if there is one
	const totalOf = (outcome: PromiseSettledResult<Response>): number | undefined => {
		if (outcome.status !== "fulfilled" || !outcome.value.ok) return undefined;
		const header = outcome.value.headers.get("X-WP-Total");
		return header ? parseInt(header, 10) : undefined;
	};

	try {
		const [postsOutcome, pagesOutcome, mediaOutcome] = await Promise.allSettled([
			requestOne("posts"),
			requestOne("pages"),
			requestOne("media"),
		]);

		const postTotal = totalOf(postsOutcome);
		if (postTotal !== undefined) counts.posts = postTotal;

		const pageTotal = totalOf(pagesOutcome);
		if (pageTotal !== undefined) counts.pages = pageTotal;

		const mediaTotal = totalOf(mediaOutcome);
		if (mediaTotal !== undefined) counts.media = mediaTotal;
	} catch {
		// Counts are optional, continue without them
	}

	return counts;
}
|
||||
373
packages/core/src/import/sources/wxr.ts
Normal file
373
packages/core/src/import/sources/wxr.ts
Normal file
@@ -0,0 +1,373 @@
|
||||
/**
|
||||
* WXR (WordPress eXtended RSS) import source
|
||||
*
|
||||
* Handles WordPress export file uploads (.xml).
|
||||
* This wraps the existing WXR parsing and analysis logic.
|
||||
*/
|
||||
|
||||
import { gutenbergToPortableText } from "@emdash-cms/gutenberg-to-portable-text";
|
||||
|
||||
import { parseWxrString, type WxrData, type WxrPost } from "../../cli/wxr/parser.js";
|
||||
import type {
|
||||
ImportSource,
|
||||
ImportAnalysis,
|
||||
ImportContext,
|
||||
SourceInput,
|
||||
FetchOptions,
|
||||
NormalizedItem,
|
||||
PostTypeAnalysis,
|
||||
AttachmentInfo,
|
||||
NavMenuAnalysis,
|
||||
TaxonomyAnalysis,
|
||||
ReusableBlockAnalysis,
|
||||
} from "../types.js";
|
||||
import {
|
||||
BASE_REQUIRED_FIELDS,
|
||||
FEATURED_IMAGE_FIELD,
|
||||
isInternalPostType,
|
||||
isInternalMetaKey,
|
||||
mapWpStatus,
|
||||
mapPostTypeToCollection,
|
||||
mapMetaKeyToField,
|
||||
inferMetaType,
|
||||
slugify,
|
||||
buildAttachmentMap,
|
||||
getFilenameFromUrl,
|
||||
guessMimeType,
|
||||
checkSchemaCompatibility,
|
||||
} from "../utils.js";
|
||||
|
||||
/**
 * Import source for uploaded WordPress export (WXR) files.
 * Both entry points require a `file` input and re-parse the file on each call.
 */
export const wxrSource: ImportSource = {
	id: "wxr",
	name: "WordPress Export File",
	description: "Upload a WordPress export file (.xml)",
	icon: "upload",
	requiresFile: true,
	canProbe: false,

	/**
	 * Parse the uploaded file and summarize its contents.
	 *
	 * @throws Error when `input` is not a file input.
	 */
	async analyze(input: SourceInput, context: ImportContext): Promise<ImportAnalysis> {
		if (input.type !== "file") {
			throw new Error("WXR source requires a file input");
		}

		const parsed = await parseWxrString(await input.file.text());

		// Existing collections are needed for the schema compatibility check
		const collections = context.getExistingCollections
			? await context.getExistingCollections()
			: new Map();

		return analyzeWxrData(parsed, collections);
	},

	/**
	 * Yield normalized items for the requested post types.
	 *
	 * Posts without a post type count as `post`. Internal post types are always
	 * skipped, non-published posts are skipped unless `options.includeDrafts` is
	 * set, and iteration stops once `options.limit` items have been yielded.
	 *
	 * @throws Error when `input` is not a file input.
	 */
	async *fetchContent(input: SourceInput, options: FetchOptions): AsyncGenerator<NormalizedItem> {
		if (input.type !== "file") {
			throw new Error("WXR source requires a file input");
		}

		const parsed = await parseWxrString(await input.file.text());

		// Attachment ID -> URL lookup, used to resolve featured images
		const attachmentUrls = buildAttachmentMap(parsed.attachments);

		let emitted = 0;
		for (const post of parsed.posts) {
			const postType = post.postType || "post";

			const skip =
				!options.postTypes.includes(postType) ||
				isInternalPostType(postType) ||
				(!options.includeDrafts && post.status !== "publish");
			if (skip) {
				continue;
			}

			yield wxrPostToNormalizedItem(post, attachmentUrls);

			emitted += 1;
			if (options.limit && emitted >= options.limit) {
				return;
			}
		}
	},
};
|
||||
|
||||
/**
 * Summarize parsed WXR data as an ImportAnalysis.
 *
 * Walks the posts once to tally post types, authors and meta keys, then
 * derives the post-type, custom-field, attachment, menu, taxonomy and
 * reusable-block sections of the analysis from those tallies.
 *
 * @param wxr - Parsed export data
 * @param existingCollections - Collections already defined, keyed by slug;
 * used for the schema compatibility check
 */
function analyzeWxrData(
	wxr: WxrData,
	existingCollections: Map<string, { slug: string; fields: Map<string, { type: string }> }>,
): ImportAnalysis {
	// Add one to the tally kept under `key`
	const increment = (tally: Map<string, number>, key: string): void => {
		tally.set(key, (tally.get(key) || 0) + 1);
	};

	const countsByPostType = new Map<string, number>();
	const typesWithThumbnail = new Set<string>();
	const metaStats = new Map<string, { count: number; samples: string[]; isInternal: boolean }>();
	const postsByAuthor = new Map<string, number>();

	for (const post of wxr.posts) {
		const postType = post.postType || "post";
		increment(countsByPostType, postType);

		// Author tallies are keyed by login
		if (post.creator) {
			increment(postsByAuthor, post.creator);
		}

		// A thumbnail on any post marks the whole post type as using featured images
		if (post.meta.has("_thumbnail_id")) {
			typesWithThumbnail.add(postType);
		}

		for (const [metaKey, metaValue] of post.meta) {
			let stats = metaStats.get(metaKey);
			if (!stats) {
				stats = { count: 0, samples: [], isInternal: isInternalMetaKey(metaKey) };
				metaStats.set(metaKey, stats);
			}
			stats.count += 1;
			// Keep at most three non-empty samples, each cut to 100 characters
			if (stats.samples.length < 3 && metaValue) {
				stats.samples.push(metaValue.slice(0, 100));
			}
		}
	}

	// Non-internal meta keys become custom-field suggestions, most frequent first
	const customFields = Array.from(metaStats)
		.filter(([, stats]) => !stats.isInternal)
		.map(([key, { count, samples, isInternal }]) => ({
			key,
			count,
			samples,
			suggestedField: mapMetaKeyToField(key),
			suggestedType: inferMetaType(key, samples[0]),
			isInternal,
		}))
		.toSorted((left, right) => right.count - left.count);

	// Post types with their schema compatibility, most frequent first
	const postTypes: PostTypeAnalysis[] = Array.from(countsByPostType)
		.filter(([postType]) => !isInternalPostType(postType))
		.map(([name, count]) => {
			const suggestedCollection = mapPostTypeToCollection(name);

			// featured_image is only required when posts of this type have thumbnails
			const requiredFields = typesWithThumbnail.has(name)
				? [...BASE_REQUIRED_FIELDS, FEATURED_IMAGE_FIELD]
				: [...BASE_REQUIRED_FIELDS];

			return {
				name,
				count,
				suggestedCollection,
				requiredFields,
				schemaStatus: checkSchemaCompatibility(
					requiredFields,
					existingCollections.get(suggestedCollection),
				),
			};
		})
		.toSorted((left, right) => right.count - left.count);

	// Attachment list; filename and MIME type are derived from the URL when there is one
	const attachmentItems: AttachmentInfo[] = wxr.attachments.map(({ id, title, url }) => {
		const filename = url ? getFilenameFromUrl(url) : undefined;
		return {
			id,
			title,
			url,
			filename,
			mimeType: filename ? guessMimeType(filename) : undefined,
		};
	});

	// Navigation menus
	const navMenus: NavMenuAnalysis[] = wxr.navMenus.map(({ name, label, items }) => ({
		name,
		label,
		itemCount: items.length,
	}));

	// Custom taxonomies (from wp:term elements, excluding category/post_tag/nav_menu)
	const builtInTaxonomies = new Set(["category", "post_tag", "nav_menu"]);
	const taxonomyStats = new Map<string, { count: number; samples: string[] }>();
	for (const term of wxr.terms) {
		if (builtInTaxonomies.has(term.taxonomy)) {
			continue;
		}

		let stats = taxonomyStats.get(term.taxonomy);
		if (!stats) {
			stats = { count: 0, samples: [] };
			taxonomyStats.set(term.taxonomy, stats);
		}
		stats.count += 1;
		// Keep the first three term names as samples
		if (stats.samples.length < 3) {
			stats.samples.push(term.name);
		}
	}

	const customTaxonomies: TaxonomyAnalysis[] = [...taxonomyStats]
		.map(([slug, stats]) => ({
			slug,
			termCount: stats.count,
			sampleTerms: stats.samples,
		}))
		.toSorted((left, right) => right.termCount - left.termCount);

	// Reusable blocks (wp_block post type)
	const reusableBlocks: ReusableBlockAnalysis[] = [];
	for (const post of wxr.posts) {
		if (post.postType !== "wp_block") {
			continue;
		}
		reusableBlocks.push({
			id: post.id || 0,
			title: post.title || "Untitled Block",
			slug: post.postName || slugify(post.title || `block-${post.id || Date.now()}`),
		});
	}

	const authors = wxr.authors.map((author) => ({
		id: author.id,
		login: author.login,
		email: author.email,
		displayName: author.displayName || author.login || "Unknown",
		postCount: author.login ? postsByAuthor.get(author.login) || 0 : 0,
	}));

	return {
		sourceId: "wxr",
		site: {
			title: wxr.site.title || "WordPress Site",
			url: wxr.site.link || "",
		},
		postTypes,
		attachments: {
			count: wxr.attachments.length,
			items: attachmentItems,
		},
		categories: wxr.categories.length,
		tags: wxr.tags.length,
		authors,
		// Optional sections are omitted (undefined) when empty
		navMenus: navMenus.length > 0 ? navMenus : undefined,
		customTaxonomies: customTaxonomies.length > 0 ? customTaxonomies : undefined,
		reusableBlocks: reusableBlocks.length > 0 ? reusableBlocks : undefined,
		customFields,
	};
}
|
||||
|
||||
/**
 * Map a parsed WXR post onto the source-independent `NormalizedItem` shape.
 *
 * @param post - Parsed WXR post.
 * @param attachmentMap - Lookup keyed by the stringified `_thumbnail_id`
 *   meta value; the mapped string becomes `featuredImage`.
 * @returns The normalized item. Missing ids become `0`, a missing post type
 *   becomes `"post"`, a missing title becomes `"Untitled"`, and an
 *   unparseable publish date becomes the current time.
 */
function wxrPostToNormalizedItem(
	post: WxrPost,
	attachmentMap: Map<string, string>,
): NormalizedItem {
	// Gutenberg markup -> Portable Text; posts without content get no blocks.
	const content = post.content ? gutenbergToPortableText(post.content) : [];

	// `_thumbnail_id` holds an attachment id, which is looked up in the map.
	const thumbnailId = post.meta.get("_thumbnail_id");
	let featuredImage: string | undefined;
	if (thumbnailId) {
		featuredImage = attachmentMap.get(String(thumbnailId));
	}

	// Only expose custom taxonomies when at least one is assigned.
	const taxonomyAssignments = post.customTaxonomies;
	const customTaxonomies: Record<string, string[]> | undefined =
		taxonomyAssignments && taxonomyAssignments.size > 0
			? Object.fromEntries(taxonomyAssignments)
			: undefined;

	return {
		sourceId: post.id || 0,
		postType: post.postType || "post",
		status: mapWpStatus(post.status),
		// Prefer the WordPress slug; otherwise derive one from the title,
		// or from the id / current timestamp when there is no title.
		slug: post.postName || slugify(post.title || `post-${post.id || Date.now()}`),
		title: post.title || "Untitled",
		content,
		excerpt: post.excerpt,
		date: parseWxrDate(post.postDateGmt, post.pubDate, post.postDate) ?? new Date(),
		modified: parseWxrDate(post.postModifiedGmt, undefined, post.postModified),
		author: post.creator,
		categories: post.categories,
		tags: post.tags,
		meta: Object.fromEntries(post.meta),
		featuredImage,
		// Hierarchy: a falsy parent (0 / missing) means "no parent".
		parentId: post.postParent || undefined,
		menuOrder: post.menuOrder,
		customTaxonomies,
	};
}
|
||||
|
||||
/**
 * WordPress uses "0000-00-00 00:00:00" as a sentinel for missing GMT dates
 * (e.g. unpublished drafts). This must be treated as absent.
 */
export const WXR_ZERO_DATE = "0000-00-00 00:00:00";

/**
 * Parse a WXR date with the correct fallback chain:
 *   1. GMT date (always UTC, most reliable)
 *   2. pubDate (RFC 2822, includes timezone offset)
 *   3. Site-local date (MySQL datetime without timezone, imprecise but best available)
 *
 * Each candidate is trimmed first, and a candidate that does not parse to a
 * valid date is skipped in favour of the next one. A malformed GMT value
 * therefore never produces an Invalid Date.
 *
 * @param gmtDate - "YYYY-MM-DD HH:MM:SS" in UTC, or the zero-date sentinel.
 * @param pubDate - RFC 2822 date string.
 * @param localDate - "YYYY-MM-DD HH:MM:SS" in site-local time.
 * @returns The first valid date, or undefined when none of the inputs yield
 *   a valid date. Callers that need a guaranteed Date should use
 *   `?? new Date()`.
 */
export function parseWxrDate(
	gmtDate: string | undefined,
	pubDate: string | undefined,
	localDate: string | undefined,
): Date | undefined {
	const gmt = gmtDate?.trim();
	if (gmt && gmt !== WXR_ZERO_DATE) {
		// GMT dates from WordPress are "YYYY-MM-DD HH:MM:SS" in UTC.
		// Append "Z" so the JS Date constructor treats them as UTC.
		const d = new Date(gmt.replace(" ", "T") + "Z");
		if (!isNaN(d.getTime())) return d;
	}

	const pub = pubDate?.trim();
	if (pub) {
		// RFC 2822 format includes timezone offset, JS Date parses it correctly
		const d = new Date(pub);
		if (!isNaN(d.getTime())) return d;
	}

	const local = localDate?.trim();
	if (local) {
		// Site-local time without timezone. Normalize to ISO-like form so
		// runtimes that reject "YYYY-MM-DD HH:MM:SS" can still parse it as
		// local time. The zero-date sentinel fails to parse and is skipped.
		const d = new Date(local.replace(" ", "T"));
		if (!isNaN(d.getTime())) return d;
	}

	return undefined;
}
|
||||
|
||||
// Export for use in other sources
|
||||
export { analyzeWxrData, wxrPostToNormalizedItem };
|
||||
|
||||
// Re-export shared utilities that other sources may need
|
||||
export {
|
||||
BASE_REQUIRED_FIELDS,
|
||||
FEATURED_IMAGE_FIELD,
|
||||
mapPostTypeToCollection,
|
||||
isInternalPostType,
|
||||
checkSchemaCompatibility,
|
||||
} from "../utils.js";
|
||||
501
packages/core/src/import/ssrf.ts
Normal file
501
packages/core/src/import/ssrf.ts
Normal file
@@ -0,0 +1,501 @@
|
||||
/**
|
||||
* SSRF protection for import URLs.
|
||||
*
|
||||
* Validates that URLs don't target internal/private network addresses.
|
||||
* Applied before any fetch() call in the import pipeline.
|
||||
*/
|
||||
|
||||
const IPV4_MAPPED_IPV6_DOTTED_PATTERN = /^::ffff:(\d+\.\d+\.\d+\.\d+)$/i;
|
||||
const IPV4_MAPPED_IPV6_HEX_PATTERN = /^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
|
||||
const IPV4_TRANSLATED_HEX_PATTERN = /^::ffff:0:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
|
||||
const IPV6_EXPANDED_MAPPED_PATTERN =
|
||||
/^0{0,4}:0{0,4}:0{0,4}:0{0,4}:0{0,4}:ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
|
||||
|
||||
/**
|
||||
* IPv4-compatible (deprecated) addresses: ::XXXX:XXXX
|
||||
*
|
||||
* The WHATWG URL parser normalizes [::127.0.0.1] to [::7f00:1] (no ffff prefix).
|
||||
* These are deprecated but still parsed, and bypass the ffff-based checks.
|
||||
*/
|
||||
const IPV4_COMPATIBLE_HEX_PATTERN = /^::([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
|
||||
|
||||
/**
|
||||
* NAT64 prefix (RFC 6052): 64:ff9b::XXXX:XXXX
|
||||
*
|
||||
* Used by NAT64 gateways to embed IPv4 addresses in IPv6.
|
||||
* [64:ff9b::127.0.0.1] normalizes to [64:ff9b::7f00:1].
|
||||
*/
|
||||
const NAT64_HEX_PATTERN = /^64:ff9b::([0-9a-f]{1,4}):([0-9a-f]{1,4})$/i;
|
||||
|
||||
const IPV6_BRACKET_PATTERN = /^\[|\]$/g;
|
||||
|
||||
/** Match fc00::/7 ULA — first byte 0xfc or 0xfd followed by any byte. */
|
||||
const IPV6_ULA_FC_PATTERN = /^fc[0-9a-f]{2}:/;
|
||||
const IPV6_ULA_FD_PATTERN = /^fd[0-9a-f]{2}:/;
|
||||
|
||||
/** Strip trailing dots from an FQDN-form hostname ("localhost." -> "localhost"). */
|
||||
const TRAILING_DOT_PATTERN = /\.+$/;
|
||||
|
||||
/**
 * IPv4 ranges that should never be fetched, as inclusive numeric
 * `[start, end]` pairs built with `ip4ToNum`.
 *
 * Includes:
 * - Loopback (127.0.0.0/8)
 * - Private (10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16)
 * - Link-local (169.254.0.0/16), which contains the 169.254.169.254 cloud
 *   metadata endpoint
 * - "This network" (0.0.0.0/8)
 * - Shared address space / CGNAT (100.64.0.0/10)
 * - Benchmarking (198.18.0.0/15)
 * - Multicast (224.0.0.0/4) and reserved (240.0.0.0/4, incl. broadcast)
 *
 * This table is IPv4-only; IPv6 loopback, link-local and unique-local
 * addresses are handled separately in `isPrivateIp`.
 */
const BLOCKED_PATTERNS: Array<{ start: number; end: number }> = [
	// 127.0.0.0/8 — loopback
	{ start: ip4ToNum(127, 0, 0, 0), end: ip4ToNum(127, 255, 255, 255) },
	// 10.0.0.0/8 — private
	{ start: ip4ToNum(10, 0, 0, 0), end: ip4ToNum(10, 255, 255, 255) },
	// 172.16.0.0/12 — private
	{ start: ip4ToNum(172, 16, 0, 0), end: ip4ToNum(172, 31, 255, 255) },
	// 192.168.0.0/16 — private
	{ start: ip4ToNum(192, 168, 0, 0), end: ip4ToNum(192, 168, 255, 255) },
	// 169.254.0.0/16 — link-local (includes cloud metadata endpoint)
	{ start: ip4ToNum(169, 254, 0, 0), end: ip4ToNum(169, 254, 255, 255) },
	// 0.0.0.0/8 — current network
	{ start: ip4ToNum(0, 0, 0, 0), end: ip4ToNum(0, 255, 255, 255) },
	// 100.64.0.0/10 — shared address space (carrier-grade NAT)
	{ start: ip4ToNum(100, 64, 0, 0), end: ip4ToNum(100, 127, 255, 255) },
	// 198.18.0.0/15 — benchmarking
	{ start: ip4ToNum(198, 18, 0, 0), end: ip4ToNum(198, 19, 255, 255) },
	// 224.0.0.0/4 — multicast
	{ start: ip4ToNum(224, 0, 0, 0), end: ip4ToNum(239, 255, 255, 255) },
	// 240.0.0.0/4 — reserved (includes 255.255.255.255 broadcast)
	{ start: ip4ToNum(240, 0, 0, 0), end: ip4ToNum(255, 255, 255, 255) },
];
|
||||
|
||||
// Exact hostnames that are always rejected. Entries are matched against the
// bracket-stripped hostname (validateExternalUrl removes the IPv6 brackets
// before the lookup), which is why the IPv6 loopback is listed as "::1".
// isPrivateIp would reject "::1" as well; listing it here documents the
// intent and yields the clearer "internal hosts" error for `http://[::1]/`.
const BLOCKED_HOSTNAMES = new Set<string>([
	"localhost",
	"metadata.google.internal",
	"metadata.google",
	"::1",
]);
|
||||
|
||||
/**
 * Public wildcard DNS services that resolve an IP address embedded in the
 * hostname (e.g. 127.0.0.1.nip.io -> 127.0.0.1). They are popular for local
 * development and with SSRF exploit tooling, because they slip past
 * hostname-only blocklists.
 *
 * Each entry is compared as a case-insensitive suffix, so the apex domain
 * and every subdomain of it are rejected.
 */
const BLOCKED_HOSTNAME_SUFFIXES: string[] = [
	"nip.io",
	"sslip.io",
	"xip.io",
	"traefik.me",
	"lvh.me",
	"localtest.me",
];
|
||||
|
||||
/** URL schemes (as reported by `URL.protocol`) that are allowed; everything else is rejected */
|
||||
const ALLOWED_SCHEMES = new Set(["http:", "https:"]);
|
||||
|
||||
/**
 * Pack four IPv4 octets into a single unsigned 32-bit number
 * (`a` is the most significant byte).
 */
function ip4ToNum(a: number, b: number, c: number, d: number): number {
	const packed = (a << 24) | (b << 16) | (c << 8) | d;
	// `|` and `<<` produce a signed 32-bit result; `>>> 0` reinterprets it
	// as unsigned so addresses >= 128.0.0.0 do not come out negative.
	return packed >>> 0;
}
|
||||
|
||||
/**
 * Parse a dotted-quad IPv4 string into its unsigned 32-bit numeric form.
 *
 * @returns The packed address, or `null` when the input does not consist of
 *   exactly four dot-separated parts that each convert (via `Number`) to a
 *   value in 0..255.
 */
function parseIpv4(ip: string): number | null {
	const octets = ip.split(".").map((part) => Number(part));
	if (octets.length !== 4) return null;

	const allInRange = octets.every((octet) => !isNaN(octet) && octet >= 0 && octet <= 255);
	if (!allInRange) return null;

	const [a, b, c, d] = octets;
	return ip4ToNum(a, b, c, d);
}
|
||||
|
||||
/**
 * Recover the IPv4 address embedded in the last 32 bits of an IPv6 literal
 * written in hex form.
 *
 * The WHATWG URL parser rewrites dotted-decimal tails to hex:
 *   [::ffff:127.0.0.1]       -> [::ffff:7f00:1]
 *   [::ffff:169.254.169.254] -> [::ffff:a9fe:a9fe]
 * so without this step those hex spellings would slip past the IPv4 range
 * checks in isPrivateIp().
 *
 * @param ip - IPv6 literal without brackets.
 * @returns The embedded address as a dotted quad, or `null` when `ip` is not
 *   one of the recognized embedding forms.
 */
export function normalizeIPv6MappedToIPv4(ip: string): string | null {
	// Tried in order; the first form that matches wins.
	const embeddedIpv4Forms = [
		IPV4_MAPPED_IPV6_HEX_PATTERN, // IPv4-mapped:       ::ffff:XXXX:XXXX
		IPV4_TRANSLATED_HEX_PATTERN, // IPv4-translated:    ::ffff:0:XXXX:XXXX
		IPV6_EXPANDED_MAPPED_PATTERN, // fully expanded:    0000:0000:0000:0000:0000:ffff:XXXX:XXXX
		IPV4_COMPATIBLE_HEX_PATTERN, // IPv4-compatible:    ::XXXX:XXXX (deprecated, no ffff)
		NAT64_HEX_PATTERN, // NAT64 prefix:                 64:ff9b::XXXX:XXXX
	];

	for (const form of embeddedIpv4Forms) {
		const groups = ip.match(form);
		if (!groups) continue;

		// The two captured hextets hold the upper and lower 16 bits.
		const upper = parseInt(groups[1] ?? "", 16);
		const lower = parseInt(groups[2] ?? "", 16);
		const octets = [(upper >> 8) & 0xff, upper & 0xff, (lower >> 8) & 0xff, lower & 0xff];
		return octets.join(".");
	}

	return null;
}
|
||||
|
||||
/** Match fe80::/10 link-local — first hextet fe80 through febf. */
const IPV6_LINK_LOCAL_PATTERN = /^fe[89ab][0-9a-f]:/;

/**
 * Decide whether an IP literal (IPv4, or IPv6 without brackets) points at a
 * loopback, private, link-local or otherwise internal address.
 *
 * IPv6 literals that embed an IPv4 address are unwrapped and judged by the
 * embedded IPv4 address. Strings that are not IPv4 are only matched against
 * the IPv6 prefixes below; anything else (including ordinary hostnames)
 * returns false.
 *
 * @param ip - Address literal; case-insensitive.
 * @returns true when the address must not be fetched.
 */
function isPrivateIp(ip: string): boolean {
	// Normalize IPv6 strings to lowercase. `new URL().hostname` already
	// lowercases, but resolver output (from DoH or an injected resolver) may
	// not. Without this, "FE80::1" bypasses the link-local check.
	const normalized = ip.toLowerCase();

	// IPv6 unspecified address ("::") and loopback. "::" is the IPv6
	// counterpart of 0.0.0.0, which the IPv4 table already blocks.
	if (normalized === "::" || normalized === "::1" || normalized === "::ffff:127.0.0.1") {
		return true;
	}

	// Handle IPv4-mapped IPv6 in hex form (WHATWG URL parser normalizes to this)
	// e.g. ::ffff:7f00:1 -> 127.0.0.1, ::ffff:a9fe:a9fe -> 169.254.169.254
	const hexIpv4 = normalizeIPv6MappedToIPv4(normalized);
	if (hexIpv4) return isPrivateIp(hexIpv4);

	// Handle IPv4-mapped IPv6 in dotted-decimal form
	const v4Match = normalized.match(IPV4_MAPPED_IPV6_DOTTED_PATTERN);
	const ipv4 = v4Match ? v4Match[1] : normalized;

	const num = parseIpv4(ipv4);
	if (num === null) {
		// Not IPv4: block IPv6 addresses that look internal.
		// fc00::/7 is Unique Local (first byte 0xfc or 0xfd) and fe80::/10 is
		// link-local (fe80 through febf, not just the literal "fe80" hextet).
		// The patterns require the full first hextet plus a colon to avoid
		// collisions with non-address strings.
		return (
			IPV6_LINK_LOCAL_PATTERN.test(normalized) ||
			IPV6_ULA_FC_PATTERN.test(normalized) ||
			IPV6_ULA_FD_PATTERN.test(normalized)
		);
	}

	return BLOCKED_PATTERNS.some((range) => num >= range.start && num <= range.end);
}
|
||||
|
||||
/**
 * Raised whenever SSRF protection refuses a URL. Callers can recognize it
 * via `instanceof`, the `name` ("SsrfError") or the `code` discriminator.
 */
export class SsrfError extends Error {
	/** Stable machine-readable discriminator. */
	code: "SSRF_BLOCKED" = "SSRF_BLOCKED";

	/** @param message - Human-readable reason the URL was blocked. */
	constructor(message: string) {
		super(message);
		this.name = "SsrfError";
	}
}
|
||||
|
||||
/** Maximum number of redirects to follow in ssrfSafeFetch */
const MAX_REDIRECTS = 5;

/**
 * Synchronously check that a URL is safe to fetch, i.e. that it does not
 * obviously point at an internal network address.
 *
 * Checks, in order:
 *   1. The URL parses and uses an http/https scheme.
 *   2. The hostname is not a known internal name (localhost, metadata
 *      endpoints).
 *   3. The hostname is not (a subdomain of) a wildcard DNS service.
 *   4. If the hostname is an IP literal, it is not in a private range.
 *
 * The hostname is compared with IPv6 brackets removed, lowercased and with
 * trailing dots stripped.
 *
 * Note: this function performs no DNS resolution, so a hostname that
 * resolves to a private IP is not detected here.
 *
 * @param url - Absolute URL string.
 * @returns The parsed URL when every check passes.
 * @throws SsrfError if the URL is malformed, uses a disallowed scheme, or
 *   targets an internal address.
 */
export function validateExternalUrl(url: string): URL {
	let target: URL;
	try {
		target = new URL(url);
	} catch {
		throw new SsrfError("Invalid URL");
	}

	// Scheme allow-list (http/https only).
	const scheme = target.protocol;
	if (!ALLOWED_SCHEMES.has(scheme)) {
		throw new SsrfError(`Scheme '${scheme}' is not allowed`);
	}

	// Canonical host for blocklist matching: no IPv6 brackets, lowercase, no
	// trailing dots. WHATWG keeps trailing dots on .hostname, so without the
	// last step "localhost." and "nip.io." would bypass the checks.
	const host = target.hostname
		.replace(IPV6_BRACKET_PATTERN, "")
		.toLowerCase()
		.replace(TRAILING_DOT_PATTERN, "");

	if (BLOCKED_HOSTNAMES.has(host)) {
		throw new SsrfError("URLs targeting internal hosts are not allowed");
	}

	// Wildcard DNS services: reject the apex as well as any subdomain.
	const usesWildcardDns = BLOCKED_HOSTNAME_SUFFIXES.some(
		(suffix) => host === suffix || host.endsWith(`.${suffix}`),
	);
	if (usesWildcardDns) {
		throw new SsrfError("URLs targeting wildcard DNS services are not allowed");
	}

	// IP literals: the canonical host is used so forms such as "127.0.0.1.."
	// are still recognized (parseIpv4 rejects extra trailing dots).
	if (isPrivateIp(host)) {
		throw new SsrfError("URLs targeting private IP addresses are not allowed");
	}

	return target;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DNS-aware validation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Function that turns a hostname into the list of IPv4/IPv6 addresses it
 * resolves to. It is injectable so callers can use OS-level DNS on Node,
 * stub resolution in tests, or target a different DoH endpoint.
 */
export type DnsResolver = (hostname: string) => Promise<string[]>;

/**
 * Resolver used when a call site does not pass one explicitly; `null` means
 * "no override". Intended for tests, so fetch mocks are not surprised by DoH
 * round-trips. Production code should leave it alone.
 */
let defaultResolver: DnsResolver | null = null;

/**
 * Replace the module-level default DNS resolver.
 *
 * @param resolver - New default, or `null` to clear the override.
 * @returns The resolver that was installed before this call (or `null`), so
 *   callers can restore it afterwards.
 */
export function setDefaultDnsResolver(resolver: DnsResolver | null): DnsResolver | null {
	const replaced = defaultResolver;
	defaultResolver = resolver;
	return replaced;
}
|
||||
|
||||
/** Timeout for a single DoH request, in milliseconds. */
|
||||
const DOH_TIMEOUT_MS = 3000;
|
||||
|
||||
/** Default DoH endpoint — Cloudflare's public resolver. */
|
||||
const DEFAULT_DOH_URL = "https://cloudflare-dns.com/dns-query";
|
||||
|
||||
interface DohAnswer {
|
||||
data: string;
|
||||
}
|
||||
|
||||
interface DohResponse {
|
||||
Status: number;
|
||||
Answer: DohAnswer[];
|
||||
}
|
||||
|
||||
/**
 * Type guard: true when `obj` is a non-null object on which `key` exists
 * (own or inherited, as reported by the `in` operator).
 */
function hasProperty<K extends string>(obj: unknown, key: K): obj is Record<K, unknown> {
	if (obj === null || typeof obj !== "object") {
		return false;
	}
	return key in obj;
}
|
||||
|
||||
/**
 * Validate an untyped JSON body and reduce it to the `DohResponse` fields
 * this module reads.
 *
 * A body without a numeric `Status` is rejected rather than treated as an
 * empty answer, because a malformed body cannot be told apart from a failed
 * lookup. A missing or non-array `Answer` yields an empty list, and answer
 * entries without a string `data` field are dropped.
 *
 * @throws Error when `Status` is missing or not a number.
 */
function parseDohResponse(raw: unknown): DohResponse {
	if (!(hasProperty(raw, "Status") && typeof raw.Status === "number")) {
		throw new Error("DoH response missing Status field");
	}

	const entries: unknown[] =
		hasProperty(raw, "Answer") && Array.isArray(raw.Answer) ? raw.Answer : [];

	const answers = entries.flatMap((entry): DohAnswer[] =>
		hasProperty(entry, "data") && typeof entry.data === "string" ? [{ data: entry.data }] : [],
	);

	return { Status: raw.Status, Answer: answers };
}
|
||||
|
||||
/**
 * DNS resolver backed by DNS over HTTPS (the endpoint in DEFAULT_DOH_URL).
 * Queries A and AAAA concurrently and returns the A results followed by the
 * AAAA results. It only needs `fetch`, so it does not depend on node:dns.
 *
 * Fails closed: a network error, a timeout (DOH_TIMEOUT_MS per query), a
 * non-2xx response, a malformed body, or a DNS rcode other than 0 or 3
 * (NXDOMAIN) rejects the promise, so the calling validator treats it as a
 * block.
 */
export const cloudflareDohResolver: DnsResolver = async (hostname) => {
	const lookup = async (recordType: "A" | "AAAA"): Promise<string[]> => {
		const search = new URLSearchParams({ name: hostname, type: recordType });
		const abort = new AbortController();
		const timer = setTimeout(() => abort.abort(), DOH_TIMEOUT_MS);

		try {
			const response = await globalThis.fetch(`${DEFAULT_DOH_URL}?${search.toString()}`, {
				headers: { Accept: "application/dns-json" },
				signal: abort.signal,
			});
			if (!response.ok) {
				throw new Error(`DoH lookup failed: ${response.status}`);
			}

			const body = parseDohResponse(await response.json());

			// rcode 0 = success, 3 = NXDOMAIN (a legitimate "does not exist").
			// Anything else (SERVFAIL=2, REFUSED=5, ...) is ambiguous and
			// could be a split-view attacker hiding records from our
			// resolver, so it is treated as a failure.
			if (body.Status !== 0 && body.Status !== 3) {
				throw new Error(`DoH ${recordType} lookup failed: rcode=${body.Status}`);
			}
			if (body.Status === 3) {
				return [];
			}

			// Answers may also carry CNAME records whose `data` is a hostname.
			// Keep only IP literals so isPrivateIp sees real addresses.
			const addresses: string[] = [];
			for (const { data } of body.Answer) {
				if (isIpLiteral(data)) addresses.push(data);
			}
			return addresses;
		} finally {
			// Always disarm the timeout, whether the lookup succeeded or threw.
			clearTimeout(timer);
		}
	};

	const [v4, v6] = await Promise.all([lookup("A"), lookup("AAAA")]);
	return v4.concat(v6);
};
|
||||
|
||||
/**
 * Validate a URL and, for non-literal hostnames, resolve the hostname and
 * check every returned address against the private-range blocklist. This
 * catches attacker-controlled domains that publicly resolve to private
 * addresses.
 *
 * `validateExternalUrl` runs first as a cheap pre-flight (scheme, literal
 * IP, known-bad hostnames). IP-literal hosts skip DNS because that
 * pre-flight has already judged them. Otherwise the hostname is resolved
 * with `options.resolver`, falling back to the module default and then to
 * `cloudflareDohResolver`, and the URL is rejected if ANY address is
 * private.
 *
 * Fails closed: a resolver error or an empty result throws SsrfError.
 *
 * **Caveats.** This does NOT fully close the TOCTOU window between check
 * and connect. Attacks that still work against this layer include:
 *
 * - TTL=0 rebind: the authoritative server returns a public IP to the check,
 *   then a private IP to the subsequent fetch() a few milliseconds later.
 * - Split-view via EDNS Client Subnet or source-IP inspection: the
 *   authoritative server returns a public IP to the DoH resolver and a
 *   private IP to the victim's own resolver (used by fetch()).
 * - Host-file overrides or split-horizon corporate DNS on self-hosted Node.
 * - Attacker-controlled rebinding services the caller has allowlisted.
 *
 * The only complete defense is a network-layer egress firewall. On
 * Cloudflare Workers, the platform fetch pipeline provides most of that.
 * On self-hosted Node, operators must restrict egress themselves.
 *
 * @param url - Absolute URL string.
 * @param options - Optional resolver override for this call.
 * @returns The parsed URL when it is considered safe.
 * @throws SsrfError when validation, resolution or the address check fails.
 */
export async function resolveAndValidateExternalUrl(
	url: string,
	options?: { resolver?: DnsResolver },
): Promise<URL> {
	const validated = validateExternalUrl(url);

	// .hostname keeps the brackets around IPv6 literals; drop them.
	const host = validated.hostname.replace(IPV6_BRACKET_PATTERN, "");

	// Literal IPs were already range-checked by validateExternalUrl.
	if (isIpLiteral(host)) {
		return validated;
	}

	const resolve = options?.resolver ?? defaultResolver ?? cloudflareDohResolver;

	let resolved: string[];
	try {
		resolved = await resolve(host);
	} catch (error) {
		const reason = error instanceof Error ? error.message : String(error);
		throw new SsrfError(`Could not resolve hostname: ${reason}`);
	}

	if (resolved.length === 0) {
		throw new SsrfError("Hostname resolved to no addresses");
	}

	// One private address among the answers is enough to reject.
	if (resolved.some((address) => isPrivateIp(address))) {
		throw new SsrfError("Hostname resolves to a private IP address");
	}

	return validated;
}
|
||||
|
||||
/**
 * Heuristic test for "this host is an IP literal rather than a DNS name".
 *
 * Anything parseIpv4 accepts counts as IPv4. For IPv6 the check is
 * deliberately loose: any string containing a colon qualifies, which is
 * safe because a colon is never valid in a DNS hostname.
 */
function isIpLiteral(host: string): boolean {
	return parseIpv4(host) !== null || host.includes(":");
}
|
||||
|
||||
/** Headers that must be stripped when a redirect crosses origins */
const CREDENTIAL_HEADERS = ["authorization", "cookie", "proxy-authorization"];

/**
 * Fetch a URL with SSRF protection on redirects.
 *
 * Uses `redirect: "manual"` to intercept redirects and re-validate each
 * redirect target against SSRF rules before following it. This prevents
 * an attacker from setting up an allowed external URL that redirects to
 * an internal IP (e.g. 169.254.169.254 for cloud metadata).
 *
 * At most MAX_REDIRECTS redirects are followed. Credential headers are
 * removed from the request as soon as a redirect changes origin. A 3xx
 * response without a `Location` header is returned to the caller as-is.
 *
 * NOTE: apart from credential stripping, the same `init` (including method
 * and body) is reused for every hop; the request is not rewritten to GET on
 * a redirect.
 *
 * @param url - Initial URL to fetch.
 * @param init - Standard fetch options; `redirect` is always overridden.
 * @param options - Optional DNS resolver override used for validation.
 * @returns The first non-redirect response (or a 3xx without `Location`).
 * @throws SsrfError if the initial URL or any redirect target is internal
 *   or unparseable, or if the redirect limit is exceeded.
 */
export async function ssrfSafeFetch(
	url: string,
	init?: RequestInit,
	options?: { resolver?: DnsResolver },
): Promise<Response> {
	let currentUrl = url;
	let currentInit = init;

	for (let i = 0; i <= MAX_REDIRECTS; i++) {
		await resolveAndValidateExternalUrl(currentUrl, options);

		const response = await globalThis.fetch(currentUrl, {
			...currentInit,
			redirect: "manual",
		});

		// Not a redirect -- return directly
		if (response.status < 300 || response.status >= 400) {
			return response;
		}

		// Extract redirect target
		const location = response.headers.get("Location");
		if (!location) {
			return response;
		}

		// The redirect response itself is discarded, so release its body
		// instead of leaving the stream (and its connection) dangling.
		// Best-effort: a failure to cancel must not mask the redirect.
		await response.body?.cancel().catch(() => undefined);

		// Resolve relative redirects against the current URL. A Location
		// that cannot be parsed is reported like any other invalid URL.
		let nextUrl: URL;
		try {
			nextUrl = new URL(location, currentUrl);
		} catch {
			throw new SsrfError("Invalid redirect target");
		}

		const previousOrigin = new URL(currentUrl).origin;
		currentUrl = nextUrl.href;

		// Strip credential headers on cross-origin redirects
		if (previousOrigin !== nextUrl.origin && currentInit) {
			currentInit = stripCredentialHeaders(currentInit);
		}
	}

	throw new SsrfError(`Too many redirects (max ${MAX_REDIRECTS})`);
}
|
||||
|
||||
/**
 * Produce a copy of `init` whose headers no longer contain any of the
 * credential headers (see CREDENTIAL_HEADERS). The input is not mutated.
 *
 * @param init - Fetch options to sanitize.
 * @returns `init` itself when it carries no headers; otherwise a shallow
 *   copy whose `headers` is a new `Headers` instance without credentials.
 */
export function stripCredentialHeaders(init: RequestInit): RequestInit {
	const { headers: original } = init;
	if (!original) {
		return init;
	}

	// Headers normalizes names, so deletion is case-insensitive.
	const sanitized = new Headers(original);
	CREDENTIAL_HEADERS.forEach((headerName) => sanitized.delete(headerName));

	return { ...init, headers: sanitized };
}
|
||||
418
packages/core/src/import/types.ts
Normal file
418
packages/core/src/import/types.ts
Normal file
@@ -0,0 +1,418 @@
|
||||
/**
|
||||
* Import source abstraction
|
||||
*
|
||||
* Allows different import sources (WXR file, WordPress.com API, REST API, plugin)
|
||||
* to all produce the same normalized format for the import flow.
|
||||
*/
|
||||
|
||||
import type { PortableTextBlock } from "@emdash-cms/gutenberg-to-portable-text";
|
||||
|
||||
// =============================================================================
|
||||
// Author Types
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Author info from WordPress, as reported in an import analysis.
 * Every field except `postCount` is optional.
 */
|
||||
export interface WpAuthorInfo {
|
||||
/** Author id as provided by the source. */
id?: number;
|
||||
/** Author login name; the WXR analyzer uses it as the key for `postCount`. */
login?: string;
|
||||
/** Author email address as provided by the source. */
email?: string;
|
||||
/** Display name; the WXR analyzer falls back to the login, then to "Unknown". */
displayName?: string;
|
||||
/**
 * Post count for this author. The WXR analyzer looks it up by login and
 * reports 0 when the author has no login or no recorded count.
 */
postCount: number;
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Source Input Types
|
||||
// =============================================================================
|
||||
|
||||
/** File-based input (WXR upload) */
|
||||
export interface FileInput {
|
||||
type: "file";
|
||||
file: File;
|
||||
}
|
||||
|
||||
/** URL-based input (REST API probe) */
|
||||
export interface UrlInput {
|
||||
type: "url";
|
||||
url: string;
|
||||
/** Optional auth token for authenticated requests */
|
||||
token?: string;
|
||||
}
|
||||
|
||||
/** OAuth-based input (WordPress.com) */
|
||||
export interface OAuthInput {
|
||||
type: "oauth";
|
||||
url: string;
|
||||
accessToken: string;
|
||||
/** Site ID for WordPress.com */
|
||||
siteId?: string;
|
||||
}
|
||||
|
||||
export type SourceInput = FileInput | UrlInput | OAuthInput;
|
||||
|
||||
// =============================================================================
|
||||
// Probe Result Types
|
||||
// =============================================================================
|
||||
|
||||
/** Auth requirements for an import source */
|
||||
export interface SourceAuth {
|
||||
type: "oauth" | "token" | "password" | "none";
|
||||
/** OAuth provider identifier */
|
||||
provider?: string;
|
||||
/** OAuth authorization URL */
|
||||
oauthUrl?: string;
|
||||
/** Human-readable instructions */
|
||||
instructions?: string;
|
||||
}
|
||||
|
||||
/** What the source can provide */
|
||||
export interface SourceCapabilities {
|
||||
/** Can fetch published content without auth */
|
||||
publicContent: boolean;
|
||||
/** Can fetch drafts/private (may need auth) */
|
||||
privateContent: boolean;
|
||||
/** Can fetch all custom post types */
|
||||
customPostTypes: boolean;
|
||||
/** Can fetch all meta fields */
|
||||
allMeta: boolean;
|
||||
/** Can stream media directly */
|
||||
mediaStream: boolean;
|
||||
}
|
||||
|
||||
/** Suggested next action after probe */
|
||||
export type SuggestedAction =
|
||||
| { type: "proceed" }
|
||||
| { type: "oauth"; url: string; provider: string }
|
||||
| { type: "upload"; instructions: string }
|
||||
| { type: "install-plugin"; instructions: string };
|
||||
|
||||
/** Detected i18n/multilingual plugin info */
|
||||
export interface I18nDetection {
|
||||
/** Multilingual plugin name (e.g. "wpml", "polylang") */
|
||||
plugin: string;
|
||||
/** BCP 47 default locale */
|
||||
defaultLocale: string;
|
||||
/** All configured locales */
|
||||
locales: string[];
|
||||
}
|
||||
|
||||
/** Result of probing a URL for a specific source */
|
||||
export interface SourceProbeResult {
|
||||
/** Which source can handle this */
|
||||
sourceId: string;
|
||||
|
||||
/** Confidence level */
|
||||
confidence: "definite" | "likely" | "possible";
|
||||
|
||||
/** What we detected */
|
||||
detected: {
|
||||
platform: string;
|
||||
version?: string;
|
||||
siteTitle?: string;
|
||||
siteUrl?: string;
|
||||
};
|
||||
|
||||
/** What capabilities are available */
|
||||
capabilities: SourceCapabilities;
|
||||
|
||||
/** What auth is needed, if any */
|
||||
auth?: SourceAuth;
|
||||
|
||||
/** Suggested next step */
|
||||
suggestedAction: SuggestedAction;
|
||||
|
||||
/** Preview data if available (e.g., post counts from REST API) */
|
||||
preview?: {
|
||||
posts?: number;
|
||||
pages?: number;
|
||||
media?: number;
|
||||
};
|
||||
|
||||
/** Detected multilingual plugin. Absent when none detected. */
|
||||
i18n?: I18nDetection;
|
||||
}
|
||||
|
||||
/** Combined probe result from all sources */
|
||||
export interface ProbeResult {
|
||||
url: string;
|
||||
isWordPress: boolean;
|
||||
/** Best matching source (highest confidence) */
|
||||
bestMatch: SourceProbeResult | null;
|
||||
/** All matching sources */
|
||||
allMatches: SourceProbeResult[];
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Analysis Types (normalized from all sources)
|
||||
// =============================================================================
|
||||
|
||||
/** Field definition for import */
|
||||
export interface ImportFieldDef {
|
||||
slug: string;
|
||||
label: string;
|
||||
type: string;
|
||||
required: boolean;
|
||||
searchable?: boolean;
|
||||
}
|
||||
|
||||
/** Field compatibility with existing schema */
|
||||
export type FieldCompatibility = "compatible" | "type_mismatch" | "missing";
|
||||
|
||||
/** Schema status for a collection */
|
||||
export interface CollectionSchemaStatus {
|
||||
exists: boolean;
|
||||
fieldStatus: Record<
|
||||
string,
|
||||
{
|
||||
status: FieldCompatibility;
|
||||
existingType?: string;
|
||||
requiredType: string;
|
||||
}
|
||||
>;
|
||||
canImport: boolean;
|
||||
reason?: string;
|
||||
}
|
||||
|
||||
/** Analysis of a single post type */
|
||||
export interface PostTypeAnalysis {
|
||||
/** Post type name as it appears in the source. */
name: string;
|
||||
/** Item count for this post type; the WXR analyzer sorts its results by this, descending. */
count: number;
|
||||
/** Collection slug suggested for this post type (WXR analyzer: `mapPostTypeToCollection(name)`). */
suggestedCollection: string;
|
||||
/**
 * Fields the target collection needs. The WXR analyzer starts from the
 * base required fields and adds the featured-image field only for post
 * types that have thumbnails.
 */
requiredFields: ImportFieldDef[];
|
||||
/** Result of checking `requiredFields` against the existing collection, if any. */
schemaStatus: CollectionSchemaStatus;
|
||||
}
|
||||
|
||||
/**
 * Attachment/media info. All fields are optional; which ones are filled in
 * depends on the import source.
 */
|
||||
export interface AttachmentInfo {
|
||||
/** Attachment id as provided by the source. */
id?: number;
|
||||
/** Attachment title as provided by the source. */
title?: string;
|
||||
/** Location of the media file as provided by the source. */
url?: string;
|
||||
/** File name; the WXR analyzer derives it from `url` and leaves it unset when there is no URL. */
filename?: string;
|
||||
/** MIME type; the WXR analyzer guesses it from `filename` and leaves it unset without one. */
mimeType?: string;
|
||||
/** Alternative text (not populated by the WXR analyzer). */
alt?: string;
|
||||
/** Caption (not populated by the WXR analyzer). */
caption?: string;
|
||||
/** Width — presumably in pixels; TODO confirm (not populated by the WXR analyzer). */
width?: number;
|
||||
/** Height — presumably in pixels; TODO confirm (not populated by the WXR analyzer). */
height?: number;
|
||||
}
|
||||
|
||||
/**
 * Analysis summary for one navigation menu.
 */
export interface NavMenuAnalysis {
	/** Menu name / slug. */
	name: string;
	/** Label shown for the menu. */
	label: string;
	/** How many items the menu contains. */
	itemCount: number;
}
|
||||
|
||||
/**
 * Analysis summary for one custom taxonomy.
 */
export interface TaxonomyAnalysis {
	/** Taxonomy slug (e.g., 'genre', 'portfolio_category'). */
	slug: string;
	/** How many terms the taxonomy contains. */
	termCount: number;
	/** A sample of term names. */
	sampleTerms: string[];
}
|
||||
|
||||
/**
 * Analysis entry for one reusable block (the `wp_block` post type).
 */
export interface ReusableBlockAnalysis {
	/** Original WordPress ID. */
	id: number;
	/** Title of the block. */
	title: string;
	/** Slug of the block. */
	slug: string;
}
|
||||
|
||||
/**
 * Normalized analysis result. Every import source produces this same shape.
 */
export interface ImportAnalysis {
	/** Identifier of the source that produced this analysis. */
	sourceId: string;

	/** Basic information about the analyzed site. */
	site: {
		title: string;
		url: string;
	};

	/** Per-post-type analysis. */
	postTypes: PostTypeAnalysis[];

	/** Attachments found: a total count plus item details. */
	attachments: {
		count: number;
		items: AttachmentInfo[];
	};

	/** Number of categories. */
	categories: number;
	/** Number of tags. */
	tags: number;
	/** Authors found in the source. */
	authors: WpAuthorInfo[];

	/** Navigation menus found in the export. */
	navMenus?: NavMenuAnalysis[];

	/** Custom taxonomies (beyond categories/tags). */
	customTaxonomies?: TaxonomyAnalysis[];

	/** Reusable blocks (wp_block post type); these are imported as sections. */
	reusableBlocks?: ReusableBlockAnalysis[];

	/** Source-specific analysis of custom (meta) fields. */
	customFields?: Array<{
		/** Original meta key. */
		key: string;
		/** Occurrence count for this key. */
		count: number;
		/** Sample values. */
		samples: string[];
		/** Suggested target field slug. */
		suggestedField: string;
		/** Suggested target field type. */
		suggestedType: "string" | "number" | "boolean" | "date" | "json";
		/** Whether the key is considered internal. */
		isInternal: boolean;
	}>;

	/** Detected multilingual plugin. Absent when none detected. */
	i18n?: I18nDetection;
}
|
||||
|
||||
// =============================================================================
|
||||
// Normalized Content Types
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Normalized content item. Every import source yields items in this shape.
 */
export interface NormalizedItem {
	/** ID of the item in the source system. */
	sourceId: string | number;
	/** WordPress post type. */
	postType: string;
	/** Content status. */
	status: "publish" | "draft" | "pending" | "private" | "future";
	/** URL slug. */
	slug: string;
	/** Title. */
	title: string;
	/** Body content, already converted to Portable Text. */
	content: PortableTextBlock[];
	/** Excerpt / summary. */
	excerpt?: string;
	/** Publication date. */
	date: Date;
	/** Last-modified date. */
	modified?: Date;
	/** Author identifier. */
	author?: string;
	/** Category slugs. */
	categories?: string[];
	/** Tag slugs. */
	tags?: string[];
	/** Custom meta fields. */
	meta?: Record<string, unknown>;
	/** URL of the featured image. */
	featuredImage?: string;
	/** Parent post ID (for hierarchical content such as pages). */
	parentId?: string | number;
	/** Menu order used for sorting. */
	menuOrder?: number;
	/** Custom taxonomy assignments beyond categories/tags. */
	customTaxonomies?: Record<string, string[]>;

	/** BCP 47 locale code. When omitted, defaults to defaultLocale. */
	locale?: string;

	/**
	 * Source-side translation group ID (an opaque string from the origin system).
	 * Items that share a translationGroup are linked as translations of each other.
	 * It is resolved to an EmDash translation_group ULID during execute.
	 */
	translationGroup?: string;
}
|
||||
|
||||
// =============================================================================
|
||||
// Import Configuration & Results
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Mapping configuration for a single post type.
 */
export interface PostTypeMapping {
	/** Whether this mapping is enabled. */
	enabled: boolean;
	/** Target collection. */
	collection: string;
}
|
||||
|
||||
/**
 * Configuration for running an import.
 */
export interface ImportConfig {
	/** Post type mappings (presumably keyed by post type name - confirm with callers). */
	postTypeMappings: Record<string, PostTypeMapping>;
	/** Optional flag to skip items that already exist. */
	skipExisting?: boolean;
}
|
||||
|
||||
/**
 * Options controlling which content a source fetches.
 */
export interface FetchOptions {
	/** Post types to fetch. */
	postTypes: string[];
	/** Whether drafts should be included. */
	includeDrafts?: boolean;
	/** Cap on the number of items (intended for testing). */
	limit?: number;
}
|
||||
|
||||
/**
 * Outcome of an import run.
 */
export interface ImportResult {
	/** Overall success flag. */
	success: boolean;
	/** Number of items imported. */
	imported: number;
	/** Number of items skipped. */
	skipped: number;
	/** Failures, each with the item title and an error message. */
	errors: Array<{ title: string; error: string }>;
	/** Counts per collection. */
	byCollection: Record<string, number>;
}
|
||||
|
||||
// =============================================================================
|
||||
// Import Source Interface
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Contract for an import source, i.e. a provider of content from an
 * external system. Every source emits the same normalized analysis and
 * content format.
 */
export interface ImportSource {
	/** Unique identifier. */
	id: string;

	/** Display name. */
	name: string;

	/** Description shown in the UI. */
	description: string;

	/** Icon identifier. */
	icon: "upload" | "globe" | "wordpress" | "plug";

	/** Whether this source needs a file upload. */
	requiresFile?: boolean;

	/** Whether this source is able to probe URLs. */
	canProbe?: boolean;

	/**
	 * Probe a URL to find out whether this source can handle it.
	 * Resolves to null when the source is not applicable.
	 */
	probe?(url: string): Promise<SourceProbeResult | null>;

	/**
	 * Analyze the content behind `input`.
	 * Resolves to a normalized ImportAnalysis.
	 */
	analyze(input: SourceInput, context: ImportContext): Promise<ImportAnalysis>;

	/**
	 * Stream content items for import.
	 * Yields normalized content items.
	 */
	fetchContent(input: SourceInput, options: FetchOptions): AsyncGenerator<NormalizedItem>;

	/**
	 * Fetch the data of one media item.
	 * Used for media import.
	 */
	fetchMedia?(url: string, input: SourceInput): Promise<Blob>;
}
|
||||
|
||||
/**
 * Context object handed to import sources.
 */
export interface ImportContext {
	/** Database connection used for schema checks (typed as unknown here). */
	db?: unknown;
	/**
	 * Callback that resolves to the existing collections: a map whose values
	 * carry the collection slug and a map of field entries with their type.
	 */
	getExistingCollections?: () => Promise<
		Map<string, { slug: string; fields: Map<string, { type: string }> }>
	>;
}
|
||||
412
packages/core/src/import/utils.ts
Normal file
412
packages/core/src/import/utils.ts
Normal file
@@ -0,0 +1,412 @@
|
||||
/**
|
||||
* Shared import utilities
|
||||
*
|
||||
* Common constants and functions used across all WordPress import sources.
|
||||
*/
|
||||
|
||||
import mime from "mime/lite";
|
||||
|
||||
import type { ImportFieldDef, CollectionSchemaStatus } from "./types.js";
|
||||
|
||||
// =============================================================================
|
||||
// Constants
|
||||
// =============================================================================
|
||||
|
||||
/**
 * WordPress post types treated as internal and therefore excluded from
 * regular content import.
 */
export const INTERNAL_POST_TYPES: string[] = [
	"revision",
	"nav_menu_item",
	"custom_css",
	"customize_changeset",
	"oembed_cache",
	"wp_global_styles",
	"wp_navigation",
	"wp_template",
	"wp_template_part",
	// Media is handled by a separate path.
	"attachment",
	// Reusable blocks are handled separately, as sections.
	"wp_block",
];
|
||||
|
||||
/** Meta keys beginning with any of these prefixes are filtered out as internal. */
export const INTERNAL_META_PREFIXES: string[] = ["_edit_", "_wp_"];
|
||||
|
||||
/** Matches an optionally negative integer or decimal, e.g. "42", "-3.5". */
const NUMERIC_PATTERN = /^-?\d+(\.\d+)?$/;

/** Matches one or more slashes at the very end of a string. */
const TRAILING_SLASHES = /\/+$/;

/** Matches "/wp-json" and everything that follows it. */
const WP_JSON_SUFFIX = /\/wp-json\/?.*$/;
|
||||
|
||||
/** Exact meta keys that are always treated as internal. */
export const INTERNAL_META_KEYS: string[] = [
	"_edit_last",
	"_edit_lock",
	"_pingme",
	"_encloseme",
];
|
||||
|
||||
/**
 * Fields every WordPress import requires on the target collection:
 * title, content and excerpt.
 */
export const BASE_REQUIRED_FIELDS: ImportFieldDef[] = [
	{
		slug: "title",
		label: "Title",
		type: "string",
		required: true,
		searchable: true,
	},
	{
		slug: "content",
		label: "Content",
		type: "portableText",
		required: false,
		searchable: true,
	},
	{
		slug: "excerpt",
		label: "Excerpt",
		type: "text",
		required: false,
	},
];
|
||||
|
||||
/**
 * Definition of the featured-image field. It is only added to post types
 * that have `_thumbnail_id`.
 */
export const FEATURED_IMAGE_FIELD: ImportFieldDef = {
	slug: "featured_image",
	label: "Featured Image",
	type: "image",
	required: false,
};
|
||||
|
||||
// =============================================================================
|
||||
// Type Guards
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Tell whether a post type is internal, i.e. listed in INTERNAL_POST_TYPES
 * and therefore excluded from import.
 *
 * @param type - Post type name to check.
 * @returns true when the type is listed as internal.
 */
export function isInternalPostType(type: string): boolean {
	return INTERNAL_POST_TYPES.some((internalType) => internalType === type);
}
|
||||
|
||||
/**
 * Decide whether a meta key is internal and should be filtered out.
 *
 * Checks run in this order:
 * 1. exact matches in INTERNAL_META_KEYS and prefixes in
 *    INTERNAL_META_PREFIXES are internal;
 * 2. `_thumbnail_id`, `_yoast_*` and `_rank_math_*` are kept;
 * 3. any other key starting with an underscore is internal;
 * 4. everything else is kept.
 *
 * @param key - Meta key to classify.
 * @returns true when the key should be filtered out.
 */
export function isInternalMetaKey(key: string): boolean {
	const listedExactly = INTERNAL_META_KEYS.includes(key);
	const hasInternalPrefix = INTERNAL_META_PREFIXES.some((prefix) => key.startsWith(prefix));
	if (listedExactly || hasInternalPrefix) {
		return true;
	}

	// Underscore-prefixed keys that are nevertheless worth keeping.
	const isUsefulKey =
		key === "_thumbnail_id" || key.startsWith("_yoast_") || key.startsWith("_rank_math_");
	if (isUsefulKey) {
		return false;
	}

	// Remaining underscore-prefixed keys are usually internal.
	return key.startsWith("_");
}
|
||||
|
||||
// =============================================================================
|
||||
// Status Mapping
|
||||
// =============================================================================
|
||||
|
||||
/** The WordPress statuses recognized by the importer. */
export type WpStatus = "publish" | "draft" | "pending" | "private" | "future";

/**
 * Map a raw WordPress status onto a normalized status.
 *
 * @param status - Raw status string; may be undefined.
 * @returns The same status when it is one of the recognized values,
 *          otherwise "draft".
 */
export function mapWpStatus(status: string | undefined): WpStatus {
	const recognized: readonly WpStatus[] = ["publish", "draft", "pending", "private", "future"];
	const match = recognized.find((candidate) => candidate === status);
	// Anything unrecognized (including undefined) falls back to a draft.
	return match ?? "draft";
}
|
||||
|
||||
// =============================================================================
|
||||
// Collection Mapping
|
||||
// =============================================================================
|
||||
|
||||
/** Default mappings from WordPress post types to EmDash collections */
const POST_TYPE_TO_COLLECTION: Record<string, string> = {
	post: "posts",
	page: "pages",
	attachment: "media",
	product: "products",
	portfolio: "portfolio",
	testimonial: "testimonials",
	team: "team",
	event: "events",
	faq: "faqs",
};

/**
 * Map a WordPress post type to an EmDash collection name.
 *
 * Post types with a default mapping return the mapped collection; every
 * other post type is returned unchanged.
 *
 * @param postType - WordPress post type name (may come from imported data).
 * @returns The collection name to use for that post type.
 */
export function mapPostTypeToCollection(postType: string): string {
	// Only consult the table's own keys. A bare `table[postType]` lookup also
	// sees inherited members of Object.prototype, so a post type named
	// "constructor" or "toString" would yield a function instead of a string.
	if (Object.prototype.hasOwnProperty.call(POST_TYPE_TO_COLLECTION, postType)) {
		return POST_TYPE_TO_COLLECTION[postType] || postType;
	}
	return postType;
}
|
||||
|
||||
// =============================================================================
|
||||
// Meta Key Mapping
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Translate a WordPress meta key into an EmDash field slug.
 *
 * Known SEO-plugin keys and `_thumbnail_id` map to fixed slugs. Any other
 * key has a single leading underscore removed, if present.
 *
 * @param key - WordPress meta key.
 * @returns The field slug to use.
 */
export function mapMetaKeyToField(key: string): string {
	switch (key) {
		// Title keys of the supported SEO plugins.
		case "_yoast_wpseo_title":
		case "_rank_math_title":
			return "seo_title";
		// Description keys of the supported SEO plugins.
		case "_yoast_wpseo_metadesc":
		case "_rank_math_description":
			return "seo_description";
		case "_thumbnail_id":
			return "featured_image";
		default:
			// Strip one leading underscore; leave other keys untouched.
			return key.startsWith("_") ? key.slice(1) : key;
	}
}
|
||||
|
||||
/**
 * Infer a field type from a meta key's name and one sample value.
 *
 * The key name is examined first (suffixes `_id`, `_date`/`_time`,
 * `_count`/`_number`). Only when the name is inconclusive is the sample
 * value inspected.
 *
 * @param key - Meta key name.
 * @param value - Sample value; may be undefined or empty.
 * @returns The inferred type; "string" when nothing more specific applies.
 */
export function inferMetaType(
	key: string,
	value: string | undefined,
): "string" | "number" | "boolean" | "date" | "json" {
	const keyEndsWithAny = (...suffixes: string[]): boolean =>
		suffixes.some((suffix) => key.endsWith(suffix));

	// Name-based hints take precedence over the sample value.
	if (keyEndsWithAny("_id") || key === "_thumbnail_id") return "string";
	if (keyEndsWithAny("_date", "_time")) return "date";
	if (keyEndsWithAny("_count", "_number")) return "number";

	// No usable sample (undefined or empty string).
	if (!value) return "string";
	const sample: string = value;

	// Serialized PHP array or JSON object/array.
	const serializedMarkers = ["a:", "{", "["];
	if (serializedMarkers.some((marker) => sample.startsWith(marker))) return "json";

	// Numeric check runs before the boolean check, so "0" and "1" are
	// reported as numbers.
	if (NUMERIC_PATTERN.test(sample)) return "number";

	return ["0", "1", "true", "false"].includes(sample) ? "boolean" : "string";
}
|
||||
|
||||
// =============================================================================
|
||||
// String Utilities
|
||||
// =============================================================================
|
||||
|
||||
export { slugify } from "../utils/slugify.js";
|
||||
|
||||
/**
 * Normalize a user-supplied site URL for API requests.
 *
 * Steps, in order:
 * 1. trim surrounding whitespace;
 * 2. prepend `https://` unless the input already starts with an
 *    `http://` or `https://` scheme (case-insensitive);
 * 3. drop trailing slashes;
 * 4. drop a `/wp-json` suffix together with anything after it.
 *
 * @param url - Raw URL or bare host name.
 * @returns The normalized URL.
 */
export function normalizeUrl(url: string): string {
	let normalized = url.trim();

	// Test for a real scheme rather than the bare prefix "http". Otherwise
	// hosts such as "httpbin.org" are taken to already have a protocol, and
	// an upper-case "HTTP://" gets a second scheme prepended.
	if (!/^https?:\/\//i.test(normalized)) {
		normalized = `https://${normalized}`;
	}

	// Remove trailing slash(es)
	normalized = normalized.replace(TRAILING_SLASHES, "");

	// Remove /wp-json (and anything after it) if included
	normalized = normalized.replace(WP_JSON_SUFFIX, "");

	return normalized;
}
|
||||
|
||||
// =============================================================================
|
||||
// File Utilities
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Extract the file name, i.e. the last non-empty path segment, from a URL.
 *
 * @param url - Absolute URL to inspect.
 * @returns The last path segment, or undefined when the URL cannot be
 *          parsed or its path has no segments.
 */
export function getFilenameFromUrl(url: string): string | undefined {
	let pathname: string;
	try {
		pathname = new URL(url).pathname;
	} catch {
		// Not a parseable absolute URL.
		return undefined;
	}

	const parts = pathname.split("/").filter((part) => part.length > 0);
	return parts.length > 0 ? parts[parts.length - 1] : undefined;
}
|
||||
|
||||
/**
 * Guess a MIME type from a file name using the `mime` lookup.
 *
 * @param filename - File name (or path) to look up.
 * @returns The MIME type, or undefined when the lookup yields nothing.
 */
export function guessMimeType(filename: string): string | undefined {
	const detected = mime.getType(filename);
	// Normalize a null/undefined lookup result to undefined.
	return detected == null ? undefined : detected;
}
|
||||
|
||||
// =============================================================================
|
||||
// Attachment Map Builder
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Build a lookup from attachment ID (as a string) to attachment URL, used
 * for resolving featured images.
 *
 * Entries whose `id` or `url` is falsy are skipped. When the same ID occurs
 * more than once, the last URL wins.
 *
 * @param attachments - Attachments with optional id and url.
 * @returns Map from stringified ID to URL.
 */
export function buildAttachmentMap(
	attachments: Array<{ id?: number | string; url?: string }>,
): Map<string, string> {
	const pairs: Array<[string, string]> = [];
	attachments.forEach(({ id, url }) => {
		if (id && url) {
			pairs.push([String(id), url]);
		}
	});
	return new Map<string, string>(pairs);
}
|
||||
|
||||
// =============================================================================
|
||||
// Schema Compatibility
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Check whether an existing field type can accept data of the type an
 * import requires.
 *
 * Identical types are always compatible. Beyond that, a small table lists
 * which existing types are acceptable for each required type. A required
 * type that is not in the table is compatible only with itself.
 *
 * @param requiredType - Type the import needs.
 * @param existingType - Type of the field in the existing schema.
 * @returns true when the existing field can be used as-is.
 */
export function isTypeCompatible(requiredType: string, existingType: string): boolean {
	if (requiredType === existingType) return true;

	// A Map is used instead of an object literal so that type names such as
	// "constructor" or "toString" cannot resolve to inherited
	// Object.prototype members, which made the former `.includes` call throw.
	const compatibleTypes = new Map<string, readonly string[]>([
		["string", ["string", "text", "slug"]],
		["text", ["string", "text"]],
		["portableText", ["portableText", "json"]],
		["number", ["number", "integer"]],
		["integer", ["number", "integer"]],
	]);

	return compatibleTypes.get(requiredType)?.includes(existingType) ?? false;
}
|
||||
|
||||
// =============================================================================
|
||||
// Byline Import Utilities
|
||||
// =============================================================================
|
||||
|
||||
import type { BylineRepository } from "../database/repositories/byline.js";
|
||||
import { slugify as slugifyFn } from "../utils/slugify.js";
|
||||
|
||||
/** Highest numeric suffix tried before giving up on finding a free slug. */
const MAX_SLUG_COLLISION_ATTEMPTS = 1000;

/**
 * Return a byline slug that no existing byline uses.
 *
 * Tries `baseSlug` first, then `baseSlug-2`, `baseSlug-3`, ... up to the
 * suffix MAX_SLUG_COLLISION_ATTEMPTS, returning the first candidate for
 * which `bylineRepo.findBySlug` yields a falsy result.
 *
 * @param bylineRepo - Repository used to look up existing slugs.
 * @param baseSlug - Preferred slug.
 * @returns The first free candidate.
 * @throws Error when every candidate up to the limit is taken.
 */
export async function ensureUniqueBylineSlug(
	bylineRepo: BylineRepository,
	baseSlug: string,
): Promise<string> {
	if (!(await bylineRepo.findBySlug(baseSlug))) {
		return baseSlug;
	}

	for (let n = 2; n <= MAX_SLUG_COLLISION_ATTEMPTS; n++) {
		const attempt = `${baseSlug}-${n}`;
		if (!(await bylineRepo.findBySlug(attempt))) {
			return attempt;
		}
	}

	throw new Error(
		`Byline slug collision limit exceeded for base slug "${baseSlug}". ` +
			`Tried ${MAX_SLUG_COLLISION_ATTEMPTS} variants.`,
	);
}
|
||||
|
||||
/**
 * Find or create the byline for an imported WordPress author.
 *
 * Resolution order:
 * 1. a cached ID stored under `authorLogin:mappedUserId`;
 * 2. an existing byline already linked to `mappedUserId`;
 * 3. a newly created byline with a unique slug derived from the login.
 *
 * The resolved ID is written to `cache` before it is returned.
 *
 * @param authorLogin - Author login from the source; nothing is resolved without it.
 * @param displayName - Preferred display name; falls back to the login.
 * @param mappedUserId - Local user the author is mapped to, if any.
 * @param bylineRepo - Repository used to look up and create bylines.
 * @param cache - Cache of resolved byline IDs, mutated by this call.
 * @returns The byline ID, or undefined when `authorLogin` is missing.
 */
export async function resolveImportByline(
	authorLogin: string | undefined,
	displayName: string | undefined,
	mappedUserId: string | undefined,
	bylineRepo: BylineRepository,
	cache: Map<string, string>,
): Promise<string | undefined> {
	if (!authorLogin) return undefined;

	const cacheKey = `${authorLogin}:${mappedUserId ?? ""}`;
	const remembered = cache.get(cacheKey);
	if (remembered) return remembered;

	// Store the resolved ID in the cache and hand it back.
	const remember = (bylineId: string): string => {
		cache.set(cacheKey, bylineId);
		return bylineId;
	};

	if (mappedUserId) {
		const linked = await bylineRepo.findByUserId(mappedUserId);
		if (linked) return remember(linked.id);
	}

	// No byline yet: create one. Without a mapped user it is marked as a guest.
	const slug = await ensureUniqueBylineSlug(bylineRepo, slugifyFn(authorLogin) || "author");
	const created = await bylineRepo.create({
		slug,
		displayName: displayName || authorLogin,
		userId: mappedUserId ?? null,
		isGuest: !mappedUserId,
	});

	return remember(created.id);
}
|
||||
|
||||
// =============================================================================
|
||||
// Schema Compatibility
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Compare the fields an import requires with an existing collection.
 *
 * When the collection does not exist, every field is reported as "missing"
 * and the import is allowed. When it exists, each field is reported as
 * "missing", "compatible" or "type_mismatch". Any type mismatch blocks the
 * import and is named in `reason`.
 *
 * @param requiredFields - Fields the import needs.
 * @param existingCollection - Existing collection, or undefined when absent.
 * @returns The schema status for the collection.
 */
export function checkSchemaCompatibility(
	requiredFields: ImportFieldDef[],
	existingCollection: { slug: string; fields: Map<string, { type: string }> } | undefined,
): CollectionSchemaStatus {
	const fieldStatus: CollectionSchemaStatus["fieldStatus"] = {};

	if (!existingCollection) {
		// Nothing to conflict with: all fields are simply missing.
		for (const { slug, type } of requiredFields) {
			fieldStatus[slug] = { status: "missing", requiredType: type };
		}
		return { exists: false, fieldStatus, canImport: true };
	}

	const mismatched: string[] = [];

	for (const { slug, type } of requiredFields) {
		const current = existingCollection.fields.get(slug);

		if (!current) {
			fieldStatus[slug] = { status: "missing", requiredType: type };
			continue;
		}

		const usable = isTypeCompatible(type, current.type);
		if (!usable) {
			mismatched.push(slug);
		}
		fieldStatus[slug] = {
			status: usable ? "compatible" : "type_mismatch",
			existingType: current.type,
			requiredType: type,
		};
	}

	const canImport = mismatched.length === 0;
	const reason = canImport ? undefined : `Incompatible field types: ${mismatched.join(", ")}`;

	return { exists: true, fieldStatus, canImport, reason };
}
|
||||
Reference in New Issue
Block a user