Initial commit: New MoreminiMore website with fresh design

This commit is contained in:
MoreminiMore
2026-04-22 01:59:05 +07:00
commit 76409638cc
14010 changed files with 2052041 additions and 0 deletions

17
node_modules/sitemap/dist/cjs/index.d.ts generated vendored Normal file
View File

@@ -0,0 +1,17 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
export { SitemapItemStream, SitemapItemStreamOptions, } from './lib/sitemap-item-stream.js';
export { IndexTagNames, SitemapIndexStream, SitemapIndexStreamOptions, SitemapAndIndexStream, SitemapAndIndexStreamOptions, } from './lib/sitemap-index-stream.js';
export { streamToPromise, SitemapStream, SitemapStreamOptions, } from './lib/sitemap-stream.js';
export * from './lib/errors.js';
export * from './lib/types.js';
export { lineSeparatedURLsToSitemapOptions, mergeStreams, validateSMIOptions, normalizeURL, ReadlineStream, ReadlineStreamOptions, } from './lib/utils.js';
export { xmlLint } from './lib/xmllint.js';
export { parseSitemap, XMLToSitemapItemStream, XMLToSitemapItemStreamOptions, ObjectStreamToJSON, ObjectStreamToJSONOptions, } from './lib/sitemap-parser.js';
export { parseSitemapIndex, XMLToSitemapIndexStream, XMLToSitemapIndexItemStreamOptions, IndexObjectStreamToJSON, IndexObjectStreamToJSONOptions, } from './lib/sitemap-index-parser.js';
export { simpleSitemapAndIndex, SimpleSitemapAndIndexOptions, } from './lib/sitemap-simple.js';
export { validateURL, validatePath, validateLimit, validatePublicBasePath, validateXSLUrl, validators, isPriceType, isResolution, isValidChangeFreq, isValidYesNo, isAllowDeny, } from './lib/validation.js';
export { LIMITS, DEFAULT_SITEMAP_ITEM_LIMIT } from './lib/constants.js';

66
node_modules/sitemap/dist/cjs/index.js generated vendored Normal file
View File

@@ -0,0 +1,66 @@
"use strict";
// TypeScript-emitted CommonJS helper: exposes property `k` of module `m` on
// object `o` under the name `k2` (falling back to `k` when `k2` is undefined).
// An existing descriptor is reused as-is unless it is missing, is an accessor
// on a non-ES module, or is a writable/configurable data property; in those
// cases a live getter that re-reads `m[k]` is installed instead.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    const exportedAs = k2 === undefined ? k : k2;
    let descriptor = Object.getOwnPropertyDescriptor(m, k);
    let useLiveGetter;
    if (!descriptor) {
        useLiveGetter = true;
    }
    else if ("get" in descriptor) {
        useLiveGetter = !m.__esModule;
    }
    else {
        useLiveGetter = descriptor.writable || descriptor.configurable;
    }
    if (useLiveGetter) {
        descriptor = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, exportedAs, descriptor);
}) : (function(o, m, k, k2) {
    // Fallback branch: plain value copy, no live binding.
    o[k2 === undefined ? k : k2] = m[k];
}));
// TypeScript-emitted helper behind `export * from`: binds every enumerable key
// of `m` onto `exports`, skipping "default" and any name `exports` already owns.
var __exportStar = (this && this.__exportStar) || function(m, exports) {
    for (var p in m) {
        if (p === "default") {
            continue;
        }
        if (Object.prototype.hasOwnProperty.call(exports, p)) {
            continue;
        }
        __createBinding(exports, m, p);
    }
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DEFAULT_SITEMAP_ITEM_LIMIT = exports.LIMITS = exports.isAllowDeny = exports.isValidYesNo = exports.isValidChangeFreq = exports.isResolution = exports.isPriceType = exports.validators = exports.validateXSLUrl = exports.validatePublicBasePath = exports.validateLimit = exports.validatePath = exports.validateURL = exports.simpleSitemapAndIndex = exports.IndexObjectStreamToJSON = exports.XMLToSitemapIndexStream = exports.parseSitemapIndex = exports.ObjectStreamToJSON = exports.XMLToSitemapItemStream = exports.parseSitemap = exports.xmlLint = exports.ReadlineStream = exports.normalizeURL = exports.validateSMIOptions = exports.mergeStreams = exports.lineSeparatedURLsToSitemapOptions = exports.SitemapStream = exports.streamToPromise = exports.SitemapAndIndexStream = exports.SitemapIndexStream = exports.IndexTagNames = exports.SitemapItemStream = void 0;
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
var sitemap_item_stream_js_1 = require("./lib/sitemap-item-stream.js");
Object.defineProperty(exports, "SitemapItemStream", { enumerable: true, get: function () { return sitemap_item_stream_js_1.SitemapItemStream; } });
var sitemap_index_stream_js_1 = require("./lib/sitemap-index-stream.js");
Object.defineProperty(exports, "IndexTagNames", { enumerable: true, get: function () { return sitemap_index_stream_js_1.IndexTagNames; } });
Object.defineProperty(exports, "SitemapIndexStream", { enumerable: true, get: function () { return sitemap_index_stream_js_1.SitemapIndexStream; } });
Object.defineProperty(exports, "SitemapAndIndexStream", { enumerable: true, get: function () { return sitemap_index_stream_js_1.SitemapAndIndexStream; } });
var sitemap_stream_js_1 = require("./lib/sitemap-stream.js");
Object.defineProperty(exports, "streamToPromise", { enumerable: true, get: function () { return sitemap_stream_js_1.streamToPromise; } });
Object.defineProperty(exports, "SitemapStream", { enumerable: true, get: function () { return sitemap_stream_js_1.SitemapStream; } });
__exportStar(require("./lib/errors.js"), exports);
__exportStar(require("./lib/types.js"), exports);
var utils_js_1 = require("./lib/utils.js");
Object.defineProperty(exports, "lineSeparatedURLsToSitemapOptions", { enumerable: true, get: function () { return utils_js_1.lineSeparatedURLsToSitemapOptions; } });
Object.defineProperty(exports, "mergeStreams", { enumerable: true, get: function () { return utils_js_1.mergeStreams; } });
Object.defineProperty(exports, "validateSMIOptions", { enumerable: true, get: function () { return utils_js_1.validateSMIOptions; } });
Object.defineProperty(exports, "normalizeURL", { enumerable: true, get: function () { return utils_js_1.normalizeURL; } });
Object.defineProperty(exports, "ReadlineStream", { enumerable: true, get: function () { return utils_js_1.ReadlineStream; } });
var xmllint_js_1 = require("./lib/xmllint.js");
Object.defineProperty(exports, "xmlLint", { enumerable: true, get: function () { return xmllint_js_1.xmlLint; } });
var sitemap_parser_js_1 = require("./lib/sitemap-parser.js");
Object.defineProperty(exports, "parseSitemap", { enumerable: true, get: function () { return sitemap_parser_js_1.parseSitemap; } });
Object.defineProperty(exports, "XMLToSitemapItemStream", { enumerable: true, get: function () { return sitemap_parser_js_1.XMLToSitemapItemStream; } });
Object.defineProperty(exports, "ObjectStreamToJSON", { enumerable: true, get: function () { return sitemap_parser_js_1.ObjectStreamToJSON; } });
var sitemap_index_parser_js_1 = require("./lib/sitemap-index-parser.js");
Object.defineProperty(exports, "parseSitemapIndex", { enumerable: true, get: function () { return sitemap_index_parser_js_1.parseSitemapIndex; } });
Object.defineProperty(exports, "XMLToSitemapIndexStream", { enumerable: true, get: function () { return sitemap_index_parser_js_1.XMLToSitemapIndexStream; } });
Object.defineProperty(exports, "IndexObjectStreamToJSON", { enumerable: true, get: function () { return sitemap_index_parser_js_1.IndexObjectStreamToJSON; } });
var sitemap_simple_js_1 = require("./lib/sitemap-simple.js");
Object.defineProperty(exports, "simpleSitemapAndIndex", { enumerable: true, get: function () { return sitemap_simple_js_1.simpleSitemapAndIndex; } });
var validation_js_1 = require("./lib/validation.js");
Object.defineProperty(exports, "validateURL", { enumerable: true, get: function () { return validation_js_1.validateURL; } });
Object.defineProperty(exports, "validatePath", { enumerable: true, get: function () { return validation_js_1.validatePath; } });
Object.defineProperty(exports, "validateLimit", { enumerable: true, get: function () { return validation_js_1.validateLimit; } });
Object.defineProperty(exports, "validatePublicBasePath", { enumerable: true, get: function () { return validation_js_1.validatePublicBasePath; } });
Object.defineProperty(exports, "validateXSLUrl", { enumerable: true, get: function () { return validation_js_1.validateXSLUrl; } });
Object.defineProperty(exports, "validators", { enumerable: true, get: function () { return validation_js_1.validators; } });
Object.defineProperty(exports, "isPriceType", { enumerable: true, get: function () { return validation_js_1.isPriceType; } });
Object.defineProperty(exports, "isResolution", { enumerable: true, get: function () { return validation_js_1.isResolution; } });
Object.defineProperty(exports, "isValidChangeFreq", { enumerable: true, get: function () { return validation_js_1.isValidChangeFreq; } });
Object.defineProperty(exports, "isValidYesNo", { enumerable: true, get: function () { return validation_js_1.isValidYesNo; } });
Object.defineProperty(exports, "isAllowDeny", { enumerable: true, get: function () { return validation_js_1.isAllowDeny; } });
var constants_js_1 = require("./lib/constants.js");
Object.defineProperty(exports, "LIMITS", { enumerable: true, get: function () { return constants_js_1.LIMITS; } });
Object.defineProperty(exports, "DEFAULT_SITEMAP_ITEM_LIMIT", { enumerable: true, get: function () { return constants_js_1.DEFAULT_SITEMAP_ITEM_LIMIT; } });

49
node_modules/sitemap/dist/cjs/lib/constants.d.ts generated vendored Normal file
View File

@@ -0,0 +1,49 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
* Shared constants used across the sitemap library
* This file serves as a single source of truth for limits and validation patterns
*/
/**
* Security limits for sitemap generation and parsing
*
* These limits are based on:
* - sitemaps.org protocol specification
* - Security best practices to prevent DoS and injection attacks
* - Google's sitemap extension specifications
*
* @see https://www.sitemaps.org/protocol.html
* @see https://developers.google.com/search/docs/advanced/sitemaps/build-sitemap
*/
export declare const LIMITS: {
// URL constraints
readonly MAX_URL_LENGTH: 2048;
readonly URL_PROTOCOL_REGEX: RegExp;
// Bounds for the number of items allowed in one sitemap file
readonly MIN_SITEMAP_ITEM_LIMIT: 1;
readonly MAX_SITEMAP_ITEM_LIMIT: 50000;
// Video field constraints
readonly MAX_VIDEO_TITLE_LENGTH: 100;
readonly MAX_VIDEO_DESCRIPTION_LENGTH: 2048;
readonly MAX_VIDEO_CATEGORY_LENGTH: 256;
readonly MAX_TAGS_PER_VIDEO: 32;
// News field constraints
readonly MAX_NEWS_TITLE_LENGTH: 200;
readonly MAX_NEWS_NAME_LENGTH: 256;
// Image field constraints
readonly MAX_IMAGE_CAPTION_LENGTH: 512;
readonly MAX_IMAGE_TITLE_LENGTH: 512;
// Caps on nested items attached to a single URL entry
readonly MAX_IMAGES_PER_URL: 1000;
readonly MAX_VIDEOS_PER_URL: 100;
readonly MAX_LINKS_PER_URL: 100;
// Total entries in a sitemap
readonly MAX_URL_ENTRIES: 50000;
// Date validation pattern
readonly ISO_DATE_REGEX: RegExp;
// Custom namespace caps
readonly MAX_CUSTOM_NAMESPACES: 20;
readonly MAX_NAMESPACE_LENGTH: 512;
// Cap on the number of parser errors retained as objects
readonly MAX_PARSER_ERRORS: 100;
};
/**
* Default maximum number of items in each sitemap XML file
* Set below the max to leave room for URLs added during processing
*
* @see https://www.sitemaps.org/protocol.html#index
*/
export declare const DEFAULT_SITEMAP_ITEM_LIMIT = 45000;

63
node_modules/sitemap/dist/cjs/lib/constants.js generated vendored Normal file
View File

@@ -0,0 +1,63 @@
"use strict";
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.DEFAULT_SITEMAP_ITEM_LIMIT = exports.LIMITS = void 0;
/**
* Shared constants used across the sitemap library
* This file serves as a single source of truth for limits and validation patterns
*/
/**
* Security limits for sitemap generation and parsing
*
* These limits are based on:
* - sitemaps.org protocol specification
* - Security best practices to prevent DoS and injection attacks
* - Google's sitemap extension specifications
*
* @see https://www.sitemaps.org/protocol.html
* @see https://developers.google.com/search/docs/advanced/sitemaps/build-sitemap
*/
// NOTE(review): the declaration file marks every field readonly, but this
// object is a plain literal (not frozen), so nothing prevents runtime mutation.
exports.LIMITS = {
// URL constraints per sitemaps.org spec
MAX_URL_LENGTH: 2048,
// Matches a leading "http://" or "https://", case-insensitively
URL_PROTOCOL_REGEX: /^https?:\/\//i,
// Sitemap size limits per sitemaps.org spec
MIN_SITEMAP_ITEM_LIMIT: 1,
MAX_SITEMAP_ITEM_LIMIT: 50000,
// Video field length constraints per Google spec
MAX_VIDEO_TITLE_LENGTH: 100,
MAX_VIDEO_DESCRIPTION_LENGTH: 2048,
MAX_VIDEO_CATEGORY_LENGTH: 256,
MAX_TAGS_PER_VIDEO: 32,
// News field length constraints per Google spec
MAX_NEWS_TITLE_LENGTH: 200,
MAX_NEWS_NAME_LENGTH: 256,
// Image field length constraints per Google spec
MAX_IMAGE_CAPTION_LENGTH: 512,
MAX_IMAGE_TITLE_LENGTH: 512,
// Limits on number of items per URL entry
MAX_IMAGES_PER_URL: 1000,
MAX_VIDEOS_PER_URL: 100,
MAX_LINKS_PER_URL: 100,
// Total entries in a sitemap
MAX_URL_ENTRIES: 50000,
// Date validation - ISO 8601 / W3C format
// Accepts YYYY-MM-DD, optionally followed by Thh:mm:ss with an optional
// three-digit fraction and an optional "Z" or +hh:mm / -hh:mm offset.
// NOTE(review): only digit counts are checked, not value ranges (month 13
// matches), and forms without seconds or with other fraction lengths do not.
ISO_DATE_REGEX: /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{3})?([+-]\d{2}:\d{2}|Z)?)?$/,
// Custom namespace limits to prevent DoS
MAX_CUSTOM_NAMESPACES: 20,
MAX_NAMESPACE_LENGTH: 512,
// Cap on stored parser errors to prevent memory DoS (BB-03)
// Errors beyond this limit are counted in errorCount but not retained as objects
MAX_PARSER_ERRORS: 100,
};
/**
* Default maximum number of items in each sitemap XML file
* Set below the max to leave room for URLs added during processing
*
* @see https://www.sitemaps.org/protocol.html#index
*/
exports.DEFAULT_SITEMAP_ITEM_LIMIT = 45000;

116
node_modules/sitemap/dist/cjs/lib/errors.d.ts generated vendored Normal file
View File

@@ -0,0 +1,116 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
* URL in SitemapItem does not exist.
* Falls back to the message "URL is required" when none is supplied.
*/
export declare class NoURLError extends Error {
constructor(message?: string);
}
/**
* Config was not passed to SitemapItem constructor.
* Falls back to the message "SitemapItem requires a configuration".
*/
export declare class NoConfigError extends Error {
constructor(message?: string);
}
/**
* changefreq property in sitemap is invalid.
* The message names the url and the rejected changefreq value.
*/
export declare class ChangeFreqInvalidError extends Error {
constructor(url: string, changefreq: any);
}
/**
* priority property in sitemap is invalid.
* The message states it must be a number between 0 and 1 inclusive.
*/
export declare class PriorityInvalidError extends Error {
constructor(url: string, priority: any);
}
/**
* SitemapIndex target folder does not exist.
* Falls back to the message "Target folder must exist".
*/
export declare class UndefinedTargetFolder extends Error {
constructor(message?: string);
}
/**
* A video entry lacks thumbnail_loc, title or description.
*/
export declare class InvalidVideoFormat extends Error {
constructor(url: string);
}
/**
* Video duration is not an integer of seconds between 0 and 28800.
*/
export declare class InvalidVideoDuration extends Error {
constructor(url: string, duration: any);
}
/**
* Video description is longer than the 2048 character limit; `length` is the
* offending length reported in the message.
*/
export declare class InvalidVideoDescription extends Error {
constructor(url: string, length: number);
}
/**
* Video rating is not between 0 and 5 inclusive; the message names the video title.
*/
export declare class InvalidVideoRating extends Error {
constructor(url: string, title: any, rating: any);
}
/**
* Attribute value `val` failed the `validator` pattern for attribute `key`.
*/
export declare class InvalidAttrValue extends Error {
constructor(key: string, val: any, validator: RegExp);
}
/**
* Attribute `key` is malformed.
*/
export declare class InvalidAttr extends Error {
constructor(key: string);
}
/**
* A news entry lacks publication, publication name, publication language,
* title or publication_date.
*/
export declare class InvalidNewsFormat extends Error {
constructor(url: string);
}
/**
* News access value is neither Registration nor Subscription.
*/
export declare class InvalidNewsAccessValue extends Error {
constructor(url: string, access: any);
}
/**
* xmlLint is not installed. Falls back to the message
* "xmlLint is not installed. XMLLint is required to validate".
*/
export declare class XMLLintUnavailable extends Error {
constructor(message?: string);
}
/**
* Video title is longer than the 100 character limit; `length` is the
* offending length reported in the message.
*/
export declare class InvalidVideoTitle extends Error {
constructor(url: string, length: number);
}
/**
* Video view count is not positive; `count` is the rejected value.
*/
export declare class InvalidVideoViewCount extends Error {
constructor(url: string, count: number);
}
/**
* Video has more than 32 tags; `count` is the number supplied.
*/
export declare class InvalidVideoTagCount extends Error {
constructor(url: string, count: number);
}
/**
* Video category exceeds 256 characters; `count` is the length that was passed.
*/
export declare class InvalidVideoCategory extends Error {
constructor(url: string, count: number);
}
/**
* Video family friendly flag is neither yes nor no.
*/
export declare class InvalidVideoFamilyFriendly extends Error {
constructor(url: string, fam: string);
}
/**
* Video restriction is not one or more two letter country codes.
*/
export declare class InvalidVideoRestriction extends Error {
constructor(url: string, code: string);
}
/**
* Video restriction relationship is neither allow nor deny.
*/
export declare class InvalidVideoRestrictionRelationship extends Error {
constructor(url: string, val?: string);
}
/**
* Video price type is not "rent" or "purchase". When `priceType` is undefined
* and `price` is an empty string, the message instead says priceType is required.
*/
export declare class InvalidVideoPriceType extends Error {
constructor(url: string, priceType?: string, price?: string);
}
/**
* Video price resolution is not hd or sd.
*/
export declare class InvalidVideoResolution extends Error {
constructor(url: string, resolution: string);
}
/**
* Video price currency is not a three capital letter currency abbreviation.
*/
export declare class InvalidVideoPriceCurrency extends Error {
constructor(url: string, currency: string);
}
/**
* The stream was ended before anything was written; the message reminds the
* caller that streamToPromise must be called before ending the stream.
*/
export declare class EmptyStream extends Error {
constructor();
}
/**
* The stream was ended without writing anything.
*/
export declare class EmptySitemap extends Error {
constructor();
}
/**
* `path` is invalid; `reason` is appended to the message.
*/
export declare class InvalidPathError extends Error {
constructor(path: string, reason: string);
}
/**
* `hostname` is invalid; `reason` is appended to the message.
*/
export declare class InvalidHostnameError extends Error {
constructor(hostname: string, reason: string);
}
/**
* `limit` is not a number between 1 and 50000.
*/
export declare class InvalidLimitError extends Error {
constructor(limit: any);
}
/**
* `publicBasePath` is invalid; `reason` is appended to the message.
*/
export declare class InvalidPublicBasePathError extends Error {
constructor(publicBasePath: string, reason: string);
}
/**
* `xslUrl` is invalid; `reason` is appended to the message.
*/
export declare class InvalidXSLUrlError extends Error {
constructor(xslUrl: string, reason: string);
}
/**
* XML attribute name contains characters other than alphanumerics, hyphens,
* underscores and colons.
*/
export declare class InvalidXMLAttributeNameError extends Error {
constructor(attributeName: string);
}

291
node_modules/sitemap/dist/cjs/lib/errors.js generated vendored Normal file
View File

@@ -0,0 +1,291 @@
"use strict";
/* eslint-disable @typescript-eslint/no-explicit-any */
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.InvalidXMLAttributeNameError = exports.InvalidXSLUrlError = exports.InvalidPublicBasePathError = exports.InvalidLimitError = exports.InvalidHostnameError = exports.InvalidPathError = exports.EmptySitemap = exports.EmptyStream = exports.InvalidVideoPriceCurrency = exports.InvalidVideoResolution = exports.InvalidVideoPriceType = exports.InvalidVideoRestrictionRelationship = exports.InvalidVideoRestriction = exports.InvalidVideoFamilyFriendly = exports.InvalidVideoCategory = exports.InvalidVideoTagCount = exports.InvalidVideoViewCount = exports.InvalidVideoTitle = exports.XMLLintUnavailable = exports.InvalidNewsAccessValue = exports.InvalidNewsFormat = exports.InvalidAttr = exports.InvalidAttrValue = exports.InvalidVideoRating = exports.InvalidVideoDescription = exports.InvalidVideoDuration = exports.InvalidVideoFormat = exports.UndefinedTargetFolder = exports.PriorityInvalidError = exports.ChangeFreqInvalidError = exports.NoConfigError = exports.NoURLError = void 0;
/**
 * URL in SitemapItem does not exist.
 * Uses "URL is required" when no (or a falsy) message is supplied.
 */
class NoURLError extends Error {
    name = 'NoURLError';
    constructor(message) {
        const text = message || 'URL is required';
        super(text);
        Error.captureStackTrace(this, NoURLError);
    }
}
exports.NoURLError = NoURLError;
/**
 * Config was not passed to SitemapItem constructor.
 * Uses "SitemapItem requires a configuration" when no message is supplied.
 */
class NoConfigError extends Error {
    name = 'NoConfigError';
    constructor(message) {
        const text = message || 'SitemapItem requires a configuration';
        super(text);
        Error.captureStackTrace(this, NoConfigError);
    }
}
exports.NoConfigError = NoConfigError;
/**
 * changefreq property in sitemap is invalid.
 * The message names the url and the rejected changefreq value.
 */
class ChangeFreqInvalidError extends Error {
    name = 'ChangeFreqInvalidError';
    constructor(url, changefreq) {
        const text = `${url}: changefreq "${changefreq}" is invalid`;
        super(text);
        Error.captureStackTrace(this, ChangeFreqInvalidError);
    }
}
exports.ChangeFreqInvalidError = ChangeFreqInvalidError;
/**
 * priority property in sitemap is invalid.
 * The message states the value must be a number between 0 and 1 inclusive.
 */
class PriorityInvalidError extends Error {
    name = 'PriorityInvalidError';
    constructor(url, priority) {
        const text = `${url}: priority "${priority}" must be a number between 0 and 1 inclusive`;
        super(text);
        Error.captureStackTrace(this, PriorityInvalidError);
    }
}
exports.PriorityInvalidError = PriorityInvalidError;
/**
 * SitemapIndex target folder does not exist.
 * Uses "Target folder must exist" when no message is supplied.
 */
class UndefinedTargetFolder extends Error {
    name = 'UndefinedTargetFolder';
    constructor(message) {
        const text = message || 'Target folder must exist';
        super(text);
        Error.captureStackTrace(this, UndefinedTargetFolder);
    }
}
exports.UndefinedTargetFolder = UndefinedTargetFolder;
/**
 * A video entry lacks thumbnail_loc, title or description.
 */
class InvalidVideoFormat extends Error {
    name = 'InvalidVideoFormat';
    constructor(url) {
        const text = `${url} video must include thumbnail_loc, title and description fields for videos`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoFormat);
    }
}
exports.InvalidVideoFormat = InvalidVideoFormat;
/**
 * Video duration is not an integer of seconds between 0 and 28800.
 */
class InvalidVideoDuration extends Error {
    name = 'InvalidVideoDuration';
    constructor(url, duration) {
        const text = `${url} duration "${duration}" must be an integer of seconds between 0 and 28800`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoDuration);
    }
}
exports.InvalidVideoDuration = InvalidVideoDuration;
/**
 * Video description exceeds the 2048 character limit; `length` is the
 * offending length reported in the message.
 */
class InvalidVideoDescription extends Error {
    name = 'InvalidVideoDescription';
    constructor(url, length) {
        super(`${url}: video description is too long ${length} vs limit of 2048 characters.`);
        Error.captureStackTrace(this, InvalidVideoDescription);
    }
}
exports.InvalidVideoDescription = InvalidVideoDescription;
/**
 * Video rating is not between 0 and 5 inclusive; the message names the video title.
 */
class InvalidVideoRating extends Error {
    name = 'InvalidVideoRating';
    constructor(url, title, rating) {
        const text = `${url}: video "${title}" rating "${rating}" must be between 0 and 5 inclusive`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoRating);
    }
}
exports.InvalidVideoRating = InvalidVideoRating;
/**
 * Attribute value `val` failed the `validator` pattern for attribute `key`.
 */
class InvalidAttrValue extends Error {
    name = 'InvalidAttrValue';
    constructor(key, val, validator) {
        // Kept as `+` concatenation so operands are coerced exactly as before.
        const text = '"' + val + '" tested against: ' + validator + ' is not a valid value for attr: "' + key + '"';
        super(text);
        Error.captureStackTrace(this, InvalidAttrValue);
    }
}
exports.InvalidAttrValue = InvalidAttrValue;
// Reports a malformed attribute key.
// InvalidAttr is only thrown when attrbuilder is called incorrectly internally
/* istanbul ignore next */
class InvalidAttr extends Error {
    name = 'InvalidAttr';
    constructor(key) {
        // Kept as `+` concatenation so `key` is coerced exactly as before.
        const text = '"' + key + '" is malformed';
        super(text);
        Error.captureStackTrace(this, InvalidAttr);
    }
}
exports.InvalidAttr = InvalidAttr;
/**
 * A news entry lacks publication, publication name, publication language,
 * title or publication_date.
 */
class InvalidNewsFormat extends Error {
    name = 'InvalidNewsFormat';
    constructor(url) {
        const text = `${url} News must include publication, publication name, publication language, title, and publication_date for news`;
        super(text);
        Error.captureStackTrace(this, InvalidNewsFormat);
    }
}
exports.InvalidNewsFormat = InvalidNewsFormat;
/**
 * News access value is neither Registration nor Subscription.
 */
class InvalidNewsAccessValue extends Error {
    name = 'InvalidNewsAccessValue';
    constructor(url, access) {
        const text = `${url} News access "${access}" must be either Registration, Subscription or not be present`;
        super(text);
        Error.captureStackTrace(this, InvalidNewsAccessValue);
    }
}
exports.InvalidNewsAccessValue = InvalidNewsAccessValue;
/**
 * xmlLint is not installed.
 * Uses a fixed explanatory message when none is supplied.
 */
class XMLLintUnavailable extends Error {
    name = 'XMLLintUnavailable';
    constructor(message) {
        const text = message || 'xmlLint is not installed. XMLLint is required to validate';
        super(text);
        Error.captureStackTrace(this, XMLLintUnavailable);
    }
}
exports.XMLLintUnavailable = XMLLintUnavailable;
/**
 * Video title exceeds the 100 character limit; `length` is the offending
 * length reported in the message.
 */
class InvalidVideoTitle extends Error {
    name = 'InvalidVideoTitle';
    constructor(url, length) {
        const text = `${url}: video title is too long ${length} vs 100 character limit`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoTitle);
    }
}
exports.InvalidVideoTitle = InvalidVideoTitle;
/**
 * Video view count is not positive; `count` is the rejected value.
 */
class InvalidVideoViewCount extends Error {
    name = 'InvalidVideoViewCount';
    constructor(url, count) {
        const text = `${url}: video view count must be positive, view count was ${count}`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoViewCount);
    }
}
exports.InvalidVideoViewCount = InvalidVideoViewCount;
/**
 * Video has more than 32 tags; `count` is the number supplied.
 */
class InvalidVideoTagCount extends Error {
    name = 'InvalidVideoTagCount';
    constructor(url, count) {
        const text = `${url}: video can have no more than 32 tags, this has ${count}`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoTagCount);
    }
}
exports.InvalidVideoTagCount = InvalidVideoTagCount;
/**
 * Video category exceeds 256 characters; `count` is the length that was passed.
 */
class InvalidVideoCategory extends Error {
    name = 'InvalidVideoCategory';
    constructor(url, count) {
        const text = `${url}: video category can only be 256 characters but was passed ${count}`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoCategory);
    }
}
exports.InvalidVideoCategory = InvalidVideoCategory;
/**
 * Video family friendly flag is neither yes nor no.
 */
class InvalidVideoFamilyFriendly extends Error {
    name = 'InvalidVideoFamilyFriendly';
    constructor(url, fam) {
        const text = `${url}: video family friendly must be yes or no, was passed "${fam}"`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoFamilyFriendly);
    }
}
exports.InvalidVideoFamilyFriendly = InvalidVideoFamilyFriendly;
/**
 * Video restriction is not one or more two letter country codes.
 */
class InvalidVideoRestriction extends Error {
    name = 'InvalidVideoRestriction';
    constructor(url, code) {
        const text = `${url}: video restriction must be one or more two letter country codes. Was passed "${code}"`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoRestriction);
    }
}
exports.InvalidVideoRestriction = InvalidVideoRestriction;
/**
 * Video restriction relationship is neither allow nor deny.
 */
class InvalidVideoRestrictionRelationship extends Error {
    name = 'InvalidVideoRestrictionRelationship';
    constructor(url, val) {
        const text = `${url}: video restriction relationship must be either allow or deny. Was passed "${val}"`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoRestrictionRelationship);
    }
}
exports.InvalidVideoRestrictionRelationship = InvalidVideoRestrictionRelationship;
/**
 * Video price type is not "rent" or "purchase". When `priceType` is undefined
 * and `price` is an empty string, the message instead says priceType is required.
 */
class InvalidVideoPriceType extends Error {
    name = 'InvalidVideoPriceType';
    constructor(url, priceType, price) {
        let text;
        if (priceType === undefined && price === '') {
            text = `${url}: video priceType is required when price is not provided`;
        }
        else {
            text = `${url}: video price type "${priceType}" is not "rent" or "purchase"`;
        }
        super(text);
        Error.captureStackTrace(this, InvalidVideoPriceType);
    }
}
exports.InvalidVideoPriceType = InvalidVideoPriceType;
/**
 * Video price resolution is not hd or sd.
 */
class InvalidVideoResolution extends Error {
    name = 'InvalidVideoResolution';
    constructor(url, resolution) {
        const text = `${url}: video price resolution "${resolution}" is not hd or sd`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoResolution);
    }
}
exports.InvalidVideoResolution = InvalidVideoResolution;
/**
 * Video price currency is not a three capital letter currency abbreviation.
 */
class InvalidVideoPriceCurrency extends Error {
    name = 'InvalidVideoPriceCurrency';
    constructor(url, currency) {
        const text = `${url}: video price currency "${currency}" must be a three capital letter abbrieviation for the country currency`;
        super(text);
        Error.captureStackTrace(this, InvalidVideoPriceCurrency);
    }
}
exports.InvalidVideoPriceCurrency = InvalidVideoPriceCurrency;
/**
 * The stream was ended before anything was written; the message reminds the
 * caller that streamToPromise must be called before ending the stream.
 */
class EmptyStream extends Error {
    name = 'EmptyStream';
    constructor() {
        const text = 'You have ended the stream before anything was written. streamToPromise MUST be called before ending the stream.';
        super(text);
        Error.captureStackTrace(this, EmptyStream);
    }
}
exports.EmptyStream = EmptyStream;
/**
 * The stream was ended without writing anything.
 */
class EmptySitemap extends Error {
    constructor() {
        super('You ended the stream without writing anything.');
        this.name = 'EmptySitemap';
        // Trim the trace at this class's own constructor. The previous code
        // passed EmptyStream, which is never on the call stack here, so the
        // captured trace was cut relative to the wrong function.
        Error.captureStackTrace(this, EmptySitemap);
    }
}
exports.EmptySitemap = EmptySitemap;
/**
 * `path` is invalid; `reason` is appended to the message.
 */
class InvalidPathError extends Error {
    name = 'InvalidPathError';
    constructor(path, reason) {
        const text = `Invalid path "${path}": ${reason}`;
        super(text);
        Error.captureStackTrace(this, InvalidPathError);
    }
}
exports.InvalidPathError = InvalidPathError;
/**
 * `hostname` is invalid; `reason` is appended to the message.
 */
class InvalidHostnameError extends Error {
    name = 'InvalidHostnameError';
    constructor(hostname, reason) {
        const text = `Invalid hostname "${hostname}": ${reason}`;
        super(text);
        Error.captureStackTrace(this, InvalidHostnameError);
    }
}
exports.InvalidHostnameError = InvalidHostnameError;
/**
 * `limit` is not a number between 1 and 50000.
 */
class InvalidLimitError extends Error {
    name = 'InvalidLimitError';
    constructor(limit) {
        const text = `Invalid limit "${limit}": must be a number between 1 and 50000 (per sitemaps.org spec)`;
        super(text);
        Error.captureStackTrace(this, InvalidLimitError);
    }
}
exports.InvalidLimitError = InvalidLimitError;
/**
 * `publicBasePath` is invalid; `reason` is appended to the message.
 */
class InvalidPublicBasePathError extends Error {
    name = 'InvalidPublicBasePathError';
    constructor(publicBasePath, reason) {
        const text = `Invalid publicBasePath "${publicBasePath}": ${reason}`;
        super(text);
        Error.captureStackTrace(this, InvalidPublicBasePathError);
    }
}
exports.InvalidPublicBasePathError = InvalidPublicBasePathError;
/**
 * `xslUrl` is invalid; `reason` is appended to the message.
 */
class InvalidXSLUrlError extends Error {
    name = 'InvalidXSLUrlError';
    constructor(xslUrl, reason) {
        const text = `Invalid xslUrl "${xslUrl}": ${reason}`;
        super(text);
        Error.captureStackTrace(this, InvalidXSLUrlError);
    }
}
exports.InvalidXSLUrlError = InvalidXSLUrlError;
/**
 * XML attribute name contains characters other than alphanumerics, hyphens,
 * underscores and colons.
 */
class InvalidXMLAttributeNameError extends Error {
    name = 'InvalidXMLAttributeNameError';
    constructor(attributeName) {
        const text = `Invalid XML attribute name "${attributeName}": must contain only alphanumeric characters, hyphens, underscores, and colons`;
        super(text);
        Error.captureStackTrace(this, InvalidXMLAttributeNameError);
    }
}
exports.InvalidXMLAttributeNameError = InvalidXMLAttributeNameError;

View File

@@ -0,0 +1,55 @@
import type { SAXStream } from 'sax';
import { Readable, Transform, TransformOptions, TransformCallback } from 'node:stream';
import { IndexItem, ErrorLevel } from './types.js';
/** Logging callback: receives a console-style level followed by the values to log. */
type Logger = (level: 'warn' | 'error' | 'info' | 'log', ...message: Parameters<Console['log']>) => void;
/**
* Options accepted by the XMLToSitemapIndexStream constructor, on top of the
* standard Transform options.
*/
export interface XMLToSitemapIndexItemStreamOptions extends TransformOptions {
/** Error level; the constructor falls back to ErrorLevel.WARN when this is unset. */
level?: ErrorLevel;
/**
* Logger to use. Passing `false`, or setting level to ErrorLevel.SILENT, makes
* the stream use a no-op logger; leaving it unset selects a console-backed default.
*/
logger?: Logger | false;
}
/**
* Takes a stream of xml and transforms it into a stream of IndexItems
* Use this to parse existing sitemap indices into config options compatible with this library
*
* The constructor always sets `objectMode` to true on the options it receives.
*/
export declare class XMLToSitemapIndexStream extends Transform {
/** Error level in effect; ErrorLevel.WARN when the options do not provide one. */
level: ErrorLevel;
/** Logger in effect; a no-op when level is SILENT or `logger: false` was passed. */
logger: Logger;
/** Set to null by the constructor. NOTE(review): presumably holds the latest recorded parse error — confirm in `err`. */
error: Error | null;
/** Internal sax stream, created via `sax.createStream(true, …)` with xmlns, strictEntities and trim enabled. */
saxStream: SAXStream;
constructor(opts?: XMLToSitemapIndexItemStreamOptions);
_transform(data: string, encoding: string, callback: TransformCallback): void;
private err;
}
/**
Read xml and resolve with the configuration that would produce it or reject with
an error
NOTE(review): the example below calls `createSitemap`, which is not among the
names re-exported by this package's index.d.ts — confirm before copying it.
```
const { createReadStream } = require('fs')
const { parseSitemapIndex, createSitemap } = require('sitemap')
parseSitemapIndex(createReadStream('./example-index.xml')).then(
// produces the same xml
// you can, of course, more practically modify it or store it
(xmlConfig) => console.log(createSitemap(xmlConfig).toString()),
(err) => console.log(err)
)
```
@param {Readable} xml what to parse
@param {number} maxEntries Maximum number of sitemap entries to parse (default: 50,000 per sitemaps.org spec)
@return {Promise<IndexItem[]>} resolves with list of index items that can be fed into a SitemapIndexStream. Rejects with an Error object.
*/
export declare function parseSitemapIndex(xml: Readable, maxEntries?: number): Promise<IndexItem[]>;
/**
* Options for IndexObjectStreamToJSON, on top of the standard Transform options.
*/
export interface IndexObjectStreamToJSONOptions extends TransformOptions {
/** Whether to separate entries by a new line (true) or a comma (false). */
lineSeparated: boolean;
}
/**
* A Transform that converts a stream of objects into a JSON Array or a line
* separated stringified JSON
* @param [lineSeparated=false] whether to separate entries by a new line or comma
*/
export declare class IndexObjectStreamToJSON extends Transform {
/** Output mode chosen through the options: line separated when true. */
lineSeparated: boolean;
/** NOTE(review): presumably records whether an entry has already been emitted, to place separators — confirm in the implementation. */
firstWritten: boolean;
constructor(opts?: IndexObjectStreamToJSONOptions);
_transform(chunk: IndexItem, encoding: string, cb: TransformCallback): void;
_flush(cb: TransformCallback): void;
}
export {};

View File

@@ -0,0 +1,271 @@
"use strict";
// TypeScript-emitted interop helper: ES-module namespace objects (flagged with
// `__esModule`) are returned untouched; anything else is wrapped so that it is
// reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.IndexObjectStreamToJSON = exports.XMLToSitemapIndexStream = void 0;
exports.parseSitemapIndex = parseSitemapIndex;
const sax_1 = __importDefault(require("sax"));
const node_stream_1 = require("node:stream");
const types_js_1 = require("./types.js");
const validation_js_1 = require("./validation.js");
const constants_js_1 = require("./constants.js");
/**
 * Reports whether `tagName` is one of the element names declared in
 * `IndexTagNames`.
 *
 * @param {string} tagName element name reported by the XML parser
 * @returns {boolean} true when the name is an own key of `IndexTagNames`
 */
function isValidTagName(tagName) {
    // Own-property lookup: the `in` operator also matches inherited keys such
    // as "constructor" or "toString", which would accept those tag names.
    // This only works because the enum name and value are the same
    return Object.prototype.hasOwnProperty.call(types_js_1.IndexTagNames, tagName);
}
/**
 * Builds a fresh, blank index item whose `url` is the empty string.
 * A new object is created on every call.
 */
function tagTemplate() {
    const blankItem = {};
    blankItem.url = '';
    return blankItem;
}
// Console-backed logger: forwards the values to the console method named by `level`.
const defaultLogger = (level, ...message) => {
    const outcome = console[level](...message);
    return outcome;
};
// Options used when the XMLToSitemapIndexStream constructor is called without
// arguments; it only pre-selects the console-backed logger.
// NOTE(review): the constructor assigns `objectMode` on the options object it
// is given, so this shared object is mutated when the default is used.
const defaultStreamOpts = {
logger: defaultLogger,
};
/**
 * Takes a stream of xml and transforms it into a stream of IndexItems
 * Use this to parse existing sitemap indices into config options compatible with this library
 *
 * Incoming chunks are fed to an internal SAX stream. Every closing `sitemap`
 * element pushes the collected item, provided a URL passed validation.
 * Problems are logged through `logger`; the first one is kept in `error` and
 * handed to the transform callback only when `level` is `ErrorLevel.THROW`.
 */
class XMLToSitemapIndexStream extends node_stream_1.Transform {
    level;
    logger;
    error;
    saxStream;
    /**
     * @param opts - Transform options plus optional `level` and `logger`
     *   (`false` disables logging). The object is not modified.
     */
    constructor(opts = defaultStreamOpts) {
        // Copy instead of assigning onto `opts`: the default value is a shared
        // module-level object and explicit options belong to the caller.
        super({ ...opts, objectMode: true });
        this.error = null;
        this.saxStream = sax_1.default.createStream(true, {
            xmlns: true,
            // @ts-expect-error - SAX types don't include strictEntities option
            strictEntities: true,
            trim: true,
        });
        this.level = opts.level || types_js_1.ErrorLevel.WARN;
        if (this.level !== types_js_1.ErrorLevel.SILENT && opts.logger !== false) {
            this.logger = opts.logger ?? defaultLogger;
        }
        else {
            this.logger = () => undefined;
        }
        let currentItem = tagTemplate();
        let currentTag;
        // <loc> and <lastmod> content is treated the same for text and CDATA
        // nodes. Returns false when the current tag takes no content, so each
        // caller can report that with its own message.
        const consumeContent = (text) => {
            switch (currentTag) {
                case types_js_1.IndexTagNames.loc:
                    // Validate URL for security: prevents protocol injection, checks length limits
                    try {
                        (0, validation_js_1.validateURL)(text, 'Sitemap index URL');
                        currentItem.url = text;
                    }
                    catch (error) {
                        const errMsg = error instanceof Error ? error.message : String(error);
                        this.logger('warn', 'Invalid URL in sitemap index:', errMsg);
                        this.err(`Invalid URL in sitemap index: ${errMsg}`);
                    }
                    return true;
                case types_js_1.IndexTagNames.lastmod:
                    // Validate date format for security and spec compliance
                    if (text && !constants_js_1.LIMITS.ISO_DATE_REGEX.test(text)) {
                        this.logger('warn', 'Invalid lastmod date format in sitemap index:', text);
                        this.err(`Invalid lastmod date format: ${text}`);
                    }
                    else {
                        currentItem.lastmod = text;
                    }
                    return true;
                default:
                    return false;
            }
        };
        this.saxStream.on('opentagstart', (tag) => {
            currentTag = tag.name;
        });
        this.saxStream.on('opentag', (tag) => {
            if (!isValidTagName(tag.name)) {
                this.logger('warn', 'unhandled tag', tag.name);
                this.err(`unhandled tag: ${tag.name}`);
            }
        });
        this.saxStream.on('text', (text) => {
            if (!consumeContent(text)) {
                this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
                this.err(`unhandled text for tag: ${currentTag} '${text}'`);
            }
        });
        this.saxStream.on('cdata', (text) => {
            if (!consumeContent(text)) {
                this.logger('log', 'unhandled cdata for tag:', currentTag);
                this.err(`unhandled cdata for tag: ${currentTag}`);
            }
        });
        this.saxStream.on('attribute', (attr) => {
            switch (currentTag) {
                case types_js_1.IndexTagNames.sitemapindex:
                    break;
                default:
                    this.logger('log', 'unhandled attr', currentTag, attr.name);
                    this.err(`unhandled attr: ${currentTag} ${attr.name}`);
            }
        });
        this.saxStream.on('closetag', (tag) => {
            switch (tag) {
                case types_js_1.IndexTagNames.sitemap:
                    // Only push items with valid URLs (non-empty after validation)
                    if (currentItem.url) {
                        this.push(currentItem);
                    }
                    currentItem = tagTemplate();
                    break;
                default:
                    break;
            }
        });
    }
    /**
     * Feeds one chunk of XML to the SAX stream.
     *
     * The callback receives the first recorded error only when `level` is
     * `ErrorLevel.THROW`; exceptions thrown while writing are always passed on.
     */
    _transform(data, encoding, callback) {
        try {
            const cb = () => callback(this.level === types_js_1.ErrorLevel.THROW ? this.error : null);
            // correcting the type here can be done without making it a breaking change
            // TODO fix this
            // eslint-disable-next-line @typescript-eslint/ban-ts-comment
            // @ts-ignore
            if (!this.saxStream.write(data, encoding)) {
                this.saxStream.once('drain', cb);
            }
            else {
                process.nextTick(cb);
            }
        }
        catch (error) {
            callback(error);
        }
    }
    /**
     * Records `msg` as the stream error unless one has already been recorded.
     */
    err(msg) {
        if (!this.error)
            this.error = new Error(msg);
    }
}
exports.XMLToSitemapIndexStream = XMLToSitemapIndexStream;
/**
 * Reads a sitemap index XML stream and collects the entries it contains.
 *
 * The source is piped through an `XMLToSitemapIndexStream`. The promise
 * settles exactly once: with the collected items when the parser ends, or
 * with the first error raised by the source or the parser. If more than
 * `maxEntries` items arrive, the promise rejects and both streams are
 * destroyed.
 *
 * ```
 * const { createReadStream } = require('fs')
 * const { parseSitemapIndex } = require('sitemap')
 * parseSitemapIndex(createReadStream('./example-index.xml')).then(
 *   (items) => console.log(items),
 *   (err) => console.log(err)
 * )
 * ```
 * @param {Readable} xml what to parse
 * @param {number} maxEntries Maximum number of sitemap entries to parse (defaults to LIMITS.MAX_SITEMAP_ITEM_LIMIT)
 * @return {Promise<IndexItem[]>} resolves with list of index items that can be fed into a SitemapIndexStream. Rejects with an Error object.
 */
async function parseSitemapIndex(xml, maxEntries = constants_js_1.LIMITS.MAX_SITEMAP_ITEM_LIMIT) {
    const items = [];
    return new Promise((resolve, reject) => {
        let settled = false;
        // Rejects once; later failures are ignored.
        const fail = (error) => {
            if (settled) {
                return;
            }
            settled = true;
            reject(error);
        };
        const parser = new XMLToSitemapIndexStream();
        // Handle source stream errors (prevents unhandled error events on xml)
        xml.on('error', fail);
        const piped = xml.pipe(parser);
        piped.on('data', (entry) => {
            if (settled) {
                return;
            }
            // Security: Prevent memory exhaustion by limiting number of entries
            if (items.length >= maxEntries) {
                fail(new Error(`Sitemap index exceeds maximum allowed entries (${maxEntries})`));
                // Immediately destroy both streams to stop further processing (BB-05)
                parser.destroy();
                xml.destroy();
                return;
            }
            items.push(entry);
        });
        piped.on('end', () => {
            if (settled) {
                return;
            }
            settled = true;
            resolve(items);
        });
        piped.on('error', fail);
    });
}
// Options used when IndexObjectStreamToJSON is constructed without arguments.
const defaultObjectStreamOpts = {
    lineSeparated: false,
};
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 *
 * In array mode the output is always a complete JSON array; an input with no
 * objects yields `[]`. In line-separated mode an empty input yields no output.
 *
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
class IndexObjectStreamToJSON extends node_stream_1.Transform {
    lineSeparated;
    firstWritten;
    /**
     * @param opts - Transform options plus `lineSeparated`. The object is not
     *   modified; the writable side is always put in object mode.
     */
    constructor(opts = defaultObjectStreamOpts) {
        // Copy instead of assigning onto `opts`: the default value is a shared
        // module-level object and explicit options belong to the caller.
        super({ ...opts, writableObjectMode: true });
        this.lineSeparated = opts.lineSeparated;
        this.firstWritten = false;
    }
    /**
     * Emits the separator (or the opening bracket for the first entry)
     * followed by the JSON text of `chunk`.
     */
    _transform(chunk, encoding, cb) {
        if (!this.firstWritten) {
            this.firstWritten = true;
            if (!this.lineSeparated) {
                this.push('[');
            }
        }
        else if (this.lineSeparated) {
            this.push('\n');
        }
        else {
            this.push(',');
        }
        if (chunk) {
            this.push(JSON.stringify(chunk));
        }
        cb();
    }
    /**
     * Closes the JSON array in array mode.
     */
    _flush(cb) {
        if (!this.lineSeparated) {
            // Nothing was written, so the opening bracket is still missing:
            // emit both so the result stays valid JSON.
            this.push(this.firstWritten ? ']' : '[]');
        }
        cb();
    }
}
exports.IndexObjectStreamToJSON = IndexObjectStreamToJSON;

View File

@@ -0,0 +1,169 @@
import { WriteStream } from 'node:fs';
import { Transform, TransformOptions, TransformCallback } from 'node:stream';
import { IndexItem, SitemapItemLoose, ErrorLevel, IndexTagNames } from './types.js';
import { SitemapStream } from './sitemap-stream.js';
export { IndexTagNames };
/**
* Options for the SitemapIndexStream
*
* Extends the standard `TransformOptions`; note that the stream's constructor
* always forces `objectMode` to `true`.
*/
export interface SitemapIndexStreamOptions extends TransformOptions {
/**
* Whether to output the lastmod date only (no time)
*
* @default false
*/
lastmodDateOnly?: boolean;
/**
* How to handle errors in passed in urls
*
* @default ErrorLevel.WARN
*/
level?: ErrorLevel;
/**
* URL to an XSL stylesheet to include in the XML
*
* When provided it is validated by the constructor.
*/
xslUrl?: string;
}
/**
* `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
*
* It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @extends {Transform}
*/
export declare class SitemapIndexStream extends Transform {
/** When true, `lastmod` values are written as a date without the time part. */
lastmodDateOnly: boolean;
/** How invalid items are handled. */
level: ErrorLevel;
/** Optional XSL stylesheet URL included in the document head. */
xslUrl?: string;
/** Whether the XML declaration and opening root tag have been written yet. */
private hasHeadOutput;
/**
* `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
*
* It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options.
*/
constructor(opts?: SitemapIndexStreamOptions);
/** Writes the XML declaration, the optional stylesheet include and the opening root tag. */
private writeHeadOutput;
/**
* Validates one index item (or URL string) and writes its `<sitemap>` entry.
*
* @param item - The index item or sitemap URL to write.
* @param encoding - Ignored for object-mode chunks.
* @param callback - Invoked when the item has been handled, with an error when `level` is THROW and the item is invalid.
*/
_transform(item: IndexItem | string, encoding: string, callback: TransformCallback): void;
/**
* Writes the closing root tag, emitting the head first if nothing was written.
*
* @param cb - Invoked once the closing tag has been pushed.
*/
_flush(cb: TransformCallback): void;
}
/**
* Callback function type for creating new sitemap streams when the item limit is reached.
*
* This function is called by SitemapAndIndexStream to create a new sitemap file when
* the current one reaches the item limit.
*
* @param i - The zero-based index of the sitemap file being created (0 for first sitemap,
*            1 for second, etc.)
* @returns A tuple containing:
*   - [0]: IndexItem or URL string to add to the sitemap index
*   - [1]: SitemapStream instance for writing sitemap items
*   - [2]: WriteStream where the sitemap will be piped (the stream will be
*          awaited for 'finish' before creating the next sitemap)
*
* NOTE(review): the runtime check in SitemapAndIndexStream also accepts an
* `undefined` third element, while this type requires a WriteStream — confirm
* which of the two is intended.
*
* @example
* ```typescript
* const getSitemapStream = (i: number) => {
*   const sitemapStream = new SitemapStream();
*   const path = `./sitemap-${i}.xml`;
*   const writeStream = createWriteStream(path);
*   sitemapStream.pipe(writeStream);
*   return [`https://example.com/${path}`, sitemapStream, writeStream];
* };
* ```
*/
type getSitemapStreamFunc = (i: number) => [IndexItem | string, SitemapStream, WriteStream];
/**
* Options for the SitemapAndIndexStream
*
* @extends {SitemapIndexStreamOptions}
*/
export interface SitemapAndIndexStreamOptions extends SitemapIndexStreamOptions {
/**
* Max number of items in each sitemap XML file.
*
* When the limit is reached the current sitemap file will be closed,
* a wait for `finish` on the target write stream will happen,
* and a new sitemap file will be created.
*
* Range: 1 - 50,000
*
* @default 45000
*/
limit?: number;
/**
* Callback for SitemapAndIndexStream that creates a new sitemap stream for a given sitemap index.
*
* Called when a new sitemap file is needed.
*
* The write stream is the destination where the sitemap was piped.
* SitemapAndIndexStream will wait for the `finish` event on each sitemap's
* write stream before moving on to the next sitemap. This ensures that the
* contents of the write stream will be fully written before being used
* by any following operations (e.g. uploading, reading contents for unit tests).
*
* @param i - The index of the sitemap file
* @returns A tuple containing the index item to be written into the sitemap index, the sitemap stream, and the write stream for the sitemap pipe destination
*/
getSitemapStream: getSitemapStreamFunc;
}
/**
* `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
* writes them to sitemap files, adds the sitemap files to a sitemap index,
* and creates new sitemap files when the count limit is reached.
*
* It waits for the target stream of the current sitemap file to finish before
* moving on to the next if the target stream is returned by the `getSitemapStream`
* callback in the 3rd position of the tuple.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @extends {SitemapIndexStream}
*/
export declare class SitemapAndIndexStream extends SitemapIndexStream {
/** Number of sitemap items written so far, across all sitemap files. */
private itemsWritten;
/** User-supplied factory that creates the next sitemap stream. */
private getSitemapStream;
/** Sitemap stream currently receiving items, if one has been created. */
private currentSitemap?;
/** Maximum number of items per sitemap file. */
private limit;
/** Write stream the current sitemap is piped to, if one was returned by the factory. */
private currentSitemapPipeline?;
/**
* Flag to prevent race conditions when creating new sitemap files.
* Set to true while waiting for the current sitemap to finish and
* a new one to be created.
*/
private isCreatingSitemap;
/**
* `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
* writes them to sitemap files, adds the sitemap files to a sitemap index,
* and creates new sitemap files when the count limit is reached.
*
* It waits for the target stream of the current sitemap file to finish before
* moving on to the next if the target stream is returned by the `getSitemapStream`
* callback in the 3rd position of the tuple.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @param {SitemapAndIndexStreamOptions} opts - Stream options.
* @throws {Error} When `limit` is outside the allowed range.
*/
constructor(opts: SitemapAndIndexStreamOptions);
/**
* Writes one sitemap item, first rolling over to a new sitemap file when the
* number of items written so far is a multiple of `limit`.
*
* @param item - The sitemap item to write.
* @param encoding - Passed on when the index entry for a new sitemap is written.
* @param callback - Invoked once the item has been accepted, or with an error.
*/
_transform(item: SitemapItemLoose, encoding: string, callback: TransformCallback): void;
/** Writes an item to the current sitemap stream and increments the item count. */
private writeItem;
/**
* Called when the stream is finished.
* If there is a current sitemap, we wait for it to finish before calling the callback.
* Includes proper event listener cleanup to prevent memory leaks.
*
* @param cb - The callback to invoke when flushing is complete
*/
_flush(cb: TransformCallback): void;
/** Calls `getSitemapStream`, validates its result and writes the new index entry. */
private createSitemap;
}

View File

@@ -0,0 +1,363 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SitemapAndIndexStream = exports.SitemapIndexStream = exports.IndexTagNames = void 0;
const node_stream_1 = require("node:stream");
const types_js_1 = require("./types.js");
Object.defineProperty(exports, "IndexTagNames", { enumerable: true, get: function () { return types_js_1.IndexTagNames; } });
const sitemap_stream_js_1 = require("./sitemap-stream.js");
const sitemap_xml_js_1 = require("./sitemap-xml.js");
const constants_js_1 = require("./constants.js");
const validation_js_1 = require("./validation.js");
// XML declaration written at the start of the sitemap index document.
const xmlDec = '<?xml version="1.0" encoding="UTF-8"?>';
// Opening root element, carrying the sitemaps.org 0.9 namespace.
const sitemapIndexTagStart = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
// Closing root element, pushed when the index stream is flushed.
const closetag = '</sitemapindex>';
// Options used when SitemapIndexStream is constructed without arguments.
const defaultStreamOpts = {};
/**
 * `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
 *
 * It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
 *
 * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
 * before `finish` will be emitted. Failure to read the stream will result in hangs.
 *
 * @extends {Transform}
 */
class SitemapIndexStream extends node_stream_1.Transform {
    lastmodDateOnly;
    level;
    xslUrl;
    hasHeadOutput;
    /**
     * Creates the stream. The passed options object is not modified; the
     * stream always runs in object mode.
     *
     * @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options.
     */
    constructor(opts = defaultStreamOpts) {
        // Copy instead of assigning onto `opts`: the default value is a shared
        // module-level object and explicit options belong to the caller.
        super({ ...opts, objectMode: true });
        this.hasHeadOutput = false;
        this.lastmodDateOnly = opts.lastmodDateOnly || false;
        this.level = opts.level ?? types_js_1.ErrorLevel.WARN;
        if (opts.xslUrl !== undefined) {
            (0, validation_js_1.validateXSLUrl)(opts.xslUrl);
        }
        this.xslUrl = opts.xslUrl;
    }
    /**
     * Pushes the XML declaration, the optional stylesheet include and the
     * opening root tag, and remembers that this has been done.
     */
    writeHeadOutput() {
        this.hasHeadOutput = true;
        let stylesheet = '';
        if (this.xslUrl) {
            stylesheet = (0, sitemap_stream_js_1.stylesheetInclude)(this.xslUrl);
        }
        this.push(xmlDec + stylesheet + sitemapIndexTagStart);
    }
    /**
     * Validates one index item (or URL string) and writes its `<sitemap>` entry.
     *
     * Invalid items fail the stream when `level` is THROW; otherwise they are
     * skipped (invalid URL) or written without `lastmod` (invalid date), with a
     * console warning when `level` is WARN. Nothing is pushed for an item until
     * all of its parts have been validated.
     *
     * @param item - `IndexItem` or sitemap URL string.
     * @param encoding - Unused.
     * @param callback - Invoked once the item has been handled.
     */
    _transform(item, encoding, callback) {
        if (!this.hasHeadOutput) {
            this.writeHeadOutput();
        }
        try {
            // Validate URL using centralized validation (checks protocol, length, format)
            const url = typeof item === 'string' ? item : item.url;
            if (!url || typeof url !== 'string') {
                const error = new Error('Invalid sitemap index item: URL must be a non-empty string');
                if (this.level === types_js_1.ErrorLevel.THROW) {
                    callback(error);
                    return;
                }
                else if (this.level === types_js_1.ErrorLevel.WARN) {
                    console.warn(error.message, item);
                }
                // For SILENT or after WARN, skip this item
                callback();
                return;
            }
            // Security: Use centralized validation to enforce protocol restrictions,
            // length limits, and prevent injection attacks
            try {
                (0, validation_js_1.validateURL)(url, 'Sitemap index URL');
            }
            catch (error) {
                // Wrap the validation error with consistent message format
                const validationMsg = error instanceof Error ? error.message : String(error);
                const err = new Error(`Invalid URL in sitemap index: ${validationMsg}`);
                if (this.level === types_js_1.ErrorLevel.THROW) {
                    callback(err);
                    return;
                }
                else if (this.level === types_js_1.ErrorLevel.WARN) {
                    console.warn(err.message);
                }
                // For SILENT or after WARN, skip this item
                callback();
                return;
            }
            // Resolve lastmod before emitting anything, so that an item rejected
            // in THROW mode never leaves a dangling `<sitemap><loc>` fragment.
            let lastmodXml = '';
            if (typeof item !== 'string' && item.lastmod) {
                try {
                    const lastmod = new Date(item.lastmod).toISOString();
                    lastmodXml = (0, sitemap_xml_js_1.element)(types_js_1.IndexTagNames.lastmod, this.lastmodDateOnly ? lastmod.slice(0, 10) : lastmod);
                }
                catch {
                    const error = new Error(`Invalid lastmod date in sitemap index: ${item.lastmod}`);
                    if (this.level === types_js_1.ErrorLevel.THROW) {
                        callback(error);
                        return;
                    }
                    else if (this.level === types_js_1.ErrorLevel.WARN) {
                        console.warn(error.message);
                    }
                    // Continue without lastmod for SILENT or after WARN
                }
            }
            this.push((0, sitemap_xml_js_1.otag)(types_js_1.IndexTagNames.sitemap));
            this.push((0, sitemap_xml_js_1.element)(types_js_1.IndexTagNames.loc, url));
            if (lastmodXml) {
                this.push(lastmodXml);
            }
            this.push((0, sitemap_xml_js_1.ctag)(types_js_1.IndexTagNames.sitemap));
            callback();
        }
        catch (error) {
            callback(error instanceof Error ? error : new Error(String(error)));
        }
    }
    /**
     * Writes the closing root tag, emitting the head first when no item was
     * ever written.
     *
     * @param cb - Invoked once the closing tag has been pushed.
     */
    _flush(cb) {
        if (!this.hasHeadOutput) {
            this.writeHeadOutput();
        }
        this.push(closetag);
        cb();
    }
}
exports.SitemapIndexStream = SitemapIndexStream;
/**
 * Returns a promise that resolves when `stream` emits `finish` and rejects
 * when it emits `error`. Whichever handler runs first detaches the other one.
 *
 * @param stream - The stream to wait for.
 * @param endStream - When true, `stream.end()` is called right after the
 *   listeners have been attached.
 */
function waitForStreamFinish(stream, endStream = false) {
    return new Promise((resolve, reject) => {
        const finishHandler = () => {
            stream.off('error', errorHandler);
            resolve();
        };
        const errorHandler = (err) => {
            stream.off('finish', finishHandler);
            reject(err);
        };
        stream.on('finish', finishHandler);
        stream.on('error', errorHandler);
        if (endStream) {
            stream.end();
        }
    });
}
/**
 * `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
 * writes them to sitemap files, adds the sitemap files to a sitemap index,
 * and creates new sitemap files when the count limit is reached.
 *
 * It waits for the target stream of the current sitemap file to finish before
 * moving on to the next if the target stream is returned by the `getSitemapStream`
 * callback in the 3rd position of the tuple.
 *
 * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
 * before `finish` will be emitted. Failure to read the stream will result in hangs.
 *
 * @extends {SitemapIndexStream}
 */
class SitemapAndIndexStream extends SitemapIndexStream {
    itemsWritten;
    getSitemapStream;
    currentSitemap;
    limit;
    currentSitemapPipeline;
    /**
     * Flag to prevent race conditions when creating new sitemap files.
     * Set to true while waiting for the current sitemap to finish and
     * a new one to be created.
     */
    isCreatingSitemap;
    /**
     * Creates the stream. The passed options object is not modified.
     *
     * @param {SitemapAndIndexStreamOptions} opts - Stream options.
     * @throws {Error} When `limit` is outside the allowed range.
     */
    constructor(opts) {
        // Pass a copy so the caller's options object is left untouched.
        super({ ...opts, objectMode: true });
        this.itemsWritten = 0;
        this.getSitemapStream = opts.getSitemapStream;
        this.limit = opts.limit ?? constants_js_1.DEFAULT_SITEMAP_ITEM_LIMIT;
        this.isCreatingSitemap = false;
        // Validate limit is within acceptable range per sitemaps.org spec
        // See: https://www.sitemaps.org/protocol.html#index
        if (this.limit < constants_js_1.LIMITS.MIN_SITEMAP_ITEM_LIMIT ||
            this.limit > constants_js_1.LIMITS.MAX_SITEMAP_ITEM_LIMIT) {
            throw new Error(`limit must be between ${constants_js_1.LIMITS.MIN_SITEMAP_ITEM_LIMIT} and ${constants_js_1.LIMITS.MAX_SITEMAP_ITEM_LIMIT} per sitemaps.org spec, got ${this.limit}`);
        }
    }
    /**
     * Writes one sitemap item. When the number of items written so far is a
     * multiple of `limit`, the current sitemap (if any) is ended and awaited,
     * and a new one is created before the item is written.
     *
     * @param item - The sitemap item to write.
     * @param encoding - Passed on when the index entry is written.
     * @param callback - Invoked once the item has been accepted, or with an error.
     */
    _transform(item, encoding, callback) {
        if (this.itemsWritten % this.limit === 0) {
            // Prevent race condition if multiple items arrive during sitemap creation
            if (this.isCreatingSitemap) {
                // Wait and retry on next tick
                process.nextTick(() => this._transform(item, encoding, callback));
                return;
            }
            if (this.currentSitemap) {
                this.isCreatingSitemap = true;
                const currentPipeline = this.currentSitemapPipeline;
                // End the current sitemap and wait for it (and its pipe target,
                // when there is one) to finish before starting the next file.
                const onFinish = waitForStreamFinish(this.currentSitemap, true);
                const onPipelineFinish = currentPipeline
                    ? waitForStreamFinish(currentPipeline)
                    : Promise.resolve();
                Promise.all([onFinish, onPipelineFinish])
                    .then(() => {
                    this.isCreatingSitemap = false;
                    this.createSitemap(encoding);
                    this.writeItem(item, callback);
                })
                    .catch((err) => {
                    this.isCreatingSitemap = false;
                    callback(err);
                });
                return;
            }
            else {
                this.createSitemap(encoding);
            }
        }
        this.writeItem(item, callback);
    }
    /**
     * Writes `item` to the current sitemap stream and counts it. The callback
     * is deferred until `drain` when the sitemap stream signals backpressure,
     * and receives an error when no sitemap stream is available.
     */
    writeItem(item, callback) {
        if (!this.currentSitemap) {
            callback(new Error('No sitemap stream available'));
            return;
        }
        if (!this.currentSitemap.write(item)) {
            this.currentSitemap.once('drain', callback);
        }
        else {
            process.nextTick(callback);
        }
        // Increment the count of items written
        this.itemsWritten++;
    }
    /**
     * Called when the stream is finished.
     * If there is a current sitemap, we wait for it to finish before calling the callback.
     * Includes proper event listener cleanup to prevent memory leaks.
     *
     * @param cb - The callback to invoke when flushing is complete
     */
    _flush(cb) {
        const currentSitemap = this.currentSitemap;
        const currentPipeline = this.currentSitemapPipeline;
        const onFinish = currentSitemap
            ? waitForStreamFinish(currentSitemap, true)
            : Promise.resolve();
        // The pipeline (pipe target) will get its end() call
        // from the sitemap stream ending.
        const onPipelineFinish = currentPipeline
            ? waitForStreamFinish(currentPipeline)
            : Promise.resolve();
        Promise.all([onFinish, onPipelineFinish])
            .then(() => {
            super._flush(cb);
        })
            .catch((err) => {
            cb(err);
        });
    }
    /**
     * Asks `getSitemapStream` for the next sitemap, validates the returned
     * tuple, hooks up error propagation and writes the index entry.
     *
     * On any failure an `error` event is emitted and no current sitemap is
     * left set, so a following `writeItem()` reports the failure through its
     * callback instead of writing to the previous, already ended stream.
     */
    createSitemap(encoding) {
        const sitemapIndex = this.itemsWritten / this.limit;
        // The previous sitemap (if any) has finished by the time we get here.
        // Forget it up front so that a failed creation cannot leave stale
        // references behind.
        this.currentSitemap = undefined;
        this.currentSitemapPipeline = undefined;
        let result;
        try {
            result = this.getSitemapStream(sitemapIndex);
        }
        catch (err) {
            this.emit('error', new Error(`getSitemapStream callback threw an error for index ${sitemapIndex}: ${err instanceof Error ? err.message : String(err)}`));
            return;
        }
        // Validate the return value
        if (!Array.isArray(result) || result.length !== 3) {
            this.emit('error', new Error(`getSitemapStream must return a 3-element array [IndexItem | string, SitemapStream, WriteStream], got: ${typeof result}`));
            return;
        }
        const [idxItem, currentSitemap, currentSitemapPipeline] = result;
        // Validate each element
        if (!idxItem ||
            (typeof idxItem !== 'string' && typeof idxItem !== 'object')) {
            this.emit('error', new Error('getSitemapStream must return an IndexItem or string as the first element'));
            return;
        }
        if (!currentSitemap || typeof currentSitemap.write !== 'function') {
            this.emit('error', new Error('getSitemapStream must return a SitemapStream as the second element'));
            return;
        }
        if (currentSitemapPipeline &&
            typeof currentSitemapPipeline.write !== 'function') {
            this.emit('error', new Error('getSitemapStream must return a WriteStream or undefined as the third element'));
            return;
        }
        // Propagate errors from the sitemap stream
        currentSitemap.on('error', (err) => this.emit('error', err));
        this.currentSitemap = currentSitemap;
        this.currentSitemapPipeline = currentSitemapPipeline;
        super._transform(idxItem, encoding, () => {
            // We are not too concerned about waiting for the index item to be written
            // as we'll wait for the file to finish at the end, and index file write
            // volume tends to be small in comparison to sitemap writes.
            // noop
        });
    }
}
exports.SitemapAndIndexStream = SitemapAndIndexStream;

View File

@@ -0,0 +1,21 @@
import { Transform, TransformOptions, TransformCallback } from 'node:stream';
import { SitemapItem, ErrorLevel } from './types.js';
/**
* Options for the SitemapItemStream: the standard `TransformOptions` plus an
* error level.
*/
export interface SitemapItemStreamOptions extends TransformOptions {
/** Error level; the stream falls back to `ErrorLevel.WARN` when unset. */
level?: ErrorLevel;
}
/**
* Takes a stream of SitemapItemOptions and spits out xml for each
* @example
* // writes <url><loc>https://example.com</loc></url><url><loc>https://example.com/2</loc></url>
* const smis = new SitemapItemStream({level: 'warn'})
* smis.pipe(writestream)
* smis.write({url: 'https://example.com', img: [], video: [], links: []})
* smis.write({url: 'https://example.com/2', img: [], video: [], links: []})
* smis.end()
* @param level - Error level
*/
export declare class SitemapItemStream extends Transform {
/** Error level taken from the options. */
level: ErrorLevel;
/**
* @param opts - Stream options; the stream always runs in object mode.
*/
constructor(opts?: SitemapItemStreamOptions);
/**
* Writes the `<url>` element for one sitemap item.
*
* @param item - The item to serialize; its `video`, `links` and `img` lists are iterated unconditionally.
* @param encoding - Unused.
* @param callback - Invoked once the element has been pushed.
*/
_transform(item: SitemapItem, encoding: string, callback: TransformCallback): void;
}

View File

@@ -0,0 +1,208 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SitemapItemStream = void 0;
const node_stream_1 = require("node:stream");
const errors_js_1 = require("./errors.js");
const types_js_1 = require("./types.js");
const sitemap_xml_js_1 = require("./sitemap-xml.js");
/**
 * Collects XML attributes for an element out of a configuration object.
 *
 * Each key has the form 'element:attribute'. For every key whose value in
 * `conf` is not `undefined`, the part after the colon becomes the attribute
 * name and the value is copied over unchanged.
 *
 * @param conf - Object holding the attribute values under their colon-delimited keys
 * @param keys - One key, or an array of keys, in the form 'element:attribute'
 * @returns Object mapping attribute names to the values found in `conf`
 * @throws {InvalidAttr} When a key with a defined value does not contain exactly one colon
 *
 * @example
 * attrBuilder({ 'price:currency': 'USD', 'price:type': 'rent' }, ['price:currency', 'price:type'])
 * // Returns: { currency: 'USD', type: 'rent' }
 */
function attrBuilder(conf, keys) {
    const keyList = typeof keys === 'string' ? [keys] : keys;
    const attrs = {};
    keyList.forEach((key) => {
        const value = conf[key];
        if (value === undefined) {
            return;
        }
        const parts = key.split(':');
        if (parts.length !== 2) {
            throw new errors_js_1.InvalidAttr(key);
        }
        const [, attrName] = parts;
        attrs[attrName] = value;
    });
    return attrs;
}
/**
 * Takes a stream of SitemapItemOptions and spits out xml for each
 * @example
 * // writes <url><loc>https://example.com</loc></url><url><loc>https://example.com/2</loc></url>
 * const smis = new SitemapItemStream({level: 'warn'})
 * smis.pipe(writestream)
 * smis.write({url: 'https://example.com', img: [], video: [], links: []})
 * smis.write({url: 'https://example.com/2', img: [], video: [], links: []})
 * smis.end()
 * @param level - Error level
 */
class SitemapItemStream extends node_stream_1.Transform {
    level;
    /**
     * @param opts - Transform options plus optional `level`. The object is
     *   not modified; the stream always runs in object mode.
     */
    constructor(opts = { level: types_js_1.ErrorLevel.WARN }) {
        // Pass a copy so the caller's options object is left untouched.
        super({ ...opts, objectMode: true });
        this.level = opts.level || types_js_1.ErrorLevel.WARN;
    }
    /**
     * Writes the `<url>` element for one item, piece by piece: core fields,
     * videos, alternate links, expiry, android/AMP links, news and images.
     *
     * @param item - Sitemap item; `video`, `links` and `img` must be arrays.
     * @param encoding - Unused.
     * @param callback - Invoked once the closing `</url>` has been pushed.
     */
    _transform(item, encoding, callback) {
        const { otag, ctag, element } = sitemap_xml_js_1;
        const tags = types_js_1.TagNames;
        // Pushes a single element built from the given tag name, optional
        // attributes and optional text.
        const emit = (...args) => this.push(element(...args));
        this.push(otag(tags.url));
        emit(tags.loc, item.url);
        if (item.lastmod) {
            emit(tags.lastmod, item.lastmod);
        }
        if (item.changefreq) {
            emit(tags.changefreq, item.changefreq);
        }
        if (item.priority !== undefined && item.priority !== null) {
            if (item.fullPrecisionPriority) {
                emit(tags.priority, item.priority.toString());
            }
            else {
                emit(tags.priority, item.priority.toFixed(1));
            }
        }
        item.video.forEach((video) => {
            this.push(otag(tags['video:video']));
            emit(tags['video:thumbnail_loc'], video.thumbnail_loc);
            emit(tags['video:title'], video.title);
            emit(tags['video:description'], video.description);
            if (video.content_loc) {
                emit(tags['video:content_loc'], video.content_loc);
            }
            if (video.player_loc) {
                emit(tags['video:player_loc'], attrBuilder(video, [
                    'player_loc:autoplay',
                    'player_loc:allow_embed',
                ]), video.player_loc);
            }
            if (video.duration) {
                emit(tags['video:duration'], video.duration.toString());
            }
            if (video.expiration_date) {
                emit(tags['video:expiration_date'], video.expiration_date);
            }
            if (video.rating !== undefined) {
                emit(tags['video:rating'], video.rating.toString());
            }
            if (video.view_count !== undefined) {
                emit(tags['video:view_count'], String(video.view_count));
            }
            if (video.publication_date) {
                emit(tags['video:publication_date'], video.publication_date);
            }
            if (video.tag && video.tag.length > 0) {
                for (const tag of video.tag) {
                    emit(tags['video:tag'], tag);
                }
            }
            if (video.category) {
                emit(tags['video:category'], video.category);
            }
            if (video.family_friendly) {
                emit(tags['video:family_friendly'], video.family_friendly);
            }
            if (video.restriction) {
                emit(tags['video:restriction'], attrBuilder(video, 'restriction:relationship'), video.restriction);
            }
            if (video.gallery_loc) {
                emit(tags['video:gallery_loc'], attrBuilder(video, 'gallery_loc:title'), video.gallery_loc);
            }
            if (video.price) {
                emit(tags['video:price'], attrBuilder(video, [
                    'price:resolution',
                    'price:currency',
                    'price:type',
                ]), video.price);
            }
            if (video.requires_subscription) {
                emit(tags['video:requires_subscription'], video.requires_subscription);
            }
            if (video.uploader) {
                emit(tags['video:uploader'], attrBuilder(video, 'uploader:info'), video.uploader);
            }
            if (video.platform) {
                emit(tags['video:platform'], attrBuilder(video, 'platform:relationship'), video.platform);
            }
            if (video.live) {
                emit(tags['video:live'], video.live);
            }
            if (video.id) {
                emit(tags['video:id'], { type: 'url' }, video.id);
            }
            this.push(ctag(tags['video:video']));
        });
        item.links.forEach((link) => {
            emit(tags['xhtml:link'], {
                rel: 'alternate',
                hreflang: link.lang || link.hreflang,
                href: link.url,
            });
        });
        if (item.expires) {
            emit(tags.expires, new Date(item.expires).toISOString());
        }
        if (item.androidLink) {
            emit(tags['xhtml:link'], {
                rel: 'alternate',
                href: item.androidLink,
            });
        }
        if (item.ampLink) {
            emit(tags['xhtml:link'], {
                rel: 'amphtml',
                href: item.ampLink,
            });
        }
        if (item.news) {
            this.push(otag(tags['news:news']));
            this.push(otag(tags['news:publication']));
            emit(tags['news:name'], item.news.publication.name);
            emit(tags['news:language'], item.news.publication.language);
            this.push(ctag(tags['news:publication']));
            if (item.news.access) {
                emit(tags['news:access'], item.news.access);
            }
            if (item.news.genres) {
                emit(tags['news:genres'], item.news.genres);
            }
            emit(tags['news:publication_date'], item.news.publication_date);
            emit(tags['news:title'], item.news.title);
            if (item.news.keywords) {
                emit(tags['news:keywords'], item.news.keywords);
            }
            if (item.news.stock_tickers) {
                emit(tags['news:stock_tickers'], item.news.stock_tickers);
            }
            this.push(ctag(tags['news:news']));
        }
        // Image handling
        item.img.forEach((image) => {
            this.push(otag(tags['image:image']));
            emit(tags['image:loc'], image.url);
            if (image.caption) {
                emit(tags['image:caption'], image.caption);
            }
            if (image.geoLocation) {
                emit(tags['image:geo_location'], image.geoLocation);
            }
            if (image.title) {
                emit(tags['image:title'], image.title);
            }
            if (image.license) {
                emit(tags['image:license'], image.license);
            }
            this.push(ctag(tags['image:image']));
        });
        this.push(ctag(tags.url));
        callback();
    }
}
exports.SitemapItemStream = SitemapItemStream;

62
node_modules/sitemap/dist/cjs/lib/sitemap-parser.d.ts generated vendored Normal file
View File

@@ -0,0 +1,62 @@
import type { SAXStream } from 'sax';
import { Readable, Transform, TransformOptions, TransformCallback } from 'node:stream';
import { SitemapItem, ErrorLevel } from './types.js';
/**
 * Signature of the logging callback used by the parser: a console-style
 * level followed by the values to log.
 */
type Logger = (level: 'warn' | 'error' | 'info' | 'log', ...message: Parameters<Console['log']>[0]) => void;
/**
 * Options accepted by XMLToSitemapItemStream in addition to the regular
 * Transform options.
 */
export interface XMLToSitemapItemStreamOptions extends TransformOptions {
    /**
     * How parse problems are treated. Falls back to ErrorLevel.WARN when omitted.
     * ErrorLevel.SILENT disables logging; with ErrorLevel.THROW the first
     * recorded error is passed to the transform callback.
     */
    level?: ErrorLevel;
    /**
     * Custom logging callback, or `false` to disable logging entirely.
     * When omitted, messages go to the matching `console` method.
     */
    logger?: Logger | false;
}
/**
 * Takes a stream of xml and transforms it into a stream of SitemapItems
 * Use this to parse existing sitemaps into config options compatible with this library
 */
export declare class XMLToSitemapItemStream extends Transform {
    /** Error level in effect for this stream (ErrorLevel.WARN when not configured). */
    level: ErrorLevel;
    /** Logging callback in use; a no-op when logging is disabled or the level is SILENT. */
    logger: Logger;
    /**
     * Errors encountered during parsing, capped at LIMITS.MAX_PARSER_ERRORS entries
     * to prevent memory DoS from malformed XML (BB-03).
     * Use errorCount for the total number of errors regardless of the cap.
     */
    errors: Error[];
    /** Total number of errors seen, including those beyond the stored cap. */
    errorCount: number;
    /** Underlying SAX stream that incoming XML chunks are written to. */
    saxStream: SAXStream;
    /** Number of closing `url` tags seen so far, including entries dropped for exceeding the URL limit. */
    urlCount: number;
    constructor(opts?: XMLToSitemapItemStreamOptions);
    /** Writes the XML chunk to the SAX stream and signals completion through `callback`. */
    _transform(data: string, encoding: string, callback: TransformCallback): void;
    /** Records a parse error: increments errorCount and stores the error while under the cap. */
    private err;
}
/**
Read xml and resolve with the list of sitemap items it describes, or reject
with an error
```
const { createReadStream } = require('fs')
const { parseSitemap } = require('sitemap')
parseSitemap(createReadStream('./example.xml')).then(
  // the parsed items can be inspected, modified or stored
  (items) => console.log(items),
  (err) => console.log(err)
)
```
@param {Readable} xml what to parse
@return {Promise<SitemapItem[]>} resolves with list of sitemap items that can be fed into a SitemapStream. Rejects with an Error object.
*/
export declare function parseSitemap(xml: Readable): Promise<SitemapItem[]>;
/** Options accepted by ObjectStreamToJSON in addition to the regular Transform options. */
export interface ObjectStreamToJSONOptions extends TransformOptions {
    /**
     * When true, entries are separated by a newline and no array brackets are
     * written; when false, entries are comma separated inside `[` … `]`.
     */
    lineSeparated: boolean;
}
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
export declare class ObjectStreamToJSON extends Transform {
    /** Whether entries are separated by a newline (true) or emitted as a comma separated array (false). */
    lineSeparated: boolean;
    /** Set once the first chunk has been handled, so later chunks are preceded by a separator. */
    firstWritten: boolean;
    constructor(opts?: ObjectStreamToJSONOptions);
    /** Emits the separator (or opening bracket) followed by the JSON-stringified chunk. */
    _transform(chunk: SitemapItem, encoding: string, cb: TransformCallback): void;
    /** Emits the closing bracket when not in line separated mode. */
    _flush(cb: TransformCallback): void;
}
export {};

788
node_modules/sitemap/dist/cjs/lib/sitemap-parser.js generated vendored Normal file
View File

@@ -0,0 +1,788 @@
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ObjectStreamToJSON = exports.XMLToSitemapItemStream = void 0;
exports.parseSitemap = parseSitemap;
const sax_1 = __importDefault(require("sax"));
const node_stream_1 = require("node:stream");
const types_js_1 = require("./types.js");
const validation_js_1 = require("./validation.js");
const constants_js_1 = require("./constants.js");
/**
 * Reports whether `tagName` is one of the tag names known to this library.
 *
 * Only own keys of the TagNames enum object are considered: the `in` operator
 * would also accept inherited names such as `constructor` or `toString`,
 * letting unknown XML tags slip through as "valid".
 *
 * @param {string} tagName tag name as reported by the SAX parser
 * @returns {boolean} true when the name is a key of TagNames
 */
function isValidTagName(tagName) {
    // This only works because the enum name and value are the same
    return Object.prototype.hasOwnProperty.call(types_js_1.TagNames, tagName);
}
/**
 * Normalizes a SAX attribute to its string value.
 *
 * @param attr either a plain string or an attribute object carrying `value`
 * @returns the attribute value, or undefined when `attr` is missing/empty
 */
function getAttrValue(attr) {
    let value;
    if (attr) {
        // Attributes arrive either as bare strings or as objects with a `value` field.
        value = typeof attr === 'string' ? attr : attr.value;
    }
    return value;
}
/**
 * Creates a fresh, empty sitemap item.
 * A new object (with new arrays) is built on every call so that items
 * never share their img/video/links collections.
 */
function tagTemplate() {
    const blankItem = { img: [], video: [], links: [], url: '' };
    return blankItem;
}
/**
 * Creates a fresh, empty video entry.
 * The string fields start empty so text can be appended to them; `tag`
 * is a new array on every call.
 */
function videoTemplate() {
    const blankVideo = { tag: [], thumbnail_loc: '', title: '', description: '' };
    return blankVideo;
}
// Blank image entry; consumers copy it with object spread before filling it in.
const imageTemplate = { url: '' };
// Blank alternate-language link entry; consumers copy it with object spread as well.
const linkTemplate = { lang: '', url: '' };
/**
 * Creates a fresh, empty news entry with a nested blank publication.
 * A new object is returned on every call so entries are never shared.
 */
function newsTemplate() {
    const publication = { name: '', language: '' };
    const blankNews = { publication, publication_date: '', title: '' };
    return blankNews;
}
/**
 * Default logging callback: forwards the message parts to the console
 * method named by `level` (e.g. console.warn for 'warn').
 */
const defaultLogger = (level, ...message) => {
    return console[level](...message);
};
// Options used when the parser stream is constructed without arguments.
const defaultStreamOpts = { logger: defaultLogger };
// TODO does this need to end with `options`
/**
 * Takes a stream of xml and transforms it into a stream of SitemapItems
 * Use this to parse existing sitemaps into config options compatible with this library
 *
 * Incoming chunks are written to a strict SAX stream. The SAX event handlers
 * registered in the constructor assemble one item per `url` element and push
 * it to the readable side when the element closes. Problems found on the way
 * are logged through `logger` and recorded via `err()`.
 */
class XMLToSitemapItemStream extends node_stream_1.Transform {
    /** Error level in effect; with ErrorLevel.THROW the first recorded error is passed to the transform callback. */
    level;
    /** Logging callback; a no-op when logging is disabled or the level is SILENT. */
    logger;
    /**
     * Errors encountered during parsing, capped at LIMITS.MAX_PARSER_ERRORS entries
     * to prevent memory DoS from malformed XML (BB-03).
     * Use errorCount for the total number of errors regardless of the cap.
     */
    errors;
    /** Total number of errors seen, including those beyond the stored cap. */
    errorCount;
    /** SAX stream the incoming XML is written to. */
    saxStream;
    /** Number of closing `url` tags seen so far. */
    urlCount;
    /**
     * @param opts stream options plus optional `level` and `logger`
     */
    constructor(opts = defaultStreamOpts) {
        // Copy the options instead of assigning to them, so neither the caller's
        // object nor the shared default options object is mutated.
        super({ ...opts, objectMode: true });
        const { TagNames } = types_js_1;
        const { LIMITS } = constants_js_1;
        const { isValidChangeFreq, isValidYesNo, isAllowDeny, isPriceType, isResolution, } = validation_js_1;
        this.errors = [];
        this.errorCount = 0;
        this.urlCount = 0;
        this.saxStream = sax_1.default.createStream(true, {
            xmlns: true,
            strictEntities: true,
            trim: true,
        });
        this.level = opts.level || types_js_1.ErrorLevel.WARN;
        if (this.level !== types_js_1.ErrorLevel.SILENT && opts.logger !== false) {
            this.logger = opts.logger ?? defaultLogger;
        }
        else {
            this.logger = () => undefined;
        }
        // Parser state shared by the SAX event handlers below.
        let currentItem = tagTemplate();
        let currentTag;
        let currentVideo = videoTemplate();
        let currentImage = { ...imageTemplate };
        let currentLink = { ...linkTemplate };
        let dontpushCurrentLink = false;
        // Logs a warning and records the same message as a parse error.
        const warn = (message) => {
            this.logger('warn', message);
            this.err(message);
        };
        // Returns the news entry of the current item, creating it on first use.
        const ensureNews = () => {
            if (!currentItem.news) {
                currentItem.news = newsTemplate();
            }
            return currentItem.news;
        };
        // Appends text to target[key] unless the result would exceed `limit`.
        const appendCapped = (target, key, text, limit, label) => {
            if (target[key].length + text.length <= limit) {
                target[key] += text;
            }
            else {
                warn(`${label} exceeds max length of ${limit}`);
            }
        };
        // Sets/extends an image text field; an over-long first chunk is truncated
        // to `limit`, later chunks that would overflow are dropped.
        const appendImageText = (field, text, limit) => {
            const message = `image ${field} exceeds max length of ${limit}`;
            const existing = currentImage[field];
            if (!existing) {
                currentImage[field] =
                    text.length <= limit ? text : text.substring(0, limit);
                if (text.length > limit) {
                    warn(message);
                }
            }
            else if (existing.length + text.length <= limit) {
                currentImage[field] += text;
            }
            else {
                warn(message);
            }
        };
        // Stores an ISO 8601 date on target[key], or reports an invalid format.
        const setIsoDate = (target, key, text, label) => {
            if (LIMITS.ISO_DATE_REGEX.test(text)) {
                target[key] = text;
            }
            else {
                warn(`Invalid ${label} format "${text}" - expected ISO 8601 format`);
            }
        };
        // Handles the tags whose content is accepted both as plain text and as
        // CDATA. Returns true when the current tag was handled here.
        const handleSharedCharacterData = (text) => {
            switch (currentTag) {
                case TagNames.loc:
                    // Validate URL
                    if (text.length > LIMITS.MAX_URL_LENGTH) {
                        this.logger('warn', `URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}: ${text.substring(0, 100)}...`);
                        this.err(`URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}`);
                    }
                    else if (!LIMITS.URL_PROTOCOL_REGEX.test(text)) {
                        warn(`URL must start with http:// or https://: ${text}`);
                    }
                    else {
                        currentItem.url = text;
                    }
                    return true;
                case TagNames['image:loc']:
                    currentImage.url = text;
                    return true;
                case TagNames['video:title']:
                    appendCapped(currentVideo, 'title', text, LIMITS.MAX_VIDEO_TITLE_LENGTH, 'video title');
                    return true;
                case TagNames['video:description']:
                    appendCapped(currentVideo, 'description', text, LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH, 'video description');
                    return true;
                case TagNames['news:name']:
                    appendCapped(ensureNews().publication, 'name', text, LIMITS.MAX_NEWS_NAME_LENGTH, 'news name');
                    return true;
                case TagNames['news:title']:
                    appendCapped(ensureNews(), 'title', text, LIMITS.MAX_NEWS_TITLE_LENGTH, 'news title');
                    return true;
                case TagNames['image:caption']:
                    appendImageText('caption', text, LIMITS.MAX_IMAGE_CAPTION_LENGTH);
                    return true;
                case TagNames['image:title']:
                    appendImageText('title', text, LIMITS.MAX_IMAGE_TITLE_LENGTH);
                    return true;
                default:
                    return false;
            }
        };
        this.saxStream.on('opentagstart', (tag) => {
            currentTag = tag.name;
            if (currentTag.startsWith('news:') && !currentItem.news) {
                currentItem.news = newsTemplate();
            }
        });
        this.saxStream.on('opentag', (tag) => {
            if (!isValidTagName(tag.name)) {
                this.logger('warn', 'unhandled tag', tag.name);
                this.err(`unhandled tag: ${tag.name}`);
                return;
            }
            if (tag.name !== 'xhtml:link') {
                return;
            }
            // SAX returns attributes as objects with {name, value, prefix, local, uri}
            // Check if required attributes exist and have values
            const rel = getAttrValue(tag.attributes.rel);
            const href = getAttrValue(tag.attributes.href);
            const hreflang = getAttrValue(tag.attributes.hreflang);
            if (!rel || !href) {
                // A rejected link must not be pushed as an empty entry on close.
                dontpushCurrentLink = true;
                warn('xhtml:link missing required rel or href attribute');
                return;
            }
            if (rel === 'alternate' && hreflang) {
                currentLink.url = href;
                currentLink.lang = hreflang;
            }
            else if (rel === 'alternate') {
                dontpushCurrentLink = true;
                currentItem.androidLink = href;
            }
            else if (rel === 'amphtml') {
                dontpushCurrentLink = true;
                currentItem.ampLink = href;
            }
            else {
                // Unsupported rel: nothing was stored in currentLink, so skip the push.
                dontpushCurrentLink = true;
                this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
                this.err(`unhandled attr for xhtml:link ${JSON.stringify(tag.attributes)}`);
            }
        });
        this.saxStream.on('text', (text) => {
            if (handleSharedCharacterData(text)) {
                return;
            }
            switch (currentTag) {
                case 'mobile:mobile':
                    break;
                case TagNames.changefreq:
                    if (isValidChangeFreq(text)) {
                        currentItem.changefreq = text;
                    }
                    break;
                case TagNames.priority: {
                    const priority = parseFloat(text);
                    // Number.isFinite is false for NaN as well as +/-Infinity.
                    if (!Number.isFinite(priority) || priority < 0 || priority > 1) {
                        warn(`Invalid priority "${text}" - must be between 0 and 1`);
                    }
                    else {
                        currentItem.priority = priority;
                    }
                    break;
                }
                case TagNames.lastmod:
                    setIsoDate(currentItem, 'lastmod', text, 'lastmod date');
                    break;
                case TagNames['video:thumbnail_loc']:
                    currentVideo.thumbnail_loc = text;
                    break;
                case TagNames['video:tag']:
                    if (currentVideo.tag.length < LIMITS.MAX_TAGS_PER_VIDEO) {
                        currentVideo.tag.push(text);
                    }
                    else {
                        warn(`video has too many tags (max ${LIMITS.MAX_TAGS_PER_VIDEO})`);
                    }
                    break;
                case TagNames['video:duration']: {
                    const duration = parseInt(text, 10);
                    if (!Number.isFinite(duration) || duration < 0 || duration > 28800) {
                        warn(`Invalid video duration "${text}" - must be between 0 and 28800 seconds`);
                    }
                    else {
                        currentVideo.duration = duration;
                    }
                    break;
                }
                case TagNames['video:player_loc']:
                    currentVideo.player_loc = text;
                    break;
                case TagNames['video:content_loc']:
                    currentVideo.content_loc = text;
                    break;
                case TagNames['video:requires_subscription']:
                    if (isValidYesNo(text)) {
                        currentVideo.requires_subscription = text;
                    }
                    break;
                case TagNames['video:publication_date']:
                    setIsoDate(currentVideo, 'publication_date', text, 'video publication_date');
                    break;
                case TagNames['video:id']:
                    currentVideo.id = text;
                    break;
                case TagNames['video:restriction']:
                    currentVideo.restriction = text;
                    break;
                case TagNames['video:view_count']: {
                    const viewCount = parseInt(text, 10);
                    if (!Number.isFinite(viewCount) || viewCount < 0) {
                        warn(`Invalid video view_count "${text}" - must be a positive integer`);
                    }
                    else {
                        currentVideo.view_count = viewCount;
                    }
                    break;
                }
                case TagNames['video:uploader']:
                    currentVideo.uploader = text;
                    break;
                case TagNames['video:family_friendly']:
                    if (isValidYesNo(text)) {
                        currentVideo.family_friendly = text;
                    }
                    break;
                case TagNames['video:expiration_date']:
                    setIsoDate(currentVideo, 'expiration_date', text, 'video expiration_date');
                    break;
                case TagNames['video:platform']:
                    currentVideo.platform = text;
                    break;
                case TagNames['video:price']:
                    currentVideo.price = text;
                    break;
                case TagNames['video:rating']: {
                    const rating = parseFloat(text);
                    if (!Number.isFinite(rating) || rating < 0 || rating > 5) {
                        warn(`Invalid video rating "${text}" - must be between 0 and 5`);
                    }
                    else {
                        currentVideo.rating = rating;
                    }
                    break;
                }
                case TagNames['video:category']:
                    currentVideo.category = text;
                    break;
                case TagNames['video:live']:
                    if (isValidYesNo(text)) {
                        currentVideo.live = text;
                    }
                    break;
                case TagNames['video:gallery_loc']:
                    currentVideo.gallery_loc = text;
                    break;
                case TagNames['image:geo_location']:
                    currentImage.geoLocation = text;
                    break;
                case TagNames['image:license']:
                    currentImage.license = text;
                    break;
                case TagNames['news:access']: {
                    const news = ensureNews();
                    if (text === 'Registration' || text === 'Subscription') {
                        news.access = text;
                    }
                    else {
                        warn(`Invalid news:access value "${text}" - must be "Registration" or "Subscription"`);
                    }
                    break;
                }
                case TagNames['news:genres']:
                    ensureNews().genres = text;
                    break;
                case TagNames['news:publication_date']:
                    setIsoDate(ensureNews(), 'publication_date', text, 'news publication_date');
                    break;
                case TagNames['news:keywords']:
                    ensureNews().keywords = text;
                    break;
                case TagNames['news:stock_tickers']:
                    ensureNews().stock_tickers = text;
                    break;
                case TagNames['news:language']:
                    ensureNews().publication.language = text;
                    break;
                default:
                    this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
                    this.err(`unhandled text for tag: ${currentTag} '${text}'`);
                    break;
            }
        });
        this.saxStream.on('cdata', (text) => {
            if (handleSharedCharacterData(text)) {
                return;
            }
            this.logger('log', 'unhandled cdata for tag:', currentTag);
            this.err(`unhandled cdata for tag: ${currentTag}`);
        });
        this.saxStream.on('attribute', (attr) => {
            switch (currentTag) {
                case TagNames['urlset']:
                case TagNames['xhtml:link']:
                case TagNames['video:id']:
                    break;
                case TagNames['video:restriction']:
                    if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
                        currentVideo['restriction:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr', currentTag, attr.name);
                        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
                    }
                    break;
                case TagNames['video:price']:
                    if (attr.name === 'type' && isPriceType(attr.value)) {
                        currentVideo['price:type'] = attr.value;
                    }
                    else if (attr.name === 'currency') {
                        currentVideo['price:currency'] = attr.value;
                    }
                    else if (attr.name === 'resolution' && isResolution(attr.value)) {
                        currentVideo['price:resolution'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:price', attr.name);
                        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
                    }
                    break;
                case TagNames['video:player_loc']:
                    if (attr.name === 'autoplay') {
                        currentVideo['player_loc:autoplay'] = attr.value;
                    }
                    else if (attr.name === 'allow_embed' && isValidYesNo(attr.value)) {
                        currentVideo['player_loc:allow_embed'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:player_loc', attr.name);
                        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
                    }
                    break;
                case TagNames['video:platform']:
                    if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
                        currentVideo['platform:relationship'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:platform', attr.name, attr.value);
                        this.err(`unhandled attr: ${currentTag} ${attr.name} ${attr.value}`);
                    }
                    break;
                case TagNames['video:gallery_loc']:
                    if (attr.name === 'title') {
                        currentVideo['gallery_loc:title'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:gallery_loc', attr.name);
                        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
                    }
                    break;
                case TagNames['video:uploader']:
                    if (attr.name === 'info') {
                        currentVideo['uploader:info'] = attr.value;
                    }
                    else {
                        this.logger('log', 'unhandled attr for video:uploader', attr.name);
                        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
                    }
                    break;
                default:
                    this.logger('log', 'unhandled attr', currentTag, attr.name);
                    this.err(`unhandled attr: ${currentTag} ${attr.name}`);
            }
        });
        this.saxStream.on('closetag', (tag) => {
            switch (tag) {
                case TagNames.url:
                    this.urlCount++;
                    if (this.urlCount > LIMITS.MAX_URL_ENTRIES) {
                        // Over the limit: report it and drop the item instead of emitting it.
                        const message = `Sitemap exceeds maximum of ${LIMITS.MAX_URL_ENTRIES} URLs`;
                        this.logger('error', message);
                        this.err(message);
                    }
                    else {
                        this.push(currentItem);
                    }
                    currentItem = tagTemplate();
                    break;
                case TagNames['video:video']:
                    if (currentItem.video.length < LIMITS.MAX_VIDEOS_PER_URL) {
                        currentItem.video.push(currentVideo);
                    }
                    else {
                        warn(`URL has too many videos (max ${LIMITS.MAX_VIDEOS_PER_URL})`);
                    }
                    currentVideo = videoTemplate();
                    break;
                case TagNames['image:image']:
                    if (currentItem.img.length < LIMITS.MAX_IMAGES_PER_URL) {
                        currentItem.img.push(currentImage);
                    }
                    else {
                        warn(`URL has too many images (max ${LIMITS.MAX_IMAGES_PER_URL})`);
                    }
                    currentImage = { ...imageTemplate };
                    break;
                case TagNames['xhtml:link']:
                    if (!dontpushCurrentLink) {
                        if (currentItem.links.length < LIMITS.MAX_LINKS_PER_URL) {
                            currentItem.links.push(currentLink);
                        }
                        else {
                            warn(`URL has too many links (max ${LIMITS.MAX_LINKS_PER_URL})`);
                        }
                    }
                    currentLink = { ...linkTemplate };
                    dontpushCurrentLink = false; // Reset flag for next link
                    break;
                default:
                    break;
            }
        });
    }
    /**
     * Feeds one chunk of XML to the SAX stream.
     *
     * `callback` receives the first recorded error when the level is
     * ErrorLevel.THROW and at least one error was recorded, otherwise null.
     * Exceptions thrown while writing to the SAX stream are passed to
     * `callback` as well.
     */
    _transform(data, encoding, callback) {
        try {
            const cb = () => callback(this.level === types_js_1.ErrorLevel.THROW && this.errors.length > 0
                ? this.errors[0]
                : null);
            if (!this.saxStream.write(data, encoding)) {
                // Wait for the SAX stream to drain before accepting more input.
                this.saxStream.once('drain', cb);
            }
            else {
                process.nextTick(cb);
            }
        }
        catch (error) {
            callback(error);
        }
    }
    /**
     * Records a parse error: always counts it, but only stores it while the
     * number of stored errors is below LIMITS.MAX_PARSER_ERRORS.
     */
    err(msg) {
        this.errorCount++;
        if (this.errors.length < constants_js_1.LIMITS.MAX_PARSER_ERRORS) {
            this.errors.push(new Error(msg));
        }
    }
}
exports.XMLToSitemapItemStream = XMLToSitemapItemStream;
/**
Read xml and resolve with the list of sitemap items it describes, or reject
with an error
```
const { createReadStream } = require('fs')
const { parseSitemap } = require('sitemap')
parseSitemap(createReadStream('./example.xml')).then(
  // the parsed items can be inspected, modified or stored
  (items) => console.log(items),
  (err) => console.log(err)
)
```
@param {Readable} xml what to parse
@return {Promise<SitemapItem[]>} resolves with list of sitemap items that can be fed into a SitemapStream. Rejects with an Error object.
*/
async function parseSitemap(xml) {
    const urls = [];
    return new Promise((resolve, reject) => {
        const parser = new XMLToSitemapItemStream();
        // pipe() does not forward errors from the source stream. Without this
        // listener a failing source either crashes the process (unhandled
        // 'error' event) or leaves the promise pending forever.
        xml.on('error', (error) => {
            parser.destroy();
            reject(error);
        });
        xml
            .pipe(parser)
            .on('data', (smi) => urls.push(smi))
            .on('end', () => {
            resolve(urls);
        })
            .on('error', (error) => {
            reject(error);
        });
    });
}
// Options used when ObjectStreamToJSON is constructed without arguments.
// Frozen because it is shared between all such instances.
const defaultObjectStreamOpts = Object.freeze({
    lineSeparated: false,
});
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 *
 * In array mode the output is `[` + comma separated entries + `]`; an empty
 * input yields `[]`. In line separated mode entries are joined by `\n` and no
 * brackets are written.
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
class ObjectStreamToJSON extends node_stream_1.Transform {
    /** Whether entries are newline separated instead of forming a JSON array. */
    lineSeparated;
    /** Set once the first chunk has been handled. */
    firstWritten;
    constructor(opts = defaultObjectStreamOpts) {
        // Copy the options instead of assigning to them, so neither the caller's
        // object nor the shared default is mutated.
        super({ ...opts, writableObjectMode: true });
        this.lineSeparated = opts.lineSeparated;
        this.firstWritten = false;
    }
    /**
     * Writes the separator required before this chunk (or the opening bracket
     * for the first chunk in array mode), followed by the stringified chunk.
     */
    _transform(chunk, encoding, cb) {
        if (!this.firstWritten) {
            this.firstWritten = true;
            if (!this.lineSeparated) {
                this.push('[');
            }
        }
        else if (this.lineSeparated) {
            this.push('\n');
        }
        else {
            this.push(',');
        }
        if (chunk) {
            this.push(JSON.stringify(chunk));
        }
        cb();
    }
    /** Closes the JSON array when in array mode. */
    _flush(cb) {
        if (!this.lineSeparated) {
            if (!this.firstWritten) {
                // No chunk ever opened the array; emit `[` so the output is `[]`
                // rather than a lone, invalid `]`.
                this.push('[');
            }
            this.push(']');
        }
        cb();
    }
}
exports.ObjectStreamToJSON = ObjectStreamToJSON;

63
node_modules/sitemap/dist/cjs/lib/sitemap-simple.d.ts generated vendored Normal file
View File

@@ -0,0 +1,63 @@
import { Readable } from 'node:stream';
import { SitemapItemLoose } from './types.js';
/**
 * Options for the simpleSitemapAndIndex function
 */
export interface SimpleSitemapAndIndexOptions {
    /**
     * The hostname for all URLs
     * Must be a valid http:// or https:// URL
     */
    hostname: string;
    /**
     * The hostname for the sitemaps if different than hostname
     * Must be a valid http:// or https:// URL
     * Defaults to the value of `hostname` when omitted.
     */
    sitemapHostname?: string;
    /**
     * The urls you want to make a sitemap out of.
     * Can be an array of items, a file path string, a Readable stream, or an array of strings
     */
    sourceData: SitemapItemLoose[] | string | Readable | string[];
    /**
     * Where to write the sitemaps and index
     * Must be a relative path without path traversal sequences
     * The directory is created (recursively) if it does not exist.
     */
    destinationDir: string;
    /**
     * Where the sitemaps are relative to the hostname. Defaults to root.
     * Must not contain path traversal sequences
     * A trailing slash is appended when missing.
     * @default './'
     */
    publicBasePath?: string;
    /**
     * How many URLs to write before switching to a new file
     * Must be between 1 and 50,000 per sitemaps.org spec
     * @default 50000
     */
    limit?: number;
    /**
     * Whether to compress the written files
     * @default true
     */
    gzip?: boolean;
    /**
     * Optional URL to an XSL stylesheet
     * Must be a valid http:// or https:// URL
     */
    xslUrl?: string;
}
/**
 * A simpler interface for creating sitemaps and indexes.
 * Automatically handles splitting large datasets into multiple sitemap files.
 *
 * All options are validated before anything is written, and the destination
 * directory is created if needed.
 *
 * @param options - Configuration options
 * @returns A promise that resolves when all sitemaps and the index are written
 * @throws {InvalidHostnameError} If hostname or sitemapHostname is invalid
 * @throws {InvalidPathError} If destinationDir contains path traversal
 * @throws {InvalidPublicBasePathError} If publicBasePath is invalid
 * @throws {InvalidLimitError} If limit is out of range
 * @throws {InvalidXSLUrlError} If xslUrl is invalid
 * @throws {Error} If the destination directory cannot be created
 * @throws {Error} If sourceData type is not supported
 */
export declare const simpleSitemapAndIndex: ({ hostname, sitemapHostname, sourceData, destinationDir, limit, gzip, publicBasePath, xslUrl, }: SimpleSitemapAndIndexOptions) => Promise<void>;
export default simpleSitemapAndIndex;

113
node_modules/sitemap/dist/cjs/lib/sitemap-simple.js generated vendored Normal file
View File

@@ -0,0 +1,113 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.simpleSitemapAndIndex = void 0;
const sitemap_index_stream_js_1 = require("./sitemap-index-stream.js");
const sitemap_stream_js_1 = require("./sitemap-stream.js");
const utils_js_1 = require("./utils.js");
const node_zlib_1 = require("node:zlib");
const node_fs_1 = require("node:fs");
const node_path_1 = require("node:path");
const node_stream_1 = require("node:stream");
const promises_1 = require("node:stream/promises");
const node_url_1 = require("node:url");
const validation_js_1 = require("./validation.js");
/**
 * A simpler interface for creating sitemaps and indexes.
 * Automatically handles splitting large datasets into multiple sitemap files.
 *
 * Files are written into `destinationDir` as `sitemap-<i>.xml` plus
 * `sitemap-index.xml`; when `gzip` is truthy every file is gzip-compressed and
 * gets a `.gz` suffix.
 *
 * Low-level failures (directory creation, URL construction, reading the source
 * file, the write pipeline) are re-thrown as an `Error` with a contextual
 * message. The original error is preserved on `error.cause`.
 *
 * @param options - Configuration options
 * @param options.hostname - Base URL given to every generated SitemapStream
 * @param options.sitemapHostname - Base URL used for the sitemap file URLs (defaults to `hostname`)
 * @param options.sourceData - Array of items, path to a line-separated URL file, or a Readable stream
 * @param options.destinationDir - Output directory (created recursively if missing)
 * @param options.limit - Forwarded to SitemapAndIndexStream as `limit` (default 50000)
 * @param options.gzip - Whether to gzip the output files (default true)
 * @param options.publicBasePath - Path prefix of the sitemap files in the index URLs (default './')
 * @param options.xslUrl - Optional XSL stylesheet URL given to every generated SitemapStream
 * @returns A promise that resolves when all sitemaps and the index are written
 * @throws {InvalidHostnameError} If hostname or sitemapHostname is invalid
 * @throws {InvalidPathError} If destinationDir contains path traversal
 * @throws {InvalidPublicBasePathError} If publicBasePath is invalid
 * @throws {InvalidLimitError} If limit is out of range
 * @throws {InvalidXSLUrlError} If xslUrl is invalid
 * @throws {Error} If sourceData type is not supported
 */
const simpleSitemapAndIndex = async ({ hostname, sitemapHostname = hostname, // if different
sourceData, destinationDir, limit = 50000, gzip = true, publicBasePath = './', xslUrl, }) => {
    // Adds context to a failure while keeping the original error (stack, code,
    // errno, ...) reachable through `cause` instead of discarding it.
    const wrapError = (context, err) => new Error(`${context}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    // Validate all inputs upfront
    (0, validation_js_1.validateURL)(hostname, 'hostname');
    (0, validation_js_1.validateURL)(sitemapHostname, 'sitemapHostname');
    (0, validation_js_1.validatePath)(destinationDir, 'destinationDir');
    (0, validation_js_1.validateLimit)(limit);
    (0, validation_js_1.validatePublicBasePath)(publicBasePath);
    if (xslUrl) {
        (0, validation_js_1.validateXSLUrl)(xslUrl);
    }
    // Create destination directory with error context
    try {
        await node_fs_1.promises.mkdir(destinationDir, { recursive: true });
    }
    catch (err) {
        throw wrapError(`Failed to create destination directory "${destinationDir}"`, err);
    }
    // Normalize publicBasePath (don't mutate the parameter)
    const normalizedPublicBasePath = publicBasePath.endsWith('/')
        ? publicBasePath
        : publicBasePath + '/';
    const sitemapAndIndexStream = new sitemap_index_stream_js_1.SitemapAndIndexStream({
        limit,
        // Called with the sitemap number `i`; returns the public URL for the
        // index entry, the stream to feed items into, and the stream that
        // finishes writing the file.
        getSitemapStream: (i) => {
            const sitemapStream = new sitemap_stream_js_1.SitemapStream({
                hostname,
                xslUrl,
            });
            const path = `./sitemap-${i}.xml`;
            const writePath = (0, node_path_1.resolve)(destinationDir, path + (gzip ? '.gz' : ''));
            // Construct public path for the sitemap index
            const publicPath = (0, node_path_1.normalize)(normalizedPublicBasePath + path);
            // Construct the URL with proper error handling
            let sitemapUrl;
            try {
                sitemapUrl = new node_url_1.URL(`${publicPath}${gzip ? '.gz' : ''}`, sitemapHostname).toString();
            }
            catch (err) {
                throw wrapError(`Failed to construct sitemap URL for index ${i}`, err);
            }
            let writeStream;
            if (gzip) {
                writeStream = sitemapStream
                    .pipe((0, node_zlib_1.createGzip)()) // compress the output of the sitemap
                    .pipe((0, node_fs_1.createWriteStream)(writePath)); // write it to sitemap-NUMBER.xml.gz
            }
            else {
                writeStream = sitemapStream.pipe((0, node_fs_1.createWriteStream)(writePath)); // write it to sitemap-NUMBER.xml
            }
            return [sitemapUrl, sitemapStream, writeStream];
        },
    });
    // Handle different sourceData types with proper error handling
    let src;
    if (typeof sourceData === 'string') {
        // Only synchronous failures are caught here; errors emitted later by
        // the read stream are expected to surface through the pipeline below.
        try {
            src = (0, utils_js_1.lineSeparatedURLsToSitemapOptions)((0, node_fs_1.createReadStream)(sourceData));
        }
        catch (err) {
            throw wrapError(`Failed to read sourceData file "${sourceData}"`, err);
        }
    }
    else if (sourceData instanceof node_stream_1.Readable) {
        src = sourceData;
    }
    else if (Array.isArray(sourceData)) {
        src = node_stream_1.Readable.from(sourceData);
    }
    else {
        throw new Error(`Invalid sourceData type: expected array, string (file path), or Readable stream, got ${typeof sourceData}`);
    }
    const writePath = (0, node_path_1.resolve)(destinationDir, `./sitemap-index.xml${gzip ? '.gz' : ''}`);
    try {
        if (gzip) {
            return await (0, promises_1.pipeline)(src, sitemapAndIndexStream, (0, node_zlib_1.createGzip)(), (0, node_fs_1.createWriteStream)(writePath));
        }
        else {
            return await (0, promises_1.pipeline)(src, sitemapAndIndexStream, (0, node_fs_1.createWriteStream)(writePath));
        }
    }
    catch (err) {
        throw wrapError('Failed to write sitemap files', err);
    }
};
exports.simpleSitemapAndIndex = simpleSitemapAndIndex;
exports.default = exports.simpleSitemapAndIndex;

79
node_modules/sitemap/dist/cjs/lib/sitemap-stream.d.ts generated vendored Normal file
View File

@@ -0,0 +1,79 @@
import { Transform, TransformOptions, TransformCallback, Readable } from 'node:stream';
import { SitemapItemLoose, ErrorLevel, ErrorHandler } from './types.js';
/**
* Builds the `<?xml-stylesheet type="text/xsl" href="..."?>` processing
* instruction for the given stylesheet URL. The implementation escapes
* `&`, `"`, `<` and `>` in `url` before embedding it in the `href`.
*
* @param url - URL of the XSL stylesheet
* @returns The processing instruction as a string
*/
export declare const stylesheetInclude: (url: string) => string;
/**
* Switches controlling which XML namespace declarations are written on the
* opening `<urlset>` tag.
*/
export interface NSArgs {
/** Include the `xmlns:news` declaration. */
news: boolean;
/** Include the `xmlns:video` declaration. */
video: boolean;
/** Include the `xmlns:xhtml` declaration. */
xhtml: boolean;
/** Include the `xmlns:image` declaration. */
image: boolean;
/**
* Extra attribute strings appended to `<urlset>`, separated by spaces.
* Each entry is validated and must look like `prefix:name="value"`
* (e.g. `xmlns:prefix="uri"` or `xsi:schemaLocation="..."`).
*/
custom?: string[];
}
/** Closing tag that a SitemapStream pushes when it is flushed after at least one item. */
export declare const closetag = "</urlset>";
/**
* Options accepted by the SitemapStream constructor, in addition to the
* regular Transform options.
*/
export interface SitemapStreamOptions extends TransformOptions {
/** Base URL for relative paths; validated by the constructor when provided. */
hostname?: string;
/** Error handling level; the constructor falls back to ErrorLevel.WARN when omitted. */
level?: ErrorLevel;
/** Format lastmod as date only; treated as false when omitted. */
lastmodDateOnly?: boolean;
/** Which XML namespaces to declare; all four built-in namespaces are enabled when omitted. */
xmlns?: NSArgs;
/** URL of an XSL stylesheet to reference from the sitemap; validated by the constructor when provided. */
xslUrl?: string;
/** Custom error handler passed to item validation. */
errorHandler?: ErrorHandler;
}
/**
* A [Transform](https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream)
* for turning a
* [Readable stream](https://nodejs.org/api/stream.html#stream_readable_streams)
* of either [SitemapItemOptions](#sitemap-item-options) or url strings into a
* Sitemap. The readable stream it transforms **must** be in object mode.
*
* @param {SitemapStreamOptions} opts - Configuration options
* @param {string} [opts.hostname] - Base URL for relative paths. Must use http:// or https:// protocol
* @param {ErrorLevel} [opts.level=ErrorLevel.WARN] - Error handling level (SILENT, WARN, or THROW)
* @param {boolean} [opts.lastmodDateOnly=false] - Format lastmod as date only (YYYY-MM-DD)
* @param {NSArgs} [opts.xmlns] - Control which XML namespaces to include in output
* @param {string} [opts.xslUrl] - URL to XSL stylesheet for sitemap display. Must use http:// or https://
* @param {ErrorHandler} [opts.errorHandler] - Custom error handler function
*
* @throws {InvalidHostnameError} If hostname is provided but invalid (non-http(s), malformed, or >2048 chars)
* @throws {InvalidXSLUrlError} If xslUrl is provided but invalid (non-http(s), malformed, >2048 chars, or contains malicious content)
* @throws {Error} If xmlns.custom contains invalid namespace declarations
*
* @example
* ```typescript
* const stream = new SitemapStream({
* hostname: 'https://example.com',
* level: ErrorLevel.THROW
* });
* stream.write({ url: '/page', changefreq: 'daily' });
* stream.end();
* ```
*
* @security
* - Hostname and xslUrl are validated to prevent URL injection attacks
* - Custom namespaces are validated to prevent XML injection
* - All URLs are normalized and validated before output
* - XML content is properly escaped to prevent injection
*/
export declare class SitemapStream extends Transform {
/** Base URL passed to URL normalization for every written item. */
hostname?: string;
/** Error handling level; ErrorLevel.WARN when none was supplied. */
level: ErrorLevel;
/** True once the XML declaration and opening `<urlset>` tag have been pushed. */
hasHeadOutput: boolean;
/** Namespace switches used to build the opening `<urlset>` tag. */
xmlNS: NSArgs;
/** Stylesheet URL included in the header when set. */
xslUrl?: string;
/** Custom error handler passed to item validation. */
errorHandler?: ErrorHandler;
/** Internal item stream; whatever it emits is pushed to this stream's output. */
private smiStream;
/** Passed to URL normalization together with the hostname. */
lastmodDateOnly: boolean;
/** @param opts - Stream options; hostname and xslUrl are validated when provided. */
constructor(opts?: SitemapStreamOptions);
/** Emits the header before the first item, then normalizes, validates and forwards the item to the internal item stream. */
_transform(item: SitemapItemLoose, encoding: string, callback: TransformCallback): void;
/** Fails with EmptySitemap when no item was written; otherwise pushes the closing `</urlset>` tag. */
_flush(cb: TransformCallback): void;
}
/**
* Converts a readable stream into a promise that resolves with the concatenated data from the stream.
*
* The function listens for 'data' events from the stream, and when the stream ends, it resolves the promise with the concatenated data. If an error occurs while reading from the stream, the promise is rejected with the error.
*
* ⚠️ CAUTION: This function should not generally be used in production / when writing to files as it holds a copy of the entire file contents in memory until finished.
*
* @param {Readable} stream - The readable stream to convert to a promise.
* @returns {Promise<Buffer>} A promise that resolves with the concatenated data from the stream as a Buffer, or rejects with an error if one occurred while reading from the stream. If the stream is empty, the promise is rejected with an EmptyStream error.
* @throws {EmptyStream} If the stream is empty.
*/
export declare function streamToPromise(stream: Readable): Promise<Buffer>;

218
node_modules/sitemap/dist/cjs/lib/sitemap-stream.js generated vendored Normal file
View File

@@ -0,0 +1,218 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SitemapStream = exports.closetag = exports.stylesheetInclude = void 0;
exports.streamToPromise = streamToPromise;
const node_stream_1 = require("node:stream");
const types_js_1 = require("./types.js");
const utils_js_1 = require("./utils.js");
const validation_js_1 = require("./validation.js");
const sitemap_item_stream_js_1 = require("./sitemap-item-stream.js");
const errors_js_1 = require("./errors.js");
const constants_js_1 = require("./constants.js");
const xmlDec = '<?xml version="1.0" encoding="UTF-8"?>';
/**
 * Builds the xml-stylesheet processing instruction that points at `url`.
 * The characters `&`, `"`, `<` and `>` are replaced by their XML entities so
 * the URL cannot break out of the quoted `href` value.
 *
 * @param url - URL of the XSL stylesheet
 * @returns The `<?xml-stylesheet ...?>` processing instruction
 */
const stylesheetInclude = (url) => {
    const entityFor = { '&': '&amp;', '"': '&quot;', '<': '&lt;', '>': '&gt;' };
    // Single pass: each special character is mapped exactly once, which gives
    // the same result as escaping `&` first and the other characters afterwards.
    const href = url.replace(/[&"<>]/g, (ch) => entityFor[ch]);
    return `<?xml-stylesheet type="text/xsl" href="${href}"?>`;
};
exports.stylesheetInclude = stylesheetInclude;
const urlsetTagStart = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
/**
 * Security check for caller-supplied namespace declarations.
 *
 * Checks run in this order: the argument is an array, the number of entries
 * is within LIMITS.MAX_CUSTOM_NAMESPACES, then for every entry: it is a
 * non-empty string, it is no longer than LIMITS.MAX_NAMESPACE_LENGTH, it
 * contains no script/javascript:/data:text/html marker, and it has the
 * `prefix:name="value"` shape.
 *
 * @param custom - Array of custom namespace declarations
 * @throws {Error} On the first check that fails
 */
function validateCustomNamespaces(custom) {
    if (!Array.isArray(custom)) {
        throw new Error('Custom namespaces must be an array');
    }
    const { MAX_CUSTOM_NAMESPACES: maxCount, MAX_NAMESPACE_LENGTH: maxLength } = constants_js_1.LIMITS;
    // Cap the number of entries to prevent DoS
    if (custom.length > maxCount) {
        throw new Error(`Too many custom namespaces: ${custom.length} exceeds limit of ${maxCount}`);
    }
    // Accepts xmlns:prefix="uri" as well as prefix:attribute="value" (e.g. xsi:schemaLocation)
    const declarationShape = /^[a-zA-Z_][\w.-]*:[a-zA-Z_][\w.-]*="[^"<>]*"$/;
    const suspiciousMarkers = ['<script', 'javascript:', 'data:text/html'];
    for (let idx = 0; idx < custom.length; idx += 1) {
        const declaration = custom[idx];
        const isNonEmptyString = typeof declaration === 'string' && declaration.length !== 0;
        if (!isNonEmptyString) {
            throw new Error('Custom namespace must be a non-empty string');
        }
        const preview = declaration.substring(0, 50);
        if (declaration.length > maxLength) {
            throw new Error(`Custom namespace exceeds maximum length of ${maxLength} characters: ${preview}...`);
        }
        // Run the marker check before the shape check so these inputs get the
        // more specific error message (the shape check would reject `<` anyway).
        const lowered = declaration.toLowerCase();
        if (suspiciousMarkers.some((marker) => lowered.includes(marker))) {
            throw new Error(`Custom namespace contains potentially malicious content: ${preview}`);
        }
        if (!declarationShape.test(declaration)) {
            throw new Error(`Invalid namespace format (must be prefix:name="value", e.g., xmlns:prefix="uri" or xsi:schemaLocation="..."): ${preview}`);
        }
    }
}
/**
 * Builds the document header: XML declaration, optional xml-stylesheet
 * processing instruction, and the opening `<urlset ...>` tag carrying the
 * enabled namespace declarations (news, xhtml, image, video, then custom).
 *
 * @param xmlns - Namespace switches plus optional custom declarations
 * @param xslURL - Optional stylesheet URL; skipped when falsy
 * @returns The header string, ending with `>`
 * @throws {Error} If `custom` is provided and fails validation
 */
const getURLSetNs = ({ news, video, image, xhtml, custom }, xslURL) => {
    const pieces = [xmlDec];
    if (xslURL) {
        const { stylesheetInclude: renderStylesheet } = exports;
        pieces.push(renderStylesheet(xslURL));
    }
    pieces.push(urlsetTagStart);
    // Output order is fixed: news, xhtml, image, video.
    const builtIns = [
        [news, ' xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"'],
        [xhtml, ' xmlns:xhtml="http://www.w3.org/1999/xhtml"'],
        [image, ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"'],
        [video, ' xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"'],
    ];
    for (const [enabled, declaration] of builtIns) {
        if (enabled) {
            pieces.push(declaration);
        }
    }
    if (custom) {
        validateCustomNamespaces(custom);
        pieces.push(' ' + custom.join(' '));
    }
    pieces.push('>');
    return pieces.join('');
};
exports.closetag = '</urlset>';
const defaultXMLNS = {
news: true,
xhtml: true,
image: true,
video: true,
};
const defaultStreamOpts = {
xmlns: defaultXMLNS,
};
/**
* A [Transform](https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream)
* for turning a
* [Readable stream](https://nodejs.org/api/stream.html#stream_readable_streams)
* of either [SitemapItemOptions](#sitemap-item-options) or url strings into a
* Sitemap. The readable stream it transforms **must** be in object mode.
*
* @param {SitemapStreamOptions} opts - Configuration options
* @param {string} [opts.hostname] - Base URL for relative paths. Must use http:// or https:// protocol
* @param {ErrorLevel} [opts.level=ErrorLevel.WARN] - Error handling level (SILENT, WARN, or THROW)
* @param {boolean} [opts.lastmodDateOnly=false] - Format lastmod as date only (YYYY-MM-DD)
* @param {NSArgs} [opts.xmlns] - Control which XML namespaces to include in output
* @param {string} [opts.xslUrl] - URL to XSL stylesheet for sitemap display. Must use http:// or https://
* @param {ErrorHandler} [opts.errorHandler] - Custom error handler function
*
* @throws {InvalidHostnameError} If hostname is provided but invalid (non-http(s), malformed, or >2048 chars)
* @throws {InvalidXSLUrlError} If xslUrl is provided but invalid (non-http(s), malformed, >2048 chars, or contains malicious content)
* @throws {Error} If xmlns.custom contains invalid namespace declarations
*
* @example
* ```typescript
* const stream = new SitemapStream({
* hostname: 'https://example.com',
* level: ErrorLevel.THROW
* });
* stream.write({ url: '/page', changefreq: 'daily' });
* stream.end();
* ```
*
* @security
* - Hostname and xslUrl are validated to prevent URL injection attacks
* - Custom namespaces are validated to prevent XML injection
* - All URLs are normalized and validated before output
* - XML content is properly escaped to prevent injection
*/
class SitemapStream extends node_stream_1.Transform {
    /** Base URL for relative item URLs; passed to normalizeURL for every item. */
    hostname;
    /** Error handling level; ErrorLevel.WARN when none was supplied. */
    level;
    /** True once the XML declaration and opening <urlset> tag have been pushed. */
    hasHeadOutput;
    /** Namespace switches used to build the opening <urlset> tag. */
    xmlNS;
    /** Stylesheet URL included in the header when set. */
    xslUrl;
    /** Custom error handler passed to validateSMIOptions. */
    errorHandler;
    /** Internal item stream; everything it emits is pushed to this stream's output. */
    smiStream;
    /** Passed to normalizeURL together with the hostname. */
    lastmodDateOnly;
    /**
     * @param opts - Stream options. The object itself is left untouched.
     * @throws If `hostname` or `xslUrl` is provided and rejected by its validator
     */
    constructor(opts = defaultStreamOpts) {
        // Object mode is mandatory, but it is applied to a shallow copy so that
        // neither the caller's options object nor the shared default options
        // object is mutated (and frozen option objects are accepted).
        super({ ...opts, objectMode: true });
        // Validate hostname if provided
        if (opts.hostname !== undefined) {
            (0, validation_js_1.validateURL)(opts.hostname, 'hostname');
        }
        // Validate xslUrl if provided
        if (opts.xslUrl !== undefined) {
            (0, validation_js_1.validateXSLUrl)(opts.xslUrl);
        }
        this.hasHeadOutput = false;
        this.hostname = opts.hostname;
        this.level = opts.level || types_js_1.ErrorLevel.WARN;
        this.errorHandler = opts.errorHandler;
        this.smiStream = new sitemap_item_stream_js_1.SitemapItemStream({ level: opts.level });
        // Re-emit everything the item stream produces on this stream's readable side.
        this.smiStream.on('data', (data) => this.push(data));
        this.lastmodDateOnly = opts.lastmodDateOnly || false;
        this.xmlNS = opts.xmlns || defaultXMLNS;
        this.xslUrl = opts.xslUrl;
    }
    /**
     * Emits the document header before the first item, then normalizes and
     * validates the item and hands it to the internal item stream.
     */
    _transform(item, encoding, callback) {
        if (!this.hasHeadOutput) {
            this.hasHeadOutput = true;
            this.push(getURLSetNs(this.xmlNS, this.xslUrl));
        }
        // Respect backpressure from the item stream: when write() returns false,
        // wait for 'drain' before accepting the next item.
        if (!this.smiStream.write((0, validation_js_1.validateSMIOptions)((0, utils_js_1.normalizeURL)(item, this.hostname, this.lastmodDateOnly), this.level, this.errorHandler))) {
            this.smiStream.once('drain', callback);
        }
        else {
            process.nextTick(callback);
        }
    }
    /**
     * Fails with EmptySitemap when no item was ever written; otherwise pushes
     * the closing </urlset> tag.
     */
    _flush(cb) {
        if (!this.hasHeadOutput) {
            cb(new errors_js_1.EmptySitemap());
        }
        else {
            this.push(exports.closetag);
            cb();
        }
    }
}
exports.SitemapStream = SitemapStream;
/**
 * Collects everything a readable stream produces into a single Buffer.
 *
 * Each chunk is buffered in memory. When the stream finishes, the promise
 * resolves with the concatenation of all chunks. An error on the source stream
 * or on the internal collecting stream rejects the promise.
 *
 * ⚠️ CAUTION: the whole output is held in memory until the stream ends, so this
 * should not generally be used in production or when writing to files.
 *
 * @param {Readable} stream - The readable stream to drain.
 * @returns {Promise<Buffer>} Resolves with the concatenated data; rejects with the stream's error, or with EmptyStream when no chunk was received.
 * @throws {EmptyStream} If the stream is empty.
 */
function streamToPromise(stream) {
    return new Promise((resolve, reject) => {
        const chunks = [];
        const collector = new node_stream_1.Writable({
            write(chunk, _encoding, done) {
                chunks.push(chunk);
                done();
            },
        });
        // pipe() does not forward errors, so the source needs its own listener.
        stream.on('error', reject);
        stream.pipe(collector);
        // Errors while buffering are unlikely, but are handled for completeness.
        collector.on('error', reject);
        collector.on('finish', () => {
            if (chunks.length) {
                resolve(Buffer.concat(chunks));
                return;
            }
            reject(new errors_js_1.EmptyStream());
        });
    });
}

107
node_modules/sitemap/dist/cjs/lib/sitemap-xml.d.ts generated vendored Normal file
View File

@@ -0,0 +1,107 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { TagNames, IndexTagNames, StringObj } from './types.js';
/**
* Escapes text content for safe inclusion in XML text nodes.
*
* **Security Model:**
* - Escapes `&` → `&amp;` (required to prevent entity interpretation)
* - Escapes `<` → `&lt;` (required to prevent tag injection)
* - Escapes `>` → `&gt;` (defense-in-depth, prevents CDATA injection)
* - Does NOT escape `"` or `'` (not required in text content, only in attributes)
* - Removes invalid XML Unicode characters per XML 1.0 spec
*
* **Why quotes aren't escaped:**
* In XML text content (between tags), quotes have no special meaning and don't
* need escaping. They only need escaping in attribute values, which is handled
* by the `otag()` function.
*
* @param txt - The text content to escape
* @returns XML-safe escaped text with invalid characters removed
* @throws {TypeError} If txt is not a string
*
* @example
* text('Hello & World'); // Returns: 'Hello &amp; World'
* text('5 < 10'); // Returns: '5 &lt; 10'
* text('Hello "World"'); // Returns: 'Hello "World"' (quotes OK in text)
*
* @see https://www.w3.org/TR/xml/#syntax
*/
export declare function text(txt: string): string;
/**
* Generates an opening XML tag with optional attributes.
*
* **Security Model:**
* - Validates attribute names to prevent injection via malformed names
* - Escapes all attribute values with proper XML entity encoding
* - Escapes `&`, `<`, `>`, `"`, and `'` in attribute values
* - Removes invalid XML Unicode characters
*
* Attribute values use full escaping (including quotes) because they appear
* within quoted strings in the XML output: `<tag attr="value">`.
*
* @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
* @param attrs - Optional object mapping attribute names to string values
* @param selfClose - If true, generates a self-closing tag (e.g., `<tag/>`)
* @returns Opening XML tag string
* @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
* @throws {TypeError} If nodeName is not a string or attrs values are not strings
*
* @example
* otag('url'); // Returns: '<url>'
* otag('video:player_loc', { autoplay: 'ap=1' }); // Returns: '<video:player_loc autoplay="ap=1">'
* otag('image:image', {}, true); // Returns: '<image:image/>'
*
* @see https://www.w3.org/TR/xml/#NT-Attribute
*/
export declare function otag(nodeName: TagNames | IndexTagNames, attrs?: StringObj, selfClose?: boolean): string;
/**
* Generates a closing XML tag.
*
* @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
* @returns Closing XML tag string
* @throws {TypeError} If nodeName is not a string
*
* @example
* ctag('url'); // Returns: '</url>'
* ctag('video:title'); // Returns: '</video:title>'
*/
export declare function ctag(nodeName: TagNames | IndexTagNames): string;
/**
* Generates a complete XML element with optional attributes and text content.
*
* This is a convenience function that combines `otag()`, `text()`, and `ctag()`.
* It supports three usage patterns via function overloading:
*
* 1. Element with text content: `element('loc', 'https://example.com')`
* 2. Element with attributes and text: `element('video:player_loc', { autoplay: 'ap=1' }, 'https://...')`
* 3. Self-closing element with attributes: `element('image:image', { href: '...' })`
*
* @param nodeName - The XML element name
* @param attrs - Either a string (text content) or object (attributes)
* @param innerText - Optional text content when attrs is an object
* @returns Complete XML element string
* @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
* @throws {TypeError} If arguments have invalid types
*
* @example
* // Pattern 1: Simple element with text
* element('loc', 'https://example.com')
* // Returns: '<loc>https://example.com</loc>'
*
* @example
* // Pattern 2: Element with attributes and text
* element('video:player_loc', { autoplay: 'ap=1' }, 'https://example.com/video')
* // Returns: '<video:player_loc autoplay="ap=1">https://example.com/video</video:player_loc>'
*
* @example
* // Pattern 3: Self-closing element with attributes
* element('xhtml:link', { rel: 'alternate', href: 'https://example.com/fr' })
* // Returns: '<xhtml:link rel="alternate" href="https://example.com/fr"/>'
*/
export declare function element(nodeName: TagNames, attrs: StringObj, innerText: string): string;
export declare function element(nodeName: TagNames | IndexTagNames, innerText: string): string;
export declare function element(nodeName: TagNames, attrs: StringObj): string;

187
node_modules/sitemap/dist/cjs/lib/sitemap-xml.js generated vendored Normal file
View File

@@ -0,0 +1,187 @@
"use strict";
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.text = text;
exports.otag = otag;
exports.ctag = ctag;
exports.element = element;
const errors_js_1 = require("./errors.js");
/**
* Regular expression matching the characters that are stripped from XML output.
*
* Character classes covered by the pattern:
* - Control characters (U+0000-U+001F except tab, newline, carriage return)
* - Delete character (U+007F)
* - C1 control characters (U+0080-U+009F except U+0085)
* - Unpaired surrogate code units (U+D800-U+DFFF); because of the `u` flag,
*   well-formed surrogate pairs are matched as whole code points and are not
*   caught by this range
* - Non-characters (\p{NChar} - permanently reserved code points)
*
* The pattern is global (`g`) so every occurrence is removed by `replace()`.
*
* Performance note: This regex uses Unicode property escapes and may be slower
* on very large strings (100KB+). Consider pre-validation for untrusted input.
*
* @see https://www.w3.org/TR/xml/#charsets
*/
const invalidXMLUnicodeRegex =
// eslint-disable-next-line no-control-regex
/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\p{NChar}]/gu;
/**
* Regular expressions for XML entity escaping.
*
* `amp`, `lt` and `gt` are applied to both text content and attribute values;
* `apos` and `quot` are applied to attribute values only. All are global (`g`)
* so every occurrence in the input is replaced.
*/
const amp = /&/g;
const lt = /</g;
const gt = />/g;
const apos = /'/g;
const quot = /"/g;
/**
* Valid XML attribute name pattern. XML names must:
* - Start with a letter, underscore, or colon
* - Contain only letters, digits, hyphens, underscores, colons, or periods
*
* This is a simplified validation that accepts the most common attribute names.
* Note: In practice, this library only uses namespaced attributes like "video:title"
* which are guaranteed to be valid.
*
* @see https://www.w3.org/TR/xml/#NT-Name
*/
const validAttributeNameRegex = /^[a-zA-Z_:][\w:.-]*$/;
/**
 * Guards against malformed XML attribute names.
 *
 * A name is accepted when it starts with a letter, underscore, or colon and
 * continues with word characters, colons, periods, or hyphens only.
 *
 * @param name - The attribute name to check
 * @throws {InvalidXMLAttributeNameError} If the name does not match
 *
 * @example
 * validateAttributeName('href'); // OK
 * validateAttributeName('xml:lang'); // OK
 * validateAttributeName('data-value'); // OK
 * validateAttributeName('<script>'); // Throws InvalidXMLAttributeNameError
 */
function validateAttributeName(name) {
    const isValid = validAttributeNameRegex.test(name);
    if (isValid) {
        return;
    }
    throw new errors_js_1.InvalidXMLAttributeNameError(name);
}
/**
 * Makes a string safe to place between XML tags.
 *
 * Steps, applied in order:
 * 1. `&` becomes `&amp;`
 * 2. `<` becomes `&lt;`
 * 3. `>` becomes `&gt;`
 * 4. characters matched by the invalid-XML-character pattern are removed
 *
 * Quotes (`"` and `'`) are left alone: they carry no special meaning in text
 * content and are only escaped for attribute values (see `otag()`).
 *
 * @param txt - The text content to escape
 * @returns The escaped text with invalid characters removed
 * @throws {TypeError} If txt is not a string
 *
 * @example
 * text('Hello & World'); // Returns: 'Hello &amp; World'
 * text('5 < 10'); // Returns: '5 &lt; 10'
 * text('Hello "World"'); // Returns: 'Hello "World"' (quotes OK in text)
 *
 * @see https://www.w3.org/TR/xml/#syntax
 */
function text(txt) {
    if (typeof txt !== 'string') {
        throw new TypeError(`text() requires a string, received ${typeof txt}: ${String(txt)}`);
    }
    // `&` must be handled first so the entities produced by later steps are
    // not escaped a second time.
    const steps = [
        [amp, '&amp;'],
        [lt, '&lt;'],
        [gt, '&gt;'],
        [invalidXMLUnicodeRegex, ''],
    ];
    return steps.reduce((current, [pattern, replacement]) => current.replace(pattern, replacement), txt);
}
/**
 * Renders an opening XML tag, optionally with attributes and optionally
 * self-closing.
 *
 * Every attribute name is validated first, and every attribute value must be a
 * string. Values are placed inside double quotes, so `&`, `<`, `>`, `'` and `"`
 * are entity-escaped and invalid XML characters are removed.
 *
 * @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
 * @param attrs - Optional object mapping attribute names to string values
 * @param selfClose - If true, generates a self-closing tag (e.g., `<tag/>`)
 * @returns Opening XML tag string
 * @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
 * @throws {TypeError} If nodeName is not a string or attrs values are not strings
 *
 * @example
 * otag('url'); // Returns: '<url>'
 * otag('video:player_loc', { autoplay: 'ap=1' }); // Returns: '<video:player_loc autoplay="ap=1">'
 * otag('image:image', {}, true); // Returns: '<image:image/>'
 *
 * @see https://www.w3.org/TR/xml/#NT-Attribute
 */
function otag(nodeName, attrs, selfClose = false) {
    if (typeof nodeName !== 'string') {
        throw new TypeError(`otag() nodeName must be a string, received ${typeof nodeName}: ${String(nodeName)}`);
    }
    const renderedAttrs = [];
    for (const attrName in attrs) {
        // Reject malformed names before anything is emitted
        validateAttributeName(attrName);
        const rawValue = attrs[attrName];
        if (typeof rawValue !== 'string') {
            throw new TypeError(`otag() attribute "${attrName}" value must be a string, received ${typeof rawValue}: ${String(rawValue)}`);
        }
        // `&` first, so entities added by the later steps are not re-escaped
        let escaped = rawValue.replace(amp, '&amp;');
        escaped = escaped.replace(lt, '&lt;');
        escaped = escaped.replace(gt, '&gt;');
        escaped = escaped.replace(apos, '&apos;');
        escaped = escaped.replace(quot, '&quot;');
        escaped = escaped.replace(invalidXMLUnicodeRegex, '');
        renderedAttrs.push(` ${attrName}="${escaped}"`);
    }
    const closer = selfClose ? '/' : '';
    return `<${nodeName}${renderedAttrs.join('')}${closer}>`;
}
/**
 * Renders the closing tag for an XML element.
 *
 * @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
 * @returns Closing XML tag string
 * @throws {TypeError} If nodeName is not a string
 *
 * @example
 * ctag('url'); // Returns: '</url>'
 * ctag('video:title'); // Returns: '</video:title>'
 */
function ctag(nodeName) {
    if (typeof nodeName === 'string') {
        return '</' + nodeName + '>';
    }
    throw new TypeError(`ctag() nodeName must be a string, received ${typeof nodeName}: ${String(nodeName)}`);
}
/**
 * Renders a complete XML element. The shape of the arguments selects one of
 * three forms:
 *
 * 1. `element(name, text)` - element with text content, no attributes
 * 2. `element(name, attrs, text)` - element with attributes and text content
 * 3. `element(name, attrs)` - self-closing element with attributes
 *
 * @param nodeName - The XML element name
 * @param attrs - Either a string (text content) or an object (attributes)
 * @param innerText - Text content when attrs is an object; omit for a self-closing tag
 * @returns Complete XML element string
 */
function element(nodeName, attrs, innerText) {
    // Form 1: the second argument is the text content
    if (typeof attrs === 'string') {
        return `${otag(nodeName)}${text(attrs)}${ctag(nodeName)}`;
    }
    // Form 3: attributes only, rendered as a self-closing tag
    if (innerText === undefined) {
        return otag(nodeName, attrs, true);
    }
    // Form 2: attributes plus text content
    return `${otag(nodeName, attrs)}${text(innerText)}${ctag(nodeName)}`;
}

400
node_modules/sitemap/dist/cjs/lib/types.d.ts generated vendored Normal file
View File

@@ -0,0 +1,400 @@
import { URL } from 'node:url';
/**
* How frequently the page is likely to change. This value provides general
* information to search engines and may not correlate exactly to how often they crawl the page. Please note that the
* value of this tag is considered a hint and not a command. See
* <https://www.sitemaps.org/protocol.html#xmlTagDefinitions> for the acceptable
* values
*/
export declare enum EnumChangefreq {
DAILY = "daily",
MONTHLY = "monthly",
ALWAYS = "always",
HOURLY = "hourly",
WEEKLY = "weekly",
YEARLY = "yearly",
NEVER = "never"
}
/**
* Accepted spellings of a yes/no value (upper, capitalized and lower case).
* Used by video fields such as `family_friendly`, `requires_subscription`,
* `live` and `player_loc:allow_embed`.
*/
export declare enum EnumYesNo {
YES = "YES",
NO = "NO",
Yes = "Yes",
No = "No",
yes = "yes",
no = "no"
}
/**
* Relationship value for the video `restriction:relationship` and
* `platform:relationship` fields: whether the listed entries are allowed or
* denied.
*/
export declare enum EnumAllowDeny {
ALLOW = "allow",
DENY = "deny"
}
/**
* https://support.google.com/webmasters/answer/74288?hl=en&ref_topic=4581190
*/
export interface NewsItem {
access?: 'Registration' | 'Subscription';
publication: {
name: string;
/**
* The `<language>` is the language of your publication. Use an ISO 639
* language code (2 or 3 letters).
*/
language: string;
};
/**
* @example 'PressRelease, Blog'
*/
genres?: string;
/**
* Article publication date in W3C format, using either the "complete date" (YYYY-MM-DD) format or the "complete date
* plus hours, minutes, and seconds"
*/
publication_date: string;
/**
* The title of the news article
* @example 'Companies A, B in Merger Talks'
*/
title: string;
/**
* @example 'business, merger, acquisition'
*/
keywords?: string;
/**
* @example 'NASDAQ:A, NASDAQ:B'
*/
stock_tickers?: string;
}
/**
* Sitemap Image
* https://support.google.com/webmasters/answer/178636?hl=en&ref_topic=4581190
*/
export interface Img {
/**
* The URL of the image
* @example 'https://example.com/image.jpg'
*/
url: string;
/**
* The caption of the image
* @example 'Thanksgiving dinner'
*/
caption?: string;
/**
* The title of the image
* @example 'Star Wars EP IV'
*/
title?: string;
/**
* The geographic location of the image.
* @example 'Limerick, Ireland'
*/
geoLocation?: string;
/**
* A URL to the license of the image.
* @example 'https://example.com/license.txt'
*/
license?: string;
}
interface VideoItemBase {
/**
* A URL pointing to the video thumbnail image file
* @example "https://rtv3-img-roosterteeth.akamaized.net/store/0e841100-289b-4184-ae30-b6a16736960a.jpg/sm/thumb3.jpg"
*/
thumbnail_loc: string;
/**
* The title of the video
* @example '2018:E6 - GoldenEye: Source'
*/
title: string;
/**
* A description of the video. Maximum 2048 characters.
* @example 'We play gun game in GoldenEye: Source with a good friend of ours. His name is Gruchy. Dan Gruchy.'
*/
description: string;
/**
* A URL pointing to the actual video media file. Should be one of the supported formats. HTML is not a supported
* format. Flash is allowed, but no longer supported on most mobile platforms, and so may be indexed less well. Must
* not be the same as the `<loc>` URL.
* @example "http://streamserver.example.com/video123.mp4"
*/
content_loc?: string;
/**
* A URL pointing to a player for a specific video. Usually this is the information in the src element of an `<embed>`
* tag. Must not be the same as the `<loc>` URL
* @example "https://roosterteeth.com/embed/rouletsplay-2018-goldeneye-source"
*/
player_loc?: string;
/**
* A string the search engine can append as a query param to enable automatic
* playback. Equivalent to the autoplay attribute on the player_loc tag.
* @example 'ap=1'
*/
'player_loc:autoplay'?: string;
/**
* Whether the search engine can embed the video in search results. Allowed values are yes or no.
*/
'player_loc:allow_embed'?: EnumYesNo;
/**
* The length of the video in seconds
* @example 600
*/
duration?: number;
/**
* The date after which the video will no longer be available.
* @example "2012-07-16T19:20:30+08:00"
*/
expiration_date?: string;
/**
* The number of times the video has been viewed
*/
view_count?: number;
/**
* The date the video was first published, in W3C format.
* @example "2012-07-16T19:20:30+08:00"
*/
publication_date?: string;
/**
* A short description of the broad category that the video belongs to. This is a string no longer than 256 characters.
* @example Baking
*/
category?: string;
/**
* Whether to show or hide your video in search results from specific countries.
* @example "IE GB US CA"
*/
restriction?: string;
/**
* Whether the countries in restriction are allowed or denied
* @example 'deny'
*/
'restriction:relationship'?: EnumAllowDeny;
gallery_loc?: string;
/**
* [Optional] Specifies the URL of a webpage with additional information about this uploader. This URL must be in the same domain as the <loc> tag.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
* @example http://www.example.com/users/grillymcgrillerson
*/
'uploader:info'?: string;
'gallery_loc:title'?: string;
/**
* The price to download or view the video. Omit this tag for free videos.
* @example "1.99"
*/
price?: string;
/**
* Specifies the resolution of the purchased version. Supported values are hd and sd.
* @example "HD"
*/
'price:resolution'?: Resolution;
/**
* Specifies the currency in ISO4217 format.
* @example "USD"
*/
'price:currency'?: string;
/**
* Specifies the purchase option. See `PriceType` for the accepted values (`rent` or `purchase`, lower or upper case).
* @example "rent"
*/
'price:type'?: PriceType;
/**
* The video uploader's name. Only one <video:uploader> is allowed per video. String value, max 255 characters.
* @example "GrillyMcGrillerson"
*/
uploader?: string;
/**
* Whether to show or hide your video in search results on specified platform types. This is a list of space-delimited
* platform types. See <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190> for more detail
* @example "tv"
*/
platform?: string;
id?: string;
'platform:relationship'?: EnumAllowDeny;
}
/**
* Accepted values for a video's `price:type` field: `rent` or `purchase`,
* each in all-lowercase or all-uppercase form. Mixed-case spellings such as
* 'Rent' are not members of this type.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
*/
export type PriceType = 'rent' | 'purchase' | 'RENT' | 'PURCHASE';
/**
* Accepted values for a video's `price:resolution` field: high definition
* (`HD` / `hd`) or standard definition (`SD` / `sd`). Mixed-case spellings
* such as 'Hd' are not members of this type.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
*/
export type Resolution = 'HD' | 'hd' | 'sd' | 'SD';
/**
* Strict form of a sitemap video entry: `tag` is always an array, `rating`
* is numeric and the yes/no flags are EnumYesNo values.
* <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190>
*/
export interface VideoItem extends VideoItemBase {
/**
* An arbitrary string tag describing the video. Tags are generally very short descriptions of key concepts associated
* with a video or piece of content.
* @example ['Baking']
*/
tag: string[];
/**
* The rating of the video. Supported values are float numbers.
* @example 2.5
*/
rating?: number;
/**
* NOTE(review): presumably marks whether the video is suitable for all
* audiences — confirm against the Google video sitemap documentation.
*/
family_friendly?: EnumYesNo;
/**
* Indicates whether a subscription (either paid or free) is required to view
* the video. Allowed values are yes or no.
*/
requires_subscription?: EnumYesNo;
/**
* Indicates whether the video is a live stream. Supported values are yes or no.
*/
live?: EnumYesNo;
}
/**
* Loose form of a sitemap video entry, as accepted before normalization:
* `tag` may be a single string, `rating` may be a numeric string and the
* yes/no flags may be plain booleans.
* <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190>
*/
export interface VideoItemLoose extends VideoItemBase {
/**
* An arbitrary string tag describing the video. Tags are generally very short descriptions of key concepts associated
* with a video or piece of content. A single string is accepted as shorthand for a one-element array.
* @example ['Baking']
*/
tag?: string | string[];
/**
* The rating of the video. Supported values are float numbers, or a string that parses as one.
* @example 2.5
*/
rating?: string | number;
/**
* NOTE(review): presumably marks whether the video is suitable for all
* audiences — confirm against the Google video sitemap documentation.
* A boolean is accepted in place of an EnumYesNo value.
*/
family_friendly?: EnumYesNo | boolean;
/**
* Whether a subscription is required to view the video.
* A boolean is accepted in place of an EnumYesNo value.
*/
requires_subscription?: EnumYesNo | boolean;
/**
* Indicates whether the video is a live stream. Supported values are yes or no.
* A boolean is accepted in place of an EnumYesNo value.
*/
live?: EnumYesNo | boolean;
}
/**
* An alternate-language link attached to a sitemap entry (see the `links`
* field of SitemapItem).
* https://support.google.com/webmasters/answer/189077
*/
export interface LinkItem {
/**
* Language of the linked page.
* @example 'en'
*/
lang: string;
/**
* NOTE(review): presumably a more specific language/region code that
* overrides `lang` when present — confirm where links are serialized.
* @example 'en-us'
*/
hreflang?: string;
/**
* Location of the alternate page.
*/
url: string;
}
/**
* A `url` plus an optional `lastmod` string.
* NOTE(review): presumably one entry of a sitemap index (compare the
* `sitemap`, `loc` and `lastmod` members of IndexTagNames) — confirm against
* the sitemap index stream.
*/
export interface IndexItem {
/**
* Location the entry points at.
*/
url: string;
/**
* Optional last-modified date string for the entry.
*/
lastmod?: string;
}
/**
* Fields shared by the strict (SitemapItem) and loose (SitemapItemLoose)
* descriptions of a single sitemap entry. Not exported.
*/
interface SitemapItemBase {
/**
* Last-modified date of the page, as a date string.
*/
lastmod?: string;
/**
* Hint for how frequently the page is likely to change.
*/
changefreq?: EnumChangefreq;
/**
* NOTE(review): presumably controls whether `priority` is written with its
* full precision — confirm where items are serialized.
*/
fullPrecisionPriority?: boolean;
/**
* Priority of this URL.
* NOTE(review): the accepted range is not enforced by this declaration — confirm in the validation code.
*/
priority?: number;
/**
* News metadata attached to this entry.
*/
news?: NewsItem;
/**
* NOTE(review): presumably an expiry date string for the entry (compare the
* `expires` member of TagNames) — confirm.
*/
expires?: string;
/**
* NOTE(review): presumably a link to the Android app equivalent of the page — confirm.
*/
androidLink?: string;
/**
* NOTE(review): presumably a link to the AMP version of the page — confirm.
*/
ampLink?: string;
/**
* Location of the page.
*/
url: string;
}
/**
* Strict options for individual sitemap entries: `img`, `video` and `links`
* are always present and always arrays (possibly empty).
*/
export interface SitemapItem extends SitemapItemBase {
/**
* Images associated with the page.
*/
img: Img[];
/**
* Videos associated with the page, in strict form.
*/
video: VideoItem[];
/**
* Alternate-language links for the page.
*/
links: LinkItem[];
}
/**
* Options for individual sitemap entries prior to normalization. Collections
* may be omitted or given as a single value instead of an array.
*/
export interface SitemapItemLoose extends SitemapItemBase {
/**
* One video or a list of videos, in loose form.
*/
video?: VideoItemLoose | VideoItemLoose[];
/**
* One image or a list of images; a plain string is shorthand for an image
* that has only a url.
*/
img?: string | Img | (string | Img)[];
/**
* Alternate-language links for the page.
*/
links?: LinkItem[];
/**
* Path of a file whose modification time should be used as `lastmod`.
*/
lastmodfile?: string | Buffer | URL;
/**
* Date string to use as `lastmod`.
*/
lastmodISO?: string;
/**
* NOTE(review): purpose not visible from this declaration — TODO confirm
* where (and whether) this flag is read.
*/
lastmodrealtime?: boolean;
}
/**
* How to handle errors in passed in urls. Each member's runtime value is the
* lowercase string shown next to it.
*/
export declare enum ErrorLevel {
/**
* Validation will be skipped and nothing logged or thrown.
*/
SILENT = "silent",
/**
* If an invalid value is encountered, a console.warn will be called with details
*/
WARN = "warn",
/**
* An Error will be thrown on encountering invalid data.
*/
THROW = "throw"
}
/**
* Callback invoked with a validation error and the ErrorLevel in effect.
* It returns nothing; what happens to the error (throw, log, ignore) is up
* to the handler.
*/
export type ErrorHandler = (error: Error, level: ErrorLevel) => void;
/**
* Names of the XML elements used inside a sitemap, including the
* `video:`, `image:`, `news:`, `mobile:` and `xhtml:` prefixed ones.
* Every member's value is identical to its name.
*/
export declare enum TagNames {
url = "url",
loc = "loc",
urlset = "urlset",
lastmod = "lastmod",
changefreq = "changefreq",
priority = "priority",
'video:thumbnail_loc' = "video:thumbnail_loc",
'video:video' = "video:video",
'video:title' = "video:title",
'video:description' = "video:description",
'video:tag' = "video:tag",
'video:duration' = "video:duration",
'video:player_loc' = "video:player_loc",
'video:content_loc' = "video:content_loc",
'image:image' = "image:image",
'image:loc' = "image:loc",
'image:geo_location' = "image:geo_location",
'image:license' = "image:license",
'image:title' = "image:title",
'image:caption' = "image:caption",
'video:requires_subscription' = "video:requires_subscription",
'video:publication_date' = "video:publication_date",
'video:id' = "video:id",
'video:restriction' = "video:restriction",
'video:family_friendly' = "video:family_friendly",
'video:view_count' = "video:view_count",
'video:uploader' = "video:uploader",
'video:expiration_date' = "video:expiration_date",
'video:platform' = "video:platform",
'video:price' = "video:price",
'video:rating' = "video:rating",
'video:category' = "video:category",
'video:live' = "video:live",
'video:gallery_loc' = "video:gallery_loc",
'news:news' = "news:news",
'news:publication' = "news:publication",
'news:name' = "news:name",
'news:access' = "news:access",
'news:genres' = "news:genres",
'news:publication_date' = "news:publication_date",
'news:title' = "news:title",
'news:keywords' = "news:keywords",
'news:stock_tickers' = "news:stock_tickers",
'news:language' = "news:language",
'mobile:mobile' = "mobile:mobile",
'xhtml:link' = "xhtml:link",
'expires' = "expires"
}
/**
* Names of the XML elements used inside a sitemap index. Every member's
* value is identical to its name.
*/
export declare enum IndexTagNames {
sitemap = "sitemap",
sitemapindex = "sitemapindex",
loc = "loc",
lastmod = "lastmod"
}
/**
* Generic object with string keys and any values
* Used for XML attribute building and other flexible data structures.
* Values are typed `any`, so nothing stored here is type-checked.
*/
export interface StringObj {
[index: string]: any;
}
export {};

109
node_modules/sitemap/dist/cjs/lib/types.js generated vendored Normal file
View File

@@ -0,0 +1,109 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.IndexTagNames = exports.TagNames = exports.ErrorLevel = exports.EnumAllowDeny = exports.EnumYesNo = exports.EnumChangefreq = void 0;
/**
 * Change-frequency hints for a page. The value tells search engines roughly
 * how often the page changes; it is advisory only and need not match how
 * often the page is actually crawled. See
 * <https://www.sitemaps.org/protocol.html#xmlTagDefinitions> for the
 * acceptable values.
 */
const EnumChangefreq = {
    DAILY: 'daily',
    MONTHLY: 'monthly',
    ALWAYS: 'always',
    HOURLY: 'hourly',
    WEEKLY: 'weekly',
    YEARLY: 'yearly',
    NEVER: 'never',
};
exports.EnumChangefreq = EnumChangefreq;
/**
 * Accepted spellings of a yes/no flag. Each key maps to the identical string.
 */
const EnumYesNo = {};
for (const spelling of ['YES', 'NO', 'Yes', 'No', 'yes', 'no']) {
    EnumYesNo[spelling] = spelling;
}
exports.EnumYesNo = EnumYesNo;
/**
 * Relationship values used by restriction/platform fields.
 */
const EnumAllowDeny = {
    ALLOW: 'allow',
    DENY: 'deny',
};
exports.EnumAllowDeny = EnumAllowDeny;
/**
 * Strategies for reacting to invalid data in passed-in urls.
 */
const ErrorLevel = {
    /** Skip validation entirely: nothing is logged or thrown. */
    SILENT: 'silent',
    /** Report invalid values through console.warn. */
    WARN: 'warn',
    /** Throw an Error as soon as invalid data is encountered. */
    THROW: 'throw',
};
exports.ErrorLevel = ErrorLevel;
/**
 * XML element names used inside a sitemap. Every key maps to the identical
 * string, so the table is built from a single ordered list of names.
 */
const TagNames = {};
for (const tag of [
    'url',
    'loc',
    'urlset',
    'lastmod',
    'changefreq',
    'priority',
    'video:thumbnail_loc',
    'video:video',
    'video:title',
    'video:description',
    'video:tag',
    'video:duration',
    'video:player_loc',
    'video:content_loc',
    'image:image',
    'image:loc',
    'image:geo_location',
    'image:license',
    'image:title',
    'image:caption',
    'video:requires_subscription',
    'video:publication_date',
    'video:id',
    'video:restriction',
    'video:family_friendly',
    'video:view_count',
    'video:uploader',
    'video:expiration_date',
    'video:platform',
    'video:price',
    'video:rating',
    'video:category',
    'video:live',
    'video:gallery_loc',
    'news:news',
    'news:publication',
    'news:name',
    'news:access',
    'news:genres',
    'news:publication_date',
    'news:title',
    'news:keywords',
    'news:stock_tickers',
    'news:language',
    'mobile:mobile',
    'xhtml:link',
    'expires',
]) {
    TagNames[tag] = tag;
}
exports.TagNames = TagNames;
/**
 * XML element names used inside a sitemap index. Every key maps to the
 * identical string.
 */
const IndexTagNames = {};
for (const tag of ['sitemap', 'sitemapindex', 'loc', 'lastmod']) {
    IndexTagNames[tag] = tag;
}
exports.IndexTagNames = IndexTagNames;

48
node_modules/sitemap/dist/cjs/lib/utils.d.ts generated vendored Normal file
View File

@@ -0,0 +1,48 @@
import { Readable, ReadableOptions, TransformOptions } from 'node:stream';
import { SitemapItem, SitemapItemLoose } from './types.js';
export { validateSMIOptions } from './validation.js';
/**
* Combines multiple streams into one
* @param streams the streams to combine
* @param options stream options for the merged output stream
* @returns a single Readable carrying the data of the input streams
*/
export declare function mergeStreams(streams: Readable[], options?: TransformOptions): Readable;
/**
* Options for ReadlineStream: the standard ReadableOptions plus the stream
* whose content should be split into lines.
*/
export interface ReadlineStreamOptions extends ReadableOptions {
/**
* Source stream to read lines from.
*/
input: Readable;
}
/**
* Wraps node's ReadLine in a stream: a Readable built from
* ReadlineStreamOptions that yields the lines of `options.input`.
*/
export declare class ReadlineStream extends Readable {
private _source;
constructor(options: ReadlineStreamOptions);
_read(size: number): void;
}
/**
* Takes a stream likely from fs.createReadStream('./path') and returns a stream
* of sitemap items
* @param stream a stream of line separated urls.
* @param opts.isJSON is the stream line separated JSON. leave undefined to guess
* @returns a Readable yielding one item per input line
*/
export declare function lineSeparatedURLsToSitemapOptions(stream: Readable, { isJSON }?: {
isJSON?: boolean;
}): Readable;
/**
* Splits an array into consecutive groups of at most `size` elements.
*
* Based on lodash's implementation of chunk.
*
* Copyright JS Foundation and other contributors <https://js.foundation/>
*
* Based on Underscore.js, copyright Jeremy Ashkenas,
* DocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>
*
* This software consists of voluntary contributions made by many
* individuals. For exact contribution history, see the revision history
* available at https://github.com/lodash/lodash
*
* @param array the array to split
* @param size the maximum length of each group; optional
* @returns an array of groups
*/
export declare function chunk(array: any[], size?: number): any[];
/**
* Converts the passed in sitemap entry into one capable of being consumed by SitemapItem
* @param {string | SitemapItemLoose} elem the string or object to be converted
* @param {string} hostname optional base that relative locations are resolved against
* @param {boolean} lastmodDateOnly optional; NOTE(review): presumably limits `lastmod` to its date part — confirm in the implementation
* @returns a strict SitemapItem
*/
export declare function normalizeURL(elem: string | SitemapItemLoose, hostname?: string, lastmodDateOnly?: boolean): SitemapItem;

230
node_modules/sitemap/dist/cjs/lib/utils.js generated vendored Normal file
View File

@@ -0,0 +1,230 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ReadlineStream = exports.validateSMIOptions = void 0;
exports.mergeStreams = mergeStreams;
exports.lineSeparatedURLsToSitemapOptions = lineSeparatedURLsToSitemapOptions;
exports.chunk = chunk;
exports.normalizeURL = normalizeURL;
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
const node_fs_1 = require("node:fs");
const node_stream_1 = require("node:stream");
const node_readline_1 = require("node:readline");
const node_url_1 = require("node:url");
const types_js_1 = require("./types.js");
// Re-export validateSMIOptions from validation.ts for backward compatibility
var validation_js_1 = require("./validation.js");
Object.defineProperty(exports, "validateSMIOptions", { enumerable: true, get: function () { return validation_js_1.validateSMIOptions; } });
/**
 * Combines multiple readable streams into one.
 *
 * Every source is piped into a single PassThrough. The PassThrough is ended
 * through its regular `end()` path once the last source has ended, so
 * consumers see `'end'` only after all buffered data has been delivered.
 * An empty `streams` array yields a stream that ends immediately.
 *
 * Errors on the source streams are not forwarded to the merged stream.
 *
 * @param streams the streams to combine
 * @param options options handed to the PassThrough constructor
 * @returns the PassThrough carrying the merged data
 */
function mergeStreams(streams, options) {
    const pass = new node_stream_1.PassThrough(options);
    let waiting = streams.length;
    if (waiting === 0) {
        // Nothing will ever signal completion, so finish right away.
        pass.end();
        return pass;
    }
    for (const stream of streams) {
        // `end: false` keeps the first source to finish from closing the
        // shared destination while others are still producing.
        stream.pipe(pass, { end: false });
        stream.once('end', () => {
            waiting -= 1;
            if (waiting === 0) {
                pass.end();
            }
        });
    }
    return pass;
}
/**
 * Wraps node's readline interface in an object-mode Readable that yields one
 * chunk per line of `options.input`.
 *
 * The caller's options object is left untouched: the stream is configured
 * from a copy in which `objectMode` is forced to `true` and `autoDestroy`
 * defaults to `true` when not specified.
 */
class ReadlineStream extends node_stream_1.Readable {
    _source;
    /**
     * @param options Readable options plus `input`, the stream to split into lines
     */
    constructor(options) {
        super({
            ...options,
            autoDestroy: options.autoDestroy === undefined ? true : options.autoDestroy,
            objectMode: true,
        });
        this._source = (0, node_readline_1.createInterface)({
            input: options.input,
            terminal: false,
            crlfDelay: Infinity,
        });
        // Every line goes into the internal buffer; when push() reports that
        // the buffer is full, stop reading from the source until _read() runs.
        this._source.on('line', (chunk) => {
            if (!this.push(chunk)) {
                this._source.pause();
            }
        });
        // When the source closes, push the EOF-signaling `null` chunk.
        this._source.on('close', () => {
            this.push(null);
        });
    }
    // Called when the stream wants more data: let the source flow again.
    // The advisory size argument is ignored.
    _read(size) {
        this._source.resume();
    }
}
exports.ReadlineStream = ReadlineStream;
/**
 * Takes a stream likely from fs.createReadStream('./path') and returns a stream
 * of sitemap items: one item per input line.
 *
 * A line is parsed as JSON when `isJSON` is truthy, or when `isJSON` is
 * undefined and the line starts with `{`. Every other line is passed through
 * unchanged as a string. A line that fails to parse makes the returned stream
 * emit the parse error instead of throwing from inside the transform.
 *
 * @param stream a stream of line separated urls.
 * @param opts.isJSON is the stream line separated JSON. leave undefined to guess
 * @returns an object-mode stream of parsed objects and/or raw line strings
 */
function lineSeparatedURLsToSitemapOptions(stream, { isJSON } = {}) {
    return new ReadlineStream({ input: stream }).pipe(new node_stream_1.Transform({
        objectMode: true,
        transform: (line, encoding, cb) => {
            const treatAsJSON = isJSON || (isJSON === undefined && line[0] === '{');
            if (!treatAsJSON) {
                cb(null, line);
                return;
            }
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch (err) {
                // Report malformed JSON through the stream's error channel.
                cb(err);
                return;
            }
            cb(null, parsed);
        },
    }));
}
/**
 * Splits `array` into consecutive slices of at most `size` elements.
 * `size` is truncated to an integer; a missing/empty array or a size below 1
 * produces an empty result.
 *
 * Based on lodash's implementation of chunk.
 *
 * Copyright JS Foundation and other contributors <https://js.foundation/>
 *
 * Based on Underscore.js, copyright Jeremy Ashkenas,
 * DocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>
 *
 * This software consists of voluntary contributions made by many
 * individuals. For exact contribution history, see the revision history
 * available at https://github.com/lodash/lodash
 */
/* eslint-disable @typescript-eslint/no-explicit-any */
function chunk(array, size = 1) {
    const step = Math.max(Math.trunc(size), 0);
    const total = array ? array.length : 0;
    if (!total || step < 1) {
        return [];
    }
    const pieces = Array(Math.ceil(total / step));
    for (let start = 0, slot = 0; start < total; start += step, slot += 1) {
        pieces[slot] = array.slice(start, start + step);
    }
    return pieces;
}
/**
 * Maps a boolean onto the lowercase EnumYesNo members (`true` → yes,
 * `false` → no). Anything that is not a boolean — including `undefined` —
 * is returned unchanged.
 */
function boolToYESNO(bool) {
    if (typeof bool !== 'boolean') {
        return bool;
    }
    const { yes, no } = types_js_1.EnumYesNo;
    return bool ? yes : no;
}
/**
 * Builds a strict sitemap item from a bare URL string or a loosely-typed entry.
 *
 * - every location (page, images, links) is resolved against `hostname`
 * - `img` and `video` are wrapped into arrays; string images become `{ url }`
 * - video flags given as booleans become yes/no values, `tag` becomes an
 *   array, string `rating` / `view_count` are parsed into numbers
 * - `lastmod` is taken from `lastmodfile` (file mtime), else `lastmodISO`,
 *   else `lastmod`, and stored as an ISO timestamp
 *
 * @param {string | SitemapItemLoose} elem the string or object to be converted
 * @param {string} hostname base the locations are resolved against
 * @param {boolean} lastmodDateOnly keep only the first 10 characters (YYYY-MM-DD) of lastmod
 * @returns the strict sitemap item
 * @throws {Error} on an unparseable video rating or view_count, a negative
 *   string view_count, or an invalid date
 */
function normalizeURL(elem, hostname, lastmodDateOnly = false) {
    // Resolve a possibly-relative location against the configured hostname.
    const toAbsolute = (location) => new node_url_1.URL(location, hostname).toString();
    // Parse a date-like value into an ISO timestamp; the failure message is
    // built lazily so it is only evaluated when the date is invalid.
    const toISO = (value, describeFailure) => {
        const parsed = new Date(value);
        if (Number.isNaN(parsed.getTime())) {
            throw new Error(describeFailure());
        }
        return parsed.toISOString();
    };
    const smi = { img: [], video: [], links: [], url: '' };
    if (typeof elem === 'string') {
        smi.url = toAbsolute(elem);
        return smi;
    }
    const { url, img, links, video, lastmodfile, lastmodISO, lastmod, ...other } = elem;
    Object.assign(smi, other);
    smi.url = toAbsolute(url);
    if (img) {
        const images = Array.isArray(img) ? img : [img];
        smi.img = images.map((entry) => {
            if (typeof entry === 'string') {
                return { url: toAbsolute(entry) };
            }
            return { ...entry, url: toAbsolute(entry.url) };
        });
    }
    if (links) {
        smi.links = links.map((link) => {
            return { ...link, url: toAbsolute(link.url) };
        });
    }
    if (video) {
        const looseVideos = Array.isArray(video) ? video : [video];
        smi.video = looseVideos.map((loose) => {
            const strict = {
                ...loose,
                family_friendly: boolToYESNO(loose.family_friendly),
                live: boolToYESNO(loose.live),
                requires_subscription: boolToYESNO(loose.requires_subscription),
                tag: [],
                rating: undefined,
            };
            const { tag, rating, view_count: viewCount } = loose;
            if (tag !== undefined) {
                strict.tag = Array.isArray(tag) ? tag : [tag];
            }
            if (typeof rating === 'string') {
                const numericRating = parseFloat(rating);
                if (Number.isNaN(numericRating)) {
                    throw new Error(`Invalid video rating "${rating}" for URL "${elem.url}": must be a valid number`);
                }
                strict.rating = numericRating;
            }
            else if (rating !== undefined) {
                strict.rating = rating;
            }
            switch (typeof viewCount) {
                case 'string': {
                    const numericCount = parseInt(viewCount, 10);
                    if (Number.isNaN(numericCount)) {
                        throw new Error(`Invalid video view_count "${viewCount}" for URL "${elem.url}": must be a valid number`);
                    }
                    if (numericCount < 0) {
                        throw new Error(`Invalid video view_count "${viewCount}" for URL "${elem.url}": cannot be negative`);
                    }
                    strict.view_count = numericCount;
                    break;
                }
                case 'number':
                    strict.view_count = viewCount;
                    break;
                default:
                    break;
            }
            return strict;
        });
    }
    // Source of the last-modified date, in order of precedence.
    if (lastmodfile) {
        const stats = node_fs_1.statSync(lastmodfile);
        smi.lastmod = toISO(stats.mtime, () => `Invalid date from file stats for URL "${smi.url}": file modification time is invalid`);
    }
    else if (lastmodISO) {
        smi.lastmod = toISO(lastmodISO, () => `Invalid lastmodISO "${lastmodISO}" for URL "${smi.url}": must be a valid date string`);
    }
    else if (lastmod) {
        smi.lastmod = toISO(lastmod, () => `Invalid lastmod "${lastmod}" for URL "${smi.url}": must be a valid date string`);
    }
    if (lastmodDateOnly && smi.lastmod) {
        smi.lastmod = smi.lastmod.slice(0, 10);
    }
    return smi;
}

94
node_modules/sitemap/dist/cjs/lib/validation.d.ts generated vendored Normal file
View File

@@ -0,0 +1,94 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { SitemapItem, ErrorLevel, EnumChangefreq, EnumYesNo, EnumAllowDeny, PriceType, Resolution, ErrorHandler } from './types.js';
/**
* Regular expressions used to validate sitemap field values, keyed by
* field name.
*/
export declare const validators: {
[index: string]: RegExp;
};
/**
* Type guard to check if a string is a valid price type
* @param pt the value to check
* @returns true when `pt` is a PriceType
*/
export declare function isPriceType(pt: string | PriceType): pt is PriceType;
/**
* Type guard to check if a string is a valid resolution
* @param res the value to check
* @returns true when `res` is a Resolution
*/
export declare function isResolution(res: string): res is Resolution;
/**
* Type guard to check if a string is a valid changefreq value
* @param freq the value to check
* @returns true when `freq` is an EnumChangefreq value
*/
export declare function isValidChangeFreq(freq: string): freq is EnumChangefreq;
/**
* Type guard to check if a string is a valid yes/no value
* @param yn the value to check
* @returns true when `yn` is an EnumYesNo value
*/
export declare function isValidYesNo(yn: string): yn is EnumYesNo;
/**
* Type guard to check if a string is a valid allow/deny value
* @param ad the value to check
* @returns true when `ad` is an EnumAllowDeny value
*/
export declare function isAllowDeny(ad: string): ad is EnumAllowDeny;
/**
* Validates that a URL is well-formed and meets security requirements
*
* Security: This function enforces that URLs use safe protocols (http/https),
* are within reasonable length limits (2048 chars per sitemaps.org spec),
* and can be properly parsed. This prevents protocol injection attacks and
* ensures compliance with sitemap specifications.
*
* Returns nothing on success; every failure is reported by throwing.
*
* @param url - The URL to validate
* @param paramName - The parameter name for error messages
* @throws {InvalidHostnameError} If the URL is invalid
*/
export declare function validateURL(url: string, paramName: string): void;
/**
* Validates that a path doesn't contain path traversal sequences
*
* Security: This function prevents path traversal attacks by detecting
* any occurrence of '..' in the path, whether it appears as '../', '/..',
* or standalone. This prevents attackers from accessing files outside
* the intended directory structure.
*
* Returns nothing on success; every failure is reported by throwing.
*
* @param path - The path to validate
* @param paramName - The parameter name for error messages
* @throws {InvalidPathError} If the path contains traversal sequences
*/
export declare function validatePath(path: string, paramName: string): void;
/**
* Validates that a public base path is safe for URL construction
*
* Security: This function prevents path traversal attacks and validates
* that the path is safe for use in URL construction within sitemap indexes.
* It checks for '..' sequences, null bytes, and invalid whitespace that
* could be used to manipulate URL structure or inject malicious content.
*
* Returns nothing on success; every failure is reported by throwing.
*
* @param publicBasePath - The public base path to validate
* @throws {InvalidPublicBasePathError} If the path is invalid
*/
export declare function validatePublicBasePath(publicBasePath: string): void;
/**
* Validates that a limit is within acceptable range per sitemaps.org spec
*
* Security: This function enforces sitemap size limits (1-50,000 URLs per
* sitemap) as specified by sitemaps.org. This prevents resource exhaustion
* attacks and ensures compliance with search engine requirements.
*
* Returns nothing on success; every failure is reported by throwing.
*
* @param limit - The limit to validate
* @throws {InvalidLimitError} If the limit is out of range
*/
export declare function validateLimit(limit: number): void;
/**
* Validates that an XSL URL is safe and well-formed
*
* Security: This function validates XSL stylesheet URLs to prevent
* injection attacks. It blocks dangerous protocols and content patterns
* that could be used for XSS or other attacks. The validation uses
* case-insensitive matching to catch obfuscated attacks.
*
* Returns nothing on success; every failure is reported by throwing.
*
* @param xslUrl - The XSL URL to validate
* @throws {InvalidXSLUrlError} If the URL is invalid
*/
export declare function validateXSLUrl(xslUrl: string): void;
/**
* Verifies all data passed in will comply with sitemap spec.
* @param conf Options to validate
* @param level logging level (optional)
* @param errorHandler error handling func (optional)
* @returns a SitemapItem — NOTE(review): presumably `conf` itself once checked; confirm in the implementation
*/
export declare function validateSMIOptions(conf: SitemapItem, level?: ErrorLevel, errorHandler?: ErrorHandler): SitemapItem;

398
node_modules/sitemap/dist/cjs/lib/validation.js generated vendored Normal file
View File

@@ -0,0 +1,398 @@
"use strict";
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.validators = void 0;
exports.isPriceType = isPriceType;
exports.isResolution = isResolution;
exports.isValidChangeFreq = isValidChangeFreq;
exports.isValidYesNo = isValidYesNo;
exports.isAllowDeny = isAllowDeny;
exports.validateURL = validateURL;
exports.validatePath = validatePath;
exports.validatePublicBasePath = validatePublicBasePath;
exports.validateLimit = validateLimit;
exports.validateXSLUrl = validateXSLUrl;
exports.validateSMIOptions = validateSMIOptions;
const errors_js_1 = require("./errors.js");
const types_js_1 = require("./types.js");
const constants_js_1 = require("./constants.js");
const node_path_1 = require("node:path");
/**
* Validator regular expressions for various sitemap fields, keyed by the
* field name they apply to. All patterns are anchored at both ends, so a
* value must match in full.
*/
// Shared by both *:relationship fields: exactly "allow" or "deny" (lowercase).
const allowDeny = /^(?:allow|deny)$/;
exports.validators = {
// Exactly three uppercase ASCII letters.
'price:currency': /^[A-Z]{3}$/,
// rent / purchase, all-lowercase or all-uppercase.
'price:type': /^(?:rent|purchase|RENT|PURCHASE)$/,
// hd / sd, all-lowercase or all-uppercase.
'price:resolution': /^(?:HD|hd|sd|SD)$/,
'platform:relationship': allowDeny,
'restriction:relationship': allowDeny,
// Two-letter uppercase codes separated by one or more spaces; the empty string also matches.
restriction: /^([A-Z]{2}( +[A-Z]{2})*)?$/,
// web / mobile / tv separated by exactly one space; the empty string also matches.
platform: /^((web|mobile|tv)( (web|mobile|tv))*)?$/,
// Language codes: zh-cn, zh-tw, or ISO 639 2-3 letter codes
language: /^(zh-cn|zh-tw|[a-z]{2,3})$/,
// One or more of the listed genre names, separated by a comma and optional spaces.
genres: /^(PressRelease|Satire|Blog|OpEd|Opinion|UserGenerated)(, *(PressRelease|Satire|Blog|OpEd|Opinion|UserGenerated))*$/,
// Up to five word:word pairs separated by a comma and optional spaces; the empty string also matches.
stock_tickers: /^(\w+:\w+(, *\w+:\w+){0,4})?$/,
};
/**
 * Type guard: reports whether `pt` matches the 'price:type' validator.
 */
function isPriceType(pt) {
    const { 'price:type': pricePattern } = exports.validators;
    return pricePattern.test(pt);
}
/**
 * Type guard: reports whether `res` matches the 'price:resolution' validator.
 */
function isResolution(res) {
    const resolutionPattern = exports.validators['price:resolution'];
    const matches = resolutionPattern.test(res);
    return matches;
}
// Every changefreq value, collected once at module load.
const CHANGEFREQ = Object.values(types_js_1.EnumChangefreq);
/**
 * Type guard: reports whether `freq` is one of the changefreq values.
 */
function isValidChangeFreq(freq) {
    return CHANGEFREQ.some((allowed) => allowed === freq);
}
/**
 * Type guard to check if a string is a valid yes/no value.
 *
 * Accepts exactly YES, NO, Yes, No, yes and no. The alternatives are wrapped
 * in a group so that the ^ and $ anchors apply to every one of them; without
 * the group, strings that merely start with "YES", end with "no" or contain
 * "NO" would also pass.
 *
 * @param yn the value to check
 * @returns true when `yn` is one of the six accepted spellings
 */
function isValidYesNo(yn) {
    return /^(?:YES|NO|[Yy]es|[Nn]o)$/.test(yn);
}
/**
 * Type guard: reports whether `ad` matches the shared allow/deny pattern.
 */
function isAllowDeny(ad) {
    const matches = allowDeny.test(ad);
    return matches;
}
/**
 * Checks that `url` is an acceptable absolute http(s) URL.
 *
 * The checks run in this order, and the first failure throws:
 * 1. non-empty string
 * 2. no longer than LIMITS.MAX_URL_LENGTH
 * 3. matches LIMITS.URL_PROTOCOL_REGEX (http:// or https://)
 * 4. parseable by the URL constructor
 *
 * @param url - The URL to validate
 * @param paramName - The parameter name for error messages
 * @throws {InvalidHostnameError} If any check fails
 */
function validateURL(url, paramName) {
    // Every message starts with the parameter name, so build errors in one place.
    const reject = (detail) => {
        throw new errors_js_1.InvalidHostnameError(url, `${paramName} ${detail}`);
    };
    if (!url || typeof url !== 'string') {
        reject('must be a non-empty string');
    }
    const { MAX_URL_LENGTH, URL_PROTOCOL_REGEX } = constants_js_1.LIMITS;
    if (url.length > MAX_URL_LENGTH) {
        reject(`exceeds maximum length of ${MAX_URL_LENGTH} characters`);
    }
    if (!URL_PROTOCOL_REGEX.test(url)) {
        reject('must use http:// or https:// protocol');
    }
    let parseFailure;
    try {
        new URL(url);
    }
    catch (err) {
        parseFailure = { err };
    }
    if (parseFailure) {
        const { err } = parseFailure;
        reject(`is not a valid URL: ${err instanceof Error ? err.message : String(err)}`);
    }
}
/**
 * Validates that a path is a safe, relative location.
 *
 * Security: rejects, in this order,
 * 1. empty or non-string values,
 * 2. absolute paths, so caller input cannot redirect writes to an arbitrary
 *    location (BB-04),
 * 3. any occurrence of '..' anywhere in the string — this covers '../',
 *    '..\\' and standalone '..', and also rejects names that merely contain
 *    two consecutive dots,
 * 4. null bytes.
 *
 * No percent-decoding or other normalization is applied; the raw string is
 * what gets checked. Because the substring test already rejects every path
 * containing '..', no separate per-component check is needed.
 *
 * @param path - The path to validate
 * @param paramName - The parameter name for error messages
 * @throws {InvalidPathError} If any check fails
 */
function validatePath(path, paramName) {
    if (!path || typeof path !== 'string') {
        throw new errors_js_1.InvalidPathError(path, `${paramName} must be a non-empty string`);
    }
    // Reject absolute paths to prevent arbitrary write location when caller input
    // reaches destinationDir (BB-04)
    if ((0, node_path_1.isAbsolute)(path)) {
        throw new errors_js_1.InvalidPathError(path, `${paramName} must be a relative path (absolute paths are not allowed)`);
    }
    // A plain substring test is separator-agnostic: it catches '..' next to
    // '/', next to '\\', and on its own.
    if (path.includes('..')) {
        throw new errors_js_1.InvalidPathError(path, `${paramName} contains path traversal sequence (..)`);
    }
    // Check for null bytes (security issue in some contexts)
    if (path.includes('\0')) {
        throw new errors_js_1.InvalidPathError(path, `${paramName} contains null byte character`);
    }
}
/**
 * Validates that a public base path is safe for URL construction.
 *
 * Security: rejects, in this order,
 * 1. empty or non-string values,
 * 2. any occurrence of '..' anywhere in the string (path traversal),
 * 3. null bytes,
 * 4. carriage return, line feed or tab characters, which could break URL
 *    construction.
 *
 * No percent-decoding or other normalization is applied; the raw string is
 * what gets checked. Because the substring test already rejects every value
 * containing '..', no separate per-component check is needed.
 *
 * @param publicBasePath - The public base path to validate
 * @throws {InvalidPublicBasePathError} If any check fails
 */
function validatePublicBasePath(publicBasePath) {
    if (!publicBasePath || typeof publicBasePath !== 'string') {
        throw new errors_js_1.InvalidPublicBasePathError(publicBasePath, 'must be a non-empty string');
    }
    // Separator-agnostic: catches '..' next to '/', next to '\\', and on its own.
    if (publicBasePath.includes('..')) {
        throw new errors_js_1.InvalidPublicBasePathError(publicBasePath, 'contains path traversal sequence (..)');
    }
    // Check for null bytes
    if (publicBasePath.includes('\0')) {
        throw new errors_js_1.InvalidPublicBasePathError(publicBasePath, 'contains null byte character');
    }
    // Check for potentially dangerous characters that could break URL construction
    if (/[\r\n\t]/.test(publicBasePath)) {
        throw new errors_js_1.InvalidPublicBasePathError(publicBasePath, 'contains invalid whitespace characters');
    }
}
/**
 * Checks that `limit` is usable as a per-sitemap item limit.
 *
 * A valid limit is an integer (which also rules out non-numbers, NaN and
 * infinities) that is not below LIMITS.MIN_SITEMAP_ITEM_LIMIT and not above
 * LIMITS.MAX_SITEMAP_ITEM_LIMIT. Every failure throws the same error type.
 *
 * @param limit - The limit to validate
 * @throws {InvalidLimitError} If the limit is not an integer or is out of range
 */
function validateLimit(limit) {
    const { MIN_SITEMAP_ITEM_LIMIT: lowest, MAX_SITEMAP_ITEM_LIMIT: highest } = constants_js_1.LIMITS;
    // Number.isInteger is false for non-numbers, NaN and +/-Infinity, so it
    // subsumes the separate type and finiteness checks.
    if (!Number.isInteger(limit) || limit < lowest || limit > highest) {
        throw new errors_js_1.InvalidLimitError(limit);
    }
}
/**
 * Checks that an XSL stylesheet URL is safe to embed in generated XML.
 *
 * The checks run in this order, and the first failure throws:
 * 1. non-empty string
 * 2. no longer than LIMITS.MAX_URL_LENGTH
 * 3. matches LIMITS.URL_PROTOCOL_REGEX (http:// or https://)
 * 4. parseable by the URL constructor
 * 5. no "<script" (case-insensitive)
 * 6. none of the listed dangerous protocol names anywhere in the URL
 *    (case-insensitive)
 * 7. none of the listed percent-encoded variants (case-insensitive)
 * 8. no raw `"`, `<` or `>` characters
 *
 * @param xslUrl - The XSL URL to validate
 * @throws {InvalidXSLUrlError} If any check fails
 */
function validateXSLUrl(xslUrl) {
    const reject = (reason) => {
        throw new errors_js_1.InvalidXSLUrlError(xslUrl, reason);
    };
    if (!xslUrl || typeof xslUrl !== 'string') {
        reject('must be a non-empty string');
    }
    const { MAX_URL_LENGTH, URL_PROTOCOL_REGEX } = constants_js_1.LIMITS;
    if (xslUrl.length > MAX_URL_LENGTH) {
        reject(`exceeds maximum length of ${MAX_URL_LENGTH} characters`);
    }
    if (!URL_PROTOCOL_REGEX.test(xslUrl)) {
        reject('must use http:// or https:// protocol');
    }
    let parseFailure;
    try {
        new URL(xslUrl);
    }
    catch (err) {
        parseFailure = { err };
    }
    if (parseFailure) {
        const { err } = parseFailure;
        reject(`is not a valid URL: ${err instanceof Error ? err.message : String(err)}`);
    }
    // All content checks below are case-insensitive.
    const lowered = xslUrl.toLowerCase();
    if (lowered.includes('<script')) {
        reject('contains potentially malicious content (<script tag)');
    }
    // The scheme was already verified above; these catch protocol names
    // smuggled in elsewhere in the URL.
    const smuggledProtocol = ['javascript:', 'data:', 'vbscript:', 'file:', 'about:']
        .find((protocol) => lowered.includes(protocol));
    if (smuggledProtocol !== undefined) {
        reject(`contains dangerous protocol: ${smuggledProtocol}`);
    }
    // Percent-encoded spellings of the same patterns (%3C = '<', %3A = ':').
    const encodedPatterns = [
        '%3cscript', // <script
        '%3c%73%63%72%69%70%74', // <script (fully encoded)
        'javascript%3a', // javascript:
        'data%3a', // data:
    ];
    if (encodedPatterns.some((pattern) => lowered.includes(pattern))) {
        reject('contains URL-encoded malicious content');
    }
    // Raw XML special characters could break out of an XML attribute; valid
    // URLs carry them percent-encoded.
    if (/["<>]/.test(xslUrl)) {
        reject('contains unencoded XML special characters (" < >); percent-encode them in the URL');
    }
}
/**
 * Internal helper: tests every own enumerable key of `subject` that has a
 * matching entry in `exports.validators` against that validator.
 *
 * A failing value throws InvalidAttrValue when `level` is ErrorLevel.THROW;
 * for any other level it is reported through console.warn and the scan
 * continues. Keys without a validator are ignored.
 */
function validate(subject, name, url, level) {
    for (const key of Object.keys(subject)) {
        const val = subject[key];
        const validator = exports.validators[key];
        if (!validator || validator.test(val)) {
            continue;
        }
        if (level === types_js_1.ErrorLevel.THROW) {
            throw new errors_js_1.InvalidAttrValue(key, val, validator);
        }
        console.warn(`${url}: ${name} key ${key} has invalid value: ${val}`);
    }
}
/**
 * Internal helper: dispatches an error according to the error level.
 * ErrorLevel.THROW rethrows it, ErrorLevel.WARN prints its name and message
 * via console.warn, and every other level drops it.
 */
function handleError(error, level) {
    const { THROW, WARN } = types_js_1.ErrorLevel;
    switch (level) {
        case THROW:
            throw error;
        case WARN:
            console.warn(error.name, error.message);
            break;
        default:
            // Any other level: nothing to do.
            break;
    }
}
/**
 * Verifies all data passed in will comply with sitemap spec.
 *
 * Every problem found is passed to `errorHandler(error, level)`; with the
 * default handler, ErrorLevel.THROW throws and ErrorLevel.WARN logs.
 * ErrorLevel.SILENT skips validation entirely.
 *
 * Malformed entries are reported through `errorHandler` rather than crashing
 * with a raw TypeError: a video that is not an object, a video that lacks
 * `title` / `description` / `tag`, or a news block without `publication`.
 *
 * @param conf Options to validate
 * @param level logging level
 * @param errorHandler error handling func
 * @returns the same `conf` object that was passed in
 * @throws {NoConfigError} if `conf` is falsy (regardless of level)
 */
function validateSMIOptions(conf, level = types_js_1.ErrorLevel.WARN, errorHandler = handleError) {
    if (!conf) {
        throw new errors_js_1.NoConfigError();
    }
    if (level === types_js_1.ErrorLevel.SILENT) {
        return conf;
    }
    const { url, changefreq, priority, news, video } = conf;
    if (!url) {
        errorHandler(new errors_js_1.NoURLError(), level);
    }
    if (changefreq) {
        if (!isValidChangeFreq(changefreq)) {
            errorHandler(new errors_js_1.ChangeFreqInvalidError(url, changefreq), level);
        }
    }
    if (priority) {
        if (!(priority >= 0.0 && priority <= 1.0)) {
            errorHandler(new errors_js_1.PriorityInvalidError(url, priority), level);
        }
    }
    if (news) {
        if (news.access &&
            news.access !== 'Registration' &&
            news.access !== 'Subscription') {
            errorHandler(new errors_js_1.InvalidNewsAccessValue(url, news.access), level);
        }
        if (!news.publication ||
            !news.publication.name ||
            !news.publication.language ||
            !news.publication_date ||
            !news.title) {
            errorHandler(new errors_js_1.InvalidNewsFormat(url), level);
        }
        validate(news, 'news', url, level);
        // A missing publication was already reported as InvalidNewsFormat above;
        // validating its (non-existent) keys would only raise a TypeError.
        if (news.publication) {
            validate(news.publication, 'publication', url, level);
        }
    }
    if (video) {
        video.forEach((vid) => {
            // Reject non-object entries up front: none of the field checks below
            // are meaningful for them and `null` cannot be dereferenced at all.
            if (vid === null || typeof vid !== 'object') {
                errorHandler(new errors_js_1.InvalidVideoFormat(url), level);
                return;
            }
            if (vid.duration !== undefined) {
                if (vid.duration < 0 || vid.duration > 28800) {
                    errorHandler(new errors_js_1.InvalidVideoDuration(url, vid.duration), level);
                }
            }
            if (vid.rating !== undefined && (vid.rating < 0 || vid.rating > 5)) {
                errorHandler(new errors_js_1.InvalidVideoRating(url, vid.title, vid.rating), level);
            }
            if (!vid.thumbnail_loc || !vid.title || !vid.description) {
                // has to include required categories https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190
                errorHandler(new errors_js_1.InvalidVideoFormat(url), level);
            }
            // The length checks use `?.` because a missing field was already
            // reported above and must not abort validation in non-throwing modes.
            if (vid.title?.length > 100) {
                errorHandler(new errors_js_1.InvalidVideoTitle(url, vid.title.length), level);
            }
            if (vid.description?.length > 2048) {
                errorHandler(new errors_js_1.InvalidVideoDescription(url, vid.description.length), level);
            }
            if (vid.view_count !== undefined && vid.view_count < 0) {
                errorHandler(new errors_js_1.InvalidVideoViewCount(url, vid.view_count), level);
            }
            if (vid.tag?.length > 32) {
                errorHandler(new errors_js_1.InvalidVideoTagCount(url, vid.tag.length), level);
            }
            if (vid.category !== undefined && vid.category?.length > 256) {
                errorHandler(new errors_js_1.InvalidVideoCategory(url, vid.category.length), level);
            }
            if (vid.family_friendly !== undefined &&
                !isValidYesNo(vid.family_friendly)) {
                errorHandler(new errors_js_1.InvalidVideoFamilyFriendly(url, vid.family_friendly), level);
            }
            if (vid.restriction) {
                if (!exports.validators.restriction.test(vid.restriction)) {
                    errorHandler(new errors_js_1.InvalidVideoRestriction(url, vid.restriction), level);
                }
                if (!vid['restriction:relationship'] ||
                    !isAllowDeny(vid['restriction:relationship'])) {
                    errorHandler(new errors_js_1.InvalidVideoRestrictionRelationship(url, vid['restriction:relationship']), level);
                }
            }
            // TODO price element should be unbounded
            if ((vid.price === '' && vid['price:type'] === undefined) ||
                (vid['price:type'] !== undefined && !isPriceType(vid['price:type']))) {
                errorHandler(new errors_js_1.InvalidVideoPriceType(url, vid['price:type'], vid.price), level);
            }
            if (vid['price:resolution'] !== undefined &&
                !isResolution(vid['price:resolution'])) {
                errorHandler(new errors_js_1.InvalidVideoResolution(url, vid['price:resolution']), level);
            }
            if (vid['price:currency'] !== undefined &&
                !exports.validators['price:currency'].test(vid['price:currency'])) {
                errorHandler(new errors_js_1.InvalidVideoPriceCurrency(url, vid['price:currency']), level);
            }
            validate(vid, 'video', url, level);
        });
    }
    return conf;
}

12
node_modules/sitemap/dist/cjs/lib/xmllint.d.ts generated vendored Normal file
View File

@@ -0,0 +1,12 @@
import { Readable } from 'node:stream';
/**
* Verify that the passed-in XML is valid against the sitemap schema.
* Requires the `xmllint` command-line tool to be installed.
*
* Security: This function always pipes XML content via stdin to prevent
* command injection vulnerabilities. Never pass user-controlled strings
* as file path arguments to xmllint.
*
* @param xml what you want validated (string or Readable stream)
* @return {Promise<void>} resolves when the XML is valid; rejects with an
* array: `[error, stderr]` when xmllint reports a failure, or
* `[XMLLintUnavailable]` when the xmllint binary cannot be found
*/
export declare function xmlLint(xml: string | Readable): Promise<void>;

81
node_modules/sitemap/dist/cjs/lib/xmllint.js generated vendored Normal file
View File

@@ -0,0 +1,81 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.xmlLint = xmlLint;
const node_fs_1 = require("node:fs");
const node_path_1 = require("node:path");
const node_child_process_1 = require("node:child_process");
const errors_js_1 = require("./errors.js");
/**
 * Locates the `schema` directory relative to the current working directory.
 *
 * Three candidates are probed in order and the first one that exists wins:
 * `<cwd>/schema`, `<cwd>/../schema`, `<cwd>/../../schema`.
 * Note that the lookup is anchored at process.cwd(), not at this file's
 * location.
 *
 * @throws {Error} if none of the candidate directories exists
 * @returns {string} the path to the schema directory
 */
function findSchemaDir() {
    const workingDir = process.cwd();
    // Zero, one and two levels above the working directory.
    const candidates = [[], ['..'], ['..', '..']].map((climb) => (0, node_path_1.resolve)(workingDir, ...climb, 'schema'));
    const located = candidates.find((candidate) => (0, node_fs_1.existsSync)(candidate));
    if (located === undefined) {
        throw new Error(`Schema directory not found. Searched paths: ${candidates.join(', ')}`);
    }
    return located;
}
/**
 * Verify that the passed-in XML is valid against `all.xsd` in the schema
 * directory. Requires the `xmllint` command-line tool to be installed.
 *
 * Security: This function always pipes XML content via stdin to prevent
 * command injection vulnerabilities. Never pass user-controlled strings
 * as file path arguments to xmllint.
 *
 * @param xml what you want validated (string or Readable stream)
 * @return {Promise<void>} resolves when xmllint exits cleanly; rejects with
 * `[XMLLintUnavailable]` when `which xmllint` fails, or with
 * `[error, stderr]` when xmllint itself reports an error
 * @throws {Error} synchronously, if the schema directory cannot be located
 */
function xmlLint(xml) {
    const args = [
        '--schema',
        (0, node_path_1.resolve)(findSchemaDir(), 'all.xsd'),
        '--noout',
        '-', // Always read from stdin for security
    ];
    return new Promise((resolve, reject) => {
        // Probe for the binary first so a missing tool gets a dedicated error.
        (0, node_child_process_1.execFile)('which', ['xmllint'], (lookupError) => {
            if (lookupError) {
                reject([new errors_js_1.XMLLintUnavailable()]);
                return;
            }
            const xmllint = (0, node_child_process_1.execFile)('xmllint', args, (lintError, _stdout, stderr) => {
                if (lintError) {
                    reject([lintError, stderr]);
                    // Stop here: the promise is settled, do not fall through to resolve().
                    return;
                }
                resolve();
            });
            // Always pipe XML content via stdin for security
            if (xmllint.stdin) {
                if (typeof xml === 'string') {
                    // Write the whole string, then close stdin so xmllint sees EOF
                    xmllint.stdin.write(xml);
                    xmllint.stdin.end();
                }
                else if (xml) {
                    // Pipe readable stream to stdin
                    xml.pipe(xmllint.stdin);
                }
            }
            if (xmllint.stdout) {
                xmllint.stdout.unpipe();
            }
        });
    });
}

1
node_modules/sitemap/dist/cjs/package.json generated vendored Normal file
View File

@@ -0,0 +1 @@
{"type":"commonjs"}

2
node_modules/sitemap/dist/esm/cli.d.ts generated vendored Normal file
View File

@@ -0,0 +1,2 @@
#!/usr/bin/env node
export {};

156
node_modules/sitemap/dist/esm/cli.js generated vendored Executable file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env node
import { createReadStream, createWriteStream } from 'node:fs';
import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import { xmlLint } from './lib/xmllint.js';
import { XMLLintUnavailable } from './lib/errors.js';
import { ObjectStreamToJSON, XMLToSitemapItemStream, } from './lib/sitemap-parser.js';
import { lineSeparatedURLsToSitemapOptions } from './lib/utils.js';
import { SitemapStream } from './lib/sitemap-stream.js';
import { SitemapAndIndexStream } from './lib/sitemap-index-stream.js';
import { URL, fileURLToPath } from 'node:url';
import { createGzip } from 'node:zlib';
import { ErrorLevel } from './lib/types.js';
import arg from 'arg';
// Read package.json from the project root (two levels up from dist/esm or dist/cjs).
// In CJS, __dirname is defined and is used directly.
// In ESM, referencing __dirname throws a ReferenceError, so the directory is
// derived from import.meta.url instead.
let currentDir;
try {
    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
    // @ts-ignore - __dirname may not be defined in ESM
    currentDir = __dirname;
}
catch {
    // fileURLToPath (rather than URL#pathname) decodes percent-encoded
    // characters such as spaces and yields a valid path on Windows too.
    currentDir = fileURLToPath(new URL('.', import.meta.url));
}
const packageJson = JSON.parse(readFileSync(resolve(currentDir, '../../package.json'), 'utf8'));
/**
 * Chooses the input stream for URL lists: the first positional argument
 * opened as a UTF-8 file stream, or process.stdin when no positional
 * argument was given.
 */
const pickStreamOrArg = (argv) => (argv._.length
    ? createReadStream(argv._[0], { encoding: 'utf8' })
    : process.stdin);
// Option specification handed to `arg`: each long flag maps to the constructor
// used for its value (Boolean, String or Number); '-h' is declared as an alias
// of '--help'.
const argSpec = {
'--help': Boolean,
'--version': Boolean,
'--validate': Boolean,
'--index': Boolean,
'--index-base-url': String,
'--limit': Number,
'--parse': Boolean,
'--single-line-json': Boolean,
'--prepend': String,
'--gzip': Boolean,
'-h': '--help',
};
// Parsed options. Positional arguments are read from `argv._` by getStream()
// and pickStreamOrArg().
// NOTE(review): presumably `arg` parses process.argv when called without an explicit argv — confirm against the `arg` documentation.
const argv = arg(argSpec);
/**
 * Returns the raw input stream for --parse / --validate: a file stream for
 * the first positional argument when one exists, otherwise process.stdin
 * (after warning that the command is waiting on piped input).
 */
function getStream() {
    const hasPositional = Boolean(argv._?.length);
    if (!hasPositional) {
        console.warn('Reading from stdin. If you are not piping anything in, this command is not doing anything');
        return process.stdin;
    }
    return createReadStream(argv._[0]);
}
// Command dispatch. Exactly one branch runs, chosen by the first flag that is
// set, in this order: --version, --help, --parse, --validate, --index, and
// finally the default "build a single sitemap" mode.
if (argv['--version']) {
// Print the version read from package.json.
console.log(packageJson.version);
}
else if (argv['--help']) {
console.log(`
Turn a list of urls into a sitemap xml.
Options:
--help Print this text
--version Print the version
--validate Ensure the passed in file is conforms to the sitemap spec
--index Create an index and stream that out. Writes out sitemaps along the way.
--index-base-url Base url the sitemaps will be hosted eg. https://example.com/sitemaps/
--limit=45000 Set a custom limit to the items per sitemap
--parse Parse fed xml and spit out config
--prepend=sitemap.xml Prepend the streamed in sitemap configs to sitemap.xml
--gzip Compress output
--single-line-json When used with parse, it spits out each entry as json rather than the whole json.
# examples
Generate a sitemap index file as well as sitemaps
npx sitemap --gzip --index --index-base-url https://example.com/path/to/sitemaps/ < listofurls.txt > sitemap-index.xml.gz
Add to a sitemap
npx sitemap --prepend sitemap.xml < listofurls.json
Turn an existing sitemap into configuration understood by the sitemap library
npx sitemap --parse sitemap.xml
Use XMLLib to validate your sitemap (requires xmllib)
npx sitemap --validate sitemap.xml
`);
}
else if (argv['--parse']) {
// XML in -> sitemap item objects -> JSON out, optionally gzip-compressed.
// The parser is created with ErrorLevel.THROW.
let oStream = getStream()
.pipe(new XMLToSitemapItemStream({ level: ErrorLevel.THROW }))
.pipe(new ObjectStreamToJSON({ lineSeparated: !argv['--single-line-json'] }));
if (argv['--gzip']) {
oStream = oStream.pipe(createGzip());
}
oStream.pipe(process.stdout);
}
else if (argv['--validate']) {
// xmlLint rejects with an array, hence the destructuring in the catch handler.
// NOTE(review): a validation failure is only logged; this branch does not set a non-zero exit code.
xmlLint(getStream())
.then(() => console.log('valid'))
.catch(([error, stderr]) => {
if (error instanceof XMLLintUnavailable) {
console.error(error.message);
return;
}
else {
console.log(stderr);
}
});
}
else if (argv['--index']) {
const limit = argv['--limit'];
const baseURL = argv['--index-base-url'];
if (!baseURL) {
throw new Error("You must specify where the sitemaps will be hosted. use --index-base-url 'https://example.com/path'");
}
const sms = new SitemapAndIndexStream({
limit,
// Called with the sitemap number `i`; writes ./sitemap-<i>.xml (gzip-compressed
// when --gzip is set) and returns [public URL, sitemap stream, write stream].
getSitemapStream: (i) => {
const sm = new SitemapStream();
const path = `./sitemap-${i}.xml`;
let ws;
if (argv['--gzip']) {
ws = sm.pipe(createGzip()).pipe(createWriteStream(path));
}
else {
ws = sm.pipe(createWriteStream(path));
}
return [new URL(path, baseURL).toString(), sm, ws];
},
});
// The output of the index stream itself goes to stdout.
let oStream = lineSeparatedURLsToSitemapOptions(pickStreamOrArg(argv)).pipe(sms);
if (argv['--gzip']) {
oStream = oStream.pipe(createGzip());
}
oStream.pipe(process.stdout);
}
else {
// Default mode: turn line-separated input into a single sitemap on stdout.
const sms = new SitemapStream();
if (argv['--prepend']) {
// Entries parsed from the existing sitemap file are piped into the same stream.
// NOTE(review): two sources are piped into `sms` here; confirm that the first one ending does not end `sms` early.
createReadStream(argv['--prepend'])
.pipe(new XMLToSitemapItemStream())
.pipe(sms);
}
const oStream = lineSeparatedURLsToSitemapOptions(pickStreamOrArg(argv)).pipe(sms);
if (argv['--gzip']) {
oStream.pipe(createGzip()).pipe(process.stdout);
}
else {
oStream.pipe(process.stdout);
}
}

17
node_modules/sitemap/dist/esm/index.d.ts generated vendored Normal file
View File

@@ -0,0 +1,17 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
export { SitemapItemStream, SitemapItemStreamOptions, } from './lib/sitemap-item-stream.js';
export { IndexTagNames, SitemapIndexStream, SitemapIndexStreamOptions, SitemapAndIndexStream, SitemapAndIndexStreamOptions, } from './lib/sitemap-index-stream.js';
export { streamToPromise, SitemapStream, SitemapStreamOptions, } from './lib/sitemap-stream.js';
export * from './lib/errors.js';
export * from './lib/types.js';
export { lineSeparatedURLsToSitemapOptions, mergeStreams, validateSMIOptions, normalizeURL, ReadlineStream, ReadlineStreamOptions, } from './lib/utils.js';
export { xmlLint } from './lib/xmllint.js';
export { parseSitemap, XMLToSitemapItemStream, XMLToSitemapItemStreamOptions, ObjectStreamToJSON, ObjectStreamToJSONOptions, } from './lib/sitemap-parser.js';
export { parseSitemapIndex, XMLToSitemapIndexStream, XMLToSitemapIndexItemStreamOptions, IndexObjectStreamToJSON, IndexObjectStreamToJSONOptions, } from './lib/sitemap-index-parser.js';
export { simpleSitemapAndIndex, SimpleSitemapAndIndexOptions, } from './lib/sitemap-simple.js';
export { validateURL, validatePath, validateLimit, validatePublicBasePath, validateXSLUrl, validators, isPriceType, isResolution, isValidChangeFreq, isValidYesNo, isAllowDeny, } from './lib/validation.js';
export { LIMITS, DEFAULT_SITEMAP_ITEM_LIMIT } from './lib/constants.js';

17
node_modules/sitemap/dist/esm/index.js generated vendored Normal file
View File

@@ -0,0 +1,17 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
export { SitemapItemStream, } from './lib/sitemap-item-stream.js';
export { IndexTagNames, SitemapIndexStream, SitemapAndIndexStream, } from './lib/sitemap-index-stream.js';
export { streamToPromise, SitemapStream, } from './lib/sitemap-stream.js';
export * from './lib/errors.js';
export * from './lib/types.js';
export { lineSeparatedURLsToSitemapOptions, mergeStreams, validateSMIOptions, normalizeURL, ReadlineStream, } from './lib/utils.js';
export { xmlLint } from './lib/xmllint.js';
export { parseSitemap, XMLToSitemapItemStream, ObjectStreamToJSON, } from './lib/sitemap-parser.js';
export { parseSitemapIndex, XMLToSitemapIndexStream, IndexObjectStreamToJSON, } from './lib/sitemap-index-parser.js';
export { simpleSitemapAndIndex, } from './lib/sitemap-simple.js';
export { validateURL, validatePath, validateLimit, validatePublicBasePath, validateXSLUrl, validators, isPriceType, isResolution, isValidChangeFreq, isValidYesNo, isAllowDeny, } from './lib/validation.js';
export { LIMITS, DEFAULT_SITEMAP_ITEM_LIMIT } from './lib/constants.js';

49
node_modules/sitemap/dist/esm/lib/constants.d.ts generated vendored Normal file
View File

@@ -0,0 +1,49 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
* Shared constants used across the sitemap library
* This file serves as a single source of truth for limits and validation patterns
*/
/**
* Security limits for sitemap generation and parsing
*
* These limits are based on:
* - sitemaps.org protocol specification
* - Security best practices to prevent DoS and injection attacks
* - Google's sitemap extension specifications
*
* @see https://www.sitemaps.org/protocol.html
* @see https://developers.google.com/search/docs/advanced/sitemaps/build-sitemap
*/
export declare const LIMITS: {
// URL constraints
readonly MAX_URL_LENGTH: 2048;
readonly URL_PROTOCOL_REGEX: RegExp;
// Allowed range for the number of items per sitemap file
readonly MIN_SITEMAP_ITEM_LIMIT: 1;
readonly MAX_SITEMAP_ITEM_LIMIT: 50000;
// Video field constraints
readonly MAX_VIDEO_TITLE_LENGTH: 100;
readonly MAX_VIDEO_DESCRIPTION_LENGTH: 2048;
readonly MAX_VIDEO_CATEGORY_LENGTH: 256;
readonly MAX_TAGS_PER_VIDEO: 32;
// News field constraints
readonly MAX_NEWS_TITLE_LENGTH: 200;
readonly MAX_NEWS_NAME_LENGTH: 256;
// Image field constraints
readonly MAX_IMAGE_CAPTION_LENGTH: 512;
readonly MAX_IMAGE_TITLE_LENGTH: 512;
// Per-URL item counts
readonly MAX_IMAGES_PER_URL: 1000;
readonly MAX_VIDEOS_PER_URL: 100;
readonly MAX_LINKS_PER_URL: 100;
// Total entries in a sitemap
readonly MAX_URL_ENTRIES: 50000;
// Date validation pattern
readonly ISO_DATE_REGEX: RegExp;
// Custom namespace limits
readonly MAX_CUSTOM_NAMESPACES: 20;
readonly MAX_NAMESPACE_LENGTH: 512;
// Cap on retained parser error objects
readonly MAX_PARSER_ERRORS: 100;
};
/**
* Default maximum number of items in each sitemap XML file
* Set below the max to leave room for URLs added during processing
*
* @see https://www.sitemaps.org/protocol.html#index
*/
export declare const DEFAULT_SITEMAP_ITEM_LIMIT = 45000;

60
node_modules/sitemap/dist/esm/lib/constants.js generated vendored Normal file
View File

@@ -0,0 +1,60 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
* Shared constants used across the sitemap library
* This file serves as a single source of truth for limits and validation patterns
*/
/**
* Security limits for sitemap generation and parsing
*
* These limits are based on:
* - sitemaps.org protocol specification
* - Security best practices to prevent DoS and injection attacks
* - Google's sitemap extension specifications
*
* @see https://www.sitemaps.org/protocol.html
* @see https://developers.google.com/search/docs/advanced/sitemaps/build-sitemap
*/
export const LIMITS = {
// URL constraints per sitemaps.org spec
MAX_URL_LENGTH: 2048,
// Matches a leading "http://" or "https://", case-insensitively
URL_PROTOCOL_REGEX: /^https?:\/\//i,
// Sitemap size limits per sitemaps.org spec
MIN_SITEMAP_ITEM_LIMIT: 1,
MAX_SITEMAP_ITEM_LIMIT: 50000,
// Video field length constraints per Google spec
MAX_VIDEO_TITLE_LENGTH: 100,
MAX_VIDEO_DESCRIPTION_LENGTH: 2048,
MAX_VIDEO_CATEGORY_LENGTH: 256,
MAX_TAGS_PER_VIDEO: 32,
// News field length constraints per Google spec
MAX_NEWS_TITLE_LENGTH: 200,
MAX_NEWS_NAME_LENGTH: 256,
// Image field length constraints per Google spec
MAX_IMAGE_CAPTION_LENGTH: 512,
MAX_IMAGE_TITLE_LENGTH: 512,
// Limits on number of items per URL entry
MAX_IMAGES_PER_URL: 1000,
MAX_VIDEOS_PER_URL: 100,
MAX_LINKS_PER_URL: 100,
// Total entries in a sitemap
MAX_URL_ENTRIES: 50000,
// Date validation - ISO 8601 / W3C format
// Accepts YYYY-MM-DD, optionally followed by THH:MM:SS, an optional
// three-digit millisecond part, and an optional "Z" or +/-HH:MM offset.
// Only the digit layout is checked, not whether the values form a real date.
ISO_DATE_REGEX: /^\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{3})?([+-]\d{2}:\d{2}|Z)?)?$/,
// Custom namespace limits to prevent DoS
MAX_CUSTOM_NAMESPACES: 20,
MAX_NAMESPACE_LENGTH: 512,
// Cap on stored parser errors to prevent memory DoS (BB-03)
// Errors beyond this limit are counted in errorCount but not retained as objects
MAX_PARSER_ERRORS: 100,
};
/**
* Default maximum number of items in each sitemap XML file
* Set below the max to leave room for URLs added during processing
*
* @see https://www.sitemaps.org/protocol.html#index
*/
export const DEFAULT_SITEMAP_ITEM_LIMIT = 45000;

116
node_modules/sitemap/dist/esm/lib/errors.d.ts generated vendored Normal file
View File

@@ -0,0 +1,116 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
* URL in SitemapItem does not exist
*/
export declare class NoURLError extends Error {
constructor(message?: string);
}
/**
* Config was not passed to SitemapItem constructor
*/
export declare class NoConfigError extends Error {
constructor(message?: string);
}
/**
* changefreq property in sitemap is invalid
*/
export declare class ChangeFreqInvalidError extends Error {
constructor(url: string, changefreq: any);
}
/**
* priority property in sitemap is invalid
*/
export declare class PriorityInvalidError extends Error {
constructor(url: string, priority: any);
}
/**
* SitemapIndex target Folder does not exists
*/
export declare class UndefinedTargetFolder extends Error {
constructor(message?: string);
}
/** A video entry lacks thumbnail_loc, title or description. */
export declare class InvalidVideoFormat extends Error {
constructor(url: string);
}
/** Video duration is outside the 0–28800 second range. */
export declare class InvalidVideoDuration extends Error {
constructor(url: string, duration: any);
}
/** Video description exceeds 2048 characters; `length` is the actual length. */
export declare class InvalidVideoDescription extends Error {
constructor(url: string, length: number);
}
/** Video rating is outside the 0–5 range. */
export declare class InvalidVideoRating extends Error {
constructor(url: string, title: any, rating: any);
}
/** An attribute value failed the given validator pattern. */
export declare class InvalidAttrValue extends Error {
constructor(key: string, val: any, validator: RegExp);
}
/** An attribute key is malformed. */
export declare class InvalidAttr extends Error {
constructor(key: string);
}
/** A news entry lacks publication, publication name/language, title or publication_date. */
export declare class InvalidNewsFormat extends Error {
constructor(url: string);
}
/** News access is present but is neither "Registration" nor "Subscription". */
export declare class InvalidNewsAccessValue extends Error {
constructor(url: string, access: any);
}
/** The xmllint tool needed for validation is not installed. */
export declare class XMLLintUnavailable extends Error {
constructor(message?: string);
}
/** Video title exceeds 100 characters; `length` is the actual length. */
export declare class InvalidVideoTitle extends Error {
constructor(url: string, length: number);
}
/** Video view count is negative. */
export declare class InvalidVideoViewCount extends Error {
constructor(url: string, count: number);
}
/** A video has more than 32 tags; `count` is the actual number. */
export declare class InvalidVideoTagCount extends Error {
constructor(url: string, count: number);
}
/** Video category exceeds 256 characters; `count` is the actual length. */
export declare class InvalidVideoCategory extends Error {
constructor(url: string, count: number);
}
/** Video family_friendly is neither yes nor no. */
export declare class InvalidVideoFamilyFriendly extends Error {
constructor(url: string, fam: string);
}
/** Video restriction is not a list of two-letter country codes. */
export declare class InvalidVideoRestriction extends Error {
constructor(url: string, code: string);
}
/** Video restriction relationship is neither allow nor deny. */
export declare class InvalidVideoRestrictionRelationship extends Error {
constructor(url: string, val?: string);
}
/** Video price type is missing for an empty price, or is not "rent"/"purchase". */
export declare class InvalidVideoPriceType extends Error {
constructor(url: string, priceType?: string, price?: string);
}
/** Video price resolution is neither hd nor sd. */
export declare class InvalidVideoResolution extends Error {
constructor(url: string, resolution: string);
}
/** Video price currency is not a three-capital-letter code. */
export declare class InvalidVideoPriceCurrency extends Error {
constructor(url: string, currency: string);
}
/** A stream was ended before anything was written to it. */
export declare class EmptyStream extends Error {
constructor();
}
/** A sitemap stream was ended without any content being written. */
export declare class EmptySitemap extends Error {
constructor();
}
/** A path was rejected; `reason` explains why. */
export declare class InvalidPathError extends Error {
constructor(path: string, reason: string);
}
/** A hostname was rejected; `reason` explains why. */
export declare class InvalidHostnameError extends Error {
constructor(hostname: string, reason: string);
}
/** A sitemap item limit is not a number between 1 and 50000. */
export declare class InvalidLimitError extends Error {
constructor(limit: any);
}
/** A publicBasePath was rejected; `reason` explains why. */
export declare class InvalidPublicBasePathError extends Error {
constructor(publicBasePath: string, reason: string);
}
/** An XSL stylesheet URL was rejected; `reason` explains why. */
export declare class InvalidXSLUrlError extends Error {
constructor(xslUrl: string, reason: string);
}
/** An XML attribute name contains characters other than alphanumerics, hyphens, underscores and colons. */
export declare class InvalidXMLAttributeNameError extends Error {
constructor(attributeName: string);
}

256
node_modules/sitemap/dist/esm/lib/errors.js generated vendored Normal file
View File

@@ -0,0 +1,256 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
/**
 * Raised when a sitemap item has no URL.
 */
export class NoURLError extends Error {
    /** @param {string} [message] - replaces the default text when truthy */
    constructor(message) {
        const text = message || 'URL is required';
        super(text);
        this.name = 'NoURLError';
        Error.captureStackTrace(this, NoURLError);
    }
}
/**
 * Raised when no configuration object was supplied for a sitemap item.
 */
export class NoConfigError extends Error {
    /** @param {string} [message] - replaces the default text when truthy */
    constructor(message) {
        const text = message || 'SitemapItem requires a configuration';
        super(text);
        this.name = 'NoConfigError';
        Error.captureStackTrace(this, NoConfigError);
    }
}
/**
 * Raised when a sitemap item's changefreq value is not valid.
 */
export class ChangeFreqInvalidError extends Error {
    constructor(url, changefreq) {
        const text = `${url}: changefreq "${changefreq}" is invalid`;
        super(text);
        this.name = 'ChangeFreqInvalidError';
        Error.captureStackTrace(this, ChangeFreqInvalidError);
    }
}
/**
 * Raised when a sitemap item's priority is not within 0 to 1 inclusive.
 */
export class PriorityInvalidError extends Error {
    constructor(url, priority) {
        const text = `${url}: priority "${priority}" must be a number between 0 and 1 inclusive`;
        super(text);
        this.name = 'PriorityInvalidError';
        Error.captureStackTrace(this, PriorityInvalidError);
    }
}
/**
 * Raised when the target folder for a sitemap index does not exist.
 */
export class UndefinedTargetFolder extends Error {
    /** @param {string} [message] - replaces the default text when truthy */
    constructor(message) {
        const text = message || 'Target folder must exist';
        super(text);
        this.name = 'UndefinedTargetFolder';
        Error.captureStackTrace(this, UndefinedTargetFolder);
    }
}
/** Raised when a video entry lacks thumbnail_loc, title or description. */
export class InvalidVideoFormat extends Error {
    constructor(url) {
        const text = `${url} video must include thumbnail_loc, title and description fields for videos`;
        super(text);
        this.name = 'InvalidVideoFormat';
        Error.captureStackTrace(this, InvalidVideoFormat);
    }
}
/** Raised when a video duration is outside 0–28800 seconds. */
export class InvalidVideoDuration extends Error {
    constructor(url, duration) {
        const text = `${url} duration "${duration}" must be an integer of seconds between 0 and 28800`;
        super(text);
        this.name = 'InvalidVideoDuration';
        Error.captureStackTrace(this, InvalidVideoDuration);
    }
}
/** Raised when a video description is longer than 2048 characters. */
export class InvalidVideoDescription extends Error {
    constructor(url, length) {
        super(`${url}: video description is too long ${length} vs limit of 2048 characters.`);
        this.name = 'InvalidVideoDescription';
        Error.captureStackTrace(this, InvalidVideoDescription);
    }
}
/** Raised when a video rating is outside 0–5 inclusive. */
export class InvalidVideoRating extends Error {
    constructor(url, title, rating) {
        const text = `${url}: video "${title}" rating "${rating}" must be between 0 and 5 inclusive`;
        super(text);
        this.name = 'InvalidVideoRating';
        Error.captureStackTrace(this, InvalidVideoRating);
    }
}
/** Raised when an attribute value does not match its validator pattern. */
export class InvalidAttrValue extends Error {
    constructor(key, val, validator) {
        // Plain `+` concatenation is kept on purpose so values are coerced
        // exactly as before.
        const quotedValue = '"' + val + '"';
        const quotedKey = '"' + key + '"';
        super(quotedValue + ' tested against: ' + validator + ' is not a valid value for attr: ' + quotedKey);
        this.name = 'InvalidAttrValue';
        Error.captureStackTrace(this, InvalidAttrValue);
    }
}
// InvalidAttr is only thrown when attrbuilder is called incorrectly internally
/* istanbul ignore next */
export class InvalidAttr extends Error {
    constructor(key) {
        // Plain `+` concatenation is kept so the key is coerced exactly as before.
        const quotedKey = '"' + key + '"';
        super(quotedKey + ' is malformed');
        this.name = 'InvalidAttr';
        Error.captureStackTrace(this, InvalidAttr);
    }
}
/** Raised when a news entry is missing one of its required fields. */
export class InvalidNewsFormat extends Error {
    constructor(url) {
        const text = `${url} News must include publication, publication name, publication language, title, and publication_date for news`;
        super(text);
        this.name = 'InvalidNewsFormat';
        Error.captureStackTrace(this, InvalidNewsFormat);
    }
}
/** Raised when news access is present but not Registration or Subscription. */
export class InvalidNewsAccessValue extends Error {
    constructor(url, access) {
        const text = `${url} News access "${access}" must be either Registration, Subscription or not be present`;
        super(text);
        this.name = 'InvalidNewsAccessValue';
        Error.captureStackTrace(this, InvalidNewsAccessValue);
    }
}
/** Raised when the xmllint tool required for validation is not installed. */
export class XMLLintUnavailable extends Error {
    /** @param {string} [message] - replaces the default text when truthy */
    constructor(message) {
        const text = message || 'xmlLint is not installed. XMLLint is required to validate';
        super(text);
        this.name = 'XMLLintUnavailable';
        Error.captureStackTrace(this, XMLLintUnavailable);
    }
}
/** Raised when a video title is longer than 100 characters. */
export class InvalidVideoTitle extends Error {
    constructor(url, length) {
        const text = `${url}: video title is too long ${length} vs 100 character limit`;
        super(text);
        this.name = 'InvalidVideoTitle';
        Error.captureStackTrace(this, InvalidVideoTitle);
    }
}
/** Raised when a video view count is negative. */
export class InvalidVideoViewCount extends Error {
    constructor(url, count) {
        const text = `${url}: video view count must be positive, view count was ${count}`;
        super(text);
        this.name = 'InvalidVideoViewCount';
        Error.captureStackTrace(this, InvalidVideoViewCount);
    }
}
/** Raised when a video carries more than 32 tags. */
export class InvalidVideoTagCount extends Error {
    constructor(url, count) {
        const text = `${url}: video can have no more than 32 tags, this has ${count}`;
        super(text);
        this.name = 'InvalidVideoTagCount';
        Error.captureStackTrace(this, InvalidVideoTagCount);
    }
}
/** Raised when a video category is longer than 256 characters. */
export class InvalidVideoCategory extends Error {
    constructor(url, count) {
        const text = `${url}: video category can only be 256 characters but was passed ${count}`;
        super(text);
        this.name = 'InvalidVideoCategory';
        Error.captureStackTrace(this, InvalidVideoCategory);
    }
}
/** Raised when a video's family_friendly value is neither yes nor no. */
export class InvalidVideoFamilyFriendly extends Error {
    constructor(url, fam) {
        const text = `${url}: video family friendly must be yes or no, was passed "${fam}"`;
        super(text);
        this.name = 'InvalidVideoFamilyFriendly';
        Error.captureStackTrace(this, InvalidVideoFamilyFriendly);
    }
}
/** Raised when a video restriction is not a list of two-letter country codes. */
export class InvalidVideoRestriction extends Error {
    constructor(url, code) {
        const text = `${url}: video restriction must be one or more two letter country codes. Was passed "${code}"`;
        super(text);
        this.name = 'InvalidVideoRestriction';
        Error.captureStackTrace(this, InvalidVideoRestriction);
    }
}
/** Raised when a video restriction relationship is neither allow nor deny. */
export class InvalidVideoRestrictionRelationship extends Error {
    constructor(url, val) {
        const text = `${url}: video restriction relationship must be either allow or deny. Was passed "${val}"`;
        super(text);
        this.name = 'InvalidVideoRestrictionRelationship';
        Error.captureStackTrace(this, InvalidVideoRestrictionRelationship);
    }
}
/**
 * Raised for a bad video price type. The message distinguishes a missing
 * type on an empty price from a type that is not "rent" or "purchase".
 */
export class InvalidVideoPriceType extends Error {
    constructor(url, priceType, price) {
        const typeMissingForEmptyPrice = priceType === undefined && price === '';
        let text;
        if (typeMissingForEmptyPrice) {
            text = `${url}: video priceType is required when price is not provided`;
        }
        else {
            text = `${url}: video price type "${priceType}" is not "rent" or "purchase"`;
        }
        super(text);
        this.name = 'InvalidVideoPriceType';
        Error.captureStackTrace(this, InvalidVideoPriceType);
    }
}
/** Raised when a video price resolution is neither hd nor sd. */
export class InvalidVideoResolution extends Error {
    constructor(url, resolution) {
        const text = `${url}: video price resolution "${resolution}" is not hd or sd`;
        super(text);
        this.name = 'InvalidVideoResolution';
        Error.captureStackTrace(this, InvalidVideoResolution);
    }
}
/** Raised when a video price currency is not a three-capital-letter code. */
export class InvalidVideoPriceCurrency extends Error {
    constructor(url, currency) {
        const text = `${url}: video price currency "${currency}" must be a three capital letter abbrieviation for the country currency`;
        super(text);
        this.name = 'InvalidVideoPriceCurrency';
        Error.captureStackTrace(this, InvalidVideoPriceCurrency);
    }
}
/** Raised when a stream is ended before anything was written to it. */
export class EmptyStream extends Error {
    constructor() {
        const text = 'You have ended the stream before anything was written. streamToPromise MUST be called before ending the stream.';
        super(text);
        this.name = 'EmptyStream';
        Error.captureStackTrace(this, EmptyStream);
    }
}
/** Raised when a sitemap stream is ended without anything written to it. */
export class EmptySitemap extends Error {
    constructor() {
        super('You ended the stream without writing anything.');
        this.name = 'EmptySitemap';
        // Pass this class (not EmptyStream) as the stack-trace cut-off, so the
        // trace starts at the code that created the error.
        Error.captureStackTrace(this, EmptySitemap);
    }
}
/** Raised when a path is rejected; `reason` says why. */
export class InvalidPathError extends Error {
    constructor(path, reason) {
        const text = `Invalid path "${path}": ${reason}`;
        super(text);
        this.name = 'InvalidPathError';
        Error.captureStackTrace(this, InvalidPathError);
    }
}
/** Raised when a hostname is rejected; `reason` says why. */
export class InvalidHostnameError extends Error {
    constructor(hostname, reason) {
        const text = `Invalid hostname "${hostname}": ${reason}`;
        super(text);
        this.name = 'InvalidHostnameError';
        Error.captureStackTrace(this, InvalidHostnameError);
    }
}
/** Raised when a sitemap item limit is not a number between 1 and 50000. */
export class InvalidLimitError extends Error {
    constructor(limit) {
        const text = `Invalid limit "${limit}": must be a number between 1 and 50000 (per sitemaps.org spec)`;
        super(text);
        this.name = 'InvalidLimitError';
        Error.captureStackTrace(this, InvalidLimitError);
    }
}
/**
 * Error describing a rejected publicBasePath together with the reason it was rejected.
 */
export class InvalidPublicBasePathError extends Error {
  /**
   * @param publicBasePath - the rejected value, quoted in the message
   * @param reason - explanation appended after the value
   */
  constructor(publicBasePath, reason) {
    const detail = `Invalid publicBasePath "${publicBasePath}": ${reason}`;
    super(detail);
    // The name must be set before the trace is captured so the stack header uses it.
    this.name = 'InvalidPublicBasePathError';
    // Trim this constructor's own frame from the captured trace.
    Error.captureStackTrace(this, InvalidPublicBasePathError);
  }
}
/**
 * Error describing a rejected XSL stylesheet URL together with the reason it was rejected.
 */
export class InvalidXSLUrlError extends Error {
  /**
   * @param xslUrl - the rejected URL, quoted in the message
   * @param reason - explanation appended after the URL
   */
  constructor(xslUrl, reason) {
    const detail = `Invalid xslUrl "${xslUrl}": ${reason}`;
    super(detail);
    // The name must be set before the trace is captured so the stack header uses it.
    this.name = 'InvalidXSLUrlError';
    // Trim this constructor's own frame from the captured trace.
    Error.captureStackTrace(this, InvalidXSLUrlError);
  }
}
/**
 * Error describing an XML attribute name containing characters outside the
 * set listed in its message.
 */
export class InvalidXMLAttributeNameError extends Error {
  /**
   * @param attributeName - the rejected attribute name, quoted in the message
   */
  constructor(attributeName) {
    const detail = `Invalid XML attribute name "${attributeName}": must contain only alphanumeric characters, hyphens, underscores, and colons`;
    super(detail);
    // The name must be set before the trace is captured so the stack header uses it.
    this.name = 'InvalidXMLAttributeNameError';
    // Trim this constructor's own frame from the captured trace.
    Error.captureStackTrace(this, InvalidXMLAttributeNameError);
  }
}

View File

@@ -0,0 +1,55 @@
import type { SAXStream } from 'sax';
import { Readable, Transform, TransformOptions, TransformCallback } from 'node:stream';
import { IndexItem, ErrorLevel } from './types.js';
/**
* Logging callback: receives a severity level followed by the values to log.
*/
type Logger = (level: 'warn' | 'error' | 'info' | 'log', ...message: Parameters<Console['log']>) => void;
/**
* Options for XMLToSitemapIndexStream, on top of the standard Transform options.
*/
export interface XMLToSitemapIndexItemStreamOptions extends TransformOptions {
/** Error handling level applied to problems found while parsing. */
level?: ErrorLevel;
/** Function that parse problems are reported to; pass `false` to disable logging. */
logger?: Logger | false;
}
/**
* Takes a stream of xml and transforms it into a stream of IndexItems
* Use this to parse existing sitemap indices into config options compatible with this library
*/
export declare class XMLToSitemapIndexStream extends Transform {
/** Error handling level in effect for this parser. */
level: ErrorLevel;
/** Function that parse problems are reported to. */
logger: Logger;
/** First problem recorded while parsing, or null when none has been recorded. */
error: Error | null;
/** Underlying SAX stream that the incoming XML is written to. */
saxStream: SAXStream;
constructor(opts?: XMLToSitemapIndexItemStreamOptions);
_transform(data: string, encoding: string, callback: TransformCallback): void;
/** Records a parse problem on `error`. */
private err;
}
/**
Read xml and resolve with the list of index items it contains, or reject with
an error
```
const { createReadStream } = require('fs')
const { parseSitemapIndex } = require('sitemap')
parseSitemapIndex(createReadStream('./example-index.xml')).then(
// the parsed entries can be inspected, modified or stored,
// or written to a SitemapIndexStream to produce an index again
(items) => console.log(items),
(err) => console.log(err)
)
```
@param {Readable} xml what to parse
@param {number} maxEntries Maximum number of sitemap entries to parse (default: 50,000 per sitemaps.org spec)
@return {Promise<IndexItem[]>} resolves with list of index items that can be fed into a SitemapIndexStream. Rejects with an Error object.
*/
export declare function parseSitemapIndex(xml: Readable, maxEntries?: number): Promise<IndexItem[]>;
/**
* Options for IndexObjectStreamToJSON, on top of the standard Transform options.
*/
export interface IndexObjectStreamToJSONOptions extends TransformOptions {
/** Whether to separate entries by a new line (true) or by a comma inside a JSON array (false). */
lineSeparated: boolean;
}
/**
* A Transform that converts a stream of objects into a JSON Array or a line
* separated stringified JSON
* @param [lineSeparated=false] whether to separate entries by a new line or comma
*/
export declare class IndexObjectStreamToJSON extends Transform {
/** Whether entries are separated by a new line instead of a comma. */
lineSeparated: boolean;
/** Whether at least one entry has been received so far. */
firstWritten: boolean;
constructor(opts?: IndexObjectStreamToJSONOptions);
_transform(chunk: IndexItem, encoding: string, cb: TransformCallback): void;
_flush(cb: TransformCallback): void;
}
export {};

View File

@@ -0,0 +1,262 @@
import sax from 'sax';
import { Transform, } from 'node:stream';
import { ErrorLevel, IndexTagNames } from './types.js';
import { validateURL } from './validation.js';
import { LIMITS } from './constants.js';
/**
 * Reports whether `tagName` is one of the tag names listed in IndexTagNames.
 *
 * @param tagName - tag name reported by the XML parser
 * @returns true when IndexTagNames has an own key equal to `tagName`
 */
function isValidTagName(tagName) {
  // This only works because the enum name and value are the same.
  // An own-property check is used because the `in` operator also matches
  // inherited keys, which would accept tags such as <constructor> or <toString>.
  return Object.hasOwn(IndexTagNames, tagName);
}
/**
 * Creates a fresh, empty index item.
 *
 * @returns a new object whose `url` is the empty string
 */
function tagTemplate() {
  const blankItem = { url: '' };
  return blankItem;
}
// Default logger: forwards the message parts to the console method named by `level`.
const defaultLogger = (level, ...message) => {
  return console[level](...message);
};
// Options used when the parser stream is constructed without arguments.
const defaultStreamOpts = { logger: defaultLogger };
/**
 * Takes a stream of xml and transforms it into a stream of IndexItems
 * Use this to parse existing sitemap indices into config options compatible with this library
 *
 * Incoming chunks are written to an internal SAX stream; an item is pushed
 * each time a closing `sitemap` tag is seen and the item has a non-empty url.
 * Only the first problem found is kept in `error`; it is passed to the
 * transform callback when the level is ErrorLevel.THROW.
 */
export class XMLToSitemapIndexStream extends Transform {
  level;
  logger;
  error;
  saxStream;
  /**
   * @param opts - stream options plus optional `level` and `logger`
   *   (`logger: false` disables logging). The object is not modified.
   */
  constructor(opts = defaultStreamOpts) {
    // Copy instead of assigning onto `opts`: it may be the shared module
    // default or an object the caller still owns.
    super({ ...opts, objectMode: true });
    this.error = null;
    this.saxStream = sax.createStream(true, {
      xmlns: true,
      // strictEntities is passed through to sax although its type
      // definitions do not list the option
      strictEntities: true,
      trim: true,
    });
    this.level = opts.level || ErrorLevel.WARN;
    if (this.level !== ErrorLevel.SILENT && opts.logger !== false) {
      this.logger = opts.logger ?? defaultLogger;
    } else {
      this.logger = () => undefined;
    }
    let currentItem = tagTemplate();
    let currentTag;
    // Shared by the text and cdata handlers: stores validated loc/lastmod
    // content on the current item. Returns false when the current tag is
    // neither of the two, so the caller can report it in its own words.
    const storeContent = (content) => {
      switch (currentTag) {
        case IndexTagNames.loc:
          // Validate URL for security: prevents protocol injection, checks length limits
          try {
            validateURL(content, 'Sitemap index URL');
            currentItem.url = content;
          } catch (error) {
            const errMsg = error instanceof Error ? error.message : String(error);
            this.logger('warn', 'Invalid URL in sitemap index:', errMsg);
            this.err(`Invalid URL in sitemap index: ${errMsg}`);
          }
          return true;
        case IndexTagNames.lastmod:
          // Validate date format for security and spec compliance
          if (content && !LIMITS.ISO_DATE_REGEX.test(content)) {
            this.logger('warn', 'Invalid lastmod date format in sitemap index:', content);
            this.err(`Invalid lastmod date format: ${content}`);
          } else {
            currentItem.lastmod = content;
          }
          return true;
        default:
          return false;
      }
    };
    this.saxStream.on('opentagstart', (tag) => {
      currentTag = tag.name;
    });
    this.saxStream.on('opentag', (tag) => {
      if (!isValidTagName(tag.name)) {
        this.logger('warn', 'unhandled tag', tag.name);
        this.err(`unhandled tag: ${tag.name}`);
      }
    });
    this.saxStream.on('text', (text) => {
      if (!storeContent(text)) {
        this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
        this.err(`unhandled text for tag: ${currentTag} '${text}'`);
      }
    });
    this.saxStream.on('cdata', (text) => {
      if (!storeContent(text)) {
        this.logger('log', 'unhandled cdata for tag:', currentTag);
        this.err(`unhandled cdata for tag: ${currentTag}`);
      }
    });
    this.saxStream.on('attribute', (attr) => {
      // Attributes are only expected on the root sitemapindex element.
      if (currentTag !== IndexTagNames.sitemapindex) {
        this.logger('log', 'unhandled attr', currentTag, attr.name);
        this.err(`unhandled attr: ${currentTag} ${attr.name}`);
      }
    });
    this.saxStream.on('closetag', (tag) => {
      if (tag !== IndexTagNames.sitemap) {
        return;
      }
      // Only push items with valid URLs (non-empty after validation)
      if (currentItem.url) {
        this.push(currentItem);
      }
      currentItem = tagTemplate();
    });
  }
  /**
   * Feeds one chunk of XML to the SAX stream.
   *
   * @param data - chunk of XML
   * @param encoding - encoding forwarded to the SAX stream's write()
   * @param callback - invoked once the chunk has been accepted; receives the
   *   recorded error when the level is ErrorLevel.THROW, otherwise null
   */
  _transform(data, encoding, callback) {
    try {
      const cb = () => callback(this.level === ErrorLevel.THROW ? this.error : null);
      // Honour backpressure from the SAX stream before asking for more input.
      if (!this.saxStream.write(data, encoding)) {
        this.saxStream.once('drain', cb);
      } else {
        process.nextTick(cb);
      }
    } catch (error) {
      callback(error);
    }
  }
  /**
   * Records a problem; only the first one is kept.
   *
   * @param msg - description of the problem
   */
  err(msg) {
    if (!this.error) {
      this.error = new Error(msg);
    }
  }
}
/**
 * Reads a sitemap index from an XML stream and collects its entries.
 *
 * ```
 * const { createReadStream } = require('fs')
 * const { parseSitemapIndex } = require('sitemap')
 * parseSitemapIndex(createReadStream('./example-index.xml')).then(
 *   (items) => console.log(items),
 *   (err) => console.log(err)
 * )
 * ```
 * @param {Readable} xml what to parse
 * @param {number} maxEntries Maximum number of sitemap entries to parse (default: 50,000 per sitemaps.org spec)
 * @return {Promise<IndexItem[]>} resolves with list of index items that can be fed into a SitemapIndexStream. Rejects with an Error object.
 */
export async function parseSitemapIndex(xml, maxEntries = LIMITS.MAX_SITEMAP_ITEM_LIMIT) {
  const collected = [];
  return new Promise((resolve, reject) => {
    // Guards against settling twice: errors, the entry limit and `end`
    // can all race once the streams are being torn down.
    let done = false;
    const parser = new XMLToSitemapIndexStream();
    const fail = (error) => {
      if (done) {
        return;
      }
      done = true;
      reject(error);
    };
    // Handle source stream errors (prevents unhandled error events on xml)
    xml.on('error', fail);
    const output = xml.pipe(parser);
    output.on('data', (entry) => {
      if (done) {
        return;
      }
      if (collected.length < maxEntries) {
        collected.push(entry);
        return;
      }
      // Security: Prevent memory exhaustion by limiting number of entries
      done = true;
      reject(new Error(`Sitemap index exceeds maximum allowed entries (${maxEntries})`));
      // Immediately destroy both streams to stop further processing (BB-05)
      parser.destroy();
      xml.destroy();
    });
    output.on('end', () => {
      if (done) {
        return;
      }
      done = true;
      resolve(collected);
    });
    output.on('error', fail);
  });
}
const defaultObjectStreamOpts = {
  lineSeparated: false,
};
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 *
 * In array mode the output is always a complete JSON array, including `[]`
 * when no object was written.
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
export class IndexObjectStreamToJSON extends Transform {
  lineSeparated;
  firstWritten;
  /**
   * @param opts - stream options plus `lineSeparated`. The object is not modified.
   */
  constructor(opts = defaultObjectStreamOpts) {
    // Copy instead of assigning onto `opts`: it may be the shared module
    // default or an object the caller still owns.
    super({ ...opts, writableObjectMode: true });
    this.lineSeparated = opts.lineSeparated;
    this.firstWritten = false;
  }
  /**
   * Emits the separator (or the opening bracket for the first entry)
   * followed by the JSON form of `chunk`.
   */
  _transform(chunk, encoding, cb) {
    if (!this.firstWritten) {
      this.firstWritten = true;
      if (!this.lineSeparated) {
        this.push('[');
      }
    } else {
      this.push(this.lineSeparated ? '\n' : ',');
    }
    if (chunk) {
      this.push(JSON.stringify(chunk));
    }
    cb();
  }
  /**
   * Closes the JSON array when in array mode.
   */
  _flush(cb) {
    if (!this.lineSeparated) {
      // Nothing was written, so the opening bracket was never emitted;
      // without it the output would be a lone "]", which is not valid JSON.
      if (!this.firstWritten) {
        this.push('[');
      }
      this.push(']');
    }
    cb();
  }
}

View File

@@ -0,0 +1,169 @@
import { WriteStream } from 'node:fs';
import { Transform, TransformOptions, TransformCallback } from 'node:stream';
import { IndexItem, SitemapItemLoose, ErrorLevel, IndexTagNames } from './types.js';
import { SitemapStream } from './sitemap-stream.js';
export { IndexTagNames };
/**
* Options for the SitemapIndexStream
*/
export interface SitemapIndexStreamOptions extends TransformOptions {
/**
* Whether to output the lastmod date only (no time)
*
* @default false
*/
lastmodDateOnly?: boolean;
/**
* How to handle errors in passed in urls
*
* With ErrorLevel.THROW the error is passed to the stream callback; with
* ErrorLevel.WARN it is printed with console.warn; with any other level it
* is ignored silently.
*
* @default ErrorLevel.WARN
*/
level?: ErrorLevel;
/**
* URL to an XSL stylesheet to include in the XML
*
* When provided, the value is validated by the constructor.
*/
xslUrl?: string;
}
/**
* `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
*
* It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @extends {Transform}
*/
export declare class SitemapIndexStream extends Transform {
/** Whether lastmod values are written as a date only (no time). */
lastmodDateOnly: boolean;
/** How invalid items are handled. */
level: ErrorLevel;
/** URL of the XSL stylesheet referenced in the output, if any. */
xslUrl?: string;
/** Whether the XML declaration and opening tag have already been written. */
private hasHeadOutput;
/**
* `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
*
* It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options.
*/
constructor(opts?: SitemapIndexStreamOptions);
/** Writes the XML declaration, the optional stylesheet reference and the opening tag. */
private writeHeadOutput;
_transform(item: IndexItem | string, encoding: string, callback: TransformCallback): void;
_flush(cb: TransformCallback): void;
}
/**
* Callback function type for creating new sitemap streams when the item limit is reached.
*
* This function is called by SitemapAndIndexStream to create a new sitemap file when
* the current one reaches the item limit.
*
* @param i - The zero-based index of the sitemap file being created (0 for first sitemap,
* 1 for second, etc.)
* @returns A tuple containing:
* - [0]: IndexItem or URL string to add to the sitemap index
* - [1]: SitemapStream instance for writing sitemap items
* - [2]: WriteStream where the sitemap will be piped (the stream will be
* awaited for 'finish' before creating the next sitemap)
*
* @example
* ```typescript
* const getSitemapStream = (i: number) => {
* const sitemapStream = new SitemapStream();
* const fileName = `sitemap-${i}.xml`;
* const writeStream = createWriteStream(`./${fileName}`);
* sitemapStream.pipe(writeStream);
* return [`https://example.com/${fileName}`, sitemapStream, writeStream];
* };
* ```
*/
type getSitemapStreamFunc = (i: number) => [IndexItem | string, SitemapStream, WriteStream];
/**
* Options for the SitemapAndIndexStream
*
* @extends {SitemapIndexStreamOptions}
*/
export interface SitemapAndIndexStreamOptions extends SitemapIndexStreamOptions {
/**
* Max number of items in each sitemap XML file.
*
* When the limit is reached the current sitemap file will be closed,
* a wait for `finish` on the target write stream will happen,
* and a new sitemap file will be created.
*
* Range: 1 - 50,000
*
* @default 45000
*/
limit?: number;
/**
* Callback for SitemapAndIndexStream that creates a new sitemap stream for a given sitemap index.
*
* Called when a new sitemap file is needed.
*
* The write stream is the destination where the sitemap was piped.
* SitemapAndIndexStream will wait for the `finish` event on each sitemap's
* write stream before moving on to the next sitemap. This ensures that the
* contents of the write stream will be fully written before being used
* by any following operations (e.g. uploading, reading contents for unit tests).
*
* @param i - The index of the sitemap file
* @returns A tuple containing the index item to be written into the sitemap index, the sitemap stream, and the write stream for the sitemap pipe destination
*/
getSitemapStream: getSitemapStreamFunc;
}
/**
* `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
* writes them to sitemap files, adds the sitemap files to a sitemap index,
* and creates new sitemap files when the count limit is reached.
*
* It waits for the target stream of the current sitemap file to finish before
* moving on to the next if the target stream is returned by the `getSitemapStream`
* callback in the 3rd position of the tuple.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @extends {SitemapIndexStream}
*/
export declare class SitemapAndIndexStream extends SitemapIndexStream {
/** Total number of items written so far, across all sitemap files. */
private itemsWritten;
/** Callback that supplies the next sitemap stream and its index entry. */
private getSitemapStream;
/** Sitemap stream currently receiving items, if one has been created. */
private currentSitemap?;
/** Maximum number of items per sitemap file. */
private limit;
/** Write stream the current sitemap is piped to, if one was returned by the callback. */
private currentSitemapPipeline?;
/**
* Flag to prevent race conditions when creating new sitemap files.
* Set to true while waiting for the current sitemap to finish and
* a new one to be created.
*/
private isCreatingSitemap;
/**
* `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
* writes them to sitemap files, adds the sitemap files to a sitemap index,
* and creates new sitemap files when the count limit is reached.
*
* It waits for the target stream of the current sitemap file to finish before
* moving on to the next if the target stream is returned by the `getSitemapStream`
* callback in the 3rd position of the tuple.
*
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
* before `finish` will be emitted. Failure to read the stream will result in hangs.
*
* @param {SitemapAndIndexStreamOptions} opts - Stream options.
*/
constructor(opts: SitemapAndIndexStreamOptions);
_transform(item: SitemapItemLoose, encoding: string, callback: TransformCallback): void;
/** Writes one item to the current sitemap stream and counts it. */
private writeItem;
/**
* Called when the stream is finished.
* If there is a current sitemap, we wait for it to finish before calling the callback.
* Includes proper event listener cleanup to prevent memory leaks.
*
* @param cb - The callback to invoke when flushing is complete
*/
_flush(cb: TransformCallback): void;
/** Obtains the next sitemap stream from the callback and adds its entry to the index. */
private createSitemap;
}

View File

@@ -0,0 +1,359 @@
import { Transform } from 'node:stream';
import { ErrorLevel, IndexTagNames, } from './types.js';
import { stylesheetInclude } from './sitemap-stream.js';
import { element, otag, ctag } from './sitemap-xml.js';
import { LIMITS, DEFAULT_SITEMAP_ITEM_LIMIT } from './constants.js';
import { validateURL, validateXSLUrl } from './validation.js';
// Re-export IndexTagNames for backward compatibility
export { IndexTagNames };
const xmlDec = '<?xml version="1.0" encoding="UTF-8"?>';
const sitemapIndexTagStart = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
const closetag = '</sitemapindex>';
const defaultStreamOpts = {};
/**
 * `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
 *
 * It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
 *
 * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
 * before `finish` will be emitted. Failure to read the stream will result in hangs.
 *
 * @extends {Transform}
 */
export class SitemapIndexStream extends Transform {
  lastmodDateOnly;
  level;
  xslUrl;
  hasHeadOutput;
  /**
   * @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options.
   *   The object is not modified. `xslUrl`, when provided, is validated and
   *   an invalid value makes the constructor throw.
   */
  constructor(opts = defaultStreamOpts) {
    // Copy instead of assigning onto `opts`: it may be the shared module
    // default or an object the caller still owns.
    super({ ...opts, objectMode: true });
    this.hasHeadOutput = false;
    this.lastmodDateOnly = opts.lastmodDateOnly || false;
    this.level = opts.level ?? ErrorLevel.WARN;
    if (opts.xslUrl !== undefined) {
      validateXSLUrl(opts.xslUrl);
    }
    this.xslUrl = opts.xslUrl;
  }
  /**
   * Writes the XML declaration, the optional stylesheet reference and the
   * opening tag of the index.
   */
  writeHeadOutput() {
    this.hasHeadOutput = true;
    const stylesheet = this.xslUrl ? stylesheetInclude(this.xslUrl) : '';
    this.push(xmlDec + stylesheet + sitemapIndexTagStart);
  }
  /**
   * Writes one index entry.
   *
   * Invalid entries are handled according to `level`: THROW passes the error
   * to `callback`, WARN prints it with console.warn, anything else is silent.
   * An entry with a missing or invalid URL is skipped; an entry with an
   * invalid lastmod is written without the lastmod element.
   *
   * @param item - sitemap URL string, or an object with `url` and optional `lastmod`
   * @param encoding - unused
   * @param callback - invoked when the entry has been handled
   */
  _transform(item, encoding, callback) {
    if (!this.hasHeadOutput) {
      this.writeHeadOutput();
    }
    // Applies the configured error level; returns true when the error was
    // handed to the callback and processing of this item must stop.
    const report = (error, ...context) => {
      if (this.level === ErrorLevel.THROW) {
        callback(error);
        return true;
      }
      if (this.level === ErrorLevel.WARN) {
        console.warn(error.message, ...context);
      }
      return false;
    };
    try {
      const url = typeof item === 'string' ? item : item.url;
      if (!url || typeof url !== 'string') {
        const error = new Error('Invalid sitemap index item: URL must be a non-empty string');
        if (!report(error, item)) {
          // For SILENT or after WARN, skip this item
          callback();
        }
        return;
      }
      // Security: Use centralized validation to enforce protocol restrictions,
      // length limits, and prevent injection attacks
      try {
        validateURL(url, 'Sitemap index URL');
      } catch (error) {
        // Wrap the validation error with consistent message format
        const validationMsg = error instanceof Error ? error.message : String(error);
        if (!report(new Error(`Invalid URL in sitemap index: ${validationMsg}`))) {
          // For SILENT or after WARN, skip this item
          callback();
        }
        return;
      }
      // Build the lastmod element before anything is pushed, so an item
      // rejected at THROW level does not leave an unclosed <sitemap>
      // fragment in the output.
      let lastmodElement;
      if (typeof item !== 'string' && item.lastmod) {
        try {
          const lastmod = new Date(item.lastmod).toISOString();
          lastmodElement = element(IndexTagNames.lastmod, this.lastmodDateOnly ? lastmod.slice(0, 10) : lastmod);
        } catch {
          if (report(new Error(`Invalid lastmod date in sitemap index: ${item.lastmod}`))) {
            return;
          }
          // Continue without lastmod for SILENT or after WARN
        }
      }
      this.push(otag(IndexTagNames.sitemap));
      this.push(element(IndexTagNames.loc, url));
      if (lastmodElement !== undefined) {
        this.push(lastmodElement);
      }
      this.push(ctag(IndexTagNames.sitemap));
      callback();
    } catch (error) {
      callback(error instanceof Error ? error : new Error(String(error)));
    }
  }
  /**
   * Writes the closing tag, preceded by the head when no entry was written.
   */
  _flush(cb) {
    if (!this.hasHeadOutput) {
      this.writeHeadOutput();
    }
    this.push(closetag);
    cb();
  }
}
/**
 * Returns a promise that resolves when `stream` emits `finish` and rejects
 * if it emits `error` first. Resolves immediately when there is no stream.
 *
 * @param stream - stream to wait for, or undefined
 * @param endStream - when true, `stream.end()` is called after the listeners are attached
 */
function waitForStreamFinish(stream, endStream = false) {
  if (!stream) {
    return Promise.resolve();
  }
  return new Promise((resolve, reject) => {
    const onFinish = () => {
      stream.off('error', onError);
      resolve();
    };
    const onError = (err) => {
      stream.off('finish', onFinish);
      reject(err);
    };
    stream.once('finish', onFinish);
    stream.on('error', onError);
    if (endStream) {
      stream.end();
    }
  });
}
/**
 * `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
 * writes them to sitemap files, adds the sitemap files to a sitemap index,
 * and creates new sitemap files when the count limit is reached.
 *
 * It waits for the target stream of the current sitemap file to finish before
 * moving on to the next if the target stream is returned by the `getSitemapStream`
 * callback in the 3rd position of the tuple.
 *
 * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
 * before `finish` will be emitted. Failure to read the stream will result in hangs.
 *
 * @extends {SitemapIndexStream}
 */
export class SitemapAndIndexStream extends SitemapIndexStream {
  itemsWritten;
  getSitemapStream;
  currentSitemap;
  limit;
  currentSitemapPipeline;
  /**
   * Flag to prevent race conditions when creating new sitemap files.
   * Set to true while waiting for the current sitemap to finish and
   * a new one to be created.
   */
  isCreatingSitemap;
  /**
   * @param {SitemapAndIndexStreamOptions} opts - Stream options. The object is not modified.
   * @throws {Error} when `limit` is not within the allowed range (this includes NaN)
   */
  constructor(opts) {
    // Copy instead of assigning onto the caller's object.
    super({ ...opts, objectMode: true });
    this.itemsWritten = 0;
    this.getSitemapStream = opts.getSitemapStream;
    this.limit = opts.limit ?? DEFAULT_SITEMAP_ITEM_LIMIT;
    this.isCreatingSitemap = false;
    // Validate limit is within acceptable range per sitemaps.org spec
    // See: https://www.sitemaps.org/protocol.html#index
    // Written as a negated "in range" test so that NaN, which fails every
    // comparison, is rejected as well.
    const inRange = this.limit >= LIMITS.MIN_SITEMAP_ITEM_LIMIT &&
      this.limit <= LIMITS.MAX_SITEMAP_ITEM_LIMIT;
    if (!inRange) {
      throw new Error(`limit must be between ${LIMITS.MIN_SITEMAP_ITEM_LIMIT} and ${LIMITS.MAX_SITEMAP_ITEM_LIMIT} per sitemaps.org spec, got ${this.limit}`);
    }
  }
  /**
   * Routes one sitemap item to the current sitemap file, starting a new file
   * first whenever the number of items written is a multiple of `limit`.
   *
   * @param item - sitemap item to write
   * @param encoding - forwarded to the index entry write
   * @param callback - invoked once the item was accepted, or with an error
   */
  _transform(item, encoding, callback) {
    if (this.itemsWritten % this.limit !== 0) {
      this.writeItem(item, callback);
      return;
    }
    // Prevent race condition if multiple items arrive during sitemap creation
    if (this.isCreatingSitemap) {
      // Wait and retry on next tick
      process.nextTick(() => this._transform(item, encoding, callback));
      return;
    }
    // Opens the next sitemap and writes the item to it. A failure to open
    // is reported through the callback so the item is never written to a
    // missing or already-ended sitemap stream.
    const openNextAndWrite = () => {
      const failure = this.createSitemap(encoding);
      if (failure) {
        callback(failure);
        return;
      }
      this.writeItem(item, callback);
    };
    if (!this.currentSitemap) {
      openNextAndWrite();
      return;
    }
    this.isCreatingSitemap = true;
    // End the current sitemap and wait until both it and its pipe target
    // have finished before opening the next file.
    Promise.all([
      waitForStreamFinish(this.currentSitemap, true),
      waitForStreamFinish(this.currentSitemapPipeline),
    ])
      .then(() => {
        this.isCreatingSitemap = false;
        openNextAndWrite();
      })
      .catch((err) => {
        this.isCreatingSitemap = false;
        callback(err);
      });
  }
  /**
   * Writes one item to the current sitemap stream, honouring its
   * backpressure, and counts it.
   */
  writeItem(item, callback) {
    if (!this.currentSitemap) {
      callback(new Error('No sitemap stream available'));
      return;
    }
    if (!this.currentSitemap.write(item)) {
      this.currentSitemap.once('drain', callback);
    } else {
      process.nextTick(callback);
    }
    // Increment the count of items written
    this.itemsWritten++;
  }
  /**
   * Called when the stream is finished.
   * If there is a current sitemap, we wait for it to finish before calling the callback.
   * Includes proper event listener cleanup to prevent memory leaks.
   *
   * @param cb - The callback to invoke when flushing is complete
   */
  _flush(cb) {
    // The pipeline (pipe target) will get its end() call
    // from the sitemap stream ending.
    Promise.all([
      waitForStreamFinish(this.currentSitemap, true),
      waitForStreamFinish(this.currentSitemapPipeline),
    ])
      .then(() => {
        super._flush(cb);
      })
      .catch((err) => {
        cb(err);
      });
  }
  /**
   * Asks `getSitemapStream` for the next sitemap, validates what it returned,
   * makes it the current sitemap and writes its entry to the index.
   *
   * @param encoding - forwarded to the index entry write
   * @returns an Error describing why the sitemap could not be created, or
   *   undefined on success
   */
  createSitemap(encoding) {
    const sitemapIndex = this.itemsWritten / this.limit;
    let result;
    try {
      result = this.getSitemapStream(sitemapIndex);
    } catch (err) {
      return new Error(`getSitemapStream callback threw an error for index ${sitemapIndex}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    // Validate the return value
    if (!Array.isArray(result) || result.length !== 3) {
      return new Error(`getSitemapStream must return a 3-element array [IndexItem | string, SitemapStream, WriteStream], got: ${typeof result}`);
    }
    const [idxItem, currentSitemap, currentSitemapPipeline] = result;
    // Validate each element
    if (!idxItem ||
      (typeof idxItem !== 'string' && typeof idxItem !== 'object')) {
      return new Error('getSitemapStream must return an IndexItem or string as the first element');
    }
    if (!currentSitemap || typeof currentSitemap.write !== 'function') {
      return new Error('getSitemapStream must return a SitemapStream as the second element');
    }
    if (currentSitemapPipeline &&
      typeof currentSitemapPipeline.write !== 'function') {
      return new Error('getSitemapStream must return a WriteStream or undefined as the third element');
    }
    // Propagate errors from the sitemap stream
    currentSitemap.on('error', (err) => this.emit('error', err));
    this.currentSitemap = currentSitemap;
    this.currentSitemapPipeline = currentSitemapPipeline;
    super._transform(idxItem, encoding, () => {
      // We are not too concerned about waiting for the index item to be written
      // as we'll wait for the file to finish at the end, and index file write
      // volume tends to be small in comparison to sitemap writes.
      // NOTE(review): an error passed to this callback is dropped here as
      // well — confirm that is intended.
    });
    return undefined;
  }
}

View File

@@ -0,0 +1,21 @@
import { Transform, TransformOptions, TransformCallback } from 'node:stream';
import { SitemapItem, ErrorLevel } from './types.js';
/**
* Options for SitemapItemStream, on top of the standard Transform options.
*/
export interface SitemapItemStreamOptions extends TransformOptions {
/** Error level for the stream. */
level?: ErrorLevel;
}
/**
* Takes a stream of SitemapItemOptions and spits out xml for each
* @example
* // writes <url><loc>https://example.com</loc></url><url><loc>https://example.com/2</loc></url>
* const smis = new SitemapItemStream({level: 'warn'})
* smis.pipe(writestream)
* smis.write({url: 'https://example.com', img: [], video: [], links: []})
* smis.write({url: 'https://example.com/2', img: [], video: [], links: []})
* smis.end()
* @param level - Error level
*/
export declare class SitemapItemStream extends Transform {
/** Error level this stream was configured with. */
level: ErrorLevel;
constructor(opts?: SitemapItemStreamOptions);
_transform(item: SitemapItem, encoding: string, callback: TransformCallback): void;
}

View File

@@ -0,0 +1,204 @@
import { Transform } from 'node:stream';
import { InvalidAttr } from './errors.js';
import { ErrorLevel, TagNames } from './types.js';
import { element, otag, ctag } from './sitemap-xml.js';
/**
 * Collects XML attributes from a configuration object.
 *
 * Each key has the form 'element:attribute'; the part after the colon becomes
 * the attribute name. Keys whose value in `conf` is undefined are skipped
 * before their format is checked.
 *
 * @param conf - object holding the attribute values under their full keys
 * @param keys - a single key, or an array of keys, to look up in `conf`
 * @returns object mapping attribute names to the values found in `conf` (values are copied as-is)
 * @throws {InvalidAttr} When a key that has a value does not contain exactly one colon
 *
 * @example
 * attrBuilder({ 'price:currency': 'USD', 'price:type': 'rent' }, ['price:currency', 'price:type'])
 * // Returns: { currency: 'USD', type: 'rent' }
 */
function attrBuilder(conf, keys) {
  const keyList = typeof keys === 'string' ? [keys] : keys;
  const attrs = {};
  for (const key of keyList) {
    const value = conf[key];
    if (value === undefined) {
      continue;
    }
    const parts = key.split(':');
    if (parts.length !== 2) {
      throw new InvalidAttr(key);
    }
    attrs[parts[1]] = value;
  }
  return attrs;
}
/**
* Takes a stream of SitemapItemOptions and spits out xml for each
* @example
* // writes <url><loc>https://example.com</loc></url><url><loc>https://example.com/2</loc></url>
* const smis = new SitemapItemStream({level: 'warn'})
* smis.pipe(writestream)
* smis.write({url: 'https://example.com', img: [], video: [], links: []})
* smis.write({url: 'https://example.com/2', img: [], video: [], links: []})
* smis.end()
* @param level - Error level
*/
export class SitemapItemStream extends Transform {
level;
constructor(opts = { level: ErrorLevel.WARN }) {
opts.objectMode = true;
super(opts);
this.level = opts.level || ErrorLevel.WARN;
}
_transform(item, encoding, callback) {
this.push(otag(TagNames.url));
this.push(element(TagNames.loc, item.url));
if (item.lastmod) {
this.push(element(TagNames.lastmod, item.lastmod));
}
if (item.changefreq) {
this.push(element(TagNames.changefreq, item.changefreq));
}
if (item.priority !== undefined && item.priority !== null) {
if (item.fullPrecisionPriority) {
this.push(element(TagNames.priority, item.priority.toString()));
}
else {
this.push(element(TagNames.priority, item.priority.toFixed(1)));
}
}
item.video.forEach((video) => {
this.push(otag(TagNames['video:video']));
this.push(element(TagNames['video:thumbnail_loc'], video.thumbnail_loc));
this.push(element(TagNames['video:title'], video.title));
this.push(element(TagNames['video:description'], video.description));
if (video.content_loc) {
this.push(element(TagNames['video:content_loc'], video.content_loc));
}
if (video.player_loc) {
this.push(element(TagNames['video:player_loc'], attrBuilder(video, [
'player_loc:autoplay',
'player_loc:allow_embed',
]), video.player_loc));
}
if (video.duration) {
this.push(element(TagNames['video:duration'], video.duration.toString()));
}
if (video.expiration_date) {
this.push(element(TagNames['video:expiration_date'], video.expiration_date));
}
if (video.rating !== undefined) {
this.push(element(TagNames['video:rating'], video.rating.toString()));
}
if (video.view_count !== undefined) {
this.push(element(TagNames['video:view_count'], String(video.view_count)));
}
if (video.publication_date) {
this.push(element(TagNames['video:publication_date'], video.publication_date));
}
if (video.tag && video.tag.length > 0) {
for (const tag of video.tag) {
this.push(element(TagNames['video:tag'], tag));
}
}
if (video.category) {
this.push(element(TagNames['video:category'], video.category));
}
if (video.family_friendly) {
this.push(element(TagNames['video:family_friendly'], video.family_friendly));
}
if (video.restriction) {
this.push(element(TagNames['video:restriction'], attrBuilder(video, 'restriction:relationship'), video.restriction));
}
if (video.gallery_loc) {
this.push(element(TagNames['video:gallery_loc'], attrBuilder(video, 'gallery_loc:title'), video.gallery_loc));
}
if (video.price) {
this.push(element(TagNames['video:price'], attrBuilder(video, [
'price:resolution',
'price:currency',
'price:type',
]), video.price));
}
if (video.requires_subscription) {
this.push(element(TagNames['video:requires_subscription'], video.requires_subscription));
}
if (video.uploader) {
this.push(element(TagNames['video:uploader'], attrBuilder(video, 'uploader:info'), video.uploader));
}
if (video.platform) {
this.push(element(TagNames['video:platform'], attrBuilder(video, 'platform:relationship'), video.platform));
}
if (video.live) {
this.push(element(TagNames['video:live'], video.live));
}
if (video.id) {
this.push(element(TagNames['video:id'], { type: 'url' }, video.id));
}
this.push(ctag(TagNames['video:video']));
});
item.links.forEach((link) => {
this.push(element(TagNames['xhtml:link'], {
rel: 'alternate',
hreflang: link.lang || link.hreflang,
href: link.url,
}));
});
if (item.expires) {
this.push(element(TagNames.expires, new Date(item.expires).toISOString()));
}
if (item.androidLink) {
this.push(element(TagNames['xhtml:link'], {
rel: 'alternate',
href: item.androidLink,
}));
}
if (item.ampLink) {
this.push(element(TagNames['xhtml:link'], {
rel: 'amphtml',
href: item.ampLink,
}));
}
if (item.news) {
this.push(otag(TagNames['news:news']));
this.push(otag(TagNames['news:publication']));
this.push(element(TagNames['news:name'], item.news.publication.name));
this.push(element(TagNames['news:language'], item.news.publication.language));
this.push(ctag(TagNames['news:publication']));
if (item.news.access) {
this.push(element(TagNames['news:access'], item.news.access));
}
if (item.news.genres) {
this.push(element(TagNames['news:genres'], item.news.genres));
}
this.push(element(TagNames['news:publication_date'], item.news.publication_date));
this.push(element(TagNames['news:title'], item.news.title));
if (item.news.keywords) {
this.push(element(TagNames['news:keywords'], item.news.keywords));
}
if (item.news.stock_tickers) {
this.push(element(TagNames['news:stock_tickers'], item.news.stock_tickers));
}
this.push(ctag(TagNames['news:news']));
}
// Image handling
item.img.forEach((image) => {
this.push(otag(TagNames['image:image']));
this.push(element(TagNames['image:loc'], image.url));
if (image.caption) {
this.push(element(TagNames['image:caption'], image.caption));
}
if (image.geoLocation) {
this.push(element(TagNames['image:geo_location'], image.geoLocation));
}
if (image.title) {
this.push(element(TagNames['image:title'], image.title));
}
if (image.license) {
this.push(element(TagNames['image:license'], image.license));
}
this.push(ctag(TagNames['image:image']));
});
this.push(ctag(TagNames.url));
callback();
}
}

62
node_modules/sitemap/dist/esm/lib/sitemap-parser.d.ts generated vendored Normal file
View File

@@ -0,0 +1,62 @@
import type { SAXStream } from 'sax';
import { Readable, Transform, TransformOptions, TransformCallback } from 'node:stream';
import { SitemapItem, ErrorLevel } from './types.js';
/** Logging callback: receives a console-style level followed by the message arguments. */
type Logger = (level: 'warn' | 'error' | 'info' | 'log', ...message: Parameters<Console['log']>[0]) => void;
/** Options accepted by XMLToSitemapItemStream, in addition to the regular Transform options. */
export interface XMLToSitemapItemStreamOptions extends TransformOptions {
/** Error level for the parser. */
level?: ErrorLevel;
/** Custom logging callback, or false to disable logging. */
logger?: Logger | false;
}
/**
* Takes a stream of xml and transforms it into a stream of SitemapItems
* Use this to parse existing sitemaps into config options compatible with this library
*/
export declare class XMLToSitemapItemStream extends Transform {
/** Error level in effect for this stream. */
level: ErrorLevel;
/** Logging callback used for parser diagnostics. */
logger: Logger;
/**
* Errors encountered during parsing, capped at LIMITS.MAX_PARSER_ERRORS entries
* to prevent memory DoS from malformed XML (BB-03).
* Use errorCount for the total number of errors regardless of the cap.
*/
errors: Error[];
/** Total number of errors seen, including those beyond the stored cap. */
errorCount: number;
/** Underlying sax stream that incoming XML chunks are written to. */
saxStream: SAXStream;
/** Number of url entries counted so far. */
urlCount: number;
/** @param opts - Stream options, including the optional error level and logger. */
constructor(opts?: XMLToSitemapItemStreamOptions);
/** Handles one chunk of XML written to the stream and reports completion through callback. */
_transform(data: string, encoding: string, callback: TransformCallback): void;
/** Internal error-recording helper. */
private err;
}
/**
Read xml and resolve with the sitemap items it contains or reject with
an error
```
const { createReadStream } = require('fs')
const { parseSitemap } = require('sitemap')
parseSitemap(createReadStream('./example.xml')).then(
// the items can be modified, stored, or written to a SitemapStream
(items) => console.log(items),
(err) => console.log(err)
)
```
@param {Readable} xml what to parse
@return {Promise<SitemapItem[]>} resolves with list of sitemap items that can be fed into a SitemapStream. Rejects with an Error object.
*/
export declare function parseSitemap(xml: Readable): Promise<SitemapItem[]>;
/** Options accepted by ObjectStreamToJSON, in addition to the regular Transform options. */
export interface ObjectStreamToJSONOptions extends TransformOptions {
/** When true, entries are separated by a new line; otherwise they are emitted as a comma-separated JSON array. */
lineSeparated: boolean;
}
/**
* A Transform that converts a stream of objects into a JSON Array or a line
* separated stringified JSON
* @param [lineSeparated=false] whether to separate entries by a new line or comma
*/
export declare class ObjectStreamToJSON extends Transform {
/** Whether entries are separated by a new line instead of forming a JSON array. */
lineSeparated: boolean;
/** Whether at least one chunk has been processed. */
firstWritten: boolean;
/** @param opts - Stream options, including lineSeparated. */
constructor(opts?: ObjectStreamToJSONOptions);
/** Writes the separator (or opening bracket) followed by the stringified chunk. */
_transform(chunk: SitemapItem, encoding: string, cb: TransformCallback): void;
/** Writes the closing bracket when producing a JSON array. */
_flush(cb: TransformCallback): void;
}
export {};

779
node_modules/sitemap/dist/esm/lib/sitemap-parser.js generated vendored Normal file
View File

@@ -0,0 +1,779 @@
import sax from 'sax';
import { Transform, } from 'node:stream';
import { ErrorLevel, TagNames, } from './types.js';
import { isValidChangeFreq, isValidYesNo, isAllowDeny, isPriceType, isResolution, } from './validation.js';
import { LIMITS } from './constants.js';
/**
 * Tells whether a tag name is one of the known sitemap tags.
 *
 * @param tagName - Tag name as reported by the XML parser
 * @returns true when tagName is an own key of TagNames
 */
function isValidTagName(tagName) {
  // This only works because the enum name and value are the same.
  // An own-property check is used instead of `in` so that names inherited from
  // Object.prototype (e.g. "constructor", "toString") are not accepted as tags.
  return Object.prototype.hasOwnProperty.call(TagNames, tagName);
}
/**
 * Reads the value of a parsed attribute.
 *
 * @param attr - Either the attribute value as a string, or an attribute object carrying a `value` property
 * @returns The attribute value, or undefined when attr is missing or empty
 */
function getAttrValue(attr) {
  if (typeof attr === 'string' && attr) {
    return attr;
  }
  return attr ? attr.value : undefined;
}
/**
 * Creates a blank sitemap item.
 *
 * @returns A new object with empty img, video and links arrays and an empty url
 */
function tagTemplate() {
  const blankItem = { img: [], video: [], links: [], url: '' };
  return blankItem;
}
/**
 * Creates a blank video entry.
 *
 * @returns A new object with an empty tag array and empty thumbnail_loc, title and description
 */
function videoTemplate() {
  const blankVideo = { tag: [], thumbnail_loc: '', title: '', description: '' };
  return blankVideo;
}
// Blank image entry; spread into a fresh object for each image being parsed.
const imageTemplate = {
url: '',
};
// Blank alternate-link entry; spread into a fresh object for each link being parsed.
const linkTemplate = {
lang: '',
url: '',
};
/**
 * Creates a blank news entry.
 *
 * @returns A new object with an empty publication (name, language), publication_date and title
 */
function newsTemplate() {
  const publication = { name: '', language: '' };
  return { publication, publication_date: '', title: '' };
}
// Default logger: forwards the message arguments to the console method named by level.
const defaultLogger = (level, ...message) => console[level](...message);
// Options used when XMLToSitemapItemStream is constructed without arguments.
const defaultStreamOpts = {
logger: defaultLogger,
};
// TODO does this need to end with `options`
/**
* Takes a stream of xml and transforms it into a stream of SitemapItems
* Use this to parse existing sitemaps into config options compatible with this library
*/
export class XMLToSitemapItemStream extends Transform {
level;
logger;
/**
* Errors encountered during parsing, capped at LIMITS.MAX_PARSER_ERRORS entries
* to prevent memory DoS from malformed XML (BB-03).
* Use errorCount for the total number of errors regardless of the cap.
*/
errors;
/** Total number of errors seen, including those beyond the stored cap. */
errorCount;
saxStream;
urlCount;
constructor(opts = defaultStreamOpts) {
opts.objectMode = true;
super(opts);
this.errors = [];
this.errorCount = 0;
this.urlCount = 0;
this.saxStream = sax.createStream(true, {
xmlns: true,
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
strictEntities: true,
trim: true,
});
this.level = opts.level || ErrorLevel.WARN;
if (this.level !== ErrorLevel.SILENT && opts.logger !== false) {
this.logger = opts.logger ?? defaultLogger;
}
else {
this.logger = () => undefined;
}
let currentItem = tagTemplate();
let currentTag;
let currentVideo = videoTemplate();
let currentImage = { ...imageTemplate };
let currentLink = { ...linkTemplate };
let dontpushCurrentLink = false;
this.saxStream.on('opentagstart', (tag) => {
currentTag = tag.name;
if (currentTag.startsWith('news:') && !currentItem.news) {
currentItem.news = newsTemplate();
}
});
this.saxStream.on('opentag', (tag) => {
if (isValidTagName(tag.name)) {
if (tag.name === 'xhtml:link') {
// SAX returns attributes as objects with {name, value, prefix, local, uri}
// Check if required attributes exist and have values
const rel = getAttrValue(tag.attributes.rel);
const href = getAttrValue(tag.attributes.href);
const hreflang = getAttrValue(tag.attributes.hreflang);
if (!rel || !href) {
this.logger('warn', 'xhtml:link missing required rel or href attribute');
this.err('xhtml:link missing required rel or href attribute');
return;
}
if (rel === 'alternate' && hreflang) {
currentLink.url = href;
currentLink.lang = hreflang;
}
else if (rel === 'alternate') {
dontpushCurrentLink = true;
currentItem.androidLink = href;
}
else if (rel === 'amphtml') {
dontpushCurrentLink = true;
currentItem.ampLink = href;
}
else {
this.logger('log', 'unhandled attr for xhtml:link', tag.attributes);
this.err(`unhandled attr for xhtml:link ${JSON.stringify(tag.attributes)}`);
}
}
}
else {
this.logger('warn', 'unhandled tag', tag.name);
this.err(`unhandled tag: ${tag.name}`);
}
});
this.saxStream.on('text', (text) => {
switch (currentTag) {
case 'mobile:mobile':
break;
case TagNames.loc:
// Validate URL
if (text.length > LIMITS.MAX_URL_LENGTH) {
this.logger('warn', `URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}: ${text.substring(0, 100)}...`);
this.err(`URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}`);
}
else if (!LIMITS.URL_PROTOCOL_REGEX.test(text)) {
this.logger('warn', `URL must start with http:// or https://: ${text}`);
this.err(`URL must start with http:// or https://: ${text}`);
}
else {
currentItem.url = text;
}
break;
case TagNames.changefreq:
if (isValidChangeFreq(text)) {
currentItem.changefreq = text;
}
break;
case TagNames.priority:
{
const priority = parseFloat(text);
if (isNaN(priority) ||
!isFinite(priority) ||
priority < 0 ||
priority > 1) {
this.logger('warn', `Invalid priority "${text}" - must be between 0 and 1`);
this.err(`Invalid priority "${text}" - must be between 0 and 1`);
}
else {
currentItem.priority = priority;
}
}
break;
case TagNames.lastmod:
if (LIMITS.ISO_DATE_REGEX.test(text)) {
currentItem.lastmod = text;
}
else {
this.logger('warn', `Invalid lastmod date format "${text}" - expected ISO 8601 format`);
this.err(`Invalid lastmod date format "${text}" - expected ISO 8601 format`);
}
break;
case TagNames['video:thumbnail_loc']:
currentVideo.thumbnail_loc = text;
break;
case TagNames['video:tag']:
if (currentVideo.tag.length < LIMITS.MAX_TAGS_PER_VIDEO) {
currentVideo.tag.push(text);
}
else {
this.logger('warn', `video has too many tags (max ${LIMITS.MAX_TAGS_PER_VIDEO})`);
this.err(`video has too many tags (max ${LIMITS.MAX_TAGS_PER_VIDEO})`);
}
break;
case TagNames['video:duration']:
{
const duration = parseInt(text, 10);
if (isNaN(duration) ||
!isFinite(duration) ||
duration < 0 ||
duration > 28800) {
this.logger('warn', `Invalid video duration "${text}" - must be between 0 and 28800 seconds`);
this.err(`Invalid video duration "${text}" - must be between 0 and 28800 seconds`);
}
else {
currentVideo.duration = duration;
}
}
break;
case TagNames['video:player_loc']:
currentVideo.player_loc = text;
break;
case TagNames['video:content_loc']:
currentVideo.content_loc = text;
break;
case TagNames['video:requires_subscription']:
if (isValidYesNo(text)) {
currentVideo.requires_subscription = text;
}
break;
case TagNames['video:publication_date']:
if (LIMITS.ISO_DATE_REGEX.test(text)) {
currentVideo.publication_date = text;
}
else {
this.logger('warn', `Invalid video publication_date format "${text}" - expected ISO 8601 format`);
this.err(`Invalid video publication_date format "${text}" - expected ISO 8601 format`);
}
break;
case TagNames['video:id']:
currentVideo.id = text;
break;
case TagNames['video:restriction']:
currentVideo.restriction = text;
break;
case TagNames['video:view_count']:
{
const viewCount = parseInt(text, 10);
if (isNaN(viewCount) || !isFinite(viewCount) || viewCount < 0) {
this.logger('warn', `Invalid video view_count "${text}" - must be a positive integer`);
this.err(`Invalid video view_count "${text}" - must be a positive integer`);
}
else {
currentVideo.view_count = viewCount;
}
}
break;
case TagNames['video:uploader']:
currentVideo.uploader = text;
break;
case TagNames['video:family_friendly']:
if (isValidYesNo(text)) {
currentVideo.family_friendly = text;
}
break;
case TagNames['video:expiration_date']:
if (LIMITS.ISO_DATE_REGEX.test(text)) {
currentVideo.expiration_date = text;
}
else {
this.logger('warn', `Invalid video expiration_date format "${text}" - expected ISO 8601 format`);
this.err(`Invalid video expiration_date format "${text}" - expected ISO 8601 format`);
}
break;
case TagNames['video:platform']:
currentVideo.platform = text;
break;
case TagNames['video:price']:
currentVideo.price = text;
break;
case TagNames['video:rating']:
{
const rating = parseFloat(text);
if (isNaN(rating) ||
!isFinite(rating) ||
rating < 0 ||
rating > 5) {
this.logger('warn', `Invalid video rating "${text}" - must be between 0 and 5`);
this.err(`Invalid video rating "${text}" - must be between 0 and 5`);
}
else {
currentVideo.rating = rating;
}
}
break;
case TagNames['video:category']:
currentVideo.category = text;
break;
case TagNames['video:live']:
if (isValidYesNo(text)) {
currentVideo.live = text;
}
break;
case TagNames['video:gallery_loc']:
currentVideo.gallery_loc = text;
break;
case TagNames['image:loc']:
currentImage.url = text;
break;
case TagNames['image:geo_location']:
currentImage.geoLocation = text;
break;
case TagNames['image:license']:
currentImage.license = text;
break;
case TagNames['news:access']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (text === 'Registration' || text === 'Subscription') {
currentItem.news.access = text;
}
else {
this.logger('warn', `Invalid news:access value "${text}" - must be "Registration" or "Subscription"`);
this.err(`Invalid news:access value "${text}" - must be "Registration" or "Subscription"`);
}
break;
case TagNames['news:genres']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
currentItem.news.genres = text;
break;
case TagNames['news:publication_date']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (LIMITS.ISO_DATE_REGEX.test(text)) {
currentItem.news.publication_date = text;
}
else {
this.logger('warn', `Invalid news publication_date format "${text}" - expected ISO 8601 format`);
this.err(`Invalid news publication_date format "${text}" - expected ISO 8601 format`);
}
break;
case TagNames['news:keywords']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
currentItem.news.keywords = text;
break;
case TagNames['news:stock_tickers']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
currentItem.news.stock_tickers = text;
break;
case TagNames['news:language']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
currentItem.news.publication.language = text;
break;
case TagNames['video:title']:
if (currentVideo.title.length + text.length <=
LIMITS.MAX_VIDEO_TITLE_LENGTH) {
currentVideo.title += text;
}
else {
this.logger('warn', `video title exceeds max length of ${LIMITS.MAX_VIDEO_TITLE_LENGTH}`);
this.err(`video title exceeds max length of ${LIMITS.MAX_VIDEO_TITLE_LENGTH}`);
}
break;
case TagNames['video:description']:
if (currentVideo.description.length + text.length <=
LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH) {
currentVideo.description += text;
}
else {
this.logger('warn', `video description exceeds max length of ${LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH}`);
this.err(`video description exceeds max length of ${LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH}`);
}
break;
case TagNames['news:name']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (currentItem.news.publication.name.length + text.length <=
LIMITS.MAX_NEWS_NAME_LENGTH) {
currentItem.news.publication.name += text;
}
else {
this.logger('warn', `news name exceeds max length of ${LIMITS.MAX_NEWS_NAME_LENGTH}`);
this.err(`news name exceeds max length of ${LIMITS.MAX_NEWS_NAME_LENGTH}`);
}
break;
case TagNames['news:title']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (currentItem.news.title.length + text.length <=
LIMITS.MAX_NEWS_TITLE_LENGTH) {
currentItem.news.title += text;
}
else {
this.logger('warn', `news title exceeds max length of ${LIMITS.MAX_NEWS_TITLE_LENGTH}`);
this.err(`news title exceeds max length of ${LIMITS.MAX_NEWS_TITLE_LENGTH}`);
}
break;
case TagNames['image:caption']:
if (!currentImage.caption) {
currentImage.caption =
text.length <= LIMITS.MAX_IMAGE_CAPTION_LENGTH
? text
: text.substring(0, LIMITS.MAX_IMAGE_CAPTION_LENGTH);
if (text.length > LIMITS.MAX_IMAGE_CAPTION_LENGTH) {
this.logger('warn', `image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
this.err(`image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
}
}
else if (currentImage.caption.length + text.length <=
LIMITS.MAX_IMAGE_CAPTION_LENGTH) {
currentImage.caption += text;
}
else {
this.logger('warn', `image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
this.err(`image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
}
break;
case TagNames['image:title']:
if (!currentImage.title) {
currentImage.title =
text.length <= LIMITS.MAX_IMAGE_TITLE_LENGTH
? text
: text.substring(0, LIMITS.MAX_IMAGE_TITLE_LENGTH);
if (text.length > LIMITS.MAX_IMAGE_TITLE_LENGTH) {
this.logger('warn', `image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
this.err(`image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
}
}
else if (currentImage.title.length + text.length <=
LIMITS.MAX_IMAGE_TITLE_LENGTH) {
currentImage.title += text;
}
else {
this.logger('warn', `image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
this.err(`image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
}
break;
default:
this.logger('log', 'unhandled text for tag:', currentTag, `'${text}'`);
this.err(`unhandled text for tag: ${currentTag} '${text}'`);
break;
}
});
this.saxStream.on('cdata', (text) => {
switch (currentTag) {
case TagNames.loc:
// Validate URL
if (text.length > LIMITS.MAX_URL_LENGTH) {
this.logger('warn', `URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}: ${text.substring(0, 100)}...`);
this.err(`URL exceeds max length of ${LIMITS.MAX_URL_LENGTH}`);
}
else if (!LIMITS.URL_PROTOCOL_REGEX.test(text)) {
this.logger('warn', `URL must start with http:// or https://: ${text}`);
this.err(`URL must start with http:// or https://: ${text}`);
}
else {
currentItem.url = text;
}
break;
case TagNames['image:loc']:
currentImage.url = text;
break;
case TagNames['video:title']:
if (currentVideo.title.length + text.length <=
LIMITS.MAX_VIDEO_TITLE_LENGTH) {
currentVideo.title += text;
}
else {
this.logger('warn', `video title exceeds max length of ${LIMITS.MAX_VIDEO_TITLE_LENGTH}`);
this.err(`video title exceeds max length of ${LIMITS.MAX_VIDEO_TITLE_LENGTH}`);
}
break;
case TagNames['video:description']:
if (currentVideo.description.length + text.length <=
LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH) {
currentVideo.description += text;
}
else {
this.logger('warn', `video description exceeds max length of ${LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH}`);
this.err(`video description exceeds max length of ${LIMITS.MAX_VIDEO_DESCRIPTION_LENGTH}`);
}
break;
case TagNames['news:name']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (currentItem.news.publication.name.length + text.length <=
LIMITS.MAX_NEWS_NAME_LENGTH) {
currentItem.news.publication.name += text;
}
else {
this.logger('warn', `news name exceeds max length of ${LIMITS.MAX_NEWS_NAME_LENGTH}`);
this.err(`news name exceeds max length of ${LIMITS.MAX_NEWS_NAME_LENGTH}`);
}
break;
case TagNames['news:title']:
if (!currentItem.news) {
currentItem.news = newsTemplate();
}
if (currentItem.news.title.length + text.length <=
LIMITS.MAX_NEWS_TITLE_LENGTH) {
currentItem.news.title += text;
}
else {
this.logger('warn', `news title exceeds max length of ${LIMITS.MAX_NEWS_TITLE_LENGTH}`);
this.err(`news title exceeds max length of ${LIMITS.MAX_NEWS_TITLE_LENGTH}`);
}
break;
case TagNames['image:caption']:
if (!currentImage.caption) {
currentImage.caption =
text.length <= LIMITS.MAX_IMAGE_CAPTION_LENGTH
? text
: text.substring(0, LIMITS.MAX_IMAGE_CAPTION_LENGTH);
if (text.length > LIMITS.MAX_IMAGE_CAPTION_LENGTH) {
this.logger('warn', `image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
this.err(`image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
}
}
else if (currentImage.caption.length + text.length <=
LIMITS.MAX_IMAGE_CAPTION_LENGTH) {
currentImage.caption += text;
}
else {
this.logger('warn', `image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
this.err(`image caption exceeds max length of ${LIMITS.MAX_IMAGE_CAPTION_LENGTH}`);
}
break;
case TagNames['image:title']:
if (!currentImage.title) {
currentImage.title =
text.length <= LIMITS.MAX_IMAGE_TITLE_LENGTH
? text
: text.substring(0, LIMITS.MAX_IMAGE_TITLE_LENGTH);
if (text.length > LIMITS.MAX_IMAGE_TITLE_LENGTH) {
this.logger('warn', `image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
this.err(`image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
}
}
else if (currentImage.title.length + text.length <=
LIMITS.MAX_IMAGE_TITLE_LENGTH) {
currentImage.title += text;
}
else {
this.logger('warn', `image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
this.err(`image title exceeds max length of ${LIMITS.MAX_IMAGE_TITLE_LENGTH}`);
}
break;
default:
this.logger('log', 'unhandled cdata for tag:', currentTag);
this.err(`unhandled cdata for tag: ${currentTag}`);
break;
}
});
this.saxStream.on('attribute', (attr) => {
switch (currentTag) {
case TagNames['urlset']:
case TagNames['xhtml:link']:
case TagNames['video:id']:
break;
case TagNames['video:restriction']:
if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
currentVideo['restriction:relationship'] = attr.value;
}
else {
this.logger('log', 'unhandled attr', currentTag, attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
break;
case TagNames['video:price']:
if (attr.name === 'type' && isPriceType(attr.value)) {
currentVideo['price:type'] = attr.value;
}
else if (attr.name === 'currency') {
currentVideo['price:currency'] = attr.value;
}
else if (attr.name === 'resolution' && isResolution(attr.value)) {
currentVideo['price:resolution'] = attr.value;
}
else {
this.logger('log', 'unhandled attr for video:price', attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
break;
case TagNames['video:player_loc']:
if (attr.name === 'autoplay') {
currentVideo['player_loc:autoplay'] = attr.value;
}
else if (attr.name === 'allow_embed' && isValidYesNo(attr.value)) {
currentVideo['player_loc:allow_embed'] = attr.value;
}
else {
this.logger('log', 'unhandled attr for video:player_loc', attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
break;
case TagNames['video:platform']:
if (attr.name === 'relationship' && isAllowDeny(attr.value)) {
currentVideo['platform:relationship'] = attr.value;
}
else {
this.logger('log', 'unhandled attr for video:platform', attr.name, attr.value);
this.err(`unhandled attr: ${currentTag} ${attr.name} ${attr.value}`);
}
break;
case TagNames['video:gallery_loc']:
if (attr.name === 'title') {
currentVideo['gallery_loc:title'] = attr.value;
}
else {
this.logger('log', 'unhandled attr for video:galler_loc', attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
break;
case TagNames['video:uploader']:
if (attr.name === 'info') {
currentVideo['uploader:info'] = attr.value;
}
else {
this.logger('log', 'unhandled attr for video:uploader', attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
break;
default:
this.logger('log', 'unhandled attr', currentTag, attr.name);
this.err(`unhandled attr: ${currentTag} ${attr.name}`);
}
});
this.saxStream.on('closetag', (tag) => {
switch (tag) {
case TagNames.url:
this.urlCount++;
if (this.urlCount > LIMITS.MAX_URL_ENTRIES) {
this.logger('error', `Sitemap exceeds maximum of ${LIMITS.MAX_URL_ENTRIES} URLs`);
this.err(`Sitemap exceeds maximum of ${LIMITS.MAX_URL_ENTRIES} URLs`);
currentItem = tagTemplate();
break;
}
this.push(currentItem);
currentItem = tagTemplate();
break;
case TagNames['video:video']:
if (currentItem.video.length < LIMITS.MAX_VIDEOS_PER_URL) {
currentItem.video.push(currentVideo);
}
else {
this.logger('warn', `URL has too many videos (max ${LIMITS.MAX_VIDEOS_PER_URL})`);
this.err(`URL has too many videos (max ${LIMITS.MAX_VIDEOS_PER_URL})`);
}
currentVideo = videoTemplate();
break;
case TagNames['image:image']:
if (currentItem.img.length < LIMITS.MAX_IMAGES_PER_URL) {
currentItem.img.push(currentImage);
}
else {
this.logger('warn', `URL has too many images (max ${LIMITS.MAX_IMAGES_PER_URL})`);
this.err(`URL has too many images (max ${LIMITS.MAX_IMAGES_PER_URL})`);
}
currentImage = { ...imageTemplate };
break;
case TagNames['xhtml:link']:
if (!dontpushCurrentLink) {
if (currentItem.links.length < LIMITS.MAX_LINKS_PER_URL) {
currentItem.links.push(currentLink);
}
else {
this.logger('warn', `URL has too many links (max ${LIMITS.MAX_LINKS_PER_URL})`);
this.err(`URL has too many links (max ${LIMITS.MAX_LINKS_PER_URL})`);
}
}
currentLink = { ...linkTemplate };
dontpushCurrentLink = false; // Reset flag for next link
break;
default:
break;
}
});
}
_transform(data, encoding, callback) {
try {
const cb = () => callback(this.level === ErrorLevel.THROW && this.errors.length > 0
? this.errors[0]
: null);
// correcting the type here can be done without making it a breaking change
// TODO fix this
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
if (!this.saxStream.write(data, encoding)) {
this.saxStream.once('drain', cb);
}
else {
process.nextTick(cb);
}
}
catch (error) {
callback(error);
}
}
err(msg) {
this.errorCount++;
if (this.errors.length < LIMITS.MAX_PARSER_ERRORS) {
this.errors.push(new Error(msg));
}
}
}
/**
Read xml and resolve with the sitemap items it contains or reject with
an error
```
const { createReadStream } = require('fs')
const { parseSitemap } = require('sitemap')
parseSitemap(createReadStream('./example.xml')).then(
  // the items can be modified, stored, or written to a SitemapStream
  (items) => console.log(items),
  (err) => console.log(err)
)
```
@param {Readable} xml what to parse
@return {Promise<SitemapItem[]>} resolves with list of sitemap items that can be fed into a SitemapStream. Rejects with an Error object.
*/
export async function parseSitemap(xml) {
  const urls = [];
  return new Promise((resolve, reject) => {
    // pipe() does not forward errors from the source stream, so listen on it
    // directly; otherwise a failing input would never settle this promise.
    xml.on('error', reject);
    xml
      .pipe(new XMLToSitemapItemStream())
      .on('data', (smi) => urls.push(smi))
      .on('end', () => {
        resolve(urls);
      })
      .on('error', reject);
  });
}
// Options used when ObjectStreamToJSON is constructed without arguments.
const defaultObjectStreamOpts = {
lineSeparated: false,
};
/**
 * A Transform that converts a stream of objects into a JSON Array or a line
 * separated stringified JSON
 * @param [lineSeparated=false] whether to separate entries by a new line or comma
 */
export class ObjectStreamToJSON extends Transform {
  lineSeparated;
  firstWritten;
  /**
   * @param opts - Transform options plus `lineSeparated`; the writable side
   *   always runs in object mode.
   */
  constructor(opts = defaultObjectStreamOpts) {
    // Pass a copy to the base class so neither the caller's object nor the
    // shared module-level default is mutated.
    super({ ...opts, writableObjectMode: true });
    this.lineSeparated = opts.lineSeparated;
    this.firstWritten = false;
  }
  /**
   * Emits the separator for the chunk (or the opening bracket before the first
   * one in array mode), followed by the stringified chunk.
   */
  _transform(chunk, encoding, cb) {
    if (!this.firstWritten) {
      this.firstWritten = true;
      if (!this.lineSeparated) {
        this.push('[');
      }
    } else if (this.lineSeparated) {
      this.push('\n');
    } else {
      this.push(',');
    }
    if (chunk) {
      this.push(JSON.stringify(chunk));
    }
    cb();
  }
  /**
   * Closes the JSON array. When no chunk was ever written the opening bracket
   * is emitted here too, so an empty stream yields "[]" rather than a lone "]".
   */
  _flush(cb) {
    if (!this.lineSeparated) {
      if (!this.firstWritten) {
        this.push('[');
      }
      this.push(']');
    }
    cb();
  }
}

63
node_modules/sitemap/dist/esm/lib/sitemap-simple.d.ts generated vendored Normal file
View File

@@ -0,0 +1,63 @@
import { Readable } from 'node:stream';
import { SitemapItemLoose } from './types.js';
/**
* Options for the simpleSitemapAndIndex function
*/
export interface SimpleSitemapAndIndexOptions {
/**
* The hostname for all URLs
* Must be a valid http:// or https:// URL
*/
hostname: string;
/**
* The hostname for the sitemaps if different than hostname
* Must be a valid http:// or https:// URL
* Defaults to hostname when omitted
*/
sitemapHostname?: string;
/**
* The urls you want to make a sitemap out of.
* Can be an array of items, a file path string, a Readable stream, or an array of strings
*/
sourceData: SitemapItemLoose[] | string | Readable | string[];
/**
* Where to write the sitemaps and index
* Must be a relative path without path traversal sequences
*/
destinationDir: string;
/**
* Where the sitemaps are relative to the hostname. Defaults to root.
* Must not contain path traversal sequences
* @default './'
*/
publicBasePath?: string;
/**
* How many URLs to write before switching to a new file
* Must be between 1 and 50,000 per sitemaps.org spec
* @default 50000
*/
limit?: number;
/**
* Whether to compress the written files
* @default true
*/
gzip?: boolean;
/**
* Optional URL to an XSL stylesheet
* Must be a valid http:// or https:// URL
*/
xslUrl?: string;
}
/**
* A simpler interface for creating sitemaps and indexes.
* Automatically handles splitting large datasets into multiple sitemap files.
*
* @param options - Configuration options
* @returns A promise that resolves when all sitemaps and the index are written
* @throws {InvalidHostnameError} If hostname or sitemapHostname is invalid
* @throws {InvalidPathError} If destinationDir contains path traversal
* @throws {InvalidPublicBasePathError} If publicBasePath is invalid
* @throws {InvalidLimitError} If limit is out of range
* @throws {InvalidXSLUrlError} If xslUrl is invalid
* @throws {Error} If sourceData type is not supported
*/
export declare const simpleSitemapAndIndex: ({ hostname, sitemapHostname, sourceData, destinationDir, limit, gzip, publicBasePath, xslUrl, }: SimpleSitemapAndIndexOptions) => Promise<void>;
/** Default export: the same function as the named simpleSitemapAndIndex export. */
export default simpleSitemapAndIndex;

109
node_modules/sitemap/dist/esm/lib/sitemap-simple.js generated vendored Normal file
View File

@@ -0,0 +1,109 @@
import { SitemapAndIndexStream } from './sitemap-index-stream.js';
import { SitemapStream } from './sitemap-stream.js';
import { lineSeparatedURLsToSitemapOptions } from './utils.js';
import { createGzip } from 'node:zlib';
import { createWriteStream, createReadStream, promises, } from 'node:fs';
import { normalize, resolve } from 'node:path';
import { Readable } from 'node:stream';
import { pipeline } from 'node:stream/promises';
import { URL } from 'node:url';
import { validateURL, validatePath, validateLimit, validatePublicBasePath, validateXSLUrl, } from './validation.js';
/**
 * A simpler interface for creating sitemaps and indexes.
 * Automatically handles splitting large datasets into multiple sitemap files.
 *
 * Files are written into `destinationDir` as `sitemap-<n>.xml` plus a
 * `sitemap-index.xml`; each name gets a `.gz` suffix when `gzip` is enabled.
 *
 * Every wrapped failure keeps the underlying error on `error.cause`, so the
 * original stack and properties such as `code` stay available to callers.
 *
 * @param options - Configuration options
 * @returns A promise that resolves when all sitemaps and the index are written
 * @throws {InvalidHostnameError} If hostname or sitemapHostname is invalid
 * @throws {InvalidPathError} If destinationDir contains path traversal
 * @throws {InvalidPublicBasePathError} If publicBasePath is invalid
 * @throws {InvalidLimitError} If limit is out of range
 * @throws {InvalidXSLUrlError} If xslUrl is invalid
 * @throws {Error} If sourceData type is not supported
 * @throws {Error} If the destination directory cannot be created or writing fails
 */
export const simpleSitemapAndIndex = async ({
  hostname,
  sitemapHostname = hostname, // if different
  sourceData,
  destinationDir,
  limit = 50000,
  gzip = true,
  publicBasePath = './',
  xslUrl,
}) => {
  // Builds a contextual error while preserving the original failure as `cause`.
  const wrapError = (context, err) =>
    new Error(`${context}: ${err instanceof Error ? err.message : String(err)}`, { cause: err });

  // Validate all inputs upfront
  validateURL(hostname, 'hostname');
  validateURL(sitemapHostname, 'sitemapHostname');
  validatePath(destinationDir, 'destinationDir');
  validateLimit(limit);
  validatePublicBasePath(publicBasePath);
  if (xslUrl) {
    validateXSLUrl(xslUrl);
  }

  // Create destination directory with error context
  try {
    await promises.mkdir(destinationDir, { recursive: true });
  } catch (err) {
    throw wrapError(`Failed to create destination directory "${destinationDir}"`, err);
  }

  // Normalize publicBasePath (don't mutate the parameter)
  const normalizedPublicBasePath = publicBasePath.endsWith('/')
    ? publicBasePath
    : publicBasePath + '/';

  const sitemapAndIndexStream = new SitemapAndIndexStream({
    limit,
    // Called once per sitemap file; returns [public URL, item stream, file write stream].
    getSitemapStream: (i) => {
      const sitemapStream = new SitemapStream({
        hostname,
        xslUrl,
      });
      const path = `./sitemap-${i}.xml`;
      const writePath = resolve(destinationDir, path + (gzip ? '.gz' : ''));
      // Construct public path for the sitemap index
      const publicPath = normalize(normalizedPublicBasePath + path);
      // Construct the URL with proper error handling
      let sitemapUrl;
      try {
        sitemapUrl = new URL(`${publicPath}${gzip ? '.gz' : ''}`, sitemapHostname).toString();
      } catch (err) {
        throw wrapError(`Failed to construct sitemap URL for index ${i}`, err);
      }
      let writeStream;
      if (gzip) {
        writeStream = sitemapStream
          .pipe(createGzip()) // compress the output of the sitemap
          .pipe(createWriteStream(writePath)); // write it to sitemap-NUMBER.xml
      } else {
        writeStream = sitemapStream.pipe(createWriteStream(writePath)); // write it to sitemap-NUMBER.xml
      }
      return [sitemapUrl, sitemapStream, writeStream];
    },
  });

  // Handle different sourceData types with proper error handling
  let src;
  if (typeof sourceData === 'string') {
    // A string is a file path. Only synchronous failures are caught here;
    // errors emitted later by the read stream surface through pipeline() below.
    try {
      src = lineSeparatedURLsToSitemapOptions(createReadStream(sourceData));
    } catch (err) {
      throw wrapError(`Failed to read sourceData file "${sourceData}"`, err);
    }
  } else if (sourceData instanceof Readable) {
    src = sourceData;
  } else if (Array.isArray(sourceData)) {
    src = Readable.from(sourceData);
  } else {
    throw new Error(`Invalid sourceData type: expected array, string (file path), or Readable stream, got ${typeof sourceData}`);
  }

  const writePath = resolve(destinationDir, `./sitemap-index.xml${gzip ? '.gz' : ''}`);
  try {
    if (gzip) {
      return await pipeline(src, sitemapAndIndexStream, createGzip(), createWriteStream(writePath));
    } else {
      return await pipeline(src, sitemapAndIndexStream, createWriteStream(writePath));
    }
  } catch (err) {
    throw wrapError('Failed to write sitemap files', err);
  }
};
export default simpleSitemapAndIndex;

79
node_modules/sitemap/dist/esm/lib/sitemap-stream.d.ts generated vendored Normal file
View File

@@ -0,0 +1,79 @@
import { Transform, TransformOptions, TransformCallback, Readable } from 'node:stream';
import { SitemapItemLoose, ErrorLevel, ErrorHandler } from './types.js';
/**
* Builds the `<?xml-stylesheet type="text/xsl" href="..."?>` processing instruction
* for the given stylesheet URL. The implementation entity-escapes `&`, `"`, `<` and `>`
* in the URL before embedding it.
*
* @param url - URL of the XSL stylesheet
* @returns The processing instruction as a string
*/
export declare const stylesheetInclude: (url: string) => string;
/**
* Selects which XML namespace declarations are written on the `<urlset>` opening tag.
*/
export interface NSArgs {
/** Include `xmlns:news` (Google News sitemap namespace). */
news: boolean;
/** Include `xmlns:video` (Google Video sitemap namespace). */
video: boolean;
/** Include `xmlns:xhtml` (XHTML namespace). */
xhtml: boolean;
/** Include `xmlns:image` (Google Image sitemap namespace). */
image: boolean;
/**
* Extra attribute strings appended verbatim (space separated) after validation.
* Each entry must look like `prefix:name="value"`, e.g. `xmlns:prefix="uri"`.
*/
custom?: string[];
}
/** Closing tag that SitemapStream pushes when it is flushed after at least one item. */
export declare const closetag = "</urlset>";
/**
* Options accepted by the SitemapStream constructor. They extend Node's
* TransformOptions; `objectMode` is always forced to `true` by the constructor.
*/
export interface SitemapStreamOptions extends TransformOptions {
/** Base URL for relative paths. Must use http:// or https:// protocol. */
hostname?: string;
/** Error handling level. Falls back to ErrorLevel.WARN when not set. */
level?: ErrorLevel;
/** Format lastmod as date only (YYYY-MM-DD). Defaults to false. */
lastmodDateOnly?: boolean;
/** Which XML namespaces to declare. Defaults to news, xhtml, image and video all enabled. */
xmlns?: NSArgs;
/** URL to an XSL stylesheet for sitemap display. Must use http:// or https://. */
xslUrl?: string;
/** Custom error handler passed on to item validation. */
errorHandler?: ErrorHandler;
}
/**
* A [Transform](https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream)
* for turning a
* [Readable stream](https://nodejs.org/api/stream.html#stream_readable_streams)
* of either [SitemapItemOptions](#sitemap-item-options) or url strings into a
* Sitemap. The readable stream it transforms **must** be in object mode.
*
* @param {SitemapStreamOptions} opts - Configuration options
* @param {string} [opts.hostname] - Base URL for relative paths. Must use http:// or https:// protocol
* @param {ErrorLevel} [opts.level=ErrorLevel.WARN] - Error handling level (SILENT, WARN, or THROW)
* @param {boolean} [opts.lastmodDateOnly=false] - Format lastmod as date only (YYYY-MM-DD)
* @param {NSArgs} [opts.xmlns] - Control which XML namespaces to include in output
* @param {string} [opts.xslUrl] - URL to XSL stylesheet for sitemap display. Must use http:// or https://
* @param {ErrorHandler} [opts.errorHandler] - Custom error handler function
*
* @throws {InvalidHostnameError} If hostname is provided but invalid (non-http(s), malformed, or >2048 chars)
* @throws {InvalidXSLUrlError} If xslUrl is provided but invalid (non-http(s), malformed, >2048 chars, or contains malicious content)
* @throws {Error} If xmlns.custom contains invalid namespace declarations
*
* @example
* ```typescript
* const stream = new SitemapStream({
* hostname: 'https://example.com',
* level: ErrorLevel.THROW
* });
* stream.write({ url: '/page', changefreq: 'daily' });
* stream.end();
* ```
*
* @security
* - Hostname and xslUrl are validated to prevent URL injection attacks
* - Custom namespaces are validated to prevent XML injection
* - All URLs are normalized and validated before output
* - XML content is properly escaped to prevent injection
*/
export declare class SitemapStream extends Transform {
/** Base URL passed to URL normalization for each item. */
hostname?: string;
/** Effective error level (ErrorLevel.WARN when none was supplied). */
level: ErrorLevel;
/** True once the XML declaration and `<urlset ...>` opening tag have been pushed. */
hasHeadOutput: boolean;
/** Namespace selection used when the opening tag is written. */
xmlNS: NSArgs;
/** Stylesheet URL included in the header when set. */
xslUrl?: string;
/** Custom error handler forwarded to item validation. */
errorHandler?: ErrorHandler;
/** Internal item serializer whose output chunks are re-pushed by this stream. */
private smiStream;
/** Whether lastmod values are formatted as date only. */
lastmodDateOnly: boolean;
constructor(opts?: SitemapStreamOptions);
/** Writes the header on the first item, then serializes the normalized, validated item. */
_transform(item: SitemapItemLoose, encoding: string, callback: TransformCallback): void;
/** Pushes the closing tag, or fails with EmptySitemap when no item was ever written. */
_flush(cb: TransformCallback): void;
}
/**
* Converts a readable stream into a promise that resolves with the concatenated data from the stream.
*
* The stream is piped into an in-memory buffer; once that buffer finishes, the promise resolves with
* the concatenated data. If an error occurs on the stream (or on the internal buffer), the promise is
* rejected with that error.
*
* ⚠️ CAUTION: This function should not generally be used in production / when writing to files as it holds a copy of the entire file contents in memory until finished.
*
* @param {Readable} stream - The readable stream to convert to a promise.
* @returns {Promise<Buffer>} A promise that resolves with the concatenated data from the stream as a Buffer, or rejects with an error if one occurred while reading from the stream.
* @throws {EmptyStream} Delivered as a promise rejection (not a synchronous throw) when the stream produced no data.
*/
export declare function streamToPromise(stream: Readable): Promise<Buffer>;

212
node_modules/sitemap/dist/esm/lib/sitemap-stream.js generated vendored Normal file
View File

@@ -0,0 +1,212 @@
import { Transform, Writable, } from 'node:stream';
import { ErrorLevel } from './types.js';
import { normalizeURL } from './utils.js';
import { validateSMIOptions, validateURL, validateXSLUrl, } from './validation.js';
import { SitemapItemStream } from './sitemap-item-stream.js';
import { EmptyStream, EmptySitemap } from './errors.js';
import { LIMITS } from './constants.js';
// XML declaration; getURLSetNs places it at the very start of the sitemap header.
const xmlDec = '<?xml version="1.0" encoding="UTF-8"?>';
/**
 * Builds the `xml-stylesheet` processing instruction that points at an XSL file.
 *
 * The four characters `&`, `"`, `<` and `>` are replaced by their XML entities
 * so the URL cannot terminate the `href` value or the instruction itself.
 *
 * @param {string} url - URL of the XSL stylesheet
 * @returns {string} The processing instruction
 */
export const stylesheetInclude = (url) => {
  const entityFor = {
    '&': '&amp;',
    '"': '&quot;',
    '<': '&lt;',
    '>': '&gt;',
  };
  // Single pass: every special character maps straight to its entity.
  const escapedHref = url.replace(/[&"<>]/g, (character) => entityFor[character]);
  return '<?xml-stylesheet type="text/xsl" href="' + escapedHref + '"?>';
};
// Start of the <urlset> tag, deliberately left unclosed: getURLSetNs appends the
// optional namespace attributes and then the closing '>'.
const urlsetTagStart = '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"';
/**
 * Checks user-supplied namespace/attribute strings before they are written
 * verbatim onto the `<urlset>` tag.
 *
 * Checks run in this order: array type, entry count, then per entry:
 * non-empty string, maximum length, blocklisted substrings, attribute shape.
 *
 * @param custom - Array of strings such as `xmlns:prefix="uri"` or `xsi:schemaLocation="..."`
 * @throws {Error} On the first entry (or the array itself) that fails a check
 */
function validateCustomNamespaces(custom) {
  if (!Array.isArray(custom)) {
    throw new Error('Custom namespaces must be an array');
  }
  // Cap the number of entries to prevent DoS
  const entryCount = custom.length;
  if (entryCount > LIMITS.MAX_CUSTOM_NAMESPACES) {
    throw new Error(`Too many custom namespaces: ${entryCount} exceeds limit of ${LIMITS.MAX_CUSTOM_NAMESPACES}`);
  }
  // Accepts both xmlns:prefix="uri" and prefix:attribute="value"
  const attributeShape = /^[a-zA-Z_][\w.-]*:[a-zA-Z_][\w.-]*="[^"<>]*"$/;
  const blockedFragments = ['<script', 'javascript:', 'data:text/html'];
  for (let index = 0; index < entryCount; index += 1) {
    const entry = custom[index];
    if (typeof entry !== 'string' || entry === '') {
      throw new Error('Custom namespace must be a non-empty string');
    }
    if (entry.length > LIMITS.MAX_NAMESPACE_LENGTH) {
      throw new Error(`Custom namespace exceeds maximum length of ${LIMITS.MAX_NAMESPACE_LENGTH} characters: ${entry.substring(0, 50)}...`);
    }
    // The blocklist runs before the shape check so these inputs get the
    // more specific error message.
    const lowered = entry.toLowerCase();
    if (blockedFragments.some((fragment) => lowered.includes(fragment))) {
      throw new Error(`Custom namespace contains potentially malicious content: ${entry.substring(0, 50)}`);
    }
    if (!attributeShape.test(entry)) {
      throw new Error(`Invalid namespace format (must be prefix:name="value", e.g., xmlns:prefix="uri" or xsi:schemaLocation="..."): ${entry.substring(0, 50)}`);
    }
  }
}
/**
 * Assembles the sitemap header: XML declaration, optional stylesheet
 * instruction and the `<urlset ...>` opening tag with the selected namespaces.
 *
 * @param xmlns - Namespace selection; `custom` entries are validated and then appended verbatim
 * @param xslURL - Optional stylesheet URL; the instruction is omitted when falsy
 * @returns The complete header string ending in `>`
 * @throws {Error} If `custom` fails validation
 */
const getURLSetNs = ({ news, video, image, xhtml, custom }, xslURL) => {
  const parts = [xmlDec];
  if (xslURL) {
    parts.push(stylesheetInclude(xslURL));
  }
  parts.push(urlsetTagStart);
  // Built-in namespaces in the order they appear in the output.
  const builtInNamespaces = [
    [news, ' xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"'],
    [xhtml, ' xmlns:xhtml="http://www.w3.org/1999/xhtml"'],
    [image, ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"'],
    [video, ' xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"'],
  ];
  for (const [enabled, declaration] of builtInNamespaces) {
    if (enabled) {
      parts.push(declaration);
    }
  }
  if (custom) {
    validateCustomNamespaces(custom);
    parts.push(' ' + custom.join(' '));
  }
  parts.push('>');
  return parts.join('');
};
// Closing tag pushed by SitemapStream._flush once at least one item was written.
export const closetag = '</urlset>';
// Namespace selection used when the caller supplies no `xmlns` option:
// all four built-in namespaces are declared.
const defaultXMLNS = {
news: true,
xhtml: true,
image: true,
video: true,
};
// Default constructor argument of SitemapStream. This is a shared module-level object.
const defaultStreamOpts = {
xmlns: defaultXMLNS,
};
/**
 * A [Transform](https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream)
 * for turning a
 * [Readable stream](https://nodejs.org/api/stream.html#stream_readable_streams)
 * of either [SitemapItemOptions](#sitemap-item-options) or url strings into a
 * Sitemap. The readable stream it transforms **must** be in object mode.
 *
 * The stream always runs in object mode. The options object passed in is
 * copied, never modified, so callers can safely reuse it.
 *
 * @param {SitemapStreamOptions} opts - Configuration options
 * @param {string} [opts.hostname] - Base URL for relative paths. Must use http:// or https:// protocol
 * @param {ErrorLevel} [opts.level=ErrorLevel.WARN] - Error handling level (SILENT, WARN, or THROW)
 * @param {boolean} [opts.lastmodDateOnly=false] - Format lastmod as date only (YYYY-MM-DD)
 * @param {NSArgs} [opts.xmlns] - Control which XML namespaces to include in output
 * @param {string} [opts.xslUrl] - URL to XSL stylesheet for sitemap display. Must use http:// or https://
 * @param {ErrorHandler} [opts.errorHandler] - Custom error handler function
 *
 * @throws {InvalidHostnameError} If hostname is provided but invalid
 * @throws {InvalidXSLUrlError} If xslUrl is provided but invalid
 * @throws {Error} If xmlns.custom contains invalid namespace declarations (raised when the header is written)
 *
 * @example
 * ```typescript
 * const stream = new SitemapStream({
 *   hostname: 'https://example.com',
 *   level: ErrorLevel.THROW
 * });
 * stream.write({ url: '/page', changefreq: 'daily' });
 * stream.end();
 * ```
 */
export class SitemapStream extends Transform {
  hostname;
  level;
  hasHeadOutput;
  xmlNS;
  xslUrl;
  errorHandler;
  smiStream;
  lastmodDateOnly;
  constructor(opts = defaultStreamOpts) {
    // Force object mode on a copy: assigning to `opts` would leak `objectMode`
    // into the caller's object and into the shared default options.
    super({ ...opts, objectMode: true });
    // Validate hostname if provided
    if (opts.hostname !== undefined) {
      validateURL(opts.hostname, 'hostname');
    }
    // Validate xslUrl if provided
    if (opts.xslUrl !== undefined) {
      validateXSLUrl(opts.xslUrl);
    }
    this.hasHeadOutput = false;
    this.hostname = opts.hostname;
    this.level = opts.level || ErrorLevel.WARN;
    this.errorHandler = opts.errorHandler;
    // Items are serialized by an inner stream; its output is re-emitted from this one.
    this.smiStream = new SitemapItemStream({ level: opts.level });
    this.smiStream.on('data', (data) => this.push(data));
    this.lastmodDateOnly = opts.lastmodDateOnly || false;
    this.xmlNS = opts.xmlns || defaultXMLNS;
    this.xslUrl = opts.xslUrl;
  }
  /**
   * Emits the header before the first item, then normalizes, validates and
   * serializes the item.
   */
  _transform(item, encoding, callback) {
    if (!this.hasHeadOutput) {
      this.hasHeadOutput = true;
      this.push(getURLSetNs(this.xmlNS, this.xslUrl));
    }
    if (!this.smiStream.write(validateSMIOptions(normalizeURL(item, this.hostname, this.lastmodDateOnly), this.level, this.errorHandler))) {
      // Inner stream is saturated: wait for it to drain before accepting more input.
      this.smiStream.once('drain', callback);
    }
    else {
      process.nextTick(callback);
    }
  }
  /**
   * Closes the `<urlset>` element, or reports EmptySitemap when no item was written.
   */
  _flush(cb) {
    if (!this.hasHeadOutput) {
      cb(new EmptySitemap());
    }
    else {
      this.push(closetag);
      cb();
    }
  }
}
/**
 * Collects everything a readable stream emits and delivers it as one Buffer.
 *
 * The stream is piped into an in-memory sink. When the sink finishes, the
 * promise resolves with the concatenated chunks, or rejects with EmptyStream
 * when no chunk was received. An 'error' on the source or on the sink rejects
 * the promise.
 *
 * ⚠️ CAUTION: the whole output is held in memory until the stream ends, so this
 * should not generally be used in production / when writing to files.
 *
 * @param {Readable} stream - The readable stream to collect.
 * @returns {Promise<Buffer>} Resolves with the concatenated data; rejects with
 * the stream error, or with EmptyStream if the stream produced nothing.
 */
export function streamToPromise(stream) {
  return new Promise((fulfil, fail) => {
    const chunks = [];
    const sink = new Writable({
      write(chunk, _encoding, done) {
        chunks.push(chunk);
        done();
      },
    });
    const settle = () => {
      if (chunks.length === 0) {
        fail(new EmptyStream());
        return;
      }
      fulfil(Buffer.concat(chunks));
    };
    // pipe() does not forward errors, so the source needs its own listener.
    stream.on('error', fail);
    stream.pipe(sink);
    // Errors while buffering are unlikely but handled for completeness.
    sink.on('error', fail);
    sink.on('finish', settle);
  });
}

107
node_modules/sitemap/dist/esm/lib/sitemap-xml.d.ts generated vendored Normal file
View File

@@ -0,0 +1,107 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { TagNames, IndexTagNames, StringObj } from './types.js';
/**
* Escapes text content for safe inclusion in XML text nodes.
*
* **Security Model:**
* - Escapes `&` → `&amp;` (required to prevent entity interpretation)
* - Escapes `<` → `&lt;` (required to prevent tag injection)
* - Escapes `>` → `&gt;` (defense-in-depth, prevents CDATA injection)
* - Does NOT escape `"` or `'` (not required in text content, only in attributes)
* - Removes invalid XML Unicode characters per XML 1.0 spec
*
* `&` is replaced first, so the entities produced for `<` and `>` are not
* escaped a second time.
*
* **Why quotes aren't escaped:**
* In XML text content (between tags), quotes have no special meaning and don't
* need escaping. They only need escaping in attribute values, which is handled
* by the `otag()` function.
*
* @param txt - The text content to escape
* @returns XML-safe escaped text with invalid characters removed
* @throws {TypeError} If txt is not a string
*
* @example
* text('Hello & World'); // Returns: 'Hello &amp; World'
* text('5 < 10'); // Returns: '5 &lt; 10'
* text('Hello "World"'); // Returns: 'Hello "World"' (quotes OK in text)
*
* @see https://www.w3.org/TR/xml/#syntax
*/
export declare function text(txt: string): string;
/**
* Generates an opening XML tag with optional attributes.
*
* **Security Model:**
* - Validates attribute names to prevent injection via malformed names
* - Escapes all attribute values with proper XML entity encoding
* - Escapes `&`, `<`, `>`, `"`, and `'` in attribute values
* - Removes invalid XML Unicode characters
* - Does NOT validate or escape `nodeName`; it is inserted verbatim
*
* Attribute values use full escaping (including quotes) because they appear
* within quoted strings in the XML output: `<tag attr="value">`.
* Values are not coerced: a non-string value (e.g. a number) raises a TypeError.
*
* @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
* @param attrs - Optional object mapping attribute names to string values
* @param selfClose - If true, generates a self-closing tag (e.g., `<tag/>`). Defaults to false
* @returns Opening XML tag string
* @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
* @throws {TypeError} If nodeName is not a string or attrs values are not strings
*
* @example
* otag('url'); // Returns: '<url>'
* otag('video:player_loc', { autoplay: 'ap=1' }); // Returns: '<video:player_loc autoplay="ap=1">'
* otag('image:image', {}, true); // Returns: '<image:image/>'
*
* @see https://www.w3.org/TR/xml/#NT-Attribute
*/
export declare function otag(nodeName: TagNames | IndexTagNames, attrs?: StringObj, selfClose?: boolean): string;
/**
* Generates a closing XML tag.
*
* The name is only type-checked; it is inserted verbatim without validation
* or escaping, so it must come from a trusted source.
*
* @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
* @returns Closing XML tag string
* @throws {TypeError} If nodeName is not a string
*
* @example
* ctag('url'); // Returns: '</url>'
* ctag('video:title'); // Returns: '</video:title>'
*/
export declare function ctag(nodeName: TagNames | IndexTagNames): string;
/**
* Generates a complete XML element with optional attributes and text content.
*
* This is a convenience function that combines `otag()`, `text()`, and `ctag()`.
* It supports three usage patterns via function overloading:
*
* 1. Element with text content: `element('loc', 'https://example.com')`
* 2. Element with attributes and text: `element('video:player_loc', { autoplay: 'ap=1' }, 'https://...')`
* 3. Self-closing element with attributes: `element('image:image', { href: '...' })`
*
* Dispatch rules of the implementation:
* - A string second argument is always treated as text content (pattern 1);
* a third argument is ignored in that case.
* - Only an `undefined` third argument selects the self-closing form; an empty
* string still produces an opening and a closing tag.
*
* @param nodeName - The XML element name
* @param attrs - Either a string (text content) or object (attributes)
* @param innerText - Optional text content when attrs is an object
* @returns Complete XML element string
* @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
* @throws {TypeError} If arguments have invalid types
*
* @example
* // Pattern 1: Simple element with text
* element('loc', 'https://example.com')
* // Returns: '<loc>https://example.com</loc>'
*
* @example
* // Pattern 2: Element with attributes and text
* element('video:player_loc', { autoplay: 'ap=1' }, 'https://example.com/video')
* // Returns: '<video:player_loc autoplay="ap=1">https://example.com/video</video:player_loc>'
*
* @example
* // Pattern 3: Self-closing element with attributes
* element('xhtml:link', { rel: 'alternate', href: 'https://example.com/fr' })
* // Returns: '<xhtml:link rel="alternate" href="https://example.com/fr"/>'
*/
export declare function element(nodeName: TagNames, attrs: StringObj, innerText: string): string;
export declare function element(nodeName: TagNames | IndexTagNames, innerText: string): string;
export declare function element(nodeName: TagNames, attrs: StringObj): string;

181
node_modules/sitemap/dist/esm/lib/sitemap-xml.js generated vendored Normal file
View File

@@ -0,0 +1,181 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { InvalidXMLAttributeNameError } from './errors.js';
/**
* Regular expression matching the Unicode characters this module strips from XML output.
*
* Based on the XML 1.0 specification (https://www.w3.org/TR/xml/#charsets):
* - Control characters (U+0000-U+001F except tab, newline, carriage return)
* - Delete character (U+007F)
* - C1 control characters (U+0080-U+009F except U+0085)
* - Unpaired (lone) surrogates (U+D800-U+DFFF); because of the `u` flag a
* well-formed surrogate pair is matched as one code point and is not removed
* - Non-characters (\p{NChar} - permanently reserved code points)
*
* Performance note: This regex uses Unicode property escapes and may be slower
* on very large strings (100KB+). Consider pre-validation for untrusted input.
*
* @see https://www.w3.org/TR/xml/#charsets
*/
const invalidXMLUnicodeRegex =
// eslint-disable-next-line no-control-regex
/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u0084\u0086-\u009F\uD800-\uDFFF\p{NChar}]/gu;
/**
* Regular expressions for XML entity escaping.
* All are global (`g`) so a single `replace` call substitutes every occurrence.
*/
const amp = /&/g;
const lt = /</g;
const gt = />/g;
const apos = /'/g;
const quot = /"/g;
/**
* Valid XML attribute name pattern. XML names must:
* - Start with a letter, underscore, or colon
* - Contain only letters, digits, hyphens, underscores, colons, or periods
*
* This is a simplified (ASCII-only) validation that accepts the most common attribute names.
* Note: In practice, this library only uses namespaced attributes like "video:title"
* which are guaranteed to be valid.
*
* @see https://www.w3.org/TR/xml/#NT-Name
*/
const validAttributeNameRegex = /^[a-zA-Z_:][\w:.-]*$/;
/**
 * Rejects attribute names that do not match the module's attribute-name pattern.
 *
 * A valid name starts with a letter, underscore, or colon and continues with
 * word characters, colons, periods, or hyphens.
 *
 * @param name - The attribute name to check
 * @throws {InvalidXMLAttributeNameError} If the name does not match the pattern
 *
 * @example
 * validateAttributeName('href'); // OK
 * validateAttributeName('xml:lang'); // OK
 * validateAttributeName('data-value'); // OK
 * validateAttributeName('<script>'); // Throws InvalidXMLAttributeNameError
 */
function validateAttributeName(name) {
  const isValidName = validAttributeNameRegex.test(name);
  if (isValidName) {
    return;
  }
  throw new InvalidXMLAttributeNameError(name);
}
/**
 * Escapes a string for use as XML text content (between tags).
 *
 * Steps, in order:
 * 1. `&` → `&amp;` (first, so later entities are not escaped twice)
 * 2. `<` → `&lt;`
 * 3. `>` → `&gt;`
 * 4. characters matched by the invalid-character pattern are removed
 *
 * Quotes are left untouched: they carry no special meaning in text content
 * and are only escaped for attribute values by `otag()`.
 *
 * @param txt - The text content to escape
 * @returns The escaped text with invalid characters removed
 * @throws {TypeError} If txt is not a string
 *
 * @example
 * text('Hello & World'); // Returns: 'Hello &amp; World'
 * text('5 < 10'); // Returns: '5 &lt; 10'
 * text('Hello "World"'); // Returns: 'Hello "World"' (quotes OK in text)
 *
 * @see https://www.w3.org/TR/xml/#syntax
 */
export function text(txt) {
  if (typeof txt !== 'string') {
    throw new TypeError(`text() requires a string, received ${typeof txt}: ${String(txt)}`);
  }
  const substitutions = [
    [amp, '&amp;'],
    [lt, '&lt;'],
    [gt, '&gt;'],
    [invalidXMLUnicodeRegex, ''],
  ];
  let escaped = txt;
  for (const [pattern, replacement] of substitutions) {
    escaped = escaped.replace(pattern, replacement);
  }
  return escaped;
}
/**
 * Generates an opening XML tag with optional attributes.
 *
 * **Security Model:**
 * - Validates attribute names to prevent injection via malformed names
 * - Escapes `&`, `<`, `>`, `'`, and `"` in attribute values
 * - Removes invalid XML Unicode characters from attribute values
 * - Serializes only the own enumerable properties of `attrs`; properties
 *   inherited through the prototype chain are ignored
 * - Does NOT validate or escape `nodeName`; it is inserted verbatim
 *
 * Attribute values use full escaping (including quotes) because they appear
 * within quoted strings in the XML output: `<tag attr="value">`.
 *
 * @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
 * @param attrs - Optional object mapping attribute names to string values
 * @param selfClose - If true, generates a self-closing tag (e.g., `<tag/>`)
 * @returns Opening XML tag string
 * @throws {InvalidXMLAttributeNameError} If an attribute name contains invalid characters
 * @throws {TypeError} If nodeName is not a string or attrs values are not strings
 *
 * @example
 * otag('url'); // Returns: '<url>'
 * otag('video:player_loc', { autoplay: 'ap=1' }); // Returns: '<video:player_loc autoplay="ap=1">'
 * otag('image:image', {}, true); // Returns: '<image:image/>'
 *
 * @see https://www.w3.org/TR/xml/#NT-Attribute
 */
export function otag(nodeName, attrs, selfClose = false) {
  if (typeof nodeName !== 'string') {
    throw new TypeError(`otag() nodeName must be a string, received ${typeof nodeName}: ${String(nodeName)}`);
  }
  let attrstr = '';
  // Object.entries visits own enumerable keys only, in the same order as for...in,
  // so a polluted or extended prototype cannot add attributes.
  for (const [k, attrValue] of Object.entries(attrs ?? {})) {
    // Validate attribute name to prevent injection
    validateAttributeName(k);
    if (typeof attrValue !== 'string') {
      throw new TypeError(`otag() attribute "${k}" value must be a string, received ${typeof attrValue}: ${String(attrValue)}`);
    }
    // Escape attribute value with full entity encoding (`&` first to avoid double escaping)
    const val = attrValue
      .replace(amp, '&amp;')
      .replace(lt, '&lt;')
      .replace(gt, '&gt;')
      .replace(apos, '&apos;')
      .replace(quot, '&quot;')
      .replace(invalidXMLUnicodeRegex, '');
    attrstr += ` ${k}="${val}"`;
  }
  return `<${nodeName}${attrstr}${selfClose ? '/' : ''}>`;
}
/**
 * Builds the closing tag for an element.
 *
 * The name is only type-checked; it is inserted verbatim.
 *
 * @param nodeName - The XML element name (e.g., 'url', 'loc', 'video:title')
 * @returns Closing XML tag string
 * @throws {TypeError} If nodeName is not a string
 *
 * @example
 * ctag('url'); // Returns: '</url>'
 * ctag('video:title'); // Returns: '</video:title>'
 */
export function ctag(nodeName) {
  const receivedType = typeof nodeName;
  if (receivedType === 'string') {
    return '</' + nodeName + '>';
  }
  throw new TypeError(`ctag() nodeName must be a string, received ${receivedType}: ${String(nodeName)}`);
}
/**
 * Builds a complete XML element from `otag()`, `text()` and `ctag()`.
 *
 * Call shapes:
 * 1. `element(name, textContent)` - a string second argument is always text;
 *    a third argument is ignored in that case.
 * 2. `element(name, attrs, textContent)` - attributes plus text content
 *    (an empty string still yields an opening and closing tag).
 * 3. `element(name, attrs)` - self-closing tag carrying only attributes.
 *
 * @param nodeName - The XML element name
 * @param attrs - Either a string (text content) or an object (attributes)
 * @param innerText - Optional text content when attrs is an object
 * @returns Complete XML element string
 */
export function element(nodeName, attrs, innerText) {
  // Shape 1: the second argument is the text content.
  if (typeof attrs === 'string') {
    return [otag(nodeName), text(attrs), ctag(nodeName)].join('');
  }
  // Shape 3: attributes only, rendered as a self-closing tag.
  if (innerText === undefined) {
    return otag(nodeName, attrs, true);
  }
  // Shape 2: attributes and text content.
  return [otag(nodeName, attrs), text(innerText), ctag(nodeName)].join('');
}

400
node_modules/sitemap/dist/esm/lib/types.d.ts generated vendored Normal file
View File

@@ -0,0 +1,400 @@
import { URL } from 'node:url';
/**
* How frequently the page is likely to change. This value provides general
* information to search engines and may not correlate exactly to how often they crawl the page. Please note that the
* value of this tag is considered a hint and not a command. See
* <https://www.sitemaps.org/protocol.html#xmlTagDefinitions> for the acceptable
* values.
*
* Each member's value is the lowercase string form of its name.
*/
export declare enum EnumChangefreq {
DAILY = "daily",
MONTHLY = "monthly",
ALWAYS = "always",
HOURLY = "hourly",
WEEKLY = "weekly",
YEARLY = "yearly",
NEVER = "never"
}
/**
* Yes/no flag values. The three casings (upper, capitalized, lower) are
* separate members, so each spelling is its own accepted value.
*/
export declare enum EnumYesNo {
YES = "YES",
NO = "NO",
Yes = "Yes",
No = "No",
yes = "yes",
no = "no"
}
/**
* Relationship values used by the `restriction:relationship` and
* `platform:relationship` members of video items.
*/
export declare enum EnumAllowDeny {
ALLOW = "allow",
DENY = "deny"
}
/**
* News metadata for a sitemap entry.
* https://support.google.com/webmasters/answer/74288?hl=en&ref_topic=4581190
*/
export interface NewsItem {
/**
* Access restriction of the article; omit when none applies.
*/
access?: 'Registration' | 'Subscription';
/**
* The publication the article appears in.
*/
publication: {
/**
* The name of the publication.
*/
name: string;
/**
* The `<language>` is the language of your publication. Use an ISO 639
* language code (2 or 3 letters).
*/
language: string;
};
/**
* @example 'PressRelease, Blog'
*/
genres?: string;
/**
* Article publication date in W3C format, using either the "complete date" (YYYY-MM-DD) format or the "complete date
* plus hours, minutes, and seconds"
*/
publication_date: string;
/**
* The title of the news article
* @example 'Companies A, B in Merger Talks'
*/
title: string;
/**
* @example 'business, merger, acquisition'
*/
keywords?: string;
/**
* @example 'NASDAQ:A, NASDAQ:B'
*/
stock_tickers?: string;
}
/**
* Sitemap Image
* https://support.google.com/webmasters/answer/178636?hl=en&ref_topic=4581190
*
* Only `url` is required; every other member is optional.
*/
export interface Img {
/**
* The URL of the image
* @example 'https://example.com/image.jpg'
*/
url: string;
/**
* The caption of the image
* @example 'Thanksgiving dinner'
*/
caption?: string;
/**
* The title of the image
* @example 'Star Wars EP IV'
*/
title?: string;
/**
* The geographic location of the image.
* @example 'Limerick, Ireland'
*/
geoLocation?: string;
/**
* A URL to the license of the image.
* @example 'https://example.com/license.txt'
*/
license?: string;
}
/**
* Members shared by the strict (`VideoItem`) and loose (`VideoItemLoose`) video
* item shapes. Keys containing a colon (e.g. `'player_loc:autoplay'`) pair a tag
* name with an attribute name.
*/
interface VideoItemBase {
/**
* A URL pointing to the video thumbnail image file
* @example "https://rtv3-img-roosterteeth.akamaized.net/store/0e841100-289b-4184-ae30-b6a16736960a.jpg/sm/thumb3.jpg"
*/
thumbnail_loc: string;
/**
* The title of the video
* @example '2018:E6 - GoldenEye: Source'
*/
title: string;
/**
* A description of the video. Maximum 2048 characters.
* @example 'We play gun game in GoldenEye: Source with a good friend of ours. His name is Gruchy. Dan Gruchy.'
*/
description: string;
/**
* A URL pointing to the actual video media file. Should be one of the supported formats. HTML is not a supported
* format. Flash is allowed, but no longer supported on most mobile platforms, and so may be indexed less well. Must
* not be the same as the `<loc>` URL.
* @example "http://streamserver.example.com/video123.mp4"
*/
content_loc?: string;
/**
* A URL pointing to a player for a specific video. Usually this is the information in the src element of an `<embed>`
* tag. Must not be the same as the `<loc>` URL
* @example "https://roosterteeth.com/embed/rouletsplay-2018-goldeneye-source"
*/
player_loc?: string;
/**
* A string the search engine can append as a query param to enable automatic
* playback. Equivalent to the autoplay attribute on the player_loc tag.
* @example 'ap=1'
*/
'player_loc:autoplay'?: string;
/**
* Whether the search engine can embed the video in search results. Allowed values are yes or no.
*/
'player_loc:allow_embed'?: EnumYesNo;
/**
* The length of the video in seconds
* @example 600
*/
duration?: number;
/**
* The date after which the video will no longer be available.
* @example "2012-07-16T19:20:30+08:00"
*/
expiration_date?: string;
/**
* The number of times the video has been viewed
*/
view_count?: number;
/**
* The date the video was first published, in W3C format.
* @example "2012-07-16T19:20:30+08:00"
*/
publication_date?: string;
/**
* A short description of the broad category that the video belongs to. This is a string no longer than 256 characters.
* @example Baking
*/
category?: string;
/**
* Whether to show or hide your video in search results from specific countries.
* @example "IE GB US CA"
*/
restriction?: string;
/**
* Whether the countries in restriction are allowed or denied
* @example 'deny'
*/
'restriction:relationship'?: EnumAllowDeny;
/**
* NOTE(review): undocumented upstream; presumably the URL of a gallery page
* that contains this video - confirm against the Google video sitemap docs.
*/
gallery_loc?: string;
/**
* [Optional] Specifies the URL of a webpage with additional information about this uploader. This URL must be in the same domain as the <loc> tag.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
* @example http://www.example.com/users/grillymcgrillerson
*/
'uploader:info'?: string;
/**
* NOTE(review): undocumented upstream; presumably the title attribute of the
* gallery_loc tag - confirm.
*/
'gallery_loc:title'?: string;
/**
* The price to download or view the video. Omit this tag for free videos.
* @example "1.99"
*/
price?: string;
/**
* Specifies the resolution of the purchased version. Supported values are hd and sd.
* @example "HD"
*/
'price:resolution'?: Resolution;
/**
* Specifies the currency in ISO4217 format.
* @example "USD"
*/
'price:currency'?: string;
/**
* Specifies the purchase option. The `PriceType` type accepts rent and purchase
* (each in lowercase or uppercase).
* @example "rent"
*/
'price:type'?: PriceType;
/**
* The video uploader's name. Only one <video:uploader> is allowed per video. String value, max 255 characters.
* @example "GrillyMcGrillerson"
*/
uploader?: string;
/**
* Whether to show or hide your video in search results on specified platform types. This is a list of space-delimited
* platform types. See <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190> for more detail
* @example "tv"
*/
platform?: string;
/**
* NOTE(review): undocumented upstream; meaning not evident from this file - confirm before relying on it.
*/
id?: string;
/**
* Presumably whether the platform types in platform are allowed or denied
* (mirrors 'restriction:relationship') - confirm.
*/
'platform:relationship'?: EnumAllowDeny;
}
/**
* Video price type - supports both lowercase and uppercase variants
* as allowed by the Google Video Sitemap specification.
* Used as the type of the `'price:type'` member of video items.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
*/
export type PriceType = 'rent' | 'purchase' | 'RENT' | 'PURCHASE';
/**
* Video resolution - supports both lowercase and uppercase variants
* as allowed by the Google Video Sitemap specification
*
* Only the all-lowercase and all-uppercase spellings are members of the union;
* mixed-case strings such as 'Hd' are not assignable to this type.
* @see https://developers.google.com/search/docs/advanced/sitemaps/video-sitemaps
*/
export type Resolution = 'HD' | 'hd' | 'sd' | 'SD';
/**
* Sitemap video. <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190>
*
* Strict form of a video entry: `tag` is always an array, `rating` is a number and the
* yes/no flags are EnumYesNo values. This is the element type of SitemapItem.video.
*/
export interface VideoItem extends VideoItemBase {
/**
* An arbitrary string tag describing the video. Tags are generally very short descriptions of key concepts associated
* with a video or piece of content.
* @example ['Baking']
*/
tag: string[];
/**
* The rating of the video. Supported values are float numbers.
* @example 2.5
*/
rating?: number;
/**
* Yes/no flag for family-friendliness.
* NOTE(review): presumably written as the <video:family_friendly> element - confirm against the serializer.
*/
family_friendly?: EnumYesNo;
/**
* Indicates whether a subscription (either paid or free) is required to view
* the video. Allowed values are yes or no.
*/
requires_subscription?: EnumYesNo;
/**
* Indicates whether the video is a live stream. Supported values are yes or no.
*/
live?: EnumYesNo;
}
/**
* Sitemap video. <https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190>
*
* Loose (pre-normalization) form of a video entry. It accepts a single tag or a list,
* a rating given as a string, and plain booleans for the yes/no flags.
*/
export interface VideoItemLoose extends VideoItemBase {
/**
* An arbitrary string tag describing the video. Tags are generally very short descriptions of key concepts associated
* with a video or piece of content.
* A single string is accepted as well as an array.
* @example ['Baking']
*/
tag?: string | string[];
/**
* The rating of the video. Supported values are float numbers.
* A numeric string is accepted as well as a number.
* @example 2.5
*/
rating?: string | number;
/**
* Yes/no flag for family-friendliness; a boolean is accepted in place of an EnumYesNo value.
*/
family_friendly?: EnumYesNo | boolean;
/**
* Yes/no flag telling whether a subscription is required; a boolean is accepted in place of an EnumYesNo value.
*/
requires_subscription?: EnumYesNo | boolean;
/**
* Indicates whether the video is a live stream. Supported values are yes or no.
*/
live?: EnumYesNo | boolean;
}
/**
* A link attached to a sitemap entry (element type of SitemapItem.links).
* https://support.google.com/webmasters/answer/189077
*/
export interface LinkItem {
/**
* @example 'en'
*/
lang: string;
/**
* @example 'en-us'
*/
hreflang?: string;
/**
* Location of the linked page.
*/
url: string;
}
/**
* A URL paired with an optional last-modification value.
* NOTE(review): presumably one entry of a sitemap index file - confirm against
* the sitemap index stream that consumes it.
*/
export interface IndexItem {
/** Location of the referenced resource. */
url: string;
/** Optional last-modification value. NOTE(review): the expected date format is not visible here - confirm. */
lastmod?: string;
}
/**
* Fields shared by the strict (SitemapItem) and loose (SitemapItemLoose) entry shapes.
* Not exported; use one of the two derived interfaces.
*/
interface SitemapItemBase {
/** Last-modification date of the page, as a string. */
lastmod?: string;
/** How frequently the page is likely to change. */
changefreq?: EnumChangefreq;
/** NOTE(review): presumably controls how many decimals of `priority` are written - confirm against the serializer. */
fullPrecisionPriority?: boolean;
/** Relative priority of the page. */
priority?: number;
/** News metadata for the page. */
news?: NewsItem;
expires?: string;
androidLink?: string;
ampLink?: string;
/** Location of the page. */
url: string;
}
/**
* Strict options for individual sitemap entries
*
* The three collections are required here (they are optional and more permissive
* in SitemapItemLoose). This is the return type of normalizeURL.
*/
export interface SitemapItem extends SitemapItemBase {
/** Images attached to the entry. */
img: Img[];
/** Videos attached to the entry, in strict form. */
video: VideoItem[];
/** Links attached to the entry. */
links: LinkItem[];
}
/**
* Options for individual sitemap entries prior to normalization
*
* Accepts single values where SitemapItem requires arrays, and offers extra
* ways of supplying the last-modification date.
*/
export interface SitemapItemLoose extends SitemapItemBase {
/** One video or a list of videos, in loose form. */
video?: VideoItemLoose | VideoItemLoose[];
/** One image or a list of images; each may be a plain URL string or an Img object. */
img?: string | Img | (string | Img)[];
/** Links attached to the entry. */
links?: LinkItem[];
/** A file reference (path, Buffer or URL) from which the last-modification date can be derived. */
lastmodfile?: string | Buffer | URL;
/** Last-modification date supplied as a date string. */
lastmodISO?: string;
/** NOTE(review): meaning is not visible in this declaration - confirm where this flag is consumed. */
lastmodrealtime?: boolean;
}
/**
* How to handle errors in passed in urls
*
* Each member's runtime value is the lowercase form of its name. An ErrorLevel is
* the second argument handed to an ErrorHandler.
*/
export declare enum ErrorLevel {
/**
* Validation will be skipped and nothing logged or thrown.
*/
SILENT = "silent",
/**
* If an invalid value is encountered, a console.warn will be called with details
*/
WARN = "warn",
/**
* An Error will be thrown on encountering invalid data.
*/
THROW = "throw"
}
/**
* Callback that receives a validation error together with the ErrorLevel in effect
* and decides what to do with it (for example throw it, log it or ignore it).
*/
export type ErrorHandler = (error: Error, level: ErrorLevel) => void;
/**
* Names of the XML elements used in a sitemap, including the namespaced
* video:, image:, news:, mobile: and xhtml: elements.
* Every member's value is identical to its key.
*/
export declare enum TagNames {
url = "url",
loc = "loc",
urlset = "urlset",
lastmod = "lastmod",
changefreq = "changefreq",
priority = "priority",
'video:thumbnail_loc' = "video:thumbnail_loc",
'video:video' = "video:video",
'video:title' = "video:title",
'video:description' = "video:description",
'video:tag' = "video:tag",
'video:duration' = "video:duration",
'video:player_loc' = "video:player_loc",
'video:content_loc' = "video:content_loc",
'image:image' = "image:image",
'image:loc' = "image:loc",
'image:geo_location' = "image:geo_location",
'image:license' = "image:license",
'image:title' = "image:title",
'image:caption' = "image:caption",
'video:requires_subscription' = "video:requires_subscription",
'video:publication_date' = "video:publication_date",
'video:id' = "video:id",
'video:restriction' = "video:restriction",
'video:family_friendly' = "video:family_friendly",
'video:view_count' = "video:view_count",
'video:uploader' = "video:uploader",
'video:expiration_date' = "video:expiration_date",
'video:platform' = "video:platform",
'video:price' = "video:price",
'video:rating' = "video:rating",
'video:category' = "video:category",
'video:live' = "video:live",
'video:gallery_loc' = "video:gallery_loc",
'news:news' = "news:news",
'news:publication' = "news:publication",
'news:name' = "news:name",
'news:access' = "news:access",
'news:genres' = "news:genres",
'news:publication_date' = "news:publication_date",
'news:title' = "news:title",
'news:keywords' = "news:keywords",
'news:stock_tickers' = "news:stock_tickers",
'news:language' = "news:language",
'mobile:mobile' = "mobile:mobile",
'xhtml:link' = "xhtml:link",
'expires' = "expires"
}
/**
* Names of the XML elements used in a sitemap index.
* Every member's value is identical to its key.
*/
export declare enum IndexTagNames {
sitemap = "sitemap",
sitemapindex = "sitemapindex",
loc = "loc",
lastmod = "lastmod"
}
/**
* Generic object with string keys and any values
* Used for XML attribute building and other flexible data structures
*/
export interface StringObj {
[index: string]: any;
}
export {};

106
node_modules/sitemap/dist/esm/lib/types.js generated vendored Normal file
View File

@@ -0,0 +1,106 @@
/**
 * Change-frequency hints a sitemap entry may carry. Search engines treat the
 * value as a hint rather than a command, so it may not match how often a page
 * is actually crawled. The acceptable values are listed at
 * <https://www.sitemaps.org/protocol.html#xmlTagDefinitions>.
 */
export const EnumChangefreq = {
    DAILY: 'daily',
    MONTHLY: 'monthly',
    ALWAYS: 'always',
    HOURLY: 'hourly',
    WEEKLY: 'weekly',
    YEARLY: 'yearly',
    NEVER: 'never',
};
/**
 * The accepted spellings of "yes" and "no": upper case, capitalised and lower case.
 * Each member's value is identical to its key.
 */
export const EnumYesNo = {
    YES: 'YES',
    NO: 'NO',
    Yes: 'Yes',
    No: 'No',
    yes: 'yes',
    no: 'no',
};
/**
 * The two relationship values, "allow" and "deny".
 */
export const EnumAllowDeny = {
    ALLOW: 'allow',
    DENY: 'deny',
};
/**
 * How to handle errors in passed in urls
 */
export const ErrorLevel = {
    /** Validation will be skipped and nothing logged or thrown. */
    SILENT: 'silent',
    /** If an invalid value is encountered, a console.warn will be called with details */
    WARN: 'warn',
    /** An Error will be thrown on encountering invalid data. */
    THROW: 'throw',
};
/**
 * Names of the XML elements used in a sitemap. Every member's value is identical
 * to its key, so the lookup object is derived from a single list of names.
 */
export const TagNames = Object.fromEntries([
    'url',
    'loc',
    'urlset',
    'lastmod',
    'changefreq',
    'priority',
    'video:thumbnail_loc',
    'video:video',
    'video:title',
    'video:description',
    'video:tag',
    'video:duration',
    'video:player_loc',
    'video:content_loc',
    'image:image',
    'image:loc',
    'image:geo_location',
    'image:license',
    'image:title',
    'image:caption',
    'video:requires_subscription',
    'video:publication_date',
    'video:id',
    'video:restriction',
    'video:family_friendly',
    'video:view_count',
    'video:uploader',
    'video:expiration_date',
    'video:platform',
    'video:price',
    'video:rating',
    'video:category',
    'video:live',
    'video:gallery_loc',
    'news:news',
    'news:publication',
    'news:name',
    'news:access',
    'news:genres',
    'news:publication_date',
    'news:title',
    'news:keywords',
    'news:stock_tickers',
    'news:language',
    'mobile:mobile',
    'xhtml:link',
    'expires',
].map((tagName) => [tagName, tagName]));
/**
 * Names of the XML elements used in a sitemap index.
 * Every member's value is identical to its key.
 */
export const IndexTagNames = {
    sitemap: 'sitemap',
    sitemapindex: 'sitemapindex',
    loc: 'loc',
    lastmod: 'lastmod',
};

48
node_modules/sitemap/dist/esm/lib/utils.d.ts generated vendored Normal file
View File

@@ -0,0 +1,48 @@
import { Readable, ReadableOptions, TransformOptions } from 'node:stream';
import { SitemapItem, SitemapItemLoose } from './types.js';
export { validateSMIOptions } from './validation.js';
/**
* Combines multiple streams into one
* @param streams the streams to combine
* @param options optional stream options applied to the combined output stream
* @returns a single Readable carrying the data of every input stream
*/
export declare function mergeStreams(streams: Readable[], options?: TransformOptions): Readable;
/**
* Options for ReadlineStream: the usual Readable options plus the stream to read from.
*/
export interface ReadlineStreamOptions extends ReadableOptions {
/** The stream whose content is read. */
input: Readable;
}
/**
* Wraps node's ReadLine in a stream
*
* A Readable built from a ReadlineStreamOptions object, whose `input` is the
* stream to read from.
*/
export declare class ReadlineStream extends Readable {
/** Internal handle on the wrapped source. */
private _source;
/**
* @param options Readable options plus the `input` stream to read from
*/
constructor(options: ReadlineStreamOptions);
/**
* Readable's pull hook.
* @param size advisory number of items requested
*/
_read(size: number): void;
}
/**
* Takes a stream likely from fs.createReadStream('./path') and returns a stream
* of sitemap items
* @param stream a stream of line separated urls.
* @param opts.isJSON is the stream line separated JSON. leave undefined to guess
* @returns a Readable emitting one item per input line
*/
export declare function lineSeparatedURLsToSitemapOptions(stream: Readable, { isJSON }?: {
isJSON?: boolean;
}): Readable;
/**
* Splits an array into consecutive groups of at most `size` elements.
* @param array the array to split
* @param size the maximum length of each group
* @returns an array of groups
*
* Based on lodash's implementation of chunk.
*
* Copyright JS Foundation and other contributors <https://js.foundation/>
*
* Based on Underscore.js, copyright Jeremy Ashkenas,
* DocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>
*
* This software consists of voluntary contributions made by many
* individuals. For exact contribution history, see the revision history
* available at https://github.com/lodash/lodash
*/
export declare function chunk(array: any[], size?: number): any[];
/**
* Converts the passed in sitemap entry into one capable of being consumed by SitemapItem
* @param {string | SitemapItemLoose} elem the string or object to be converted
* @param {string} hostname optional hostname used when converting the entry's URLs
* @param {boolean} lastmodDateOnly optional flag affecting how `lastmod` is formatted
* NOTE(review): presumably trims `lastmod` to its date part - confirm against the implementation
* @returns {SitemapItem} the strict form of the entry
*/
export declare function normalizeURL(elem: string | SitemapItemLoose, hostname?: string, lastmodDateOnly?: boolean): SitemapItem;

221
node_modules/sitemap/dist/esm/lib/utils.js generated vendored Normal file
View File

@@ -0,0 +1,221 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { statSync } from 'node:fs';
import { Readable, Transform, PassThrough, } from 'node:stream';
import { createInterface } from 'node:readline';
import { URL } from 'node:url';
import { EnumYesNo, } from './types.js';
// Re-export validateSMIOptions from validation.ts for backward compatibility
export { validateSMIOptions } from './validation.js';
/**
 * Combines multiple streams into one.
 *
 * Every source is piped into a single PassThrough. The output is ended through
 * the regular stream API once the last source has ended, so consumers see
 * `'end'` only after all buffered data has been delivered. An empty list of
 * sources yields a stream that ends immediately.
 *
 * Errors emitted by a source are not forwarded to the returned stream.
 *
 * @param streams the streams to combine
 * @param options stream options passed to the PassThrough that carries the output
 * @returns the combined stream
 */
export function mergeStreams(streams, options) {
    const merged = new PassThrough(options);
    let pending = streams.length;
    if (pending === 0) {
        // No source will ever signal completion, so finish the output right away.
        merged.end();
        return merged;
    }
    for (const stream of streams) {
        // end: false stops the first source that finishes from closing the shared output.
        stream.pipe(merged, { end: false });
        stream.once('end', () => {
            pending -= 1;
            if (pending === 0) {
                merged.end();
            }
        });
    }
    return merged;
}
/**
 * Wraps node's ReadLine in a stream.
 *
 * Emits one chunk per line of `options.input`. The stream always runs in object
 * mode, and `autoDestroy` defaults to true when the caller leaves it undefined.
 * The options object passed in is never modified.
 */
export class ReadlineStream extends Readable {
    _source;
    /**
     * @param options Readable options plus `input`, the stream to read lines from
     */
    constructor(options) {
        // Build a fresh options object instead of writing the defaults into the
        // caller's one, which may be shared or reused.
        super({
            ...options,
            autoDestroy: options.autoDestroy === undefined ? true : options.autoDestroy,
            objectMode: true,
        });
        this._source = createInterface({
            input: options.input,
            terminal: false,
            crlfDelay: Infinity,
        });
        // Every time there's data, push it into the internal buffer.
        this._source.on('line', (chunk) => {
            // If push() returns false, then stop reading from source.
            if (!this.push(chunk)) {
                this._source.pause();
            }
        });
        // When the source ends, push the EOF-signaling `null` chunk.
        this._source.on('close', () => {
            this.push(null);
        });
    }
    // _read() will be called when the stream wants to pull more data in.
    // The advisory size argument is ignored in this case.
    _read(size) {
        this._source.resume();
    }
}
/**
 * Takes a stream likely from fs.createReadStream('./path') and returns a stream
 * of sitemap items.
 *
 * Each line is emitted as-is unless it is treated as JSON, in which case the
 * parsed value is emitted. A line that fails to parse is reported through the
 * transform callback, so it surfaces as an `'error'` event on the returned
 * stream instead of a synchronous exception thrown from inside the pipeline.
 *
 * @param stream a stream of line separated urls.
 * @param opts.isJSON is the stream line separated JSON. Leave undefined to guess:
 * a line is then treated as JSON when its first character is `{`.
 * @returns an object-mode stream with one item per input line
 */
export function lineSeparatedURLsToSitemapOptions(stream, { isJSON } = {}) {
    const lineToItem = new Transform({
        objectMode: true,
        transform: (line, encoding, cb) => {
            const treatAsJSON = isJSON || (isJSON === undefined && line[0] === '{');
            if (!treatAsJSON) {
                cb(null, line);
                return;
            }
            let parsed;
            try {
                parsed = JSON.parse(line);
            }
            catch (err) {
                // Hand the failure to the stream; cb must be invoked exactly once.
                cb(err);
                return;
            }
            cb(null, parsed);
        },
    });
    return new ReadlineStream({ input: stream }).pipe(lineToItem);
}
/**
 * Splits `array` into consecutive slices of at most `size` elements.
 *
 * `size` is truncated toward zero first. An empty or missing array, or a size
 * below 1, produces an empty result.
 *
 * Based on lodash's implementation of chunk.
 *
 * Copyright JS Foundation and other contributors <https://js.foundation/>
 *
 * Based on Underscore.js, copyright Jeremy Ashkenas,
 * DocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>
 *
 * This software consists of voluntary contributions made by many
 * individuals. For exact contribution history, see the revision history
 * available at https://github.com/lodash/lodash
 *
 * @param array the array to split
 * @param size maximum length of each slice (defaults to 1)
 * @returns the list of slices, in order
 */
/* eslint-disable @typescript-eslint/no-explicit-any */
export function chunk(array, size = 1) {
    const step = Math.max(Math.trunc(size), 0);
    const total = array ? array.length : 0;
    if (!total || step < 1) {
        return [];
    }
    // Pre-size the result: one slot per slice.
    const slices = new Array(Math.ceil(total / step));
    for (let start = 0, slot = 0; start < total; start += step, slot += 1) {
        slices[slot] = array.slice(start, start + step);
    }
    return slices;
}
/**
 * Maps a boolean onto the lowercase EnumYesNo members (`true` to yes, `false`
 * to no). Anything that is not a boolean, including `undefined`, is returned
 * unchanged.
 */
function boolToYESNO(bool) {
    if (typeof bool !== 'boolean') {
        return bool;
    }
    return bool ? EnumYesNo.yes : EnumYesNo.no;
}
/**
* Converts the passed in sitemap entry into one capable of being consumed by SitemapItem
* @param {string | SitemapItemLoose} elem the string or object to be converted
* @param {string} hostname
* @returns SitemapItemOptions a strict sitemap item option
*/
export function normalizeURL(elem, hostname, lastmodDateOnly = false) {
// SitemapItem
// create object with url property
const smi = {
img: [],
video: [],
links: [],
url: '',
};
if (typeof elem === 'string') {
smi.url = new URL(elem, hostname).toString();
return smi;
}
const { url, img, links, video, lastmodfile, lastmodISO, lastmod, ...other } = elem;
Object.assign(smi, other);
smi.url = new URL(url, hostname).toString();
if (img) {
// prepend hostname to all image urls
smi.img = (Array.isArray(img) ? img : [img]).map((el) => typeof el === 'string'
? { url: new URL(el, hostname).toString() }
: { ...el, url: new URL(el.url, hostname).toString() });
}
if (links) {
smi.links = links.map((link) => ({
...link,
url: new URL(link.url, hostname).toString(),
}));
}
if (video) {
smi.video = (Array.isArray(video) ? video : [video]).map((video) => {
const nv = {
...video,
family_friendly: boolToYESNO(video.family_friendly),
live: boolToYESNO(video.live),
requires_subscription: boolToYESNO(video.requires_subscription),
tag: [],
rating: undefined,
};
if (video.tag !== undefined) {
nv.tag = !Array.isArray(video.tag) ? [video.tag] : video.tag;
}
if (video.rating !== undefined) {
if (typeof video.rating === 'string') {
const parsedRating = parseFloat(video.rating);
// Validate parsed rating is a valid number
if (Number.isNaN(parsedRating)) {
throw new Error(`Invalid video rating "${video.rating}" for URL "${elem.url}": must be a valid number`);
}
nv.rating = parsedRating;
}
else {
nv.rating = video.rating;
}
}
if (typeof video.view_count === 'string') {
const parsedViewCount = parseInt(video.view_count, 10);
// Validate parsed view count is a valid non-negative integer
if (Number.isNaN(parsedViewCount)) {
throw new Error(`Invalid video view_count "${video.view_count}" for URL "${elem.url}": must be a valid number`);
}
if (parsedViewCount < 0) {
throw new Error(`Invalid video view_count "${video.view_count}" for URL "${elem.url}": cannot be negative`);
}
nv.view_count = parsedViewCount;
}
else if (typeof video.view_count === 'number') {
nv.view_count = video.view_count;
}
return nv;
});
}
// If given a file to use for last modified date
if (lastmodfile) {
const { mtime } = statSync(lastmodfile);
const lastmodDate = new Date(mtime);
// Validate date is valid
if (Number.isNaN(lastmodDate.getTime())) {
throw new Error(`Invalid date from file stats for URL "${smi.url}": file modification time is invalid`);
}
smi.lastmod = lastmodDate.toISOString();
// The date of last modification (YYYY-MM-DD)
}
else if (lastmodISO) {
const lastmodDate = new Date(lastmodISO);
// Validate date is valid
if (Number.isNaN(lastmodDate.getTime())) {
throw new Error(`Invalid lastmodISO "${lastmodISO}" for URL "${smi.url}": must be a valid date string`);
}
smi.lastmod = lastmodDate.toISOString();
}
else if (lastmod) {
const lastmodDate = new Date(lastmod);
// Validate date is valid
if (Number.isNaN(lastmodDate.getTime())) {
throw new Error(`Invalid lastmod "${lastmod}" for URL "${smi.url}": must be a valid date string`);
}
smi.lastmod = lastmodDate.toISOString();
}
if (lastmodDateOnly && smi.lastmod) {
smi.lastmod = smi.lastmod.slice(0, 10);
}
return smi;
}

94
node_modules/sitemap/dist/esm/lib/validation.d.ts generated vendored Normal file
View File

@@ -0,0 +1,94 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { SitemapItem, ErrorLevel, EnumChangefreq, EnumYesNo, EnumAllowDeny, PriceType, Resolution, ErrorHandler } from './types.js';
/**
* Regular expressions keyed by sitemap field name.
* A field value is considered valid when the expression stored under its key matches it.
*/
export declare const validators: {
[index: string]: RegExp;
};
/**
* Type guard to check if a string is a valid price type
*/
export declare function isPriceType(pt: string | PriceType): pt is PriceType;
/**
* Type guard to check if a string is a valid resolution
*/
export declare function isResolution(res: string): res is Resolution;
/**
* Type guard to check if a string is a valid changefreq value
*/
export declare function isValidChangeFreq(freq: string): freq is EnumChangefreq;
/**
* Type guard to check if a string is a valid yes/no value
*/
export declare function isValidYesNo(yn: string): yn is EnumYesNo;
/**
* Type guard to check if a string is a valid allow/deny value
*/
export declare function isAllowDeny(ad: string): ad is EnumAllowDeny;
/**
* Validates that a URL is well-formed and meets security requirements
*
* Security: This function enforces that URLs use safe protocols (http/https),
* are within reasonable length limits (2048 chars per sitemaps.org spec),
* and can be properly parsed. This prevents protocol injection attacks and
* ensures compliance with sitemap specifications.
*
* @param url - The URL to validate
* @param paramName - The parameter name for error messages
* @throws {InvalidHostnameError} If the URL is invalid
*/
export declare function validateURL(url: string, paramName: string): void;
/**
* Validates that a path doesn't contain path traversal sequences
*
* Security: This function prevents path traversal attacks by detecting
* any occurrence of '..' in the path, whether it appears as '../', '/..',
* or standalone. This prevents attackers from accessing files outside
* the intended directory structure.
*
* NOTE(review): the implementation presumably applies further checks beyond '..'
* (for example on the kind of path accepted) - confirm against validation.js.
*
* @param path - The path to validate
* @param paramName - The parameter name for error messages
* @throws {InvalidPathError} If the path contains traversal sequences
*/
export declare function validatePath(path: string, paramName: string): void;
/**
* Validates that a public base path is safe for URL construction
*
* Security: This function prevents path traversal attacks and validates
* that the path is safe for use in URL construction within sitemap indexes.
* It checks for '..' sequences, null bytes, and invalid whitespace that
* could be used to manipulate URL structure or inject malicious content.
*
* @param publicBasePath - The public base path to validate
* @throws {InvalidPublicBasePathError} If the path is invalid
*/
export declare function validatePublicBasePath(publicBasePath: string): void;
/**
* Validates that a limit is within acceptable range per sitemaps.org spec
*
* Security: This function enforces sitemap size limits (1-50,000 URLs per
* sitemap) as specified by sitemaps.org. This prevents resource exhaustion
* attacks and ensures compliance with search engine requirements.
*
* @param limit - The limit to validate
* @throws {InvalidLimitError} If the limit is out of range
*/
export declare function validateLimit(limit: number): void;
/**
* Validates that an XSL URL is safe and well-formed
*
* Security: This function validates XSL stylesheet URLs to prevent
* injection attacks. It blocks dangerous protocols and content patterns
* that could be used for XSS or other attacks. The validation uses
* case-insensitive matching to catch obfuscated attacks.
*
* @param xslUrl - The XSL URL to validate
* @throws {InvalidXSLUrlError} If the URL is invalid
*/
export declare function validateXSLUrl(xslUrl: string): void;
/**
* Verifies all data passed in will comply with sitemap spec.
* @param conf Options to validate
* @param level logging level (optional)
* @param errorHandler error handling func (optional); receives each error together with the level
* @returns a SitemapItem
*/
export declare function validateSMIOptions(conf: SitemapItem, level?: ErrorLevel, errorHandler?: ErrorHandler): SitemapItem;

384
node_modules/sitemap/dist/esm/lib/validation.js generated vendored Normal file
View File

@@ -0,0 +1,384 @@
/*!
* Sitemap
* Copyright(c) 2011 Eugene Kalinin
* MIT Licensed
*/
import { InvalidPathError, InvalidHostnameError, InvalidLimitError, InvalidPublicBasePathError, InvalidXSLUrlError, ChangeFreqInvalidError, InvalidAttrValue, InvalidNewsAccessValue, InvalidNewsFormat, InvalidVideoDescription, InvalidVideoDuration, InvalidVideoFormat, InvalidVideoRating, NoURLError, NoConfigError, PriorityInvalidError, InvalidVideoTitle, InvalidVideoViewCount, InvalidVideoTagCount, InvalidVideoCategory, InvalidVideoFamilyFriendly, InvalidVideoRestriction, InvalidVideoRestrictionRelationship, InvalidVideoPriceType, InvalidVideoResolution, InvalidVideoPriceCurrency, } from './errors.js';
import { ErrorLevel, EnumChangefreq, } from './types.js';
import { LIMITS } from './constants.js';
import { isAbsolute } from 'node:path';
/**
* Validator regular expressions for various sitemap fields
*
* Each expression is anchored at both ends, so the whole value has to match.
*/
// Exactly the lowercase word "allow" or "deny"; shared by both *:relationship validators.
const allowDeny = /^(?:allow|deny)$/;
export const validators = {
// Exactly three uppercase ASCII letters.
'price:currency': /^[A-Z]{3}$/,
// "rent" or "purchase", written entirely in lower or entirely in upper case.
'price:type': /^(?:rent|purchase|RENT|PURCHASE)$/,
// "hd" or "sd", written entirely in lower or entirely in upper case.
'price:resolution': /^(?:HD|hd|sd|SD)$/,
'platform:relationship': allowDeny,
'restriction:relationship': allowDeny,
// Empty, or two-letter uppercase codes separated by one or more spaces.
restriction: /^([A-Z]{2}( +[A-Z]{2})*)?$/,
// Empty, or a list of web/mobile/tv separated by single spaces.
platform: /^((web|mobile|tv)( (web|mobile|tv))*)?$/,
// Language codes: zh-cn, zh-tw, or ISO 639 2-3 letter codes
language: /^(zh-cn|zh-tw|[a-z]{2,3})$/,
// One or more of the listed genres, separated by a comma and optional spaces.
genres: /^(PressRelease|Satire|Blog|OpEd|Opinion|UserGenerated)(, *(PressRelease|Satire|Blog|OpEd|Opinion|UserGenerated))*$/,
// Empty, or one to five word:word pairs separated by a comma and optional spaces.
stock_tickers: /^(\w+:\w+(, *\w+:\w+){0,4})?$/,
};
/**
 * Type guard: tells whether `pt` is accepted by the `price:type` validator.
 * @param pt the candidate value
 * @returns true when the value matches
 */
export function isPriceType(pt) {
    const { 'price:type': priceTypePattern } = validators;
    return priceTypePattern.test(pt);
}
/**
 * Type guard: tells whether `res` is accepted by the `price:resolution` validator.
 * @param res the candidate value
 * @returns true when the value matches
 */
export function isResolution(res) {
    const { 'price:resolution': resolutionPattern } = validators;
    return resolutionPattern.test(res);
}
// Every value of EnumChangefreq, computed once at module load.
const CHANGEFREQ = Object.values(EnumChangefreq);
/**
 * Type guard to check if a string is a valid changefreq value
 * @param freq the candidate value
 * @returns true when `freq` is one of the EnumChangefreq values
 */
export function isValidChangeFreq(freq) {
    return CHANGEFREQ.some((allowed) => allowed === freq);
}
/**
 * Type guard to check if a string is a valid yes/no value.
 *
 * Accepts exactly "YES", "NO", "Yes", "No", "yes" and "no". The alternatives are
 * grouped so that the anchors apply to all of them; without the group, `^` binds
 * only to the first branch and `$` only to the last, which lets strings such as
 * "YESSIR" or "say NO more" through.
 *
 * @param yn the candidate value
 * @returns true when `yn` is one of the six accepted spellings
 */
export function isValidYesNo(yn) {
    return /^(?:YES|NO|[Yy]es|[Nn]o)$/.test(yn);
}
/**
 * Type guard: tells whether `ad` is exactly "allow" or "deny".
 * @param ad the candidate value
 * @returns true when the value matches
 */
export function isAllowDeny(ad) {
    const match = allowDeny.exec(ad);
    return match !== null;
}
/**
 * Validates that a URL is well-formed and meets security requirements.
 *
 * Checks, in order: the value is a non-empty string, it is no longer than
 * LIMITS.MAX_URL_LENGTH, it matches LIMITS.URL_PROTOCOL_REGEX, and it can be
 * parsed by the URL constructor. The first failing check throws.
 *
 * @param url - The URL to validate
 * @param paramName - The parameter name for error messages
 * @throws {InvalidHostnameError} If the URL is invalid
 */
export function validateURL(url, paramName) {
    if (typeof url !== 'string' || url === '') {
        throw new InvalidHostnameError(url, `${paramName} must be a non-empty string`);
    }
    const tooLong = url.length > LIMITS.MAX_URL_LENGTH;
    if (tooLong) {
        throw new InvalidHostnameError(url, `${paramName} exceeds maximum length of ${LIMITS.MAX_URL_LENGTH} characters`);
    }
    const hasAllowedProtocol = LIMITS.URL_PROTOCOL_REGEX.test(url);
    if (!hasAllowedProtocol) {
        throw new InvalidHostnameError(url, `${paramName} must use http:// or https:// protocol`);
    }
    // Parsing is the final word on whether the string is a usable URL.
    try {
        new URL(url);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new InvalidHostnameError(url, `${paramName} is not a valid URL: ${reason}`);
    }
}
/**
 * Validates that a path is a safe, relative path.
 *
 * Security: This function prevents path traversal attacks by rejecting any
 * occurrence of '..' in the path, whether it appears as '../', '/..', '..\\'
 * or anywhere else in the string. It also rejects absolute paths and paths
 * containing a null byte.
 *
 * The former second pass, which split the path into components and looked for
 * a '..' component, could never fire: a '..' component implies the '..'
 * substring that the first check already rejects. It has been removed.
 *
 * @param path - The path to validate
 * @param paramName - The parameter name for error messages
 * @throws {InvalidPathError} If the path is empty or not a string, is absolute,
 * contains '..', or contains a null byte
 */
export function validatePath(path, paramName) {
    if (!path || typeof path !== 'string') {
        throw new InvalidPathError(path, `${paramName} must be a non-empty string`);
    }
    // Reject absolute paths to prevent arbitrary write location when caller input
    // reaches destinationDir (BB-04)
    if (isAbsolute(path)) {
        throw new InvalidPathError(path, `${paramName} must be a relative path (absolute paths are not allowed)`);
    }
    // A substring check covers every separator style (/ and \), because it does
    // not depend on how the path is split into components.
    if (path.includes('..')) {
        throw new InvalidPathError(path, `${paramName} contains path traversal sequence (..)`);
    }
    // Check for null bytes (security issue in some contexts)
    if (path.includes('\0')) {
        throw new InvalidPathError(path, `${paramName} contains null byte character`);
    }
}
/**
 * Validates that a public base path is safe for URL construction.
 *
 * Security: This function rejects any occurrence of '..', null bytes, and the
 * whitespace characters carriage return, line feed and tab, all of which could
 * be used to manipulate the structure of the URLs built from this path.
 *
 * The former second pass, which split the path into components and looked for
 * a '..' component, could never fire: a '..' component implies the '..'
 * substring that the first check already rejects. It has been removed.
 *
 * @param publicBasePath - The public base path to validate
 * @throws {InvalidPublicBasePathError} If the path is empty or not a string,
 * contains '..', contains a null byte, or contains \r, \n or \t
 */
export function validatePublicBasePath(publicBasePath) {
    if (!publicBasePath || typeof publicBasePath !== 'string') {
        throw new InvalidPublicBasePathError(publicBasePath, 'must be a non-empty string');
    }
    // A substring check covers every separator style (/ and \), because it does
    // not depend on how the path is split into components.
    if (publicBasePath.includes('..')) {
        throw new InvalidPublicBasePathError(publicBasePath, 'contains path traversal sequence (..)');
    }
    // Check for null bytes
    if (publicBasePath.includes('\0')) {
        throw new InvalidPublicBasePathError(publicBasePath, 'contains null byte character');
    }
    // Check for potentially dangerous characters that could break URL construction
    if (/[\r\n\t]/.test(publicBasePath)) {
        throw new InvalidPublicBasePathError(publicBasePath, 'contains invalid whitespace characters');
    }
}
/**
 * Validates that a limit is an integer within the range allowed for a sitemap.
 *
 * The value has to be a finite number, lie between
 * LIMITS.MIN_SITEMAP_ITEM_LIMIT and LIMITS.MAX_SITEMAP_ITEM_LIMIT (inclusive),
 * and be an integer. Any violation raises the same error.
 *
 * @param limit - The limit to validate
 * @throws {InvalidLimitError} If the limit is not a finite integer in range
 */
export function validateLimit(limit) {
    // Number.isFinite is false for NaN and both infinities.
    const isFiniteNumber = typeof limit === 'number' && Number.isFinite(limit);
    if (!isFiniteNumber) {
        throw new InvalidLimitError(limit);
    }
    const { MIN_SITEMAP_ITEM_LIMIT: lowest, MAX_SITEMAP_ITEM_LIMIT: highest } = LIMITS;
    const outOfRange = limit < lowest || limit > highest;
    if (outOfRange || !Number.isInteger(limit)) {
        throw new InvalidLimitError(limit);
    }
}
/**
 * Validates that an XSL URL is safe and well-formed.
 *
 * Checks, in order, and throws on the first failure:
 * 1. the value is a non-empty string no longer than LIMITS.MAX_URL_LENGTH;
 * 2. it matches LIMITS.URL_PROTOCOL_REGEX and can be parsed by the URL constructor;
 * 3. its lowercased form contains neither "<script", nor one of the protocol
 *    names javascript:, data:, vbscript:, file:, about:, nor a URL-encoded
 *    variant of "<script", "javascript:" or "data:";
 * 4. it contains none of the raw characters `"`, `<` and `>`.
 *
 * @param xslUrl - The XSL URL to validate
 * @throws {InvalidXSLUrlError} If the URL is invalid
 */
export function validateXSLUrl(xslUrl) {
    if (typeof xslUrl !== 'string' || xslUrl === '') {
        throw new InvalidXSLUrlError(xslUrl, 'must be a non-empty string');
    }
    if (xslUrl.length > LIMITS.MAX_URL_LENGTH) {
        throw new InvalidXSLUrlError(xslUrl, `exceeds maximum length of ${LIMITS.MAX_URL_LENGTH} characters`);
    }
    const hasAllowedProtocol = LIMITS.URL_PROTOCOL_REGEX.test(xslUrl);
    if (!hasAllowedProtocol) {
        throw new InvalidXSLUrlError(xslUrl, 'must use http:// or https:// protocol');
    }
    try {
        new URL(xslUrl);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new InvalidXSLUrlError(xslUrl, `is not a valid URL: ${reason}`);
    }
    // All content checks below are case-insensitive.
    const lowered = xslUrl.toLowerCase();
    if (lowered.indexOf('<script') !== -1) {
        throw new InvalidXSLUrlError(xslUrl, 'contains potentially malicious content (<script tag)');
    }
    // The scheme was verified above; this looks for the names anywhere in the URL.
    const dangerousProtocol = ['javascript:', 'data:', 'vbscript:', 'file:', 'about:']
        .find((candidate) => lowered.includes(candidate));
    if (dangerousProtocol !== undefined) {
        throw new InvalidXSLUrlError(xslUrl, `contains dangerous protocol: ${dangerousProtocol}`);
    }
    // %3C = '<', %3A = ':'
    const encodedPatterns = [
        '%3cscript', // <script
        '%3c%73%63%72%69%70%74', // <script (fully encoded)
        'javascript%3a', // javascript:
        'data%3a', // data:
    ];
    if (encodedPatterns.some((pattern) => lowered.includes(pattern))) {
        throw new InvalidXSLUrlError(xslUrl, 'contains URL-encoded malicious content');
    }
    // Raw XML special characters must be percent-encoded in a valid URL and could
    // break out of an XML attribute if left as-is.
    if (/["<>]/.test(xslUrl)) {
        throw new InvalidXSLUrlError(xslUrl, 'contains unencoded XML special characters (" < >); percent-encode them in the URL');
    }
}
/**
 * Internal helper that checks every own key of `subject` that has an entry in
 * `validators` against that entry.
 *
 * @param subject object whose fields are checked
 * @param name label for `subject` used in the warning text
 * @param url URL of the sitemap entry, used in the warning text
 * @param level ErrorLevel.THROW raises on the first invalid value; any other
 * level logs a warning and continues
 * @throws {InvalidAttrValue} when `level` is THROW and a value does not match
 */
function validate(subject, name, url, level) {
    for (const key of Object.keys(subject)) {
        const val = subject[key];
        const pattern = validators[key];
        if (!pattern || pattern.test(val)) {
            // Either the key has no validator or the value passes it.
            continue;
        }
        if (level === ErrorLevel.THROW) {
            throw new InvalidAttrValue(key, val, validators[key]);
        }
        console.warn(`${url}: ${name} key ${key} has invalid value: ${val}`);
    }
}
/**
 * Internal helper that reacts to a validation error according to the
 * requested error level: rethrows it for ErrorLevel.THROW, logs its name and
 * message with console.warn for ErrorLevel.WARN, and does nothing for any
 * other level.
 *
 * @param error the error describing the validation problem
 * @param level the ErrorLevel that decides how the error is surfaced
 */
function handleError(error, level) {
    switch (level) {
        case ErrorLevel.THROW:
            throw error;
        case ErrorLevel.WARN:
            console.warn(error.name, error.message);
            break;
        default:
        // Any other level: the error is intentionally dropped.
    }
}
/**
 * Verifies all data passed in will comply with sitemap spec.
 *
 * Each problem found is handed to `errorHandler` together with `level`.
 * Whether validation stops at the first problem depends on the handler: the
 * default one throws for ErrorLevel.THROW and only logs for ErrorLevel.WARN.
 * Because a handler may return normally, every check that follows a
 * "required field is missing" report guards its own property access, so a
 * malformed item is reported instead of crashing with a TypeError.
 *
 * @param conf Options to validate
 * @param level logging level; ErrorLevel.SILENT skips all checks
 * @param errorHandler error handling func, called as `errorHandler(error, level)`
 * @returns the `conf` object that was passed in
 * @throws NoConfigError when `conf` is falsy, regardless of `level`
 */
export function validateSMIOptions(conf, level = ErrorLevel.WARN, errorHandler = handleError) {
    if (!conf) {
        throw new NoConfigError();
    }
    if (level === ErrorLevel.SILENT) {
        return conf;
    }
    const { url, changefreq, priority, news, video } = conf;
    if (!url) {
        errorHandler(new NoURLError(), level);
    }
    if (changefreq) {
        if (!isValidChangeFreq(changefreq)) {
            errorHandler(new ChangeFreqInvalidError(url, changefreq), level);
        }
    }
    if (priority) {
        if (!(priority >= 0.0 && priority <= 1.0)) {
            errorHandler(new PriorityInvalidError(url, priority), level);
        }
    }
    if (news) {
        if (news.access &&
            news.access !== 'Registration' &&
            news.access !== 'Subscription') {
            errorHandler(new InvalidNewsAccessValue(url, news.access), level);
        }
        if (!news.publication ||
            !news.publication.name ||
            !news.publication.language ||
            !news.publication_date ||
            !news.title) {
            errorHandler(new InvalidNewsFormat(url), level);
        }
        validate(news, 'news', url, level);
        // A missing publication has already been reported above; there is
        // nothing left to check field by field.
        if (news.publication) {
            validate(news.publication, 'publication', url, level);
        }
    }
    if (video) {
        video.forEach((vid) => {
            // A non-object entry (including null / undefined) cannot carry any
            // of the fields checked below: report it once and move on.
            if (vid === null || typeof vid !== 'object') {
                errorHandler(new InvalidVideoFormat(url), level);
                return;
            }
            if (vid.duration !== undefined) {
                if (vid.duration < 0 || vid.duration > 28800) {
                    errorHandler(new InvalidVideoDuration(url, vid.duration), level);
                }
            }
            if (vid.rating !== undefined && (vid.rating < 0 || vid.rating > 5)) {
                errorHandler(new InvalidVideoRating(url, vid.title, vid.rating), level);
            }
            if (!vid.thumbnail_loc || !vid.title || !vid.description) {
                // has to include required categories https://support.google.com/webmasters/answer/80471?hl=en&ref_topic=4581190
                errorHandler(new InvalidVideoFormat(url), level);
            }
            // Length limits only apply to fields that are actually present;
            // their absence was reported by the format check above.
            if (vid.title && vid.title.length > 100) {
                errorHandler(new InvalidVideoTitle(url, vid.title.length), level);
            }
            if (vid.description && vid.description.length > 2048) {
                errorHandler(new InvalidVideoDescription(url, vid.description.length), level);
            }
            if (vid.view_count !== undefined && vid.view_count < 0) {
                errorHandler(new InvalidVideoViewCount(url, vid.view_count), level);
            }
            if (vid.tag && vid.tag.length > 32) {
                errorHandler(new InvalidVideoTagCount(url, vid.tag.length), level);
            }
            if (vid.category?.length > 256) {
                errorHandler(new InvalidVideoCategory(url, vid.category.length), level);
            }
            if (vid.family_friendly !== undefined &&
                !isValidYesNo(vid.family_friendly)) {
                errorHandler(new InvalidVideoFamilyFriendly(url, vid.family_friendly), level);
            }
            if (vid.restriction) {
                if (!validators.restriction.test(vid.restriction)) {
                    errorHandler(new InvalidVideoRestriction(url, vid.restriction), level);
                }
                if (!vid['restriction:relationship'] ||
                    !isAllowDeny(vid['restriction:relationship'])) {
                    errorHandler(new InvalidVideoRestrictionRelationship(url, vid['restriction:relationship']), level);
                }
            }
            // TODO price element should be unbounded
            if ((vid.price === '' && vid['price:type'] === undefined) ||
                (vid['price:type'] !== undefined && !isPriceType(vid['price:type']))) {
                errorHandler(new InvalidVideoPriceType(url, vid['price:type'], vid.price), level);
            }
            if (vid['price:resolution'] !== undefined &&
                !isResolution(vid['price:resolution'])) {
                errorHandler(new InvalidVideoResolution(url, vid['price:resolution']), level);
            }
            if (vid['price:currency'] !== undefined &&
                !validators['price:currency'].test(vid['price:currency'])) {
                errorHandler(new InvalidVideoPriceCurrency(url, vid['price:currency']), level);
            }
            validate(vid, 'video', url, level);
        });
    }
    return conf;
}

12
node_modules/sitemap/dist/esm/lib/xmllint.d.ts generated vendored Normal file
View File

@@ -0,0 +1,12 @@
import { Readable } from 'node:stream';
/**
 * Verify the passed in xml is valid against the bundled sitemap schema.
 * Requires the `xmllint` command-line tool to be installed and on the PATH.
 *
 * Security: This function always pipes XML content via stdin to prevent
 * command injection vulnerabilities. Never pass user-controlled strings
 * as file path arguments to xmllint.
 *
 * @param xml what you want validated (string or Readable stream)
 * @return {Promise<void>} resolves when the XML is valid; rejects with an
 * array — `[error, stderr]` when xmllint reports a failure, or a single
 * `XMLLintUnavailable` error when xmllint cannot be found
 */
export declare function xmlLint(xml: string | Readable): Promise<void>;

78
node_modules/sitemap/dist/esm/lib/xmllint.js generated vendored Normal file
View File

@@ -0,0 +1,78 @@
import { existsSync } from 'node:fs';
import { resolve } from 'node:path';
import { execFile } from 'node:child_process';
import { XMLLintUnavailable } from './errors.js';
/**
 * Locates the `schema` directory relative to the current working directory.
 *
 * Three locations are probed, in order, and the first one that exists wins:
 * - `<cwd>/schema`
 * - `<cwd>/../schema`
 * - `<cwd>/../../schema`
 *
 * The lookup is anchored on process.cwd() rather than on this module's own
 * location, so the result depends on where the process was started.
 *
 * @throws {Error} if none of the probed paths exists
 * @returns {string} the absolute path of the schema directory
 */
function findSchemaDir() {
    const cwd = process.cwd();
    // Number of parent hops to try: 0 (cwd itself), 1 and 2 levels up.
    const candidates = [[], ['..'], ['..', '..']].map((hops) => resolve(cwd, ...hops, 'schema'));
    const match = candidates.find((candidate) => existsSync(candidate));
    if (match === undefined) {
        throw new Error(`Schema directory not found. Searched paths: ${candidates.join(', ')}`);
    }
    return match;
}
/**
 * Validates XML against the bundled `all.xsd` schema using the external
 * `xmllint` command-line tool, which must be installed and on the PATH.
 *
 * Security: the XML is always fed to xmllint through stdin (the `-`
 * argument), never as a file path, so user-controlled content cannot be
 * interpreted as a command-line argument.
 *
 * The schema location is resolved synchronously before the promise is
 * created, so a missing schema directory throws instead of rejecting.
 *
 * @param xml what you want validated (string or Readable stream)
 * @return {Promise<void>} resolves when xmllint exits successfully; rejects
 * with `[XMLLintUnavailable]` when `which xmllint` fails, or with
 * `[error, stderr]` when xmllint itself reports an error
 */
export function xmlLint(xml) {
    const schemaFile = resolve(findSchemaDir(), 'all.xsd');
    // Trailing '-' makes xmllint read the document from stdin.
    const lintArgs = ['--schema', schemaFile, '--noout', '-'];
    return new Promise((onValid, onInvalid) => {
        // Probe for the binary first so a missing tool yields a dedicated error.
        execFile('which', ['xmllint'], (lookupError) => {
            if (lookupError) {
                onInvalid([new XMLLintUnavailable()]);
                return;
            }
            const child = execFile('xmllint', lintArgs, (lintError, _lintStdout, lintStderr) => {
                if (lintError) {
                    onInvalid([lintError, lintStderr]);
                    return;
                }
                onValid();
            });
            const { stdin, stdout } = child;
            if (stdin) {
                if (typeof xml === 'string') {
                    // Send the whole document, then close stdin so xmllint can finish.
                    stdin.write(xml);
                    stdin.end();
                }
                else if (xml) {
                    // Stream input: let pipe() forward the data and end stdin.
                    xml.pipe(stdin);
                }
            }
            if (stdout) {
                stdout.unpipe();
            }
        });
    });
}