364 lines
16 KiB
JavaScript
364 lines
16 KiB
JavaScript
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.SitemapAndIndexStream = exports.SitemapIndexStream = exports.IndexTagNames = void 0;
|
|
const node_stream_1 = require("node:stream");
|
|
const types_js_1 = require("./types.js");
|
|
Object.defineProperty(exports, "IndexTagNames", { enumerable: true, get: function () { return types_js_1.IndexTagNames; } });
|
|
const sitemap_stream_js_1 = require("./sitemap-stream.js");
|
|
const sitemap_xml_js_1 = require("./sitemap-xml.js");
|
|
const constants_js_1 = require("./constants.js");
|
|
const validation_js_1 = require("./validation.js");
|
|
// XML declaration; the first part of the head chunk pushed by `writeHeadOutput`.
const xmlDec = '<?xml version="1.0" encoding="UTF-8"?>';
|
|
// Opening tag of the sitemap index document, carrying the sitemaps.org 0.9 namespace.
const sitemapIndexTagStart = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
|
|
// Closing tag of the sitemap index document, pushed when the stream is flushed.
const closetag = '</sitemapindex>';
|
|
// Fallback options object: the default value of the `SitemapIndexStream` constructor's `opts` parameter.
const defaultStreamOpts = {};
|
|
/**
|
|
* `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
|
|
*
|
|
* It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
|
|
*
|
|
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
|
|
* before `finish` will be emitted. Failure to read the stream will result in hangs.
|
|
*
|
|
* @extends {Transform}
|
|
*/
|
|
class SitemapIndexStream extends node_stream_1.Transform {
    /** When true, `<lastmod>` values are truncated to `YYYY-MM-DD`. */
    lastmodDateOnly;
    /** Error level deciding whether invalid items throw, warn, or are skipped silently. */
    level;
    /** Optional XSL stylesheet URL referenced from the document head. */
    xslUrl;
    /** Whether the XML declaration and opening `<sitemapindex>` tag were already pushed. */
    hasHeadOutput;
    /**
     * `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML.
     *
     * It automatically handles the XML declaration and the opening and closing tags for the sitemap index.
     *
     * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
     * before `finish` will be emitted. Failure to read the stream will result in hangs.
     *
     * @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options.
     * @throws Whatever `validateXSLUrl` throws when `opts.xslUrl` is provided and rejected.
     */
    constructor(opts = defaultStreamOpts) {
        // Copy the options instead of writing `objectMode` onto them: the
        // object may be the shared module-level default, a caller-owned
        // object that is reused elsewhere, or frozen.
        super({ ...opts, objectMode: true });
        this.hasHeadOutput = false;
        this.lastmodDateOnly = opts.lastmodDateOnly || false;
        this.level = opts.level ?? types_js_1.ErrorLevel.WARN;
        if (opts.xslUrl !== undefined) {
            (0, validation_js_1.validateXSLUrl)(opts.xslUrl);
        }
        this.xslUrl = opts.xslUrl;
    }
    /**
     * Pushes the document head (XML declaration, optional stylesheet
     * include, opening `<sitemapindex>` tag) and records that it was written.
     */
    writeHeadOutput() {
        this.hasHeadOutput = true;
        let stylesheet = '';
        if (this.xslUrl) {
            stylesheet = (0, sitemap_stream_js_1.stylesheetInclude)(this.xslUrl);
        }
        this.push(xmlDec + stylesheet + sitemapIndexTagStart);
    }
    /**
     * Applies the configured error level to a problem found in one item.
     *
     * @param {Error} error - The problem to report.
     * @param {Function} callback - The transform callback of the item being processed.
     * @param {...*} context - Extra values appended to the warning output.
     * @returns {boolean} `true` when the error was handed to `callback`
     *   (THROW level) and the caller must stop without calling it again;
     *   `false` when the error was only logged (WARN) or ignored.
     */
    #reportItemError(error, callback, ...context) {
        if (this.level === types_js_1.ErrorLevel.THROW) {
            callback(error);
            return true;
        }
        if (this.level === types_js_1.ErrorLevel.WARN) {
            console.warn(error.message, ...context);
        }
        return false;
    }
    /**
     * Converts one index item into a `<sitemap>` element.
     *
     * Items without a usable URL, or whose URL fails `validateURL`, are
     * reported according to `level` and skipped. An unparsable `lastmod` is
     * reported according to `level`; unless that aborts the item, the
     * element is written without `<lastmod>`.
     *
     * @param {IndexItem | string} item - Sitemap URL or index item.
     * @param {string} encoding - Unused (object mode).
     * @param {Function} callback - Called once per item, with an error when the item is fatal.
     */
    _transform(item, encoding, callback) {
        if (!this.hasHeadOutput) {
            this.writeHeadOutput();
        }
        try {
            const url = typeof item === 'string' ? item : item.url;
            if (!url || typeof url !== 'string') {
                const error = new Error('Invalid sitemap index item: URL must be a non-empty string');
                if (!this.#reportItemError(error, callback, item)) {
                    // SILENT, or after WARN: skip this item.
                    callback();
                }
                return;
            }
            // Security: use centralized validation to enforce protocol
            // restrictions, length limits, and prevent injection attacks.
            try {
                (0, validation_js_1.validateURL)(url, 'Sitemap index URL');
            }
            catch (error) {
                // Wrap the validation error with a consistent message format.
                const validationMsg = error instanceof Error ? error.message : String(error);
                const err = new Error(`Invalid URL in sitemap index: ${validationMsg}`);
                if (!this.#reportItemError(err, callback)) {
                    // SILENT, or after WARN: skip this item.
                    callback();
                }
                return;
            }
            // Resolve lastmod BEFORE anything is pushed, so an item that is
            // rejected at THROW level never leaves an unclosed `<sitemap>`
            // fragment in the output.
            let lastmod;
            if (typeof item !== 'string' && item.lastmod) {
                try {
                    // `toISOString` throws a RangeError for an invalid date.
                    const iso = new Date(item.lastmod).toISOString();
                    lastmod = this.lastmodDateOnly ? iso.slice(0, 10) : iso;
                }
                catch {
                    const error = new Error(`Invalid lastmod date in sitemap index: ${item.lastmod}`);
                    if (this.#reportItemError(error, callback)) {
                        return;
                    }
                    // SILENT, or after WARN: continue without lastmod.
                }
            }
            this.push((0, sitemap_xml_js_1.otag)(types_js_1.IndexTagNames.sitemap));
            this.push((0, sitemap_xml_js_1.element)(types_js_1.IndexTagNames.loc, url));
            if (lastmod !== undefined) {
                this.push((0, sitemap_xml_js_1.element)(types_js_1.IndexTagNames.lastmod, lastmod));
            }
            this.push((0, sitemap_xml_js_1.ctag)(types_js_1.IndexTagNames.sitemap));
            callback();
        }
        catch (error) {
            callback(error instanceof Error ? error : new Error(String(error)));
        }
    }
    /**
     * Closes the document. The head is written first when no item was ever
     * transformed, so an empty stream still produces a complete index.
     *
     * @param {Function} cb - Called once the closing tag has been pushed.
     */
    _flush(cb) {
        if (!this.hasHeadOutput) {
            this.writeHeadOutput();
        }
        this.push(closetag);
        cb();
    }
}
|
|
// Publish the class as the named CommonJS export `SitemapIndexStream`.
exports.SitemapIndexStream = SitemapIndexStream;
|
|
/**
|
|
* `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
|
|
* writes them to sitemap files, adds the sitemap files to a sitemap index,
|
|
* and creates new sitemap files when the count limit is reached.
|
|
*
|
|
* It waits for the target stream of the current sitemap file to finish before
|
|
* moving on to the next if the target stream is returned by the `getSitemapStream`
|
|
* callback in the 3rd position of the tuple.
|
|
*
|
|
* ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
|
|
* before `finish` will be emitted. Failure to read the stream will result in hangs.
|
|
*
|
|
* @extends {SitemapIndexStream}
|
|
*/
|
|
class SitemapAndIndexStream extends SitemapIndexStream {
    /** Total number of items handed to sitemap streams so far. */
    itemsWritten;
    /** User callback producing `[indexItem, sitemapStream, pipelineTarget]` for a sitemap index number. */
    getSitemapStream;
    /** Sitemap stream currently receiving items, if any. */
    currentSitemap;
    /** Maximum number of items written to one sitemap before rotating. */
    limit;
    /** Optional write target of the current sitemap; awaited before rotating and on flush. */
    currentSitemapPipeline;
    /**
     * Flag to prevent race conditions when creating new sitemap files.
     * Set to true while waiting for the current sitemap to finish and
     * a new one to be created.
     */
    isCreatingSitemap;
    /**
     * `SitemapAndIndexStream` is a Transform stream that takes in sitemap items,
     * writes them to sitemap files, adds the sitemap files to a sitemap index,
     * and creates new sitemap files when the count limit is reached.
     *
     * It waits for the target stream of the current sitemap file to finish before
     * moving on to the next if the target stream is returned by the `getSitemapStream`
     * callback in the 3rd position of the tuple.
     *
     * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null)
     * before `finish` will be emitted. Failure to read the stream will result in hangs.
     *
     * @param {SitemapAndIndexStreamOptions} opts - Stream options.
     * @throws {Error} When `limit` is outside the allowed range.
     */
    constructor(opts) {
        // Pass a copy so the caller's options object is never modified.
        super({ ...opts, objectMode: true });
        this.itemsWritten = 0;
        this.getSitemapStream = opts.getSitemapStream;
        this.limit = opts.limit ?? constants_js_1.DEFAULT_SITEMAP_ITEM_LIMIT;
        this.isCreatingSitemap = false;
        // Validate limit is within acceptable range per sitemaps.org spec
        // See: https://www.sitemaps.org/protocol.html#index
        if (this.limit < constants_js_1.LIMITS.MIN_SITEMAP_ITEM_LIMIT ||
            this.limit > constants_js_1.LIMITS.MAX_SITEMAP_ITEM_LIMIT) {
            throw new Error(`limit must be between ${constants_js_1.LIMITS.MIN_SITEMAP_ITEM_LIMIT} and ${constants_js_1.LIMITS.MAX_SITEMAP_ITEM_LIMIT} per sitemaps.org spec, got ${this.limit}`);
        }
    }
    /**
     * Resolves when `stream` emits `finish`, rejects when it emits `error`.
     * Whichever listener fires removes the other one.
     *
     * @param {object} stream - Event emitter exposing `once`/`off`.
     * @returns {Promise<void>}
     */
    static #whenFinished(stream) {
        return new Promise((resolve, reject) => {
            const onFinish = () => {
                stream.off('error', onError);
                resolve();
            };
            const onError = (err) => {
                stream.off('finish', onFinish);
                reject(err);
            };
            stream.once('finish', onFinish);
            stream.once('error', onError);
        });
    }
    /**
     * Ends the current sitemap stream (if any) and waits until both it and
     * its pipeline target (if any) have finished.
     *
     * All listeners are attached before `end()` is called so a `finish`
     * event cannot be missed.
     *
     * @returns {Promise<void[]>} Rejects with the first stream error.
     */
    #finishCurrentSitemap() {
        const sitemap = this.currentSitemap;
        const pipeline = this.currentSitemapPipeline;
        const pending = [];
        if (pipeline) {
            // The pipeline is not ended here; presumably it is ended by
            // the sitemap stream being piped into it.
            pending.push(SitemapAndIndexStream.#whenFinished(pipeline));
        }
        if (sitemap) {
            pending.push(SitemapAndIndexStream.#whenFinished(sitemap));
            sitemap.end();
        }
        return Promise.all(pending);
    }
    /**
     * Routes one sitemap item to the current sitemap stream, rotating to a
     * new sitemap whenever `itemsWritten` is a multiple of `limit`.
     *
     * @param {*} item - Sitemap item forwarded to the sitemap stream.
     * @param {string} encoding - Forwarded to the index-entry transform.
     * @param {Function} callback - Called when the item has been accepted or failed.
     */
    _transform(item, encoding, callback) {
        if (this.itemsWritten % this.limit !== 0) {
            this.writeItem(item, callback);
            return;
        }
        // Prevent race condition if multiple items arrive during sitemap creation
        if (this.isCreatingSitemap) {
            // Retry on a later event-loop iteration. `setImmediate` is used
            // rather than `process.nextTick`: a recursive nextTick never
            // yields to promise callbacks or I/O, so the rotation being
            // waited on could never complete.
            setImmediate(() => this._transform(item, encoding, callback));
            return;
        }
        if (!this.currentSitemap) {
            // Very first item: nothing to close yet.
            this.createSitemap(encoding);
            this.writeItem(item, callback);
            return;
        }
        this.isCreatingSitemap = true;
        this.#finishCurrentSitemap()
            .then(() => {
                this.isCreatingSitemap = false;
                this.createSitemap(encoding);
                this.writeItem(item, callback);
            })
            .catch((err) => {
                this.isCreatingSitemap = false;
                callback(err);
            });
    }
    /**
     * Writes one item to the current sitemap stream and counts it.
     * When the sitemap stream signals backpressure, `callback` is deferred
     * until its `drain` event; otherwise it runs on the next tick.
     *
     * @param {*} item - Sitemap item to write.
     * @param {Function} callback - Transform callback for this item.
     */
    writeItem(item, callback) {
        if (!this.currentSitemap) {
            callback(new Error('No sitemap stream available'));
            return;
        }
        if (!this.currentSitemap.write(item)) {
            this.currentSitemap.once('drain', callback);
        }
        else {
            process.nextTick(callback);
        }
        // Increment the count of items written
        this.itemsWritten++;
    }
    /**
     * Called when the stream is finished.
     * If there is a current sitemap, we wait for it (and its pipeline
     * target) to finish before closing the index document.
     *
     * @param cb - The callback to invoke when flushing is complete
     */
    _flush(cb) {
        this.#finishCurrentSitemap()
            .then(() => {
                super._flush(cb);
            })
            .catch((err) => {
                cb(err);
            });
    }
    /**
     * Asks `getSitemapStream` for the next sitemap, validates what it
     * returned, installs it as the current sitemap and writes its entry to
     * the index.
     *
     * On any failure an `error` event is emitted and the current sitemap
     * references are cleared, so later writes fail with
     * "No sitemap stream available" instead of going to a previous,
     * already-ended stream.
     *
     * @param {string} encoding - Forwarded to the index-entry transform.
     */
    createSitemap(encoding) {
        const sitemapIndex = this.itemsWritten / this.limit;
        const fail = (message) => {
            // Clear state first: `emit` throws when no listener is attached.
            this.currentSitemap = undefined;
            this.currentSitemapPipeline = undefined;
            this.emit('error', new Error(message));
        };
        let result;
        try {
            result = this.getSitemapStream(sitemapIndex);
        }
        catch (err) {
            fail(`getSitemapStream callback threw an error for index ${sitemapIndex}: ${err instanceof Error ? err.message : String(err)}`);
            return;
        }
        // Validate the return value
        if (!Array.isArray(result) || result.length !== 3) {
            fail(`getSitemapStream must return a 3-element array [IndexItem | string, SitemapStream, WriteStream], got: ${typeof result}`);
            return;
        }
        const [idxItem, currentSitemap, currentSitemapPipeline] = result;
        // Validate each element
        if (!idxItem ||
            (typeof idxItem !== 'string' && typeof idxItem !== 'object')) {
            fail('getSitemapStream must return an IndexItem or string as the first element');
            return;
        }
        if (!currentSitemap || typeof currentSitemap.write !== 'function') {
            fail('getSitemapStream must return a SitemapStream as the second element');
            return;
        }
        if (currentSitemapPipeline &&
            typeof currentSitemapPipeline.write !== 'function') {
            fail('getSitemapStream must return a WriteStream or undefined as the third element');
            return;
        }
        // Propagate errors from the sitemap stream
        currentSitemap.on('error', (err) => this.emit('error', err));
        this.currentSitemap = currentSitemap;
        this.currentSitemapPipeline = currentSitemapPipeline;
        super._transform(idxItem, encoding, (err) => {
            // We are not too concerned about waiting for the index item to be written
            // as we'll wait for the file to finish at the end, and index file write
            // volume tends to be small in comparison to sitemap writes.
            // An error reported for the index entry must not be dropped, though.
            if (err) {
                this.emit('error', err);
            }
        });
    }
}
|
|
// Publish the class as the named CommonJS export `SitemapAndIndexStream`.
exports.SitemapAndIndexStream = SitemapAndIndexStream;
|