"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.SitemapStream = exports.closetag = exports.stylesheetInclude = void 0; exports.streamToPromise = streamToPromise; const node_stream_1 = require("node:stream"); const types_js_1 = require("./types.js"); const utils_js_1 = require("./utils.js"); const validation_js_1 = require("./validation.js"); const sitemap_item_stream_js_1 = require("./sitemap-item-stream.js"); const errors_js_1 = require("./errors.js"); const constants_js_1 = require("./constants.js"); const xmlDec = ''; const stylesheetInclude = (url) => { const safe = url .replace(/&/g, '&') .replace(/"/g, '"') .replace(//g, '>'); return ``; }; exports.stylesheetInclude = stylesheetInclude; const urlsetTagStart = ' constants_js_1.LIMITS.MAX_CUSTOM_NAMESPACES) { throw new Error(`Too many custom namespaces: ${custom.length} exceeds limit of ${constants_js_1.LIMITS.MAX_CUSTOM_NAMESPACES}`); } // Basic format validation for xmlns declarations and namespace-qualified attributes // Supports both xmlns:prefix="uri" and prefix:attribute="value" (e.g., xsi:schemaLocation) const xmlAttributePattern = /^[a-zA-Z_][\w.-]*:[a-zA-Z_][\w.-]*="[^"<>]*"$/; for (const ns of custom) { if (typeof ns !== 'string' || ns.length === 0) { throw new Error('Custom namespace must be a non-empty string'); } if (ns.length > constants_js_1.LIMITS.MAX_NAMESPACE_LENGTH) { throw new Error(`Custom namespace exceeds maximum length of ${constants_js_1.LIMITS.MAX_NAMESPACE_LENGTH} characters: ${ns.substring(0, 50)}...`); } // Check for potentially malicious content BEFORE format check // (format check will reject < and > but we want specific error message) const lowerNs = ns.toLowerCase(); if (lowerNs.includes(' { let ns = xmlDec; if (xslURL) { ns += (0, exports.stylesheetInclude)(xslURL); } ns += urlsetTagStart; if (news) { ns += ' xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"'; } if (xhtml) { ns += ' xmlns:xhtml="http://www.w3.org/1999/xhtml"'; } if (image) { ns += ' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"'; } if (video) { ns += ' xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"'; } if (custom) { validateCustomNamespaces(custom); ns += ' ' + custom.join(' '); } return ns + '>'; }; exports.closetag = ''; const defaultXMLNS = { news: true, xhtml: true, image: true, video: true, }; const defaultStreamOpts = { xmlns: defaultXMLNS, }; /** * A [Transform](https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream) * for turning a * [Readable stream](https://nodejs.org/api/stream.html#stream_readable_streams) * of either [SitemapItemOptions](#sitemap-item-options) or url strings into a * Sitemap. The readable stream it transforms **must** be in object mode. * * @param {SitemapStreamOptions} opts - Configuration options * @param {string} [opts.hostname] - Base URL for relative paths. Must use http:// or https:// protocol * @param {ErrorLevel} [opts.level=ErrorLevel.WARN] - Error handling level (SILENT, WARN, or THROW) * @param {boolean} [opts.lastmodDateOnly=false] - Format lastmod as date only (YYYY-MM-DD) * @param {NSArgs} [opts.xmlns] - Control which XML namespaces to include in output * @param {string} [opts.xslUrl] - URL to XSL stylesheet for sitemap display. Must use http:// or https:// * @param {ErrorHandler} [opts.errorHandler] - Custom error handler function * * @throws {InvalidHostnameError} If hostname is provided but invalid (non-http(s), malformed, or >2048 chars) * @throws {InvalidXSLUrlError} If xslUrl is provided but invalid (non-http(s), malformed, >2048 chars, or contains malicious content) * @throws {Error} If xmlns.custom contains invalid namespace declarations * * @example * ```typescript * const stream = new SitemapStream({ * hostname: 'https://example.com', * level: ErrorLevel.THROW * }); * stream.write({ url: '/page', changefreq: 'daily' }); * stream.end(); * ``` * * @security * - Hostname and xslUrl are validated to prevent URL injection attacks * - Custom namespaces are validated to prevent XML injection * - All URLs are normalized and validated before output * - XML content is properly escaped to prevent injection */ class SitemapStream extends node_stream_1.Transform { hostname; level; hasHeadOutput; xmlNS; xslUrl; errorHandler; smiStream; lastmodDateOnly; constructor(opts = defaultStreamOpts) { opts.objectMode = true; super(opts); // Validate hostname if provided if (opts.hostname !== undefined) { (0, validation_js_1.validateURL)(opts.hostname, 'hostname'); } // Validate xslUrl if provided if (opts.xslUrl !== undefined) { (0, validation_js_1.validateXSLUrl)(opts.xslUrl); } this.hasHeadOutput = false; this.hostname = opts.hostname; this.level = opts.level || types_js_1.ErrorLevel.WARN; this.errorHandler = opts.errorHandler; this.smiStream = new sitemap_item_stream_js_1.SitemapItemStream({ level: opts.level }); this.smiStream.on('data', (data) => this.push(data)); this.lastmodDateOnly = opts.lastmodDateOnly || false; this.xmlNS = opts.xmlns || defaultXMLNS; this.xslUrl = opts.xslUrl; } _transform(item, encoding, callback) { if (!this.hasHeadOutput) { this.hasHeadOutput = true; this.push(getURLSetNs(this.xmlNS, this.xslUrl)); } if (!this.smiStream.write((0, validation_js_1.validateSMIOptions)((0, utils_js_1.normalizeURL)(item, this.hostname, this.lastmodDateOnly), this.level, this.errorHandler))) { this.smiStream.once('drain', callback); } else { process.nextTick(callback); } } _flush(cb) { if (!this.hasHeadOutput) { cb(new errors_js_1.EmptySitemap()); } else { this.push(exports.closetag); cb(); } } } exports.SitemapStream = SitemapStream; /** * Converts a readable stream into a promise that resolves with the concatenated data from the stream. * * The function listens for 'data' events from the stream, and when the stream ends, it resolves the promise with the concatenated data. If an error occurs while reading from the stream, the promise is rejected with the error. * * ⚠️ CAUTION: This function should not generally be used in production / when writing to files as it holds a copy of the entire file contents in memory until finished. * * @param {Readable} stream - The readable stream to convert to a promise. * @returns {Promise} A promise that resolves with the concatenated data from the stream as a Buffer, or rejects with an error if one occurred while reading from the stream. If the stream is empty, the promise is rejected with an EmptyStream error. * @throws {EmptyStream} If the stream is empty. */ function streamToPromise(stream) { return new Promise((resolve, reject) => { const drain = []; stream // Error propagation is not automatic // Bubble up errors on the read stream .on('error', reject) .pipe(new node_stream_1.Writable({ write(chunk, enc, next) { drain.push(chunk); next(); }, })) // This bubbles up errors when writing to the internal buffer // This is unlikely to happen, but we have this for completeness .on('error', reject) .on('finish', () => { if (!drain.length) { reject(new errors_js_1.EmptyStream()); } else { resolve(Buffer.concat(drain)); } }); }); }