Files
dealplustech/node_modules/parse-latin/lib/plugin/merge-inner-word-symbol.js
Kunthawat 77ac4d2d05 feat: Upgrade to Astro with full PDPA compliance
PDPA Features:
 Cookie consent banner
 Consent logging API
 Admin dashboard
 Privacy Policy
 Terms & Conditions

Technical:
 Astro 5.x + Tailwind v4
 Docker on port 80
 SQLite database
 15 pages built

Ready for Easypanel deployment.
2026-03-12 10:01:04 +07:00

80 lines
2.4 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* @typedef {import('nlcst').Sentence} Sentence
* @typedef {import('nlcst').WordContent} WordContent
*/
import {toString} from 'nlcst-to-string'
import {modifyChildren} from 'unist-util-modify-children'
// Symbols part of surrounding words.
import {wordSymbolInner} from '../expressions.js'
/**
 * Merge words joined by certain punctuation marks.
 *
 * Visits each child of a sentence.
 * When a symbol or punctuation node directly follows a word, the siblings
 * from that node onwards are scanned: inner-word marks (those matching
 * `wordSymbolInner`) and the children of further words are folded into the
 * preceding word, so the whole run ends up as a single `WordNode`.
 * Marks that are not followed by another word are left where they are.
 */
export const mergeInnerWordSymbol = modifyChildren(
  /**
   * @type {import('unist-util-modify-children').Modifier<Sentence>}
   */
  function (child, index, parent) {
    // Only a symbol or punctuation mark that has a preceding sibling can join
    // two words.
    if (
      index < 1 ||
      (child.type !== 'SymbolNode' && child.type !== 'PunctuationNode')
    ) {
      return undefined
    }

    const siblings = parent.children
    const previous = siblings[index - 1]

    // The node right before the mark must be a word to merge into.
    if (!previous || previous.type !== 'WordNode') {
      return undefined
    }

    /** @type {Array<WordContent>} */
    const merged = []
    /** @type {Array<WordContent>} */
    let pending = []
    let cursor = index

    // Walk forward from the current mark:
    // - a word flushes the pending marks, followed by its own children, into
    //   `merged`;
    // - an inner-word mark is held in `pending` until a word confirms it;
    // - anything else ends the scan.
    for (; siblings[cursor]; cursor++) {
      const sibling = siblings[cursor]

      if (sibling.type === 'WordNode') {
        merged.push(...pending, ...sibling.children)
        pending = []
        continue
      }

      if (
        (sibling.type === 'SymbolNode' ||
          sibling.type === 'PunctuationNode') &&
        wordSymbolInner.test(toString(sibling))
      ) {
        pending.push(sibling)
        continue
      }

      break
    }

    // No word followed the mark(s), so there is nothing to merge.
    if (merged.length === 0) {
      return undefined
    }

    // Marks still pending were not followed by a word; keep them as siblings
    // by ending the removed range just before them.
    const end = cursor - pending.length

    // Drop the merged marks and words from the sentence…
    siblings.splice(index, end - index)
    // …and hand their content to the preceding word.
    previous.children.push(...merged)

    // Stretch the word's positional info to cover what it absorbed.
    const tail = merged[merged.length - 1]

    if (previous.position && tail.position) {
      previous.position.end = tail.position.end
    }

    // Next, iterate over the node *now* at the current position.
    return index
  }
)