diff --git a/src/velum.ts b/src/velum.ts
index d8b8681..a2cd096 100644
--- a/src/velum.ts
+++ b/src/velum.ts
@@ -11,6 +11,105 @@ function escapeRegExp(value: string): string {
   return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 }
 
+/** Bracket pairs whose unmatched halves are moved out of mapped originals. */
+const BRACKET_PAIRS: Array<{ open: string; close: string }> = [
+  { open: "(", close: ")" },
+  { open: "[", close: "]" },
+  { open: "{", close: "}" },
+];
+
+/**
+ * Reports whether the bracket that starts `chars` is never closed.
+ *
+ * Only `opener` and `closer` are tracked; every other character is skipped.
+ * To test a trailing bracket, pass the characters in reverse order and swap
+ * the roles of the pair.
+ *
+ * @param chars - Characters to scan; expected to begin with `opener`.
+ * @param opener - Bracket that increases the nesting depth.
+ * @param closer - Bracket that decreases the nesting depth.
+ * @returns `true` if the scan ends with the first bracket still open.
+ */
+function isEdgeBracketUnmatched(
+  chars: readonly string[],
+  opener: string,
+  closer: string,
+): boolean {
+  let depth = 0;
+  for (const c of chars) {
+    if (c === opener) {
+      depth += 1;
+    } else if (c === closer) {
+      depth -= 1;
+      if (depth <= 0) return false;
+    }
+  }
+  return depth > 0;
+}
+
+/**
+ * Moves unmatched brackets at the edges of each mapped original out of the
+ * mapping and into `text`, around every occurrence of the placeholder.
+ *
+ * An edge bracket is moved only when it has no partner inside the same
+ * original: `"(John"` becomes `"John"` and `(` is put in front of the
+ * placeholder, while `"(a) (b"` is left alone because its leading `(` is
+ * already closed.
+ *
+ * @param text - Text containing the placeholders.
+ * @param mapping - Placeholder entries keyed by placeholder; not mutated.
+ * @returns The rewritten text and a new mapping with the trimmed originals.
+ */
+function rebalanceBrackets(
+  text: string,
+  mapping: PlaceholderMapping,
+): { text: string; mapping: PlaceholderMapping } {
+  let resultText = text;
+  const resultMapping: PlaceholderMapping = {};
+
+  for (const [placeholder, entry] of Object.entries(mapping)) {
+    let original = entry.original;
+    let lead = "";
+    let trail = "";
+
+    // Peel unmatched opening brackets off the front, one per pass.
+    for (;;) {
+      const current = original;
+      const pair = BRACKET_PAIRS.find((p) => current.startsWith(p.open));
+      if (!pair) break;
+      if (!isEdgeBracketUnmatched([...current], pair.open, pair.close)) break;
+      lead += pair.open;
+      original = current.slice(pair.open.length);
+    }
+
+    // Peel unmatched closing brackets off the back: scan in reverse with the
+    // pair's roles swapped.
+    for (;;) {
+      const current = original;
+      const pair = BRACKET_PAIRS.find((p) => current.endsWith(p.close));
+      if (!pair) break;
+      const reversed = [...current].reverse();
+      if (!isEdgeBracketUnmatched(reversed, pair.close, pair.open)) break;
+      trail = pair.close + trail;
+      original = current.slice(0, -pair.close.length);
+    }
+
+    if (lead || trail) {
+      const wrapped = `${lead}${placeholder}${trail}`;
+      // Use a replacer function: a replacement string would expand `$&`, `$1`
+      // and similar patterns if the placeholder happens to contain them.
+      resultText = resultText.replace(
+        new RegExp(escapeRegExp(placeholder), "g"),
+        () => wrapped,
+      );
+    }
+
+    resultMapping[placeholder] = { ...entry, original };
+  }
+
+  return { text: resultText, mapping: resultMapping };
+}
+
 export function normalizePseudonymizeResponse(
   response: PseudonymizeResponse,
 ): PseudonymizeResponse {
@@ -34,10 +133,12 @@ export function normalizePseudonymizeResponse(
     text = text.replace(bareRegex, bracketed);
   }
 
+  const rebalanced = rebalanceBrackets(text, normalizedMapping);
+
   return {
     ...response,
-    pseudonymized_text: text,
-    mapping: normalizedMapping,
+    pseudonymized_text: rebalanced.text,
+    mapping: rebalanced.mapping,
   };
 }
 