From 33d492dd84f4412db53f120cdce4a66dd1ac57f2 Mon Sep 17 00:00:00 2001 From: Stef Busking Date: Tue, 19 Dec 2023 15:20:43 +0100 Subject: [PATCH] Also guard against entity expansion attacks in attribute values --- src/dom-parsing/EntityExpansionGuard.ts | 55 +++++++++++++++ src/dom-parsing/parsingAlgorithms.ts | 57 +++++++--------- test/dom-parsing/parseXmlDocument.tests.ts | 78 ++++++++++++++++++++++ 3 files changed, 158 insertions(+), 32 deletions(-) create mode 100644 src/dom-parsing/EntityExpansionGuard.ts diff --git a/src/dom-parsing/EntityExpansionGuard.ts b/src/dom-parsing/EntityExpansionGuard.ts new file mode 100644 index 0000000..1b93450 --- /dev/null +++ b/src/dom-parsing/EntityExpansionGuard.ts @@ -0,0 +1,55 @@ +import { EntityRefEvent } from './parserEvents'; +import { throwErrorWithContext } from './parsingAlgorithms'; + +/* + * Guard against entity expansion attacks by keeping track of the initial input + * length vs. the expanded input length. The latter includes the length of the + * replacement text for each processed entity reference. An attack is likely if + * the ratio between the two exceeds the maximum amplification factor AND the + * expanded input length exceeds a threshold. This approach and defaults are + * taken from libexpat's billion laughs attack protection. + */ +export default class EntityExpansionGuard { + private readonly _initialInputLength: number; + + private _expandedInputLength: number; + + private readonly _entityExpansionThreshold: number; + + private readonly _entityExpansionMaxAmplification: number; + + private _topLevelEntityRef: EntityRefEvent | null = null; + + private _depth = 0; + + public constructor( + initialInputLength: number, + entityExpansionThreshold: number, + entityExpansionMaxAmplification: number + ) { + this._initialInputLength = initialInputLength; + this._expandedInputLength = initialInputLength; + this._entityExpansionThreshold = entityExpansionThreshold; + this._entityExpansionMaxAmplification = entityExpansionMaxAmplification; + } + + public enter(event: EntityRefEvent, replacementTextLength: number): void { + const topLevelEntityRef = this._topLevelEntityRef ?? event; + this._expandedInputLength += replacementTextLength; + if (this._expandedInputLength > this._entityExpansionThreshold) { + const amplification = this._expandedInputLength / this._initialInputLength; + if (amplification > this._entityExpansionMaxAmplification) { + throwErrorWithContext('too much entity expansion', topLevelEntityRef); + } + } + this._topLevelEntityRef = topLevelEntityRef; + this._depth += 1; + } + + public exit(): void { + this._depth -= 1; + if (this._depth === 0) { + this._topLevelEntityRef = null; + } + } +} diff --git a/src/dom-parsing/parsingAlgorithms.ts b/src/dom-parsing/parsingAlgorithms.ts index 21f790a..d1c0d40 100644 --- a/src/dom-parsing/parsingAlgorithms.ts +++ b/src/dom-parsing/parsingAlgorithms.ts @@ -7,6 +7,7 @@ import { insertNode } from '../util/mutationAlgorithms'; import { XMLNS_NAMESPACE, XML_NAMESPACE } from '../util/namespaceHelpers'; import { isElement } from '../util/NodeType'; import { getNodeDocument } from '../util/treeHelpers'; +import EntityExpansionGuard from './EntityExpansionGuard'; import { CompleteChars, CompleteName, @@ -343,7 +344,8 @@ function normalizeAndIncludeEntities( normalized: string[], value: AttValueEvent[], dtd: Dtd | null, - ancestorEntities: string[] | null + ancestorEntities: string[] | null, + expansionGuard: EntityExpansionGuard ) { for (const event of value) { if (typeof event === 'string') { @@ -372,6 +374,7 @@ function normalizeAndIncludeEntities( event ); } + expansionGuard.enter(event, replacementText.length); const result = EntityReplacementTextInLiteral(replacementText, 0); if (!result.success) { throwParseError( @@ -386,18 +389,21 @@ function normalizeAndIncludeEntities( normalized, result.value, dtd, - ancestorEntities ? [event.name, ...ancestorEntities] : [event.name] + ancestorEntities ? [event.name, ...ancestorEntities] : [event.name], + expansionGuard ); + expansionGuard.exit(); } } function normalizeAttributeValue( value: AttValueEvent[], attDef: AttDefEvent | undefined, - dtd: Dtd | null + dtd: Dtd | null, + expansionGuard: EntityExpansionGuard ): string { const normalized: string[] = []; - normalizeAndIncludeEntities(normalized, value, dtd, null); + normalizeAndIncludeEntities(normalized, value, dtd, null, expansionGuard); if (attDef && !attDef.isCData) { return normalized .join('') @@ -506,7 +512,8 @@ class Namespaces { event: STagEvent | EmptyElemTagEvent, attlist: Map | undefined, dtd: Dtd | null, - qualifiedNameCache: QualifiedNameCache + qualifiedNameCache: QualifiedNameCache, + expansionGuard: EntityExpansionGuard ): Namespaces { let ns = parent; let hasDeclarations = false; @@ -558,7 +565,7 @@ class Namespaces { localName === 'xmlns' && (!hasDeclarations || !ns._byPrefix.has(null)) ) { - const namespace = normalizeAttributeValue(value, def, dtd) || null; + const namespace = normalizeAttributeValue(value, def, dtd, expansionGuard) || null; add(null, namespace, nameEvent); } else if (prefix === 'xmlns' && (!hasDeclarations || !ns._byPrefix.has(localName))) { if (localName === 'xmlns') { @@ -567,7 +574,7 @@ class Namespaces { nameEvent ); } - const namespace = normalizeAttributeValue(value, def, dtd) || null; + const namespace = normalizeAttributeValue(value, def, dtd, expansionGuard) || null; add(localName, namespace, nameEvent); } }; @@ -719,15 +726,11 @@ export function parseXml( input = input.replace(/^\ufeff/, ''); input = normalizeLineEndings(input); - // Guard against entity expansion attacks by keeping track of the initial input length vs. the - // expanded input length. The latter includes the length of the replacement text for each - // processed entity reference. An attack is likely if the ratio between the two exceeds the - // maximum amplification factor AND the expanded input length exceeds a threshold. This approach - // and defaults are taken from libexpat's billion laughs attack protection. - const initialInputLength = input.length; - let expandedInputLength = initialInputLength; - let topLevelEntityRef: EntityRefEvent | null = null; - + const expansionGuard = new EntityExpansionGuard( + input.length, + entityExpansionThreshold, + entityExpansionMaxAmplification + ); let entityContext: EntityContext | null = { parent: null, entity: null, @@ -778,16 +781,7 @@ export function parseXml( event ); } - if (topLevelEntityRef === null) { - topLevelEntityRef = event; - } - expandedInputLength += replacementText.length; - if (expandedInputLength > entityExpansionThreshold) { - const amplification = expandedInputLength / initialInputLength; - if (amplification > entityExpansionMaxAmplification) { - throwErrorWithContext('too much entity expansion', topLevelEntityRef); - } - } + expansionGuard.enter(event, replacementText.length); domContext = { parent: domContext, root: domContext.root, @@ -861,7 +855,8 @@ export function parseXml( event, attlist, dtd, - qualifiedNameCache + qualifiedNameCache, + expansionGuard ); const { prefix, localName } = splitQualifiedName( event.name, @@ -888,7 +883,7 @@ export function parseXml( namespace, prefix, localName, - normalizeAttributeValue(attr.value, def, dtd), + normalizeAttributeValue(attr.value, def, dtd, expansionGuard), element ); appendAttribute(attrNode, element, true); @@ -917,7 +912,7 @@ export function parseXml( namespace, prefix, localName, - normalizeAttributeValue(def.value, attr, dtd), + normalizeAttributeValue(def.value, attr, dtd, expansionGuard), element ); appendAttribute(attrNode, element, true); @@ -979,12 +974,10 @@ export function parseXml( ); } + expansionGuard.exit(); entityContext = entityContext.parent; if (entityContext) { domContext = domContext.parent!; - if (entityContext.entity === null) { - topLevelEntityRef = null; - } } } diff --git a/test/dom-parsing/parseXmlDocument.tests.ts b/test/dom-parsing/parseXmlDocument.tests.ts index 937d27f..8e0734b 100644 --- a/test/dom-parsing/parseXmlDocument.tests.ts +++ b/test/dom-parsing/parseXmlDocument.tests.ts @@ -831,6 +831,84 @@ describe('parseXmlDocument', () => { `); }); + it('protects against entity expansion attacks in attribute values', () => { + // Each level expands to 10 times the next, leading to 10^10 expansions + const xml = ` + + + + + + + + + + ]>`; + expect(() => { + console.log(slimdom.parseXmlDocument(xml)); + }).toThrowErrorMatchingInlineSnapshot(` + "too much entity expansion + At line 12, character 17: + + ]> + ^^^^^" + `); + }); + + it('protects against entity expansion attacks in default attribute values', () => { + // Each level expands to 10 times the next, leading to 10^10 expansions + const xml = ` + + + + + + + + + + + ]>`; + expect(() => { + console.log(slimdom.parseXmlDocument(xml)); + }).toThrowErrorMatchingInlineSnapshot(` + "too much entity expansion + At line 12, character 31: + + + ^^^^^" + `); + }); + + it('protects against entity expansion attacks in default attributes on elements generated from an entity', () => { + // Each level expands to 10 times the next, leading to 10^10 expansions + const xml = ` + + + + + + + + + + "> + + ]>&ele;`; + expect(() => { + console.log(slimdom.parseXmlDocument(xml)); + }).toThrowErrorMatchingInlineSnapshot(` + "too much entity expansion + At line 14, character 11: + + ]>&ele; + ^^^^^" + `); + }); + it('can optionally treat CDATA sections as text', () => { const xml = 'beforeafter'; const doc1 = slimdom.parseXmlDocument(xml);