From 5320f09cb252f9da05695e322296816524fac640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Barto=C5=A1?= Date: Wed, 15 Apr 2020 20:01:22 +0200 Subject: [PATCH] Improve text node replacement --- README.md | 4 ++++ src/rules.js | 5 +++++ src/turndown.js | 16 +++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9f98c22e..38d8a03c 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,8 @@ var turndownService = new TurndownService({ option: 'value' }) | `blankReplacement` | rule replacement function | See **Special Rules** below | | `keepReplacement` | rule replacement function | See **Special Rules** below | | `defaultReplacement` | rule replacement function | See **Special Rules** below | +| `textReplacement` | rule replacement function | See **Special Rules** below | +| `escapes` | array of replacement pairs | See [source code](https://github.com/domchristie/turndown/blob/master/src/turndown.js#L9) | ## Methods @@ -197,6 +199,8 @@ rules.emphasis = { **Default rule** handles nodes which are not recognised by any other rule. By default, it outputs the node's text content (separated by blank lines if it is a block-level element). Its behaviour can be customised with the `defaultReplacement` option. +**Text rule** handles text nodes. By default it preserves text under `<code>` elements and escapes all other text. + ### Rule Precedence Turndown iterates over the set of rules, and picks the first one that matches the `filter`. 
The following list describes the order of precedence: diff --git a/src/rules.js b/src/rules.js index 5414fcd6..47785663 100644 --- a/src/rules.js +++ b/src/rules.js @@ -7,6 +7,10 @@ export default function Rules (options) { this._keep = [] this._remove = [] + this.textRule = { + replacement: options.textReplacement + } + this.blankRule = { replacement: options.blankReplacement } @@ -43,6 +47,7 @@ Rules.prototype = { }, forNode: function (node) { + if (node.nodeType === 3) return this.textRule if (node.isBlank) return this.blankRule var rule diff --git a/src/turndown.js b/src/turndown.js index bcd4d60b..f94a96d0 100644 --- a/src/turndown.js +++ b/src/turndown.js @@ -6,7 +6,7 @@ import Node from './node' var reduce = Array.prototype.reduce var leadingNewLinesRegExp = /^\n*/ var trailingNewLinesRegExp = /\n*$/ -var escapes = [ +var ESCAPES = [ [/\\/g, '\\\\'], [/\*/g, '\\*'], [/^-/g, '\\-'], @@ -25,8 +25,10 @@ var escapes = [ export default function TurndownService (options) { if (!(this instanceof TurndownService)) return new TurndownService(options) + var self = this var defaults = { rules: COMMONMARK_RULES, + escapes: ESCAPES, headingStyle: 'setext', hr: '* * *', bulletListMarker: '*', @@ -45,6 +47,9 @@ export default function TurndownService (options) { }, defaultReplacement: function (content, node) { return node.isBlock ? '\n\n' + content + '\n\n' : content + }, + textReplacement: function (content, node, options) { + return node.isCode ? 
content : self.escape(content, node, options) } } this.options = extend({}, defaults, options) @@ -140,10 +145,10 @@ TurndownService.prototype = { * @type String */ - escape: function (string) { - return escapes.reduce(function (accumulator, escape) { + escape: function (content, node, options) { + return options.escapes.reduce(function (accumulator, escape) { return accumulator.replace(escape[0], escape[1]) - }, string) + }, content) } } @@ -162,7 +167,8 @@ function process (parentNode) { var replacement = '' if (node.nodeType === 3) { - replacement = node.isCode ? node.nodeValue : self.escape(node.nodeValue) + var textRule = self.rules.forNode(node) + replacement = textRule.replacement(node.nodeValue, node, self.options) } else if (node.nodeType === 1) { replacement = replacementForNode.call(self, node) }