From aaa0f05ab0915c1f4fdabee59c255ef4ac0068c5 Mon Sep 17 00:00:00 2001 From: Aaron Karper Date: Sat, 26 Mar 2022 15:07:30 +0100 Subject: [PATCH] Property based test based on markdown-it (commonmark). **Note that the test is currently failing, so merge with caution** This change checks that markdown_it.render(turndown.render(html)) ~ html and it currently finds various corner-cases: * "foo
bar" => "_foo \n_bar" => "_foo
_bar" * "

foo

bar" => "# foo \nbar" => "

foo

bar" * "foobar" => "_foo_bar" => "_foo_bar" * "foo" => "__foo__" => "foo" * ... The test should be easy to extend to other scenarios that may prove to be problematic. --- package-lock.json | 144 ++++++++++++++++++++++++++++++++++++++++++ package.json | 4 +- test/turndown-test.js | 101 ++++++++++++++++++++++++++++- 3 files changed, 247 insertions(+), 2 deletions(-) diff --git a/package-lock.json b/package-lock.json index 739aa476..4aa03632 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,6 +5,7 @@ "requires": true, "packages": { "": { + "name": "turndown", "version": "7.1.1", "license": "MIT", "dependencies": { @@ -15,6 +16,8 @@ "@rollup/plugin-node-resolve": "13.0.0", "@rollup/plugin-replace": "2.4.2", "browserify": "17.0.0", + "fast-check": "^2.0.0", + "markdown-it": "^12.2.3", "rollup": "2.52.3", "standard": "^10.0.3", "turndown-attendant": "0.0.3" @@ -1199,6 +1202,15 @@ "integrity": "sha512-c98Bf3tPniI+scsdk237ku1Dc3ujXQTSgyiPUDEOe7tRkhrqridvh8klBv0HCEso1OLOYcHuCv/cS6DNxKH+ZA==", "dev": true }, + "node_modules/entities": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", + "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", + "dev": true, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -1784,6 +1796,22 @@ "integrity": "sha512-180WMDQaIMm3+7hGXWf12GtdniDEy7nYcyFMKJn/eZz/6tSLXrUN9V0wKSbMjej0I1WHWbpREDEKHtqPQa9NNw==", "dev": true }, + "node_modules/fast-check": { + "version": "2.23.2", + "resolved": "https://registry.npmjs.org/fast-check/-/fast-check-2.23.2.tgz", + "integrity": "sha512-ECYuSlp6NLpvOj8eScKsqoz1ihtCpSDuEC2ofdGvgsEu1obHYEGqreJ/iPzkJFy73yoU0kCFea7PHUQDNM0VNg==", + "dev": true, + "dependencies": { + "pure-rand": "^5.0.1" + }, + "engines": { + "node": ">=8.0.0" + }, + "funding": { + 
"type": "opencollective", + "url": "https://opencollective.com/fast-check" + } + }, "node_modules/fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", @@ -2731,6 +2759,15 @@ "node": ">= 0.8.0" } }, + "node_modules/linkify-it": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", + "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "dev": true, + "dependencies": { + "uc.micro": "^1.0.1" + } + }, "node_modules/load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -2786,6 +2823,28 @@ "sourcemap-codec": "^1.4.4" } }, + "node_modules/markdown-it": { + "version": "12.3.2", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", + "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "dev": true, + "dependencies": { + "argparse": "^2.0.1", + "entities": "~2.1.0", + "linkify-it": "^3.0.1", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "bin": { + "markdown-it": "bin/markdown-it.js" + } + }, + "node_modules/markdown-it/node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true + }, "node_modules/md5.js": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz", @@ -2797,6 +2856,12 @@ "safe-buffer": "^5.1.2" } }, + "node_modules/mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", + "integrity": "sha1-/oWy7HWlkDfyrf7BAP1sYBdhFS4=", + "dev": true + }, "node_modules/miller-rabin": { "version": "4.0.1", "resolved": 
"https://registry.npmjs.org/miller-rabin/-/miller-rabin-4.0.1.tgz", @@ -3394,6 +3459,16 @@ "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", "dev": true }, + "node_modules/pure-rand": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-5.0.1.tgz", + "integrity": "sha512-ksWccjmXOHU2gJBnH0cK1lSYdvSZ0zLoCMSz/nTGh6hDvCSgcRxDyIcOBD6KNxFz3xhMPm/T267Tbe2JRymKEQ==", + "dev": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/fast-check" + } + }, "node_modules/querystring": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz", @@ -4275,6 +4350,12 @@ "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=", "dev": true }, + "node_modules/uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "dev": true + }, "node_modules/umd": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/umd/-/umd-3.0.3.tgz", @@ -5569,6 +5650,12 @@ } } }, + "entities": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-2.1.0.tgz", + "integrity": "sha512-hCx1oky9PFrJ611mf0ifBLBRW8lUUVRlFolb5gWRfIELabBlbp9xZvrqZLZAs+NxFnbfQoeGd8wDkygjg7U85w==", + "dev": true + }, "error-ex": { "version": "1.3.2", "resolved": "https://registry.npmjs.org/error-ex/-/error-ex-1.3.2.tgz", @@ -6049,6 +6136,15 @@ } } }, + "fast-check": { + "version": "2.23.2", + "resolved": "https://registry.npmjs.org/fast-check/-/fast-check-2.23.2.tgz", + "integrity": "sha512-ECYuSlp6NLpvOj8eScKsqoz1ihtCpSDuEC2ofdGvgsEu1obHYEGqreJ/iPzkJFy73yoU0kCFea7PHUQDNM0VNg==", + "dev": true, + "requires": { + "pure-rand": "^5.0.1" + } + }, "fast-levenshtein": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", @@ -6776,6 +6872,15 @@ "type-check": "~0.3.2" } }, + "linkify-it": { 
+ "version": "3.0.3", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-3.0.3.tgz", + "integrity": "sha512-ynTsyrFSdE5oZ/O9GEf00kPngmOfVwazR5GKDq6EYfhlpFug3J2zybX56a2PRRpc9P+FuSoGNAwjlbDs9jJBPQ==", + "dev": true, + "requires": { + "uc.micro": "^1.0.1" + } + }, "load-json-file": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/load-json-file/-/load-json-file-4.0.0.tgz", @@ -6825,6 +6930,27 @@ "sourcemap-codec": "^1.4.4" } }, + "markdown-it": { + "version": "12.3.2", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-12.3.2.tgz", + "integrity": "sha512-TchMembfxfNVpHkbtriWltGWc+m3xszaRD0CZup7GFFhzIgQqxIfn3eGj1yZpfuflzPvfkt611B2Q/Bsk1YnGg==", + "dev": true, + "requires": { + "argparse": "^2.0.1", + "entities": "~2.1.0", + "linkify-it": "^3.0.1", + "mdurl": "^1.0.1", + "uc.micro": "^1.0.5" + }, + "dependencies": { + "argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "dev": true + } + } + }, "md5.js": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/md5.js/-/md5.js-1.3.5.tgz", @@ -6836,6 +6962,12 @@ "safe-buffer": "^5.1.2" } }, + "mdurl": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-1.0.1.tgz", + "integrity": "sha1-/oWy7HWlkDfyrf7BAP1sYBdhFS4=", + "dev": true + }, "miller-rabin": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/miller-rabin/-/miller-rabin-4.0.1.tgz", @@ -7319,6 +7451,12 @@ "integrity": "sha1-wNWmOycYgArY4esPpSachN1BhF4=", "dev": true }, + "pure-rand": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-5.0.1.tgz", + "integrity": "sha512-ksWccjmXOHU2gJBnH0cK1lSYdvSZ0zLoCMSz/nTGh6hDvCSgcRxDyIcOBD6KNxFz3xhMPm/T267Tbe2JRymKEQ==", + "dev": true + }, "querystring": { "version": "0.2.0", "resolved": 
"https://registry.npmjs.org/querystring/-/querystring-0.2.0.tgz", @@ -8016,6 +8154,12 @@ "integrity": "sha1-hnrHTjhkGHsdPUfZlqeOxciDB3c=", "dev": true }, + "uc.micro": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-1.0.6.tgz", + "integrity": "sha512-8Y75pvTYkLJW2hWQHXxoqRgV7qb9B+9vFEtidML+7koHUFapnVJAZ6cKs+Qjz5Aw3aZWHMC6u0wJE3At+nSGwA==", + "dev": true + }, "umd": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/umd/-/umd-3.0.3.tgz", diff --git a/package.json b/package.json index 14ea2d04..a6992180 100644 --- a/package.json +++ b/package.json @@ -22,7 +22,9 @@ "browserify": "17.0.0", "rollup": "2.52.3", "standard": "^10.0.3", - "turndown-attendant": "0.0.3" + "turndown-attendant": "0.0.3", + "fast-check": "^2.0.0", + "markdown-it": "^12.2.3" }, "files": [ "lib", diff --git a/test/turndown-test.js b/test/turndown-test.js index a7ab0b2a..4c41f0b5 100644 --- a/test/turndown-test.js +++ b/test/turndown-test.js @@ -1,5 +1,7 @@ var Attendant = require('turndown-attendant') var TurndownService = require('../lib/turndown.cjs') +var fc = require('fast-check') +var Md = require('markdown-it') var attendant = new Attendant({ file: __dirname + '/index.html', @@ -174,7 +176,104 @@ test('remove elements are overridden by keep', function (t) { turndownService.keep(['del', 'ins']) turndownService.remove(['del', 'ins']) t.equal(turndownService.turndown( - '

Hello worldWorld

'), + '

Hello worldWorld

'), 'Hello worldWorld' ) })
+
+// Property based tests. arbitraryHtml(opts) returns a fast-check arbitrary of HTML strings built
+// from the opts.spans / opts.divs tag names plus optional <br>, <hr> and unclosed <p> fragments.
+function arbitraryHtml (opts) {
+  let { spans, divs, hr, br, unclosedP } = Object.assign({
+    spans: ['em', 'i', 'strong', 'b', 'span', 'q'],
+    divs: ['p', 'div', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
+    hr: true,
+    br: true,
+    unclosedP: true
+  }, opts)
+  const pseudoDom = fc.letrec(rec => {
+    let spanOpts = [
+      fc.lorem(),
+      fc.array(rec('span'), {
+        minLength: 1
+      })
+    ]
+    if (br) {
+      spanOpts.push(fc.lorem().map(s => `${s}<br>`))
+    }
+    if (spans.length > 0) {
+      spanOpts.push(fc.record({
+        tag: fc.constantFrom(...spans),
+        content: rec('span')
+      }))
+    }
+    let span = fc.oneof({ depthFactor: 0.5, withCrossShrink: true }, ...spanOpts)
+    let divOpts = []
+    if (hr) {
+      divOpts.push(fc.constant('<hr>'))
+    }
+    if (unclosedP) {
+      divOpts.push(fc.lorem().map(s => `<p>${s}`))
+    }
+    if (divs.length > 0) {
+      divOpts.push(fc.record({
+        tag: fc.constantFrom(...divs),
+        content: rec('span')
+      }))
+    }
+    // NOTE(review): fc.oneof presumably needs at least one entry in divOpts - confirm
+    let div = fc.oneof(...divOpts)
+    return {
+      tree: fc.oneof(rec('span'), rec('div')),
+      span: span,
+      div: div
+    }
+  }).tree
+  const flatten = (tree) => {
+    if (Array.isArray(tree)) {
+      return tree.map(flatten).join(' ')
+    } else if (typeof tree === 'string') {
+      return tree
+    } else {
+      return `<${tree.tag}>${flatten(tree.content)}</${tree.tag}>`
+    }
+  }
+  return pseudoDom.map(flatten)
+}
+
+test('arbitraryHtml sanity check', (t) => {
+  let n = 0
+  fc.assert(
+    fc.property(arbitraryHtml(), fc.context(), (html, ctx) => {
+      ctx.log(`HTML: ${JSON.stringify(html)}`)
+      n += 1
+      t.plan(n)
+      t.equal(typeof html, 'string')
+    })
+  )
+})
+
+// A full inverse would require f(g(x)) === x. To tolerate bytewise differences between
+// semantically equivalent documents, we check the weaker conditions described in:
+// https://hypothesis.works/articles/canonical-serialization/
+test('Round Trip', (t) => {
+  let mdIt = new Md()
+  let turndownService = new TurndownService()
+  let normalize = (s) => {
+    return mdIt.render(turndownService.turndown(s))
+  }
+  let compare = (left, right) => {
+    if (left !== right) {
+      throw new Error(`left:\n<<<\n${left}\n>>>\nright:\n<<<\n${right}\n>>>`)
+    }
+  }
+  fc.assert(
+    fc.property(arbitraryHtml(), fc.context(), (html, ctx) => {
+      ctx.log(`HTML: ${JSON.stringify(html)}`)
+      let markdown = turndownService.turndown(html)
+      compare(markdown, turndownService.turndown(normalize(html)))
+      compare(normalize(html), normalize(normalize(html)))
+    })
+  )
+  t.end()
+})