From eb1d7b39177e78cdf2ff035273fb6087240145bd Mon Sep 17 00:00:00 2001 From: Dan Vanderkam Date: Sat, 2 Nov 2024 10:16:05 -0400 Subject: [PATCH] Preserve line endings (#49) * bump versions * failing tests * fix the bug * one more test * version bump * prettier * format on save --- .vscode/settings.json | 5 ++++- classify-images.ts | 2 +- csv.ts | 41 ++++++++++++++++++++++++++++++++++------- localturk.ts | 2 +- package.json | 2 +- test/csv.test.ts | 32 ++++++++++++++++++++++++++++++++ test/windows.csv | 2 ++ 7 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 test/windows.csv diff --git a/.vscode/settings.json b/.vscode/settings.json index 55712c1..54b7990 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,6 @@ { - "typescript.tsdk": "node_modules/typescript/lib" + "typescript.tsdk": "node_modules/typescript/lib", + "[typescript]": { + "editor.formatOnSave": true + } } \ No newline at end of file diff --git a/classify-images.ts b/classify-images.ts index 5cd9f15..bf60e23 100644 --- a/classify-images.ts +++ b/classify-images.ts @@ -38,7 +38,7 @@ interface CLIArgs { const program = new Command(); program - .version('2.2.0') + .version('2.2.1') .usage('[options] /path/to/images/*.jpg | images.txt') .option('-p, --port ', 'Run on this port (default 4321)', parseInt) .option('-o, --output ', 'Path to output CSV file (default output.csv)', 'output.csv') diff --git a/csv.ts b/csv.ts index c51ad5e..8dcb692 100644 --- a/csv.ts +++ b/csv.ts @@ -1,5 +1,5 @@ import csvParse from 'csv-parse'; -import {stringify} from 'csv-stringify/sync'; +import {stringify, Options} from 'csv-stringify/sync'; import * as fs from 'fs-extra'; const csvOptions: csvParse.Options = { @@ -83,12 +83,35 @@ export async function readHeaders(file: string) { } /** Write a CSV file */ -export async function writeCsv(file: string, rows: string[][]) { +export async function writeCsv(file: string, rows: string[][], options?: Options) { // TODO(danvk): make this less memory-intensive - const output = stringify(rows); + const output = stringify(rows, options); await fs.writeFile(file, output, {encoding: 'utf8'}); } +const LF = '\n'.charCodeAt(0); +const CR = '\r'.charCodeAt(0); + +/** Determine the type of line endings a file uses by looking for the first one. */ +export function detectLineEnding(path: string) { + const f = fs.openSync(path, 'r'); + const SIZE = 10_000; + const buffer = Buffer.alloc(SIZE); + const n = fs.readSync(f, buffer, 0, SIZE, 0); + fs.closeSync(f); + for (let i = 0; i < n - 1; i++) { + const [a, b] = [buffer[i], buffer[i + 1]]; + if (a == CR && b == LF) { + return '\r\n'; // Windows + } else if (a == LF) { + return '\n'; // Unix + } else if (a == CR) { + return '\r'; // Old Mac + } + } + return undefined; +} + /** * Append one row to a CSV file. * @@ -103,6 +126,7 @@ export async function appendRow(file: string, row: {[column: string]: string}) { return writeCsv(file, rows); } + const lineEnding = detectLineEnding(file); const lines = readRows(file); const headerRow = await lines.next(); if (headerRow.done) { @@ -130,7 +154,7 @@ export async function appendRow(file: string, row: {[column: string]: string}) { rows.push(row.concat(emptyCols)); } rows.push(fullHeaders.map(k => row[k] || '')); - await writeCsv(file, rows); + await writeCsv(file, rows, {record_delimiter: lineEnding}); } else { // write the new row const newRow = headers.map(k => row[k] || ''); @@ -138,14 +162,17 @@ export async function appendRow(file: string, row: {[column: string]: string}) { // Add a newline if the file doesn't end with one. const f = fs.openSync(file, 'a+'); const {size} = fs.fstatSync(f); - const {buffer} = await fs.read(f, Buffer.alloc(1), 0, 1, size - 1); - const hasTrailingNewline = buffer[0] == '\n'.charCodeAt(0); - const lineStr = (hasTrailingNewline ? '' : '\n') + stringify([newRow]); + const {buffer} = await fs.read(f, Buffer.alloc(2), 0, 2, size - 2); + const tail = buffer.toString('utf8'); + const hasTrailingNewline = tail.endsWith(lineEnding ?? '\n'); + const lineStr = + (hasTrailingNewline ? '' : lineEnding) + stringify([newRow], {record_delimiter: lineEnding}); await fs.appendFile(f, lineStr); await fs.close(f); } } +// Note: this might change line endings in the file. export async function deleteLastRow(file: string) { const rows = []; for await (const row of readRows(file)) { diff --git a/localturk.ts b/localturk.ts index a677104..957a7ae 100644 --- a/localturk.ts +++ b/localturk.ts @@ -45,7 +45,7 @@ const program = new Command(); // If you add an option here, consider adding it in classify-images.ts as well. program - .version('2.2.0') + .version('2.2.1') .usage('[options] template.html tasks.csv outputs.csv') .option('-p, --port ', 'Run on this port (default 4321)', parseInt) .option( diff --git a/package.json b/package.json index be05a9f..f0037b5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "localturk", - "version": "2.2.0", + "version": "2.2.1", "description": "Run Mechanical Turk-like tasks on your own.", "main": "index.js", "repository": "https://github.com/danvk/localturk.git", diff --git a/test/csv.test.ts b/test/csv.test.ts index 13bcca1..1dba490 100644 --- a/test/csv.test.ts +++ b/test/csv.test.ts @@ -143,4 +143,36 @@ describe('csv', () => { const data = await read('/tmp/test.csv'); expect(data).toEqual('id,"First,Last","Last,First"\n' + '1,"Jane,Doe","Doe,Jane"\n'); }); + + it('should read rows from a CSV file with Windows line endings', async () => { + const rows = []; + for await (const row of csv.readRows('./test/windows.csv')) { + rows.push(row); + } + expect(rows).toEqual([ + ['A', 'B'], + ['1', '2'], + ]); + }); + + it('should append to a CSV file with Windows line endings', async () => { + fs.copyFileSync('./test/windows.csv', '/tmp/test.csv'); + expect(csv.detectLineEnding('/tmp/test.csv')).toEqual('\r\n'); + await csv.appendRow('/tmp/test.csv', {A: '3', B: '4'}); + expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2\r\n3,4\r\n`); + }); + + it('should append to a CSV file with Windows line endings and no trailing newline', async () => { + fs.writeFileSync('/tmp/test.csv', `A,B\r\n1,2`); + expect(csv.detectLineEnding('/tmp/test.csv')).toEqual('\r\n'); + expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2`); // no trailing newline + await csv.appendRow('/tmp/test.csv', {A: '3', B: '4'}); + expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2\r\n3,4\r\n`); + }); + + it('should preserve line endings when adding a new column', async () => { + fs.copyFileSync('./test/windows.csv', '/tmp/test.csv'); + await csv.appendRow('/tmp/test.csv', {A: '3', C: '4'}); + expect(await read('/tmp/test.csv')).toEqual(`A,B,C\r\n1,2,\r\n3,,4\r\n`); + }); }); diff --git a/test/windows.csv b/test/windows.csv new file mode 100644 index 0000000..1133781 --- /dev/null +++ b/test/windows.csv @@ -0,0 +1,2 @@ +A,B +1,2