Skip to content

Commit

Permalink
Preserve line endings (#49)
Browse files Browse the repository at this point in the history
* bump versions

* failing tests

* fix the bug

* one more test

* version bump

* prettier

* format on save
  • Loading branch information
danvk authored Nov 2, 2024
1 parent 1b1da5f commit eb1d7b3
Show file tree
Hide file tree
Showing 7 changed files with 75 additions and 11 deletions.
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
{
"typescript.tsdk": "node_modules/typescript/lib"
"typescript.tsdk": "node_modules/typescript/lib",
"[typescript]": {
"editor.formatOnSave": true
}
}
2 changes: 1 addition & 1 deletion classify-images.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ interface CLIArgs {

const program = new Command();
program
.version('2.2.0')
.version('2.2.1')
.usage('[options] /path/to/images/*.jpg | images.txt')
.option('-p, --port <n>', 'Run on this port (default 4321)', parseInt)
.option('-o, --output <file>', 'Path to output CSV file (default output.csv)', 'output.csv')
Expand Down
41 changes: 34 additions & 7 deletions csv.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import csvParse from 'csv-parse';
import {stringify} from 'csv-stringify/sync';
import {stringify, Options} from 'csv-stringify/sync';
import * as fs from 'fs-extra';

const csvOptions: csvParse.Options = {
Expand Down Expand Up @@ -83,12 +83,35 @@ export async function readHeaders(file: string) {
}

/**
 * Write a CSV file.
 *
 * All of `rows` is serialized to one in-memory string with csv-stringify's
 * synchronous `stringify`, and that string is then written to `file` as UTF-8.
 *
 * @param file Path of the CSV file to write.
 * @param rows Records to write; each inner array holds the cells of one row.
 * @param options Optional csv-stringify options, passed to `stringify`
 *     unchanged (for example `record_delimiter` to control line endings).
 */
export async function writeCsv(file: string, rows: string[][]) {
export async function writeCsv(file: string, rows: string[][], options?: Options) {
// TODO(danvk): make this less memory-intensive
const output = stringify(rows);
const output = stringify(rows, options);
await fs.writeFile(file, output, {encoding: 'utf8'});
}

const LF = '\n'.charCodeAt(0);
const CR = '\r'.charCodeAt(0);

/**
 * Determine the type of line endings a file uses by looking for the first one.
 *
 * Only the first 10,000 bytes of the file are scanned, so a file whose first
 * line is longer than that is reported as having no detectable line ending.
 *
 * @param path Path of the file to inspect.
 * @returns `'\r\n'` (Windows), `'\n'` (Unix) or `'\r'` (old Mac) for the first
 *     line break found, or `undefined` if the scanned prefix contains none
 *     (including when the file is empty).
 * @throws Any error raised by `fs` while opening or reading the file.
 */
export function detectLineEnding(path: string): '\r\n' | '\n' | '\r' | undefined {
  const SIZE = 10_000;
  // Read one byte past the scan window so that a CR sitting in the window's
  // final position can still be paired with a following LF.
  const buffer = Buffer.alloc(SIZE + 1);
  const f = fs.openSync(path, 'r');
  try {
    const n = fs.readSync(f, buffer, 0, SIZE + 1, 0);
    const limit = Math.min(n, SIZE);
    // Every byte that was read is examined, including the last one; a file
    // consisting of a single line plus its terminator must still be detected.
    for (let i = 0; i < limit; i++) {
      const byte = buffer[i];
      if (byte === LF) {
        return '\n'; // Unix
      }
      if (byte === CR) {
        // CR immediately followed by LF is Windows; a lone CR is old Mac.
        return i + 1 < n && buffer[i + 1] === LF ? '\r\n' : '\r';
      }
    }
    return undefined;
  } finally {
    // Always release the descriptor, even if the read throws.
    fs.closeSync(f);
  }
}

/**
* Append one row to a CSV file.
*
Expand All @@ -103,6 +126,7 @@ export async function appendRow(file: string, row: {[column: string]: string}) {
return writeCsv(file, rows);
}

const lineEnding = detectLineEnding(file);
const lines = readRows(file);
const headerRow = await lines.next();
if (headerRow.done) {
Expand Down Expand Up @@ -130,22 +154,25 @@ export async function appendRow(file: string, row: {[column: string]: string}) {
rows.push(row.concat(emptyCols));
}
rows.push(fullHeaders.map(k => row[k] || ''));
await writeCsv(file, rows);
await writeCsv(file, rows, {record_delimiter: lineEnding});
} else {
// write the new row
const newRow = headers.map(k => row[k] || '');
await lines.return(); // close the file for reading.
// Add a newline if the file doesn't end with one.
const f = fs.openSync(file, 'a+');
const {size} = fs.fstatSync(f);
const {buffer} = await fs.read(f, Buffer.alloc(1), 0, 1, size - 1);
const hasTrailingNewline = buffer[0] == '\n'.charCodeAt(0);
const lineStr = (hasTrailingNewline ? '' : '\n') + stringify([newRow]);
const {buffer} = await fs.read(f, Buffer.alloc(2), 0, 2, size - 2);
const tail = buffer.toString('utf8');
const hasTrailingNewline = tail.endsWith(lineEnding ?? '\n');
const lineStr =
(hasTrailingNewline ? '' : lineEnding) + stringify([newRow], {record_delimiter: lineEnding});
await fs.appendFile(f, lineStr);
await fs.close(f);
}
}

// Note: this might change line endings in the file.
export async function deleteLastRow(file: string) {
const rows = [];
for await (const row of readRows(file)) {
Expand Down
2 changes: 1 addition & 1 deletion localturk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ const program = new Command();

// If you add an option here, consider adding it in classify-images.ts as well.
program
.version('2.2.0')
.version('2.2.1')
.usage('[options] template.html tasks.csv outputs.csv')
.option('-p, --port <n>', 'Run on this port (default 4321)', parseInt)
.option(
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "localturk",
"version": "2.2.0",
"version": "2.2.1",
"description": "Run Mechanical Turk-like tasks on your own.",
"main": "index.js",
"repository": "https://github.com/danvk/localturk.git",
Expand Down
32 changes: 32 additions & 0 deletions test/csv.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,36 @@ describe('csv', () => {
const data = await read('/tmp/test.csv');
expect(data).toEqual('id,"First,Last","Last,First"\n' + '1,"Jane,Doe","Doe,Jane"\n');
});

// Reading a CRLF-terminated fixture: the expected cells carry no stray '\r'.
it('should read rows from a CSV file with Windows line endings', async () => {
const rows = [];
for await (const row of csv.readRows('./test/windows.csv')) {
rows.push(row);
}
expect(rows).toEqual([
['A', 'B'],
['1', '2'],
]);
});

// Appending a row whose columns all exist already must keep '\r\n' endings.
it('should append to a CSV file with Windows line endings', async () => {
// Operate on a copy so the checked-in fixture is never modified.
fs.copyFileSync('./test/windows.csv', '/tmp/test.csv');
expect(csv.detectLineEnding('/tmp/test.csv')).toEqual('\r\n');
await csv.appendRow('/tmp/test.csv', {A: '3', B: '4'});
expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2\r\n3,4\r\n`);
});

// The last row has no terminator, so a '\r\n' separator is expected before
// the appended row in addition to the one after it.
it('should append to a CSV file with Windows line endings and no trailing newline', async () => {
fs.writeFileSync('/tmp/test.csv', `A,B\r\n1,2`);
expect(csv.detectLineEnding('/tmp/test.csv')).toEqual('\r\n');
expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2`); // no trailing newline
await csv.appendRow('/tmp/test.csv', {A: '3', B: '4'});
expect(await read('/tmp/test.csv')).toEqual(`A,B\r\n1,2\r\n3,4\r\n`);
});

// The appended row introduces column C and omits B: the existing row is
// expected to gain an empty C cell, the new row an empty B cell, and every
// line is still expected to end with '\r\n'.
it('should preserve line endings when adding a new column', async () => {
fs.copyFileSync('./test/windows.csv', '/tmp/test.csv');
await csv.appendRow('/tmp/test.csv', {A: '3', C: '4'});
expect(await read('/tmp/test.csv')).toEqual(`A,B,C\r\n1,2,\r\n3,,4\r\n`);
});
});
2 changes: 2 additions & 0 deletions test/windows.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A,B
1,2

0 comments on commit eb1d7b3

Please sign in to comment.