diff --git a/CHANGELOG.md b/CHANGELOG.md index 55eec4d..76afad3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ # SmarterCSV 1.x Change Log ## 1.11.0 - * added feature to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) + * added SmarterCSV::Writer to output CSV files ([issue #44](https://github.com/tilo/smarter_csv/issues/44)) + * added SmarterCSV::Reader to isolate parsing of CSV files ## 1.10.3 (2024-03-10) * fixed issue when frozen options are handed in (thanks to Daniel Pepper) diff --git a/lib/smarter_csv.rb b/lib/smarter_csv.rb index 72c2464..f0ef1b7 100644 --- a/lib/smarter_csv.rb +++ b/lib/smarter_csv.rb @@ -10,7 +10,7 @@ require "smarter_csv/headers" require "smarter_csv/hash_transformations" require "smarter_csv/parse" -require "smarter_csv/generator" +require "smarter_csv/writer" # load the C-extension: case RUBY_ENGINE diff --git a/lib/smarter_csv/generator.rb b/lib/smarter_csv/writer.rb similarity index 74% rename from lib/smarter_csv/generator.rb rename to lib/smarter_csv/writer.rb index bd2fc18..81cb69e 100644 --- a/lib/smarter_csv/generator.rb +++ b/lib/smarter_csv/writer.rb @@ -9,7 +9,7 @@ module SmarterCSV # Optionally headers can be passed-in via the options, # If any new headers are fund in the data, they will be appended to the headers. # - class Generator + class Writer def initialize(file_path, options = {}) @options = options @discover_headers = options.has_key?(:discover_headers) ? (options[:discover_headers] == true) : true @@ -17,7 +17,8 @@ def initialize(file_path, options = {}) @col_sep = options[:col_sep] || ',' @force_quotes = options[:force_quotes] @map_headers = options[:map_headers] || {} - @file = File.open(file_path, 'w+') + @temp_file = Tempfile.new('tempfile', '/tmp') + @output_file = File.open(file_path, 'w+') end def append(array_of_hashes) @@ -29,7 +30,7 @@ def append(array_of_hashes) # Reorder the hash to match the current headers order and fill missing fields ordered_row = @headers.map { |header| hash[header] || '' } - @file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) + @temp_file.puts ordered_row.map { |value| escape_csv_field(value) }.join(@col_sep) end end @@ -37,17 +38,18 @@ def finalize # Map headers if :map_headers option is provided mapped_headers = @headers.map { |header| @map_headers[header] || header } - # Rewind to the beginning of the file to write the headers - @file.rewind - @file.write(mapped_headers.join(@col_sep) + "\n") - @file.flush # Ensure all data is written to the file - @file.close + @temp_file.rewind + @output_file.write(mapped_headers.join(@col_sep) + "\n") + @output_file.write(@temp_file.read) + @output_file.flush + @output_file.close end private + SPECIAL_CHARS = /[,\"\n]/ def escape_csv_field(field) - if @force_quotes || field.to_s.include?(@col_sep) + if @force_quotes || field.to_s.match(SPECIAL_CHARS) "\"#{field}\"" else field.to_s diff --git a/spec/smarter_csv/generator_spec.rb b/spec/smarter_csv/generator_spec.rb deleted file mode 100644 index d2ba7ff..0000000 --- a/spec/smarter_csv/generator_spec.rb +++ /dev/null @@ -1,77 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe SmarterCSV::Generator do - let(:file_path) { 'test_output.csv' } - - after(:each) do - File.delete(file_path) if File.exist?(file_path) - end - - let(:data_batches) do - [ - [ - { name: 'John', age: 30, city: 'New York' }, - { name: 'Jane', age: 25, country: 'USA' } - ], - [ - { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } - ] - ] - end - - context 'when headers are given in advance' do - let(:options) { { headers: %w[name age city] } } - - it 'writes the given headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path, options) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("name,age,city,country,state\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end - - context 'when headers are automatically discovered' do - it 'writes the discovered headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("name,age,city,country,state\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end - - context 'when headers are mapped' do - let(:options) do - { - map_headers: { - name: 'Full Name', - age: 'Age', - city: 'City', - country: 'Country', - state: 'State', - } - } - end - - it 'writes the mapped headers and data correctly' do - generator = SmarterCSV::Generator.new(file_path, options) - data_batches.each { |batch| generator.append(batch) } - generator.finalize - - output = File.read(file_path) - expect(output).to include("Full Name,Age,City,Country,State\n") - expect(output).to include("John,30,New York,,\n") - expect(output).to include("Jane,25,,USA,\n") - expect(output).to include("Mike,35,Chicago,,IL\n") - end - end -end diff --git a/spec/smarter_csv/writer_spec.rb b/spec/smarter_csv/writer_spec.rb new file mode 100644 index 0000000..62b0ff3 --- /dev/null +++ b/spec/smarter_csv/writer_spec.rb @@ -0,0 +1,229 @@ +# frozen_string_literal: true + +RSpec.describe SmarterCSV::Writer do + subject(:create_csv_file) do + writer = SmarterCSV::Writer.new(file_path, options) + data_batches.each { |batch| writer.append(batch) } + writer.finalize + end + let(:file_path) { '/tmp/test_output.csv' } + + after(:each) do + File.delete(file_path) if File.exist?(file_path) + end + + let(:data_batches) do + [ + [ + { name: 'John', age: 30, city: 'New York' }, + { name: 'Jane', age: 25, country: 'USA' } + ], + [ + { name: 'Mike', age: 35, city: 'Chicago', state: 'IL' } + ] + ] + end + + context 'when headers are given in advance' do + let(:options) { { headers: %i[name age city] } } + + it 'writes the given headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'when headers are automatically discovered' do + let(:options) { {} } + + it 'writes the discovered headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("name,age,city,country,state\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'when headers are mapped' do + let(:options) do + { + map_headers: { + name: 'Full Name', + age: 'Age', + city: 'City', + country: 'Country', + state: 'State', + } + } + end + + it 'writes the mapped headers and data correctly' do + create_csv_file + output = File.read(file_path) + + expect(output).to include("Full Name,Age,City,Country,State\n") + expect(output).to include("John,30,New York\n") + expect(output).to include("Jane,25,,USA\n") + expect(output).to include("Mike,35,Chicago,,IL\n") + end + end + + context 'Initialization with Default Options' do + it 'initializes with default options' do + writer = SmarterCSV::Writer.new(file_path) + expect(writer.instance_variable_get(:@discover_headers)).to be true + expect(writer.instance_variable_get(:@headers)).to eq([]) + expect(writer.instance_variable_get(:@col_sep)).to eq(',') + end + end + + context 'Initialization with Custom Options' do + it 'initializes with custom options' do + options = { discover_headers: false, headers: ['a', 'b'], col_sep: ';', force_quotes: true, map_headers: { 'a' => 'A' } } + writer = SmarterCSV::Writer.new(file_path, options) + expect(writer.instance_variable_get(:@discover_headers)).to be false + expect(writer.instance_variable_get(:@headers)).to eq(['a', 'b']) + expect(writer.instance_variable_get(:@col_sep)).to eq(';') + expect(writer.instance_variable_get(:@force_quotes)).to be true + expect(writer.instance_variable_get(:@map_headers)).to eq({ 'a' => 'A' }) + end + end + + context 'Appending Data' do + it 'appends multiple hashes over multiple calls' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }, {c: 3}]) + writer.append([{ d: 4, a: 5 }]) + writer.finalize + output = File.read(file_path) + + expect(output).to include("a,b,c,d\n") + expect(output).to include("1,2\n") + expect(output).to include(",,3\n") + expect(output).to include("5,,,4\n") + end + + it 'appends with existing headers' do + options = { headers: [:a] } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n") + end + + it 'appends with missing fields' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }, { a: 3 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n3,\n") + end + end + + context 'Finalizing the Output File' do + it 'maps headers' do + options = { map_headers: { a: 'A', b: 'B' } } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File.read(file_path)).to eq("A,B\n1,2\n") + end + + it 'writes header and appends content to output file' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n1,2\n") + end + + it 'properly closes the output file' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 1, b: 2 }]) + writer.finalize + + expect(File).to be_exist(file_path) + end + end + + context 'CSV Field Escaping' do + it 'does not quote fields without commas unless force_quotes is enabled' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 'hello', b: 'world' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\nhello,world\n") + end + + it 'quotes fields with column separator' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: 'hello, world', b: 'test' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello, world\",test\n") + end + + it 'quotes all fields when force_quotes is enabled' do + options = { force_quotes: true } + writer = SmarterCSV::Writer.new(file_path, options) + writer.append([{ a: 'hello', b: 'world' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello\",\"world\"\n") + end + end + + context 'Edge Cases' do + it 'handles empty hash' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{}]) + writer.finalize + + expect(File.read(file_path)).to eq("\n\n") + end + + it 'handles empty array' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([]) + writer.finalize + + expect(File.read(file_path)).to eq("\n") + end + + it 'handles special characters in data' do + writer = SmarterCSV::Writer.new(file_path) + writer.append([{ a: "hello\nworld", b: 'quote"test' }]) + writer.finalize + + expect(File.read(file_path)).to eq("a,b\n\"hello\nworld\",\"quote\"test\"\n") + end + end + + context 'Error Handling' do + it 'handles file access issues' do + allow(File).to receive(:open).and_raise(Errno::EACCES) + + expect { + SmarterCSV::Writer.new(file_path) + }.to raise_error(Errno::EACCES) + end + + it 'handles tempfile issues' do + allow(Tempfile).to receive(:new).and_raise(Errno::ENOENT) + + expect { + SmarterCSV::Writer.new(file_path) + }.to raise_error(Errno::ENOENT) + end + end +end