From 67373e5bbc475a2a90145f17f62c52a985067a05 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz
Date: Fri, 26 Jun 2020 11:53:46 +0200
Subject: [PATCH 1/6] test case added for loading to database using CSV.jl row iterator

---
 Project.toml     |  6 ++++--
 test/runtests.jl | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 60522979..cf3148fd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,7 +23,8 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
 [compat]
 BinaryProvider = "0.5"
 CEnum = "0.2"
-DataFrames = "0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20"
+CSV = "0.6.2"
+DataFrames = "0.21"
 Decimals = "0.4.1"
 DocStringExtensions = "0.8.0"
 Infinity = "0.2"
@@ -37,8 +38,9 @@ TimeZones = "0.9.2, 0.10, 0.11, 1"
 julia = "1"
 
 [extras]
+CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "DataFrames"]
+test = ["Test", "DataFrames", "CSV"]
diff --git a/test/runtests.jl b/test/runtests.jl
index fb21799a..23fb0a55 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -12,6 +12,7 @@ using Memento.TestUtils
 using OffsetArrays
 using TimeZones
 using Tables
+using CSV
 
 Memento.config!("critical")
 
@@ -31,6 +32,27 @@ macro test_nolog_on_windows(ex...)
     end
 end
 
+"""
+    load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
+
+Fast data upload using the PostgreSQL `COPY FROM STDIN` method, which is usually much faster,
+especially for large data amounts, than SQL Inserts.
+
+`table` must be a Tables.jl compatible data structure.
+
+All columns given in `table` must have corresponding fields in the target DB table,
+the order of the columns does not matter.
+
+Columns in the target DB table, which are not provided by the input `table`, are filled
+with `null` (provided they are nullable).
+"""
+function load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
+    iter = CSV.RowWriter(table)
+    column_names = first(iter)
+    copyin = LibPQ.CopyIn("COPY $tablename ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
+    execute(con, copyin)
+end
+
 @testset "LibPQ" begin
 
 @testset "ConninfoDisplay" begin
@@ -317,6 +339,30 @@ end
         @test isequal(table_data, data)
         close(result)
 
+        # testing loading to database using CSV.jl row iterator
+        result = execute(
+            conn,
+            "DELETE FROM libpqjl_test;";
+            throw_error=true,
+        )
+        close(result)
+
+        result = load_by_copy!(data, conn, "libpqjl_test")
+        @test isopen(result)
+        @test status(result) == LibPQ.libpq_c.PGRES_COMMAND_OK
+        @test isempty(LibPQ.error_message(result))
+        close(result)
+
+        result = execute(
+            conn,
+            "SELECT no_nulls, yes_nulls FROM libpqjl_test ORDER BY no_nulls ASC;";
+            throw_error=true
+        )
+        table_data = DataFrame(result)
+        @test isequal(table_data, data)
+        close(result)
+
+
         close(conn)
     end
 

From c1f5d51846266131b9addd08ed995530febf6337 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz
Date: Fri, 26 Jun 2020 12:01:14 +0200
Subject: [PATCH 2/6] added load from DataFrame to documentation example

---
 docs/src/index.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/docs/src/index.md b/docs/src/index.md
index 901c2e3d..5a059cef 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -49,6 +49,26 @@ LibPQ.load!(
     "INSERT INTO libpqjl_test (no_nulls, yes_nulls) VALUES (\$1, \$2);",
 )
 
+using DataFrames
+no_nulls = map(string, 'a':'z')
+yes_nulls = Union{String, Missing}[isodd(Int(c)) ? string(c) : missing for c in 'a':'z']
+data = DataFrame(no_nulls=no_nulls, yes_nulls=yes_nulls)
+
+execute(conn, "DELETE FROM libpqjl_test;")
+
+using CSV
+"""
+Function for upload of a Tables.jl compatible data structure (e.g. DataFrames.jl) into the db.
+"""
+function load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
+    iter = CSV.RowWriter(table)
+    column_names = first(iter)
+    copyin = LibPQ.CopyIn("COPY $tablename ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
+    execute(con, copyin)
+end
+
+load_by_copy!(data, conn, "libpqjl_test")
+
 close(conn)
 ```
 

From b51a7f7ed16faa1848a6cbb05d052a1296b0ca76 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz <52384612+lungben@users.noreply.github.com>
Date: Thu, 2 Jul 2020 20:59:08 +0200
Subject: [PATCH 3/6] Update test/runtests.jl

Co-authored-by: Eric Davies
---
 test/runtests.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 23fb0a55..eb71d884 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -46,11 +46,11 @@ the order of the columns does not matter.
 Columns in the target DB table, which are not provided by the input `table`, are filled
 with `null` (provided they are nullable).
 """
-function load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
+function load_by_copy!(table, conn::LibPQ.Connection, table_name::AbstractString)
     iter = CSV.RowWriter(table)
     column_names = first(iter)
-    copyin = LibPQ.CopyIn("COPY $tablename ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
-    execute(con, copyin)
+    copyin = LibPQ.CopyIn("COPY $table_name ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
+    execute(conn, copyin)
 end
 
 @testset "LibPQ" begin

From 5188864e0b4b25ed9a73c5eacd5c92d1aad89f22 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz
Date: Tue, 7 Jul 2020 15:59:08 +0200
Subject: [PATCH 4/6] added support of CSV 0.7 for tests

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 68dec1fb..84f68984 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,7 +23,7 @@ TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"
 [compat]
 BinaryProvider = "0.5"
 CEnum = "0.2"
-CSV = "0.6.2"
+CSV = "0.6.2, 0.7"
 DataFrames = "0.21"
 Decimals = "0.4.1"
 DocStringExtensions = "0.8.0"

From 7d904f3b1191c944be2cf3fe61920a28fb14ffa0 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz
Date: Tue, 7 Jul 2020 16:04:40 +0200
Subject: [PATCH 5/6] close connection and recreate tmp table

---
 test/runtests.jl | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index eb71d884..e676fd97 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -338,14 +338,16 @@ end
         table_data = DataFrame(result)
         @test isequal(table_data, data)
         close(result)
+        close(conn)
 
         # testing loading to database using CSV.jl row iterator
-        result = execute(
-            conn,
-            "DELETE FROM libpqjl_test;";
-            throw_error=true,
-        )
-        close(result)
+        conn = LibPQ.Connection("dbname=postgres user=$DATABASE_USER")
+        result = execute(conn, """
+            CREATE TEMPORARY TABLE libpqjl_test (
+                no_nulls varchar(10) PRIMARY KEY,
+                yes_nulls varchar(10)
+            );
+        """)
 
         result = load_by_copy!(data, conn, "libpqjl_test")
         @test isopen(result)

From bd9c33bbd6bc8bea483a25996b4500b58162d297 Mon Sep 17 00:00:00 2001
From: Benjamin Lungwitz
Date: Tue, 7 Jul 2020 16:16:53 +0200
Subject: [PATCH 6/6] moved DataFrame upload to copy section

---
 docs/src/index.md | 51 +++++++++++++++++++----------------------------
 1 file changed, 21 insertions(+), 30 deletions(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 5a059cef..aa875d53 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -49,26 +49,6 @@ LibPQ.load!(
     "INSERT INTO libpqjl_test (no_nulls, yes_nulls) VALUES (\$1, \$2);",
 )
 
-using DataFrames
-no_nulls = map(string, 'a':'z')
-yes_nulls = Union{String, Missing}[isodd(Int(c)) ? string(c) : missing for c in 'a':'z']
-data = DataFrame(no_nulls=no_nulls, yes_nulls=yes_nulls)
-
-execute(conn, "DELETE FROM libpqjl_test;")
-
-using CSV
-"""
-Function for upload of a Tables.jl compatible data structure (e.g. DataFrames.jl) into the db.
-"""
-function load_by_copy!(table, con:: LibPQ.Connection, tablename:: AbstractString)
-    iter = CSV.RowWriter(table)
-    column_names = first(iter)
-    copyin = LibPQ.CopyIn("COPY $tablename ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
-    execute(con, copyin)
-end
-
-load_by_copy!(data, conn, "libpqjl_test")
-
 close(conn)
 ```
 
@@ -97,21 +77,32 @@ An alternative to repeated `INSERT` queries is the PostgreSQL `COPY` query.
 `LibPQ.CopyIn` makes it easier to stream data to the server using a `COPY FROM STDIN` query.
 
 ```julia
-using LibPQ, DataFrames
+using LibPQ, DataFrames, CSV
 
 conn = LibPQ.Connection("dbname=postgres user=$DATABASE_USER")
 
-row_strings = imap(eachrow(df)) do row
-    if ismissing(row[:yes_nulls])
-        "$(row[:no_nulls]),\n"
-    else
-        "$(row[:no_nulls]),$(row[:yes_nulls])\n"
-    end
-end
+result = execute(conn, """
+    CREATE TEMPORARY TABLE libpqjl_test (
+        no_nulls varchar(10) PRIMARY KEY,
+        yes_nulls varchar(10)
+    );
+""")
 
-copyin = LibPQ.CopyIn("COPY libpqjl_test FROM STDIN (FORMAT CSV);", row_strings)
+no_nulls = map(string, 'a':'z')
+yes_nulls = Union{String, Missing}[isodd(Int(c)) ? string(c) : missing for c in 'a':'z']
+data = DataFrame(no_nulls=no_nulls, yes_nulls=yes_nulls)
 
-execute(conn, copyin)
+"""
+Function for upload of a Tables.jl compatible data structure (e.g. DataFrames.jl) into the db.
+"""
+function load_by_copy!(table, conn:: LibPQ.Connection, tablename:: AbstractString)
+    iter = CSV.RowWriter(table)
+    column_names = first(iter)
+    copyin = LibPQ.CopyIn("COPY $tablename ($column_names) FROM STDIN (FORMAT CSV, HEADER);", iter)
+    execute(conn, copyin)
+end
+
+load_by_copy!(data, conn, "libpqjl_test")
 
 close(conn)
 ```