From 95e9c5fa8dac9fd5de8abe15bc596008e8ab1046 Mon Sep 17 00:00:00 2001 From: Kenneth Shaw Date: Fri, 3 Nov 2023 10:47:58 +0700 Subject: [PATCH] Fixing issue with file: URLs --- README.md | 4 +-- dburl.go | 37 +++++++++++++++++++++-- dburl_test.go | 6 ++-- scheme.go | 62 +++++++++++++++++++++++++++++++++++++-- testdata/test.duckdb | Bin 0 -> 12288 bytes testdata/test.duckdb.wal | 0 testdata/test.sqlite3 | Bin 0 -> 8192 bytes 7 files changed, 99 insertions(+), 10 deletions(-) create mode 100644 testdata/test.duckdb create mode 100644 testdata/test.duckdb.wal create mode 100644 testdata/test.sqlite3 diff --git a/README.md b/README.md index 30269d1..1a7a69f 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ additional aliases, and the related Go driver: | MySQL | `mysql` | `my`, `maria`, `aurora`, `mariadb`, `percona` | [github.com/go-sql-driver/mysql][d-mysql] | | Microsoft SQL Server | `sqlserver` | `ms`, `mssql`, `azuresql` | [github.com/microsoft/go-mssqldb][d-sqlserver] | | Oracle Database | `oracle` | `or`, `ora`, `oci`, `oci8`, `odpi`, `odpi-c` | [github.com/sijms/go-ora/v2][d-oracle] | -| SQLite3 | `sqlite3` | `sq`, `file`, `sqlite` | [github.com/mattn/go-sqlite3][d-sqlite3] [†][f-cgo] | +| SQLite3 | `sqlite3` | `sq`, `sqlite`, `file` | [github.com/mattn/go-sqlite3][d-sqlite3] [†][f-cgo] | | CSVQ | `csvq` | `cs`, `csv`, `tsv`, `json` | [github.com/mithrandie/csvq-driver][d-csvq] | | | | | | | Alibaba MaxCompute | `maxcompute` | `mc` | [sqlflow.org/gomaxcompute][d-maxcompute] | @@ -126,7 +126,7 @@ additional aliases, and the related Go driver: | Couchbase | `couchbase` | `n1`, `n1ql` | [github.com/couchbase/go_n1ql][d-couchbase] | | Cznic QL | `ql` | `cznic`, `cznicql` | [modernc.org/ql][d-ql] | | Databend | `databend` | `dd`, `bend` | [github.com/databendcloud/databend-go][d-databend] | -| DuckDB | `duckdb` | `dk`, `ddb`, `duck` | [github.com/marcboeker/go-duckdb][d-duckdb] [†][f-cgo] | +| DuckDB | `duckdb` | `dk`, `ddb`, `duck`, `file` | [github.com/marcboeker/go-duckdb][d-duckdb] [†][f-cgo] | | Exasol | `exasol` | `ex`, `exa` | [github.com/exasol/exasol-driver-go][d-exasol] | | Firebird | `firebird` | `fb`, `firebirdsql` | [github.com/nakagami/firebirdsql][d-firebird] | | FlightSQL | `flightsql` | `fl`, `flight` | [github.com/apache/arrow/go/v12/arrow/flight/flightsql/driver][d-flightsql] | diff --git a/dburl.go b/dburl.go index 7983009..f6e13b8 100644 --- a/dburl.go +++ b/dburl.go @@ -12,6 +12,7 @@ package dburl import ( "database/sql" "net/url" + "os" "strings" ) @@ -73,10 +74,10 @@ type URL struct { func Parse(urlstr string) (*URL, error) { // parse url v, err := url.Parse(urlstr) - if err != nil { + switch { + case err != nil: return nil, err - } - if v.Scheme == "" { + case v.Scheme == "": return nil, ErrInvalidDatabaseScheme } // create url @@ -97,6 +98,15 @@ func Parse(urlstr string) (*URL, error) { if !ok { return nil, ErrUnknownDatabaseScheme } + // load real scheme for file: + if scheme.Driver == "file" { + typ, err := SchemeType(u.Opaque) + if err == nil { + if s, ok := schemeMap[typ]; ok { + scheme = s + } + } + } // if scheme does not understand opaque URLs, retry parsing after building // fully qualified URL if !scheme.Opaque && u.Opaque != "" { @@ -230,6 +240,25 @@ func (u *URL) Normalize(sep, empty string, cut int) string { return strings.Join(s, sep) } +// SchemeType returns the scheme type for a file on disk. +func SchemeType(name string) (string, error) { + f, err := os.OpenFile(name, os.O_RDONLY, 0) + if err != nil { + return "", err + } + defer f.Close() + buf := make([]byte, 128) + if _, err := f.Read(buf); err != nil { + return "", err + } + for _, header := range headerTypes { + if header.f(buf) { + return header.driver, nil + } + } + return "", ErrUnknownFileHeader +} + // Error is an error. type Error string @@ -244,6 +273,8 @@ const ( ErrInvalidDatabaseScheme Error = "invalid database scheme" // ErrUnknownDatabaseScheme is the unknown database type error. ErrUnknownDatabaseScheme Error = "unknown database scheme" + // ErrUnknownFileHeader is the unknown file header error. + ErrUnknownFileHeader Error = "unknown file header" // ErrInvalidTransportProtocol is the invalid transport protocol error. ErrInvalidTransportProtocol Error = "invalid transport protocol" // ErrRelativePathNotSupported is the relative paths not supported error. diff --git a/dburl_test.go b/dburl_test.go index 1bd0117..56c908b 100644 --- a/dburl_test.go +++ b/dburl_test.go @@ -43,8 +43,9 @@ func TestBadParse(t *testing.T) { {`file+tcp://`, ErrInvalidTransportProtocol}, {`file://`, ErrMissingPath}, {`ql://`, ErrMissingPath}, + {`duckdb://`, ErrMissingPath}, {`mssql+tcp://user:pass@host/dbname`, ErrInvalidTransportProtocol}, - {`mssql+aoeu://`, ErrInvalidTransportProtocol}, + {`mssql+foobar://`, ErrInvalidTransportProtocol}, {`mssql+unix:/var/run/mssql.sock`, ErrInvalidTransportProtocol}, {`mssql+udp:localhost:155`, ErrInvalidTransportProtocol}, {`adodb+foo+bar://provider/database`, ErrInvalidTransportProtocol}, @@ -140,7 +141,6 @@ func TestParse(t *testing.T) { `oo+Postgres+Unicode://user:pass@host:5432/dbname`, `adodb`, `Provider=MSDASQL.1;Extended Properties="Database=dbname;Driver={Postgres Unicode};PWD=pass;Port=5432;Server=host;UID=user"`, ``, }, - {`file:/path/to/file.sqlite3`, `sqlite3`, `/path/to/file.sqlite3`, ``}, // 33 {`sqlite:///path/to/file.sqlite3`, `sqlite3`, `/path/to/file.sqlite3`, ``}, {`sq://path/to/file.sqlite3`, `sqlite3`, `path/to/file.sqlite3`, ``}, {`sq:path/to/file.sqlite3`, `sqlite3`, `path/to/file.sqlite3`, ``}, @@ -210,6 +210,8 @@ func TestParse(t *testing.T) { {`flightsql://user:pass@localhost?timeout=3s&token=foobar&tls=enabled`, `flightsql`, `flightsql://user:pass@localhost?timeout=3s&token=foobar&tls=enabled`, ``}, {`duckdb:/path/to/foo.db?access_mode=read_only&threads=4`, `duckdb`, `/path/to/foo.db?access_mode=read_only&threads=4`, ``}, {`dk:///path/to/foo.db?access_mode=read_only&threads=4`, `duckdb`, `/path/to/foo.db?access_mode=read_only&threads=4`, ``}, + {`file:./testdata/test.sqlite3?a=b`, `sqlite3`, `./testdata/test.sqlite3?a=b`, ``}, + {`file:./testdata/test.duckdb?a=b`, `duckdb`, `./testdata/test.duckdb?a=b`, ``}, } for i, test := range tests { u, err := Parse(test.s) diff --git a/scheme.go b/scheme.go index 1702048..b4dc024 100644 --- a/scheme.go +++ b/scheme.go @@ -1,7 +1,9 @@ package dburl import ( + "bytes" "fmt" + "regexp" "sort" ) @@ -48,6 +50,12 @@ type Scheme struct { // BaseSchemes returns the supported base schemes. func BaseSchemes() []Scheme { return []Scheme{ + { + "file", + GenOpaque, 0, true, + []string{"file"}, + "", + }, // core databases { "mysql", @@ -71,7 +79,7 @@ func BaseSchemes() []Scheme { { "sqlite3", GenOpaque, 0, true, - []string{"sqlite", "file"}, + []string{"sqlite"}, "", }, { @@ -88,8 +96,7 @@ func BaseSchemes() []Scheme { "postgres", }, { - "memsql", - GenMysql, 0, false, nil, "mysql", + "memsql", GenMysql, 0, false, nil, "mysql", }, { "redshift", @@ -331,6 +338,8 @@ func init() { for _, scheme := range schemes { Register(scheme) } + RegisterHeaderType("duckdb", isDuckdbHeader) + RegisterHeaderType("sqlite3", isSqlite3Header) } // schemeMap is the map of registered schemes. @@ -431,6 +440,32 @@ func RegisterAlias(name, alias string) { registerAlias(name, alias, true) } +// headerTypes are registered header recognition funcs. +var headerTypes []headerType + +// RegisterHeaderType registers a file header recognition func. +func RegisterHeaderType(driver string, f func([]byte) bool) { + headerTypes = append(headerTypes, headerType{ + driver: driver, + f: f, + }) +} + +// headerType wraps a header recognition func. +type headerType struct { + driver string + f func([]byte) bool +} + +// HeaderTypes returns the registered header types. +func HeaderTypes() []string { + var v []string + for _, header := range headerTypes { + v = append(v, header.driver) + } + return v +} + // Protocols returns list of all valid protocol aliases for a registered // [Scheme] name. func Protocols(name string) []string { @@ -474,6 +509,27 @@ func ShortAlias(name string) string { return schemeMap[name].Aliases[0] } +// isSqlite3Header returns true when the passed header is empty or starts with +// the SQLite3 header. +// +// See: https://www.sqlite.org/fileformat.html +func isSqlite3Header(buf []byte) bool { + return len(buf) == 0 || bytes.HasPrefix(buf, sqlite3Header) +} + +// sqlite3Header is the sqlite3 header. +var sqlite3Header = []byte("SQLite format 3\000") + +// isDuckdbHeader returns true when the passed header is a DuckDB header. +// +// See: https://duckdb.org/internals/storage +func isDuckdbHeader(buf []byte) bool { + return duckdbRE.Match(buf) +} + +// duckdbRE is the duckdb storage header regexp. +var duckdbRE = regexp.MustCompile(`^.{8}DUCK.{8}`) + // contains determines if v contains s. func contains(v []string, s string) bool { for _, z := range v { diff --git a/testdata/test.duckdb b/testdata/test.duckdb new file mode 100644 index 0000000000000000000000000000000000000000..778b0ff5122e08831b03a5f38dbca1c7f45624af GIT binary patch literal 12288 zcmeI#u?mAg3;<9UKf$%D+i^lE?N+dxI{F0;cJ1O1`eE&Sg@bkx4qmp9K*GzEwVlT2 z>9s%Rez;e^#TLUYbot(M?3Qc1+mzY};ssn~x^2|GK!g<(MUXkDC-5w0i>+;}