Skip to content

Commit

Permalink
Fixing issue with file: URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
kenshaw committed Nov 3, 2023
1 parent 16249ca commit 95e9c5f
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 10 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ additional aliases, and the related Go driver:
| MySQL | `mysql` | `my`, `maria`, `aurora`, `mariadb`, `percona` | [github.com/go-sql-driver/mysql][d-mysql] |
| Microsoft SQL Server | `sqlserver` | `ms`, `mssql`, `azuresql` | [github.com/microsoft/go-mssqldb][d-sqlserver] |
| Oracle Database | `oracle` | `or`, `ora`, `oci`, `oci8`, `odpi`, `odpi-c` | [github.com/sijms/go-ora/v2][d-oracle] |
| SQLite3 | `sqlite3` | `sq`, `file`, `sqlite` | [github.com/mattn/go-sqlite3][d-sqlite3] <sup>[†][f-cgo]</sup> |
| SQLite3 | `sqlite3` | `sq`, `sqlite`, `file` | [github.com/mattn/go-sqlite3][d-sqlite3] <sup>[†][f-cgo]</sup> |
| CSVQ | `csvq` | `cs`, `csv`, `tsv`, `json` | [github.com/mithrandie/csvq-driver][d-csvq] |
| | | | |
| Alibaba MaxCompute | `maxcompute` | `mc` | [sqlflow.org/gomaxcompute][d-maxcompute] |
Expand All @@ -126,7 +126,7 @@ additional aliases, and the related Go driver:
| Couchbase | `couchbase` | `n1`, `n1ql` | [github.com/couchbase/go_n1ql][d-couchbase] |
| Cznic QL | `ql` | `cznic`, `cznicql` | [modernc.org/ql][d-ql] |
| Databend | `databend` | `dd`, `bend` | [github.com/databendcloud/databend-go][d-databend] |
| DuckDB | `duckdb` | `dk`, `ddb`, `duck` | [github.com/marcboeker/go-duckdb][d-duckdb] <sup>[†][f-cgo]</sup> |
| DuckDB | `duckdb` | `dk`, `ddb`, `duck`, `file` | [github.com/marcboeker/go-duckdb][d-duckdb] <sup>[†][f-cgo]</sup> |
| Exasol | `exasol` | `ex`, `exa` | [github.com/exasol/exasol-driver-go][d-exasol] |
| Firebird | `firebird` | `fb`, `firebirdsql` | [github.com/nakagami/firebirdsql][d-firebird] |
| FlightSQL | `flightsql` | `fl`, `flight` | [github.com/apache/arrow/go/v12/arrow/flight/flightsql/driver][d-flightsql] |
Expand Down
37 changes: 34 additions & 3 deletions dburl.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package dburl
import (
	"database/sql"
	"io"
	"net/url"
	"os"
	"strings"
)

Expand Down Expand Up @@ -73,10 +74,10 @@ type URL struct {
func Parse(urlstr string) (*URL, error) {
// parse url
v, err := url.Parse(urlstr)
if err != nil {
switch {
case err != nil:
return nil, err
}
if v.Scheme == "" {
case v.Scheme == "":
return nil, ErrInvalidDatabaseScheme
}
// create url
Expand All @@ -97,6 +98,15 @@ func Parse(urlstr string) (*URL, error) {
if !ok {
return nil, ErrUnknownDatabaseScheme
}
// load real scheme for file:
if scheme.Driver == "file" {
typ, err := SchemeType(u.Opaque)
if err == nil {
if s, ok := schemeMap[typ]; ok {
scheme = s
}
}
}
// if scheme does not understand opaque URLs, retry parsing after building
// fully qualified URL
if !scheme.Opaque && u.Opaque != "" {
Expand Down Expand Up @@ -230,6 +240,25 @@ func (u *URL) Normalize(sep, empty string, cut int) string {
return strings.Join(s, sep)
}

// SchemeType returns the scheme type for a file on disk.
//
// It opens name read-only, reads up to the first 128 bytes, and returns the
// driver name of the first registered header type whose recognition func
// accepts those bytes. Only the bytes actually read are handed to the
// recognition funcs, so a file shorter than 128 bytes (including an empty
// file) is seen as a short or empty header, not as a zero-padded buffer.
//
// It returns the error from opening or reading the file, or
// ErrUnknownFileHeader when no registered header type matches.
func SchemeType(name string) (string, error) {
	f, err := os.OpenFile(name, os.O_RDONLY, 0)
	if err != nil {
		return "", err
	}
	defer f.Close()
	buf := make([]byte, 128)
	// ReadFull retries short reads. io.EOF (nothing read) and
	// io.ErrUnexpectedEOF (file shorter than buf) are expected for small
	// files and must not be reported as failures.
	n, err := io.ReadFull(f, buf)
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		return "", err
	}
	buf = buf[:n]
	for _, header := range headerTypes {
		if header.f(buf) {
			return header.driver, nil
		}
	}
	return "", ErrUnknownFileHeader
}

// Error is an error.
type Error string

Expand All @@ -244,6 +273,8 @@ const (
ErrInvalidDatabaseScheme Error = "invalid database scheme"
// ErrUnknownDatabaseScheme is the unknown database type error.
ErrUnknownDatabaseScheme Error = "unknown database scheme"
// ErrUnknownFileHeader is the unknown file header error.
ErrUnknownFileHeader Error = "unknown file header"
// ErrInvalidTransportProtocol is the invalid transport protocol error.
ErrInvalidTransportProtocol Error = "invalid transport protocol"
// ErrRelativePathNotSupported is the relative paths not supported error.
Expand Down
6 changes: 4 additions & 2 deletions dburl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ func TestBadParse(t *testing.T) {
{`file+tcp://`, ErrInvalidTransportProtocol},
{`file://`, ErrMissingPath},
{`ql://`, ErrMissingPath},
{`duckdb://`, ErrMissingPath},
{`mssql+tcp://user:pass@host/dbname`, ErrInvalidTransportProtocol},
{`mssql+aoeu://`, ErrInvalidTransportProtocol},
{`mssql+foobar://`, ErrInvalidTransportProtocol},
{`mssql+unix:/var/run/mssql.sock`, ErrInvalidTransportProtocol},
{`mssql+udp:localhost:155`, ErrInvalidTransportProtocol},
{`adodb+foo+bar://provider/database`, ErrInvalidTransportProtocol},
Expand Down Expand Up @@ -140,7 +141,6 @@ func TestParse(t *testing.T) {
`oo+Postgres+Unicode://user:pass@host:5432/dbname`, `adodb`,
`Provider=MSDASQL.1;Extended Properties="Database=dbname;Driver={Postgres Unicode};PWD=pass;Port=5432;Server=host;UID=user"`, ``,
},
{`file:/path/to/file.sqlite3`, `sqlite3`, `/path/to/file.sqlite3`, ``}, // 33
{`sqlite:///path/to/file.sqlite3`, `sqlite3`, `/path/to/file.sqlite3`, ``},
{`sq://path/to/file.sqlite3`, `sqlite3`, `path/to/file.sqlite3`, ``},
{`sq:path/to/file.sqlite3`, `sqlite3`, `path/to/file.sqlite3`, ``},
Expand Down Expand Up @@ -210,6 +210,8 @@ func TestParse(t *testing.T) {
{`flightsql://user:pass@localhost?timeout=3s&token=foobar&tls=enabled`, `flightsql`, `flightsql://user:pass@localhost?timeout=3s&token=foobar&tls=enabled`, ``},
{`duckdb:/path/to/foo.db?access_mode=read_only&threads=4`, `duckdb`, `/path/to/foo.db?access_mode=read_only&threads=4`, ``},
{`dk:///path/to/foo.db?access_mode=read_only&threads=4`, `duckdb`, `/path/to/foo.db?access_mode=read_only&threads=4`, ``},
{`file:./testdata/test.sqlite3?a=b`, `sqlite3`, `./testdata/test.sqlite3?a=b`, ``},
{`file:./testdata/test.duckdb?a=b`, `duckdb`, `./testdata/test.duckdb?a=b`, ``},
}
for i, test := range tests {
u, err := Parse(test.s)
Expand Down
62 changes: 59 additions & 3 deletions scheme.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package dburl

import (
"bytes"
"fmt"
"regexp"
"sort"
)

Expand Down Expand Up @@ -48,6 +50,12 @@ type Scheme struct {
// BaseSchemes returns the supported base schemes.
func BaseSchemes() []Scheme {
return []Scheme{
{
"file",
GenOpaque, 0, true,
[]string{"file"},
"",
},
// core databases
{
"mysql",
Expand All @@ -71,7 +79,7 @@ func BaseSchemes() []Scheme {
{
"sqlite3",
GenOpaque, 0, true,
[]string{"sqlite", "file"},
[]string{"sqlite"},
"",
},
{
Expand All @@ -88,8 +96,7 @@ func BaseSchemes() []Scheme {
"postgres",
},
{
"memsql",
GenMysql, 0, false, nil, "mysql",
"memsql", GenMysql, 0, false, nil, "mysql",
},
{
"redshift",
Expand Down Expand Up @@ -331,6 +338,8 @@ func init() {
for _, scheme := range schemes {
Register(scheme)
}
RegisterHeaderType("duckdb", isDuckdbHeader)
RegisterHeaderType("sqlite3", isSqlite3Header)
}

// schemeMap is the map of registered schemes.
Expand Down Expand Up @@ -431,6 +440,32 @@ func RegisterAlias(name, alias string) {
registerAlias(name, alias, true)
}

// headerType pairs a driver name with the func that recognizes that driver's
// file header.
type headerType struct {
	driver string
	f      func([]byte) bool
}

// headerTypes holds the registered header recognition funcs, in the order
// they were registered.
var headerTypes []headerType

// RegisterHeaderType registers a file header recognition func for the named
// driver.
func RegisterHeaderType(driver string, f func([]byte) bool) {
	entry := headerType{driver: driver, f: f}
	headerTypes = append(headerTypes, entry)
}

// HeaderTypes returns the driver names of the registered header types, in
// registration order. The result is nil when nothing has been registered.
func HeaderTypes() []string {
	var names []string
	for i := range headerTypes {
		names = append(names, headerTypes[i].driver)
	}
	return names
}

// Protocols returns list of all valid protocol aliases for a registered
// [Scheme] name.
func Protocols(name string) []string {
Expand Down Expand Up @@ -474,6 +509,27 @@ func ShortAlias(name string) string {
return schemeMap[name].Aliases[0]
}

// sqlite3Header is the 16-byte SQLite3 header string (including the
// terminating NUL byte).
var sqlite3Header = []byte("SQLite format 3\000")

// isSqlite3Header reports whether buf is empty or begins with the SQLite3
// header string.
//
// See: https://www.sqlite.org/fileformat.html
func isSqlite3Header(buf []byte) bool {
	switch {
	case len(buf) == 0:
		// an empty header is accepted as SQLite3
		return true
	case len(buf) < len(sqlite3Header):
		// too short to contain the full header string
		return false
	}
	return bytes.Equal(buf[:len(sqlite3Header)], sqlite3Header)
}

// isDuckdbHeader returns true when the passed header is a DuckDB header.
//
// A header is recognized when it is at least 20 bytes long and bytes 8
// through 11 are the magic string "DUCK" (8 leading bytes, the magic, then 8
// trailing bytes). The check is done on raw bytes, not with a regexp: the
// bytes around the magic are binary data (presumably a checksum and a version
// number -- see the storage docs), so they may contain newline bytes or form
// multi-byte UTF-8 sequences, neither of which a `.` in a Go regexp matches
// as exactly one byte.
//
// See: https://duckdb.org/internals/storage
func isDuckdbHeader(buf []byte) bool {
	const (
		magic       = "DUCK"
		magicOffset = 8
		// 8 leading bytes + 4 magic bytes + 8 trailing bytes
		headerLen = magicOffset + len(magic) + 8
	)
	if len(buf) < headerLen {
		return false
	}
	// string(b) == "literal" comparisons do not allocate.
	return string(buf[magicOffset:magicOffset+len(magic)]) == magic
}

// duckdbRE is the regexp form of the duckdb storage header check.
//
// It is not used by isDuckdbHeader, because `.` matches a UTF-8 code point
// other than newline, not an arbitrary byte, which makes it unreliable on
// binary headers. It is kept so the declaration and the regexp import remain
// in use.
var duckdbRE = regexp.MustCompile(`^.{8}DUCK.{8}`)

// contains determines if v contains s.
func contains(v []string, s string) bool {
for _, z := range v {
Expand Down
Binary file added testdata/test.duckdb
Binary file not shown.
Empty file added testdata/test.duckdb.wal
Empty file.
Binary file added testdata/test.sqlite3
Binary file not shown.

0 comments on commit 95e9c5f

Please sign in to comment.