Skip to content

Commit

Permalink
Add read-only OGR ADBC (Arrow Database Connectivity) driver
Browse files Browse the repository at this point in the history
See https://arrow.apache.org/adbc/current/index.html for an introduction to ADBC.

Depends on the adbc-driver-manager library.

The driver is read-only, and there is no support for spatial data currently.

Beyond the official ADBC drivers (adbc-driver-sqlite,
adbc-driver-postgresql, adbc-driver-snowflake, adbc-driver-bigquery,
etc.), it can also be used to read Parquet files or DuckDB databases through
libduckdb, provided that libduckdb is installed and can be loaded dynamically as a shared library.
  • Loading branch information
rouault committed Oct 13, 2024
1 parent 9d058a9 commit 57e6e32
Show file tree
Hide file tree
Showing 20 changed files with 1,444 additions and 4 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/ubuntu_24.04/Dockerfile.ci
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,19 @@ RUN python3 -m pip install -U --break-system-packages -r /tmp/requirements.txt
# cfchecker requires udunits2
RUN apt-get install -y --allow-unauthenticated libudunits2-0 libudunits2-data
RUN python3 -m pip install --break-system-packages cfchecker

# Manually install ADBC packages from Ubuntu 22.04 as there are no 24.04 packages at time of writing.
# Download the driver manager and SQLite driver packages (runtime + dev),
# install them, then delete the downloaded .deb files in the same layer so
# that they do not remain in the image.
RUN wget -q https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-adbc/libadbc-driver-manager102_14-1_amd64.deb \
    && wget -q https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-adbc/libadbc-driver-manager-dev_14-1_amd64.deb \
    && wget -q https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-adbc/libadbc-driver-sqlite102_14-1_amd64.deb \
    && wget -q https://apache.jfrog.io/artifactory/arrow/ubuntu/pool/jammy/main/a/apache-arrow-adbc/libadbc-driver-sqlite-dev_14-1_amd64.deb \
    && dpkg -i libadbc-driver-manager102_14-1_amd64.deb \
    && dpkg -i libadbc-driver-manager-dev_14-1_amd64.deb \
    && dpkg -i libadbc-driver-sqlite102_14-1_amd64.deb \
    && dpkg -i libadbc-driver-sqlite-dev_14-1_amd64.deb \
    && rm -f libadbc-driver-manager102_14-1_amd64.deb \
             libadbc-driver-manager-dev_14-1_amd64.deb \
             libadbc-driver-sqlite102_14-1_amd64.deb \
             libadbc-driver-sqlite-dev_14-1_amd64.deb

# Install libduckdb
# Fetch the DuckDB v1.1.1 Linux amd64 release archive, extract only
# libduckdb.so from it, move the library to /usr/lib/x86_64-linux-gnu and
# delete the archive afterwards.
RUN wget -q https://github.com/duckdb/duckdb/releases/download/v1.1.1/libduckdb-linux-amd64.zip \
&& unzip libduckdb-linux-amd64.zip libduckdb.so \
&& mv libduckdb.so /usr/lib/x86_64-linux-gnu \
&& rm -f libduckdb-linux-amd64.zip
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ Supported Formats: (ro:read-only, rw:read-write, +:update, v:virtual-I/O s:subda
JSONFG -vector- (rw+v): OGC Features and Geometries JSON (*.json)
MiraMonVector -vector- (rw+v): MiraMon Vectors (.pol, .arc, .pnt) (*.pol, *.arc, *.pnt)
XODR -vector- (ro): OpenDRIVE - Open Dynamic Road Information for Vehicle Environment (*.xodr)
ADBC -vector- (ro): Arrow Database Connectivity
TIGER -vector- (rov): U.S. Census TIGER/Line
AVCBin -vector- (rov): Arc/Info Binary Coverage
AVCE00 -vector- (rov): Arc/Info E00 (ASCII) Coverage (*.e00)
Expand Down
8 changes: 4 additions & 4 deletions apps/test_ogrsf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1655,19 +1655,20 @@ static int TestOGRLayerFeatureCount(GDALDataset *poDS, OGRLayer *poLayer,
}
delete poFeat;

const auto nFCEndOfIter = LOG_ACTION(poLayer->GetFeatureCount());
if (nFC != nClaimedFC)
{
bRet = FALSE;
printf("ERROR: Claimed feature count " CPL_FRMT_GIB
" doesn't match actual, " CPL_FRMT_GIB ".\n",
nClaimedFC, nFC);
}
else if (nFC != LOG_ACTION(poLayer->GetFeatureCount()))
else if (nFC != nFCEndOfIter)
{
bRet = FALSE;
printf("ERROR: Feature count at end of layer, " CPL_FRMT_GIB
", differs from at start, " CPL_FRMT_GIB ".\n",
poLayer->GetFeatureCount(), nFC);
nFCEndOfIter, nFC);
}
else if (bVerbose)
printf("INFO: Feature count verified.\n");
Expand Down Expand Up @@ -4187,8 +4188,7 @@ static int TestLayerGetArrowStream(OGRLayer *poLayer)
{
if (array.length != 0)
{
bRet = false;
printf("ERROR: get_next() return an array with length != 0 "
printf("WARNING: get_next() return an array with length != 0 "
"after end of iteration\n");
}
if (array.release)
Expand Down
266 changes: 266 additions & 0 deletions autotest/ogr/ogr_adbc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
#!/usr/bin/env pytest
###############################################################################
# $Id$
#
# Project: GDAL/OGR Test Suite
# Purpose: Test read functionality for OGR ADBC driver.
# Author: Even Rouault <even dot rouault at spatialys.com>
#
###############################################################################
# Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>
#
# SPDX-License-Identifier: MIT
###############################################################################

import gdaltest
import pytest

from osgeo import gdal, ogr

pytestmark = pytest.mark.require_driver("ADBC")

###############################################################################


def _has_sqlite_driver():
    """Return True if the ADBC SQLite driver shared library can be loaded.

    The probe tries to load ``libadbc_driver_sqlite.so`` with ctypes and
    reports whether that succeeded. It is used to skip tests that need the
    driver when it is not installed.
    """
    import ctypes

    try:
        ctypes.cdll.LoadLibrary("libadbc_driver_sqlite.so")
    except OSError:
        # ctypes raises OSError when the library cannot be found or loaded.
        return False
    return True


###############################################################################


def test_ogr_adbc_driver_open_option():
    """Open a bare "ADBC:" connection, selecting the driver via ADBC_DRIVER."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    driver_options = ["ADBC_DRIVER=adbc_driver_sqlite"]
    with gdal.OpenEx("ADBC:", gdal.OF_VECTOR, open_options=driver_options) as dataset:
        # The connection lists no layer, but SQL statements can be executed.
        assert dataset.GetLayerCount() == 0
        with dataset.ExecuteSQL("SELECT sqlite_version()") as result_layer:
            first_feature = result_layer.GetNextFeature()
            assert first_feature
            sqlite_version = first_feature.GetField(0)
            assert sqlite_version.startswith("3.")


###############################################################################


def test_ogr_adbc_invalid_driver():
    """Check that opening with an unknown ADBC_DRIVER value raises."""

    bad_driver_options = ["ADBC_DRIVER=invalid_driver"]
    with pytest.raises(Exception):
        gdal.OpenEx("ADBC:", gdal.OF_VECTOR, open_options=bad_driver_options)


###############################################################################


def test_ogr_adbc_invalid_dataset():
    """Check that opening a non-existing database with the SQLite driver raises."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    missing_db = "ADBC:/i/do/not/exist.db"
    driver_options = ["ADBC_DRIVER=adbc_driver_sqlite"]
    with pytest.raises(Exception):
        gdal.OpenEx(missing_db, gdal.OF_VECTOR, open_options=driver_options)


###############################################################################


def test_ogr_adbc_sqlite3():
    """Open a SQLite file with only the ADBC driver allowed and check its layers."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    sqlite_path = "data/sqlite/poly_spatialite.sqlite"
    with gdal.OpenEx(
        sqlite_path, gdal.OF_VECTOR, allowed_drivers=["ADBC"]
    ) as dataset:
        assert dataset.GetLayerCount() == 13
        # Indices outside [0, layer count) must yield no layer.
        assert dataset.GetLayer(-1) is None
        assert dataset.GetLayer(dataset.GetLayerCount()) is None
        first_layer = dataset.GetLayer(0)
        assert first_layer.TestCapability(ogr.OLCFastGetArrowStream)


###############################################################################


def test_ogr_adbc_sql_open_option():
    """Check that the SQL open option exposes the statement result as one layer."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    connection = "ADBC:data/sqlite/poly_spatialite.sqlite"
    sql_options = ["SQL=SELECT * FROM poly"]
    with gdal.OpenEx(connection, gdal.OF_VECTOR, open_options=sql_options) as dataset:
        assert dataset.GetLayerCount() == 1
        result_layer = dataset.GetLayer(0)
        assert result_layer.GetFeatureCount() == 10


###############################################################################


def test_ogr_adbc_invalid_sql():
    """Check that an incomplete statement in the SQL open option raises."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    connection = "ADBC:data/sqlite/poly_spatialite.sqlite"
    broken_sql_options = ["SQL=SELECT * FROM"]
    with pytest.raises(Exception):
        gdal.OpenEx(connection, gdal.OF_VECTOR, open_options=broken_sql_options)


###############################################################################


def test_ogr_adbc_generic_open_option():
    """Pass the database location through the generic ADBC_OPTION_uri option."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    adbc_options = [
        "ADBC_DRIVER=adbc_driver_sqlite",
        "ADBC_OPTION_uri=data/sqlite/poly_spatialite.sqlite",
    ]
    with gdal.OpenEx("ADBC:", gdal.OF_VECTOR, open_options=adbc_options) as dataset:
        assert dataset.GetLayerCount() == 13


###############################################################################


def test_ogr_adbc_execute_sql():
    """Open with an empty SQL option, then query a table through ExecuteSQL()."""

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    sqlite_path = "data/sqlite/poly_spatialite.sqlite"
    with gdal.OpenEx(
        sqlite_path,
        gdal.OF_VECTOR,
        allowed_drivers=["ADBC"],
        open_options=["SQL="],
    ) as dataset:
        # With the empty SQL option the dataset reports no layer.
        assert dataset.GetLayerCount() == 0
        with dataset.ExecuteSQL("SELECT * FROM poly") as result_layer:
            assert result_layer.GetFeatureCount() == 10


###############################################################################


def _has_libduckdb():
    """Return True if the libduckdb shared library can be loaded.

    The probe tries to load ``libduckdb.so`` with ctypes and reports whether
    that succeeded. It is used to skip tests that need libduckdb when it is
    not installed.
    """
    import ctypes

    try:
        ctypes.cdll.LoadLibrary("libduckdb.so")
    except OSError:
        # ctypes raises OSError when the library cannot be found or loaded.
        return False
    return True


###############################################################################


def test_ogr_adbc_duckdb_parquet():
    """Open a Parquet file with only the ADBC driver allowed (needs libduckdb)."""

    if not _has_libduckdb():
        pytest.skip("libduckdb.so missing")

    parquet_path = "data/parquet/partitioned_flat/part.0.parquet"
    with gdal.OpenEx(
        parquet_path, gdal.OF_VECTOR, allowed_drivers=["ADBC"]
    ) as dataset:
        assert dataset.GetLayerCount() == 1
        parquet_layer = dataset.GetLayer(0)
        assert parquet_layer.TestCapability(ogr.OLCFastFeatureCount)
        assert parquet_layer.GetFeatureCount() == 3


###############################################################################
# Run test_ogrsf on a SQLite database


def test_ogr_adbc_test_ogrsf_sqlite3():
    """Run test_ogrsf in read-only mode on a SQLite database opened through ADBC.

    The test is skipped when the ADBC SQLite driver or the test_ogrsf
    utility is not available.
    """

    if not _has_sqlite_driver():
        pytest.skip("adbc_driver_sqlite missing")

    import test_cli_utilities

    # Look the utility path up once and reuse it for the skip check and the run.
    test_ogrsf_path = test_cli_utilities.get_test_ogrsf_path()
    if test_ogrsf_path is None:
        pytest.skip("test_ogrsf utility not available")

    ret = gdaltest.runexternal(
        test_ogrsf_path + " -ro ADBC:data/sqlite/first_geometry_null.db"
    )

    assert "INFO" in ret
    assert "ERROR" not in ret


###############################################################################
# Run test_ogrsf on a single Parquet file


def test_ogr_adbc_test_ogrsf_parquet():
    """Run test_ogrsf in read-only mode on a single Parquet file opened through ADBC.

    The test is skipped when libduckdb or the test_ogrsf utility is not
    available.
    """

    if not _has_libduckdb():
        pytest.skip("libduckdb.so missing")

    import test_cli_utilities

    # Look the utility path up once and reuse it for the skip check and the run.
    test_ogrsf_path = test_cli_utilities.get_test_ogrsf_path()
    if test_ogrsf_path is None:
        pytest.skip("test_ogrsf utility not available")

    ret = gdaltest.runexternal(
        test_ogrsf_path + " -ro ADBC:data/parquet/partitioned_flat/part.0.parquet"
    )

    assert "INFO" in ret
    assert "ERROR" not in ret


###############################################################################
# Run test_ogrsf on a set of Parquet files selected with a wildcard


def test_ogr_adbc_test_ogrsf_parquet_star():
    """Run test_ogrsf in read-only mode on a "*.parquet" wildcard opened through ADBC.

    The test is skipped when libduckdb or the test_ogrsf utility is not
    available.
    """

    if not _has_libduckdb():
        pytest.skip("libduckdb.so missing")

    import test_cli_utilities

    # Look the utility path up once and reuse it for the skip check and the run.
    test_ogrsf_path = test_cli_utilities.get_test_ogrsf_path()
    if test_ogrsf_path is None:
        pytest.skip("test_ogrsf utility not available")

    ret = gdaltest.runexternal(
        test_ogrsf_path + " -ro ADBC:data/parquet/partitioned_flat/*.parquet"
    )

    assert "INFO" in ret
    assert "ERROR" not in ret
2 changes: 2 additions & 0 deletions cmake/helpers/CheckDependentLibraries.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,8 @@ include(CheckDependentLibrariesArrowParquet)

gdal_check_package(OpenDrive "Enable libOpenDRIVE" CONFIG CAN_DISABLE)

gdal_check_package(AdbcDriverManager "Enable ADBC" CONFIG CAN_DISABLE)

# bindings

# finding python in top of project because of common for autotest and bindings
Expand Down
Loading

0 comments on commit 57e6e32

Please sign in to comment.