-
Notifications
You must be signed in to change notification settings - Fork 406
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Peter Ke <[email protected]>
- Loading branch information
1 parent
a999c92
commit 94dcbc1
Showing
8 changed files
with
169 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from __future__ import annotations | ||
|
||
from typing import List | ||
|
||
import pyarrow | ||
|
||
from deltalake._internal import PyQueryBuilder | ||
from deltalake.table import DeltaTable | ||
|
||
|
||
class QueryBuilder:
    """Python-side wrapper around the native ``PyQueryBuilder``.

    Delta tables are registered under a name and can then be referenced
    from SQL passed to :meth:`execute`.
    """

    def __init__(self) -> None:
        # All work is delegated to the native builder created here.
        self._query_builder = PyQueryBuilder()

    def register(self, table_name: str, delta_table: DeltaTable) -> QueryBuilder:
        """Register ``delta_table`` under ``table_name``.

        Args:
            table_name: Name to register the table under.
            delta_table: Table whose underlying ``_table`` handle is handed
                to the native builder.

        Returns:
            This builder, so that calls can be chained.
        """
        raw_table = delta_table._table
        self._query_builder.register(table_name=table_name, delta_table=raw_table)
        return self

    def execute(self, sql: str) -> List[pyarrow.RecordBatch]:
        """Run ``sql`` through the native builder and return its record batches.

        Args:
            sql: Query text passed unchanged to the native builder.

        Returns:
            Whatever the native ``execute`` yields, annotated as a list of
            ``pyarrow.RecordBatch``.
        """
        batches = self._query_builder.execute(sql)
        return batches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
use std::sync::Arc; | ||
|
||
use deltalake::{ | ||
arrow::pyarrow::ToPyArrow, | ||
datafusion::prelude::SessionContext, | ||
delta_datafusion::{DeltaScanConfigBuilder, DeltaSessionConfig, DeltaTableProvider}, | ||
}; | ||
use pyo3::prelude::*; | ||
|
||
use crate::{error::PythonError, utils::rt, RawDeltaTable}; | ||
|
||
#[pyclass(module = "deltalake._internal")] | ||
pub(crate) struct PyQueryBuilder { | ||
_ctx: SessionContext, | ||
} | ||
|
||
#[pymethods] | ||
impl PyQueryBuilder { | ||
#[new] | ||
pub fn new() -> Self { | ||
let config = DeltaSessionConfig::default().into(); | ||
let _ctx = SessionContext::new_with_config(config); | ||
|
||
PyQueryBuilder { _ctx } | ||
} | ||
|
||
pub fn register(&self, table_name: &str, delta_table: &RawDeltaTable) -> PyResult<()> { | ||
let snapshot = delta_table._table.snapshot().map_err(PythonError::from)?; | ||
let log_store = delta_table._table.log_store(); | ||
|
||
let scan_config = DeltaScanConfigBuilder::default() | ||
.with_parquet_pushdown(false) | ||
.build(snapshot) | ||
.map_err(PythonError::from)?; | ||
|
||
let provider = Arc::new( | ||
DeltaTableProvider::try_new(snapshot.clone(), log_store, scan_config) | ||
.map_err(PythonError::from)?, | ||
); | ||
|
||
self._ctx | ||
.register_table(table_name, provider) | ||
.map_err(PythonError::from)?; | ||
|
||
Ok(()) | ||
} | ||
|
||
pub fn execute(&self, py: Python, sql: &str) -> PyResult<PyObject> { | ||
let batches = py.allow_threads(|| { | ||
rt().block_on(async { | ||
let df = self._ctx.sql(sql).await?; | ||
df.collect().await | ||
}) | ||
.map_err(PythonError::from) | ||
})?; | ||
|
||
batches.to_pyarrow(py) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters