diff --git a/src/azul/plugins/repository/tdr_anvil/__init__.py b/src/azul/plugins/repository/tdr_anvil/__init__.py index 1081c39c63..3923b4ca23 100644 --- a/src/azul/plugins/repository/tdr_anvil/__init__.py +++ b/src/azul/plugins/repository/tdr_anvil/__init__.py @@ -747,7 +747,7 @@ def _columns(self, entity_type: EntityType) -> set[str]: def import_tables(self, source: TDRSourceRef): """ - Import tables for an AnVIL snapshot into BigQuery via TDR's parquet + Import tables for an AnVIL snapshot into BigQuery via TDR's Parquet export API. Only tables defined in the AnVIL schema will be imported. Currently, only GS-backed snapshots are supported. """ @@ -758,7 +758,7 @@ def import_tables(self, source: TDRSourceRef): urls_by_table = self.tdr.export_parquet_urls(source.id) reject(urls_by_table is None, - 'No parquet access information is available for snapshot %r.', source.spec) + 'No Parquet access information is available for snapshot %r.', source.spec) for table in anvil_schema['tables']: table_name = table['name'] diff --git a/src/azul/terra.py b/src/azul/terra.py index 80417af6e1..4418e5e48f 100644 --- a/src/azul/terra.py +++ b/src/azul/terra.py @@ -700,7 +700,7 @@ def create_table(self, :param table_name: Unqualified name of the new table - :param import_urls: URLs of parquet file(s) to populate the table. These + :param import_urls: URLs of Parquet file(s) to populate the table. These must be `gs://` URLS and the GCS bucket's region must be compatible with the target dataset's. See https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-parquet#limitations