Skip to content

Commit

Permalink
Improve BigQuery logging
Browse files (browse the repository at this point in the history)
  • Loading branch information
nadove-ucsc committed Nov 7, 2024
1 parent ec2e7a2 commit d9280cc
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 4 deletions.
3 changes: 2 additions & 1 deletion src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,8 @@ def convert_column(value):
log.debug('Retrieved %i entities of type %r', len(rows), entity_type)
missing = keys - {row[pk_column] for row in rows}
require(not missing,
f'Required entities not found in {table_name}: {missing}')
f'Found only {len(rows)} out of {len(keys)} expected rows in {table_name}. '
f'Missing entities: {missing}')
return rows
else:
return []
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,8 @@ def _retrieve_entities(self,
log.debug('Retrieved %i entities of type %r', len(rows), entity_type)
missing = expected - {row[pk_column] for row in rows}
require(not missing,
f'Required entities not found in {table_name}: {missing}')
f'Found only {len(rows)} out of {len(entity_ids)} expected rows in {table_name}. '
f'Missing entities: {missing}')
return rows

def _in(self,
Expand Down
8 changes: 6 additions & 2 deletions src/azul/terra.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
QueryJobConfig,
QueryPriority,
)
from google.cloud.bigquery.table import (
RowIterator,
)
from more_itertools import (
one,
)
Expand Down Expand Up @@ -504,20 +507,21 @@ def run_sql(self, query: str) -> BigQueryRows:
else:
assert False
if log.isEnabledFor(logging.DEBUG):
log.debug('Job info: %s', json.dumps(self._job_info(job)))
log.debug('Job info: %s', json.dumps(self._job_info(job, result)))
return result

def _trunc_query(self, query: str) -> str:
return trunc_ellipses(query, 2048)

def _job_info(self, job: QueryJob) -> JSON:
def _job_info(self, job: QueryJob, result: RowIterator) -> JSON:
# noinspection PyProtectedMember
stats = job._properties['statistics']['query']
if config.debug < 2:
ignore = ('referencedTables', 'statementType', 'queryPlan')
stats = {k: v for k, v in stats.items() if k not in ignore}
return {
'job_id': job.job_id,
'total_rows': result.total_rows,
'stats': stats,
'query': self._trunc_query(job.query)
}
Expand Down

0 comments on commit d9280cc

Please sign in to comment.