Skip to content

Commit

Permalink
Make bundle-counting method public
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc committed Nov 7, 2024
1 parent 6c1c601 commit 96747c3
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 14 deletions.
2 changes: 1 addition & 1 deletion scripts/post_deploy_tdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def verify_source(self,
require(prefix.common == '', source_spec)
self.tdr.check_bigquery_access(source_spec)
else:
subgraph_count = len(plugin.list_bundles(ref, prefix.common))
subgraph_count = plugin.count_bundles(ref.spec)
require(subgraph_count > 0, 'Common prefix is too long', ref.spec)
require(subgraph_count <= 512, 'Common prefix is too short', ref.spec)

Expand Down
7 changes: 4 additions & 3 deletions src/azul/plugins/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,10 @@ def _lookup_source_id(self, spec: SOURCE_SPEC) -> str:
raise NotImplementedError

@abstractmethod
def _count_subgraphs(self, source: SOURCE_SPEC) -> int:
def count_bundles(self, source: SOURCE_SPEC) -> int:
"""
The total number of subgraphs in the given source, ignoring its prefix.
The total number of subgraphs in the given source. The source's prefix
may be None.
"""
raise NotImplementedError

Expand All @@ -631,7 +632,7 @@ def partition_source(self,
should be appropriate for indexing in the given catalog.
"""
if source.spec.prefix is None:
count = self._count_subgraphs(source.spec)
count = self.count_bundles(source.spec)
is_main = config.deployment.is_main
is_it = catalog in config.integration_test_catalogs
# We use the "lesser" heuristic during IT to avoid indexing an
Expand Down
11 changes: 9 additions & 2 deletions src/azul/plugins/repository/canned/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
from furl import (
furl,
)
from more_itertools import (
ilen,
)

from azul import (
CatalogName,
Expand Down Expand Up @@ -163,9 +166,13 @@ def staging_area(self, url: str) -> StagingArea:
ref)
return factory.load_staging_area(path)

def _count_subgraphs(self, source: SOURCE_SPEC) -> int:
def count_bundles(self, source: SOURCE_SPEC) -> int:
staging_area = self.staging_area(source.spec.name)
return len(staging_area.links)
return ilen(
links_id
for links_id in staging_area.links
if source.prefix is None or links_id.startswith(source.prefix.common)
)

def list_bundles(self,
source: CannedSourceRef,
Expand Down
2 changes: 1 addition & 1 deletion src/azul/plugins/repository/dss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def sources(self) -> AbstractSet[SimpleSourceSpec]:
def _lookup_source_id(self, spec: SimpleSourceSpec) -> str:
return DSSSourceRef.id_from_spec(spec)

def _count_subgraphs(self, source: SimpleSourceSpec) -> NoReturn:
def count_bundles(self, source: SimpleSourceSpec) -> NoReturn:
assert False, 'DSS is EOL'

def list_sources(self,
Expand Down
6 changes: 4 additions & 2 deletions src/azul/plugins/repository/tdr_anvil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,14 +169,16 @@ def _version(self):
datarepo_row_uuid_version = 4
bundle_uuid_version = 10

def _count_subgraphs(self, source: TDRSourceSpec) -> int:
def count_bundles(self, source: TDRSourceSpec) -> int:
prefix = '' if source.prefix is None else source.prefix.common
rows = self._run_sql(f'''
SELECT COUNT(*) AS count
FROM {backtick(self._full_table_name(source, BundleType.primary.value))}
WHERE STARTS_WITH(datarepo_row_id, {prefix!r})
UNION ALL
SELECT COUNT(*) AS count
FROM {backtick(self._full_table_name(source, BundleType.supplementary.value))}
WHERE is_supplementary
WHERE is_supplementary AND STARTS_WITH(datarepo_row_id, {prefix!r})
''')
return sum(row['count'] for row in rows)

Expand Down
13 changes: 8 additions & 5 deletions src/azul/plugins/repository/tdr_hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,14 @@ def _parse_drs_uri(self,

class Plugin(TDRPlugin[TDRHCABundle, TDRSourceSpec, TDRSourceRef, TDRBundleFQID]):

def _count_subgraphs(self, source: TDRSourceSpec) -> int:
rows = self._run_sql(f'''
SELECT COUNT(*) AS count
FROM {backtick(self._full_table_name(source, 'links'))}
''')
def count_bundles(self, source: TDRSourceSpec) -> int:
prefix = '' if source.prefix is None else source.prefix.common
query = f'''
SELECT COUNT(*) AS count
FROM {backtick(self._full_table_name(source, 'links'))}
WHERE STARTS_WITH(datarepo_row_id, {prefix!r})
'''
rows = self._run_sql(query)
return one(rows)['count']

def _list_bundles(self,
Expand Down

0 comments on commit 96747c3

Please sign in to comment.