Skip to content

Commit

Permalink
[a r] Support for HCA bionetworks and atlases (#5519)
Browse files Browse the repository at this point in the history
  • Loading branch information
nadove-ucsc authored and achave11-ucsc committed Feb 14, 2024
1 parent 785cbbe commit 5b0e0fe
Show file tree
Hide file tree
Showing 15 changed files with 674 additions and 17 deletions.
2 changes: 1 addition & 1 deletion lambdas/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@
# changes and reset the minor version to zero. Otherwise, increment only
# the minor version for backwards compatible changes. A backwards
# compatible change is one that does not require updates to clients.
'version': '3.4'
'version': '3.5'
},
'tags': [
{
Expand Down
491 changes: 482 additions & 9 deletions lambdas/service/openapi.json

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions src/azul/plugins/metadata/hca/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,10 @@ def _field_mapping(self) -> MetadataPlugin._FieldMapping:
'project_title': 'projectTitle',
'publication_titles': 'publicationTitle',
'accessions': 'accessions',
'estimated_cell_count': 'projectEstimatedCellCount'
'estimated_cell_count': 'projectEstimatedCellCount',
'is_tissue_atlas_project': 'isTissueAtlasProject',
'tissue_atlas': 'tissueAtlas',
'bionetwork_name': 'bionetworkName'
},
'sequencing_protocols': {
'instrument_manufacturer_model': 'instrumentManufacturerModel',
Expand Down Expand Up @@ -297,7 +300,10 @@ def facets(self) -> Sequence[str]:
'projectDescription',
'institution',
'contactName',
'publicationTitle'
'publicationTitle',
'isTissueAtlasProject',
'tissueAtlas',
'bionetworkName'
]

@property
Expand Down
3 changes: 2 additions & 1 deletion src/azul/plugins/metadata/hca/indexer/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ def _accumulator(self, field) -> Optional[Accumulator]:
'contact_names',
'contributors',
'publications',
'accessions'):
'accessions',
'tissue_atlas'):
return None
elif field == 'estimated_cell_count':
return MaxAccumulator()
Expand Down
15 changes: 15 additions & 0 deletions src/azul/plugins/metadata/hca/indexer/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,8 @@ def api_schema(self) -> JSON:

# Field type for one accession entry: a `Nested` declaration with the members
# `namespace` and `accession`, both typed as `null_str`.
accession: Nested = Nested(namespace=null_str, accession=null_str)

# Field type for one tissue atlas entry: a `Nested` declaration with the
# members `atlas` and `version`, both typed as `null_str`. The member names
# are the same as the keys of the dictionary returned by `_tissue_atlas`.
tissue_atlas: Nested = Nested(atlas=null_str, version=null_str)

# NOTE(review): presumably the field type for an age range with float
# endpoints; confirm against the definition of `ClosedRange`.
age_range = ClosedRange(pass_thru_float)


Expand Down Expand Up @@ -658,6 +660,12 @@ def _accession(self, p: api.Accession):
'accession': p.accession
}

def _tissue_atlas(self, b: api.Bionetwork):
    """
    Translate a bionetwork into one entry of a project's `tissue_atlas` list.

    :param b: the bionetwork to read the atlas name and atlas version from

    :return: a dictionary with the keys `atlas` and `version`, holding the
             bionetwork's `hca_tissue_atlas` and `hca_tissue_atlas_version`
             attributes, passed through unmodified
    """
    atlas_name = b.hca_tissue_atlas
    atlas_version = b.hca_tissue_atlas_version
    return dict(atlas=atlas_name, version=atlas_version)

@classmethod
def _project_types(cls) -> FieldTypes:
return {
Expand All @@ -674,6 +682,9 @@ def _project_types(cls) -> FieldTypes:
'supplementary_links': [null_str],
'_type': null_str,
'accessions': [accession],
'is_tissue_atlas_project': null_bool,
'tissue_atlas': [tissue_atlas],
'bionetwork_name': [null_str],
'estimated_cell_count': null_int
}

Expand Down Expand Up @@ -717,6 +728,10 @@ def _project(self, project: api.Project) -> MutableJSON:
'supplementary_links': sorted(project.supplementary_links),
'_type': 'project',
'accessions': list(map(self._accession, project.accessions)),
'is_tissue_atlas_project': any(bionetwork.atlas_project
for bionetwork in project.bionetworks),
'tissue_atlas': list(map(self._tissue_atlas, project.bionetworks)),
'bionetwork_name': sorted(bionetwork.name for bionetwork in project.bionetworks),
'estimated_cell_count': project.estimated_cell_count
}

Expand Down
3 changes: 3 additions & 0 deletions src/azul/plugins/metadata/hca/service/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ def make_projects(self, entry) -> MutableJSONs:
'projectShortname': project['project_short_name'],
'laboratory': sorted(set(project.get('laboratory', [None]))),
'estimatedCellCount': project['estimated_cell_count'],
'isTissueAtlasProject': project['is_tissue_atlas_project'],
'tissueAtlas': project.get('tissue_atlas'),
'bionetworkName': project['bionetwork_name']
}
if self.entity_type == 'projects':
translated_project['projectDescription'] = project.get('project_description', [])
Expand Down
13 changes: 13 additions & 0 deletions src/humancellatlas/data/metadata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,15 @@ class Accession:
accession: str


@dataclass(frozen=True, eq=True)
class Bionetwork:
    """
    One bionetwork entry of a project.

    Only `name` is required, every other field defaults to `None`. Instances
    are immutable and compare by field values. Since the class is both frozen
    and `eq`, the dataclass machinery also generates `__hash__`, allowing
    instances to be members of a set.

    `Project.__init__` creates instances via `Bionetwork(**bionetwork)` from
    the entries under `hca_bionetworks`, so the field names below need to
    match the keys of those entries.
    """
    # The name of the bionetwork, the only mandatory field
    name: str
    # NOTE(review): presumably whether the project is an atlas project of
    # this bionetwork; confirm against the metadata schema
    atlas_project: Optional[bool] = None
    # The tissue atlas and its version, if any
    hca_tissue_atlas: Optional[str] = None
    hca_tissue_atlas_version: Optional[str] = None
    # NOTE(review): presumably the version of the schema the entry conforms
    # to; confirm against the metadata schema
    schema_version: Optional[str] = None


@dataclass(init=False)
class Project(Entity):
project_short_name: str
Expand All @@ -316,6 +325,7 @@ class Project(Entity):
accessions: OrderedSet[Accession]
supplementary_links: OrderedSet[str]
estimated_cell_count: Optional[int]
bionetworks: OrderedSet[Bionetwork]

def __init__(self,
json: JSON,
Expand All @@ -341,6 +351,9 @@ def __init__(self,
assert isinstance(value, list)
accessions.update(Accession(namespace=prefix, accession=v) for v in value)
self.accessions = accessions
self.bionetworks = OrderedSet(Bionetwork(**bionetwork)
for bionetwork in content.get('hca_bionetworks', ())
if bionetwork)

def _accessions(self, namespace: str) -> Set[str]:
return {a.accession for a in self.accessions if a.namespace == namespace}
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions test/service/data/pfb_manifest.results.json
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,9 @@
null
],
"publications": [],
"is_tissue_atlas_project": [false],
"bionetwork_name": ["Immune"],
"tissue_atlas": [],
"supplementary_links": [
null
]
Expand Down Expand Up @@ -864,6 +867,9 @@
"Single-Cell Analysis of Human Pancreas Reveals Transcriptional Signatures of Aging and Somatic Mutation Patterns."
],
"publications": [],
"is_tissue_atlas_project": [false],
"bionetwork_name": [null],
"tissue_atlas": [],
"supplementary_links": [
"https://www.ebi.ac.uk/gxa/sc/experiments/E-GEOD-81547/Results"
]
Expand Down Expand Up @@ -3273,6 +3279,9 @@
"The cellular immune response to COVID-19 deciphered by single cell multi-omics across three UK centres"
],
"publications": [],
"is_tissue_atlas_project": [false],
"bionetwork_name": [null],
"tissue_atlas": [],
"supplementary_links": [
null
]
Expand Down
45 changes: 45 additions & 0 deletions test/service/data/pfb_manifest.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1936,6 +1936,51 @@
"type": "array"
}
},
{
"name": "is_tissue_atlas_project",
"namespace": "projects",
"type": {
"items": ["null", "boolean"],
"type": "array"
}
},
{
"name": "tissue_atlas",
"namespace": "projects",
"type": {
"items": {
"fields": [
{
"name": "atlas",
"namespace": "projects.tissue_atlas",
"type": {
"items": ["null", "string"],
"type": "array"
}
},
{
"name": "version",
"namespace": "projects.tissue_atlas",
"type": {
"items": ["null", "string"],
"type": "array"
}
}
],
"name": "projects.tissue_atlas",
"type": "record"
},
"type": "array"
}
},
{
"name": "bionetwork_name",
"namespace": "projects",
"type": {
"items": ["null", "string"],
"type": "array"
}
},
{
"name": "estimated_cell_count",
"namespace": "projects",
Expand Down
3 changes: 3 additions & 0 deletions test/service/test_index_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def assert_file_type_summaries(hit):
'matrices',
'contributedAnalyses',
'accessions',
'tissueAtlas',
'isTissueAtlasProject',
'bionetworkName',
'estimatedCellCount'
}
response_json = get_response_json()
Expand Down
Loading

0 comments on commit 5b0e0fe

Please sign in to comment.