Skip to content

Commit

Permalink
Adding data sanitizing - strip white space from rows and make institutions upper case.
Browse files Browse the repository at this point in the history

Also removed the load_hca partition creation - that pipeline is no longer used.
  • Loading branch information
bahill committed Nov 6, 2024
1 parent 40769ee commit d610d1d
Showing 1 changed file with 2 additions and 3 deletions.
5 changes: 2 additions & 3 deletions orchestration/hca_manage/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,15 @@ def _parse_csv(csv_path: str, env: str, project_id_only: bool = False,
continue

assert len(row) == 2
institution = row[0]
row = [x.strip() for x in row]
institution = row[0].upper()
project_id = find_project_id_in_str(row[1])

key = None
if project_id_only:
project_id = row[1]
key = project_id
else:
# TODO check for all caps - change to all caps if not, then match
if institution not in STAGING_AREA_BUCKETS[env]:
raise Exception(f"Unknown institution {institution} found")

Expand Down Expand Up @@ -178,7 +178,6 @@ def _enumerate_manifests(env: str) -> None:


def load(args: argparse.Namespace) -> None:
parse_and_load_manifest(args.env, args.csv_path, args.release_tag, "load_hca")
parse_and_load_manifest(args.env, args.csv_path, args.release_tag, "per_project_load_hca")
parse_and_load_manifest(args.env, args.csv_path, args.release_tag, "validate_ingress")
parse_and_load_manifest(
Expand Down

0 comments on commit d610d1d

Please sign in to comment.