diff --git a/src/data/downloadable_files_metadata.json b/src/data/downloadable_files_metadata.json index f4c3174e..5c84810f 100644 --- a/src/data/downloadable_files_metadata.json +++ b/src/data/downloadable_files_metadata.json @@ -7,7 +7,7 @@ "morphology_clusters.csv.gz": 736744, "connectivity_clusters.csv.gz": 522912, "connections.csv.gz": 26124953, - "labels.csv.gz": 3404570, + "labels.csv.gz": 3406816, "coordinates.csv.gz": 4584155, "neuropil_synapse_table.csv.gz": 4490002 }, @@ -82,17 +82,17 @@ "col 5 - nt_type": "6 unique values" }, "labels.csv.gz": { - "# rows": "120,551 (+ header)", + "# rows": "120,614 (+ header)", "# columns": "9", - "col 1 - root_id": "87,373 unique values", - "col 2 - label": "6,504 unique values", + "col 1 - root_id": "87,435 unique values", + "col 2 - label": "6,510 unique values", "col 3 - user_id": "132 unique values", - "col 4 - position": "99,524 unique values", - "col 5 - supervoxel_id": "98,795 unique values", + "col 4 - position": "99,587 unique values", + "col 5 - supervoxel_id": "98,858 unique values", "col 6 - label_id": "all rows contain unique values", - "col 7 - date_created": "36,203 unique values", - "col 8 - user_name": "129 unique values in 119,901 rows, empty in 650 rows", - "col 9 - user_affiliation": "52 unique values in 119,838 rows, empty in 713 rows" + "col 7 - date_created": "36,226 unique values", + "col 8 - user_name": "129 unique values in 119,964 rows, empty in 650 rows", + "col 9 - user_affiliation": "52 unique values in 119,901 rows, empty in 713 rows" }, "coordinates.csv.gz": { "# rows": "207,700 (+ header)", diff --git a/src/utils/label_cleaning.py b/src/utils/label_cleaning.py index 41012b1c..f2f88c81 100644 --- a/src/utils/label_cleaning.py +++ b/src/utils/label_cleaning.py @@ -119,7 +119,7 @@ def remove_redundant_parts(labels, neuron_data): ) def rewrite(lbl): - for rprefix in ["putative "]: + for rprefix in ["putative ", "putative-", "putative_"]: if lbl.lower().startswith(rprefix): lbl = lbl[len(rprefix) :] return lbl