diff --git a/omero/annotation_scripts/Convert_KeyVal_namespace.py b/omero/annotation_scripts/Convert_KeyVal_namespace.py new file mode 100644 index 000000000..004ad89ba --- /dev/null +++ b/omero/annotation_scripts/Convert_KeyVal_namespace.py @@ -0,0 +1,339 @@ +# coding=utf-8 +""" + Convert_KeyVal_namespace.py + + Convert the namespace of objects key-value pairs. +----------------------------------------------------------------------------- + Copyright (C) 2024 + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +------------------------------------------------------------------------------ +Created by Tom Boissonnet + +""" + +import omero +from omero.gateway import BlitzGateway +from omero.rtypes import rstring, rlong, robject +import omero.scripts as scripts +from omero.constants.metadata import NSCLIENTMAPANNOTATION + + +CHILD_OBJECTS = { + "Project": "Dataset", + "Dataset": "Image", + "Screen": "Plate", + "Plate": "Well", + "Well": "WellSample", + "WellSample": "Image" +} + +ALLOWED_PARAM = { + "Project": ["Project", "Dataset", "Image"], + "Dataset": ["Dataset", "Image"], + "Image": ["Image"], + "Screen": ["Screen", "Plate", "Well", "Acquisition", "Image"], + "Plate": ["Plate", "Well", "Acquisition", "Image"], + "Well": ["Well", "Image"], + "Acquisition": ["Acquisition", "Image"], + "Tag": ["Project", "Dataset", "Image", + "Screen", "Plate", "Well", "Acquisition"] +} + +P_DTYPE = "Data_Type" # Do not change +P_IDS = "IDs" # Do not change +P_TARG_DTYPE = "Target Data_Type" +P_OLD_NS = "Old Namespace (blank for default)" +P_NEW_NS = "New Namespace (blank for default)" +P_MERGE = "Create new and merge" + + +def get_children_recursive(source_object, target_type): + if CHILD_OBJECTS[source_object.OMERO_CLASS] == target_type: + # Stop condition, we return the source_obj children + if source_object.OMERO_CLASS != "WellSample": + return source_object.listChildren() + else: + return [source_object.getImage()] + else: # Not yet the target + result = [] + for child_obj in source_object.listChildren(): + # Going down in the Hierarchy list + result.extend(get_children_recursive(child_obj, target_type)) + return result + + +def target_iterator(conn, source_object, target_type, is_tag): + if target_type == source_object.OMERO_CLASS: + target_obj_l = [source_object] + elif source_object.OMERO_CLASS == "PlateAcquisition": + # Check if there is more than one Run, otherwise + # it's equivalent to start from a plate (and faster this way) + plate_o = source_object.getParent() + wellsamp_l = get_children_recursive(plate_o, "WellSample") + if len(list(plate_o.listPlateAcquisitions())) > 1: + # Only case where we need to filter on PlateAcquisition + run_id = source_object.getId() + wellsamp_l = filter(lambda x: x._obj.plateAcquisition._id._val + == run_id, wellsamp_l) + target_obj_l = [wellsamp.getImage() for wellsamp in wellsamp_l] + elif target_type == "PlateAcquisition": + 
# No direct children access from a plate
+        if source_object.OMERO_CLASS == "Screen":
+            plate_l = get_children_recursive(source_object, "Plate")
+        elif source_object.OMERO_CLASS == "Plate":
+            plate_l = [source_object]
+        target_obj_l = [r for p in plate_l for r in p.listPlateAcquisitions()]
+    elif is_tag:
+        target_obj_l = conn.getObjectsByAnnotations(target_type,
+                                                    [source_object.getId()])
+        # Need that to load objects
+        obj_ids = [o.getId() for o in target_obj_l]
+        target_obj_l = list(conn.getObjects(target_type, obj_ids))
+    else:
+        target_obj_l = get_children_recursive(source_object,
+                                              target_type)
+
+    print(f"Iterating objects from {source_object}:")
+    for target_obj in target_obj_l:
+        print(f"\t- {target_obj}")
+        yield target_obj
+
+
+def main_loop(conn, script_params):
+    """
+    For every object:
+    - Find annotations in the old namespace(s)
+    - If merge:
+        - Remove the annotations with the old namespace
+        - Create a single merged annotation with the new namespace
+    - Else, change the namespace of each annotation (default)
+    """
+    source_type = script_params[P_DTYPE]
+    target_type = script_params[P_TARG_DTYPE]
+    source_ids = script_params[P_IDS]
+    old_namespace = script_params[P_OLD_NS]
+    new_namespace = script_params[P_NEW_NS]
+    merge = script_params[P_MERGE]
+
+    ntarget_processed = 0
+    ntarget_updated = 0
+    result_obj = None
+
+    # Process each given source object
+    for source_object in conn.getObjects(source_type, source_ids):
+        is_tag = source_type == "TagAnnotation"
+        for target_obj in target_iterator(conn, source_object,
+                                          target_type, is_tag):
+            ntarget_processed += 1
+            keyval_l, ann_l = get_existing_map_annotations(target_obj,
+                                                           old_namespace)
+            if len(keyval_l) > 0:
+                if merge:
+                    annotate_object(conn, target_obj, keyval_l,
+                                    new_namespace)
+                    remove_map_annotations(conn, ann_l)
+                else:
+                    for ann in ann_l:
+                        try:
+                            ann.setNs(new_namespace)
+                            ann.save()
+                        except Exception:
+                            print(f"Failed to edit {ann}")
+                            continue
+                ntarget_updated += 1
+                if result_obj is None:
+                    result_obj = target_obj
+            else:
+                print("\tNo MapAnnotation found with the given namespace(s)\n")
+    print("\n------------------------------------\n")
+    message = (
+        "Updated key-value pairs on " +
+        f"{ntarget_updated}/{ntarget_processed} {target_type}(s)"
+    )
+
+    return message, result_obj
+
+
+def get_existing_map_annotations(obj, namespace_l):
+    keyval_l, ann_l = [], []
+    non_editable = []
+    for namespace in namespace_l:
+        p = {} if namespace == "*" else {"ns": namespace}
+        for ann in obj.listAnnotations(**p):
+            if isinstance(ann, omero.gateway.MapAnnotationWrapper):
+                if ann.canEdit():
+                    keyval_l.extend([(k, v) for (k, v) in ann.getValue()])
+                    ann_l.append(ann)
+                else:  # No permission to edit this annotation, skip it
+                    non_editable.append(ann.id)
+    if len(non_editable) > 0:
+        print("\tMap Annotation IDs skipped (no edit permission):",
+              f"{non_editable}")
+    return keyval_l, ann_l
+
+
+def remove_map_annotations(conn, ann_l):
+    mapann_ids = [ann.id for ann in ann_l]
+
+    if len(mapann_ids) == 0:
+        return 0
+    print(f"\tMap Annotation IDs to delete: {mapann_ids}\n")
+    try:
+        conn.deleteObjects("Annotation", mapann_ids)
+        return 1
+    except Exception:
+        print(f"Failed to delete old annotations {mapann_ids}")
+        return 0
+
+
+def annotate_object(conn, obj, kv_list, namespace):
+    map_ann = omero.gateway.MapAnnotationWrapper(conn)
+    map_ann.setNs(namespace)
+    map_ann.setValue(kv_list)
+    map_ann.save()
+
+    print("\tMap Annotation created", map_ann.id)
+    obj.linkAnnotation(map_ann)
+
+
+def run_script():
+    # Cannot add fancy layout if we want auto fill and select of object ID
+
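+    # The leading dashes in target_types below ("- Dataset", "-- Image")
+    # are only visual hierarchy hints for the script UI;
+    # parameters_parsing() strips them via e.g. "-- Image".split(" ")[1].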
source_types = [ + rstring("Project"), rstring("Dataset"), rstring("Image"), + rstring("Screen"), rstring("Plate"), rstring("Well"), + rstring("Acquisition"), rstring("Image"), rstring("Tag"), + ] + + # Duplicate Image for UI, but not a problem for script + target_types = [ + rstring(""), rstring("Project"), + rstring("- Dataset"), rstring("-- Image"), + rstring("Screen"), rstring("- Plate"), + rstring("-- Well"), rstring("-- Acquisition"), + rstring("--- Image") + ] + + client = scripts.client( + 'Convert Key-Value pairs namespace', + """ + Converts the namespace of key-value pairs. + \t + Check the guide for more information on parameters and errors: + https://guide-kvpairs-scripts.readthedocs.io/en/latest/index.html + \t + Default namespace: openmicroscopy.org/omero/client/mapAnnotation + """, # Tabs are needed to add line breaks in the HTML + + scripts.String( + P_DTYPE, optional=False, grouping="1", + description="Data type of the parent objects.", + values=source_types, default="Dataset"), + + scripts.List( + P_IDS, optional=False, grouping="1.1", + description="IDs of the parent objects").ofType(rlong(0)), + + scripts.String( + P_TARG_DTYPE, optional=False, grouping="1.2", + description="Data type to process from the selected " + + "parent objects.", + values=target_types, default=""), + + scripts.List( + P_OLD_NS, optional=True, grouping="1.4", + description="Namespace(s) of the key-value pairs to " + + "process. Client namespace by default, " + + "'*' for all.").ofType(rstring("")), + + scripts.String( + P_NEW_NS, optional=True, grouping="1.5", + description="The new namespace for the annotations."), + + scripts.Bool( + P_MERGE, optional=True, grouping="1.6", + description="Check to merge selected key-value pairs " + + "into a single new one (will also include " + + "existing key-value pairs having the New Namespace)", + default=False), + + authors=["Tom Boissonnet"], + institutions=["CAi HHU"], + contact="https://forum.image.sc/tag/omero", + version="2.0.0", + ) + + try: + params = parameters_parsing(client) + print("Input parameters:") + keys = [P_DTYPE, P_IDS, P_TARG_DTYPE, P_OLD_NS, P_NEW_NS] + for k in keys: + print(f"\t- {k}: {params[k]}") + print("\n####################################\n") + + # wrap client to use the Blitz Gateway + conn = BlitzGateway(client_obj=client) + message, robj = main_loop(conn, params) + client.setOutput("Message", rstring(message)) + if robj is not None: + client.setOutput("Result", robject(robj._obj)) + + except AssertionError as err: + # Display assertion errors in OMERO.web activities + client.setOutput("ERROR", rstring(err)) + raise AssertionError(str(err)) + + finally: + client.closeSession() + + +def parameters_parsing(client): + params = {} + # Param dict with defaults for optional parameters + params[P_OLD_NS] = [NSCLIENTMAPANNOTATION] + params[P_NEW_NS] = NSCLIENTMAPANNOTATION + + for key in client.getInputKeys(): + if client.getInput(key): + params[key] = client.getInput(key, unwrap=True) + + if params[P_TARG_DTYPE] == "": + params[P_TARG_DTYPE] = params[P_DTYPE] + elif " " in params[P_TARG_DTYPE]: + # Getting rid of the trailing '---' added for the UI + params[P_TARG_DTYPE] = params[P_TARG_DTYPE].split(" ")[1] + + assert params[P_TARG_DTYPE] in ALLOWED_PARAM[params[P_DTYPE]], \ + (f"{params['Target Data_Type']} is not a valid target for " + + f"{params['Data_Type']}.") + + if params[P_DTYPE] == "Tag": + params[P_DTYPE] = "TagAnnotation" + + if params[P_TARG_DTYPE] == "Acquisition": + params[P_TARG_DTYPE] = "PlateAcquisition" + + if 
params[P_MERGE]: + # If merge, also include existing target NS + params[P_OLD_NS].append(params[P_NEW_NS]) + # Remove duplicate entries from namespace list + tmp = params[P_OLD_NS] + if "*" in tmp: + tmp = ["*"] + params[P_OLD_NS] = list(set(tmp)) + + return params + + +if __name__ == "__main__": + run_script() diff --git a/omero/annotation_scripts/Export_to_csv.py b/omero/annotation_scripts/Export_to_csv.py new file mode 100644 index 000000000..f12805a5f --- /dev/null +++ b/omero/annotation_scripts/Export_to_csv.py @@ -0,0 +1,587 @@ +# coding=utf-8 +""" + Export_to_csv.py + + Reads the metadata associated with the images in a dataset + and creates a csv file attached to dataset + +----------------------------------------------------------------------------- + Copyright (C) 2018 - 2024 + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +------------------------------------------------------------------------------ +Created by Christian Evenhuis + +""" + +import omero +from omero.gateway import BlitzGateway +from omero.rtypes import rstring, rlong, robject +from omero.constants.metadata import NSCLIENTMAPANNOTATION, NSINSIGHTTAGSET +import omero.scripts as scripts + +import tempfile +import os +import re +import csv +from collections import OrderedDict, defaultdict + +CHILD_OBJECTS = { + "Project": "Dataset", + "Dataset": "Image", + "Screen": "Plate", + "Plate": "Well", + "Well": "WellSample", + "WellSample": "Image" +} + +ALLOWED_PARAM = { + "Project": ["Project", "Dataset", "Image"], + "Dataset": ["Dataset", "Image"], + "Image": ["Image"], + "Screen": ["Screen", "Plate", "Well", "Acquisition", "Image"], + "Plate": ["Plate", "Well", "Acquisition", "Image"], + "Well": ["Well", "Image"], + "Acquisition": ["Acquisition", "Image"], + "Tag": ["Project", "Dataset", "Image", + "Screen", "Plate", "Well", "Acquisition"] +} + +P_DTYPE = "Data_Type" # Do not change +P_IDS = "IDs" # Do not change +P_TARG_DTYPE = "Target Data_Type" +P_NAMESPACE = "Namespace (blank for default)" +P_CSVSEP = "CSV separator" +P_INCL_PARENT = "Include parent container names" +P_INCL_NS = "Include namespace" +P_INCL_TAG = "Include tags" + +# Add your OMERO.web URL for direct download from link: +# eg https://omero-adress.org/webclient +WEBCLIENT_URL = "" + + +def get_obj_name(omero_obj): + """ Helper function """ + if omero_obj.OMERO_CLASS == "Well": + return omero_obj.getWellPos().upper() + else: + return omero_obj.getName() + + +def get_children_recursive(source_object, target_type): + if CHILD_OBJECTS[source_object.OMERO_CLASS] == target_type: + # Stop condition, we return the source_obj children + if source_object.OMERO_CLASS != "WellSample": + return source_object.listChildren() + else: + return [source_object.getImage()] + else: # Not yet the target + result = [] + for child_obj in source_object.listChildren(): + # Going down in the Hierarchy list + 
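+            # e.g. a Screen source with target "Image" walks
+            # Screen > Plate > Well > WellSample > Image, one
+            # CHILD_OBJECTS step per recursion level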
result.extend(get_children_recursive(child_obj, target_type)) + return result + + +def target_iterator(conn, source_object, target_type, is_tag): + if target_type == source_object.OMERO_CLASS: + target_obj_l = [source_object] + elif source_object.OMERO_CLASS == "PlateAcquisition": + # Check if there is more than one Run, otherwise + # it's equivalent to start from a plate (and faster this way) + plate_o = source_object.getParent() + wellsamp_l = get_children_recursive(plate_o, "WellSample") + if len(list(plate_o.listPlateAcquisitions())) > 1: + # Only case where we need to filter on PlateAcquisition + run_id = source_object.getId() + wellsamp_l = filter(lambda x: x._obj.plateAcquisition._id._val + == run_id, wellsamp_l) + target_obj_l = [wellsamp.getImage() for wellsamp in wellsamp_l] + elif target_type == "PlateAcquisition": + # No direct children access from a plate + if source_object.OMERO_CLASS == "Screen": + plate_l = get_children_recursive(source_object, "Plate") + elif source_object.OMERO_CLASS == "Plate": + plate_l = [source_object] + target_obj_l = [r for p in plate_l for r in p.listPlateAcquisitions()] + elif is_tag: + target_obj_l = conn.getObjectsByAnnotations(target_type, + [source_object.getId()]) + # Need that to load objects + obj_ids = [o.getId() for o in target_obj_l] + target_obj_l = list(conn.getObjects(target_type, obj_ids)) + else: + target_obj_l = get_children_recursive(source_object, + target_type) + + print(f"Iterating objects from {source_object}:") + for target_obj in target_obj_l: + print(f"\t- {target_obj}") + yield target_obj + + +def main_loop(conn, script_params): + """ + For every object: + - Find annotations in the namespace and gather in a dict + - (opt) Gather ancestry + Finalize: + - Group all annotations together + - Sort rows (useful for wells) + - Write a single CSV file + """ + source_type = script_params[P_DTYPE] + target_type = script_params[P_TARG_DTYPE] + source_ids = script_params[P_IDS] + namespace_l = script_params[P_NAMESPACE] + separator = script_params[P_CSVSEP] + include_parent = script_params[P_INCL_PARENT] + include_namespace = script_params[P_INCL_NS] + include_tags = script_params[P_INCL_TAG] + + # One file output per given ID + obj_ancestry_l = [] + annotations_d = defaultdict(list) + if include_tags: + all_tag_d = get_all_tags(conn) + obj_id_l, obj_name_l, tagannotation_l = [], [], [] + for source_object in conn.getObjects(source_type, source_ids): + + result_obj = source_object + if source_type == "TagAnnotation": + result_obj = None # Attach result csv on the first object + is_tag = source_type == "TagAnnotation" + + for target_obj in target_iterator(conn, source_object, + target_type, is_tag): + annotations_d[0].append([]) # (when no ns exported, all ann in 0) + for ns in namespace_l: + next_ann_l = get_existing_map_annotations(target_obj, + ns) + if ns != "*": + annotations_d[ns].append(next_ann_l) + annotations_d[0][-1].extend(next_ann_l) + + if include_tags: + tagannotation_l.append(get_existing_tag_annotations(target_obj, + all_tag_d)) + + obj_id_l.append(target_obj.getId()) + obj_name_l.append(get_obj_name(target_obj)) + if include_parent: + ancestry = [] + for o in target_obj.getAncestry(): + if o.OMERO_CLASS == "WellSample": + o = o.getPlateAcquisition() + ancestry.append((o.OMERO_CLASS, get_obj_name(o))) + obj_ancestry_l.append(ancestry[::-1]) + + if result_obj is None: + result_obj = target_obj + print("\n------------------------------------\n") + + csv_name = f"{get_obj_name(source_object)}_{target_type}-KeyValue.csv" + 
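+    # Resulting CSV layout (sketch with hypothetical values; the namespace
+    # row is present only when "Include namespace" is checked):
+    #
+    #   NAMESPACE   (one namespace per key column, blank for TAG columns)
+    #   OBJECT_ID, OBJECT_NAME, <key 1>,   ..., TAG
+    #   201,       image1.tif,  <value 1>, ..., myTag[myTagSet]
+    #
+    # OBJECT_ID and OBJECT_NAME are prepended in sort_concat_rows().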
+ if include_namespace and "*" in namespace_l: + # Assign entries of * namespace + ns_set = set() + for ann_l in annotations_d[0]: + ns_set = ns_set.union([ann.getNs() for ann in ann_l]) + for ann_l in annotations_d[0]: + for ns in ns_set: + annotations_d[ns].append([]) + for ann in ann_l: + annotations_d[ann.getNs()][-1].append(ann) + + # Complete ancestry for image/dataset/plate without parents + norm_ancestry_l = [] + if len(obj_ancestry_l) > 0: + # Issue with image that don't have a plateacquisition + # if combined with images that have + max_level = max(map(lambda x: len(x), obj_ancestry_l)) + for ancestry in obj_ancestry_l: + norm_ancestry_l.append([("", "")] * + (max_level - len(ancestry)) + + ancestry) + + ns_row, header_row, rows = build_rows(annotations_d, tagannotation_l, + include_namespace) + ns_row, header_row, rows = sort_concat_rows(ns_row, header_row, rows, + obj_id_l, obj_name_l, + norm_ancestry_l) + rows.insert(0, header_row) + if include_namespace: + rows.insert(0, ns_row) + file_ann = attach_csv(conn, result_obj, rows, separator, csv_name) + + if file_ann is None: + message = "The CSV is printed in output, no file could be attached:" + else: + message = ("The csv is attached to " + + f"{result_obj.OMERO_CLASS}:{result_obj.getId()}") + + return message, file_ann, result_obj + + +def get_all_tags(conn): + all_tag_d = {} + for tag in conn.getObjects("TagAnnotation"): + + tagname = tag.getValue() + if (tag.getNs() == NSINSIGHTTAGSET): + # It's a tagset, set all tag_id to "tagname[tagset_name]" + for lk in conn.getAnnotationLinks("TagAnnotation", + parent_ids=[tag.id]): + child_id = int(lk.child.id.val) + child_name = lk.child.textValue.val + all_tag_d[child_id] = f"{child_name}[{tagname}]" + elif tag.id not in all_tag_d.keys(): + # Normal tag and not in the dict yet + # (if found as part of a tagset, it is not overwritten) + all_tag_d[int(tag.id)] = tagname + + return all_tag_d + + +def get_existing_map_annotations(obj, namespace): + "Return list of KV with updated keys with NS and occurences" + annotation_l = [] + p = {} if namespace == "*" else {"ns": namespace} + for ann in obj.listAnnotations(**p): + if isinstance(ann, omero.gateway.MapAnnotationWrapper): + annotation_l.append(ann) + return annotation_l + + +def get_existing_tag_annotations(obj, all_tag_d): + "Return list of tag names with tagset if any" + annotation_l = [] + for ann in obj.listAnnotations(): + if (isinstance(ann, omero.gateway.TagAnnotationWrapper) + and ann.getId() in all_tag_d.keys()): + annotation_l.append(all_tag_d[ann.getId()]) + return annotation_l + + +def build_rows(annotation_dict_l, tagannotation_l, include_namespace): + ns_row = [] + if include_namespace: + header_row, rows = [], [[] for i in range(len(annotation_dict_l[0]))] + for ns, annotation_l in annotation_dict_l.items(): + if ns == 0: + continue + next_header, next_rows = group_keyvalues(annotation_l) + ns_row.extend([ns]*len(next_header)) + header_row.extend(next_header) + for i, next_row in enumerate(next_rows): + rows[i].extend(next_row) + else: + header_row, rows = group_keyvalues(annotation_dict_l[0]) + + if len(tagannotation_l) > 0: + max_tag = max(map(len, tagannotation_l)) + if include_namespace: + ns_row.extend([""] * max_tag) + header_row.extend(["TAG"] * max_tag) + for i, tag_l in enumerate(tagannotation_l): + rows[i].extend(tag_l) + rows[i].extend([""] * (max_tag - len(tag_l))) + + return ns_row, header_row, rows + + +def group_keyvalues(objannotation_l): + """ Groups the keys and values of each object into a single 
dictionary """ + header_row = OrderedDict() # To keep the keys in order + keyval_obj_l = [] + for ann_l in objannotation_l: + count_k_l = [] + keyval_obj_l.append({}) + for ann in ann_l: + for (k, v) in ann.getValue(): + n_occurence = count_k_l.count(k) + pad_k = f"{n_occurence}#{k}" + keyval_obj_l[-1][pad_k] = v + header_row[pad_k] = None + count_k_l.append(k) + header_row = list(header_row.keys()) + # TODO find how to sort columns when multiple exist + # or similar + + rows = [] + for keyval_obj in keyval_obj_l: + obj_dict = OrderedDict((k, "") for k in header_row) + obj_dict.update(keyval_obj) + rows.append(list(obj_dict.values())) + + # Removing temporary padding + header_row = [k[k.find("#")+1:] for k in header_row] + return header_row, rows + + +def sort_concat_rows(ns_row, header_row, rows, obj_id_l, + obj_name_l, obj_ancestry_l): + def convert(text): + return int(text) if text.isdigit() else text.lower() + + def alphanum_key(key): + return [convert(c) for c in re.split('([0-9]+)', key)] + + def natural_sort(names): + # kudos to https://stackoverflow.com/a/4836734/10712860 + names = list(map(alphanum_key, names)) + return sorted(range(len(names)), key=names.__getitem__) + + with_parents = len(obj_ancestry_l) > 0 + + prefixes = [""] * len(obj_name_l) + if with_parents: + for i in range(len(obj_ancestry_l[0])): + curr_name_list = [prf+names[i][1] for prf, names + in zip(prefixes, obj_ancestry_l)] + curr_name_set = list(set(curr_name_list)) + indexes = natural_sort(curr_name_set) + prefix_d = {curr_name_set[idx]: j for j, idx in enumerate(indexes)} + prefixes = [f"{prefix_d[name]}_" for name in curr_name_list] + curr_name_list = [prf+name for prf, name in zip(prefixes, obj_name_l)] + indexes = natural_sort(curr_name_list) + + # End sorting, start concatenation + + res_rows = [] + for idx in indexes: + curr_row = [str(obj_id_l[idx])] + [obj_name_l[idx]] + rows[idx] + if with_parents: + curr_row = [e[1] for e in obj_ancestry_l[idx]] + curr_row + res_rows.append(curr_row) + header_row.insert(0, "OBJECT_ID") + header_row.insert(1, "OBJECT_NAME") + ns_row.insert(0, "") + ns_row.insert(1, "") + + if with_parents: + i = 0 + while "" in [e[0] for e in obj_ancestry_l[i]]: + i += 1 # Find the row with complete parent names + for j in range(len(obj_ancestry_l[i])): + header_row.insert(j, obj_ancestry_l[i][j][0].upper()) + ns_row.insert(j, "") + ns_row[0] = "NAMESPACE" + + print(f"\tColumn names: {header_row}", "\n") + + return ns_row, header_row, res_rows + + +def attach_csv(conn, obj_, rows, separator, csv_name): + if not obj_.canAnnotate() and WEBCLIENT_URL == "": + for row in rows: + print(f"{separator.join(row)}") + return None + + # create the tmp directory + tmp_dir = tempfile.mkdtemp(prefix='MIF_meta') + (fd, tmp_file) = tempfile.mkstemp(dir=tmp_dir, text=True) + with os.fdopen(fd, 'w', encoding="utf-8") as tfile: + tfile.write(f"sep={separator}\n") # Indicates separator for excel + csvwriter = csv.writer(tfile, + delimiter=separator, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + lineterminator="\n") + for row in rows: + csvwriter.writerow(row) + + # link it to the object + file_ann = conn.createFileAnnfromLocalFile( + tmp_file, origFilePathAndName=csv_name, + ns='KeyVal_export') + + if obj_.canAnnotate(): + obj_.linkAnnotation(file_ann) + print(f"{file_ann} linked to {obj_}") + + # remove the tmp file + os.remove(tmp_file) + os.rmdir(tmp_dir) + + return file_ann.getFile() + + +def run_script(): + """ + The main entry point of the script, as called by the client via the + scripting 
service, passing the required parameters. + """ + + # Cannot add fancy layout if we want auto fill and selct of object ID + source_types = [ + rstring("Project"), rstring("Dataset"), rstring("Image"), + rstring("Screen"), rstring("Plate"), rstring("Well"), + rstring("Acquisition"), rstring("Image"), rstring("Tag"), + ] + + # Duplicate Image for UI, but not a problem for script + target_types = [ + rstring(""), rstring("Project"), + rstring("- Dataset"), rstring("-- Image"), + rstring("Screen"), rstring("- Plate"), + rstring("-- Well"), rstring("-- Acquisition"), + rstring("--- Image") + ] + + separators = [";", ",", "TAB", "|"] + # Here we define the script name and description. + # Good practice to put url here to give users more guidance on how to run + # your script. + client = scripts.client( + 'Export to CSV', + """ + Exports in a CSV the key-value pairs, tags, name and ID + of the selected objects. + \t + Check the guide for more information on parameters and errors: + https://guide-kvpairs-scripts.readthedocs.io/en/latest/index.html + \t + Default namespace: openmicroscopy.org/omero/client/mapAnnotation + """, # Tabs are needed to add line breaks in the HTML + + scripts.String( + P_DTYPE, optional=False, grouping="1", + description="Data type of the parent objects.", + values=source_types, default="Dataset"), + + scripts.List( + P_IDS, optional=False, grouping="1.1", + description="IDs of the parent objects").ofType(rlong(0)), + + scripts.String( + P_TARG_DTYPE, optional=False, grouping="1.2", + description="Data type to process from the selected " + + "parent objects.", + values=target_types, default=""), + + scripts.List( + P_NAMESPACE, optional=True, + grouping="1.3", + description="Namespace(s) of the key-value pairs " + + "to export. Client namespace by default, " + + "'*' for all.").ofType(rstring("")), + + scripts.Bool( + "Other parameters", optional=True, grouping="2", default=True, + description="Ticking or unticking this has no effect"), + + scripts.String( + P_CSVSEP, optional=False, grouping="2.1", + description="Choose the CSV separator.", + values=separators, default="TAB"), + + scripts.Bool( + P_INCL_PARENT, optional=True, + grouping="2.2", + description="Check to include columns for the parent " + + "containers names", default=False), + + scripts.Bool( + P_INCL_NS, optional=True, + grouping="2.3", + description="Check to include the namespaces " + + "of the key-value pairs in the CSV.", default=False), + + scripts.Bool( + P_INCL_TAG, optional=True, + grouping="2.4", + description="Check to include tags in the CSV file.", + default=False), + + authors=["Christian Evenhuis", "MIF", "Tom Boissonnet"], + institutions=["University of Technology Sydney", "CAi HHU"], + contact="https://forum.image.sc/tag/omero", + version="2.0.0", + ) + try: + params = parameters_parsing(client) + + # wrap client to use the Blitz Gateway + conn = BlitzGateway(client_obj=client) + message, fileann, res_obj = main_loop(conn, params) + client.setOutput("Message", rstring(message)) + + if res_obj is not None and fileann is not None: + href = f"{WEBCLIENT_URL}/download_original_file/{fileann.getId()}" + if WEBCLIENT_URL != "": + url = omero.rtypes.wrap({ + "type": "URL", + "href": href, + "title": "CSV file of Key-Value pairs", + }) + client.setOutput("URL", url) + else: + client.setOutput("Result", robject(res_obj._obj)) + + except AssertionError as err: + # Display assertion errors in OMERO.web activities + client.setOutput("ERROR", rstring(err)) + raise AssertionError(str(err)) + finally: + 
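+        # Always release the server-side session, whether the export
+        # succeeded or an assertion above ended it early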
client.closeSession() + + +def parameters_parsing(client): + params = {} + # Param dict with defaults for optional parameters + params[P_NAMESPACE] = [NSCLIENTMAPANNOTATION] + + for key in client.getInputKeys(): + if client.getInput(key): + # unwrap rtypes to String, Integer etc + params[key] = client.getInput(key, unwrap=True) + + if params[P_TARG_DTYPE] == "": + params[P_TARG_DTYPE] = params[P_DTYPE] + elif " " in params[P_TARG_DTYPE]: + # Getting rid of the trailing '---' added for the UI + params[P_TARG_DTYPE] = params[P_TARG_DTYPE].split(" ")[1] + + assert params[P_TARG_DTYPE] in ALLOWED_PARAM[params[P_DTYPE]], \ + (f"{params['Target Data_Type']} is not a valid target for " + + f"{params['Data_Type']}.") + + # Remove duplicate entries from namespace list + tmp = params[P_NAMESPACE] + if "*" in tmp: + tmp = ["*"] + params[P_NAMESPACE] = list(set(tmp)) + + if params[P_DTYPE] == "Tag": + params[P_DTYPE] = "TagAnnotation" + + if params[P_TARG_DTYPE] == "Acquisition": + params[P_TARG_DTYPE] = "PlateAcquisition" + + print("Input parameters:") + keys = [P_DTYPE, P_IDS, P_TARG_DTYPE, P_NAMESPACE, + P_CSVSEP, P_INCL_PARENT, P_INCL_NS, P_INCL_TAG] + for k in keys: + print(f"\t- {k}: {params[k]}") + print("\n####################################\n") + + if params[P_CSVSEP] == "TAB": + params[P_CSVSEP] = "\t" + + return params + + +if __name__ == "__main__": + run_script() diff --git a/omero/annotation_scripts/Import_from_csv.py b/omero/annotation_scripts/Import_from_csv.py new file mode 100644 index 000000000..408d09416 --- /dev/null +++ b/omero/annotation_scripts/Import_from_csv.py @@ -0,0 +1,863 @@ +# coding=utf-8 +""" + Import_from_csv.py + + Adds key-value pairs to a target object on OMERO from a CSV file. + +----------------------------------------------------------------------------- + Copyright (C) 2018 - 2024 + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+------------------------------------------------------------------------------ +Created by Christian Evenhuis + +""" + +import omero +from omero.gateway import BlitzGateway, TagAnnotationWrapper +from omero.rtypes import rstring, rlong, robject +import omero.scripts as scripts +from omero.constants.metadata import NSCLIENTMAPANNOTATION, NSINSIGHTTAGSET +from omero.model import AnnotationAnnotationLinkI +from omero.util.populate_roi import DownloadingOriginalFileProvider + +import csv +from collections import defaultdict, OrderedDict +import re + + +CHILD_OBJECTS = { + "Project": "Dataset", + "Dataset": "Image", + "Screen": "Plate", + "Plate": "Well", + "Well": "WellSample", + "WellSample": "Image" +} + +ALLOWED_PARAM = { + "Project": ["Project", "Dataset", "Image"], + "Dataset": ["Dataset", "Image"], + "Image": ["Image"], + "Screen": ["Screen", "Plate", "Well", "Acquisition", "Image"], + "Plate": ["Plate", "Well", "Acquisition", "Image"], + "Well": ["Well", "Image"], + "Acquisition": ["Acquisition", "Image"], + "Tag": ["Project", "Dataset", "Image", + "Screen", "Plate", "Well", "Acquisition"] +} + +P_DTYPE = "Data_Type" # Do not change +P_FILE_ANN = "File_Annotation" # Do not change +P_IDS = "IDs" # Do not change +P_TARG_DTYPE = "Target Data_Type" +P_NAMESPACE = "Namespace (blank for default or from csv)" +P_CSVSEP = "CSV separator" +P_EXCL_COL = "Columns to exclude" +P_TARG_COLID = "Target ID colname" +P_TARG_COLNAME = "Target name colname" +P_EXCL_EMPTY = "Exclude empty values" +P_SPLIT_CELL = "Split values on" +P_IMPORT_TAGS = "Import tags" +P_OWN_TAG = "Only use personal tags" +P_ALLOW_NEWTAG = "Allow tag creation" + + +def get_obj_name(omero_obj): + """ Helper function """ + if omero_obj.OMERO_CLASS == "Well": + return omero_obj.getWellPos().upper() + else: + return omero_obj.getName() + + +def get_children_recursive(source_object, target_type): + if CHILD_OBJECTS[source_object.OMERO_CLASS] == target_type: + # Stop condition, we return the source_obj children + if source_object.OMERO_CLASS != "WellSample": + return source_object.listChildren() + else: + return [source_object.getImage()] + else: # Not yet the target + result = [] + for child_obj in source_object.listChildren(): + # Going down in the Hierarchy list + result.extend(get_children_recursive(child_obj, target_type)) + return result + + +def target_iterator(conn, source_object, target_type, is_tag): + if target_type == source_object.OMERO_CLASS: + target_obj_l = [source_object] + elif source_object.OMERO_CLASS == "PlateAcquisition": + # Check if there is more than one Run, otherwise + # it's equivalent to start from a plate (and faster this way) + plate_o = source_object.getParent() + wellsamp_l = get_children_recursive(plate_o, "WellSample") + if len(list(plate_o.listPlateAcquisitions())) > 1: + # Only case where we need to filter on PlateAcquisition + run_id = source_object.getId() + wellsamp_l = filter(lambda x: x._obj.plateAcquisition._id._val + == run_id, wellsamp_l) + target_obj_l = [wellsamp.getImage() for wellsamp in wellsamp_l] + elif target_type == "PlateAcquisition": + # No direct children access from a plate + if source_object.OMERO_CLASS == "Screen": + plate_l = get_children_recursive(source_object, "Plate") + elif source_object.OMERO_CLASS == "Plate": + plate_l = [source_object] + target_obj_l = [r for p in plate_l for r in p.listPlateAcquisitions()] + elif is_tag: + target_obj_l = conn.getObjectsByAnnotations(target_type, + [source_object.getId()]) + # Need that to load objects + obj_ids = [o.getId() for o in 
target_obj_l] + target_obj_l = list(conn.getObjects(target_type, obj_ids)) + else: + target_obj_l = get_children_recursive(source_object, + target_type) + + print(f"Iterating objects from {source_object}:") + for target_obj in target_obj_l: + if target_obj.canAnnotate(): + print(f"\t- {target_obj}") + yield target_obj + else: + print(f"\t- Annotate {target_obj} is not permitted, skipping") + print() + + +def main_loop(conn, script_params): + """ + Startup: + - Find CSV and read + For every object: + - Gather name and ID + Finalize: + - Find a match between CSV rows and objects + - Annotate the objects + - (opt) attach the CSV to the source object + """ + source_type = script_params[P_DTYPE] + target_type = script_params[P_TARG_DTYPE] + source_ids = script_params[P_IDS] + file_ids = script_params[P_FILE_ANN] + namespace = script_params[P_NAMESPACE] + to_exclude = script_params[P_EXCL_COL] + target_id_colname = script_params[P_TARG_COLID] + target_name_colname = script_params[P_TARG_COLNAME] + separator = script_params[P_CSVSEP] + exclude_empty_value = script_params[P_EXCL_EMPTY] + split_on = script_params[P_SPLIT_CELL] + use_personal_tags = script_params[P_OWN_TAG] + create_new_tags = script_params[P_ALLOW_NEWTAG] + import_tags = script_params[P_IMPORT_TAGS] + file_ann_multiplied = script_params["File_Annotation_multiplied"] + + ntarget_processed = 0 + ntarget_updated = 0 + missing_names = set() + processed_names = set() + total_missing_names = 0 + + result_obj = None + + # Dictionaries needed for the tags + tag_d, tagset_d, tagtree_d, tagid_d = None, None, None, None + + # One file output per given ID + source_objects = conn.getObjects(source_type, source_ids) + for source_object, file_ann_id in zip(source_objects, file_ids): + ntarget_updated_curr = 0 + + # Find the file from the user input + if file_ann_id is not None: + file_ann = conn.getObject("Annotation", oid=file_ann_id) + assert file_ann is not None, f"Annotation {file_ann_id} not found" + assert file_ann.OMERO_TYPE == omero.model.FileAnnotationI, \ + ("The provided annotation ID must reference a " + + f"FileAnnotation, not a {file_ann.OMERO_TYPE}") + else: + file_ann = get_original_file(source_object) + + # Get the list of things to annotate + is_tag = source_type == "TagAnnotation" + target_obj_l = list(target_iterator(conn, source_object, + target_type, is_tag)) + + # Find the most suitable object to link the file to + if is_tag and len(target_obj_l) > 0: + obj_to_link = target_obj_l[0] + else: + obj_to_link = source_object + link_file_ann(conn, obj_to_link, file_ann) + + original_file = file_ann.getFile()._obj + rows, header, namespaces = read_csv(conn, original_file, + separator, import_tags) + if namespace is not None: + namespaces = [namespace] * len(header) + elif len(namespaces) == 0: + namespaces = [NSCLIENTMAPANNOTATION] * len(header) + + # Index of the column used to identify the targets. 
Try for IDs first + idx_id, idx_name = -1, -1 + if target_id_colname in header: + idx_id = header.index(target_id_colname) + if target_name_colname in header: + idx_name = header.index(target_name_colname) + cols_to_ignore = [header.index(el) for el in to_exclude + if el in header] + + assert (idx_id != -1) or (idx_name != -1), \ + ("Neither the column for the objects' name or" + + " the objects' index were found") + + use_id = idx_id != -1 # use the obj_idx column if exist + if not use_id: + idx_id = idx_name + # check if the names in the .csv contain duplicates + name_list = [row[idx_id] for row in rows] + duplicates = {name for name in name_list + if name_list.count(name) > 1} + print("duplicates:", duplicates) + assert not len(duplicates) > 0, \ + (f"The .csv contains duplicates {duplicates} which makes" + + " it impossible to correctly allocate the annotations.") + + # Identify target-objects by name fail if two have identical names + target_d = dict() + for target_obj in target_obj_l: + name = get_obj_name(target_obj) + assert name not in target_d.keys(), \ + ("Target objects identified by name have at " + + f"least one duplicate: {name}") + target_d[name] = target_obj + else: + # Setting the dictionnary target_id:target_obj + # keys as string to match CSV reader output + target_d = {str(target_obj.getId()): target_obj + for target_obj in target_obj_l} + ntarget_processed += len(target_d) + + if tag_d is None and "tag" in [h.lower() for h in header]: + # Create the tag dictionary a single time if needed + tag_d, tagset_d, tagtree_d, tagid_d = get_tag_dict( + conn, use_personal_tags + ) + # Replace the tags in the CSV by the tag_id to use + rows, tag_d, tagset_d, tagtree_d, tagid_d = preprocess_tag_rows( + conn, header, rows, tag_d, tagset_d, tagtree_d, tagid_d, + create_new_tags, split_on + ) + + ok_idxs = [i for i in range(len(header)) if i not in cols_to_ignore] + for row in rows: + # Iterate the CSV rows and search for the matching target + target_id = row[idx_id] + # skip empty rows + if target_id == "": + continue + if target_id in target_d.keys(): + target_obj = target_d[target_id] + # add name/id to processed set + if file_ann_multiplied: + processed_names.add(target_id) + else: + # add name/id to missing set + if file_ann_multiplied: + missing_names.add(target_id) + else: + total_missing_names += 1 + print(f"Not found: {target_id}") + continue + + if split_on != "": + parsed_row, parsed_ns, parsed_head = [], [], [] + for i in ok_idxs: + curr_vals = row[i].strip().split(split_on) + parsed_row.extend(curr_vals) + parsed_ns.extend([namespaces[i]] * len(curr_vals)) + parsed_head.extend([header[i]] * len(curr_vals)) + else: + parsed_row = [row[i] for i in ok_idxs] + parsed_ns = [namespaces[i] for i in ok_idxs] + parsed_head = [header[i] for i in ok_idxs] + + updated = annotate_object( + conn, target_obj, parsed_row, parsed_head, + parsed_ns, exclude_empty_value, tagid_d, split_on + ) + + if updated: + if result_obj is None: + result_obj = target_obj + ntarget_updated += 1 + ntarget_updated_curr += 1 + + print("\n------------------------------------\n") + + message = ( + "Added Annotations to " + + f"{ntarget_updated}/{ntarget_processed} {target_type}(s)" + ) + + if file_ann_multiplied and len(missing_names) > 0: + # subtract the processed names/ids from the + # missing ones and print the missing names/ids + missing_names = missing_names - processed_names + if len(missing_names) > 0: + print(f"Not found: {missing_names}") + total_missing_names = len(missing_names) + + if 
total_missing_names > 0:
+        message += (
+            f". {total_missing_names} {target_type}(s) not found "
+            f"(using {'ID' if use_id else 'name'} to identify them)."
+        )
+
+    return message, result_obj
+
+
+def get_original_file(omero_obj):
+    """Find the last CSV FileAnnotation linked to the object, when no
+    annotation ID is given"""
+    file_ann = None
+    for ann in omero_obj.listAnnotations():
+        if ann.OMERO_TYPE == omero.model.FileAnnotationI:
+            file_name = ann.getFile().getName()
+            # Only consider .csv and .tsv attachments
+            if file_name.endswith(".csv") or file_name.endswith(".tsv"):
+                if (file_ann is None) or (ann.getDate() > file_ann.getDate()):
+                    # Keep the most recent file
+                    file_ann = ann
+
+    assert file_ann is not None, \
+        (f"No .csv FileAnnotation was found on {omero_obj.OMERO_CLASS}" +
+         f":{get_obj_name(omero_obj)}:{omero_obj.getId()}")
+
+    return file_ann
+
+
+def read_csv(conn, original_file, delimiter, import_tags):
+    """ Dedicated function to read the CSV file """
+    print("Using FileAnnotation",
+          f"{original_file.id.val}:{original_file.name.val}")
+    provider = DownloadingOriginalFileProvider(conn)
+    # Download and read the csv
+    # Needs omero-py 5.9.1 or later
+    try:
+        temp_file = provider.get_original_file_data(original_file)
+        with open(temp_file.name, mode="rt", encoding='utf-8-sig') as f:
+            csv_content = f.readlines()
+    except UnicodeDecodeError as e:
+        assert False, ("Error while reading the csv, convert your " +
+                       "file to utf-8 encoding: " + str(e))
+
+    # Read the delimiter from the CSV first line, if present
+    re_delimiter = re.compile("sep=(?P<delimiter>.?)")
+    match = re_delimiter.match(csv_content[0])
+    if match:  # Need to discard first row
+        csv_content = csv_content[1:]
+        if delimiter is None:  # (and we detect delimiter if not given)
+            delimiter = match.group('delimiter')
+
+    if delimiter is None:
+        try:
+            # Sniffing on a maximum of four lines
+            delimiter = csv.Sniffer().sniff("\n".join(csv_content[:4]),
+                                            "|,;\t").delimiter
+        except Exception as e:
+            assert False, ("Failed to sniff CSV delimiter: " + str(e))
+    rows = list(csv.reader(csv_content, delimiter=delimiter))
+
+    rowlen = len(rows[0])
+    error_msg = (
+        "CSV row length mismatch: header has {} "
+        + "items, while line {} has {}"
+    )
+    for i in range(1, len(rows)):
+        assert len(rows[i]) == rowlen, error_msg.format(
+            rowlen, i, len(rows[i])
+        )
+
+    # Keys are in the header row (first row when no namespaces,
+    # second row when a namespace row is declared)
+    namespaces = []
+    if rows[0][0].lower() == "namespace":
+        namespaces = [el.strip() for el in rows[0]]
+        namespaces = [ns if ns else NSCLIENTMAPANNOTATION for ns in namespaces]
+        rows = rows[1:]
+    header = [el.strip() for el in rows[0]]
+    rows = rows[1:]
+
+    if not import_tags:
+        # Filter out the tag columns
+        idx_l = [i for i in range(len(header)) if header[i].lower() != "tag"]
+        header = [header[i] for i in idx_l]
+        if len(namespaces) > 0:
+            namespaces = [namespaces[i] for i in idx_l]
+        for j in range(len(rows)):
+            rows[j] = [rows[j][i] for i in idx_l]
+
+    print(f"Header: {header}\n")
+    return rows, header, namespaces
+
+
+def annotate_object(conn, obj, row, header, namespaces,
+                    exclude_empty_value, tagid_d, split_on):
+    updated = False
+    print(f"-->processing {obj}")
+    for curr_ns in list(OrderedDict.fromkeys(namespaces)):
+        # NB: 'updated' is not reset here, so it stays True as soon as
+        # any namespace produced an annotation
+        kv_list = []
+        tag_id_l = []
+        for ns, h, r in zip(namespaces, header, row):
+            r = r.strip()
+            if ns == curr_ns and (len(r) > 0 or not exclude_empty_value):
+                if h.lower() == "tag":
+                    if r == "":
+                        continue
+                    # check for "tag" in header and create & link a
+                    # TagAnnotation
+                    if
split_on == "": # Default join for tags is "," + tag_id_l.extend(r.split(",")) + else: # given split_on is used (ahead of this function) + tag_id_l.append(r) + else: + kv_list.append([h, r]) + if len(kv_list) > 0: # Always exclude empty KV pairs + # creation and linking of a MapAnnotation + map_ann = omero.gateway.MapAnnotationWrapper(conn) + map_ann.setNs(curr_ns) + map_ann.setValue(kv_list) + map_ann.save() + obj.linkAnnotation(map_ann) + print(f"MapAnnotation:{map_ann.id} created on {obj}") + updated = True + if len(tag_id_l) > 0: + exist_ids = [ann.getId() for ann in obj.listAnnotations()] + for tag_id in tag_id_l: + tag_id = int(tag_id) + if tag_id not in exist_ids: + tag_ann = tagid_d[tag_id] + obj.linkAnnotation(tag_ann) + exist_ids.append(tag_id) + print(f"TagAnnotation:{tag_ann.id} created on {obj}") + updated = True + + return updated + + +def get_tag_dict(conn, use_personal_tags): + """ + Generate dictionnaries of the tags in the group. + + Parameters: + -------------- + conn : ``omero.gateway.BlitzGateway`` object + OMERO connection. + use_personal_tags: ``Boolean``, indicates the use of only tags + owned by the user. + + Returns: + ------------- + tag_d: dictionary of tag_ids {"tagA": [12], "tagB":[34,56]} + tagset_d: dictionary of tagset_ids {"tagsetX":[78]} + tagtree_d: dictionary of tags in tagsets {"tagsetX":{"tagA":[12]}} + tagid_d: dictionary of tag objects {12:tagA_obj, 34:tagB_obj} + + """ + tagtree_d = defaultdict(lambda: defaultdict(list)) + tag_d, tagset_d = defaultdict(list), defaultdict(list) + tagid_d = {} + + max_id = -1 + + uid = conn.getUserId() + for tag in conn.getObjects("TagAnnotation"): + is_owner = tag.getOwner().id == uid + if use_personal_tags and not is_owner: + continue + + tagid_d[tag.id] = tag + max_id = max(max_id, tag.id) + tagname = tag.getValue() + if (tag.getNs() == NSINSIGHTTAGSET): + # It's a tagset + tagset_d[tagname].append((int(is_owner), tag.id)) + for lk in conn.getAnnotationLinks("TagAnnotation", + parent_ids=[tag.id]): + # Add all tags of this tagset in the tagtree + cname = lk.child.textValue.val + cid = lk.child.id.val + cown = int(lk.child.getDetails().owner.id.val == uid) + tagtree_d[tagname][cname].append((cown, cid)) + else: + tag_d[tagname].append((int(is_owner), tag.id)) + + # Sorting the tag by index (and if owned or not) + # to keep only one + for k, v in tag_d.items(): + v.sort(key=lambda x: (x[0]*max_id + x[1])) + tag_d[k] = v[0][1] + for k, v in tagset_d.items(): + v.sort(key=lambda x: (x[0]*max_id + x[1])) + tagset_d[k] = v[0][1] + for k1, v1 in tagtree_d.items(): + for k2, v2 in v1.items(): + v2.sort(key=lambda x: (x[0]*max_id + x[1])) + tagtree_d[k1][k2] = v2[0][1] + + return tag_d, tagset_d, tagtree_d, tagid_d + + +def preprocess_tag_rows(conn, header, rows, tag_d, tagset_d, + tagtree_d, tagid_d, + create_new_tags, split_on): + """ + Replace the tags in the rows by tag_ids. + All done in preprocessing means that the script will fail before + annotations process starts. 
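+
+    Accepted cell values (parsed with the regex below): "tagname",
+    "tagname[1234]" (an attached server tag ID takes precedence and is
+    checked against the name) and "tagname[tagsetname]" (a tag inside a
+    tagset). A cell may hold several such values, split on `split_on`
+    (default ",").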
+    """
+    regx_tag = re.compile(r"([^\[\]]+)?(?:\[(\d+)\])?(?:\[([^[\]]+)\])?")
+    update = conn.getUpdateService()
+
+    col_idxs = [i for i in range(len(header)) if header[i].lower() == "tag"]
+    res_rows = []
+    for row in rows:
+        for col_idx in col_idxs:
+            values = row[col_idx]
+            tagid_l = []
+            if split_on == "":
+                split_on = ","
+            values = values.split(split_on)
+
+            for val in values:
+                val = val.strip()
+                # Match the tag syntax regex against the value
+                re_match = regx_tag.match(val)
+                if re_match is None:
+                    continue
+                tagname, tagid, tagset = re_match.groups()
+                has_tagset = (tagset is not None and tagset != "")
+                if tagid is not None:
+                    # If an ID is found, it takes precedence
+                    assert int(tagid) in tagid_d.keys(), \
+                        (f"The tag ID:'{tagid}' is not" +
+                         " in the permitted selection of tags")
+                    tag_o = tagid_d[int(tagid)]
+                    if tagname is not None and tagname != "":
+                        assert tag_o.getValue() == tagname, (
+                            f"The tag {tagname} doesn't correspond" +
+                            f" to the tag on the server with ID:{tagid}"
+                        )
+                    tagid_l.append(str(tagid))
+                    # We found the tag
+                    continue
+                elif tagname is None or tagname == "":
+                    continue
+
+                if not has_tagset:
+                    tag_exist = tagname in tag_d.keys()
+                    assert (tag_exist or create_new_tags), (
+                        f"The tag '{tagname}'" +
+                        " does not exist while" +
+                        " creation of new tags" +
+                        " is not permitted"
+                    )
+                    if not tag_exist:
+                        tag_o = TagAnnotationWrapper(conn)
+                        tag_o.setValue(tagname)
+                        tag_o.save()
+                        tagid_d[tag_o.id] = tag_o
+                        tag_d[tagname] = tag_o.id
+                        print(f"Created new Tag for '{tagname}'")
+                    tagid_l.append(str(tag_d[tagname]))
+
+                else:  # has tagset
+                    tagset_exist = tagset in tagset_d.keys()
+                    tag_exist = (tagset_exist
+                                 and (tagname in tagtree_d[tagset].keys()))
+                    assert (tag_exist or create_new_tags), (
+                        f"The tag '{tagname}' " +
+                        f"in TagSet '{tagset}'" +
+                        " does not exist while" +
+                        " creation of new tags" +
+                        " is not permitted"
+                    )
+                    if not tag_exist:
+                        tag_o = TagAnnotationWrapper(conn)
+                        tag_o.setValue(tagname)
+                        tag_o.save()
+                        tagid_d[tag_o.id] = tag_o
+                        tag_d[tagname] = tag_o.id
+                        if not tagset_exist:
+                            tagset_o = TagAnnotationWrapper(conn)
+                            tagset_o.setValue(tagset)
+                            tagset_o.setNs(NSINSIGHTTAGSET)
+                            tagset_o.save()
+                            tagid_d[tagset_o.id] = conn.getObject(
+                                "TagAnnotation",
+                                tagset_o.id
+                            )
+                            tagset_d[tagset] = tagset_o.id
+                            print(f"Created new TagSet {tagset}:{tagset_o.id}")
+                        tagset_o = tagid_d[tagset_d[tagset]]
+                        link = AnnotationAnnotationLinkI()
+                        link.parent = tagset_o._obj
+                        link.child = tag_o._obj
+                        update.saveObject(link)
+                        tagtree_d[tagset][tagname] = tag_o.id
+                        print(f"Created new Tag for '{tagname}' " +
+                              f"in the tagset '{tagset}'")
+                    tagid_l.append(str(tagtree_d[tagset][tagname]))
+
+            # joined list of tag_ids instead of ambiguous names
+            row[col_idx] = split_on.join(tagid_l)
+        res_rows.append(row)
+    return res_rows, tag_d, tagset_d, tagtree_d, tagid_d
+
+
+def link_file_ann(conn, obj_to_link, file_ann):
+    """Link File Annotation to the Object, if not already linked."""
+    links = list(conn.getAnnotationLinks(
+        obj_to_link.OMERO_CLASS,
+        parent_ids=[obj_to_link.getId()],
+        ann_ids=[file_ann.getId()]
+    ))
+    if len(links) == 0:
+        obj_to_link.linkAnnotation(file_ann)
+
+
+def run_script():
+    # Cannot add fancy layout if we want auto fill and select of object ID
+    source_types = [
+        rstring("Project"), rstring("Dataset"), rstring("Image"),
+        rstring("Screen"), rstring("Plate"), rstring("Well"),
+        rstring("Acquisition"), rstring("Image"), rstring("Tag"),
+    ]
+
+    # Duplicate Image for UI, but not a problem for script
+    target_types = [
+        rstring(""),
rstring("Project"), + rstring("- Dataset"), rstring("-- Image"), + rstring("Screen"), rstring("- Plate"), + rstring("-- Well"), rstring("-- Acquisition"), + rstring("--- Image") + ] + + separators = ["guess", ";", ",", "TAB"] + + client = scripts.client( + 'Import from CSV', + """ + Import key-value pairs and tags from a CSV file. + \t + Check the guide for more information on parameters and errors: + https://guide-kvpairs-scripts.readthedocs.io/en/latest/index.html + \t + Default namespace: openmicroscopy.org/omero/client/mapAnnotation + """, # Tabs are needed to add line breaks in the HTML + + scripts.String( + P_DTYPE, optional=False, grouping="1", + description="Data type of the parent objects.", + values=source_types, default="Dataset"), + + scripts.List( + P_IDS, optional=False, grouping="1.1", + description="IDs of the parent objects").ofType(rlong(0)), + + scripts.String( + P_TARG_DTYPE, optional=False, grouping="1.2", + description="Data type to process from the selected " + + "parent objects.", + values=target_types, default=""), + + scripts.String( + P_FILE_ANN, optional=True, grouping="1.3", + description="If no file is provided, list of file IDs " + + "containing metadata to populate (one per ID). " + + "Otherwise, takes the most recent CSV " + + "on each parent object."), + + scripts.String( + P_NAMESPACE, + optional=True, grouping="1.4", + description="Namespace assigned to the key-value pairs. " + + "Default is the client " + + "namespace (editable in OMERO.web)."), + + scripts.Bool( + P_IMPORT_TAGS, optional=True, grouping="2", default=True, + description="Check this box to allow the import of tags."), + + scripts.Bool( + P_OWN_TAG, optional=True, grouping="2.1", default=False, + description="Restrict the usage of tags to the ones owned " + + "by the user. If checked, tags owned by others will not be " + + "considered for the creation of new tags."), + + scripts.Bool( + P_ALLOW_NEWTAG, optional=True, grouping="2.2", default=False, + description="Creates new tags and tagsets if the ones" + + " specified in the CSV do not exist."), + + scripts.Bool( + "Other parameters", optional=True, grouping="3", default=True, + description="Ticking or unticking this has no effect"), + + scripts.Bool( + P_EXCL_EMPTY, optional=True, grouping="3.1", default=True, + description="Skip the keys with empty values."), + + scripts.String( + P_CSVSEP, optional=True, grouping="3.2", + description="Separator used in the CSV file. 'guess' will " + + "attempt to detetect automatically which of " + + ",;\\t to use.", + values=separators, default="guess"), + + scripts.String( + P_SPLIT_CELL, optional=True, grouping="3.3", + default="", + description="Separator used to split cells into multiple " + + "key-value pairs."), + + scripts.List( + P_EXCL_COL, optional=True, grouping="3.4", + default=",,", + description="Columns to exclude from the key-value pairs. " + + " and correspond to the column name " + + "specified by the next two parameters. " + + " matches all {PROJECT, DATASET, " + + "SCREEN, PLATE, RUN, WELL}.").ofType(rstring("")), + + scripts.String( + P_TARG_COLID, optional=True, grouping="3.5", + default="OBJECT_ID", + description="The column name in the CSV containing " + + "the objects IDs."), + + scripts.String( + P_TARG_COLNAME, optional=True, grouping="3.6", + default="OBJECT_NAME", + description="The column name in the CSV containing " + + "the objects names. 
(used only if the column " + + "ID is not found"), + + authors=["Christian Evenhuis", "Tom Boissonnet", "Jens Wendt"], + institutions=["MIF UTS", "CAi HHU", "MiN WWU"], + contact="https://forum.image.sc/tag/omero", + version="2.0.0", + ) + + try: + params = parameters_parsing(client) + + # wrap client to use the Blitz Gateway + conn = BlitzGateway(client_obj=client) + message, robj = main_loop(conn, params) + client.setOutput("Message", rstring(message)) + if robj is not None: + client.setOutput("Result", robject(robj._obj)) + + except AssertionError as err: + # Display assertion errors in OMERO.web activities + client.setOutput("ERROR", rstring(err)) + raise AssertionError(str(err)) + + finally: + client.closeSession() + + +def parameters_parsing(client): + params = {} + # Param dict with defaults for optional parameters + params[P_FILE_ANN] = None + params[P_NAMESPACE] = None + params[P_SPLIT_CELL] = "" + + for key in client.getInputKeys(): + if client.getInput(key): + params[key] = client.getInput(key, unwrap=True) + + if params[P_TARG_DTYPE] == "": + params[P_TARG_DTYPE] = params[P_DTYPE] + elif " " in params[P_TARG_DTYPE]: + # Getting rid of the trailing '---' added for the UI + params[P_TARG_DTYPE] = params[P_TARG_DTYPE].split(" ")[1] + + assert params[P_TARG_DTYPE] in ALLOWED_PARAM[params[P_DTYPE]], \ + (f"{params['Target Data_Type']} is not a valid target for " + + f"{params['Data_Type']}.") + + if params[P_DTYPE] == "Tag": + assert params[P_FILE_ANN] is not None, \ + "File annotation ID must be given when using Tag as source" + + if ((params[P_FILE_ANN]) is not None + and ("," in params[P_FILE_ANN])): + # List of ID provided, have to do the split + params[P_FILE_ANN] = params[P_FILE_ANN].split(",") + else: + params[P_FILE_ANN] = [int(params[P_FILE_ANN])] + if len(params[P_FILE_ANN]) == 1: + # Poulate the parameter with None or same ID for all source + params[P_FILE_ANN] *= len(params[P_IDS]) + params["File_Annotation_multiplied"] = True + params[P_FILE_ANN] = list(map(int, params[P_FILE_ANN])) + + assert len(params[P_FILE_ANN]) == len(params[P_IDS]), \ + "Number of IDs and FileAnnotation IDs must match" + + # Replacing the placeholders and with values from params + to_exclude = list(map(lambda x: x.replace('', + params[P_TARG_COLID]), + params[P_EXCL_COL])) + to_exclude = list(map(lambda x: x.replace('', + params[P_TARG_COLNAME]), + to_exclude)) + if "" in to_exclude: + to_exclude.remove("") + to_exclude.extend(["PROJECT", "DATASET", "SCREEN", + "PLATE", "RUN", "WELL"]) + + params[P_EXCL_COL] = to_exclude + + print("Input parameters:") + keys = [P_DTYPE, P_IDS, P_TARG_DTYPE, P_FILE_ANN, + P_NAMESPACE, P_CSVSEP, P_EXCL_COL, P_TARG_COLID, + P_TARG_COLNAME, P_EXCL_EMPTY, P_SPLIT_CELL, + P_IMPORT_TAGS, P_OWN_TAG, P_ALLOW_NEWTAG] + + for k in keys: + print(f"\t- {k}: {params[k]}") + print("\n####################################\n") + + if params[P_CSVSEP] == "guess": + params[P_CSVSEP] = None + elif params[P_CSVSEP] == "TAB": + params[P_CSVSEP] = "\t" + + if params[P_DTYPE] == "Tag": + params[P_DTYPE] = "TagAnnotation" + if params[P_TARG_DTYPE] == "Acquisition": + params[P_TARG_DTYPE] = "PlateAcquisition" + + return params + + +if __name__ == "__main__": + run_script() diff --git a/omero/annotation_scripts/KeyVal_from_csv.py b/omero/annotation_scripts/KeyVal_from_csv.py deleted file mode 100644 index 1bd8f5797..000000000 --- a/omero/annotation_scripts/KeyVal_from_csv.py +++ /dev/null @@ -1,352 +0,0 @@ -# coding=utf-8 -""" - MIF/Add_Key_Val_from_csv.py - - Adds key-value (kv) metadata 
to images in a dataset from a csv file - The first column contains the filenames - The first row of the file contains the keys - The rest is the values for each file/key - ------------------------------------------------------------------------------ - Copyright (C) 2018 - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ------------------------------------------------------------------------------- -Created by Christian Evenhuis - -""" - -import omero -from omero.gateway import BlitzGateway -from omero.rtypes import rstring, rlong -import omero.scripts as scripts -from omero.cmd import Delete2 - -import sys -import csv -import copy -from math import floor - -from omero.util.populate_roi import DownloadingOriginalFileProvider - -from collections import OrderedDict - - -def get_existing_map_annotations(obj): - """Get all Map Annotations linked to the object""" - ord_dict = OrderedDict() - for ann in obj.listAnnotations(): - if isinstance(ann, omero.gateway.MapAnnotationWrapper): - kvs = ann.getValue() - for k, v in kvs: - if k not in ord_dict: - ord_dict[k] = set() - ord_dict[k].add(v) - return ord_dict - - -def remove_map_annotations(conn, object): - """Remove ALL Map Annotations on the object""" - anns = list(object.listAnnotations()) - mapann_ids = [ann.id for ann in anns - if isinstance(ann, omero.gateway.MapAnnotationWrapper)] - - try: - delete = Delete2(targetObjects={'MapAnnotation': mapann_ids}) - handle = conn.c.sf.submit(delete) - conn.c.waitOnCmd(handle, loops=10, ms=500, failonerror=True, - failontimeout=False, closehandle=False) - - except Exception as ex: - print("Failed to delete links: {}".format(ex.message)) - return - - -def get_original_file(omero_object, file_ann_id=None): - """Find file linked to object. 
Option to filter by ID.""" - file_ann = None - for ann in omero_object.listAnnotations(): - if isinstance(ann, omero.gateway.FileAnnotationWrapper): - file_name = ann.getFile().getName() - # Pick file by Ann ID (or name if ID is None) - if (file_ann_id is None and file_name.endswith(".csv")) or ( - ann.getId() == file_ann_id): - file_ann = ann - if file_ann is None: - sys.stderr.write("Error: File does not exist.\n") - sys.exit(1) - - return file_ann.getFile()._obj - - -def link_file_ann(conn, object_type, object_id, file_ann_id): - """Link File Annotation to the Object, if not already linked.""" - file_ann = conn.getObject("Annotation", file_ann_id) - if file_ann is None: - sys.stderr.write("Error: File Annotation not found: %s.\n" - % file_ann_id) - sys.exit(1) - omero_object = conn.getObject(object_type, object_id) - # Check for existing links - links = list(conn.getAnnotationLinks(object_type, parent_ids=[object_id], - ann_ids=[file_ann_id])) - if len(links) == 0: - omero_object.linkAnnotation(file_ann) - - -def get_children_by_name(omero_obj): - - images_by_name = {} - wells_by_name = {} - - if omero_obj.OMERO_CLASS == "Dataset": - for img in omero_obj.listChildren(): - img_name = img.getName() - if img_name in images_by_name: - sys.stderr.write("File names not unique: {}".format(img_name)) - sys.exit(1) - images_by_name[img_name] = img - elif omero_obj.OMERO_CLASS == "Plate": - for well in omero_obj.listChildren(): - label = well.getWellPos() - wells_by_name[label] = well - for ws in well.listChildren(): - img = ws.getImage() - img_name = img.getName() - if img_name in images_by_name: - sys.stderr.write( - "File names not unique: {}".format(img_name)) - sys.exit(1) - images_by_name[img_name] = img - else: - sys.stderr.write(f'{omero_obj.OMERO_CLASS} objects not supported') - - return images_by_name, wells_by_name - - -def keyval_from_csv(conn, script_params): - data_type = script_params["Data_Type"] - ids = script_params["IDs"] - - nimg_processed = 0 - nimg_updated = 0 - missing_names = 0 - - for target_object in conn.getObjects(data_type, ids): - - # file_ann_id is Optional. 
If not supplied, use first .csv attached - file_ann_id = None - if "File_Annotation" in script_params: - file_ann_id = int(script_params["File_Annotation"]) - link_file_ann(conn, data_type, target_object.id, file_ann_id) - print("set ann id", file_ann_id) - - original_file = get_original_file(target_object, file_ann_id) - print("Original File", original_file.id.val, original_file.name.val) - provider = DownloadingOriginalFileProvider(conn) - - # read the csv - temp_file = provider.get_original_file_data(original_file) - # Needs omero-py 5.9.1 or later - temp_name = temp_file.name - file_length = original_file.size.val - with open(temp_name, 'rt', encoding='utf-8-sig') as file_handle: - try: - delimiter = csv.Sniffer().sniff( - file_handle.read(floor(file_length/4)), ",;\t").delimiter - print("Using delimiter: ", delimiter, - f" after reading {floor(file_length/4)} characters") - except Exception: - file_handle.seek(0) - try: - delimiter = csv.Sniffer().sniff( - file_handle.read(floor(file_length/2)), - ",;\t").delimiter - print("Using delimiter: ", delimiter, - f"after reading {floor(file_length/2)} characters") - except Exception: - file_handle.seek(0) - try: - delimiter = csv.Sniffer().sniff( - file_handle.read(floor(file_length*0.75)), - ",;\t").delimiter - print("Using delimiter: ", delimiter, - f" after reading {floor(file_length*0.75)}" - " characters") - except Exception: - print("Failed to sniff delimiter, using ','") - delimiter = "," - - # reset to start and read whole file... - file_handle.seek(0) - data = list(csv.reader(file_handle, delimiter=delimiter)) - - # keys are in the header row - header = data[0] - print("header", header) - - # create dictionaries for well/image name:object - images_by_name, wells_by_name = get_children_by_name(target_object) - nimg_processed += len(images_by_name) - - image_index = header.index("image") if "image" in header else -1 - well_index = header.index("well") if "well" in header else -1 - plate_index = header.index("plate") if "plate" in header else -1 - if image_index == -1: - # first header is the img-name column, if 'image' not found - image_index = 0 - print("image_index:", image_index, "well_index:", well_index, - "plate_index:", plate_index) - rows = data[1:] - - # loop over csv rows... 
- for row in rows: - # try to find 'image', then 'well', then 'plate' - image_name = row[image_index] - well_name = None - plate_name = None - obj = None - if len(image_name) > 0: - if image_name in images_by_name: - obj = images_by_name[image_name] - print("Annotating Image:", obj.id, image_name) - else: - missing_names += 1 - print("Image not found:", image_name) - if obj is None and well_index > -1 and len(row[well_index]) > 0: - well_name = row[well_index] - if well_name in wells_by_name: - obj = wells_by_name[well_name] - print("Annotating Well:", obj.id, well_name) - else: - missing_names += 1 - print("Well not found:", well_name) - # always check that Plate name matches if it is given: - if data_type == "Plate" and plate_index > -1 and \ - len(row[plate_index]) > 0: - if row[plate_index] != target_object.name: - print("plate", row[plate_index], - "doesn't match object", target_object.name) - continue - if obj is None: - obj = target_object - print("Annotating Plate:", obj.id, plate_name) - if obj is None: - msg = "Can't find object by image, well or plate name" - print(msg) - continue - - cols_to_ignore = [image_index, well_index, plate_index] - updated = annotate_object(conn, obj, header, row, cols_to_ignore) - if updated: - nimg_updated += 1 - - message = "Added kv pairs to {}/{} files".format( - nimg_updated, nimg_processed) - if missing_names > 0: - message += f". {missing_names} image names not found." - return message - - -def annotate_object(conn, obj, header, row, cols_to_ignore): - - obj_updated = False - existing_kv = get_existing_map_annotations(obj) - updated_kv = copy.deepcopy(existing_kv) - print("Existing kv:") - for k, vset in existing_kv.items(): - for v in vset: - print(" ", k, v) - - print("Adding kv:") - for i in range(len(row)): - if i in cols_to_ignore or i >= len(header): - continue - key = header[i].strip() - vals = row[i].strip().split(';') - if len(vals) > 0: - for val in vals: - if len(val) > 0: - if key not in updated_kv: - updated_kv[key] = set() - print(" ", key, val) - updated_kv[key].add(val) - - if existing_kv != updated_kv: - obj_updated = True - print("The key-values pairs are different") - remove_map_annotations(conn, obj) - map_ann = omero.gateway.MapAnnotationWrapper(conn) - namespace = omero.constants.metadata.NSCLIENTMAPANNOTATION - map_ann.setNs(namespace) - # convert the ordered dict to a list of lists - kv_list = [] - for k, vset in updated_kv.items(): - for v in vset: - kv_list.append([k, v]) - map_ann.setValue(kv_list) - map_ann.save() - print("Map Annotation created", map_ann.id) - obj.linkAnnotation(map_ann) - else: - print("No change change in kv") - - return obj_updated - - -def run_script(): - - data_types = [rstring('Dataset'), rstring('Plate')] - client = scripts.client( - 'Add_Key_Val_from_csv', - """ - This script processes a csv file, attached to a Dataset - """, - scripts.String( - "Data_Type", optional=False, grouping="1", - description="Choose source of images", - values=data_types, default="Dataset"), - - scripts.List( - "IDs", optional=False, grouping="2", - description="Dataset or Plate ID(s).").ofType(rlong(0)), - - scripts.String( - "File_Annotation", grouping="3", - description="File ID containing metadata to populate."), - - authors=["Christian Evenhuis"], - institutions=["MIF UTS"], - contact="https://forum.image.sc/tag/omero" - ) - - try: - # process the list of args above. 
- script_params = {} - for key in client.getInputKeys(): - if client.getInput(key): - script_params[key] = client.getInput(key, unwrap=True) - - # wrap client to use the Blitz Gateway - conn = BlitzGateway(client_obj=client) - print("script params") - for k, v in script_params.items(): - print(k, v) - message = keyval_from_csv(conn, script_params) - client.setOutput("Message", rstring(message)) - - finally: - client.closeSession() - - -if __name__ == "__main__": - run_script() diff --git a/omero/annotation_scripts/KeyVal_to_csv.py b/omero/annotation_scripts/KeyVal_to_csv.py deleted file mode 100644 index b29314081..000000000 --- a/omero/annotation_scripts/KeyVal_to_csv.py +++ /dev/null @@ -1,191 +0,0 @@ -# coding=utf-8 -""" - MIF/Key_Val_to_csv.py - - Reads the metadata associated with the images in a dataset - a creates a csv file attached to dataset - ------------------------------------------------------------------------------ - Copyright (C) 2018 - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ------------------------------------------------------------------------------- -Created by Christian Evenhuis - -""" - -import omero -from omero.gateway import BlitzGateway -from omero.rtypes import rstring, rlong -import omero.scripts as scripts -from omero.cmd import Delete2 - -import tempfile - -import os - -from collections import OrderedDict - - -def get_existing_map_annotions(obj): - ord_dict = OrderedDict() - for ann in obj.listAnnotations(): - if isinstance(ann, omero.gateway.MapAnnotationWrapper): - kvs = ann.getValue() - for k, v in kvs: - if k not in ord_dict: - ord_dict[k] = set() - ord_dict[k].add(v) - return ord_dict - - -def attach_csv_file(conn, obj, data): - ''' writes the data (list of dicts) to a file - and attaches it to the object - conn : connection to OMERO (need to annotation creation - obj : the object to attach the file file to - data : the data - ''' - # create the tmp directory - tmp_dir = tempfile.mkdtemp(prefix='MIF_meta') - (fd, tmp_file) = tempfile.mkstemp(dir=tmp_dir, text=True) - tfile = os.fdopen(fd, 'w') - - # get the list of keys and maximum number of occurences - # A key can appear multiple times, for example multiple dyes can be used - key_union = OrderedDict() - for img_n, img_kv in data.items(): - for key, vset in img_kv.items(): - key_union[key] = max(key_union.get(key, 0), len(vset)) - - # convience function to write a csv line - def to_csv(ll): - nl = len(ll) - fmstr = "{}, "*(nl-1)+"{}\n" - return fmstr.format(*ll) - - # construct the header of the CSV file - header = ['filename'] - for key, count in key_union.items(): - header.extend([key] * count) # keys can repeat multiple times - tfile.write(to_csv(header)) - - # write the keys values for each file - for filename, kv_dict in data.items(): - row = [""] * len(header) # empty row - row[0] = filename - for key, vset in kv_dict.items(): - n0 = header.index(key) # 
first occurence of key in header - for i, val in enumerate(vset): - row[n0 + i] = val - tfile.write(to_csv(row)) - tfile.close() - - name = "{}_metadata_out.csv".format(obj.getName()) - # link it to the object - ann = conn.createFileAnnfromLocalFile( - tmp_file, origFilePathAndName=name, - ns='MIF_test') - ann = obj.linkAnnotation(ann) - - # remove the tmp file - os.remove(tmp_file) - os.rmdir(tmp_dir) - return "done" - - -def run_script(): - - data_types = [rstring('Dataset')] - client = scripts.client( - 'Create_Metadata_csv', - """ - This script reads the metadata attached data set and creates - a csv file attached to the Dataset - """, - scripts.String( - "Data_Type", optional=False, grouping="1", - description="Choose source of images", - values=data_types, default="Dataset"), - - scripts.List( - "IDs", optional=False, grouping="2", - description="Plate or Screen ID.").ofType(rlong(0)), - - - authors=["Christian Evenhuis"], - institutions=["MIF UTS"], - contact="https://forum.image.sc/tag/omero" - ) - - try: - # process the list of args above. - script_params = {} - for key in client.getInputKeys(): - if client.getInput(key): - script_params[key] = client.getInput(key, unwrap=True) - - # wrap client to use the Blitz Gateway - conn = BlitzGateway(client_obj=client) - print("connection made") - - data_type = script_params["Data_Type"] - print(data_type) - ids = script_params["IDs"] - datasets = list(conn.getObjects(data_type, ids)) - print(ids) - print("datasets:") - print(datasets) - for ds in datasets: - # name of the file - csv_name = "{}_metadata_out.csv".format(ds.getName()) - print(csv_name) - - # remove the csv if it exists - for ann in ds.listAnnotations(): - if isinstance(ann, omero.gateway.FileAnnotationWrapper): - if ann.getFileName() == csv_name: - # if the name matches delete it - try: - delete = Delete2( - targetObjects={'FileAnnotation': - [ann.getId()]}) - handle = conn.c.sf.submit(delete) - conn.c.waitOnCmd( - handle, loops=10, - ms=500, failonerror=True, - failontimeout=False, closehandle=False) - print("Deleted existing csv") - except Exception as ex: - print("Failed to delete existing csv: {}".format( - ex.message)) - else: - print("No exisiting file") - - # assemble the metadata into an OrderedDict - kv_dict = OrderedDict() - for img in ds.listChildren(): - fn = img.getName() - kv_dict[fn] = get_existing_map_annotions(img) - - # attach the data - mess = attach_csv_file(conn, ds, kv_dict) - print(mess) - mess = "done" - client.setOutput("Message", rstring(mess)) - - finally: - client.closeSession() - - -if __name__ == "__main__": - run_script() diff --git a/omero/annotation_scripts/README.md b/omero/annotation_scripts/README.md index b50f9c597..87073c8ee 100644 --- a/omero/annotation_scripts/README.md +++ b/omero/annotation_scripts/README.md @@ -7,43 +7,38 @@ This is the central repository for community contributed scripts to [omero-web]( These scripts, in combination with the [omero.forms](https://pypi.org/project/omero-forms), support the bulk annotation workflow described in [this blog post](https://mpievolbio-scicomp.pages.gwdg.de/blog/post/2020-09-03_omerobulkannotation/). +For the new scripts version of 2024, you can follow this guide: +https://guide-kvpairs-scripts.readthedocs.io/en/latest/walkthrough.html + Content ------- -This repository provides five scripts: -* `01-KeyVal_from_Description.py`: Parses a Dataset/Project/Screen description and converts - key:value pairs into map annotations in the same container. 
-* `01-KeyVal_to_csv.py`: Converts a dataset map annotation into a table with one - record for every image in the dataset. Columns are named according to map -annotation keys. The first column contains the image filename (or id???) -* `03-KeyVal_from_csv.py`: Parses a given csv table attachment and converts each - record into a map annotation for the image identified via the entry in the -first column (filename or image id). -* `04-Remove_KeyVal.py`: Removes all map annotations from a dataset and all - contained images. -* `05-KeyVal_from_Filename.py`: Creates image map annotation by tokenizing the - filename. +This repository provides four scripts: +* `Import_from_csv.py`: Reads a csv file and converts each row into a map annotation +for the identified object (image, dataset, project, run, well, plate, screen). +* `Export_to_csv.py`: Exports the map annotations of objects into a csv file. +* `Remove_KeyVal.py`: Removes the key-value pairs of an object associated with +a given namespace. +* `Convert_KeyVal_namespace.py`: Converts the namespace of map annotations. Installation --------------- The scripts must be placed in the `OMERODIR/lib/scripts/omero` directory of your omero installation, preferrentially in a seperatate subdirectory, e.g. `Bulk -Annotation/`. +Annotation/`. -`OMERODIR` -refers to the root directory of you omero server. If you followed the -installation procedures, you should have the `$OMERODIR` environment variable set. -Logged in omero admins can also use the "Upload scripts" button in the *Gears* -menu. +Follow [these instructions](https://omero.readthedocs.io/en/stable/developers/scripts/index.html#downloading-and-installing-scripts) to install/update the scripts. +You should also configure the Export_to_csv script so that it returns the csv file as a direct download link: +https://guide-kvpairs-scripts.readthedocs.io/en/latest/setup.html#configuring-the-export-script History -------- This repository started as a fork of [evehuis/omero-user-scripts](). Ownership was transferred to @CFGrote after merging a pull request that fixed a number of bugs and ported the original code from python2.7 to python3.x +In 2024, the scripts were reworked by Tom Boissonnet (HHU Düsseldorf) and Jens Wendt (WWU Münster) to extend the annotation to all OMERO objects, and to include a new script to convert namespaces of map annotations. + Contributions ---------------- diff --git a/omero/annotation_scripts/Remove_KeyVal.py b/omero/annotation_scripts/Remove_KeyVal.py index d2bfb47f0..e9c85a6e6 100644 --- a/omero/annotation_scripts/Remove_KeyVal.py +++ b/omero/annotation_scripts/Remove_KeyVal.py @@ -1,15 +1,13 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """ - MIF/Key_Value_remove.py" + Remove_KeyVal.py - Remove all key-value pairs from: - * selected image(s) - * selected dataset(s) and the images contained in them - * selected screens(s) and the wells & images contained in them + Remove all key-value pairs associated with a namespace from + objects on OMERO. 
----------------------------------------------------------------------------- - Copyright (C) 2018 - 2022 + Copyright (C) 2018 - 2024 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -28,128 +26,277 @@ from omero.gateway import BlitzGateway import omero -from omero.rtypes import rlong, rstring, wrap +from omero.rtypes import rlong, rstring, robject +from omero.constants.metadata import NSCLIENTMAPANNOTATION import omero.scripts as scripts -def remove_map_annotations(conn, obj): - anns = list(obj.listAnnotations()) - mapann_ids = [ann.id for ann in anns - if isinstance(ann, omero.gateway.MapAnnotationWrapper)] - if len(mapann_ids) == 0: - return 0 +CHILD_OBJECTS = { + "Project": "Dataset", + "Dataset": "Image", + "Screen": "Plate", + "Plate": "Well", + "Well": "WellSample", + "WellSample": "Image" +} - print("Map Annotation IDs to delete:", mapann_ids) - try: - conn.deleteObjects("Annotation", mapann_ids) - return 0 - except Exception: - print("Failed to delete links") - return 1 - return +ALLOWED_PARAM = { + "Project": ["Project", "Dataset", "Image"], + "Dataset": ["Dataset", "Image"], + "Image": ["Image"], + "Screen": ["Screen", "Plate", "Well", "Acquisition", "Image"], + "Plate": ["Plate", "Well", "Acquisition", "Image"], + "Well": ["Well", "Image"], + "Acquisition": ["Acquisition", "Image"], + "Tag": ["Project", "Dataset", "Image", + "Screen", "Plate", "Well", "Acquisition"] +} + +P_DTYPE = "Data_Type" # Do not change +P_IDS = "IDs" # Do not change +P_TARG_DTYPE = "Target Data_Type" +P_NAMESPACE = "Namespace (blank for default)" +P_AGREEMENT = ("I understand what I am doing and that this will result " + + "in a batch deletion of key-value pairs from the server") + + +def get_children_recursive(source_object, target_type): + if CHILD_OBJECTS[source_object.OMERO_CLASS] == target_type: + # Stop condition, we return the source_obj children + if source_object.OMERO_CLASS != "WellSample": + return source_object.listChildren() + else: + return [source_object.getImage()] + else: # Not yet the target + result = [] + for child_obj in source_object.listChildren(): + # Going down in the Hierarchy list + result.extend(get_children_recursive(child_obj, target_type)) + return result + + +def target_iterator(conn, source_object, target_type, is_tag): + if target_type == source_object.OMERO_CLASS: + target_obj_l = [source_object] + elif source_object.OMERO_CLASS == "PlateAcquisition": + # Check if there is more than one Run, otherwise + # it's equivalent to start from a plate (and faster this way) + plate_o = source_object.getParent() + wellsamp_l = get_children_recursive(plate_o, "WellSample") + if len(list(plate_o.listPlateAcquisitions())) > 1: + # Only case where we need to filter on PlateAcquisition + run_id = source_object.getId() + wellsamp_l = filter(lambda x: x._obj.plateAcquisition._id._val + == run_id, wellsamp_l) + target_obj_l = [wellsamp.getImage() for wellsamp in wellsamp_l] + elif target_type == "PlateAcquisition": + # No direct children access from a plate + if source_object.OMERO_CLASS == "Screen": + plate_l = get_children_recursive(source_object, "Plate") + elif source_object.OMERO_CLASS == "Plate": + plate_l = [source_object] + target_obj_l = [r for p in plate_l for r in p.listPlateAcquisitions()] + elif is_tag: + target_obj_l = conn.getObjectsByAnnotations(target_type, + [source_object.getId()]) + # Need that to load objects + 
obj_ids = [o.getId() for o in target_obj_l] + target_obj_l = list(conn.getObjects(target_type, obj_ids)) + else: + target_obj_l = get_children_recursive(source_object, + target_type) + print(f"Iterating objects from {source_object}:") + for target_obj in target_obj_l: + print(f"\t- {target_obj}") + yield target_obj -def get_objects(conn, script_params): + +def main_loop(conn, script_params): """ - File the list of objects - @param conn: Blitz Gateway connection wrapper - @param script_params: A map of the input parameters + For every object: + - Find annotations in the namespace and remove """ - # we know script_params will have "Data_Type" and "IDs" since these - # parameters are not optional - data_type = script_params["Data_Type"] - ids = script_params["IDs"] - - # data_type is 'Dataset', 'Plate' or 'Image' so we can use it directly in - objs = list(conn.getObjects(data_type, ids)) - - if len(objs) == 0: - print("No {} found for specified IDs".format(data_type)) - return - - objs_ret = [] - - if data_type == 'Dataset': - for ds in objs: - print("Processing Images from Dataset: {}".format(ds.getName())) - objs_ret.append(ds) - imgs = list(ds.listChildren()) - objs_ret.extend(imgs) - elif data_type == "Plate": - for plate in objs: - print("Processing Wells and Images from Plate:", plate.getName()) - objs_ret.append(plate) - for well in plate.listChildren(): - objs_ret.append(well) - for ws in well.listChildren(): - img = ws.getImage() - objs_ret.append(img) - else: - print("Processing Images identified by ID") - objs_ret = objs + source_type = script_params[P_DTYPE] + target_type = script_params[P_TARG_DTYPE] + source_ids = script_params[P_IDS] + namespace_l = script_params[P_NAMESPACE] - return objs_ret + nsuccess = 0 + ntotal = 0 + result_obj = None + for source_object in conn.getObjects(source_type, source_ids): + is_tag = source_type == "TagAnnotation" + for target_obj in target_iterator(conn, source_object, + target_type, is_tag): + success = remove_map_annotations(conn, target_obj, namespace_l) + if success: + nsuccess += 1 + if result_obj is None: + result_obj = target_obj -if __name__ == "__main__": + ntotal += 1 + print("\n------------------------------------\n") + message = f"Key value data deleted from {nsuccess} of {ntotal} objects" + + return message, result_obj + + +def remove_map_annotations(conn, obj, namespace_l): + mapann_ids = [] + forbidden_deletion = [] + for namespace in namespace_l: + p = {} if namespace == "*" else {"ns": namespace} + for ann in obj.listAnnotations(**p): + if isinstance(ann, omero.gateway.MapAnnotationWrapper): + if ann.canDelete(): # If not, skipping it + mapann_ids.append(ann.id) + else: + forbidden_deletion.append(ann.id) + + if len(mapann_ids) == 0: + return 0 + print(f"\tMap Annotation IDs to delete: {mapann_ids}") + if len(forbidden_deletion) > 0: + print("\tMap Annotation IDs skipped (not permitted):", + f"{forbidden_deletion}\n") + try: + conn.deleteObjects("Annotation", mapann_ids) + return 1 + except Exception: + print("Failed to delete links") + return 0 + + +def run_script(): """ The main entry point of the script, as called by the client via the scripting service, passing the required parameters. 
""" - data_types = wrap(['Dataset', 'Plate', 'Image']) + # Cannot add fancy layout if we want auto fill and selct of object ID + source_types = [ + rstring("Project"), rstring("Dataset"), rstring("Image"), + rstring("Screen"), rstring("Plate"), rstring("Well"), + rstring("Acquisition"), rstring("Image"), rstring("Tag"), + ] + + # Duplicate Image for UI, but not a problem for script + target_types = [ + rstring(""), rstring("Project"), + rstring("- Dataset"), rstring("-- Image"), + rstring("Screen"), rstring("- Plate"), + rstring("-- Well"), rstring("-- Acquisition"), + rstring("--- Image") + ] # Here we define the script name and description. # Good practice to put url here to give users more guidance on how to run # your script. client = scripts.client( - 'Remove_Key_Value.py', - ("Remove key-value pairs from" - " Image IDs or by the Dataset IDs.\nSee" - " http://www.openmicroscopy.org/site/support/omero5.2/developers/" - "scripts/user-guide.html for the tutorial that uses this script."), + 'Remove Key-Value pairs', + """ + Deletes key-value pairs of the selected objects. + \t + Check the guide for more information on parameters and errors: + https://guide-kvpairs-scripts.readthedocs.io/en/latest/index.html + \t + Default namespace: openmicroscopy.org/omero/client/mapAnnotation + """, # Tabs are needed to add line breaks in the HTML scripts.String( - "Data_Type", optional=False, grouping="1", - description="The data you want to work with.", values=data_types, - default="Dataset"), + P_DTYPE, optional=False, grouping="1", + description="Data type of the parent objects.", + values=source_types, default="Dataset"), scripts.List( - "IDs", optional=False, grouping="2", - description="List of Dataset IDs or Image IDs").ofType(rlong(0)), + P_IDS, optional=False, grouping="1.1", + description="IDs of the parent objects").ofType(rlong(0)), + + scripts.String( + P_TARG_DTYPE, optional=False, grouping="1.2", + description="Data type to process from the selected " + + "parent objects.", + values=target_types, default=""), - authors=["Christian Evenhuis", "MIF"], - institutions=["University of Technology Sydney"], - contact="https://forum.image.sc/tag/omero" + scripts.List( + P_NAMESPACE, optional=True, + grouping="1.3", + description="Namespace(s) of the key-value pairs to " + + "delete. 
Client namespace by default, " + + "'*' for all.").ofType(rstring("")), + + scripts.Bool( + P_AGREEMENT, optional=True, grouping="2", + description="Make sure that you understood the scope of " + + "what will be deleted."), + + authors=["Christian Evenhuis", "MIF", "Tom Boissonnet"], + institutions=["University of Technology Sydney", "CAi HHU"], + contact="https://forum.image.sc/tag/omero", + version="2.0.0", ) try: - script_params = {} - for key in client.getInputKeys(): - if client.getInput(key): - # unwrap rtypes to String, Integer etc - script_params[key] = client.getInput(key, unwrap=True) - - print(script_params) # handy to have inputs in the std-out log + params = parameters_parsing(client) + print("Input parameters:") + keys = [P_DTYPE, P_IDS, P_TARG_DTYPE, P_NAMESPACE] + for k in keys: + print(f"\t- {k}: {params[k]}") + print("\n####################################\n") # wrap client to use the Blitz Gateway conn = BlitzGateway(client_obj=client) + message, robj = main_loop(conn, params) + client.setOutput("Message", rstring(message)) + if robj is not None: + client.setOutput("Result", robject(robj._obj)) + except AssertionError as err: + # Display assertion errors in OMERO.web activities + client.setOutput("ERROR", rstring(err)) + raise AssertionError(str(err)) + finally: + client.closeSession() - # do the editing... - objs = get_objects(conn, script_params) - nfailed = 0 - for obj in objs: - print("Processing object:", obj) - ret = remove_map_annotations(conn, obj) - nfailed = nfailed + ret +def parameters_parsing(client): + params = {} + # Param dict with defaults for optional parameters + params[P_NAMESPACE] = [NSCLIENTMAPANNOTATION] - # now handle the result, displaying message and returning image if - # appropriate - nobjs = len(objs) - message = "Key value data deleted from {} of {} objects".format( - nobjs-nfailed, nobjs) - client.setOutput("Message", rstring(message)) + for key in client.getInputKeys(): + if client.getInput(key): + # unwrap rtypes to String, Integer etc + params[key] = client.getInput(key, unwrap=True) - finally: - client.closeSession() + assert params[P_AGREEMENT], "Please tick the box to confirm that you " +\ + "understood the risks." + + if params[P_TARG_DTYPE] == "": + params[P_TARG_DTYPE] = params[P_DTYPE] + elif " " in params[P_TARG_DTYPE]: + # Getting rid of the trailing '---' added for the UI + params[P_TARG_DTYPE] = params[P_TARG_DTYPE].split(" ")[1] + + assert params[P_TARG_DTYPE] in ALLOWED_PARAM[params[P_DTYPE]], \ + (f"{params['Target Data_Type']} is not a valid target for " + + f"{params['Data_Type']}.") + + if params[P_DTYPE] == "Tag": + params[P_DTYPE] = "TagAnnotation" + + if params[P_TARG_DTYPE] == "Acquisition": + params[P_TARG_DTYPE] = "PlateAcquisition" + + # Remove duplicate entries from namespace list + tmp = params[P_NAMESPACE] + if "*" in tmp: + tmp = ["*"] + params[P_NAMESPACE] = list(set(tmp)) + + return params + + +if __name__ == "__main__": + run_script() diff --git a/test/integration/test_annotation_scripts.py b/test/integration/test_annotation_scripts.py new file mode 100644 index 000000000..db916fff5 --- /dev/null +++ b/test/integration/test_annotation_scripts.py @@ -0,0 +1,656 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# +# Copyright (C) 2016 University of Dundee & Open Microscopy Environment. +# All rights reserved. 
Use is subject to license terms supplied in LICENSE.txt +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +""" + Integration test for annotation scripts. +""" + +from __future__ import print_function +import omero +from omero.gateway import BlitzGateway +from omero.model import AnnotationAnnotationLinkI, MapAnnotationI +from omero.constants.metadata import NSCLIENTMAPANNOTATION, NSINSIGHTTAGSET +from omero.rtypes import rstring, rlist, rbool, rlong +from omero.util.temp_files import create_path +import omero.scripts +from script import get_file_contents + +import pytest +from script import ScriptTest +from script import run_script + + +import_script = "/omero/annotation_scripts/Import_from_csv.py" +export_script = "/omero/annotation_scripts/Export_to_csv.py" +remove_script = "/omero/annotation_scripts/Remove_KeyVal.py" +convert_script = "/omero/annotation_scripts/Convert_KeyVal_namespace.py" + +DEFAULT_IMPORT_ARGS = { + "CSV separator": rstring("guess"), + "Columns to exclude": rlist([ + rstring("<ID>"), + rstring("<NAME>"), + rstring("<ALL>") + ]), + "Target ID colname": rstring("OBJECT_ID"), + "Target name colname": rstring("OBJECT_NAME"), + "Exclude empty values": rbool(False), + "Import tags": rbool(False), + "Only use personal tags": rbool(False), + "Allow tag creation": rbool(False), +} + + +def link_file_plate(client, plate, cvs_file): + conn = BlitzGateway(client_obj=client) + fa = conn.createFileAnnfromLocalFile(cvs_file, mimetype="text/csv") + assert fa is not None + assert fa.id > 0 + link = omero.model.PlateAnnotationLinkI() + link.setParent(plate) + link.setChild(omero.model.FileAnnotationI(fa.id, False)) + client.getSession().getUpdateService().saveAndReturnObject(link) + return fa + + +class TestAnnotationScripts(ScriptTest): + + @pytest.mark.parametrize('import_tag', [True, False]) + @pytest.mark.parametrize('tag_creation', [True, False]) + @pytest.mark.parametrize('ns', [ + "", NSCLIENTMAPANNOTATION, "otherNS" + ]) + @pytest.mark.parametrize('ns_in_csv', [True, False]) + def test_import(self, import_tag, tag_creation, ns, ns_in_csv): + """ + Test various import options with a simple CSV + """ + sid = super(TestAnnotationScripts, self).get_script(import_script) + assert sid > 0 + + client, user = self.new_client_and_user() + + n_well = 3 + plates = self.import_plates(client, plate_cols=n_well, plate_rows=1) + plate = plates[0] + + cvs_file = create_path("test_kvp_name", ".csv") + # create a file annotation + + ns_str = "NAMESPACE" + "".join([f";{ns}" for i in range(3)]) + with open(cvs_file.abspath(), 'w') as f: + if ns_in_csv: + f.write(ns_str + "\n") + f.write("OBJECT_NAME; key_1; key_2; key_3\n") + f.write("A1; val_A; val_B; val_C" + "\n") + f.write("A2; val_D; val_E; val_F" + "\n") + f.write("A3; val_G; val_H; val_I" + "\n") + + fa = link_file_plate(client, plate, cvs_file) + + # run the script + args = 
DEFAULT_IMPORT_ARGS.copy() + args["Data_Type"] = rstring("Plate") + args["IDs"] = rlist([rlong(plate.id.val)]) + args["Target Data_Type"] = rstring("-- Well") + args["File_Annotation"] = rstring(str(fa.id)) + args["Import tags"] = rbool(import_tag) + args["Allow tag creation"] = rbool(tag_creation) + if not ns_in_csv and ns != "": + args["Namespace (blank for default or from csv)"] = rstring(ns) + + msg = run_script(client, sid, args, "Message") + + conn = BlitzGateway(client_obj=client) + assert msg._val == f"Added Annotations to {n_well}/{n_well} Well(s)" + plate_o = conn.getObject("Plate", plate.id.val) + list_well = list(plate_o.listChildren()) + list_well = sorted(list_well, key=lambda w: w.getWellPos()) + + well_a1, well_a2, well_a3 = list_well + + assert well_a1.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a2.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a3.getAnnotationCounts()["MapAnnotation"] == 1 + + if ns == "": + ns = NSCLIENTMAPANNOTATION + + value = list(well_a1.listAnnotations(ns=ns))[0].getValue() + assert len(value) == 3 + assert value[0] == ("key_1", "val_A") + assert value[1] == ("key_2", "val_B") + assert value[2] == ("key_3", "val_C") + + value = list(well_a2.listAnnotations(ns=ns))[0].getValue() + assert len(value) == 3 + assert value[0] == ("key_1", "val_D") + assert value[1] == ("key_2", "val_E") + assert value[2] == ("key_3", "val_F") + + value = list(well_a3.listAnnotations(ns=ns))[0].getValue() + assert len(value) == 3 + assert value[0] == ("key_1", "val_G") + assert value[1] == ("key_2", "val_H") + assert value[2] == ("key_3", "val_I") + + @pytest.mark.parametrize('import_tag', [True, False]) + @pytest.mark.parametrize('tag_creation', [True, False]) + def test_import_tags(self, import_tag, tag_creation): + """ + Test the import of tags from a CSV with tag information + """ + sid = super(TestAnnotationScripts, self).get_script(import_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + update = conn.getUpdateService() + + if not tag_creation: # Create the tags ahead + self.make_tag(name="tail", client=client) + self.make_tag(name="head", client=client) + self.make_tag(name="mouse", client=client) + + tagset = self.make_tag( + name="condition", ns=NSINSIGHTTAGSET, client=client + ) + tag1 = self.make_tag(name="ctrl", client=client) + tag2 = self.make_tag(name="test", client=client) + + link = AnnotationAnnotationLinkI() + link.setParent(tagset) + link.setChild(tag1) + update.saveObject(link) + tagset = conn.getObject("TagAnnotation", tagset.id.val)._obj + link = AnnotationAnnotationLinkI() + link.setParent(tagset) + link.setChild(tag2) + update.saveObject(link) + + n_well = 3 + plates = self.import_plates(client, plate_cols=n_well, plate_rows=1) + plate = plates[0] + + cvs_file = create_path("test_kvp_name", ".csv") + # create a file annotation + + with open(cvs_file.abspath(), 'w') as f: + f.write("OBJECT_NAME; key_1; tag; tag\n") + f.write("A1; val_A; ctrl[condition]; mouse,tail\n") + f.write("A2; val_B; test[condition],head;\n") + f.write("A3; val_C; ; mouse\n") + + fa = link_file_plate(client, plate, cvs_file) + + # run the script + args = DEFAULT_IMPORT_ARGS.copy() + args["Data_Type"] = rstring("Plate") + args["IDs"] = rlist([rlong(plate.id.val)]) + args["Target Data_Type"] = rstring("-- Well") + args["File_Annotation"] = rstring(str(fa.id)) + args["Import tags"] = rbool(import_tag) + args["Allow tag creation"] = rbool(tag_creation) + + msg = run_script(client, sid, args, 
"Message") + + assert msg._val == f"Added Annotations to {n_well}/{n_well} Well(s)" + plate_o = conn.getObject("Plate", plate.id.val) + list_well = list(plate_o.listChildren()) + list_well = sorted(list_well, key=lambda w: w.getWellPos()) + well_a1, well_a2, well_a3 = list_well + + if import_tag: + assert well_a1.getAnnotationCounts()["TagAnnotation"] == 3 + assert well_a2.getAnnotationCounts()["TagAnnotation"] == 2 + assert well_a3.getAnnotationCounts()["TagAnnotation"] == 1 + else: + assert well_a1.getAnnotationCounts()["TagAnnotation"] == 0 + assert well_a2.getAnnotationCounts()["TagAnnotation"] == 0 + assert well_a3.getAnnotationCounts()["TagAnnotation"] == 0 + + assert well_a1.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a2.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a3.getAnnotationCounts()["MapAnnotation"] == 1 + + annlist = list(well_a1.listAnnotations(ns=NSCLIENTMAPANNOTATION)) + value = annlist[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_1", "val_A") + + annlist = list(well_a2.listAnnotations(ns=NSCLIENTMAPANNOTATION)) + value = annlist[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_1", "val_B") + + annlist = list(well_a3.listAnnotations(ns=NSCLIENTMAPANNOTATION)) + value = annlist[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_1", "val_C") + + def test_import_split(self): + """ + Test the import of KV with inner cell splitting + """ + sid = super(TestAnnotationScripts, self).get_script(import_script) + assert sid > 0 + + client, user = self.new_client_and_user() + + n_well = 3 + plates = self.import_plates(client, plate_cols=n_well, plate_rows=1) + plate = plates[0] + + cvs_file = create_path("test_kvp_name", ".csv") + # create a file annotation + with open(cvs_file.abspath(), 'w') as f: + f.write("OBJECT_NAME; key_1; key_2\n") + f.write("A1; val_A,val_B; val_C\n") + f.write("A2; val_D,val_E,val_F;\n") + f.write("A3; ; val_G,val_H\n") + + fa = link_file_plate(client, plate, cvs_file) + + # run the script + args = DEFAULT_IMPORT_ARGS.copy() + args["Data_Type"] = rstring("Plate") + args["IDs"] = rlist([rlong(plate.id.val)]) + args["Target Data_Type"] = rstring("-- Well") + args["File_Annotation"] = rstring(str(fa.id)) + args["Split values on"] = rstring(",") + args["Exclude empty values"] = rbool(False) + + msg = run_script(client, sid, args, "Message") + conn = BlitzGateway(client_obj=client) + assert msg._val == f"Added Annotations to {n_well}/{n_well} Well(s)" + plate_o = conn.getObject("Plate", plate.id.val) + list_well = list(plate_o.listChildren()) + list_well = sorted(list_well, key=lambda w: w.getWellPos()) + well_a1, well_a2, well_a3 = list_well + + assert well_a1.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a2.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a3.getAnnotationCounts()["MapAnnotation"] == 1 + + value = list(well_a1.listAnnotations())[0].getValue() + assert len(value) == 3 + assert value[0] == ("key_1", "val_A") + assert value[1] == ("key_1", "val_B") + assert value[2] == ("key_2", "val_C") + + value = list(well_a2.listAnnotations())[0].getValue() + assert len(value) == 4 + assert value[0] == ("key_1", "val_D") + assert value[1] == ("key_1", "val_E") + assert value[2] == ("key_1", "val_F") + assert value[3] == ("key_2", "") + + value = list(well_a3.listAnnotations())[0].getValue() + assert len(value) == 3 + assert value[0] == ("key_1", "") + assert value[1] == ("key_2", "val_G") + assert value[2] == ("key_2", "val_H") + + def test_import_empty(self): 
+ """ + Test the import from a CSV with exclusion of empty cells + """ + sid = super(TestAnnotationScripts, self).get_script(import_script) + assert sid > 0 + + client, user = self.new_client_and_user() + + n_well = 3 + plates = self.import_plates(client, plate_cols=n_well, plate_rows=1) + plate = plates[0] + + cvs_file = create_path("test_kvp_name", ".csv") + # create a file annotation + with open(cvs_file.abspath(), 'w') as f: + f.write("OBJECT_NAME; key_1; key_2\n") + f.write("A1; val_A;\n") + f.write("A2; ;\n") + f.write("A3; ; val_B\n") + + fa = link_file_plate(client, plate, cvs_file) + + # run the script + args = DEFAULT_IMPORT_ARGS.copy() + args["Data_Type"] = rstring("Plate") + args["IDs"] = rlist([rlong(plate.id.val)]) + args["Target Data_Type"] = rstring("-- Well") + args["File_Annotation"] = rstring(str(fa.id)) + args["Exclude empty values"] = rbool(True) + + msg = run_script(client, sid, args, "Message") + conn = BlitzGateway(client_obj=client) + assert msg._val == f"Added Annotations to {n_well-1}/{n_well} Well(s)" + plate_o = conn.getObject("Plate", plate.id.val) + list_well = list(plate_o.listChildren()) + list_well = sorted(list_well, key=lambda w: w.getWellPos()) + well_a1, well_a2, well_a3 = list_well + + assert well_a1.getAnnotationCounts()["MapAnnotation"] == 1 + assert well_a2.getAnnotationCounts()["MapAnnotation"] == 0 + assert well_a3.getAnnotationCounts()["MapAnnotation"] == 1 + + value = list(well_a1.listAnnotations())[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_1", "val_A") + + value = list(well_a3.listAnnotations())[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_2", "val_B") + + def test_convert(self): + """ + Test the conversion of KV pairs namespace + """ + sid = super(TestAnnotationScripts, self).get_script(convert_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + image = self.make_image(name="testImage", client=client) + + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_A")]) + kv.setNs(rstring("test")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image, kv, client=client) + + args = { + "Data_Type": rstring("Image"), + "IDs": rlist([rlong(image.id.val)]), + "Target Data_Type": rstring(""), + "Old Namespace (blank for default)": rlist([rstring("test")]), + "New Namespace (blank for default)": rstring("new_ns"), + "Create new and merge": rbool(False) + } + + msg = run_script(client, sid, args, "Message") + + assert msg._val == "Updated kv pairs to 1/1 Image" + + conn = BlitzGateway(client_obj=client) + image_o = conn.getObject("Image", image.id.val) + + value = list(image_o.listAnnotations(ns="new_ns"))[0].getValue() + assert len(value) == 1 + assert value[0] == ("key_1", "val_A") + + @pytest.mark.parametrize('merge', [True, False]) + def test_convert_no_merge(self, merge): + """ + Test the conversion of KV pairs namespace with different + merging options + """ + sid = super(TestAnnotationScripts, self).get_script(convert_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + image = self.make_image(name="testImage", client=client) + + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_A")]) + kv.setNs(rstring("test")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image, kv, client=client) + + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_2", "val_B")]) + 
kv.setNs(rstring("test")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image, kv, client=client) + + args = { + "Data_Type": rstring("Image"), + "IDs": rlist([rlong(image.id.val)]), + "Target Data_Type": rstring(""), + "Old Namespace (blank for default)": rlist([rstring("test")]), + "New Namespace (blank for default)": rstring("new_ns"), + "Create new and merge": rbool(merge) + } + + msg = run_script(client, sid, args, "Message") + + assert msg._val == "Updated kv pairs to 1/1 Image" + + conn = BlitzGateway(client_obj=client) + image_o = conn.getObject("Image", image.id.val) + + list_ann = list(image_o.listAnnotations(ns="new_ns")) + if not merge: + assert len(list_ann) == 2 + value = list_ann[0].getValue() + assert len(value) == 1 + value = list_ann[1].getValue() + assert len(value) == 1 + else: + assert len(list_ann) == 1 + value = list_ann[0].getValue() + assert len(value) == 2 + + @pytest.mark.parametrize('agree_check', [True, False]) + def test_remove(self, agree_check): + """ + Test the removal of KV pairs, and if the script fails without the + agreement checked. + """ + + agreement = ( + "I understand what I am doing and that this will result " + + "in a batch deletion of key-value pairs from the server" + ) + + sid = super(TestAnnotationScripts, self).get_script(remove_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + image = self.make_image(name="testImage", client=client) + + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_A")]) + kv.setNs(rstring("test_delete")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image, kv, client=client) + + args = { + "Data_Type": rstring("Image"), + "IDs": rlist([rlong(image.id.val)]), + "Target Data_Type": rstring(""), + "Namespace (blank for default)": rlist([rstring("test_delete")]), + agreement: rbool(agree_check) + } + + msg = run_script(client, sid, args, "Message") + if not agree_check: # should be an AssertionError, returning None + assert msg is None + else: + assert msg._val == "Key value data deleted from 1 of 1 objects" + conn = BlitzGateway(client_obj=client) + image_o = conn.getObject("Image", image.id.val) + assert len(list(image_o.listAnnotations())) == 0 + + def test_export(self): + """ + Test the export of KV pairs into a CSV + """ + sid = super(TestAnnotationScripts, self).get_script(export_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + image = self.make_image(name="testImage", client=client) + + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_A"), + omero.model.NamedValue("key_2", "val_B")]) + kv.setNs(rstring("test")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image, kv, client=client) + + args = { + "Data_Type": rstring("Image"), + "IDs": rlist([rlong(image.id.val)]), + "Target Data_Type": rstring(""), + "Namespace (blank for default)": rlist([rstring("test")]), + "CSV separator": rstring("TAB"), + "Include parent container names": rbool(False), + "Include namespace": rbool(False), + "Include tags": rbool(False) + } + + msg = run_script(client, sid, args, "Message") + + assert msg._val == f"The csv is attached to Image:{image.id.val}" + + conn = BlitzGateway(client_obj=client) + img_o = conn.getObject("Image", image.id.val) + + file_ann = img_o.getAnnotation(ns="KeyVal_export") + fid = file_ann.getFile().getId() + csv_text = 
get_file_contents(self.new_client(user=user), fid) + lines = csv_text.split("\n") + lines = lines[1:] # Ignore sep= metadata + assert len(lines) == 3 + assert lines[-1] == "" # Last empty line + key_l = lines[0].split("\t") + assert key_l[0] == "OBJECT_ID" + assert key_l[1] == "OBJECT_NAME" + assert "key_1" in key_l + assert "key_2" in key_l + + img1_l = lines[1].split("\t") + assert img1_l[0] == str(image.id.val) + assert img1_l[1] == "testImage" + assert "val_A" in img1_l + assert "val_B" in img1_l + + @pytest.mark.parametrize('same_ns', [True, False]) + def test_export_all_opt(self, same_ns): + """ + Test the export of two KV pairs into a CSV with all options checked + (namespace, parent container, tags). + """ + sid = super(TestAnnotationScripts, self).get_script(export_script) + assert sid > 0 + + client, user = self.new_client_and_user() + conn = BlitzGateway(client_obj=client) + update = conn.getUpdateService() + + # making tags + tagset = self.make_tag( + name="condition", ns=NSINSIGHTTAGSET, client=client + ) + tag1 = self.make_tag(name="ctrl", client=client) + tag2 = self.make_tag(name="test", client=client) + + link = AnnotationAnnotationLinkI() + link.setParent(tagset) + link.setChild(tag1) + update.saveObject(link) + tagset = conn.getObject("TagAnnotation", tagset.id.val)._obj + link = AnnotationAnnotationLinkI() + link.setParent(tagset) + link.setChild(tag2) + update.saveObject(link) + + image1 = self.make_image(name="testImage1", client=client) + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_A"), + omero.model.NamedValue("key_2", "val_B")]) + kv.setNs(rstring("test")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image1, kv, client=client) + self.link(image1, tag1, client=client) + + image2 = self.make_image(name="testImage2", client=client) + kv = MapAnnotationI() + kv.setMapValue([omero.model.NamedValue("key_1", "val_C"), + omero.model.NamedValue("key_2", "val_D")]) + if same_ns: + kv.setNs(rstring("test")) + else: + kv.setNs(rstring("other")) + kv = client.sf.getUpdateService().saveAndReturnObject(kv) + self.link(image2, kv, client=client) + self.link(image2, tag2, client=client) + + ns_l = [rstring("test")] + if not same_ns: + ns_l.append(rstring("other")) + + args = { + "Data_Type": rstring("Image"), + "IDs": rlist([rlong(image1.id.val), rlong(image2.id.val)]), + "Target Data_Type": rstring("<on current>"), + "Namespace (blank for default)": rlist(ns_l), + "CSV separator": rstring("TAB"), + "Include parent container names": rbool(True), + "Include namespace": rbool(True), + "Include tags": rbool(True) + } + + run_script(client, sid, args, "Message") + + conn = BlitzGateway(client_obj=client) + img1_o = conn.getObject("Image", image1.id.val) + img2_o = conn.getObject("Image", image2.id.val) + + file_ann = img1_o.getAnnotation(ns="KeyVal_export") + if file_ann is None: + file_ann = img2_o.getAnnotation(ns="KeyVal_export") + + fid = file_ann.getFile().getId() + csv_text = get_file_contents(self.new_client(user=user), fid) + lines = csv_text.split("\n") + lines = lines[1:] # Ignore sep= metadata + assert len(lines) == 5 + ns_l = lines[0].split("\t") + assert ns_l[0] == "NAMESPACE" + key_l = lines[1].split("\t") + img1_l = lines[2].split("\t") + img2_l = lines[3].split("\t") + assert len(ns_l) == len(key_l) + assert len(key_l) == len(img1_l) + assert len(img1_l) == len(img2_l) + if same_ns: + assert len(key_l) == 5 + k1_pos = key_l.index("key_1") + assert img1_l[k1_pos] == "val_A" + assert img2_l[k1_pos] == "val_C" + k2_pos = 
key_l.index("key_2") + assert img1_l[k2_pos] == "val_B" + assert img2_l[k2_pos] == "val_D" + else: + assert len(key_l) == 7 + ns1_pos = ns_l.index("test") + ns2_pos = ns_l.index("other") + assert img1_l[ns2_pos] == "" + assert img2_l[ns1_pos] == "" + + tag_pos = key_l.index("TAG") + assert img1_l[tag_pos] == "ctrl[condition]" + assert img2_l[tag_pos] == "test[condition]"