From 5ab2d30789a9d49be644ad2b15e014148e139aac Mon Sep 17 00:00:00 2001 From: "Miguel A. Ibarra-Arellano" Date: Wed, 6 Nov 2024 16:53:35 +0100 Subject: [PATCH] Prettify, remove .format in favor of f-strings. --- README.md | 9 + macsima2mc/CLI.py | 10 +- macsima2mc/illumination_corr.py | 49 ++++- macsima2mc/macsima2mc.py | 51 ++--- macsima2mc/mc_tools.py | 163 +++++++-------- macsima2mc/ome_schema.py | 171 ++++++++++------ macsima2mc/ome_writer.py | 55 +++--- macsima2mc/templates.py | 101 +++++----- macsima2mc/tools.py | 338 +++++++++++++++++++++----------- 9 files changed, 572 insertions(+), 375 deletions(-) diff --git a/README.md b/README.md index fce3571..e43e3c8 100644 --- a/README.md +++ b/README.md @@ -1 +1,10 @@ # Staging module for Miltenyi - MACSIMA to MCMICRO + + +## Docker implementation + +TODO: Add instructions on how to run the tool from Docker + +## CLI + +TODO: Add instructions on how to run the CLI \ No newline at end of file diff --git a/macsima2mc/CLI.py b/macsima2mc/CLI.py index a6b3ea9..0e837c0 100644 --- a/macsima2mc/CLI.py +++ b/macsima2mc/CLI.py @@ -1,14 +1,16 @@ import argparse import pathlib - - #---CLI-BLOCK---# -def get_args(): +def get_args(): + """ + This function parses the command line arguments and returns them as a namespace object. + + returns: namespace object with the arguments. + """ parser=argparse.ArgumentParser() #Mandatory arguments - parser.add_argument('-i', '--input', required=True, diff --git a/macsima2mc/illumination_corr.py b/macsima2mc/illumination_corr.py index 3e9bf4b..735b8eb 100644 --- a/macsima2mc/illumination_corr.py +++ b/macsima2mc/illumination_corr.py @@ -1,24 +1,55 @@ from basicpy import BaSiC import numpy as np -def indices_per_channel(total_imgs,no_of_channels): +def indices_per_channel(total_imgs, no_of_channels): + """ + This function creates a list of lists with the indices of the images in the stack per channel. + Args: + total_imgs (int): total number of images in the stack. + no_of_channels (int): number of channels. + Returns: + list: list of lists with the indices of the images in the stack per channel. + """ + #total_imgs in the stack - img_indices=[ list( range(ch,total_imgs,no_of_channels) ) for ch in range( 0, no_of_channels ) ] + img_indices = [list(range(ch,total_imgs,no_of_channels)) for ch in range(0, no_of_channels)] return img_indices + def extract_channel_imgs (stack,indices): + """ + This function extracts the images in the stack per channel. + Args: + stack (np.array): stack of images. + indices (list): list of indices of the images in the stack per channel. + Returns: + np.array: stack of images per channel. + """ + return stack[indices,:,:] + def apply_corr(uncorr_stack,no_of_channels): - corr_stack=np.zeros( uncorr_stack.shape,dtype=uncorr_stack.dtype ) - total_imgs=uncorr_stack.shape[0] - indices=indices_per_channel(total_imgs, no_of_channels) - basic = BaSiC(get_darkfield=False, smoothness_flatfield=1.0,fitting_mode = "approximate", sort_intensity = True) + """ + This function applies the BaSiC algorithm to correct the illumination in the images. + Args: + uncorr_stack (np.array): stack of uncorrected images. + no_of_channels (int): number of channels. + Returns: + np.array: stack of corrected images. + """ + + corr_stack = np.zeros( uncorr_stack.shape,dtype=uncorr_stack.dtype ) + total_imgs = uncorr_stack.shape[0] + indices = indices_per_channel(total_imgs, no_of_channels) + basic = BaSiC(get_darkfield=False, smoothness_flatfield=1.0, fitting_mode="approximate", sort_intensity=True) + for ind_list in indices: - uncorr_imgs=extract_channel_imgs(uncorr_stack, ind_list) + uncorr_imgs = extract_channel_imgs(uncorr_stack, ind_list) basic.fit(uncorr_imgs) - ffp=basic.flatfield - corr_stack[ind_list,:,:]=np.uint16( np.clip( uncorr_imgs.astype(float)/ffp ,0, 65535) ) + ffp = basic.flatfield + corr_stack[ind_list,:,:] = np.uint16(np.clip(uncorr_imgs.astype(float) / ffp, 0, 65535)) + return corr_stack diff --git a/macsima2mc/macsima2mc.py b/macsima2mc/macsima2mc.py index 084be09..ae1160e 100644 --- a/macsima2mc/macsima2mc.py +++ b/macsima2mc/macsima2mc.py @@ -3,36 +3,37 @@ import CLI import mc_tools - - #input_test_folder=Path("D:/macsima_data_samples/macsima_data_v2/8_Cycle2") #output_test_folder=Path('D:/test_folder') -def main(args): - input=args.input - output=args.output - ref=args.reference_marker - basicpy_corr=args.illumination_correction - out_folder_name=args.output_dir - - cycle_info=tools.cycle_info( input , macsima_pattern(version=2),ref_marker= ref) - cycle_info=tools.append_metadata( cycle_info ) - #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{6:03d}'), index=False ) - output_dirs=tools.create_stack( cycle_info, output , - ref_marker=ref, - hi_exp=args.hi_exposure_only, - ill_corr=basicpy_corr, - out_folder=out_folder_name - ) - [mc_tools.write_markers_file(path) for path in output_dirs] - - -if __name__ == '__main__': +def main(): + # Get arguments args = CLI.get_args() - main(args) - - + # Assign arguments to variables + input = args.input + output = args.output + ref = args.reference_marker + basicpy_corr = args.illumination_correction + out_folder_name = args.output_dir + # Get cycle info + cycle_info = tools.cycle_info(input, macsima_pattern(version=2), ref_marker= ref) + # Create stack + cycle_info = tools.append_metadata( cycle_info ) + #cycle_info.to_csv( args.output / 'cycle_{c}_info.csv'.format(c=f'{6:03d}'), index=False ) + output_dirs = tools.create_stack(cycle_info, + output, + ref_marker=ref, + hi_exp=args.hi_exposure_only, + ill_corr=basicpy_corr, + out_folder=out_folder_name) + + # Save markers file in each output directory + for path in output_dirs: + mc_tools.write_markers_file(path) + +if __name__ == '__main__': + main() diff --git a/macsima2mc/mc_tools.py b/macsima2mc/mc_tools.py index b5b68ab..4c4cc38 100644 --- a/macsima2mc/mc_tools.py +++ b/macsima2mc/mc_tools.py @@ -8,90 +8,97 @@ def flatten_list(lol): - #lol=list of lists - return [x for xs in lol - for x in xs - ] + """ + This function flattens a list of lists. + Args: + lol (list): list of lists. + Returns: + list: flattened list. + """ + return [x for xs in lol for x in xs] + def markers_file(): - - columns={'channel_number': [] , - 'cycle_number': [], - 'marker_name':[], - 'Filter':[], - 'background':[], - 'exposure':[], - 'remove':[] - } - - return columns + """ + This function creates a dictionary with the columns of the markers.csv file. + Returns: + dict: dictionary with the columns of the markers.csv file. + """ + columns = {'channel_number': [], + 'cycle_number': [], + 'marker_name':[], + 'Filter':[], + 'background':[], + 'exposure':[], + 'remove':[] + } + + return columns -def get_patterns(): - #Dictionary of tuples where first element indicates the pattern to search and - #second element is a boolean that indicates whether to transform the search output into a value or not - #the keys of this dictionary should be a subset of the keys in the markers_file function. - patterns={ - 'cycle_number':(r"cycle-(.*?)-", True), - 'marker_name': (r"markers-(.*?)-",False), - 'Filter':(r"-filters-(.*?).ome",False), - 'background':(r"-src-(.*?)-" ,False) - } +def get_patterns(): + """ + Dictionary of tuples where first element indicates the pattern to search and + second element is a boolean that indicates whether to transform the search output into a value or not + the keys of this dictionary should be a subset of the keys in the markers_file function. + Returns: + dict: dictionary with regular expressions to extract metadata from macsima filenames. + """ - return patterns + patterns = { + 'cycle_number': (r"cycle-(.*?)-", True), + 'marker_name' : (r"markers-(.*?)-",False), + 'Filter' : (r"-filters-(.*?).ome",False), + 'background' : (r"-src-(.*?)-" ,False) + } -#input=Path('D:/test_folder/rack-01-well-B01-roi-001-exp-1/raw') -#img_names=[ file.stem for file in list( input.glob('*.tif*') ) ] + return patterns -#file[ ]=flatten_list( [ m.split('__') for m in extract_values( tup[0] , file_names,number_cast=tup[1] )] ) -#file[key]=flatten_list( [ m.split('__') for m in extract_values( tup[0] , file_names,number_cast=tup[1] )] ) def write_markers_file( data_path, ref_marker='DAPI'): - img_paths=list( sorted( data_path.glob('*.tif*') ) ) - mks_file=markers_file() - patt=get_patterns() - - for img in img_paths: - img_name=[img.stem] - - cycle_no= extract_values( patt['cycle_number'][0] , img_name, number_cast=patt['cycle_number'][1] ) - background= extract_values( patt['background'][0] , img_name, number_cast=patt['background'][1] ) - - markers= extract_values( patt['marker_name'][0] , img_name ,number_cast=patt['marker_name'][1] )[0].split('__') - filters= extract_values( patt['Filter'][0] , img_name ,number_cast=patt['Filter'][1] )[0].split('__') - ome= from_tiff(img) - - - - - - if background[0]=='B': - remove=len(markers)*['TRUE'] - markers=['bg_{c}_{m}-{f}'.format( c= f'{cycle_no[0]:03d}' , m=x , f=y ) for x,y in zip(markers,filters) ] - fmt_background=len(markers)*[''] - else: - fmt_background=[] - remove=len(markers)*[''] - for x,y in zip(markers,filters): - if x==ref_marker: - fmt_background.append( '' ) - else: - fmt_background.append( 'bg_{c}_{m}-{f}'.format( c=f'{cycle_no[0]:03d}', m=x , f=y ) ) - - #fmt_background=[ 'bg_{c}_{m}-{f}'.format( c=f'{cycle_no[0]:03d}' , m=x , f=y ) for x,y in zip(markers,filters) ] - - - mks_file['cycle_number'].extend( len(markers)*cycle_no ) - mks_file['marker_name'].extend( markers ) - mks_file['Filter'].extend( filters ) - mks_file['background'].extend( fmt_background ) - mks_file['exposure'].extend( [ome.images[0].pixels.planes[ch].exposure_time for ch,_ in enumerate(markers) ] ) - mks_file['remove'].extend( remove ) - - mks_file['channel_number']=list( range( 1, 1 + len(mks_file['marker_name']) ) ) - mks_file_df=DataFrame(mks_file) - mks_file_df.to_csv( data_path.parent.absolute() / 'markers.csv' , index=False ) - return mks_file - - - + """ + This function writes the markers.csv file. + Args: + data_path (Path): path to the folder containing the images. + ref_marker (str): reference marker. + Returns: + dict: dictionary with the columns of the markers.csv file. + """ + + img_paths = list(sorted( data_path.glob('*.tif*'))) + mks_file = markers_file() + patt = get_patterns() + + for img in img_paths: + img_name = [img.stem] + cycle_no = extract_values(patt['cycle_number'][0], img_name, number_cast=patt['cycle_number'][1]) + background = extract_values(patt['background'][0], img_name, number_cast=patt['background'][1]) + markers = extract_values(patt['marker_name'][0], img_name, number_cast=patt['marker_name'][1])[0].split('__') + filters = extract_values(patt['Filter'][0], img_name, number_cast=patt['Filter'][1])[0].split('__') + ome = from_tiff(img) + + if background[0]=='B': + remove = len(markers)*['TRUE'] + markers = ['bg_{c}_{m}-{f}'.format( c= f'{cycle_no[0]:03d}' , m=x , f=y ) for x,y in zip(markers,filters) ] + fmt_background = len(markers)*[''] + else: + fmt_background = [] + remove = len(markers)*[''] + for x,y in zip(markers,filters): + if x == ref_marker: + fmt_background.append('') + else: + fmt_background.append(f'bg_{cycle_no[0]:03d}_{x}-{y}') + + mks_file['cycle_number'].extend(len(markers)*cycle_no) + mks_file['marker_name'].extend(markers) + mks_file['Filter'].extend(filters) + mks_file['background'].extend(fmt_background) + mks_file['exposure'].extend([ome.images[0].pixels.planes[ch].exposure_time for ch,_ in enumerate(markers)]) + mks_file['remove'].extend(remove) + + mks_file['channel_number'] = list(range(1, 1 + len(mks_file['marker_name']))) + mks_file_df = DataFrame(mks_file) + mks_file_df.to_csv( data_path.parent.absolute() / 'markers.csv' , index=False ) + + return mks_file diff --git a/macsima2mc/ome_schema.py b/macsima2mc/ome_schema.py index 1983dd3..0ed8966 100644 --- a/macsima2mc/ome_schema.py +++ b/macsima2mc/ome_schema.py @@ -4,25 +4,36 @@ from ome_types.model import OME,Image,Pixels,TiffData,Channel,Plane import platform -def INPUTS(frame,conformed_markers): - features=frame.columns.values - inputs={column:[] for column in features } - metadata=[ frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker,filt in conformed_markers ] + +def INPUTS(frame, conformed_markers): + """ + This function creates a dictionary with the metadata of the tiles. + Args: + frame (pd.DataFrame): dataframe containing the metadata of the tiles. + conformed_markers (list): list of tuples with the name of the markers and their corresponding fluorophore. + Returns: + dict: dictionary with the metadata of the tiles. + """ + + features = frame.columns.values + inputs = {column:[] for column in features } + metadata = [frame.loc[ (frame['marker']==marker) & (frame['filter']==filt)] for marker, filt in conformed_markers] for meta in metadata: for f in features: inputs[f].append(meta[f].values[0]) return inputs - - - - -def TIFF_array(no_of_channels, - inputs={'offset':0} - ): - - TIFF=[ +def TIFF_array(no_of_channels, inputs={'offset':0}): + """ + This function creates a list of TIFFData objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of TIFFData objects. + """ + TIFF = [ TiffData( first_c=ch, ifd=n, @@ -33,9 +44,18 @@ def TIFF_array(no_of_channels, return TIFF -def PLANE_array(no_of_channels,inputs): - PLANE=[ +def PLANE_array(no_of_channels, inputs): + """ + This function creates a list of Plane objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of Plane objects. + """ + + PLANE = [ Plane( the_c=ch, the_t=0, @@ -51,81 +71,106 @@ def PLANE_array(no_of_channels,inputs): ] return PLANE - -def CHANN_array(no_of_channels,inputs): - CHANN=[ + +def CHANN_array(no_of_channels, inputs): + """ + This function creates a list of Channel objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of Channel objects. + """ + + CHANN = [ Channel( - #id=ome_types.model.simple_types.ChannelID('Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+inputs['tile'][ch],marker_name=inputs['marker'][ch] )), - id='Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+int( inputs['tile'][ch] ) ,marker_name=inputs['marker'][ch] ), - #color=ome_types.model.simple_types.Color((255,255,255)), + id=f"Channel:{100+int(inputs['tile'][ch])}:{ch}:{inputs['marker'][ch]}", # 'Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+int( inputs['tile'][ch] ) ,marker_name=inputs['marker'][ch] ) color=(255,255,255), emission_wavelength=inputs['emission_wavelenght'][ch], emission_wavelength_unit=inputs['emission_wavelenght_unit'][ch], excitation_wavelength=inputs['excitation_wavelenght'][ch], excitation_wavelength_unit=inputs['excitation_wavelenght_unit'][ch] - ) for ch in range(0,no_of_channels) ] return CHANN -def PIXELS_array(chann_block,plane_block,tiff_block,inputs): - #inputs['type'][0],#bit_depth - PIXELS=Pixels( - #id=ome_types.model.simple_types.PixelsID('Pixels:{x}'.format(x=inputs['tile'][0])), - id='Pixels:{x}'.format(x=inputs['tile'][0]), - #dimension_order=ome_types.model.pixels.DimensionOrder('XYCZT'), - dimension_order='XYCZT', - size_c=len(chann_block), - size_t=1, - size_x=inputs['size_x'][0], - size_y=inputs['size_y'][0], - size_z=1, - type=inputs['type'][0],#bit_depth - big_endian=False, - channels=chann_block, - interleaved=False, - physical_size_x=inputs['physical_size_x'][0], - physical_size_x_unit=inputs['physical_size_x_unit'][0], - physical_size_y=inputs['physical_size_y'][0], - physical_size_y_unit=inputs['physical_size_y_unit'][0], - physical_size_z=1.0, - planes=plane_block, - significant_bits=inputs['significant_bits'][0], - tiff_data_blocks=tiff_block - ) + +def PIXELS_array(chann_block, plane_block, tiff_block, inputs): + """ + This function creates a Pixels object. + Args: + chann_block (list): list of Channel objects. + plane_block (list): list of Plane objects. + tiff_block (list): list of TIFFData objects. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + Pixels: Pixels object. + """ + + PIXELS = Pixels( + id=f"Pixels:{inputs['tile'][0]}", + dimension_order='XYCZT', + size_c=len(chann_block), + size_t=1, + size_x=inputs['size_x'][0], + size_y=inputs['size_y'][0], + size_z=1, + type=inputs['type'][0],#bit_depth + big_endian=False, + channels=chann_block, + interleaved=False, + physical_size_x=inputs['physical_size_x'][0], + physical_size_x_unit=inputs['physical_size_x_unit'][0], + physical_size_y=inputs['physical_size_y'][0], + physical_size_y_unit=inputs['physical_size_y_unit'][0], + physical_size_z=1.0, + planes=plane_block, + significant_bits=inputs['significant_bits'][0], + tiff_data_blocks=tiff_block + ) return PIXELS -def IMAGE_array(pixels_block,imageID): + +def IMAGE_array(pixels_block, imageID): + """ + This function creates an Image object. + Args: + pixels_block (Pixels): Pixels object. + imageID (int): identifier of the image. + Returns: + Image: Image object. + """ - IMAGE=Image( - #id=ome_types.model.simple_types.ImageID('Image:{x}'.format(x=imageID)), - id='Image:{x}'.format(x=imageID), + IMAGE = Image( + id =f'Image:{imageID}', pixels=pixels_block ) return IMAGE -def OME_metadata(image_block): - ome=OME() - ome.creator=" ".join([ome_types.__name__, +def OME_metadata(image_block): + """ + This function creates an OME object. + Args: + image_block (list): list of Image objects. + Returns: + OME: OME object. + """ + ome = OME() + ome.creator = " ".join([ome_types.__name__, ome_types.__version__, '/ python version-', platform.python_version() ] ) - ome.images=image_block - ome.uuid=uuid4().urn - ome_xml=ome_types.to_xml(ome) - - return ome,ome_xml - - - - + ome.images = image_block + ome.uuid = uuid4().urn + ome_xml = ome_types.to_xml(ome) + return ome, ome_xml diff --git a/macsima2mc/ome_writer.py b/macsima2mc/ome_writer.py index 8f0c419..db150b4 100644 --- a/macsima2mc/ome_writer.py +++ b/macsima2mc/ome_writer.py @@ -2,32 +2,29 @@ import pandas as pd -#df=pd.read_csv("D:/test_folder/cycle_006_info.csv") - -#acq_group=df.groupby(['source','rack','well','roi','exposure_level']) -#acq_index=list(acq_group.indices.keys()) -#group=acq_group.get_group( ('B', 1, 'B01', 1, 1) ) -#group.to_csv("D:/test_folder/tile_info.csv") -#conformed_markers=[('DAPI', 'DAPI'), ('Syk', 'APC'), ('Syk', 'FITC'), ('Syk', 'PE')] - -def create_ome(tile_info,conformed_markers): - grouped_tiles=tile_info.groupby(['tile']) - no_of_channels=len(conformed_markers) - tiles_counter=0 - image=[] - for tileID,frame in grouped_tiles: - metadata=schema.INPUTS(frame, conformed_markers) - tiff=schema.TIFF_array( no_of_channels, inputs={'offset':no_of_channels*tiles_counter} ) - plane=schema.PLANE_array(no_of_channels, metadata) - channel=schema.CHANN_array(no_of_channels,metadata) - pixels=schema.PIXELS_array(channel,plane,tiff,metadata) - image.append( schema.IMAGE_array (pixels ,tiles_counter) ) - tiles_counter+=1 - - ome,ome_xml=schema.OME_metadata(image) - - return ome,ome_xml - -#create_ome(group,conformed_markers) - - +def create_ome(tile_info, + conformed_markers): + """ + This function creates an OME-XML file from a pandas dataframe containing the metadata of the tiles. + Args: + tile_info (pd.DataFrame): dataframe containing the metadata of the tiles. + conformed_markers (list): list of tuples with the name of the markers and their corresponding fluorophore. + Returns: + str: OME-XML file. + """ + + grouped_tiles = tile_info.groupby(['tile']) + no_of_channels = len(conformed_markers) + tiles_counter = 0 + image = [] + for tileID, frame in grouped_tiles: + metadata = schema.INPUTS(frame, conformed_markers) + tiff = schema.TIFF_array(no_of_channels, inputs={'offset': no_of_channels * tiles_counter}) + plane = schema.PLANE_array(no_of_channels, metadata) + channel = schema.CHANN_array(no_of_channels, metadata) + pixels = schema.PIXELS_array(channel, plane, tiff, metadata) + image.append(schema.IMAGE_array (pixels, tiles_counter)) + tiles_counter += 1 + ome, ome_xml = schema.OME_metadata(image) + + return ome, ome_xml diff --git a/macsima2mc/templates.py b/macsima2mc/templates.py index 8c066eb..b4298c3 100644 --- a/macsima2mc/templates.py +++ b/macsima2mc/templates.py @@ -1,72 +1,61 @@ import re - -#img_info= -#{ -# 'name':'', #string -# 'device':'', #string -# 'no_channels':[], #list -# 'markers':[], -# 'filters':[], -# 'exposure_times':exposure_per_marker, -# 'xy_img_size_pix':(width,height), -# 'pix_size':tile_data['pixel_size'], -# 'pix_units':tile_data['pixel_units'], -# 'bit_depth':tile_data['bit_depth'], -# 'sig_bits':tile_data['sig_bits']. -# 'tile_positions': -# -#} - def info_dic(target_pattern): - ''' - creates a dictionary with keys mirroring the keys in the - target_pattern dictionary. The value of each key is an empty string. - inputs: - -target_pattern[dic]: dictionary taken from the macsima_pattern function. - outputs: - template[dic]: template with keys from macsima_pattern plus 2 extra keys, - full path of image and image name. + """ + creates a dictionary with keys mirroring the keys in the target_pattern dictionary. + The value of each key is an empty string. + Args: + target_pattern (dict): dictionary taken from the macsima_pattern function. + Returns: + dict: template with keys from macsima_pattern plus 2 extra keys, full path of image and image name. + """ - ''' - - template={} - template['full_path']='' - template['img_name']='' + # Initialize dictionary + template = { + 'full_path': '', + 'img_name': '' + } for key in target_pattern: - template[key]='' + template[key] = '' return template + def macsima_pattern(version=1): + """ + Returns a dictionary with regular expressions to extract metadata from macsima filenames. + Args: + version (int): version of the macsima filenames. Default is 1. + Returns: + dict: dictionary with regular expressions to extract metadata from macsima filenames. + """ - if version==1: - - pattern={ - "cycle" : r"(.*?)_(.*?)Cycle", - "source": r"_(.*?)Cycle", - "rack" : r"_R-(\d+)", - "well" : r"_W-(\d+)", - "roi" : r"_G-(\d+)", - "tile" : r"_F-(\d+)", - "exposure_time":r"_E-(\d+)", - "marker": r"Cycle_(.*?)_", - "filter": r".*_([^_]*)_\d+bit" - } + if version == 1: + pattern = { + "cycle" : r"(.*?)_(.*?)Cycle", + "source" : r"_(.*?)Cycle", + "rack" : r"_R-(\d+)", + "well" : r"_W-(\d+)", + "roi" : r"_G-(\d+)", + "tile" : r"_F-(\d+)", + "exposure_time" : r"_E-(\d+)", + "marker" : r"Cycle_(.*?)_", + "filter" : r".*_([^_]*)_\d+bit" + } elif version==2: - pattern={ - "cycle": r"CYC-(\d+)", - "source": r"_ST-(.*?)_", - "rack": r"_R-(\d+)", - "well": r"_W-(.*?\d+)", - "roi": r"_ROI-(\d+)", - "tile": r"_F-(\d+)", - "exposure_time": r"_EXP-(\d+(?:\.\d+)?)", - "marker": r"_A-(.*?)_", - "filter": r"_D-(.*?)_" + pattern = { + "cycle" : r"CYC-(\d+)", + "source" : r"_ST-(.*?)_", + "rack" : r"_R-(\d+)", + "well" : r"_W-(.*?\d+)", + "roi" : r"_ROI-(\d+)", + "tile" : r"_F-(\d+)", + "exposure_time" : r"_EXP-(\d+(?:\.\d+)?)", + "marker" : r"_A-(.*?)_", + "filter" : r"_D-(.*?)_" } else: @@ -74,5 +63,3 @@ def macsima_pattern(version=1): "version argument should be 1 or 2" ) return pattern - - diff --git a/macsima2mc/tools.py b/macsima2mc/tools.py index dc48e1c..9ec116d 100644 --- a/macsima2mc/tools.py +++ b/macsima2mc/tools.py @@ -1,3 +1,5 @@ +from tkinter.font import names + from templates import info_dic import re import pandas as pd @@ -10,6 +12,13 @@ def merge_dicts(list_of_dicts): + """ + This function merges a list of dictionaries into a single dictionary where the values are stored in lists. + Args: + list_of_dicts (list): list of dictionaries to merge + Returns: + merged_dict (dict): dictionary with the values stored in lists + """ merged_dict = {} for d in list_of_dicts: for key, value in d.items(): @@ -19,7 +28,18 @@ def merge_dicts(list_of_dicts): merged_dict[key] = [value] return merged_dict -def extract_values(target_pattern, strings,number_cast=True): +def extract_values(target_pattern, + strings, + number_cast=True): + """ + This function extracts the values from a list of strings using a regular expression pattern. + Args: + target_pattern (str): regular expression pattern + strings (list): list of strings to extract the values from + number_cast (bool): if True, the extracted values are cast to integers + Returns: + list: list of extracted values + """ return [ (int(m.group(1)) if number_cast else m.group(1)) if (m := re.search(target_pattern, s)) @@ -29,7 +49,13 @@ def extract_values(target_pattern, strings,number_cast=True): def extract_metadata(tile_abs_path): - + """ + This function extracts the metadata from a tiff file using the ome-xml format. + Args: + tile_abs_path (Path): full path to the tiff file + Returns: + dict: dictionary with the metadata extracted from the tiff file using the ome-xml format. + """ with tifff.TiffFile(tile_abs_path) as tif: metadata = tif.ome_metadata @@ -53,188 +79,280 @@ def extract_metadata(tile_abs_path): "excitation_wavelenght_unit":ome.Channel["ExcitationWavelengthUnit"] } +def cycle_info(cycle_path, + platform_pattern, + ref_marker='DAPI'): + """ + This function reads the images produced by the MACSima device and returns the acquisition information + specified in the image name. + Args: + cycle_path (Path): full path to the cycle folder + platform_pattern (dict): dictionary with the pattern to search in the image name. + ref_marker (str): marker of reference used for registration -def cycle_info(cycle_path, platform_pattern,ref_marker= 'DAPI'): - ''' - This function reads the images produced by the MACSima device and returns the acquistion information - specified in the image name. - inputs: - -cycle_path[Path]= full path to the cycle folder - -ref[str]=marker of reference used for registration - -source[str]= valid values 'Antigen' or 'Bleach' - -dir_version[int]=version of the macsima folder and file naming structure. Valid values are 1 or 2. - E.g. version_1 (001_AntigenCycle_DAPI_V0_DAPI_16bit_M-20x-S Fluor full sensor_B-1_R-2_W-2_G-1_F-30_E-16.0.tif) and - version_2 (CYC-001_SCN-001_ST-B_R-01_W-B01_ROI-001_F-001_A-Syk_C-_D-FITC_EXP-17.5781.tif) - output: - -info[dict]=dictionary with acquisition information, ROI, rack, exposure time etc. - - ''' + Returns: + df (pd.DataFrame): dataframe with the acquisition information, ROI, rack, exposure time etc. + """ full_image_paths = list(cycle_path.glob("*.tif")) file_names = [x.name for x in full_image_paths] info=info_dic(platform_pattern) - info['full_path']=full_image_paths - info['img_name']=file_names + info['full_path'] = full_image_paths + info['img_name'] = file_names for feat,value in platform_pattern.items(): - info[feat]=extract_values(target_pattern=value, strings=file_names,number_cast=False) - df=pd.DataFrame(info) - df.loc[df['filter']==ref_marker,'marker']=ref_marker + df = pd.DataFrame(info) + df.loc[df['filter']==ref_marker,'marker'] = ref_marker df.insert(loc=df.shape[1], column="exposure_level", value=0) df["exposure_time"] = df["exposure_time"].astype(float) - df["exposure_level"] = ( df.groupby(["source","marker","filter"])["exposure_time"].rank(method="dense")).astype(int) - + df["exposure_level"] = ( df.groupby(["source","marker","filter"])["exposure_time"].rank(method="dense")).astype(int) return df def append_metadata(cycle_info_df): - + """ + This function appends the metadata extracted from the tiff files to the cycle_info dataframe. + Args: + cycle_info_df (pd.DataFrame): dataframe with the acquisition information + Returns: + pd.DataFrame: dataframe with the metadata appended to the cycle_info dataframe as new columns. + """ pos=list( map(extract_metadata, cycle_info_df['full_path'].values) ) - for key,val in merge_dicts(pos).items(): + for key, val in merge_dicts(pos).items(): cycle_info_df.insert(loc=cycle_info_df.shape[1], column=key, value=val) return cycle_info_df -def conform_markers(mf_tuple,ref_marker='DAPI'): - markers=[tup for tup in mf_tuple if tup[0]!=ref_marker] +def conform_markers(mf_tuple, + ref_marker='DAPI'): + """ + This function reorders the markers in the mf_tuple so that the reference marker is the first element. + Args: + mf_tuple (tuple): tuple with the markers and filters + ref_marker (str): reference marker used for registration + Returns: + list: list with the markers and filters reordered so that the reference marker is the first element. + """ + + markers = [tup for tup in mf_tuple if tup[0]!=ref_marker] markers.insert(0,(ref_marker,ref_marker)) return markers -def any_ref(mf_tuple,ref_marker='DAPI'): - exist_ref=False +def any_ref(mf_tuple, + ref_marker='DAPI'): + """ + This function checks if the reference marker is present in the mf_tuple. + Args: + mf_tuple (tuple): tuple with the markers and filters + ref_marker (str): reference marker used for registration + Returns: + bool: True if the reference marker is present in the mf_tuple, False otherwise. + """ + + exist_ref = False for m in mf_tuple: - if m[0]==ref_marker: - exist_ref=True + if m[0] == ref_marker: + exist_ref = True break return exist_ref - -def init_stack(ref_tile_index,groupby_obj,marker_filter_map): - ref_tile=groupby_obj.get_group((ref_tile_index,)) - total_tiles=len(groupby_obj) - width=ref_tile.size_x.values[0] - height=ref_tile.size_y.values[0] - depth=total_tiles*len(marker_filter_map) - stack=np.zeros( (depth,int(height),int(width)) ,dtype=ref_tile.type.values[0] ) +def init_stack(ref_tile_index, + groupby_obj, + marker_filter_map): + """ + This function initializes the stack array with the dimensions of the tiles. + Args: + ref_tile_index (int): index of the reference tile + groupby_obj (pd.DataFrame.groupby): groupby object with the tiles + marker_filter_map (list): list with the markers and filters + Returns: + np.ndarray: array with the dimensions of the stack array (depth, height, width) and the dtype of the + reference tile. + """ + ref_tile = groupby_obj.get_group((ref_tile_index,)) + total_tiles = len(groupby_obj) + width = ref_tile.size_x.values[0] + height = ref_tile.size_y.values[0] + depth = total_tiles*len(marker_filter_map) + stack = np.zeros( (depth,int(height),int(width)), dtype=ref_tile.type.values[0]) return stack -def cast_stack_name(cycle_no,acq_group_index,marker_filter_map): - #acq_group_index('source','rack','well','roi','exposure_level') +def cast_stack_name(cycle_no, + acq_group_index, + marker_filter_map): + """ + This function creates the name of the stack file. + Args: + cycle_no (int): cycle number + acq_group_index (tuple): tuple with the acquisition information + marker_filter_map (list): list with the markers and filters + Returns: + str: name of the stack file. + """ markers='__'.join([element[0] for element in marker_filter_map ]) filters='__'.join([element[1] for element in marker_filter_map ]) - cycle_no=int(cycle_no) - - name='cycle-{C}-src-{S}-rack-{R}-well-{W}-roi-{ROI}-exp-{E}-markers-{M}-filters-{F}.{img_format}'.format( - C=f'{cycle_no:03d}', - S=acq_group_index[0], - E=acq_group_index[4], - R=acq_group_index[1], - W=acq_group_index[2], - ROI=acq_group_index[3], - M=markers, - F=filters, - img_format='ome.tiff' - ) + cycle_no = int(cycle_no) + + c = f'{cycle_no:03d}' + s = acq_group_index[0] + e = acq_group_index[4] + r = acq_group_index[1] + w = acq_group_index[2] + roi = acq_group_index[3] + m = markers + f = filters + img_format = 'ome.tiff' + + # Nicer way to format strings + name = f'cycle-{c}-src-{s}-rack-{r}-well-{w}-roi-{roi}-exp-{e}-markers-{m}-filters-{f}.{img_format}' return name + def cast_outdir_name(tup): - #tuple('source','rack','well','roi','exposure_level']) - name='rack-{R}-well-{W}-roi-{ROI}-exp-{E}'.format( - R=tup[1], - W=tup[2], - ROI=tup[3], - E=tup[4] - ) + """ + This function creates the name of the output directory. + Args: + tup (tuple): tuple with the acquisition information + Returns: + str: name of the output directory. + """ + r = tup[1] + w = tup[2] + roi = tup[3] + e = tup[4] + + # Nicer way to format strings + name = f'rack-{r}-well-{w}-roi-{roi}-exp-{e}' return name -def outputs_dic(): - out={'index':[], +def outputs_dic(): + """ + This function initializes the dictionary used to store the outputs of the create_stack function. + Returns: + dict: dictionary with the keys 'index', 'array', 'full_path', 'ome' and empty lists as values + """ + + out={ + 'index':[], 'array':[], 'full_path':[], 'ome':[], - } return out -def select_by_exposure(list_indices,exp_index=4,target='max'): - selected_indices=[] - df_aux=pd.DataFrame( np.row_stack(list_indices) ) - group_by_indices=np.setdiff1d( range(0, len(list_indices[0]) ), exp_index ).tolist() - for key,frame in df_aux.groupby( group_by_indices ): - if target=='max': +def select_by_exposure(list_indices, + exp_index=4, + target='max'): + """ + This function selects the indices with the maximum or minimum exposure time. + Args: + list_indices (list): list of indices + exp_index (int): index of the exposure time + target (str): 'max' or 'min' + Returns: + list: list of selected indices + """ + selected_indices = [] + df_aux = pd.DataFrame( np.row_stack(list_indices) ) + group_by_indices = np.setdiff1d( range(0, len(list_indices[0]) ), exp_index ).tolist() + + for key, frame in df_aux.groupby( group_by_indices ): + if target == 'max': selected_indices.append( key + ( int(frame[exp_index].max() ), ) ) - elif target=='min': + elif target == 'min': selected_indices.append( key + ( int( frame[exp_index].min()), ) ) return selected_indices -def create_stack(cycle_info_df,output_dir,ref_marker='DAPI',hi_exp=False,ill_corr=False,out_folder='raw',extended_outputs=False): + +def create_stack(cycle_info_df, + output_dir, + ref_marker='DAPI', + hi_exp=False, + ill_corr=False, + out_folder='raw', + extended_outputs=False): + """ + This function creates the stack of images from the cycle_info dataframe. + Args: + cycle_info_df (pd.DataFrame): dataframe with the acquisition information + output_dir (Path): full path to the output directory + ref_marker (str): reference marker used for registration + hi_exp (bool): if True, only the tiles with the highest exposure time are selected + ill_corr (bool): if True, the illumination correction is applied + out_folder (str): name of the output folder + extended_outputs (bool): if True, the function returns a dictionary with the stack arrays, full paths and ome-xml metadata + Returns: + np.ndarray or list: stack array or list with the full paths of the stack files created in the output directory. + """ if extended_outputs: - out=outputs_dic() + out = outputs_dic() else: - out={'output_paths':[]} + out = {'output_paths':[]} - acq_group=cycle_info_df.groupby(['source','rack','well','roi','exposure_level']) - acq_index=list( acq_group.indices.keys() ) + acq_group = cycle_info_df.groupby(['source','rack','well','roi','exposure_level']) + acq_index = list( acq_group.indices.keys() ) if hi_exp: - acq_index=select_by_exposure(acq_index) + acq_index = select_by_exposure(acq_index) for index in acq_index: - stack_output_dir=output_dir / cast_outdir_name(index) / out_folder - ( stack_output_dir ).mkdir(parents=True, exist_ok=True) - group=acq_group.get_group(index) - #use tile 1 as reference to determine the heigh and width of the tiles - tile_no=group.tile.values - ref_tile=group.groupby(['tile']).get_group((tile_no[0],)) - marker_filter_map=list(ref_tile.groupby(["marker","filter"]).indices.keys()) - exist_ref=any_ref(marker_filter_map,ref_marker) + stack_output_dir = output_dir / cast_outdir_name(index) / out_folder + stack_output_dir.mkdir(parents=True, exist_ok=True) + group = acq_group.get_group(index) + + #use tile 1 as reference to determine the height and width of the tiles + tile_no = group.tile.values + ref_tile = group.groupby(['tile']).get_group((tile_no[0],)) + marker_filter_map = list(ref_tile.groupby(["marker","filter"]).indices.keys()) + exist_ref = any_ref(marker_filter_map,ref_marker) if not exist_ref: - index_aux=list(index) - index_aux[-1]=1 - index_aux=tuple(index_aux) - aux_group=acq_group.get_group(index_aux) - aux_group=aux_group.loc[aux_group['marker']==ref_marker] - group=pd.concat( [group,aux_group] ) + index_aux = list(index) + index_aux[-1] = 1 + index_aux = tuple(index_aux) + aux_group = acq_group.get_group(index_aux) + aux_group = aux_group.loc[aux_group['marker']==ref_marker] + group = pd.concat([group, aux_group]) #group.to_csv(stack_output_dir.parent.absolute() /'info.csv' ) - groups_of_tiles=group.groupby(['tile']) - conformed_markers =conform_markers(marker_filter_map,ref_marker) - stack=init_stack(tile_no[0],groups_of_tiles,conformed_markers) - ome=ome_writer.create_ome(group,conformed_markers) - counter=0 - for tile_no,frame in groups_of_tiles: - for marker,filter in conformed_markers: - target_path=frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0] - stack[counter,:,:]=tifff.imread(Path(target_path)) - counter+=1 - stack_name =cast_stack_name(frame.cycle.iloc[0],index,conformed_markers) + groups_of_tiles = group.groupby(['tile']) + conformed_markers = conform_markers(marker_filter_map, ref_marker) + stack = init_stack(tile_no[0], groups_of_tiles, conformed_markers) + ome = ome_writer.create_ome(group, conformed_markers) + counter = 0 + + for tile_no, frame in groups_of_tiles: + for marker, filter in conformed_markers: + target_path = frame.loc[ (frame['marker']==marker) & (frame['filter']==filter) ].full_path.values[0] + stack[counter,:,:] = tifff.imread(Path(target_path)) + counter += 1 + stack_name = cast_stack_name(frame.cycle.iloc[0], index, conformed_markers) if ill_corr: - tag='corr_' - no_of_channels=len(conformed_markers) - stack=illumination_corr.apply_corr(stack,no_of_channels) + tag = 'corr_' + no_of_channels = len(conformed_markers) + stack = illumination_corr.apply_corr(stack,no_of_channels) else: - tag='' + tag = '' - stack_file_path= stack_output_dir/ '{prefix}{base}'.format(prefix=tag,base=stack_name) + stack_file_path = stack_output_dir/ f'{tag}{stack_name}' if extended_outputs: out['index'].append(index) @@ -242,9 +360,9 @@ def create_stack(cycle_info_df,output_dir,ref_marker='DAPI',hi_exp=False,ill_cor out['full_path'].append(stack_full_path) out['ome'].append(ome) else: - out['output_paths'].append( stack_output_dir ) + out['output_paths'].append(stack_output_dir) tifff.imwrite( stack_file_path , stack, photometric='minisblack' ) - ome,ome_xml=ome_writer.create_ome(group,conformed_markers) + ome,ome_xml = ome_writer.create_ome(group, conformed_markers) tifff.tiffcomment(stack_file_path, ome_xml) if extended_outputs: