diff --git a/autostreamtree/functions.py b/autostreamtree/functions.py index 84d8983..a7e4bd2 100644 --- a/autostreamtree/functions.py +++ b/autostreamtree/functions.py @@ -249,7 +249,7 @@ def parse_subgraph_from_points(params, point_coords, pop_coords, G): else: print( "NOTE: Not over-writing existing network. To change this, use", - "--overwrite" + "--overwrite" ) network_plot = str(params.out) + ".subGraph.pdf" @@ -440,7 +440,7 @@ def parse_input_genmat(params, inmat, point_coords, popmap, seqs=None): del inmat elif params.geopop or params.clusterpop: if (len(inmat.columns)) != len(point_coords): - print("Found", str(len(inmat.columns)), + print("Found", str(len(inmat.columns)), "columns in provided matrix. This doesn't match number", "of individuals.") print("When using --geopop or --clusterpop, the provided", @@ -1233,7 +1233,7 @@ def extract_minimal_existing(subgraph, graph, nodelist, id_col, dist_col, graph (NetworkX Graph): The input graph. nodelist (list): The list of nodes. id_col (str): The column name for edge ID. - dist_col (str): Column name for dist attribute + dist_col (str): Column name for dist attribute path (list): The path between nodes. """ curr_edge = {id_col: list(), dist_col: 0} @@ -1285,7 +1285,7 @@ def write_geodataframe(gdf, output_prefix, output_driver): extension = { "SHP": ".shp", "GPKG": ".gpkg", - "GDB": ".gdb" + "GDB": ".gdb" }.get(output_driver.upper(), ".gpkg") # Default to .gpkg output_path = f"{output_prefix}{extension}" diff --git a/autostreamtree/genetic_distances.py b/autostreamtree/genetic_distances.py index 36c224a..5f726f3 100644 --- a/autostreamtree/genetic_distances.py +++ b/autostreamtree/genetic_distances.py @@ -184,7 +184,7 @@ def get_genmat(dist, points, seqs, ploidy, het, loc_agg): genmat[:] = np.nan # NOT USED CURRENTLY - # for models which relax equal nuc frequencies, get global frequencies + # for models which relax equal nuc frequencies, get global frequencies # for each locus # freqs will be a list of loci, with each locus as a dist of freqs # if dist in ["TN84", "TN93"]: @@ -459,19 +459,15 @@ def hamming_distance(seq1, seq2): # e.g. ['A/A', 'A/B', 'B/B', ...]. # s2 (List[str]): A list of phased genotypes from population 2, # e.g. ['A/A', 'A/C', 'C/C', ...]. - # Returns: # float: The Nei's 1983 Da estimator. - # """ # # Clean the input lists by removing individuals with unknown or gap # alleles. # s1 = clean_list(s1, ["n", "?", "-", "N"]) # s2 = clean_list(s2, ["n", "?", "-", "N"]) - # # Get the list of unique alleles from both populations. # uniques = uniq_alleles(s1+s2) - # # Compute the sum of squared roots of frequencies for each allele. # sumSqRt = 0.0 # for allele in uniques: @@ -499,11 +495,9 @@ def hamming_distance(seq1, seq2): # """ # # Get unique alleles in the two populations # uniques = seq.uniq_alleles(s1+s2) - # # Clean the sequences by removing any unknown or gap alleles # s1 = clean_list(s1, ["n", "?", "-", "N"]) # s2 = clean_list(s2, ["n", "?", "-", "N"]) - # # Calculate Euclidean distance # sumSq = 0.0 # for allele in uniques: @@ -789,4 +783,3 @@ def uniq_alleles(s): set: A set of unique alleles. """ return set(sum([x.split("/") for x in s], [])) - diff --git a/autostreamtree/params.py b/autostreamtree/params.py index b2bd0e7..8447ff4 100644 --- a/autostreamtree/params.py +++ b/autostreamtree/params.py @@ -17,7 +17,7 @@ def __init__(self): "loc_agg=", "pop_agg=", "sdist_agg=", "clusterpop", "epsilon=", "min_samples=", "sclusterpop", "network=", "overwrite", "reachid_col=", "length_col=", "coercemat", - "locmatdir=", "vcf=", "concat=", "edge_list=", "gdf_out=", + "locmatdir=", "vcf=", "concat=", "edge_list=", "gdf_out=", "seed="] ) except getopt.GetoptError as err: