Merge pull request #46 from AFM-SPM/Protein

Protein
AFM-SPM · Aug 16, 2021 · 4ad2ab1 · 4ad2ab1
2 parents e0ca55a + 26de956
commit 4ad2ab1
Show file tree

Hide file tree

Showing 4 changed files with 406 additions and 82 deletions.
diff --git a/Plotting.py b/Plotting.py
@@ -0,0 +1,296 @@
+from __future__ import unicode_literals
+
+import os
+import fnmatch
+import matplotlib.pyplot as plt
+from matplotlib import cm
+import pandas as pd
+import seaborn as sns
+import numpy as np
+import scipy
+import glob
+from scipy import stats
+from cycler import cycler
+
+# Global plot styling, applied once at import time. The calls below mutate seaborn/matplotlib global state and
+# are order-dependent: sns.set() comes first, then style, context and palette are layered on top of it.
+# Set seaborn to override matplotlib for plot output
+sns.set()
+sns.set_style("white", {'font.family': ['sans-serif']})
+# The four preset contexts, in order of relative size, are paper, notebook, talk, and poster.
+# The notebook style is the default
+# sns.set_context("notebook", font_scale=1.5)
+sns.set_context("poster", font_scale=1.4)
+# plt.style.use("dark_background")
+sns.set_palette(sns.color_palette('bright'))
+# File extension used for saved plots; it is the default value of the ``plotextension`` argument of the
+# plotting functions in this file.
+defextension = '.png'
+
+# Maps dataframe column names to axis labels.
+# A '%s' in a label is a placeholder for the length unit ('m' or 'nm') that labelunitconversion fills in.
+# A '^2' in a label marks a squared-unit (area) quantity; dataunitconversion relies on it to pick the scale factor.
+# Backslashes are escaped explicitly ('\\m') so the literals contain no invalid escape sequences; the resulting
+# strings are identical to the previous '\m' spelling.
+colname2label = {
+    'grain_bound_len': 'Circumference / %s',
+    'aspectratio': 'Aspect Ratio',
+    'grain_curvature1': 'Smaller Curvature',
+    'grain_curvature2': 'Larger Curvature',
+    'grain_ellipse_major': 'Ellipse Major Axis Length / %s',
+    'grain_ellipse_minor': 'Ellipse Minor Axis Length / %s',
+    'grain_half_height_area': 'Area Above Half Height / $\\mathregular{%s^2}$',
+    'grain_maximum': 'Maximum Height / %s',
+    'grain_mean': 'Mean Height / %s',
+    'grain_median': 'Median Height / %s',
+    'grain_min_bound_size': 'Width / %s',
+    'grain_max_bound_size': 'Length / %s',
+    'grain_mean_radius': 'Mean Radius / %s',
+    'grain_pixel_area': 'Area / Pixels',
+    'grain_proj_area': 'Area / $\\mathregular{%s^2}$'
+}
+
+
+def importfromjson(path):
+    """Read the json file at ``path`` with pandas and return the resulting object.
+
+    The path is printed to stdout before the file is read.
+    """
+    print (path)
+    return pd.read_json(path)
+
+
+def savestats(path, dataframetosave):
+    """Save ``dataframetosave`` next to ``path`` as '<name>_evaluated.json' and '<name>_evaluated.txt'.
+
+    ``path`` is the path of the source data file; its extension is removed to build the output names.
+    The '.txt' file is written with ``DataFrame.to_csv``.
+    """
+    # splitext removes whatever extension is present instead of assuming a 5-character '.json' suffix
+    stem = os.path.splitext(path)[0]
+    # Call-style print with a single argument behaves the same on Python 2 and Python 3
+    print('Saving stats for: ' + str(os.path.basename(stem)) + '_evaluated')
+
+    dataframetosave.to_json(stem + '_evaluated.json')
+    dataframetosave.to_csv(stem + '_evaluated.txt')
+
+
+def pathman(path):
+    """Return the base path for plots derived from the data file ``path``, creating the plot directory.
+
+    A 'Plots' directory is created (if missing) next to the data file. The returned value is
+    '<directory>/Plots/<file name without extension>'; callers append a suffix and extension to it.
+    """
+
+    directory = os.path.dirname(path)
+    # splitext removes whatever extension is present instead of assuming a 5-character '.json' suffix
+    name = os.path.splitext(os.path.basename(path))[0]
+    savedir = os.path.join(directory, 'Plots')
+    try:
+        os.makedirs(savedir)
+    except OSError:
+        # The directory already existing is fine (also covers another process creating it between a check
+        # and the call); anything else is a genuine error.
+        if not os.path.isdir(savedir):
+            raise
+    return os.path.join(savedir, name)
+
+
+def labelunitconversion(plotarg, nm):
+    """Return the axis label for ``plotarg`` with the unit placeholder filled in.
+
+    The label is looked up in ``colname2label``; a name without an entry is used as the label itself.
+    If the label contains '%s', it is replaced by 'nm' when ``nm`` is exactly ``True`` and by 'm' otherwise.
+    """
+    text = colname2label.get(plotarg, plotarg)
+    if '%s' not in text:
+        # Nothing to substitute (dimensionless quantity or fixed unit)
+        return text
+    unit = 'nm' if nm is True else 'm'
+    return text % unit
+
+
+def dataunitconversion(data, plotarg, nm):
+    """Scale ``data`` from metres to nanometres when requested. Only nm and m are supported at the moment.
+
+    ``plotarg`` is the column name; its label in ``colname2label`` decides whether and how to scale:
+    labels with a '%s' unit placeholder are scaled by 1e9, or by 1e18 if the label also contains '^2'
+    (squared units). Labels without a placeholder, and column names that have no entry in
+    ``colname2label``, are returned unchanged. Scaling only happens when ``nm`` is exactly ``True``.
+    The input is not modified; a scaled result is a new object produced by the multiplication.
+    """
+    # Fall back to the column name for unknown columns, as labelunitconversion does, instead of raising KeyError
+    label = colname2label.get(plotarg, plotarg)
+    if nm is True and '%s' in label:
+        factor = 1e18 if '^2' in label else 1e9
+        data = data * factor
+    return data
+
+
+def plotkde(df, plotarg, grouparg=None, xmin=None, xmax=None, nm=False, specpath=None, plotextension=defextension):
+    """Create a KDE plot for the chosen variable and save it. Grouping optional.
+
+    df: dataframe holding the data; it is not modified.
+    plotarg: name of the column to plot.
+    grouparg: optional column name; if given, one curve is drawn per group and a legend is added.
+    xmin, xmax: optional x axis limits.
+    nm: the default unit is metre; pass nm=True to convert data and label to nanometre.
+    specpath: path of the data file used to derive the output location. If None, the module-level ``path``
+        (assigned under the if __name__ == '__main__' line) is used.
+    plotextension: file extension of the saved figure.
+
+    The figure is saved as '<Plots dir>/<data file name>_<plotarg>_KDE<plotextension>'.
+    """
+
+    # Call-style print with a single argument behaves the same on Python 2 and Python 3
+    print('Plotting kde of %s' % plotarg)
+
+    # Set the name of the file
+    if specpath is None:
+        specpath = path
+    savename = pathman(specpath) + '_' + plotarg + '_KDE' + plotextension
+
+    # Plot figure
+    fig, ax = plt.subplots(figsize=(15, 12))
+    # Simple KDE plot
+    if grouparg is None:
+        # Convert the unit on a separate object; writing the result back into df would rescale the data
+        # again on every later call with nm=True
+        values = dataunitconversion(df[plotarg], plotarg, nm)
+        values.plot.kde(ax=ax, alpha=1, linewidth=7.0)
+    # Grouped KDE plots
+    else:
+        subset = df[[grouparg, plotarg]].copy()
+        subset[plotarg] = dataunitconversion(subset[plotarg], plotarg, nm)
+        subset.groupby(grouparg)[plotarg].plot.kde(ax=ax, legend=True, alpha=1, linewidth=7.0)
+        handles, labels = ax.get_legend_handles_labels()
+        # Legend entries are listed in reverse plotting order
+        ax.legend(handles[::-1], labels[::-1], title=grouparg, loc='upper right')
+
+    # Label plot and save figure
+    plt.xlim(xmin, xmax)
+    plt.xlabel(labelunitconversion(plotarg, nm), alpha=1)
+    plt.ylabel('Probability Density', alpha=1)
+    plt.ticklabel_format(axis='both', style='sci', scilimits=(0, 0))
+    plt.savefig(savename)
+
+
+def plothist(df, plotarg, grouparg=None, xmin=None, xmax=None, bins=20, nm=False, specpath=None, plotextension=defextension):
+    """Create a histogram for the chosen variable and save it. Grouping optional.
+
+    df: dataframe holding the data; it is not modified.
+    plotarg: name of the column to plot.
+    grouparg: optional column name; if given, one histogram is drawn per group and a legend is added.
+    xmin, xmax: optional x axis limits.
+    bins: number of bins passed to the histogram.
+    nm: the default unit is metre; pass nm=True to convert data and label to nanometre.
+    specpath: path of the data file used to derive the output location. If None, the module-level ``path``
+        (assigned under the if __name__ == '__main__' line) is used.
+    plotextension: file extension of the saved figure.
+
+    The figure is saved as '<Plots dir>/<data file name>_<plotarg>_histogram<plotextension>'.
+    """
+
+    # Call-style print with a single argument behaves the same on Python 2 and Python 3
+    print('Plotting histogram of %s' % plotarg)
+
+    # Set the name of the file
+    if specpath is None:
+        specpath = path
+    savename = pathman(specpath) + '_' + plotarg + '_histogram' + plotextension
+
+    # Plot figure
+    fig, ax = plt.subplots(figsize=(15, 12))
+    # Simple histogram
+    if grouparg is None:
+        # Convert the unit on a separate object; writing the result back into df would rescale the data
+        # again on every later call with nm=True
+        values = dataunitconversion(df[plotarg], plotarg, nm)
+        values.plot.hist(ax=ax, alpha=1, linewidth=7.0, bins=bins)
+    # Grouped histogram
+    else:
+        subset = df[[grouparg, plotarg]].copy()
+        subset[plotarg] = dataunitconversion(subset[plotarg], plotarg, nm)
+        subset.groupby(grouparg)[plotarg].plot.hist(ax=ax, legend=True, alpha=1, linewidth=7.0, bins=bins)
+        handles, labels = ax.get_legend_handles_labels()
+        # Legend entries are listed in reverse plotting order
+        ax.legend(handles[::-1], labels[::-1], title=grouparg, loc='upper right')
+
+    # Label plot and save figure
+    plt.xlim(xmin, xmax)
+    plt.xlabel(labelunitconversion(plotarg, nm), alpha=1)
+    plt.ylabel('Count', alpha=1)
+    plt.ticklabel_format(axis='both', style='sci', scilimits=(0, 0))
+    plt.savefig(savename)
+
+
+def plotviolin(df, plotarg, grouparg=None, ymin=None, ymax=None, nm=False, specpath=None, plotextension=defextension):
+    """Create a violin plot for the chosen variable and save it. Grouping optional.
+
+    df: dataframe holding the data; it is not modified.
+    plotarg: name of the column to plot.
+    grouparg: optional column name; if given, one violin is drawn per group and the x axis is inverted.
+    ymin, ymax: optional y axis limits.
+    nm: the default unit is metre; pass nm=True to convert data and label to nanometre.
+    specpath: path of the data file used to derive the output location. If None, the module-level ``path``
+        (assigned under the if __name__ == '__main__' line) is used.
+    plotextension: file extension of the saved figure.
+
+    The figure is saved as '<Plots dir>/<data file name>_<plotarg>_violin<plotextension>'.
+    """
+
+    # Call-style print with a single argument behaves the same on Python 2 and Python 3
+    print('Plotting violin of %s' % plotarg)
+
+    # Set the name of the file
+    if specpath is None:
+        specpath = path
+    savename = pathman(specpath) + '_' + plotarg + '_violin' + plotextension
+
+    # Plot and save figures
+    fig, ax = plt.subplots(figsize=(15, 12))
+    # Single violin plot
+    if grouparg is None:
+        # Convert the unit on a separate object; writing the result back into df would rescale the data
+        # again on every later call with nm=True
+        values = dataunitconversion(df[plotarg], plotarg, nm)
+        ax = sns.violinplot(data=values)
+    # Grouped violin plot
+    else:
+        subset = df[[grouparg, plotarg]].copy()
+        subset[plotarg] = dataunitconversion(subset[plotarg], plotarg, nm)
+        ax = sns.violinplot(x=grouparg, y=plotarg, data=subset)
+        ax.invert_xaxis()  # Useful for topoisomers with negative writhe
+
+    # Label plot and save figure
+    plt.ylim(ymin, ymax)
+    plt.ylabel(labelunitconversion(plotarg, nm), alpha=1)
+    plt.xlabel(grouparg)
+    plt.savefig(savename)
+
+
+def plotjoint(df, arg1, arg2, xmin=None, xmax=None, ymin=None, ymax=None, nm=False, specpath=None, plotextension=defextension):
+    """Create a joint (regression) plot for two chosen variables and save it.
+
+    df: dataframe holding the data; it is not modified.
+    arg1, arg2: names of the columns plotted on the x and y axis respectively.
+    xmin, xmax, ymin, ymax: optional axis limits.
+    nm: the default unit is metre; pass nm=True to convert data and labels to nanometre.
+    specpath: path of the data file used to derive the output location. If None, the module-level ``path``
+        (assigned under the if __name__ == '__main__' line) is used.
+    plotextension: file extension of the saved figure.
+
+    The figure is saved as '<Plots dir>/<data file name>_<arg1>_and_<arg2><plotextension>'.
+    """
+
+    # Call-style print with a single argument behaves the same on Python 2 and Python 3
+    print('Plotting joint plot for %s and %s' % (arg1, arg2))
+
+    # Set the name of the file
+    if specpath is None:
+        specpath = path
+    savename = pathman(specpath) + '_' + arg1 + '_and_' + arg2 + plotextension
+
+    # Build a separate frame with the converted columns; writing the result back into df would rescale the
+    # data again on every later call with nm=True
+    plotdata = pd.DataFrame({
+        arg1: dataunitconversion(df[arg1], arg1, nm),
+        arg2: dataunitconversion(df[arg2], arg2, nm),
+    })
+
+    # Plot data using seaborn; x and y are passed by keyword because newer seaborn releases no longer
+    # accept them positionally
+    sns.jointplot(x=arg1, y=arg2, data=plotdata, kind='reg', height=15)
+    plt.xlim(xmin, xmax)
+    plt.ylim(ymin, ymax)
+    plt.xlabel(labelunitconversion(arg1, nm), alpha=1)
+    plt.ylabel(labelunitconversion(arg2, nm), alpha=1)
+    plt.savefig(savename)
+
+
+def plotLinearVsCircular(contour_lengths_df):
+    """Placeholder: not implemented yet. Accepts ``contour_lengths_df``, does nothing and returns None."""
+
+
+# Script section. Everything below runs only when the file is executed directly, so importing the module
+# for its plotting functions neither touches the data file nor produces plots.
+if __name__ == '__main__':
+    # Path to the json file, e.g. C:\\Users\\username\\Documents\\Data\\Data.json
+    # The plotting functions fall back to this module-level ``path`` when no specpath is passed.
+    path = 'C:\\Users\\dumin\\Documents\\PhD\\Data\\Kavit-Top1\\Non-incubation\\Non-incubation.json'
+
+    # Set the name of the json file to import here
+    # name = 'Non-incubation'
+    bins = 50
+
+    # import data from the json file specified as a dataframe
+    df = importfromjson(path)
+    # Rename directory column as appropriate
+    df = df.rename(columns={"directory": "Experimental Conditions"})
+    # Calculate the aspect ratio for each grain
+    df['aspectratio'] = df['grain_min_bound_size'] / df['grain_max_bound_size']
+    # Get list of unique directory names i.e. topoisomers
+    # # topos = df['Proteins'].unique()
+    # # topos = sorted(topos, reverse=False)
+
+    # Convert original (rounded) delta Lk to correct delta Lk
+    # # dfnew = df
+    # # dfnew['Proteins'] = df['Proteins'].astype(str).replace({'-2': '-1.8', '-3': '-2.8', '-6': '-4.9'})
+    # Get list of unique directory names i.e. topoisomers
+    # # newtopos = dfnew['Proteins']
+    # # newtopos = pd.to_numeric(newtopos, errors='ignore')
+    # # dfnew['Proteins'] = newtopos
+
+    # Obtain list of unique topoisomers
+    # # topos = df['Proteins'].unique()
+    # # topos = sorted(topos, reverse=False)
+
+    # Get statistics for different topoisomers
+    # # allstats = df.groupby('Proteins').describe()
+    # transpose allstats dataframe to get better saving output
+    # # allstats1 = allstats.transpose()
+    # Save out statistics file
+    # # savestats(path, allstats1)
+    # Set palette for all plots with length number of topoisomers and reverse
+    # # palette = sns.color_palette('PuBu', n_colors=len(topos))
+
+    # Setting group argument
+    grouparg = 'Experimental Conditions'
+
+    # Setting a continuous colour palette; useful for certain grouped plots, but can be commented out if unsuitable.
+    sns.set_palette(sns.color_palette('BuPu', n_colors=len(df.groupby(grouparg))))
+    # print df.pivot(columns=grouparg, values='grain_median')
+
+
+    # Plot one column of the dataframe e.g. 'grain_mean_radius'; grouparg can be specified for plotkde, plothist and
+    # plotviolin by entering e.g. 'grouparg=grouparg'; xmin and xmax can be specified for plotkde, plothist, and
+    # plotjoint by entering e.g. 'xmin=0'; ymin and ymax can be specified for plotviolin and plotjoint; bins can be
+    # specified for plothist. The default unit is m; add "nm=True" to change from m to nm.
+
+    # plotkde(df, 'grain_bound_len',  xmin=0, xmax=1e-7)
+    # plotkde(df, 'grain_mean_radius')
+    # plotkde(df, 'grain_proj_area', nm=True)
+    # plotkde (df, 'aspectratio')
+    # plotkde(df, 'grain_min_bound_size', nm=True, grouparg=grouparg)
+    # plotkde(df, 'grain_max_bound_size', xmax=3.5e-8)
+    # plotkde(df, 'grain_half_height_area', grouparg=grouparg, nm=True)
+    # plothist(df, 'grain_min_bound_size', xmax=2.5e-8, bins=bins)
+    # plothist(df, 'grain_proj_area', xmax=3e-16)
+    # plothist(df, 'grain_half_height_area', grouparg=grouparg)
+    plotviolin(df, "grain_proj_area", grouparg=grouparg)
+    # plotjoint(df, 'grain_bound_len', 'grain_mean_radius', xmax=200, ymax=20, nm=True)