Trials.Rmd

---
jupyter:
  jupytext:
    formats: ipynb,Rmd
    text_representation:
      extension: .Rmd
      format_name: rmarkdown
      format_version: '1.2'
      jupytext_version: 1.7.1
  kernelspec:
    display_name: Python 3
    language: python
    name: python3
---

```{python}
import drosophila_utils
import powerlaw 

# Network analysis
import networkx as nx
from scipy import sparse
from fast_pagerank import pagerank_power
# Data handling
import pandas as pd
# Visualization
import matplotlib
import matplotlib.pyplot as plt
#import igraph as ig
#import cairocffi as cairo
# Miscellaneous
import time
import os
import numpy as np
from tqdm import tqdm
```

```{python}
# Global switch for the expensive cells: set to False to skip the computations
# guarded by `if PATIENCE:` and to shorten the clustering experiment further down.
PATIENCE = True
```

## Importing datasets
- neurons_dataframe contains the neuron number (body_id). The body_id is a [unique identifier](https://en.wikipedia.org/wiki/Unique_identifier#:~:text=A%20unique%20identifier%20(UID)%20is,with%20an%20atomic%20data%20type.). It also contains the neuron cell type (type) and the neuron's unique name (instance);
- synapses_dataframe contains the sparse connectivity matrix of the network;
- roi_dataframe also contains the region of interest (ROI) of each connection, but some connections are repeated, i.e. the same pair of connecting neurons appears in different regions (maybe because the assignment is not certain).

https://asajadi.github.io/fast-pagerank/

```{python}
# Load the three hemibrain tables through the project helper module.
# The file imports `drosophila_utils`; no name `utils` is ever bound, so the
# helper has to be reached through the imported module name.
neurons_dataframe, synapses_dataframe, roi_dataframe = drosophila_utils.read_datasets()
```

```{python}
# Preview the first rows of the neuron table.
neurons_dataframe.head()
```

```{python}

# Preview the synapse table and compare the number of distinct 'synaps'
# values with the number of rows.
print(synapses_dataframe.head())
print(f"Number of unique synapses: {synapses_dataframe['synaps'].nunique()}")
print(f"Length of synapses dataset: {len(synapses_dataframe)}")
# NOTE: the weight of the synapses dataset is the sum of all the weights from the ROI
```

```{python}

```

#### Hemibrain regions of interest

```{python}


# sanity check: every row of roi_dataframe must carry a distinct 'synaps' value
assert roi_dataframe['synaps'].nunique() == len(roi_dataframe)
```

```{python}
# key is superset, value is list of roi
# Each ROI must belong to exactly one superset, otherwise the ROI -> superset
# mapping is ambiguous. Two defects were removed from the original lists:
#   - 'IB' appeared twice under 'INP';
#   - 'SPS(L)'/'SPS(R)' appeared under both 'SNP' and 'VMNP'; they are kept only
#     under 'VMNP' (SNP groups the SIP/SLP/SMP neuropils).
roi_supersets = {
    'AL': ['AL(L)', 'AL(R)'],
    'CX': ['FB', 'EB', 'PB', 'NO', 'AB(L)', 'AB(R)'],
    'GNG': ['GNG'],
    'INP': ['CRE(L)', 'CRE(R)', 'SCL(L)', 'SCL(R)', 'IB', 'ICL(L)', 'ICL(R)', 'ATL(L)', 'ATL(R)'],
    'LH': ['LH(R)'],
    'LX': ['BU(L)', 'BU(R)', 'LAL(L)', 'LAL(R)'],
    'MB': ['CA(R)', 'PED(R)', "a'L(L)", "a'L(R)", 'aL(L)', 'aL(R)', "b'L(L)", "b'L(R)", 'bL(L)', 'bL(R)', 'gL(L)', 'gL(R)'],
    'OL': ['AME(R)', 'ME(R)', 'LO(R)', 'LOP(R)'],
    'PENP': ['SAD', 'CAN(R)', 'FLA(R)'],
    'SNP': ['SIP(L)', 'SIP(R)', 'SLP(R)', 'SMP(L)', 'SMP(R)'],
    'VLNP': ['AOTU(R)', 'AVLP(R)', 'PVLP(R)', 'PLP(R)', 'WED(R)'],
    'VMNP': ['VES(L)', 'VES(R)', 'EPA(L)', 'EPA(R)', 'GOR(L)', 'GOR(R)', 'SPS(L)', 'SPS(R)', 'IPS(R)'],
    'NotPrimary': ['NotPrimary'],
}

#roi_dataframe['superset'] = utils.map_roi_to_superset(roi_dataframe['roi'].tolist(), roi_supersets)
roi_dataframe.head()
```

```{python}
# sanity check: report every ROI label present in the data that is not listed
# under any superset
roi = list(roi_dataframe['roi'].unique())
removed = []
for label in roi:
    covered = any(label in members for members in roi_supersets.values())
    if covered and label not in removed:
        removed.append(label)
remains = [label for label in roi if label not in removed]
print(f"Non-included sections: {remains}")

```

## Graph

```{python}
# Node attribute: the neuron type, looked up by bodyId straight from the dataframe
nodes = neurons_dataframe[['bodyId', 'type']]
nodes_attr_dict = nodes.set_index('bodyId')['type'].to_dict()

# Edge attributes: one {synaps: value} dictionary per column of interest
links = roi_dataframe[['synaps', 'roi', 'superset', 'weight']]
_links_by_synaps = links.set_index('synaps')
links_attr_dict_1 = _links_by_synaps['roi'].to_dict()
links_attr_dict_2 = _links_by_synaps['superset'].to_dict()
links_attr_dict_3 = _links_by_synaps['weight'].to_dict()
```

```{python}

# Inspect the value and the Python type stored in the 'synaps' column (label 0).
print((roi_dataframe['synaps'][0]))
print(type(roi_dataframe['synaps'][0]))
```

```{python}

# Build the undirected graph: one node per bodyId, one edge per 'synaps' entry.
graph = nx.Graph()

# Nodes carry the neuron 'type' attribute.
graph.add_nodes_from(neurons_dataframe['bodyId'])
nx.set_node_attributes(graph, nodes_attr_dict, 'type')

# Edges carry their 'roi' and 'superset'; the 'weight' attribute is deliberately
# left unset (line below is commented out).
# NOTE(review): presumably each 'synaps' value is a (pre, post) tuple — confirm.
graph.add_edges_from(roi_dataframe['synaps'])
nx.set_edge_attributes(graph, links_attr_dict_1, 'roi')
nx.set_edge_attributes(graph, links_attr_dict_2, 'superset')
# nx.set_edge_attributes(graph, links_attr_dict_3, 'weight')
```

```{python}
# sanity check: show the first node and the first edge together with their
# attributes. The identifiers are taken from the graph itself instead of being
# hard-coded, so the printed attributes always belong to the printed node/edge.
first_node = next(iter(graph.nodes))
print(f"Node: {first_node}")
print(f"Node attributes: {graph.nodes[first_node]}")

first_edge = next(iter(graph.edges))
print(f"Edge: {first_edge}")
print(f"Edge attributes: {graph.edges[first_edge]}")
```

#### Adjacency matrix

```{python}
# Numeric code of each superset. This rebinds `roi_supersets`: from here on the
# dictionary maps superset name -> integer code (no longer name -> list of ROI).
roi_supersets = {
    'AL': 1,
    'CX': 2,
    'GNG': 3,
    'INP': 4,
    'LH': 5,
    'LX': 6,
    'MB': 7,
    'OL': 8,
    'PENP': 9,
    'SNP': 10,
    'VLNP': 11,
    'VMNP': 12,
    'NotPrimary': 13,
}

# Same idea for the single ROI labels: codes start at 1 and follow the order
# returned by `unique()`.
roi_numerical = {
    label: code
    for code, label in enumerate(roi_dataframe['roi'].unique(), start=1)
}


def map_roi_to_number(links):
    """Return the `roi_numerical` code of every ROI label in `links`, as a list."""
    return [roi_numerical[label] for label in links]


def map_superset_to_number(links):
    """Return the `roi_supersets` code of every superset name in `links`, as a list."""
    return [roi_supersets[name] for name in links]


temp = roi_dataframe.copy()
temp['superset_number'] = map_superset_to_number(roi_dataframe['superset'].tolist())
temp['roi_number'] = map_roi_to_number(roi_dataframe['roi'].tolist())


# map the synaps superset to numeric categorical, and use it as weight
temp['weight'] = temp['superset_number']
links = temp[['synaps', 'roi', 'superset', 'weight']]
links_attr_dict_1 = links.set_index('synaps')['weight'].to_dict()
# nx.set_edge_attributes(graph, links_attr_dict_1, 'weight')

```

```{python}
# Discrete palette for the adjacency-matrix plot: 15 colours and the integer
# boundaries 0..14 used to bin the matrix values.
colors = ['#000000', '#67e682', '#cccccc', '#f3747e', '#8095cc', '#452664', 
          '#f2d7aa', '#b9f6d5', '#dcfe17', '#2727e4', '#38f5a5', '#c44048',
          '#CC5B01', '#0793c6', '#ffffff']

scale = list(range(0,15))

# NOTE(review): 15 boundaries delimit 14 bins while 15 colours are supplied —
# confirm the value -> colour assignment is the intended one.
cmap=matplotlib.colors.ListedColormap(colors)
norm=matplotlib.colors.BoundaryNorm(scale, len(colors))
```

```{python}
# Superset names, in the order used for the colour-bar labels below.
print(list(roi_supersets.keys()))
```

```{python}
# Dense adjacency matrix of the whole graph, drawn with the discrete colour map.
# NOTE(review): the lines that would store the superset code as edge 'weight'
# are commented out above, so the entries are presumably only 0/1 — confirm.
data = nx.adjacency_matrix(graph).astype(np.int8).todense()
f = plt.figure(figsize=(15,15))
ax = plt.axes([0, 0.05, 0.9, 0.9 ]) #left, bottom, width, height
im = ax.matshow(data, cmap=cmap, norm=norm)
cax = plt.axes([0.95, 0.05, 0.05,0.9 ])
# 14 ticks placed at 0.5, 1.5, ..., 13.5 (one per colour band that gets a label)
cbar = plt.colorbar(mappable=im, cax=cax, ticks=np.linspace(0, 14, 14, endpoint=False) + 1/2, spacing='uniform', label='Superset connection type')
# cbar.ax.set_yticklabels(['< -1', '0', '> 1'])  # vertically oriented colorbar
# `ticks` is not used afterwards in this cell
ticks = [str(i) for i in range(1,14)]
cbar.ax.set_yticklabels(['No link'] + list(roi_supersets.keys()))  # vertically oriented colorbar
cbar.ax.axes.tick_params(length=5)
plt.show()
```

#### Non-randomness coefficient

```{python}
# Time-consuming: compute the non-randomness of the graph and print the second
# value returned by networkx together with the elapsed wall-clock time.
if PATIENCE:
    start_time = time.time()
    loc, glob = nx.non_randomness(graph)
    print(glob)
    end_time = time.time()
    print("Total time: {:.2f} seconds".format((end_time - start_time)))
```

#### Degree distribution and properties


See https://stackoverflow.com/questions/49908014/how-can-i-check-if-a-network-is-scale-free and https://pypi.org/project/powerlaw/

```{python}
# Degree of every node, and its mean over the graph.
degree = np.array([ d for n, d in graph.degree()])
print(degree.mean())
```

```{python}
# used for degree distribution and powerlaw test
# reverse = True to have the cumulative distribution
degree_sequence = sorted([d for n, d in graph.degree()], reverse=True) 
# Power laws are probability distributions with the form:p(x)∝x−α
# NOTE: xmin is the data value beyond which distributions should be fitted. 
# If None an optimal one will be calculated
fit = powerlaw.Fit(degree_sequence) 
fig2 = fit.plot_pdf(color='b', linewidth=2)
fit.power_law.plot_pdf(color='g', linestyle='--', ax=fig2)
plt.legend(["data", "power law"])
R, p = fit.distribution_compare('power_law', 'exponential', normalized_ratio=True)
print (f"Loglikelihood: {R:.2f}, p-value: {p:.2e}")
```

```{python}
# Complementary CDF of the data against three candidate distributions.
plt.figure(figsize=(10, 6))
# The result of this comparison is not stored or printed.
fit.distribution_compare('power_law', 'lognormal')
fig4 = fit.plot_ccdf(linewidth=3, color='black')
fit.power_law.plot_ccdf(ax=fig4, color='r', linestyle='--') #powerlaw
fit.lognormal.plot_ccdf(ax=fig4, color='g', linestyle='--') #lognormal
fit.stretched_exponential.plot_ccdf(ax=fig4, color='b', linestyle='--') #stretched_exponential
plt.legend(["data", "power law", "log-normal", "stretched_exponential"])
```

```{python}
# Histogram of the node degrees over 100 equal-width bins.
#bins = np.logspace(0, 4, 100)
bins = 100
y, x = np.histogram(degree, bins=bins)
# Replace the bin edges with the bin centres so that x and y have equal length
x = (x[1:]+x[:-1])/2
```

```{python}
# plot degree bins
# Both axes are switched to logarithmic scale before plotting the bin counts.
plt.loglog()
plt.plot(x, y, 'ro')
plt.xlabel('Log Degree log(k)')
plt.ylabel('Log Frequency ')
plt.title('Degree distribution')
plt.grid()
plt.show()
```

# Coarse graining 
To apply our coarse graining procedure we aggregate the nodes with their nearest neighbors, where the concept of "nearest" is chosen using one of the following centrality measures:
- **local page rank**. The local page rank gives the proximity of the nodes with respect to the chosen node, so the node with the second highest score is merged with the node for which the local page rank is computed. Networkx creates the sparse matrix each time, which is a time-consuming task. We need to compute the page rank several times, and so we chose to use another representation;
- **link strength**. We merge the most connected or the least connected nodes (with the idea that they are not influencing the graph properties).


## Approximate visualization of main brain regions

```{python}
class CoarseGrainer:
    """
        Coarse grain a network stored as an edge list by repeatedly fusing a node
        with its "nearest" neighbour, while trying to keep the density M/N**2 fixed.

        Attributes:
            nodes: np array (N,) with the names of the surviving nodes
            connectivity: np array (M, 3) with rows (node_a, node_b, weight)
            N, M: current number of nodes / links
            powerlaw: np array (K, 2), one (alpha, sigma) row per statistics update
            avg_deg_conn: np array (K,) with the mean average degree connectivity
            density: list with M/N**2 at initialization and after every iteration
    """
    def __init__(self, nodes, connectivity):
        """
            Parameters:
                nodes: np array size (N,) with node name where N is number of nodes
                connectivity: np array size (M, 3) where M is the number of links. We have M<<N**2
        """
        self.nodes = np.asarray(nodes)
        # Private copy: fusion() rewrites link endpoints in place and must not
        # silently modify the array owned by the caller.
        self.connectivity = np.array(connectivity)
        self.N = self.nodes.size
        self.M = self.connectivity.shape[0]
        # Always 2-D so that plot_stat can slice columns even after one update
        self.powerlaw = np.empty((0, 2))
        self.avg_deg_conn = np.array([])
        self.density = [self.M/self.N**2]
        # Defaults that make fusion() a no-op until set_metrics() is called
        self.sel_node = None
        self.metric = np.empty((0, 2))
        self.minmax = 1
        print(f'Initialized with connectivity matrix of density: {self.M/self.N**2}')
        
    def set_metrics(self, metric_vect, node_id, minmax):
        """ 
            Define the metric vector and the node_id to be used at the next iteration
            Parameters:
                node_id: Name of the selected node
                metric_vect (np array(B, 2)): rows (connected_node, metric) of the network wrt @node_id
                minmax(int): 1 if looking for max of the metric, -1 if looking for min
        """
        self.sel_node = node_id
        self.metric = metric_vect
        self.minmax = minmax
        
    def fusion(self, min_links=10):
        """
            Select the nearest node to @self.sel_node. Fuse the two nodes together by keeping the density
            of connections constant, selecting randomly the links to keep among the possible ones.
            Eliminate self loops.
            Parameters:
                min_links (int): minimum number of links of the absorbed node that are kept
                                 (capped at the number of links it actually has)
            Does nothing when the metric is empty or only refers to the selected node itself.
        """
        if self.metric is None or np.size(self.metric) == 0:
            return
        # A self-loop of the selected node is not a neighbour it can be fused with
        candidates = self.metric[self.metric[:, 0] != self.sel_node]
        if candidates.shape[0] == 0:
            return
        # Select nearest node
        nearest_node_id = candidates[np.argmax(self.minmax*candidates[:, 1]), 0]
        # Eliminate the nearest node from the node list
        self.nodes = self.nodes[self.nodes != nearest_node_id]
        # Number of links to drop so that M/N**2 stays constant when N -> N-1
        dM = int(self.M*(2/self.N-1/self.N**2))
        # Rows of the links touching the nearest node (sorted, without repetitions)
        sources = self.connectivity[:, 0]
        targets = self.connectivity[:, 1]
        connections = np.where((sources == nearest_node_id) | (targets == nearest_node_id))[0]
        # Randomly select the connections by keeping the density fixed
        min_links = min(min_links, connections.size)
        n_keep = max(min_links, connections.size-dM)
        connections_selected = np.random.choice(connections, size=n_keep, replace=False)
        # (----?----) Add selection with probability proportional to the weight (----?----)
        connections_not_selected = np.setdiff1d(connections, connections_selected)
        # Join the kept links to the selected node: only the endpoint that was the
        # absorbed node is replaced. Overwriting both endpoints would turn every
        # kept link into a self loop, which is removed right below.
        from_nearest = connections_selected[sources[connections_selected] == nearest_node_id]
        to_nearest = connections_selected[targets[connections_selected] == nearest_node_id]
        self.connectivity[from_nearest, 0] = self.sel_node
        self.connectivity[to_nearest, 1] = self.sel_node
        # Eliminate non-selected connections and self loops
        self.connectivity = np.delete(self.connectivity, connections_not_selected, axis=0)
        self.connectivity = self.connectivity[self.connectivity[:, 0] != self.connectivity[:, 1]]

        # Update nodes number and connectivity
        self.N = self.nodes.size
        self.M = self.connectivity.shape[0]
        
    def _get_weight(self, node_id):
        """
            Get the weights of the links connecting @node_id to other nodes. In particular returns
            an array of size (B, 2) where the first column is the ID of the connected node and
            the second column contains the weights
        """
        temp1 = self.connectivity[:, 1:3][ self.connectivity[:,0]==node_id ]
        temp2 = self.connectivity[:, 0:3:2][ self.connectivity[:,1]==node_id ]
        return np.vstack( (temp1, temp2))
    
    def _upd_stat(self):
        """
            Updates the statistics. Notice that this function is very time consuming since we need
            to instantiate the graph object. In particular we compute:
                - The coefficient of the power law of the degree distribution
                - The average of the degree connectivity
        """
        # Graph instantiation
        synapses = list(zip(self.connectivity[:,0], self.connectivity[:,1]))
        graph_coarsed = nx.Graph()
        graph_coarsed.add_nodes_from(self.nodes)
        graph_coarsed.add_edges_from(synapses)
        # Degree distribution
        degree = sorted([d for n, d in graph_coarsed.degree()], reverse=True)
        fit = powerlaw.Fit(degree) 
        self.powerlaw = np.vstack( (self.powerlaw, [fit.alpha, fit.sigma]) )
        # Degree connectivity
        avg_degree_connectivity = nx.average_degree_connectivity(graph_coarsed)
        avg_degree_connectivity = np.array([i for k,i in avg_degree_connectivity.items()])
        self.avg_deg_conn = np.append(self.avg_deg_conn, int(avg_degree_connectivity.mean()) )
        
    def plot_stat(self):
        """
            Plot the two statistics collected in the experiment.
        """
        fig, ax = plt.subplots(1, 2, figsize=(12,8))
        ax[0].set_xlabel('Iteration $10^3$')
        ax[0].set_ylabel('Coefficient of the power law')
        ax[0].set_title('Evolution of the power law distribution')
        ax[0].plot(self.powerlaw[:,0], 'o--', color='blue', label='Coef')
        ax[0].fill_between(np.arange(self.powerlaw.shape[0]), (self.powerlaw[:,0]+self.powerlaw[:,1]),
                           (self.powerlaw[:,0]-self.powerlaw[:,1]), alpha=0.3, label='Error', color='green' )
        ax[0].legend()
        
        ax[1].set_xlabel('Iteration $10^3$')
        ax[1].set_ylabel('Average degree connectivity')
        ax[1].set_title('Evolution of the average degree connectivity')
        ax[1].plot(self.avg_deg_conn, 'o--', color='blue')
        plt.show()
        
        
    def _test(self, Niter, Nupd_stat=1000, min_links=10):
        """
            Apply the coarse graining for @Niter iterations and updating the statistics each @Nupd_stat iter.
            At every iteration a random node is drawn and fused with the neighbour it is
            linked to with the largest weight.
        """
        for i in tqdm(range(Niter)):
            node_id = np.random.choice(self.nodes)
            metric_vect = self._get_weight(node_id)
            self.set_metrics(metric_vect, node_id, +1)
            self.fusion(min_links=min_links)
            if i%Nupd_stat == 0:
                self._upd_stat()
            self.density.append(self.M/self.N**2)
        print(f'Finalized with connectivity matrix of density: {self.M/self.N**2}')
```

```{python}
# Edge-list representation used by the coarse grainer:
# `nodes` holds the body ids, `connectivity` has one (pre, post, weight) row per synapse.
nodes = np.array(neurons_dataframe['bodyId'])
M = len(synapses_dataframe['bodyId_pre'])
_columns = [
    np.array(synapses_dataframe[name])[:, None]
    for name in ('bodyId_pre', 'bodyId_post', 'weight')
]
connectivity = np.hstack(_columns)
```

```{python}
# Instantiate the coarse grainer on the full edge list.
coarse = CoarseGrainer(nodes, connectivity)
```

```{python}
# Run 20000 fusion iterations and report the elapsed wall-clock time.
start_time = time.time()
coarse._test(20000)
end_time = time.time()
print("Total time: {:.2f} seconds".format((end_time - start_time)))
```

```{python}
# Plot the statistics collected during the coarse-graining run.
coarse.plot_stat()
```


```{python}
# Density M/N**2 recorded during the run, sampled every 200 entries
# (the slice stops before the last recorded value).
d = np.array( coarse.density[0:-1:200] )
plt.plot(d)
plt.show()
```

```{python}
# Density of the coarse-grained network after the run.
coarse.M/coarse.N**2
```

```{python}
# Rebuild a networkx graph from the coarse-grained node list and edge list
# (the weight column is not transferred).
synapses= list(zip(coarse.connectivity[:,0], coarse.connectivity[:,1]))
graph_coarsed = nx.Graph()
graph_coarsed.add_nodes_from(coarse.nodes)
graph_coarsed.add_edges_from(synapses)
```

```{python}
# Degree distribution of the coarse-grained graph, on log-log axes.
degree = np.array([ d for n, d in graph_coarsed.degree()])
print(np.mean(degree))
#bins = np.logspace(0, 4, 100)
bins = 100
y, x = np.histogram(degree, bins=bins)
# bin edges -> bin centres
x = (x[1:]+x[:-1])/2
# plot degree bins
plt.loglog()
plt.plot(x, y, 'ro')
plt.xlabel('Log Degree log(k)')
plt.ylabel('Log Frequency ')
plt.title('Degree distribution')
plt.grid()
plt.show()
```

```{python}
# Dense adjacency matrix of the coarse-grained graph, drawn with the "binary" colour map.
matfig = plt.figure(figsize=(20,20))
adj = nx.adjacency_matrix(graph_coarsed).todense()
plt.matshow(adj, fignum=matfig.number, cmap=plt.get_cmap("binary"))
plt.show()
```

## Coarse graining through clustering

```{python}
from community import induced_graph
from sklearn.cluster import AgglomerativeClustering
from collections import Counter
```

```{python}
# Exploratory peek: print the most common 'superset' value of the first edge only
# (the loop stops after one iteration).
for node1, node2, datas in graph.edges(data=True):
    A = np.array(datas.get('superset', None))
    c = Counter(A.flat).most_common(1)
    d = c
    print(d)
    break
    #ret[node1, node2]
```

```{python}
def my_induced_graph(partition, graph, super_map):
    """
    Build the graph whose nodes are the communities of `partition`.

    Two communities are linked when at least one edge of `graph` joins their
    members. The new edge carries:
        - 'weight': sum of the 'weight' of the original edges (1 when missing);
        - 'superset': the most frequent 'superset' among the original edges
          (None when none of them has one).

    Parameters:
        partition: dictionary node -> community
        graph: networkx graph whose edges may carry 'weight' and 'superset'
        super_map: dictionary 'name': value, with values 1..len(super_map)

    Returns:
        networkx.Graph of the communities

    Raises:
        KeyError: if an edge has a 'superset' that is not a key of `super_map`
    """
    ret = nx.Graph()
    ret.add_nodes_from(partition.values())
    n_supersets = len(super_map)

    for node1, node2, datas in graph.edges(data=True):
        edge_weight = datas.get('weight', 1)
        edge_superset = datas.get('superset', None)

        com1 = partition[node1]
        com2 = partition[node2]

        previous = ret.get_edge_data(com1, com2)
        if previous is None:
            # First edge between the two communities: start from a fresh vote
            # vector. Sharing one array between "previous votes" and "current
            # vote" would count this first edge twice.
            w_prec = 0
            votes = np.zeros(n_supersets)
        else:
            w_prec = previous.get('weight', 1)
            votes = previous['superset']
        if edge_superset is not None:
            votes[super_map[edge_superset]-1] += 1

        ret.add_edge(com1, com2, **{'weight': w_prec + edge_weight,
                                   'superset': votes} )

    # Replace every vote vector with the name of the winning superset
    inv_map = {v-1: k for k, v in super_map.items()} # 'value' : name
    for com1, com2 in ret.edges():
        votes = ret[com1][com2]['superset']
        ret[com1][com2]['superset'] = inv_map[np.argmax(votes)] if votes.any() else None

    return ret
```

```{python}
class ClusterGrainer():
    """
    Coarse grain a graph by agglomerative clustering of its nodes.

    The nodes are clustered on a precomputed distance matrix, with the graph
    adjacency used as connectivity constraint; every cluster then becomes one
    node of the induced graph. One row of statistics is stored per analysed graph
    (row 0 is the graph given at construction):
        avg_deg (K, 2): mean and std of the degree
        avg_deg_con (K, 2): (truncated) mean and std of the average degree connectivity
        con_distr_density (K, 200), con_distr_bins (K, 201): histogram of the
            average degree connectivity
        power_law (K, 2): alpha and sigma of the power-law fit of the degree
        density (K,): number_of_edges / number_of_nodes**2
    """
    def __init__(self, graph, n_clusters, preserve_roi=False, supermap = None):
        """
        Parameters:
            graph: networkx graph to coarse grain
            n_clusters: number of clusters of the initial clusterer
            preserve_roi: if True the induced graphs keep the 'superset' edge
                attribute (majority vote), which requires `supermap`
            supermap: dictionary superset name -> integer code

        Raises:
            ValueError: if `preserve_roi` is True and `supermap` is None
        """
        if preserve_roi and (supermap is None):
            raise ValueError("Param 'preserve_roi' is True but 'supermap' has not been defined")
        self.preserve_roi = preserve_roi
        self.supermap = supermap

        self.G = graph
        self.adj = nx.adjacency_matrix(self.G).todense()
        self.nodes = np.array([n for n in self.G.nodes()])
        self._set_clust(n_clusters)
        # Statistics: start empty but already 2-D, so that plot_stats can slice
        # columns even before any experiment has been run.
        self.avg_deg = np.empty((0, 2))
        self.avg_deg_con = np.empty((0, 2))
        self.con_distr_density = np.empty((0, 200))
        self.con_distr_bins = np.empty((0, 201))
        self.power_law = np.empty((0, 2))
        self.density = np.array([])
        self._upd_stats(self.G)
        
    def predict(self, metric_mat):
        """Cluster the nodes on the precomputed distances `metric_mat`; return {node: cluster}."""
        clusts = self.clusterer.fit_predict(metric_mat)
        return dict(zip(self.nodes, clusts))
    
    def zip_graph(self, partitions, inplace=True):
        """
        Build the graph induced by `partitions`.

        With `inplace=True` the stored graph, adjacency and node list are replaced
        by the induced ones. The induced graph is returned in both cases, so
        callers can use the result independently of `inplace`.
        """
        if self.preserve_roi:
            new_graph = my_induced_graph(partitions, self.G, self.supermap)
        else:
            new_graph = induced_graph(partitions, self.G)
        if inplace:
            self.G = new_graph
            self.adj = nx.adjacency_matrix(self.G).todense()
            self.nodes = np.array([n for n in self.G.nodes()])
        return new_graph
        
    def _upd_stats(self, graph ):
        """Compute the statistics of `graph` and append them as a new row."""
        degree = sorted([d for n, d in graph.degree()], reverse=True)
        avg_degree_con = nx.average_degree_connectivity(graph)
        avg_degree_con = np.array([i for k,i in avg_degree_con.items()])
        fit = powerlaw.Fit(degree) 
        
        self.avg_deg = np.vstack( (self.avg_deg, [np.mean(degree), np.std(degree)])  )
        self.avg_deg_con = np.vstack( (self.avg_deg_con, [int(avg_degree_con.mean()), avg_degree_con.std()] ))
        temp_density, temp_bins = np.histogram(avg_degree_con, 200, density=True) 
        self.con_distr_density = np.vstack( (self.con_distr_density, temp_density) )
        self.con_distr_bins = np.vstack( (self.con_distr_bins, temp_bins) )
        self.power_law = np.vstack( (self.power_law, [fit.alpha, fit.sigma]) )
        self.density = np.append( self.density, graph.number_of_edges()/graph.number_of_nodes()**2 )
        
    def _set_clust(self, n_cl):
        """Create the clusterer for `n_cl` clusters, constrained by the current adjacency."""
        params = dict(n_clusters=n_cl, connectivity=np.sign(self.adj), linkage='single')
        try:
            # Recent scikit-learn: the keyword `affinity` was renamed to `metric`
            self.clusterer = AgglomerativeClustering(metric='precomputed', **params)
        except TypeError:
            # Older scikit-learn that only knows `affinity`
            self.clusterer = AgglomerativeClustering(affinity='precomputed', **params)
        
    def experiment(self, n_clusters, metric_mat, inplace=False, save=False):
        """
        For every value of `n_clusters` cluster the nodes, build the induced graph
        and append its statistics.

        Parameters:
            n_clusters: iterable with the number of clusters of each iteration
            metric_mat: precomputed distance matrix between the current nodes
            inplace: if True every induced graph replaces the stored one
            save: if True every induced graph is pickled to 'graph_iter_<k>.gpickle'

        NOTE(review): with inplace=True the node set shrinks after the first
        iteration while `metric_mat` stays the same — confirm this is intended.
        """
        import pickle  # only needed for `save`; kept local to this method

        iteration_number = 1
        for cl in tqdm(n_clusters):
            self._set_clust(cl)
            partitions = self.predict(metric_mat)
            new_graph = self.zip_graph(partitions, inplace=inplace)
            if save:
                # Plain pickle: the networkx gpickle helpers no longer exist in networkx 3
                with open(f"graph_iter_{iteration_number}.gpickle", "wb") as handle:
                    pickle.dump(new_graph, handle, pickle.HIGHEST_PROTOCOL)
                iteration_number = iteration_number + 1
            self._upd_stats(new_graph)
    
    def plot_stats(self, sizes):
        """
        Plot the collected statistics against `sizes`, the number of nodes of
        every analysed graph (one entry per stored statistics row).
        """
        fig, ax = plt.subplots(2, 2, figsize=(12, 12))
        ax = ax.flatten()
        ax[0].set_xlabel('Size of the network [# of nodes]')
        ax[0].set_ylabel('Coefficient of the power law')
        ax[0].set_title('Evolution of the power law distribution')
        ax[0].plot(sizes, self.power_law[:,0], 'o--', color='blue', label='Coef')
        ax[0].fill_between(sizes, (self.power_law[:,0]+self.power_law[:,1]),
                           (self.power_law[:,0]-self.power_law[:,1]), alpha=0.3, label='Error', color='green' )
        ax[0].legend()
        
        ax[1].set_xlabel('Size of the network [# of nodes]')
        ax[1].set_ylabel('Density #edges/(#nodes)^2')
        ax[1].set_title('Evolution of the network density')
        ax[1].plot(sizes, self.density, 'o--', color='blue')
        
        ax[2].set_xlabel('Size of the network [# of nodes]')
        ax[2].set_ylabel('Average degree')
        ax[2].set_title('Evolution of the average degree')
        ax[2].plot(sizes, self.avg_deg[:,0], 'o--', color='blue', label='Average')
        ax[2].fill_between(sizes, (self.avg_deg[:,0]+self.avg_deg[:,1]),
                           (self.avg_deg[:,0]-self.avg_deg[:,1]), alpha=0.3, label='Error', color='green' )
        ax[2].legend()
        
        ax[3].set_xlabel('Size of the network [# of nodes]')
        ax[3].set_ylabel('Average degree connectivity')
        ax[3].set_title('Evolution of the average degree connectivity')
        ax[3].plot(sizes, self.avg_deg_con[:,0], 'o--', color='blue', label='Average')
        ax[3].fill_between(sizes, (self.avg_deg_con[:,0]+self.avg_deg_con[:,1]),
                           (self.avg_deg_con[:,0]-self.avg_deg_con[:,1]), alpha=0.3, label='Error', color='green' )
        ax[3].legend()
        
        plt.show()
```

```{python}
# Work on a copy so the original graph is left untouched.
g = graph.copy()

# following correctly raises error
# Grainer = ClusterGrainer(g, 10000, preserve_roi=True, supermap=None)
# following correctly works
# (`roi_supersets` is the superset name -> integer code dictionary at this point)
Grainer = ClusterGrainer(g, 10000, preserve_roi=True, supermap=roi_supersets)
```

### Upload metric from pickle

```{python}
# Numbers of clusters to try: 20000, 19500, ..., 500.
clusters = np.arange(20000, 0, -500)
# NOTE(review): allow_pickle=True unpickles arbitrary objects — only load files from a trusted source.
weight_metric = np.load('exported-traced-adjacencies/pagerank_computed.npy', allow_pickle=True)
```

```{python}
# Element-wise inverse of the loaded values, used below as clustering distance.
# NOTE(review): zero entries would become inf — confirm the loaded array has none.
metric = 1/weight_metric
```

```{python}
# Without PATIENCE only three cluster sizes are tried.
if not PATIENCE:
    r_clusters = clusters[1:4]
else:
    r_clusters = clusters
        
Grainer.experiment(r_clusters, metric, save=True)
# Network size of every statistics row: the original graph first, then each cluster count.
sizes = np.insert(r_clusters, 0, graph.number_of_nodes())
```

```{python}
# Plot the statistics of the experiment against the network sizes.
Grainer.plot_stats(sizes)
```

```{python}
# Number of analysed network sizes (original graph included).
len(sizes)
```

```{python}
# Shorthands for the stored histograms of the average degree connectivity.
con_distr_bins = Grainer.con_distr_bins
con_distr_density = Grainer.con_distr_density
```

```{python}
#con_distr_bins = np.load('bins.npy')
#con_distr_density = np.load('density.npy')
```

```{python}
# Overlay the average-degree-connectivity histogram of the original network
# (row 0) with the histograms of a few coarse-grained networks.
default_bins = Grainer.con_distr_bins[0]
default_density = Grainer.con_distr_density[0]
default_center = (default_bins[:-1] + default_bins[1:]) / 2
def_width = default_bins[1] - default_bins[0]

fig, ax = plt.subplots( figsize=(12,8))
# sizes[0] is the number of nodes of the original graph (no hard-coded value)
ax.bar(default_center, default_density, width=def_width, alpha=1, label=f'{sizes[0]} neurons')

# Keep only the rows that exist: a shortened experiment has fewer rows than 31
n_rows = min(len(sizes), len(Grainer.con_distr_bins))
indexes = [i for i in (5, 20, 30) if i < n_rows]
for i in indexes:
    # current values
    bins = Grainer.con_distr_bins[i]
    hist = Grainer.con_distr_density[i]
    width = (bins[1] - bins[0])
    center = (bins[:-1] + bins[1:]) / 2
    ax.bar(center, hist, width=width, alpha=0.6, label=f'{sizes[i]} neurons')

# Decorations are set once, after all the bars have been drawn
ax.legend()
ax.set_title("Average degree connectivity distribution")
ax.set_ylabel("Probability")
ax.set_xlabel("Average degree connectivity")

ax.set_xlim(0, 1500)
plt.show()

```

```{python}
from matplotlib.animation import FuncAnimation
class Animate:
    """
    Animated bar chart of the average-degree-connectivity distribution.

    Row 0 of `bins`/`distribution` is drawn as a fixed reference; the remaining
    rows are shown one per frame on top of it.
    """
    def __init__(self, bins, distribution, clusters, original_size=21733):
        """
        Parameters:
            bins: array (K, B+1) with the histogram edges of every network
            distribution: array (K, B) with the histogram densities of every network
            clusters: sequence with the number of nodes of rows 1..K-1
            original_size: number of nodes of the reference network (row 0),
                only used in the legend
        """
        self.fig , self.ax = plt.subplots( figsize=(12,8))
        self.ax.set_xlabel('Average degree connectivity', fontsize=16)
        self.ax.set_title("Average degree connectivity distribution", fontsize=20)
        self.ax.set_ylabel('Probability', fontsize=16)
        #self.ax.set_ylim([0, 0.04])#np.max(distribution)])
        # Parameters
        self.clusters = clusters
        self.distribution = distribution
        self.bins = bins
        self.width = (bins[:,1] - bins[:,0])
        self.center = (bins[:,:-1]+bins[:,1:])/2
        self.im = self.ax.bar(self.center[0,:], self.distribution[0,:], width=self.width[0],
                              label=f'Original: {original_size}', color='royalblue')
        self.moving = self.ax.bar(self.center[1,:], self.distribution[1,:], width=self.width[1], alpha=0.7, 
                                  label=f'Number of neurons: {self.clusters[0]}', color='limegreen')
        self.legend = self.ax.legend()
    
    def _update(self, i):
        """Draw frame `i`: row i+1 of the distribution, or the last row once they are exhausted."""
        row = min(i+1, len(self.distribution)-1)
        label_idx = min(i, len(self.clusters)-1)
        curr = self.distribution[row]
        width = self.width[row]
        for bar, height, cent in zip(self.moving, curr, self.center[row]):
            bar.set_height(height)
            bar.set_width(width)
            # set_x moves the LEFT edge of the rectangle, while the bars were
            # created centred on `cent`: shift by half a width to keep them centred
            bar.set_x(cent - width/2)
        # Legend and limits depend on the frame only, not on the single bar
        self.legend.texts[1].set_text(f'Number of neurons: {self.clusters[label_idx]}')
        self.ax.set_ylim([0, np.max(curr)+np.max(curr)/10])
        
    def sys_anim(self, interval=100, filename='prova.gif'):
        """Build the animation (one frame every `interval` ms) and save it to `filename`."""
        self.anim = FuncAnimation(self.fig, self._update, frames=len(self.clusters)+2, interval=interval)
        self.anim.save(filename)
```

```{python}
# Build the animation of the distributions and save it, one frame every 800 ms.
anim = Animate(con_distr_bins, con_distr_density, r_clusters)
anim.sys_anim(interval=800)

```

```{python}
# Nodes and links of the hand-made region-level graph.
# Note: this rebinds `links`, used above for the synapse table.
names = ['GNG', 'PENP', 'VMNP', 'CX', 'LX', 'AL', 'MB', 'INP', 'VLNP', 'OL', 'SNP', 'LH']
links = [('GNG', 'PENP'), ('PENP', 'VMNP'), ('PENP', 'CX'), ('PENP', 'LX'), ('VMNP', 'CX'),
         ('VMNP', 'LX'), ('CX', 'LX'), ('CX', 'AL'), ('LX', 'AL'), ('AL', 'INP'), 
         ('AL', 'MB'), ('INP', 'VLNP'), ('VLNP', 'OL'), ('MB', 'SNP'), ('SNP', 'LH')]
```

```{python}
# Build the region-level graph with networkx, then convert it to an igraph graph.
# NOTE(review): `ig` is not bound by any import visible in this file (the
# `import igraph as ig` line at the top is commented out) — confirm before running.
high_lvl_brain = nx.Graph()
high_lvl_brain.add_nodes_from(names)
high_lvl_brain.add_edges_from(links)
high_lvl_brain_ig = ig.Graph.from_networkx(high_lvl_brain)
```

```{python}
# Names of the colours known to igraph. Note: this rebinds `colors`.
# NOTE(review): relies on `ig`, whose import is commented out at the top of the file.
colors = ig.drawing.colors.known_colors
colors = list(colors.keys())
#print(colors)
```

```{python}
# Draw the region-level graph with igraph and save it to "high_lvl_brain.pdf".
# NOTE(review): relies on `ig`, whose import is commented out at the top of the file.
#set label to be names of nx graph nodes
high_lvl_brain_ig.vs["label"] = high_lvl_brain_ig.vs["_nx_name"]

visual_style = {}
#node size
visual_style["vertex_size"] = 20
#node color
# one 0/1 entry per vertex, used as index into the two colour names below
c = [0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0]
n_names = np.array(['deep sky blue', 'magenta4'])
visual_style["vertex_color"] = n_names[c]  
#node label
visual_style["vertex_label"] = high_lvl_brain_ig.vs["label"]
#node label color
visual_style["vertex_label_color"] = "black"
#node label size
visual_style["vertex_label_size"] = 15
#edge thickness
visual_style["edge_width"] = 2
#bounding box
visual_style["bbox"] = (500, 500)
#margin
visual_style["margin"] = 20

ig.plot(high_lvl_brain_ig, "high_lvl_brain.pdf", **visual_style, layout="kk")
```

## Outlier analysis

```{python}
# Number of clusters (nodes of the coarse-grained graph) used for the outlier analysis.
out_size = 3000
```

```{python}
# Coarse grain to `out_size` clusters without replacing the graph stored in Grainer.
Grainer._set_clust(out_size)
partitions = Grainer.predict(metric)
out_graph = Grainer.zip_graph(partitions, inplace=False)
```

```{python}
# Degree histogram of the coarse-grained graph (raw counts, 200 bins).
degree = np.array( [ d for n, d in out_graph.degree()] )
density_deg, bins_deg = np.histogram(degree, 200) 

# Histogram of its average degree connectivity (normalised to a density, 200 bins).
avg_degree_con = nx.average_degree_connectivity(out_graph)
avg_degree_con = np.array([i for k,i in avg_degree_con.items()])
density_con, bins_con = np.histogram(avg_degree_con, 200, density=True) 
```

```{python}
# Left: degree distribution of the coarse-grained graph on log-log axes, with a
# k**(-5) guide line. Right: distribution of the average degree connectivity.
fig, ax = plt.subplots(1, 2,  figsize=(15, 6))

ax[0].loglog()
ax[0].plot( (bins_deg[:-1]+ bins_deg[1:])/2, density_deg, 'o')
# The guide line must be drawn against the degree k itself: without explicit
# x values matplotlib would use the array index (0, 1, ...) and shift the line.
k_guide = np.arange(10, 1000).astype(float)
ax[0].plot( k_guide, 30000000000000*k_guide**(-5), 'r--', label = 'power law coefficient: 5' )
ax[0].set_ylim(0.5, 500)
ax[0].set_xlim(10, 4000)
ax[0].set_title('Degree distribution')
ax[0].set_xlabel('Degree')
ax[0].set_ylabel('Frequency')
ax[0].legend()

ax[1].bar( (bins_con[:-1]+bins_con[1:])/2, density_con, width=bins_con[1]-bins_con[0])
ax[1].set_title('Average degree connectivity distribution')
ax[1].set_xlabel('Average degree connectivity')
ax[1].set_ylabel('Probability')

plt.show()
```

# To do list
1. ✔️ list brain regions to create different $12$ theoretical communities;
1. ✔️ understand what the $12$ regions do; 
1. 🟥 Understand brain section of neurons;
1. ✔️ Check degree distribution is power law;
1. ✔️ networkx documentation (algorithms in particular);
1. ◻️ Local connectivity measure;
1. ✔️ Colour adjacency matrix based on connection supergroup **FRANK**;
1. ✔️ Redefine ROI by taking connection with highest weight only **FRANK**;
1. ✔️ Define coarse graining procedure **BALLA**;
1. ◻️ Polish the report up to now;
1. ✔️ Better define coarse graining metrics;
1. ◻️ Community detection (naive and after coarse graining);
1. ◻️ Betweenness (especially for multiple ROI synapses), centrality, small world characteristics **FRANK**;
1. ✔️Distance from random network;
1. ◻️ robustness to cuts. In particular it may be interesting to attack outer regions of the brain, the more easily damaged. **BALLA**. The  **common connectome constraint paper** already say something about it;
1. ◻️ Visualization of networks and parameters;
1. ◻️ Aggregate by ROI, modification with coarse graining;
1. ◻️ Book an appointment to discuss state of the work


###### Problems
~In point (3), the same synapse (the same link between two neurons, as we have defined it here) may belong to more than one region. We could add only the one with the highest weight, but this is often not possible. Random?~ Fixed by keeping only the highest-weight connection.


## Average measures


#### Degree

```{python}
# Mean degree of the original graph (truncated to an integer) and its degree
# distribution on log-log axes.
degree = np.array([ d for n, d in graph.degree()])
degree_mean = int(degree.mean())
print(f"Degree mean: {degree_mean}")
y, x = np.histogram(degree, bins=100)
# bin edges -> bin centres
x = (x[1:]+x[:-1])/2
# plot degree bins
plt.loglog()
plt.plot(x, y, 'ro')
plt.xlabel('Log Degree log(k)')
plt.ylabel('Log Frequency ')
plt.title('Degree distribution')
plt.grid()
plt.show()
```

#### Degree connectivity
The *average degree connectivity* is the average nearest neighbor degree of nodes with degree k. 

```{python}
# networkx returns one value per degree k; only the values are kept.
avg_degree_connectivity = nx.average_degree_connectivity(graph)
avg_degree_connectivity = np.array([i for k,i in avg_degree_connectivity.items()])
# Mean over the degrees, truncated to an integer
print(f"Global average degree connectivity value: {int(avg_degree_connectivity.mean())}")

plt.hist(avg_degree_connectivity, bins=200)
plt.grid()
plt.title("Average degree connectivity distribution")
plt.xlabel("Average degree connectivity for degree k")
plt.show()
```

```{python}
# Same quantity as in the previous cell, recomputed and drawn as a bar chart.
avg_degree_connectivity = nx.average_degree_connectivity(graph)
avg_degree_connectivity = np.array([i for k,i in avg_degree_connectivity.items()])
# `max_avg` is not used afterwards in this cell
max_avg = np.max(avg_degree_connectivity)
print(f"Global average degree connectivity value: {int(avg_degree_connectivity.mean())}")

hist, bins = np.histogram(avg_degree_connectivity, bins=200)
#width = 0.7 * (bins[1] - bins[0])
width = 1 * (bins[1] - bins[0])
center = (bins[:-1] + bins[1:]) / 2
plt.bar(center, hist, align='center', width=width)
plt.show()
```

#### Average clustering coefficient
Time saver: avg_clustering_coeff = 0.31370364316752

```{python}
# Time-consuming: average clustering coefficient of the graph, with elapsed time.
if PATIENCE:
    start_time = time.time()

    avg_clustering_coeff = nx.average_clustering(graph)
    print(f"Average clustering coefficient: {avg_clustering_coeff}")

    end_time = time.time()
    print("Total time: {:.2f} seconds".format((end_time - start_time)))
```

#### Average shortest path length (NOTE: veeeery long)

```{python}
# Time-consuming: average shortest path length, computed only when the graph is
# connected; otherwise a message is printed instead.
if PATIENCE:
    # sanity check
    if nx.is_connected(graph):
        start_time = time.time()

        avg_shortest_path = nx.average_shortest_path_length(graph)
        print(avg_shortest_path)

        end_time = time.time()
        print("Total time: {:.2f} seconds".format((end_time - start_time)))
    else:
        print("Graph is not connected.")
```

```{python}

```