-
Notifications
You must be signed in to change notification settings - Fork 0
/
Partitioner.py
46 lines (37 loc) · 1.58 KB
/
Partitioner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
"""
Protein data partitioner
"""
import random
class Partitioner:
    """Stateless helpers that slice a pandas DataFrame of protein data.

    Every method is a static method: it takes the DataFrame as its first
    argument and returns a selection of it without modifying the input.
    """

    @staticmethod
    def select_by_project(dataframe, projectNumber):
        """Return the rows whose ``'Proj'`` column equals *projectNumber*."""
        return dataframe.loc[dataframe['Proj'] == projectNumber]

    @staticmethod
    def select_by_run(dataframe, runNumber):
        """Return the rows whose ``'Run'`` column equals *runNumber*."""
        return dataframe.loc[dataframe['Run'] == runNumber]

    @staticmethod
    def select_by_clone(dataframe, cloneNumber):
        """Return the rows whose ``'Clone'`` column equals *cloneNumber*."""
        return dataframe.loc[dataframe['Clone'] == cloneNumber]

    @staticmethod
    def remove_all_bookkeeping(dataframe, remove_native_contacts=False):
        """Drop the leading bookkeeping columns (needed before clustering).

        The first four columns are removed by position.
        NOTE(review): the original comment lists project, run, clone, time
        "and date?" as bookkeeping -- confirm that four columns is the right
        count for the data layout in use.

        Args:
            dataframe: DataFrame whose first four columns are bookkeeping.
            remove_native_contacts: when true, also drop the ``'NC'`` column.

        Returns:
            A DataFrame without the removed columns.

        Raises:
            KeyError: if ``remove_native_contacts`` is true and no ``'NC'``
                column remains after the positional slice.
        """
        features = dataframe.iloc[:, 4:]
        if remove_native_contacts:
            # Pass the axis by keyword: the positional form ``drop('NC', 1)``
            # was removed in pandas 2.0.
            features = features.drop('NC', axis=1)
        return features

    @staticmethod
    def sample(dataframe, sample_size):
        """Return *sample_size* distinct rows chosen with the ``random`` module.

        Rows are drawn by position rather than by index label, so a frame
        with duplicate index labels still yields exactly *sample_size* rows.

        Raises:
            ValueError: if *sample_size* is negative or exceeds the row count
                (raised by ``random.sample``).
        """
        positions = random.sample(range(len(dataframe)), sample_size)
        return dataframe.iloc[positions]

    @staticmethod
    def select_by_column(dataframe, bounds):
        """Return the columns at positions ``bounds[0]`` to ``bounds[1]`` inclusive."""
        first, last = bounds[0], bounds[1]
        # iloc slices exclude the stop position, hence the + 1.
        return dataframe.iloc[:, first:last + 1]

    @staticmethod
    def select_by_time(dataframe, startime, timecolumn):
        """Return the rows whose time value is at or after *startime*.

        Args:
            dataframe: DataFrame to filter; it is left unmodified.
            startime: inclusive lower bound on the time value.
            timecolumn: integer position of the time column.

        Returns:
            The matching rows, with the original column labels.
        """
        # Address the time column by position so the caller's column labels
        # never have to be rewritten in order to look it up.
        time_values = dataframe.iloc[:, timecolumn]
        return dataframe.loc[time_values >= startime]