# NOTE(review): removed scraped web-page artifacts (GitHub file-viewer chrome and
# gutter line numbers) that preceded the module source — they were not part of the
# original lisst_200.py and made the file invalid Python.
# LISST-200 particle-size sensor: rsync file transfer from the Munkholmen
# platform and ingestion of the resulting data files into InfluxDB.
import os
import subprocess
import datetime
import pandas as pd
# from influxdb import DataFrameClient
import sensor
import config
import util_db
import util_file
# Module logger writing to the shared olmo logfile under a namespaced name.
logger = util_file.init_logger(config.main_logfile, name='olmo.lisst_200')
class Lisst_200(sensor.Sensor):
    """LISST-200 sensor: rsync L0/L1 data files and ingest them.

    L0 files are the raw binary ``.RBN`` files, which are archived to Azure
    blob storage with only their storage location recorded in InfluxDB.
    L1 files are processed ``.CSV`` files, which are parsed into a DataFrame
    and written to InfluxDB in full.
    """

    def __init__(self, influx_clients=None):
        # Init the Sensor() class: this sets some defaults.
        super().__init__()
        self.influx_clients = influx_clients
        self.data_dir = f'/home/{config.munkholmen_user}/olmo/munkholmen/DATA'
        # L0 (raw .RBN) transfer/ingest settings. The *_l0 attributes are
        # presumably consumed by sensor.Sensor.rsync() — base class not
        # visible here, confirm against sensor.py.
        self.file_search_l0 = r"lisst_L(\d{7})\.RBN"
        self.drop_recent_files_l0 = 0
        self.remove_remote_files_l0 = True
        self.max_files_l0 = 10
        self.measurement_name_l0 = 'munk_lisst-200_l0'
        # L1 (processed .CSV) transfer/ingest settings.
        self.file_search_l1 = r"ready_lisst_I(\d{7})\.CSV"
        self.drop_recent_files_l1 = 0
        self.remove_remote_files_l1 = True
        self.max_files_l1 = None
        self.measurement_name_l1 = 'lisst_200'

    def lisst200_csv_to_df(self, csv_filename):
        '''Take a LISST-200 .CSV file and return a pandas DataFrame.

        Parameters
        ----------
        csv_filename : str
            Path to a headerless LISST-200 CSV export.

        Returns
        -------
        pandas.DataFrame
            One row per sample, with 36 size-bin columns, the instrument's
            scalar columns, and an added combined 'date' datetime column.
        '''
        n_size_bins = 36  # number of size bins of LISST-200x
        # 36 size-bin columns followed by the instrument's fixed scalar columns.
        column_names = [f'size_bin_{b + 1:02}' for b in range(n_size_bins)]
        column_names += [
            'Laser transmission Sensor',
            'Supply voltage in [V]',
            'External analog input 1 [V]',
            'Laser Reference sensor [mW]',
            'Depth in [m of sea water]',
            'Temperature [C]',
            'Year',
            'Month',
            'Day',
            'Hour',
            'Minute',
            'Second',
            'External analog input 2 [V]',
            'Mean Diameter [μm]',
            'Total Volume Concentration [PPM]',
            'Relative Humidity [%]',
            'Accelerometer X [not presently calibrated or used]',
            'Accelerometer Y [not presently calibrated or used]',
            'Accelerometer Z [not presently calibrated or used]',
            'Raw pressure [most significant bit]',
            'Raw pressure [least significant 16 bits]',
            'Ambient Light [counts – not calibrated]',
            'Not used (set to zero)',
            'Computed optical transmission over path [dimensionless]',
            'Beam-attenuation (c) [m-1]',
        ]
        df = pd.read_csv(csv_filename, names=column_names)
        # Combine the instrument's split date/time fields into one datetime column.
        df['date'] = pd.to_datetime(dict(
            year=df.Year,
            month=df.Month,
            day=df.Day,
            hour=df.Hour,
            minute=df.Minute,
            second=df.Second))
        return df

    def ingest_l0(self, files):
        """Upload raw L0 files to Azure and record their storage location in InfluxDB.

        Parameters
        ----------
        files : iterable of str
            Local paths of the .RBN files to upload.

        Raises
        ------
        AssertionError
            If the ``az`` CLI upload returns a non-zero exit code.
        """
        # I don't believe we are getting these files right now.
        for f in files:
            storage_location = f"{self.measurement_name_l0}/{os.path.split(f)[1]}"
            process = subprocess.run([
                'az', 'storage', 'fs', 'file', 'upload', '-s', f, '-p', storage_location,
                '-f', 'oceanlabdlcontainer', '--account-name', 'oceanlabdlstorage',
                '--sas-token', self.get_azure_token()],
                stdout=subprocess.PIPE, universal_newlines=True)
            # NOTE(review): assert is stripped under `python -O`; kept so callers
            # that catch AssertionError keep working — consider raising instead.
            assert process.returncode == 0, f"Upload to az failed for file {f}. Msg: {process}"
            logger.info(f'File uploaded to Azure here: {storage_location}')
            ingest_data = {
                'date': pd.to_datetime(datetime.datetime.now()),
                'azure_location': storage_location}
            # Fix: DataFrame.append() was removed in pandas 2.0 (deprecated 1.4);
            # build the one-row frame directly instead.
            df = pd.DataFrame([ingest_data])
            # Instrument clock is local (CET); store as UTC in the database.
            df = df.set_index('date').tz_localize('CET', ambiguous='infer').tz_convert('UTC')
            logger.info(f'Ingesting file {f} to {self.measurement_name_l0}.')
            util_db.ingest_df(self.measurement_name_l0, df, self.influx_clients)

    def ingest_l1(self, files):
        """Parse processed L1 CSV files and ingest them into InfluxDB.

        Parameters
        ----------
        files : iterable of str
            Local paths of the ready_lisst_I*.CSV files to ingest.
        """
        for f in files:
            df = self.lisst200_csv_to_df(f)
            # Influx field types must be consistent; force everything but the
            # timestamp to float.
            df = util_db.force_float_cols(df, not_float_cols=['date'])
            # TODO: Check this time is correct with what the instrument gives.
            df = df.set_index('date').tz_localize('CET', ambiguous='infer').tz_convert('UTC')
            tag_values = {'tag_sensor': 'lisst_200',
                          'tag_edge_device': 'munkholmen_topside_pi',
                          'tag_platform': 'munkholmen',
                          'tag_data_level': 'processed',
                          'tag_approved': 'no',
                          'tag_unit': 'none'}
            df = util_db.add_tags(df, tag_values)
            logger.info(f'Ingesting file {f} to {self.measurement_name_l1}.')
            util_db.ingest_df(self.measurement_name_l1, df, self.influx_clients)

    def rsync_and_ingest(self):
        """Rsync new files from the platform, then ingest any L0/L1 files found."""
        files = self.rsync()  # defined on sensor.Sensor (not visible here)
        logger.info('Lisst_200.rsync() finished.')
        if files['l0'] is not None:
            self.ingest_l0(files['l0'])
        if files['l1'] is not None:
            self.ingest_l1(files['l1'])
        logger.info('Lisst_200.rsync_and_ingest() finished.')