-
Notifications
You must be signed in to change notification settings - Fork 0
/
ecl_tokenizer.py
249 lines (224 loc) · 9.29 KB
/
ecl_tokenizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# -*- coding: utf-8 -*-
# Ecl tokenizer.
# by Baurzhan Muftakhidinov
import os
import re
from datetime import datetime
from difflib import HtmlDiff
# Section keywords: when the parser meets one of these names it becomes the
# "current section" recorded on every keyword found afterwards.
sections = ("RUNSPEC", "GRID", "EDIT", "PROPS", "REGIONS", "SOLUTION", "SUMMARY", "SCHEDULE")
# Keyword candidate: one upper-case letter, then one or more upper-case letters
# or digits, then an optional trailing '+' or '-'.  Written as a raw string so
# the backslashes reach the regex engine instead of being (invalid) string
# escape sequences.
kwd_expr = re.compile(r"^[A-Z][A-Z0-9]+[\+\-]?$")
class EclKwd:
    """One keyword occurrence together with its raw data lines.

    Attributes:
      name     -- keyword name.
      value    -- list of raw data lines (strings), or None when no data has
                  been attached.
      section  -- section name associated with the keyword.
      parent   -- name of the file the keyword came from.
      line_num -- line number of the keyword inside ``parent``.
    """

    def __init__(self, name, section, parent, line_no, value=None):
        self.name = name
        self.value = value
        self.section = section
        self.parent = parent
        self.line_num = line_no

    def to_list(self, expand=True):
        """Split the raw data lines into records, each a list of string tokens.

        A record is closed by a line that ends with ``/`` (after stripping
        whitespace); the tokens of all lines since the previous record are
        gathered into one list.  Records without any token (a lone ``/``) are
        dropped, and tokens that follow the last ``/`` are not returned.

        expand -- when true, a repeat token ``N*V`` is replaced by N copies of
                  ``V`` (a bare ``N*`` gives N empty strings).  A token whose
                  part before ``*`` is not an integer, e.g. the quoted wildcard
                  ``'P*'``, or that holds more than one ``*``, is kept verbatim
                  instead of raising ValueError.

        Returns an empty list when the keyword has no data.
        """
        records = []
        if not self.value:
            return records
        current = []  # tokens of the record being assembled
        for raw_line in self.value:
            text = raw_line.strip()
            closes_record = text.endswith("/")
            if closes_record:
                text = text[:-1]
            for token in text.split():
                count, star, item = token.partition("*")
                repeat = None
                if expand and star and "*" not in item:
                    try:
                        repeat = int(count)
                    except ValueError:
                        # Not a repeat count: leave the token untouched.
                        repeat = None
                if repeat is None:
                    current.append(token)
                else:
                    current.extend([item] * repeat)
            if closes_record:
                if current:
                    records.append(current)
                current = []
        return records

    def has_trailing_slash(self):
        """Tell whether the keyword data is terminated.

        Keywords without data and the keyword named ``TITLE`` always pass;
        otherwise at least one data line must end with ``/``.
        """
        if not self.value or self.name == "TITLE":
            return True
        return any(line.endswith("/") for line in self.value)

    def __eq__(self, other):
        """Equal when both the name and the expanded records match."""
        if not isinstance(other, EclKwd):
            return NotImplemented
        return self.name == other.name and self.to_list() == other.to_list()

    def __hash__(self):
        # Built from the same data __eq__ compares, so equal keywords hash
        # equally; also works while ``value`` is still None.
        records = tuple(tuple(record) for record in self.to_list())
        return hash((self.name, records))

    def __repr__(self):
        return '<EclKwd: {0:9} Section "{1:9}" Parent: "{2}" Line_number: "{3}">\n'.format(self.name, self.section, self.parent, self.line_num)
class EclCase:
    """Tokenized data deck: keywords of a main file and of its INCLUDE files.

    Attributes:
      data_file       -- path of the main file as given to the constructor.
      skip_grdecl     -- when true, files whose name ends in ``.GRDECL``
                         (case-insensitive) are not parsed.
      processed_files -- absolute paths of the files handed to the parser.
      missing_files   -- absolute paths of INCLUDE targets that do not exist.
      keywords        -- EclKwd objects in the order they were stored.
    """

    def __init__(self, data_file, verbose=False, skip_grdecl=False):
        """Set up empty containers and parse ``data_file`` right away."""
        print(f"INFO: EclCase init from data deck: {data_file}")
        self.data_file = data_file
        self.skip_grdecl = skip_grdecl
        self.processed_files = []
        self.missing_files = []
        self.keywords = []
        self.parse(data_file, verbose=verbose)

    def __repr__(self):
        title = ""
        titles = self.get_kwds("TITLE")
        # Only the first TITLE keyword is shown, and only if it carries data.
        if titles and titles[0].value:
            title = titles[0].value[0].strip()
        return '<EclCase: Title: `{0}` Keywords: {1} Includes: {2}, missing: {3}, File: "{4}">\n'.format(title, len(self.keywords), len(self.processed_files), len(self.missing_files), self.data_file)

    def append_keyword(self, keyword):
        """Store ``keyword`` after checking that its data is terminated.

        Raises ValueError when ``keyword.has_trailing_slash()`` is false.
        """
        if not keyword.has_trailing_slash():
            raise ValueError(
                "Keyword {0} at {1}:{2} has data but no line ending with '/'".format(
                    keyword.name, keyword.parent, keyword.line_num))
        self.keywords.append(keyword)

    def _flush_keyword(self, kwd, buf, verbose):
        """Attach the buffered data lines to ``kwd``, store it, follow INCLUDE."""
        kwd.value = buf.copy()
        self.append_keyword(kwd)
        if kwd.name == "INCLUDE":
            self.parse_include(kwd, verbose=verbose)
        buf.clear()

    def parse(self, in_file, cur_section="", verbose=False):
        """Tokenize ``in_file`` and append its keywords to ``self.keywords``.

        in_file     -- path of the file to read.
        cur_section -- section name given to keywords until a section keyword
                       is met in this file.
        verbose     -- print progress messages.

        Raises ValueError for a data line that precedes any keyword and, via
        append_keyword, for keyword data that lacks a closing ``/``.
        """
        if self.skip_grdecl and in_file.upper().endswith(".GRDECL"):
            if verbose:
                print("SKIP: Skipping grid files was requested!")
            return
        in_file = os.path.abspath(in_file).strip()
        if verbose:
            print("INFO: Parsing file: {0}".format(in_file))
        self.processed_files.append(in_file)
        last_kwd = None  # keyword whose data lines are being collected
        buf = []         # data lines collected for last_kwd
        # TODO: which encoding should DATA file have?
        with open(in_file, 'r') as sr:
            for line_no, line in enumerate(sr, 1):
                line = line.rstrip()
                # Everything from the first `--` on is treated as a comment.
                if "--" in line:
                    line = line.split("--")[0]
                if not line.strip():
                    continue
                # Long lines without a slash are probed on their first eight
                # characters only; any other line is probed as a whole.
                if len(line) >= 8 and "/" not in line:
                    probe_kwd = line[:8].rstrip()
                else:
                    probe_kwd = line
                # The first non-empty line after TITLE is its only data line.
                if last_kwd and last_kwd.name == "TITLE":
                    last_kwd.value = [line]
                    self.append_keyword(last_kwd)
                    last_kwd = None
                    continue
                if probe_kwd and kwd_expr.match(probe_kwd):
                    if probe_kwd in sections:
                        cur_section = probe_kwd
                    if last_kwd:
                        self._flush_keyword(last_kwd, buf, verbose)
                    if verbose:
                        print("DBG : keyword found: ", probe_kwd, " line: ", line_no)
                    last_kwd = EclKwd(probe_kwd, cur_section, in_file, line_no)
                else:
                    if not last_kwd:
                        raise ValueError("Data line without previous keyword is found! ", line_no, line)
                    buf.append(self.process_data_line(line))
        # A keyword still pending here has never been stored.  It is flushed
        # unconditionally: a membership test would compare by name and records
        # and drop it whenever an equal-looking keyword was stored earlier.
        if last_kwd is not None:
            self._flush_keyword(last_kwd, buf, verbose)

    def parse_include(self, ecl_kwd, verbose=False):
        """Parse the file named by the first data line of an INCLUDE keyword.

        The name is resolved against the directory of ``self.data_file``.
        Files already processed are not parsed again; files that do not exist
        are reported and recorded in ``self.missing_files``.
        """
        if not ecl_kwd.value:
            print("ERR : The keyword supplied to parse_include does not have a value in it.")
            return
        raw_name = ecl_kwd.value[0].strip()
        # Drop only the closing slash; a leading slash belongs to the path.
        if raw_name.endswith("/"):
            raw_name = raw_name[:-1]
        inc_file = raw_name.replace("'", "").strip()
        included_file_name = os.path.abspath(os.path.join(os.path.dirname(self.data_file), inc_file)).strip()
        if included_file_name in self.processed_files:
            if verbose:
                print("INFO: Duplicated INCLUDE found at {0}:{1} : {2}".format(ecl_kwd.parent, ecl_kwd.line_num, included_file_name))
        elif os.path.exists(included_file_name):
            self.parse(included_file_name, ecl_kwd.section, verbose=verbose)
        else:
            print("ERR : INCLUDE file not found, ref file: {0}, line: {1}, name: {2}".format(ecl_kwd.parent, ecl_kwd.line_num, included_file_name))
            self.missing_files.append(included_file_name)

    def strip_complex_comment(self, line):
        """Cut ``line`` right after the first ``/`` that is outside quotes.

        Handles comments that follow the closing slash without a ``--``.
        Slashes between single quotes are skipped; the line is returned
        unchanged when no unquoted slash exists.
        """
        within_quote = False
        for pos, char in enumerate(line):
            if char == "'":
                within_quote = not within_quote
            elif char == "/" and not within_quote:
                return line[:pos + 1]
        return line

    def process_data_line(self, line):
        """Return ``line`` right-stripped and without text after the closing slash.

        Comments may follow the ``/`` directly, without ``--``; the part after
        any ``--`` is expected to be removed by the caller already.  The slash
        itself is kept so that record boundaries survive.
        """
        line = line.rstrip()
        if "/" in line:
            # Quote-aware scan; without quotes it cuts after the first slash.
            line = self.strip_complex_comment(line)
        return line

    def has_kwd(self, keyword):
        """Tell whether a keyword named ``keyword`` was stored."""
        return any(kwd.name == keyword for kwd in self.keywords)

    def get_kwds(self, keyword):
        """Return the stored keywords named ``keyword``, in stored order."""
        return [kwd for kwd in self.keywords if kwd.name == keyword]

    def get_kwds_data(self, keyword, expand=True):
        """Return the records of all keywords named ``keyword`` as one list.

        ``expand`` is passed on to ``to_list`` of each keyword.
        """
        out = []
        for kwd in self.get_kwds(keyword):
            out.extend(kwd.to_list(expand=expand))
        return out

    def describe(self):
        """Print the main file, then processed (``|--``) and missing (``|XX``) files."""
        print("+", self.data_file)
        for pfile in self.processed_files:
            print("|--", pfile)
        for mfile in self.missing_files:
            print("|XX", mfile)

    def get_top_dir(self):
        """Return the longest common path of all processed and missing files.

        os.path.commonpath raises ValueError when both lists are empty.
        """
        return os.path.commonpath(self.processed_files + self.missing_files)

    def compare_includes_html(self, other):
        """Return an HTML page diffing the processed-file lists of two cases.

        Useful for IPython.  To save the result:

            diff = case.compare_includes_html(other_case)
            with open("output.html", "w+") as file:
                file.write(diff)
        """
        diff = HtmlDiff().make_file(self.processed_files, other.processed_files, self.data_file, other.data_file)
        return diff
# This module is meant to be imported; running it directly only prints a hint.
if __name__ == "__main__":
    print("Import me!")