-
Notifications
You must be signed in to change notification settings - Fork 0
/
add_wx_cols.py
201 lines (174 loc) · 6.88 KB
/
add_wx_cols.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python
# coding: utf8
"""
Thr Oct 24, 2019
Stacy
user runs add_wxcols.py from command line
>> python add_wxcols.py
- make a copy of the project file and save with a '_wx_v1' suffix
- add the wx columns
rem xlsxwriter library docs at:
https://pbpython.com/improve-pandas-excel-output.html
and
https://xlsxwriter.readthedocs.io/
"""
# IMPORTS ---------------------------------------------------------------------
import os, shutil, sys
import pathlib
from pathlib import Path
import shutil
from hashids import Hashids
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
import numpy as np
import xlsxwriter
from xlsxwriter.utility import xl_rowcol_to_cell # excel formatting
# import py files -------------------------------------------------------------
import menu
def main():
# print menu options to console -----------------------------------------------
# declare menu and file arrays
menu_choices = []
file_choices = []
console_message = ''
# get path of current folder
folder_path = os.path.dirname(os.path.abspath(__file__))
# get names of .xlsx files that are in the folder that are also input files
for r, d, f in os.walk(folder_path): # rem r=root, d=dir, f=file
for file in f:
if '.xlsx' in file and 'data' in file and '_wx_v1' not in file:
# rem for full path use <files.append(os.path.join(r, file))>
file_choices.append(file)
# print user menu
print('\n-----------------------------------------')
print(' Data Files')
print('-----------------------------------------')
spacer =' '
print('{}{}{}'.format('m', spacer, 'Show Main Menu'))
menu_choices.append('m')
i = 0
for ic in file_choices:
i += 1
print('{} {}'.format(i, ic))
menu_choices.append(str(i))
# get user input
if len(file_choices) == 0:
console_message = '\nNo data files available. Select \'m\' for Main Menu'
else:
console_message = '\nSelect a data file (or \'m\' for Main Menu)'
print(console_message)
user_choice = input()
# validate user input
while user_choice not in menu_choices:
print('Invalid choice! {}'.format(console_message))
user_choice = input()
if user_choice == 'm':
menu.main()
# if the user chooses 'm', then program control goes back to menu.main(),
# which means that when menu.main() terminates, the program control will
# return to this program; therefore, it's important to invoke sys.exit()
# upon the callback to terminate all py execution in the terminal
sys.exit()
# see if user-selected file has already been made into a wx_v1 file
user_selected_file = file_choices[int(user_choice)-1]
wx_files = []
wx_file_exists = False
# find wx_v1 filenames and put them in array
for r, d, f in os.walk(folder_path): # rem r=root, d=dir, f=file
for file in f:
if 'xlsx' in file and 'wx_v1' in file:
wx_files.append(file)
for f in wx_files:
if user_selected_file[0:len(user_selected_file)-5] in f:
wx_file_exists = True
# if the wx_v1 file already exists, see if user wants to overwrite it
want_to_add_cols = True
if wx_file_exists:
print('A \'wx_v1\' file already exists for that data. Overwrite (y/n)?')
yn_choice = input()
while yn_choice not in ['y', 'n', 'Y', 'N']:
print('Invalid choice! Overwrite (y/n)?')
yn_choice = input()
if yn_choice == 'n':
want_to_add_cols = False
# if the user wants to overwrite the existing wx_1 file, or
# if there is not yet a wx_1 file for the user-selected data file, then
# create the new wx_1 file
if want_to_add_cols:
print('\nAdding wx columns to \'' + user_selected_file + '\' ...')
# define paths/names for files and directories
mkpath = os.path.abspath(__file__) # = (os.path.abspath('.'))
projectDir = os.path.dirname(mkpath)
#templateDir = os.path.dirname(projectDir) + r'\NERS_Template' # relative path
#nersDir = projectDir + r'\ners'
#projectFile = ''
#projectFilename = ''
projectSheetName = ''
metaDir = projectDir + r'\input' # relative path
metaFilename = r'\in_wxmeta.xlsx'
metaFile = metaDir + metaFilename
wxFile = ''
wxFilename = ''
wxSheetName = 'WrWx Fields'
numberOfRows = 0
hashids = Hashids(alphabet='ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890', min_length=5)
# get the name of the project file
wxFilename = user_selected_file[0:len(user_selected_file)-5] + '_wx_v1.xlsx'
wxFile = projectDir + '\\' + wxFilename
# declare arrays to hold data from the project and metadata files
pf_headers = []
pf_cols = []
wx_headers = []
wx_cols = []
wx_hashIds = []
# read the project and metadata files into dataframes
df_pf = pd.read_excel(user_selected_file, sheet_name=0)
df_meta = pd.read_excel(metaFile, sheet_name=1)
# get the number of records in project files
numberOfRows = len(df_pf.index)
# get name of worksheet in the project file
pf = pd.ExcelFile(user_selected_file)
projectSheetName = pf.sheet_names[0]
# copy the file headers to arrays
for head in df_pf: pf_headers.append(head)
for head in df_meta: wx_headers.append(head)
# copy each column of project data as a list into an array of lists,
# using the header array index as the iterator
# ie: master_cols = [ [col1], [col2], [col3], ...[coln] ]
i = 0
for h in pf_headers:
colname = pf_headers[i] # get colname
pf_cols.append(df_pf[colname]) # insert as a list to column array
i += 1
# create wx hash ids
print('Creating hash ids...')
i = 0
while i < numberOfRows:
hashid = hashids.encode(i)
wx_hashIds.append(hashid)
i += 1
# create dictionary to hold output for new wx file
# where key = header and val = columns
main_dict = {}
i = 0
for mh in pf_headers:
main_dict[mh] = pf_cols[i]
i += 1
i = 0
for wxh in wx_headers:
if wxh == 'wHashID':
main_dict[wxh] = wx_hashIds
else:
main_dict[wxh] = ''
i += 1
# create pandas data frames
df_main = pd.DataFrame(main_dict) # main worksheet
# create excel writer
writer = pd.ExcelWriter(wxFile)
# write data frames to excel file
# and skip one row to insert a user-defined header
df_main.to_excel(writer, projectSheetName, index=False)
writer.save()
# end program
print('File complete:', wxFile)