Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add functionality to copy the static files to the transformed data fo… #14

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions config/gmb_config.yaml.j2
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
USERNAME: {{rave_user}}
PASSWORD: {{rave_password}}
API: https://ccredc.mdsol.com/RaveWebServices/studies/000048({{rave_env}})/datasets/regular
OUTPUT_FOLDER: ./raw_data/
OUTPUT_NODE_FOLDER: ./transformed_data/
NODE_FILE: ./node_file/000048_Model.yml
OUTPUT_FOLDER_RAW: ./gmb_raw_data_files/
OUTPUT_FOLDER_TRANSFORMED: ./gmb_transformed_data_files/
DATA_MODEL_NODE_FILE: ./node_file/000048_Model.yml
S3_BUCKET: cloudone-gmb-nonprod-metadata
RAVE_DATA_VERSION: {{rave_data_version}}
S3_BUCKET: {{bucket_name}}
STATIC_FILES: static-files/
2 changes: 1 addition & 1 deletion config/gmb_config_example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ S3_BUCKET: S3_BUCKET_NAME
RAVE_DATA_VERSION: 1147
DATA_LOADER: /Documents/icdc-dataloader-master/loader.py
DATA_LOADER_CONFIG: gmb-local.yml
STATIC_FILES: ./static-files/
STATIC_FILES: static-files/
1 change: 1 addition & 0 deletions gmb_config.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ OUTPUT_FOLDER_TRANSFORMED: ./gmb_transformed_data_files/
DATA_MODEL_NODE_FILE: ./node_file/000048_Model.yml
S3_BUCKET: cloudone-gmb-nonprod-metadata
RAVE_DATA_VERSION: {{rave_data_version}}
STATIC_FILES: static-files/
15 changes: 15 additions & 0 deletions gmb_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def upload_files(self, s3):
file_directory = self.config['OUTPUT_FOLDER_TRANSFORMED'] + file_name
s3_file_directory = 'Transformed' + '/' + timestamp + '/' + file_name
s3.upload_file(file_directory ,self.config['S3_BUCKET'], s3_file_directory)
for file_name in os.listdir(self.config['STATIC_FILES']):
if file_name.endswith('.tsv'):
file_directory = self.config['STATIC_FILES'] + file_name
s3_file_directory = 'Transformed' + '/' + timestamp + '/' + file_name
s3.upload_file(file_directory ,self.config['S3_BUCKET'], s3_file_directory)
subfolder = 's3://' + self.config['S3_BUCKET'] + '/' + 'Transformed' + '/' + timestamp
self.log.info(f'Data files upload to {subfolder}')

Expand Down Expand Up @@ -183,6 +188,16 @@ def transform(self):
else:
self.log.info(f'{file_name[0]} is not in the node file')

# Download the static .tsv files from the S3 bucket before uploading
for key in s3.list_objects(Bucket = self.config['S3_BUCKET'], Prefix = self.config['STATIC_FILES'])['Contents']:
if key['Key'].endswith(".tsv"):
if not os.path.exists(self.config['STATIC_FILES']):
# If the path does not exist, then create the folder
os.mkdir(self.config['STATIC_FILES'])
static_file_name = key['Key'].split('/')
static_file_key = self.config['STATIC_FILES'] + static_file_name[1]
s3.download_file(self.config['S3_BUCKET'], key['Key'], static_file_key)


self.upload_files(s3)

Expand Down