Skip to content

Commit

Permalink
Fixed duplicate_list_location bug
Browse files — browse the repository at this point in the history
Signed-off-by: Constantin M Adam <[email protected]>
  • Loading branch information
cmadam committed Oct 25, 2024
1 parent f187948 commit 84b9104
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import os
from typing import Any

from data_cleaning_transform import DataCleaningTransformConfiguration
from data_cleaning_transform import (
DataCleaningTransformConfiguration,
duplicate_list_location_default,
duplicate_list_location_key,
)
from data_processing.data_access import DataAccessFactoryBase
from data_processing.runtime.pure_python import PythonTransformLauncher
from data_processing.runtime.pure_python.runtime_configuration import (
Expand Down Expand Up @@ -53,9 +57,12 @@ def get_transform_config(
:return: dictionary of transform init params
"""
data_access = data_access_factory.create_data_access()
duplicate_list_location = os.path.abspath(
os.path.join(data_access.output_folder, "..", self.params["duplicate_list_location"])
)
duplicate_list_location = self.params.get(duplicate_list_location_key, duplicate_list_location_default)
if not duplicate_list_location.startswith("/"):
out_paths = data_access.output_folder.rstrip("/").split("/")
dupl_list_paths = duplicate_list_location.split("/")
paths = out_paths[:-1] + dupl_list_paths
duplicate_list_location = "/".join([p.strip("/") for p in paths])
if duplicate_list_location.startswith("s3://"):
_, duplicate_list_location = duplicate_list_location.split("://")
self.duplicate_list, retries = data_access.get_file(duplicate_list_location)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,11 @@ def get_transform_config(
"""
data_access = data_access_factory.create_data_access()
duplicate_list_location = self.params.get(duplicate_list_location_key, duplicate_list_location_default)
duplicate_list_location = os.path.abspath(
os.path.join(data_access.output_folder, "..", duplicate_list_location)
)
if not duplicate_list_location.startswith("/"):
out_paths = data_access.output_folder.rstrip("/").split("/")
dupl_list_paths = duplicate_list_location.split("/")
paths = out_paths[:-1] + dupl_list_paths
duplicate_list_location = "/".join([p.strip("/") for p in paths])
if duplicate_list_location.startswith("s3://"):
_, duplicate_list_location = duplicate_list_location.split("://")
duplicate_list, retries = data_access.get_file(duplicate_list_location)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import os
from typing import Any

from data_cleaning_transform import DataCleaningTransformConfiguration
from data_cleaning_transform import (
DataCleaningTransformConfiguration,
duplicate_list_location_default,
duplicate_list_location_key,
)
from data_processing.data_access import DataAccessFactoryBase
from data_processing.transform import TransformStatistics
from data_processing.utils import get_logger
Expand Down Expand Up @@ -53,9 +57,12 @@ def get_transform_config(
:return: dictionary of transform init params
"""
data_access = data_access_factory.create_data_access()
duplicate_list_location = os.path.abspath(
os.path.join(data_access.output_folder, "..", self.params["duplicate_list_location"])
)
duplicate_list_location = self.params.get(duplicate_list_location_key, duplicate_list_location_default)
if not duplicate_list_location.startswith("/"):
out_paths = data_access.output_folder.rstrip("/").split("/")
dupl_list_paths = duplicate_list_location.split("/")
paths = out_paths[:-1] + dupl_list_paths
duplicate_list_location = "/".join([p.strip("/") for p in paths])
if duplicate_list_location.startswith("s3://"):
_, duplicate_list_location = duplicate_list_location.split("://")
self.duplicate_list, retries = data_access.get_file(duplicate_list_location)
Expand Down

0 comments on commit 84b9104

Please sign in to comment.