Skip to content

Commit

Permalink
Implement FileIO.append (for CSV, JSON and NDJSON) (#44)
Browse files Browse the repository at this point in the history
Adding support for append functionality!
  • Loading branch information
bh2smith authored Nov 18, 2022
1 parent 9db50bb commit 2606417
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ dist
_version.py
.idea/
venv/
tmp/
83 changes: 77 additions & 6 deletions dune_client/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import logging
import os.path
from enum import Enum
from os.path import exists
from pathlib import Path
from typing import TextIO
from typing import TextIO, Callable, List, Tuple

# ndjson missing types: https://github.com/rhgrant10/ndjson/issues/10
import ndjson # type: ignore
Expand Down Expand Up @@ -60,14 +61,17 @@ def load(self, file: TextIO) -> list[DuneRecord]:
return list(ndjson.reader(file))
raise ValueError(f"Unrecognized FileType {self} for {file.name}")

def write(self, out_file: TextIO, data: list[DuneRecord]) -> None:
def write(
self, out_file: TextIO, data: list[DuneRecord], skip_headers: bool = False
) -> None:
"""Writes `data` to `out_file`"""
logger.debug(f"writing results to file {out_file.name}")
if self == FileType.CSV:
headers = data[0].keys()
data_tuple = [tuple(rec.values()) for rec in data]
dict_writer = csv.DictWriter(out_file, headers, lineterminator="\n")
dict_writer.writeheader()
if not skip_headers:
dict_writer.writeheader()
writer = csv.writer(out_file, lineterminator="\n")
writer.writerows(data_tuple)

Expand Down Expand Up @@ -106,13 +110,77 @@ def _filepath(self, name: str, ftype: FileType) -> str:
"""Internal method for building absolute path."""
return os.path.join(self.path, name + str(ftype))

def _write(self, data: list[DuneRecord], name: str, ftype: FileType) -> None:
# TODO - use @skip_empty decorator here. Couldn't get the types to work.
def _write(self, data: List[DuneRecord], name: str, ftype: FileType) -> None:
# The following three lines are duplicated in _append, due to python version compatibility
# https://github.com/cowprotocol/dune-client/issues/45
# We will continue to support python < 3.10 until ~3.13, this issue will remain open.
if len(data) == 0:
logger.info(f"Nothing to write to {name}... skipping")
return
return None
with open(self._filepath(name, ftype), "w", encoding=self.encoding) as out_file:
ftype.write(out_file, data)
return None

def _assert_matching_keys(
self, keys: Tuple[str, ...], fname: str, ftype: FileType
) -> None:
with open(fname, "r", encoding=self.encoding) as file:
if ftype == FileType.CSV:
# Check matching headers.
headers = file.readline()
existing_keys = headers.strip().split(",")
elif ftype == FileType.JSON:
single_object = json.loads(file.readline())[0]
existing_keys = single_object.keys()
elif ftype == FileType.NDJSON:
single_object = json.loads(file.readline())
existing_keys = single_object.keys()

key_tuple = tuple(existing_keys)
assert keys == key_tuple, f"{keys} != {key_tuple}"

def _append(self, data: List[DuneRecord], name: str, ftype: FileType) -> None:
if len(data) == 0:
logger.info(f"Nothing to write to {name}... skipping")
return None
fname = self._filepath(name, ftype)
if not exists(fname):
logger.warning(
f"File {fname} does not exist, using write instead of append!"
)
return self._write(data, name, ftype)

# validate that the incoming content to be appended has the same schema
# The skip empty decorator ensures existence of data[0]!
self._assert_matching_keys(tuple(data[0].keys()), fname, ftype)

if ftype == FileType.JSON:
# These are JSON lists, so we have to concatenate the data.
with open(fname, "r", encoding=self.encoding) as existing_file:
existing_data = ftype.load(existing_file)
return self._write(existing_data + data, name, ftype)

with open(fname, "a+", encoding=self.encoding) as out_file:
return ftype.write(out_file, data, skip_headers=True)

def append_csv(self, data: list[DuneRecord], name: str) -> None:
"""Appends `data` to csv file `name`"""
# This is a special case because we want to skip headers when the file already exists
# Additionally, we may want to validate that the headers actually coincide.
self._append(data, name, FileType.CSV)

def append_json(self, data: list[DuneRecord], name: str) -> None:
"""
Appends `data` to json file `name`
This is the least efficient of all, since we have to load the entire file,
concatenate the lists and then overwrite the file!
Other filetypes such as CSV and NDJSON can be directly appended to!
"""
self._append(data, name, FileType.JSON)

def append_ndjson(self, data: list[DuneRecord], name: str) -> None:
"""Appends `data` to ndjson file `name`"""
self._append(data, name, FileType.NDJSON)

def write_csv(self, data: list[DuneRecord], name: str) -> None:
"""Writes `data` to csv file `name`"""
Expand Down Expand Up @@ -154,3 +222,6 @@ def load_singleton(
) -> DuneRecord:
"""Loads and returns single entry by index (default 0)"""
return self._load(name, self._parse_ftype(ftype))[index]


WriteLikeSignature = Callable[[FileIO, List[DuneRecord], str, FileType], None]
26 changes: 25 additions & 1 deletion tests/unit/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def cleanup():


def cleanup_files(func):
"""This decorator can be used for testing methods outside of this class"""
"""This decorator can be used for testing methods outside this class"""

def wrapped_func(self):
func(self)
Expand Down Expand Up @@ -48,6 +48,30 @@ def test_invertible_write_and_load(self):
f"Assert invertible failed on {ftype}",
)

def test_append_ok(self):
for ftype in FileType:
self.file_manager._write(self.dune_records, TEST_FILE, ftype)
self.file_manager._append(self.dune_records, TEST_FILE, ftype)
loaded_records = self.file_manager._load(TEST_FILE, ftype)
expected = self.dune_records + self.dune_records
self.assertEqual(
expected,
loaded_records,
f"test_append failed on {ftype}",
)

def test_append_calls_write_on_new_file(self):
for ftype in FileType:
with self.assertLogs(level="WARNING"):
self.file_manager._append(self.dune_records, TEST_FILE, ftype)

def test_append_error(self):
invalid_records = [{}] # Empty dict has different keys than self.dune_records
for ftype in FileType:
self.file_manager._write(self.dune_records, TEST_FILE, ftype)
with self.assertRaises(AssertionError):
self.file_manager._append(invalid_records, TEST_FILE, ftype)

def test_load_singleton(self):
for file_type in FileType:
self.file_manager._write(self.dune_records, TEST_FILE, file_type)
Expand Down

0 comments on commit 2606417

Please sign in to comment.