Enforce exact_match=True when listing JSON file for get_estimated_time for MPH #467

Status: Open. Wants to merge 44 commits into base main from mph_fov_name_fix.

Changes from all commits (44 commits):
ecfc03c - Enforce exact_match=True for get_estimated_time (alex-l-kong, Aug 22, 2024)
81abb38 - Merge branch 'main' into mph_fov_name_fix (alex-l-kong, Aug 22, 2024)
221bcdd - Ensure we're not searching for the actual extension (alex-l-kong, Sep 11, 2024)
1762d50 - Merge remote-tracking branch 'origin/mph_fov_name_fix' into mph_fov_n… (alex-l-kong, Sep 11, 2024)
fb2f4c0 - Modify list_files to use "correct" syntax for exact_match=True and fi… (alex-l-kong, Sep 12, 2024)
0f4e63f - Make workflow of getting bin file easier (alex-l-kong, Sep 12, 2024)
553d4cc - Remove the == 0 (alex-l-kong, Sep 12, 2024)
326e9c2 - Add processing.json files to test (alex-l-kong, Sep 13, 2024)
2bd3ab4 - Merge remote-tracking branch 'origin/mph_fov_name_fix' into mph_fov_n… (alex-l-kong, Sep 13, 2024)
a529f53 - Fix bin file generation by simulating update only after .json file wr… (Oct 7, 2024)
875da78 - Formatting to avoid going over 100 characters per line (Oct 7, 2024)
673cd53 - Add trailing comma to please black linter (Oct 7, 2024)
77c7cb6 - Print statement debugging (Oct 8, 2024)
4392878 - See if an OSError is being thrown (Oct 8, 2024)
660ec0a - More debugging statements: need to see the MPH process (Oct 8, 2024)
6dea20b - Test the presence and contents of log_out (Oct 8, 2024)
517687a - See what the actual log path is being set to (Oct 8, 2024)
9737c5c - Try adding a simple logging statement at the beginning to force (Oct 8, 2024)
db326a6 - Try to set logging propagation to be False (Oct 8, 2024)
57421cf - Fix pyproject.toml logging (Oct 8, 2024)
53f18fa - Additional changes to pytest pyproject.toml (Oct 8, 2024)
7e6ef2c - Lowercase true for pyproject.toml (Oct 8, 2024)
fd021c6 - Attempt to force pytest fixture to write to log file (Oct 17, 2024)
58f915a - Add logging import (Oct 17, 2024)
63f7388 - Remove whitespace (Oct 17, 2024)
170ab4d - Try to fix logging issue (Oct 17, 2024)
93735fc - Overwrite pytest.txt with blank stuff (Oct 17, 2024)
3d2832c - More testing fixes (Oct 17, 2024)
c9b9439 - Use the absolute path to the directory for accessing the log (Oct 17, 2024)
4db71a3 - Adjust error message (Oct 17, 2024)
a046bc3 - Don't print add_blank (Oct 18, 2024)
98013de - Check if the error is happening during the renaming phases (Oct 18, 2024)
378a82a - Add some more tests in (Nov 12, 2024)
1fef177 - Actually get image_stitching to print out debug (Nov 12, 2024)
7b08a84 - Pin watchdog to version 6 (Nov 12, 2024)
1493401 - Merge branch 'main' into mph_fov_name_fix (alex-l-kong, Nov 12, 2024)
e76de60 - Update lock file (Nov 12, 2024)
406172e - Merge remote-tracking branch 'origin/mph_fov_name_fix' into mph_fov_n… (Nov 12, 2024)
8f80979 - Add debug workflow to GitHub PR (Nov 12, 2024)
5cb65a9 - Remove the lock entirely (Nov 12, 2024)
d50c4b4 - Nuke file timer functionality (Nov 12, 2024)
efc3fee - Test deleting the lock AND the timer (Nov 12, 2024)
af9369f - Try pushing old slow copy tissue data (Nov 13, 2024)
7aef668 - removed debug (srivarra, Nov 14, 2024)
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
@@ -52,6 +52,9 @@ jobs:
run: |
poetry install --no-interaction

# - name: Debug
# uses: lhotari/action-upterm@v1

- name: Run Tests
run: |
poetry install --with test --no-interaction --no-root
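The commented-out Debug step references lhotari/action-upterm, a GitHub Action that opens an interactive upterm (SSH) session into the CI runner; left disabled, it serves as an on-demand hook for inspecting failing jobs live.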
Four new empty files added.
3,350 changes: 1,798 additions & 1,552 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 8 additions & 1 deletion pyproject.toml
@@ -44,7 +44,7 @@ numpy = "1.*"
natsort = "^8"
seaborn = "^0.13"
scikit-learn = "^1"
watchdog = "^3"
watchdog = "^6"
tqdm = "^4"
scipy = "^1.10.1"
pandas = "^2"
@@ -128,3 +128,10 @@ filterwarnings = [
]
testpaths = ["tests"]
norecursedirs = ["tests/utilities"]

log_cli = true
log_level = "INFO"
log_file = "pytest.txt"
log_file_level = "INFO"
log_format = "%(asctime)s %(levelname)s %(message)s"
log_date_format = "%Y-%m-%d %H:%M:%S"
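The six log_* options above are standard pytest ini settings: log_cli streams records to the terminal during the run, while log_file and log_file_level write them to pytest.txt under the rootdir, formatted per log_format and log_date_format. A minimal sketch of what this captures (the test below is illustrative, not part of the PR):

```python
import logging

logger = logging.getLogger(__name__)


def test_logging_is_captured():
    # With log_file = "pytest.txt" and log_file_level = "INFO" in
    # pyproject.toml, pytest writes this record to pytest.txt,
    # formatted per log_format/log_date_format.
    logger.info("visible in pytest.txt after the run")
```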
96 changes: 49 additions & 47 deletions src/toffy/fov_watcher.py
@@ -207,21 +207,23 @@ def __init__(
self.run_folder = run_folder

self.last_event_time = datetime.now()
self.timer_thread = threading.Thread(
target=self.file_timer, args=(fov_timeout, watcher_timeout)
)
self.timer_thread.daemon = True
self.timer_thread.start()
# self.timer_thread = threading.Thread(
# target=self.file_timer, args=(fov_timeout, watcher_timeout)
# )
# self.timer_thread.daemon = True
# self.timer_thread.start()

self.log_path = os.path.join(log_folder, f"{Path(run_folder).parts[-1]}_log.txt")
if not os.path.exists(log_folder):
os.makedirs(log_folder)
print(f"Setting log path to {self.log_path}")
logging.basicConfig(
level=logging.INFO,
filename=self.log_path,
filemode="a",
format="%(name)s - %(levelname)s - %(message)s",
)
logging.info(f"Starting run on {run_folder}\n")

# create run structure
self.run_structure = RunStructure(run_folder, fov_timeout=fov_timeout)
@@ -487,45 +489,45 @@ def on_created(self, event: FileCreatedEvent, check_last_fov: bool = True):
if self.last_fov_num_processed == self.run_structure.highest_fov:
return

with self.lock:
super().on_created(event)
self._run_callbacks(event, check_last_fov)

def file_timer(self, fov_timeout, watcher_timeout):
"""Checks time since last file was generated.

Args:
fov_timeout (int):
how long to wait for fov data to be generated once file detected
watcher_timeout (int):
length to wait for new file generation before timing out
"""
while True:
current_time = datetime.now()
time_elapsed = (current_time - self.last_event_time).total_seconds()

# 3 fov cycles and no new files --> timeout
if time_elapsed > watcher_timeout:
fov_num = self.last_fov_num_processed
fov_name = list(self.run_structure.fov_progress.keys())[fov_num]
print(f"Timed out waiting for {fov_name} files to be generated.")
logging.info(
f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} -- Timed out'
f"waiting for {fov_name} files to be generated.\n"
)
logging.info(
f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} -- '
f"Running {self.run_func.__name__} on FOVs\n"
)

# mark remaining fovs as completed to exit watcher
for fov_name in list(self.run_structure.fov_progress.keys()):
self.run_structure.fov_progress[fov_name] = {"json": True, "bin": True}

# trigger run callbacks
self.run_func(self.run_folder)
break
time.sleep(fov_timeout)
# with self.lock:
super().on_created(event)
self._run_callbacks(event, check_last_fov)

# def file_timer(self, fov_timeout, watcher_timeout):
# """Checks time since last file was generated.

# Args:
# fov_timeout (int):
# how long to wait for fov data to be generated once file detected
# watcher_timeout (int):
# length to wait for new file generation before timing out
# """
# while True:
# current_time = datetime.now()
# time_elapsed = (current_time - self.last_event_time).total_seconds()

# # 3 fov cycles and no new files --> timeout
# if time_elapsed > watcher_timeout:
# fov_num = self.last_fov_num_processed
# fov_name = list(self.run_structure.fov_progress.keys())[fov_num]
# print(f"Timed out waiting for {fov_name} files to be generated.")
# logging.info(
# f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} -- Timed out'
# f"waiting for {fov_name} files to be generated.\n"
# )
# logging.info(
# f'{datetime.now().strftime("%d/%m/%Y %H:%M:%S")} -- '
# f"Running {self.run_func.__name__} on FOVs\n"
# )

# # mark remaining fovs as completed to exit watcher
# for fov_name in list(self.run_structure.fov_progress.keys()):
# self.run_structure.fov_progress[fov_name] = {"json": True, "bin": True}

# # trigger run callbacks
# self.run_func(self.run_folder)
# break
# time.sleep(fov_timeout)

def on_moved(self, event: FileMovedEvent, check_last_fov: bool = True):
"""Handles file renaming events.
@@ -541,9 +543,9 @@ def on_moved(self, event: FileMovedEvent, check_last_fov: bool = True):
if self.last_fov_num_processed == self.run_structure.highest_fov:
return

with self.lock:
super().on_moved(event)
self._run_callbacks(event, check_last_fov)
# with self.lock:
super().on_moved(event)
self._run_callbacks(event, check_last_fov)

def check_complete(self):
"""Checks run structure fov_progress status.
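One caveat about the logging.basicConfig call shown above (an observation, not something the PR states): basicConfig is a no-op whenever the root logger already has handlers, and pytest's log_cli/log_file settings install handlers before any test code runs. That would leave the watcher's {run}_log.txt empty under pytest, consistent with the test changes below reading pytest.txt instead of test_run_log.txt. A minimal sketch, with an illustrative file name:

```python
import logging

# If a handler is already attached to the root logger (as pytest does
# when log_cli/log_file are configured), this call silently does nothing:
logging.basicConfig(filename="run_log.txt", level=logging.INFO)

# Python 3.8+ provides an explicit override that removes and replaces
# any existing root handlers:
logging.basicConfig(filename="run_log.txt", level=logging.INFO, force=True)
```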
7 changes: 7 additions & 0 deletions src/toffy/image_stitching.py
@@ -253,6 +253,13 @@ def stitch_images(
num_cols = math.isqrt(len(folders))
max_img_size = get_max_img_size(tiff_out_dir, img_sub_folder, run_dir)

print(tiff_out_dir)
print(img_sub_folder)
print(os.listdir(tiff_out_dir))
subfolder = "" if img_sub_folder is None else img_sub_folder
for fov in folders:
print(f"Here's what's in fov {fov}")
print(os.listdir(os.path.join(tiff_out_dir, fov, subfolder)))
image_data = load_utils.load_imgs_from_tree(
tiff_out_dir,
img_sub_folder=img_sub_folder,
6 changes: 3 additions & 3 deletions src/toffy/mph_comp.py
@@ -24,12 +24,12 @@ def get_estimated_time(bin_file_dir, fov):
io_utils.validate_paths(bin_file_dir)

# get fov json file in bin_file_path
-    json_file = io_utils.list_files(bin_file_dir, fov + ".json")
-    if len(json_file) == 0:
+    json_file = os.path.join(bin_file_dir, f"{fov}.json")
+    if not os.path.exists(json_file):
raise FileNotFoundError(f"The FOV name supplied doesn't have a JSON file: {fov}")

# retrieve estimated time (frame dimensions x pixel dwell time)
-    run_metadata = read_json_file(os.path.join(bin_file_dir, json_file[0]), encoding="utf-8")
+    run_metadata = read_json_file(json_file, encoding="utf-8")
try:
size = run_metadata.get("frameSize")
time = run_metadata.get("dwellTimeMillis")
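This hunk is the core fix of the PR. Assuming list_files matches on substrings unless exact_match=True is passed (which is what the PR title implies), a lookup for a FOV's JSON can also hit temporary or hidden variants of the name, such as the .{name}.aBcDeF copies the watcher test below simulates; constructing the exact path sidesteps name matching entirely. A hypothetical illustration (the stand-in helper and file names are mine, not toffy's API):

```python
import os

def list_files_substring(directory: str, substring: str) -> list[str]:
    # hypothetical stand-in for a substring-based list_files
    return sorted(f for f in os.listdir(directory) if substring in f)

# If ".fov-1-scan-1.json.aBcDeF" (a temp copy) sits next to
# "fov-1-scan-1.json", a substring search for "fov-1-scan-1.json"
# returns both, and json_file[0] may point at the temp file.
# Building the single exact path avoids the ambiguity:
bin_file_dir, fov = "/data/run", "fov-1-scan-1"  # illustrative values
json_file = os.path.join(bin_file_dir, f"{fov}.json")
if not os.path.exists(json_file):
    raise FileNotFoundError(f"The FOV name supplied doesn't have a JSON file: {fov}")
```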
3 changes: 3 additions & 0 deletions src/toffy/normalize.py
@@ -65,14 +65,17 @@ def write_mph_per_mass(
proficient (bool): whether proficient MPH data is written or not
"""
# compute pulse heights
print("Inside writing MPH per mass")
panel = make_panel(
mass=masses, target_name=masses, low_range=start_offset, high_range=stop_offset
)

# generate the MPH values for each mass
print("About to get median pulse height")
mph_vals = list(
get_median_pulse_height(data_dir=base_dir, fov=fov, channels=masses, panel=panel).values()
)
print("Computed median pulse height")

fovs = np.repeat(fov, len(masses))
out_df = pd.DataFrame({"mass": masses, "fov": fovs, "pulse_height": mph_vals})
1 change: 1 addition & 0 deletions src/toffy/watcher_callbacks.py
@@ -369,6 +369,7 @@ def generate_pulse_heights(self, pulse_out_dir: str, panel: pd.DataFrame = None,
- start_offset
- stop_offset
"""
print("Inside computing pulse heights")
if not os.path.exists(pulse_out_dir):
os.makedirs(pulse_out_dir)

88 changes: 84 additions & 4 deletions tests/fov_watcher_test.py
@@ -1,3 +1,4 @@
import logging
import os
import shutil
import tempfile
@@ -35,6 +36,72 @@
SLOW_COPY_INTERVAL_S = 1


# def _slow_copy_sample_tissue_data(
# dest: str, delta: int = 10, one_blank: bool = False, temp_bin: bool = False
# ):
# """Slowly copies files from ./data/tissue/.

# Args:
# dest (str):
# Where to copy tissue files to
# delta (int):
# Time (in seconds) between each file copy
# one_blank (bool):
# Add a blank .bin file or not
# temp_bin (bool):
# Use initial temp bin file paths or not
# """
# num_bin_files = 0
# for tissue_file in sorted(os.listdir(COMBINED_DATA_PATH)):
# time.sleep(delta)
# if one_blank and ".bin" in tissue_file and tissue_file[0] != ".":
# # create blank (0 size) file
# open(os.path.join(dest, tissue_file), "w").close()
# else:
# tissue_path = os.path.join(COMBINED_DATA_PATH, tissue_file)
# if temp_bin and ".bin" in tissue_file:
# # copy to a temporary file with hash extension, then move to dest folder
# print(f"Copying over a temporary file with hash extension on {tissue_file}")
# new_tissue_path = os.path.join(COMBINED_DATA_PATH, "." + tissue_file + ".aBcDeF")
# shutil.copy(tissue_path, new_tissue_path)
# shutil.copy(new_tissue_path, dest)
# print(f"Removing the old tissue path at .{tissue_file}.aBcDeF")
# os.remove(new_tissue_path)

# # simulate a renaming event in dest
# time.sleep(delta)
# print(f"Renaming back to {tissue_file}")
# copied_tissue_path = os.path.join(dest, "." + tissue_file + ".aBcDeF")
# os.rename(copied_tissue_path, os.path.join(dest, tissue_file))

# else:
# shutil.copy(tissue_path, dest)

# # simulate a timestamp update if .bin file has been extracted AND not blank
# # NOTE: this assumes .json file always copies after the .bin, which does happen on Ionpath
# tissue_data = os.path.splitext(tissue_file)
# if tissue_data[1] == ".json" and "_processing" not in tissue_data[0]:
# if one_blank:
# one_blank = False
# continue
# elif num_bin_files % 2 == 0:
# bin_file_name = tissue_data[0] + ".bin"
# print(f"Simulating .bin file update on {bin_file_name}")
# shutil.copy(
# os.path.join(COMBINED_DATA_PATH, bin_file_name),
# os.path.join(dest, bin_file_name + ".temp"),
# )
# print("Renamed bin file to temp")
# os.remove(os.path.join(dest, bin_file_name))
# print("Removed old bin file")
# os.rename(
# os.path.join(dest, bin_file_name + ".temp"),
# os.path.join(dest, bin_file_name),
# )
# print("Renamed temp bin file back to orig")
# num_bin_files += 1


def _slow_copy_sample_tissue_data(
dest: str, delta: int = 10, one_blank: bool = False, temp_bin: bool = False
):
@@ -255,7 +322,6 @@ def test_watcher(
add_blank,
temp_bin,
):
print("The watcher start lag is: %d" % watcher_start_lag)
try:
with tempfile.TemporaryDirectory() as tmpdir:
tiff_out_dir = os.path.join(tmpdir, "cb_0", RUN_DIR_NAME)
@@ -371,7 +437,11 @@ def test_watcher(

res_scan.get()

with open(os.path.join(log_out, "test_run_log.txt")) as f:
print("Testing log out status")
print(log_out)
# with open(os.path.join(log_out, "test_run_log.txt")) as f:
print(os.listdir("."))
with open(os.path.join(Path(__file__).parents[1], "pytest.txt")) as f:
logtxt = f.read()
assert add_blank == ("non-zero file size..." in logtxt)

@@ -390,6 +460,7 @@ def test_watcher(
fovs = fovs[1:]

# extract tiffs check
print("TIFF validator check")
validators[0](os.path.join(tmpdir, "cb_0", RUN_DIR_NAME), fovs, bad_fovs)
if kwargs["extract_prof"]:
validators[0](
@@ -401,9 +472,11 @@ def test_watcher(
)

# qc check
print("QC check")
validators[1](os.path.join(tmpdir, "cb_1", RUN_DIR_NAME), fovs, bad_fovs)

# mph check
print("MPH check")
validators[2](
os.path.join(tmpdir, "cb_2", RUN_DIR_NAME),
os.path.join(tmpdir, "cb_2_plots", RUN_DIR_NAME),
@@ -412,13 +485,20 @@ def test_watcher(
)

# stitch images check
print("Stitch images check")
validators[3](os.path.join(tmpdir, "cb_0", RUN_DIR_NAME, f"{RUN_DIR_NAME}_stitched"))

# pulse heights check
print("Pulse heights check")
validators[4](os.path.join(tmpdir, "cb_3", RUN_DIR_NAME), fovs, bad_fovs)

except OSError:
warnings.warn("Temporary file cleanup was incomplete.")
with open(os.path.join(Path(__file__).parents[1], "pytest.txt"), "w") as infile:
infile.write("")

except OSError as ose:
print("Ran into an error:")
print(ose)
# warnings.warn("Temporary file cleanup was incomplete.")
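(The blank write to pytest.txt above presumably truncates log output carried over from earlier test_watcher parametrizations, so the add_blank assertion against "non-zero file size..." only reflects the current run.)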


def test_watcher_missing_fovs():