separate auxiliary functions
luiztauffer committed Sep 21, 2023
1 parent ffe609e commit 0ad021a
Showing 10 changed files with 173 additions and 194 deletions.
containers/Dockerfile.combined (6 changes: 3 additions & 3 deletions)
@@ -37,13 +37,13 @@ COPY requirements.txt .
 RUN pip install -r requirements.txt
 
 WORKDIR /app
-COPY run_script.py .
+COPY main.py .
+COPY utils.py .
 COPY light_server.py .
 RUN mkdir /data
 RUN mkdir /logs
 
 # Get Python stdout logs
 ENV PYTHONUNBUFFERED=1
 
-CMD ["python", "light_server.py"]
-# CMD ["uvicorn", "light_server:app", "--host", "0.0.0.0", "--port", "5000", "--reload"]
+CMD ["python", "light_server.py"]
containers/Dockerfile_ks2_5 → containers/Dockerfile.ks2_5 (19 changes: 9 additions & 10 deletions)
@@ -1,5 +1,5 @@
 # Spike sorters image
-FROM spikeinterface/kilosort2_5-compiled-base as ks25base
+FROM spikeinterface/kilosort2_5-compiled-base:0.2.0 as ks25base
 
 # NVIDIA-ready Image
 FROM nvidia/cuda:11.6.2-base-ubuntu20.04
@@ -12,9 +12,8 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-ENV MINICONDA_VERSION 4.8.2
 ENV CONDA_DIR /home/miniconda3
-ENV LATEST_CONDA_SCRIPT "Miniconda3-py38_$MINICONDA_VERSION-Linux-x86_64.sh"
+ENV LATEST_CONDA_SCRIPT "Miniconda3-py39_23.5.2-0-Linux-x86_64.sh"
 
 RUN wget --quiet https://repo.anaconda.com/miniconda/$LATEST_CONDA_SCRIPT -O ~/miniconda.sh && \
     bash ~/miniconda.sh -b -p $CONDA_DIR && \
@@ -29,20 +28,20 @@ ENV PATH="/opt/matlabruntime:${PATH}"
 COPY --from=ks25base /usr/lib/x86_64-linux-gnu/libXt.so.6 /usr/lib/x86_64-linux-gnu/libXt.so.6
 COPY --from=ks25base /usr/lib/x86_64-linux-gnu/libSM.so.6 /usr/lib/x86_64-linux-gnu/libSM.so.6
 COPY --from=ks25base /usr/lib/x86_64-linux-gnu/libICE.so.6 /usr/lib/x86_64-linux-gnu/libICE.so.6
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/matlabruntime/v911/runtime/glnxa64:/opt/matlabruntime/v911/bin/glnxa64:/opt/matlabruntime/v911/sys/os/glnxa64:/opt/matlabruntime/v911/sys/opengl/lib/glnxa64:/opt/matlabruntime/v911/extern/bin/glnxa64
-
-# Get Python stdout logs
-ENV PYTHONUNBUFFERED=1
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/matlabruntime/R2022b/runtime/glnxa64:/opt/matlabruntime/R2022b/bin/glnxa64:/opt/matlabruntime/R2022b/sys/os/glnxa64:/opt/matlabruntime/R2022b/sys/opengl/lib/glnxa64:/opt/matlabruntime/R2022b/extern/bin/glnxa64
 
 # Copy requirements and script
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
 WORKDIR /app
-COPY run_script.py .
+COPY main.py .
+COPY utils.py .
 COPY light_server.py .
 RUN mkdir /data
 RUN mkdir /logs
 
-CMD ["python", "light_server.py"]
-# ENTRYPOINT ["python", "run_script.py"]
+# Get Python stdout logs
+ENV PYTHONUNBUFFERED=1
+
+CMD ["python", "light_server.py"]
containers/Dockerfile_ks3 → containers/Dockerfile.ks3 (19 changes: 9 additions & 10 deletions)
@@ -1,5 +1,5 @@
 # Spike sorter image
-FROM spikeinterface/kilosort3-compiled-base as ksbase
+FROM spikeinterface/kilosort3-compiled-base:0.2.0 as ksbase
 
 # NVIDIA-ready Image
 FROM nvidia/cuda:11.6.2-base-ubuntu20.04
@@ -12,9 +12,8 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-ENV MINICONDA_VERSION 4.8.2
 ENV CONDA_DIR /home/miniconda3
-ENV LATEST_CONDA_SCRIPT "Miniconda3-py38_$MINICONDA_VERSION-Linux-x86_64.sh"
+ENV LATEST_CONDA_SCRIPT "Miniconda3-py39_23.5.2-0-Linux-x86_64.sh"
 
 RUN wget --quiet https://repo.anaconda.com/miniconda/$LATEST_CONDA_SCRIPT -O ~/miniconda.sh && \
     bash ~/miniconda.sh -b -p $CONDA_DIR && \
@@ -29,20 +28,20 @@ ENV PATH="/opt/matlabruntime:${PATH}"
 COPY --from=ksbase /usr/lib/x86_64-linux-gnu/libXt.so.6 /usr/lib/x86_64-linux-gnu/libXt.so.6
 COPY --from=ksbase /usr/lib/x86_64-linux-gnu/libSM.so.6 /usr/lib/x86_64-linux-gnu/libSM.so.6
 COPY --from=ksbase /usr/lib/x86_64-linux-gnu/libICE.so.6 /usr/lib/x86_64-linux-gnu/libICE.so.6
-ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/matlabruntime/v911/runtime/glnxa64:/opt/matlabruntime/v911/bin/glnxa64:/opt/matlabruntime/v911/sys/os/glnxa64:/opt/matlabruntime/v911/sys/opengl/lib/glnxa64:/opt/matlabruntime/v911/extern/bin/glnxa64
-
-# Get Python stdout logs
-ENV PYTHONUNBUFFERED=1
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/matlabruntime/R2022b/runtime/glnxa64:/opt/matlabruntime/R2022b/bin/glnxa64:/opt/matlabruntime/R2022b/sys/os/glnxa64:/opt/matlabruntime/R2022b/sys/opengl/lib/glnxa64:/opt/matlabruntime/R2022b/extern/bin/glnxa64
 
 # Copy requirements and script
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
 WORKDIR /app
-COPY run_script.py .
+COPY main.py .
+COPY utils.py .
 COPY light_server.py .
 RUN mkdir /data
 RUN mkdir /logs
 
-CMD ["python", "light_server.py"]
-# ENTRYPOINT ["python", "run_script.py"]
+# Get Python stdout logs
+ENV PYTHONUNBUFFERED=1
+
+CMD ["python", "light_server.py"]
containers/Dockerfile_simple (15 changes: 0 additions & 15 deletions)

This file was deleted.

containers/README.md (2 changes: 1 addition & 1 deletion)
@@ -94,7 +94,7 @@ If having difficulties pushing the image to ECR:
 4. Create a Job Definition (EC2)
    - Choose suitable Execution Timeout, Job Attempts and Retry Strategies
    - Select the base image
-   - Command = `python run_script.py`
+   - Command = `python main.py`
    - For the Execution role and Job role configuration, choose the `BatchJobsAccessRole`
    - Configure the resource requirements. Remember to choose a value for Memory slightly smaller than the value for the machines you're hoping to use, otherwise ECS might not find suitable instances.
    - Add any fixed ENV variables that should be used by any Jobs using this definition
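The Job Definition described in the README steps above can also be registered programmatically. A sketch with boto3 follows; the definition name, image URI, role ARNs, and resource values are placeholders, not values taken from this repo:

import boto3

batch = boto3.client("batch")
batch.register_job_definition(
    jobDefinitionName="spike-sorting",          # placeholder name
    type="container",
    timeout={"attemptDurationSeconds": 7200},   # "suitable Execution Timeout"
    retryStrategy={"attempts": 1},              # "Job Attempts and Retry Strategies"
    containerProperties={
        "image": "<account>.dkr.ecr.<region>.amazonaws.com/<repo>:<tag>",
        "command": ["python", "main.py"],       # matches the change above
        "jobRoleArn": "arn:aws:iam::<account>:role/BatchJobsAccessRole",
        "executionRoleArn": "arn:aws:iam::<account>:role/BatchJobsAccessRole",
        # Keep MEMORY slightly below the instance's physical RAM, as the README advises
        "resourceRequirements": [
            {"type": "VCPU", "value": "16"},
            {"type": "MEMORY", "value": "61000"},
        ],
        "environment": [{"name": "PYTHONUNBUFFERED", "value": "1"}],
    },
)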
File renamed without changes.
containers/light_server.py (2 changes: 1 addition & 1 deletion)
@@ -3,7 +3,7 @@
 import logging
 import functools
 
-from run_script import main
+from main import main
 
 
 app = Flask(__name__)
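Only the changed import is shown above. For orientation, here is a hypothetical reconstruction of how light_server.py likely wraps main() in an HTTP endpoint; the /run route, payload handling, and response body are assumptions, since only the import, app = Flask(__name__), and port 5000 (from the commented uvicorn CMD) appear in the diffs:

from flask import Flask, request, jsonify

from main import main

app = Flask(__name__)


@app.route("/run", methods=["POST"])
def run():
    # Assumption: main() takes its run parameters as keyword arguments
    kwargs = request.get_json(force=True) or {}
    main(**kwargs)
    return jsonify({"message": "sorting run finished"})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)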
containers/run_script.py → containers/main.py (174 changes: 20 additions & 154 deletions)
@@ -1,11 +1,6 @@
 import boto3
-import botocore
 import os
 import ast
-import shutil
-import requests
-import logging
-import sys
 import subprocess
 from warnings import filterwarnings
 from datetime import datetime
@@ -21,147 +16,13 @@
 from dandi.upload import upload
 from dandi.download import download
 
-
-# TODO - complete with more data types
-DATA_TYPE_TO_READER = {
-    "spikeglx": se.read_spikeglx,
-    "nwb": se.read_nwb_recording,
-}
-
-# # TODO - create data models for inputs of each data type reader
-# DATA_TYPE_READER_DATA_MODELS = {
-#     "spikeglx": ,
-#     "nwb": ,
-# }
-
-# # TODO - complete with more sorters
-# SORTER_DATA_MODELS = {
-#     "kilosort3": ,
-#     "kilosort2_5":,
-# }
-
-
-class Tee(object):
-    def __init__(self, *files):
-        self.files = files
-    def write(self, obj):
-        for f in self.files:
-            f.write(obj)
-            f.flush()
-    def flush(self):
-        for f in self.files:
-            f.flush()
-
-
-def make_logger(run_identifier: str, log_to_file: bool):
-    logging.basicConfig()
-    logger = logging.getLogger("sorting_worker")
-    logger.handlers.clear()
-    logger.setLevel(logging.DEBUG)
-    log_formatter = logging.Formatter(
-        fmt="%(asctime)s %(levelname)s %(name)s -- %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
-
-    if log_to_file:
-        # Add a logging handler for the log file
-        fileHandler = logging.FileHandler(
-            filename=f"/logs/sorting_worker_{run_identifier}.log",
-            mode="a",
-        )
-        fileHandler.setFormatter(log_formatter)
-        fileHandler.setLevel(level=logging.DEBUG)
-        logger.addHandler(fileHandler)
-
-        # Add a logging handler for stdout
-        stdoutHandler = logging.StreamHandler(sys.stdout)
-        stdoutHandler.setLevel(logging.DEBUG)
-        stdoutHandler.setFormatter(log_formatter)
-        logger.addHandler(stdoutHandler)
-
-        # Redirect stdout to a file-like object that writes to both stdout and the log file
-        stdout_log_file = open(f"/logs/sorting_worker_{run_identifier}.log", "a")
-        sys.stdout = Tee(sys.stdout, stdout_log_file)
-    else:
-        # Handler to print to console as well
-        handler = logging.StreamHandler(sys.stdout)
-        handler.setLevel(logging.DEBUG)
-        handler.setFormatter(log_formatter)
-        logger.addHandler(handler)
-    return logger
-
-
-def download_file_from_url(url):
-    # ref: https://stackoverflow.com/a/39217788/11483674
-    local_filename = "/data/filename.nwb"
-    with requests.get(url, stream=True) as r:
-        with open(local_filename, 'wb') as f:
-            shutil.copyfileobj(r.raw, f)
-
-
-def download_file_from_s3(
-    client: botocore.client.BaseClient,
-    bucket_name: str,
-    file_path: str
-):
-    file_name = file_path.split("/")[-1]
-    client.download_file(
-        Bucket=bucket_name,
-        Key=file_path,
-        Filename=f"/data/{file_name}"
-    )
-    return file_name
-
-
-def download_all_files_from_bucket_folder(
-    client: botocore.client.BaseClient,
-    bucket_name: str,
-    bucket_folder: str
-):
-    # List files in folder, download all files with content
-    res = client.list_objects_v2(Bucket=bucket_name, Prefix=bucket_folder)
-    for f in res["Contents"]:
-        if f["Size"] > 0:
-            file_name = f["Key"].split("/")[-1]
-            client.download_file(
-                Bucket=bucket_name,
-                Key=f["Key"],
-                Filename=f"/data/{file_name}"
-            )
-
-
-def upload_file_to_bucket(
-    logger: logging.Logger,
-    client: botocore.client.BaseClient,
-    bucket_name: str,
-    bucket_folder: str,
-    local_file_path: str
-):
-    # Upload file to S3
-    logger.info(f"Uploading {local_file_path}...")
-    client.upload_file(
-        Filename=local_file_path,
-        Bucket=bucket_name,
-        Key=f"{bucket_folder}/{local_file_path}",
-    )
-
-
-def upload_all_files_to_bucket_folder(
-    logger: logging.Logger,
-    client: botocore.client.BaseClient,
-    bucket_name: str,
-    bucket_folder: str,
-    local_folder: str
-):
-    # List files from results, upload them to S3
-    files_list = [f for f in Path(local_folder).rglob("*") if f.is_file()]
-    for f in files_list:
-        logger.info(f"Uploading {str(f)}...")
-        client.upload_file(
-            Filename=str(f),
-            Bucket=bucket_name,
-            Key=f"{bucket_folder}{str(f)}",
-        )
+from utils import (
+    make_logger,
+    download_file_from_s3,
+    upload_file_to_bucket,
+    upload_all_files_to_bucket_folder,
+    download_file_from_url,
+)
 
 
 def main(
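The helpers deleted above are exactly what the new import block pulls back in, so the new containers/utils.py presumably receives them unchanged. A condensed skeleton, inferred from the deleted code, with bodies elided; note that download_all_files_from_bucket_folder is deleted here but not re-imported by main.py:

# Inferred skeleton of containers/utils.py (the file itself is not shown in this diff)
import sys
import shutil
import logging
import requests
import botocore
from pathlib import Path


class Tee(object):
    """Mirrors writes across streams; used to tee stdout into the log file."""
    def __init__(self, *files):
        self.files = files

    def write(self, obj):
        for f in self.files:
            f.write(obj)
            f.flush()

    def flush(self):
        for f in self.files:
            f.flush()


def make_logger(run_identifier: str, log_to_file: bool):
    """Builds the 'sorting_worker' logger; body as deleted above."""


def download_file_from_url(url):
    """Streams a URL into /data/filename.nwb; body as deleted above."""


def download_file_from_s3(client: botocore.client.BaseClient, bucket_name: str, file_path: str):
    """Fetches one S3 object into /data/; body as deleted above."""


def download_all_files_from_bucket_folder(client: botocore.client.BaseClient, bucket_name: str, bucket_folder: str):
    """Downloads every non-empty object under a prefix; body as deleted above."""


def upload_file_to_bucket(logger: logging.Logger, client: botocore.client.BaseClient, bucket_name: str, bucket_folder: str, local_file_path: str):
    """Uploads a single local file to S3; body as deleted above."""


def upload_all_files_to_bucket_folder(logger: logging.Logger, client: botocore.client.BaseClient, bucket_name: str, bucket_folder: str, local_folder: str):
    """Recursively uploads a results folder to S3; body as deleted above."""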
@@ -210,8 +71,9 @@ def main(
     - AWS_ACCESS_KEY_ID
     - AWS_SECRET_ACCESS_KEY
-    If saving results to DANDI archive, or reading from embargoed dandisets, the following ENV variable should be present in the running container:
+    If saving results to DANDI archive, or reading from embargoed dandisets, the following ENV variables should be present in the running container:
     - DANDI_API_KEY
+    - DANDI_API_KEY_STAGING
     """
 
     # Order of priority for definition of running arguments:
@@ -507,13 +369,17 @@ def main(
         # Upload results to DANDI
         logger.info(f"Uploading results to DANDI: {output_path}")
         dandi_instance = "dandi-staging" if "staging" in output_path else "dandi"
-        upload(
-            paths=[str(dandiset_local_full_path)],
-            existing="refresh",
-            validation="require",
-            dandi_instance=dandi_instance,
-            sync=True,
-        )
+        if dandi_instance == "dandi-staging":
+            DANDI_API_KEY = os.environ.get("DANDI_API_KEY_STAGING", None)
+            if DANDI_API_KEY is None:
+                raise Exception("DANDI_API_KEY_STAGING not found in ENV variables. Cannot upload results to DANDI staging.")
+        # upload(
+        #     paths=[str(dandiset_local_full_path)],
+        #     existing="refresh",
+        #     validation="require",
+        #     dandi_instance=dandi_instance,
+        #     sync=True,
+        # )
     else:
         # Upload results to local - already done by mounted volume
         pass
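Note that this hunk adds the staging-token check but leaves the upload() call itself commented out, so staging uploads are validated but never executed. A sketch of how the two pieces would plausibly combine once re-enabled, assuming the dandi client reads its token from the DANDI_API_KEY environment variable (the dandi-cli convention); this is not code from the commit:

if dandi_instance == "dandi-staging":
    DANDI_API_KEY = os.environ.get("DANDI_API_KEY_STAGING", None)
    if DANDI_API_KEY is None:
        raise Exception(
            "DANDI_API_KEY_STAGING not found in ENV variables. "
            "Cannot upload results to DANDI staging."
        )
    # Hand the staging token to the dandi client before uploading
    os.environ["DANDI_API_KEY"] = DANDI_API_KEY
upload(
    paths=[str(dandiset_local_full_path)],
    existing="refresh",
    validation="require",
    dandi_instance=dandi_instance,
    sync=True,
)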