-
Notifications
You must be signed in to change notification settings - Fork 129
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Constantin M Adam <[email protected]>
- Loading branch information
Showing
2 changed files
with
53 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
FROM docker.io/python:3.10.14-slim-bullseye | ||
|
||
RUN pip install --upgrade --no-cache-dir pip | ||
|
||
# install pytest | ||
RUN pip install --no-cache-dir pytest | ||
|
||
# Create a user and use it to run the transform | ||
RUN useradd -ms /bin/bash dpk | ||
USER dpk | ||
WORKDIR /home/dpk | ||
|
||
# Copy and install data processing libraries | ||
# These are expected to be placed in the docker context before this is run (see the make image). | ||
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ | ||
RUN cd data-processing-lib-python && pip install --no-cache-dir -e . | ||
|
||
COPY --chown=dpk:root src/ src/ | ||
COPY --chown=dpk:root pyproject.toml pyproject.toml | ||
COPY --chown=dpk:root README.md README.md | ||
COPY --chown=dpk:root requirements.txt requirements.txt | ||
|
||
RUN pip install --no-cache-dir -e . | ||
|
||
# copy source data | ||
COPY src/ src/ | ||
|
||
# copy source data | ||
COPY ./src/signature_calc_transform_python.py fdedup_transform_python.py | ||
COPY ./src/signature_calc_local_python.py local/ | ||
|
||
# copy test | ||
COPY test/ test/ | ||
COPY test-data/ test-data/ | ||
|
||
# Set environment | ||
ENV PYTHONPATH /home/dpk | ||
|
||
# Put these at the end since they seem to upset the docker cache. | ||
ARG BUILD_DATE | ||
ARG GIT_COMMIT | ||
LABEL build-date=$BUILD_DATE | ||
LABEL git-commit=$GIT_COMMIT |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
pyarrow==16.1.0 | ||
pyyaml>=6.0.2 | ||
boto3>=1.34.69 | ||
kubernetes>=30.1.0 | ||
polars>=1.6.0 | ||
disjoint-set>=0.8.0 | ||
numpy<1.29.0 | ||
sentencepiece>=0.2.0 | ||
mmh3>=4.1.0 | ||
scipy>=1.12.0, <2.0.0 |