Skip to content

Commit

Permalink
Create python Dockerfile
Browse files Browse the repository at this point in the history
Signed-off-by: Constantin M Adam <[email protected]>
  • Loading branch information
cmadam committed Oct 18, 2024
1 parent 77d85fd commit 310d813
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 0 deletions.
43 changes: 43 additions & 0 deletions transforms/universal/fdedup/python/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Base image: the python 3.10.14 "slim-bullseye" tag, pulled explicitly from docker.io.
FROM docker.io/python:3.10.14-slim-bullseye

# Upgrade pip before anything else is installed. This runs before the USER
# switch below; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --upgrade --no-cache-dir pip

# Install pytest so the tests copied into the image later can be run in it.
# NOTE(review): neither pip nor pytest is version-pinned, so rebuilds may pick
# up different releases.
RUN pip install --no-cache-dir pytest

# Create a user and use it to run the transform.
# -m creates the user's home directory, -s sets /bin/bash as the login shell.
RUN useradd -ms /bin/bash dpk
# Subsequent RUN instructions execute as dpk, and relative paths in the
# instructions that follow resolve against /home/dpk.
USER dpk
WORKDIR /home/dpk

# Copy and install data processing libraries
# These are expected to be placed in the docker context before this is run (see the make image).
COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/
# Editable install addressed by path instead of `cd ... && pip install -e .`,
# so the step does not depend on a shell directory change (hadolint DL3003).
RUN pip install --no-cache-dir -e ./data-processing-lib-python

# Copy the transform's sources and packaging metadata, owned by the dpk user.
COPY --chown=dpk:root src/ src/
COPY --chown=dpk:root pyproject.toml pyproject.toml
COPY --chown=dpk:root README.md README.md
COPY --chown=dpk:root requirements.txt requirements.txt

# Editable install of the transform itself from the working directory.
RUN pip install --no-cache-dir -e .

# Copy the transform entry point (renamed to fdedup_transform_python.py) and
# the local runner script (placed under local/).
# src/ is not copied again here: it is already in the image, owned by dpk, from
# the COPY that precedes the editable install. Repeating it without --chown
# only added a duplicate layer and reset the files' ownership to root.
COPY --chown=dpk:root ./src/signature_calc_transform_python.py fdedup_transform_python.py
COPY --chown=dpk:root ./src/signature_calc_local_python.py local/

# Copy tests and their data. --chown keeps them owned by the dpk user the
# container runs as, consistent with the other COPY instructions in this file.
COPY --chown=dpk:root test/ test/
COPY --chown=dpk:root test-data/ test-data/

# Set environment
# Adds /home/dpk (the working directory) to Python's module search path.
# Written in key=value form; the space-separated `ENV key value` form is legacy
# syntax that BuildKit build checks flag (LegacyKeyValueFormat).
ENV PYTHONPATH=/home/dpk

# Put these at the end since they seem to upset the docker cache.
# Build-time arguments supplied via --build-arg; they are recorded in the image
# only through the labels below.
ARG BUILD_DATE
ARG GIT_COMMIT
LABEL build-date=$BUILD_DATE
LABEL git-commit=$GIT_COMMIT
10 changes: 10 additions & 0 deletions transforms/universal/fdedup/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Python dependencies of the fdedup python transform.
# NOTE(review): the Dockerfile copies this file into the image but never runs
# `pip install -r` on it; presumably pyproject.toml reads it during
# `pip install -e .` - confirm.
# pyarrow is the only exact pin; every other entry is a range.
pyarrow==16.1.0
pyyaml>=6.0.2
boto3>=1.34.69
kubernetes>=30.1.0
polars>=1.6.0
disjoint-set>=0.8.0
# NOTE(review): upper bound only, no lower bound - confirm the intended ceiling.
numpy<1.29.0
sentencepiece>=0.2.0
mmh3>=4.1.0
# Bounded on both sides: at least 1.12.0, below the next major version.
scipy>=1.12.0, <2.0.0

0 comments on commit 310d813

Please sign in to comment.