From 901436fcb6e86dbfb67bd6e7264a92527277e6a0 Mon Sep 17 00:00:00 2001
From: Michael Folz
Date: Tue, 14 Nov 2023 10:47:20 +0100
Subject: [PATCH] #237 - Minor suggestions to improve the container image

- reorder cmds to decrease img size
- use user id instead of name
- pin base image by digest
- use docker metadata action
- use alpine image
---
Review notes (free text between the separator above and the first
"diff --git" line is not part of the commit):
 * NOTE(review): line breaks, indentation and blank context lines of this
   patch were reconstructed so that every hunk matches its line counts;
   verify with `git apply --check` against the target tree before applying.
 * NOTE(review): the From: header carries no e-mail address; `git am` may
   reject the mail for that reason - confirm and restore the address.
 * NOTE(review): check-if-running-as-user-10001.sh runs `pgrep -u 10001`
   inside the now Alpine-based image; confirm that the pgrep shipped there
   supports `-u` (BusyBox applets may not) or add procps to the image.
 * NOTE(review): the "Docker Meta" step has no `id`, so its outputs cannot
   be referenced by later steps - confirm whether a build/push step needs
   its tags and labels.
 * NOTE(review): JAVA_OPTS in docker-compose.yml interpolates
   DATAPORTAL_BACKEND_JAVA_OPTS without a default, unlike most neighbouring
   entries; consider `${DATAPORTAL_BACKEND_JAVA_OPTS:-}`.
 * NOTE(review): the new Dockerfile ends without a trailing newline.

 .github/integration-test/docker-compose.yml   |  1 +
 .../check-if-running-as-dataportal-user.sh    | 10 ------
 .../scripts/check-if-running-as-user-10001.sh | 10 ++++++
 .github/workflows/ci.yml                      | 32 +++++++++++++----
 Dockerfile                                    | 36 ++++++-------------
 docker-compose.yml                            | 23 ++++++------
 docker-entrypoint.sh                          |  3 ++
 7 files changed, 61 insertions(+), 54 deletions(-)
 delete mode 100755 .github/scripts/check-if-running-as-dataportal-user.sh
 create mode 100755 .github/scripts/check-if-running-as-user-10001.sh

diff --git a/.github/integration-test/docker-compose.yml b/.github/integration-test/docker-compose.yml
index 95388135..2233ece8 100644
--- a/.github/integration-test/docker-compose.yml
+++ b/.github/integration-test/docker-compose.yml
@@ -8,6 +8,7 @@ services:
     depends_on:
       - dataportal-postgres
     environment:
+      JAVA_OPTS: ""
       QUERYRESULT_PUBLIC_KEY: "MIIBojANBgkqhkiG9w0BAQEFAAOCAY8AMIIBigKCAYEA1lWOfXzE/mUEPitNLxsDMtjERJGVhS8gP1WmuHPvjPxUOQyod4EbJcbJlkBqLqpaIs8Buy3gcbJvIPERdG1N1BSZ8NOKOtRubioKf30JwnLdZAae3vJAzRC3h42OPM3fohZCXMxbrju+KM0ZUIrLEXKEDMHQWfevCQCxeixvXVYpfXlkJIBGaWz4cDgEOiiwhU87AMzGZwjAIHvr4oTF/uHg6+C3Mdx0m8WLtygTiEixJegMb/txR+4gNVYrzpm5BwDUU7Qxy3nTUDYZLlTGeP9MBFWW+W87IHzgP+OFr3ZKMEkAPU0R1lqXFZCYcgZHGA5He2W701isnqkKIQT8ePOH43ZOXo3S34Pqw5oQ4Q2kPubp1wgZWw0VtEiZDtlwqUJ+r3CigU7NAFM5JnC/skiIBKetbWoNm1JPEfGOTrgjHD2uo82jSO8tV45LNH1EaR2+5UWSFZyDvTayLZsxsVlRFXJKgQJDI344R6lhGbLXbhqCuPzeQaHr1XGCKAtdAgMBAAE="
       # ----- app
       QUERY_VALIDATION_ENABLED: "true"
diff --git a/.github/scripts/check-if-running-as-dataportal-user.sh b/.github/scripts/check-if-running-as-dataportal-user.sh
deleted file mode 100755
index 8b508fc6..00000000
--- a/.github/scripts/check-if-running-as-dataportal-user.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash -e
-
-if docker exec -u0 dataportal-backend pgrep -u dataportal java > /dev/null
-then
-  echo "Java process is running as dataportal"
-  exit 0
-else
-  echo "Java process is not running as dataportal"
-  exit 1
-fi
diff --git a/.github/scripts/check-if-running-as-user-10001.sh b/.github/scripts/check-if-running-as-user-10001.sh
new file mode 100755
index 00000000..4b52b855
--- /dev/null
+++ b/.github/scripts/check-if-running-as-user-10001.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -e
+
+if docker exec -u0 dataportal-backend pgrep -u 10001 java > /dev/null
+then
+  echo "Java process is running as user 10001"
+  exit 0
+else
+  echo "Java process is not running as user 10001"
+  exit 1
+fi
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 63735242..88a73498 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,11 +17,31 @@ jobs:
     steps:
     - uses: actions/checkout@v4

-    - name: Set up JDK 17
+    - name: Docker Meta
+      uses: docker/metadata-action@v5
+      with:
+        images: |
+          ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+        tags: |
+          type=ref,event=branch
+          type=ref,event=pr
+          type=semver,pattern={{version}}
+          type=semver,pattern={{major}}.{{minor}}
+          type=semver,pattern={{major}}
+          type=sha
+        labels: |
+          maintainer=medizininformatik-initiative
+          org.opencontainers.image.authors=medizininformatik-initiative
+          org.opencontainers.image.source=https://github.com/medizininformatik-initiative/feasibility-backend
+          org.opencontainers.image.vendor=medizininformatik-initiative
+          org.opencontainers.image.title=dataportal backend
+          org.opencontainers.image.description=The backend for the dataportal, including feasibility query execution as well as data selection and extraction.
+
+    - name: Set up JDK 22
       uses: actions/setup-java@v4
       with:
         distribution: 'temurin'
-        java-version: 17
+        java-version: 22

     - name: Cache Local Maven Repo
       uses: actions/cache@v4
@@ -150,8 +170,8 @@ jobs:
     - name: Wait for Dataportal Backend
       run: .github/scripts/wait-for-url.sh http://localhost:8091/actuator/health

-    - name: Check if Dataportal Backend is correctly running with the dataportal user
-      run: .github/scripts/check-if-running-as-dataportal-user.sh
+    - name: Check if Dataportal Backend is correctly running with the user with id 10001
+      run: .github/scripts/check-if-running-as-user-10001.sh

     - name: Wait for Blaze
       run: .github/scripts/wait-for-url.sh http://localhost:8082/health
@@ -185,11 +205,11 @@ jobs:
     steps:
     - uses: actions/checkout@v4

-    - name: Set up JDK 17
+    - name: Set up JDK 22
       uses: actions/setup-java@v4
       with:
         distribution: 'temurin'
-        java-version: 17
+        java-version: 22

     - name: Cache Local Maven Repo
       uses: actions/cache@v4
diff --git a/Dockerfile b/Dockerfile
index c6871383..8ba54d48 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,17 +1,6 @@
-FROM eclipse-temurin:17-jre
-
-RUN apt update -yqq && apt upgrade -yqq && \
-    apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/
+FROM eclipse-temurin:22-jre-alpine@sha256:d8ac5f15b7dc0a91bdfb89422f900383469e5de12dcce3949f145d713f455b34

 WORKDIR /opt/dataportal-backend
-COPY ./target/*.jar ./dataportal-backend.jar
-COPY ontology ontology
-
-RUN groupadd --system dataportal && useradd --system dataportal -g dataportal
-RUN mkdir logging
-RUN chown -R dataportal:dataportal /opt/dataportal-backend
-
-USER dataportal:dataportal

 ARG VERSION=6.0.0
 ENV APP_VERSION=${VERSION}
@@ -23,22 +12,17 @@ ENV CERTIFICATE_PATH=/opt/dataportal-backend/certs
 ENV TRUSTSTORE_PATH=/opt/dataportal-backend/truststore
 ENV TRUSTSTORE_FILE=self-signed-truststore.jks

-RUN mkdir -p $CERTIFICATE_PATH $TRUSTSTORE_PATH
-RUN chown dataportal:dataportal $CERTIFICATE_PATH $TRUSTSTORE_PATH
+RUN mkdir logging && \
+    mkdir -p $CERTIFICATE_PATH $TRUSTSTORE_PATH && \
+    chown -R 10001:10001 /opt/dataportal-backend && \
+    chown 10001:10001 $CERTIFICATE_PATH $TRUSTSTORE_PATH && \
+    apk --no-cache add curl bash
+USER 10001

 HEALTHCHECK --interval=5s --start-period=10s CMD curl -s -f http://localhost:8090/actuator/health || exit 1

+COPY ./target/*.jar ./dataportal-backend.jar
+COPY ontology ontology
 COPY ./docker-entrypoint.sh /
-ENTRYPOINT ["/bin/bash", "/docker-entrypoint.sh"]

-ARG GIT_REF=""
-ARG BUILD_TIME=""
-LABEL maintainer="medizininformatik-initiative" \
-    org.opencontainers.image.created=${BUILD_TIME} \
-    org.opencontainers.image.authors="medizininformatik-initiative" \
-    org.opencontainers.image.source="https://github.com/medizininformatik-initiative/feasibility-backend" \
-    org.opencontainers.image.version=${VERSION} \
-    org.opencontainers.image.revision=${GIT_REF} \
-    org.opencontainers.image.vendor="medizininformatik-initiative" \
-    org.opencontainers.image.title="dataportal backend" \
-    org.opencontainers.image.description="Provides backend functions for the dataportal"
+ENTRYPOINT ["/bin/bash", "/docker-entrypoint.sh"]
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 7e22e18e..e05d96ab 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,6 @@ services:
   dataportal-backend:
     container_name: dataportal-backend
     restart: unless-stopped
-    build: .
     ports:
       - ${DATAPORTAL_BACKEND_PORT:-127.0.0.1:8091}:8090
     depends_on:
@@ -13,6 +12,7 @@ services:
       init-elasticsearch:
         condition: service_completed_successfully
     environment:
+      JAVA_OPTS: ${DATAPORTAL_BACKEND_JAVA_OPTS}
       # ----- app
       QUERY_VALIDATION_ENABLED: ${DATAPORTAL_BACKEND_QUERY_VALIDATION_ENABLED:-true}
       CQL_TRANSLATE_ENABLED: ${DATAPORTAL_BACKEND_CQL_TRANSLATE_ENABLED:-true}
@@ -23,7 +23,7 @@ services:
       ONTOLOGY_ORDER: ${DATAPORTAL_BACKEND_ONTOLOGY_ORDER:-"Diagnose, Prozedur, Person, Laboruntersuchung, Medikamentenverabreichung, Bioprobe, Einwilligung"}
       MAX_SAVED_QUERIES_PER_USER: ${DATAPORTAL_BACKEND_MAX_SAVED_QUERIES_PER_USER:-100}
       # ---- db config
-      DATABASE_HOST: ${DATAPORTAL_BACKEND_DATABASE_HOST:-dataportal-backend-db}
+      DATABASE_HOST: ${DATAPORTAL_BACKEND_DATABASE_HOST:-dataportal-postgres}
       DATABASE_PORT: ${DATAPORTAL_BACKEND_DATABASE_PORT:-5432}
       DATABASE_USER: ${DATAPORTAL_BACKEND_DATABASE_USERNAME:-dataportaluser}
       DATABASE_PASSWORD: ${DATAPORTAL_BACKEND_DATABASE_PASSWORD:-dataportalpw}
@@ -31,8 +31,8 @@ services:
       # ---- ontology
       ONTOLOGY_FILES_FOLDER_UI: ${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}
       ONTOLOGY_DB_MIGRATION_FOLDER: ${DATAPORTAL_BACKEND_ONTOLOGY_DB_MIGRATION_FOLDER:-/opt/dataportal-backend/ontology/migration}
-      MAPPINGS_FILE: ${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/codex-term-code-mapping.json
-      CONCEPT_TREE_FILE: ${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/codex-code-tree.json
+      MAPPINGS_FILE: ${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/mapping_cql.json
+      CONCEPT_TREE_FILE: ${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/mapping_tree.json
       # ---- auth
       KEYCLOAK_ENABLED: ${DATAPORTAL_BACKEND_KEYCLOAK_ENABLED:-true}
       KEYCLOAK_BASE_URL: ${DATAPORTAL_BACKEND_KEYCLOAK_BASE_URL:-http://keycloak:8080}
@@ -82,20 +82,18 @@ services:
       PRIVACY_QUOTA_READ_DETAILED_OBFUSCATED_INTERVALSECONDS: ${DATAPORTAL_BACKEND_PRIVACY_QUOTA_READ_DETAILED_OBFUSCATED_INTERVALSECONDS:-7200}
       PRIVACY_THRESHOLD_RESULTS: ${DATAPORTAL_BACKEND_PRIVACY_THRESHOLD_RESULTS:-20}
       PRIVACY_THRESHOLD_SITES: ${DATAPORTAL_BACKEND_PRIVACY_THRESHOLD_SITES:-3}
-      PRIVACY_THRESHOLD_SITES_RESULT: ${DATAPORTAL_BACKEND_PRIVACY_THRESHOLD_SITES_RESULT}
+      PRIVACY_THRESHOLD_SITES_RESULT: ${DATAPORTAL_BACKEND_PRIVACY_THRESHOLD_SITES_RESULT:-0}
       QUERYRESULT_DISABLE_LOG_FILE_ENCRYPTION: "true"
       # ---- Elastic Search
-      ELASTIC_SEARCH_ENABLED: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_ENABLED}
-      ELASTIC_SEARCH_HOST: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_HOST}
-      ELASTIC_SEARCH_FILTER: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_FILTER}
+      ELASTIC_SEARCH_ENABLED: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_ENABLED:-true}
+      ELASTIC_SEARCH_HOST: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_HOST:-dataportal-elastic}
+      ELASTIC_SEARCH_FILTER: ${DATAPORTAL_BACKEND_ELASTIC_SEARCH_FILTER:-context,terminology,kds_module}
       # ---- logging
       LOG_LEVEL_SQL: ${DATAPORTAL_BACKEND_LOG_LEVEL_SQL:-warn}
       LOG_LEVEL: ${DATAPORTAL_BACKEND_LOG_LEVEL:-warn}
     volumes:
-      - ${DATAPORTAL_BACKEND_LOCAL_CONCEPT_TREE_PATH:-./ontology/dataportal-code-tree.json}:${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/dataportal-code-tree.json
-      - ${DATAPORTAL_BACKEND_LOCAL_TERM_CODE_MAPPING_PATH:-./ontology/dataportal-term-code-mapping.json}:${DATAPORTAL_BACKEND_ONTOLOGY_FILES_FOLDER:-/opt/dataportal-backend/ontology}/dataportal-term-code-mapping.json
-      - ${DATAPORTAL_BACKEND_DSF_SECURITY_DIR:-/dev/null}:/opt/dataportal-backend/dsf-security/
-      - ${DATAPORTAL_BACKEND_ONTOLOGY_DB_MIGRATION_FOLDER:-../ontology/migration}:/opt/dataportal-backend/ontology/migration
+      - ${DATAPORTAL_BACKEND_CERTS_PATH:-../certs}:/opt/dataportal-security
+      - ./certs:/opt/dataportal-backend/certs

   dataportal-postgres:
     container_name: dataportal-postgres
@@ -134,6 +132,7 @@ services:
         target: /usr/share/elasticsearch/data
   init-elasticsearch:
     image: curlimages/curl:8.8.0
+    container_name: dataportal-elasticsearch-init
     depends_on:
       dataportal-elastic:
         condition: service_healthy
diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh
index 2c603dc2..6235bf40 100644
--- a/docker-entrypoint.sh
+++ b/docker-entrypoint.sh
@@ -28,8 +28,11 @@ if [ ! "${#ca_files[@]}" -eq 0 ]; then

     done

+    echo "### JAVA_OPTS is set to $JAVA_OPTS"
+
     java $JAVA_OPTS -Djavax.net.ssl.trustStore="$TRUSTSTORE_FILE" -Djavax.net.ssl.trustStorePassword="$TRUSTSTORE_PASS" -jar dataportal-backend.jar
 else
     echo "# No CA *.pem cert files found in /opt/dataportal-backend/certs -> starting dataportal backend without own CAs"
+    echo "### JAVA_OPTS is set to $JAVA_OPTS"
     java $JAVA_OPTS -jar dataportal-backend.jar
 fi