diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 000000000..32776bd43
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,67 @@
+# TAG selects which metacat-bin-<TAG>.tar.gz release is packaged. It is declared before the
+# first FROM so that it can be overridden with --build-arg and re-declared in the stage below.
+ARG TAG=2.19.0
+
+# Stage 1: unpack the release in a throwaway stage, so that only the .war files that are
+# actually deployed (and not the whole extracted distribution) end up in the final image
+FROM scratch AS dist
+ARG TAG
+## ADD auto-inflates the .tar.gz
+ADD metacat-bin-${TAG}.tar.gz /
+
+# Stage 2: the runtime image
+FROM tomcat:8.5.90-jre8-temurin-jammy
+# TODO: update to tomcat:9.0.76-jre8-temurin-jammy
+
+# ARG default values that can be overridden at build-time
+ARG DEVTOOLS=''
+
+# ENV default key=value env vars shared with container at runtime
+ENV METACAT_APP_CONTEXT=metacat
+ENV TC_HOME=/usr/local/tomcat
+ENV USRBINDIR=/usr/local/bin
+ENV PATH=${USRBINDIR}:$PATH
+ENV DEVTOOLS=${DEVTOOLS}
+
+# ADDITIONAL USER-SPECIFIC ENV VAR CREDENTIALS NEEDED AT RUNTIME BY docker-entrypoint.sh:
+#
+#   METACAT_AUTH_ADMINISTRATORS     is a single admin username or LDAP-style Distinguished Name,
+#                                   used to log into the metacat admin pages. It is NOT a list of
+#                                   users (despite its name), and must not contain any colons (:)
+#   METACAT_ADMINISTRATOR_PASSWORD  is the corresponding admin login password
+
+
+# Add a user with name 'metacat'
+RUN useradd metacat
+
+# Copy only the web applications out of the unpacked release
+COPY --from=dist --chown=metacat /metacat.war /metacat-index.war /metacatui.war ${TC_HOME}/webapps/
+
+RUN apt-get update && apt-get install -y --no-install-recommends netcat python3-bcrypt unzip && \
+    bash -c "if [[ \"$DEVTOOLS\" == \"true\" ]]; then echo '* * INSECURE! DEV TOOLS BUILD * *' && \
+        apt-get install -y --no-install-recommends vim procps lsof telnet; fi" && \
+    rm -rf /var/lib/apt/lists/* && \
+    # docker-entrypoint.sh runs as 'metacat', and moves/expands the .war files inside webapps
+    chown metacat ${TC_HOME}/webapps && \
+    # docker-entrypoint.sh also creates its default directories under /var/metacat
+    mkdir -p /var/metacat && chown metacat /var/metacat && \
+    # Tomcat Mods - TODO MB - remove after Tomcat9 upgrade
+    cp ${TC_HOME}/conf/server.xml ${TC_HOME}/conf/server.xml.original && \
+    sed -i 's/port="8080"/relaxedPathChars="\[\]\|" \
+    relaxedQueryChars="\[\]\|\{\}\^\&\#x5c\;\&\#x60\;\&quot\;\&lt\;\&gt\;\&\#x2a\;\&\#x2c\;" port="8080"/g' \
+    $TC_HOME/conf/server.xml
+
+COPY --chown=metacat apply_context.py ${USRBINDIR}/
+COPY --chown=metacat docker-entrypoint.sh ${USRBINDIR}/
+
+#Run Container as 'metacat'
+USER metacat
+
+EXPOSE 8080
+EXPOSE 8009
+EXPOSE 8443
+EXPOSE 5701
+
+ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
+
+CMD ["catalina.sh","start"]
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 000000000..59f3b37b4
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,27 @@
+# Metacat
+
+
+
+This is a [Docker](https://www.docker.com/) image for configuring and running
+Metacat in a lightweight container. Metacat requires access to an existing
+postgres database, and an existing solr instance, both of which are configured to be
+accessed by Metacat. It also assumes that secure https access to the metacat instance is
+handled via an external proxy server.
+
+# How to build the Metacat image
+
+Building an image can be accomplished by first building the
+metacat distribution associated with a given version, and then
+building the docker image based on that. Starting in the root directory of the "metacat" repo:
+
+    $ ant distbin
+    ...
+    ... 
a very long build process ensues, resulting in a tar.gz file + $ cd docker + $ ./build 2.19.0 + $ docker image ls + REPOSITORY TAG IMAGE ID CREATED SIZE + metacat 2.19.0 8da92210dfc4 39 minutes ago 1.27GB + +The image can then be deployed in a Kubernetes environment - see the helm chart at `metacat/helm/` diff --git a/docker/apply_context.py b/docker/apply_context.py new file mode 100755 index 000000000..1379f9251 --- /dev/null +++ b/docker/apply_context.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +from __future__ import absolute_import, division, print_function +''' + File name: apply_context.py + Description: Change the default context in the metacat web.xml file + Author: Valerie Hendrix + Date created: 11/28/2017 + Python Version: 2.7 +''' +import xml.etree.ElementTree as ET + +import sys + +# The namespace +javaee = {'javaee': 'http://java.sun.com/xml/ns/javaee'} + + +if __name__ == "__main__": + + # Check for args + if len(sys.argv) < 4: + print("Usage: {} web_xml_file old_context new_context".format(sys.argv[0]),file=sys.stderr) + sys.exit(-1) + + web_xml_file = sys.argv[1] + old_context = sys.argv[2] + new_context = sys.argv[3] + + # No prefixes for the namespace + ET.register_namespace("","http://java.sun.com/xml/ns/javaee") + tree = ET.parse(web_xml_file) + root = tree.getroot() + + # Iterate over the servlet-mappings and change the url pattern to the + # new application context + mappings = root.findall("./javaee:servlet-mapping", javaee) + for mapping in mappings: + + servlet_name = mapping.find("javaee:servlet-name", javaee) + url_pattern = mapping.find("javaee:url-pattern",javaee) + if servlet_name is not None and servlet_name.text == "metacat": + url_pattern.text = str(url_pattern.text.replace(old_context, new_context)) + + # Overwrite the web xml file + tree.write(web_xml_file) + + # Update the metacat.properties.path in the metaca-index application + metacat_index_web_xml = "/usr/local/tomcat/webapps/metacat-index/WEB-INF/web.xml" + tree = 
ET.parse(metacat_index_web_xml) + root = tree.getroot() + + # Iterate over the servlet-mappings and change the url pattern to the + # new application context + mappings = root.findall("./context-param", javaee) + for mapping in mappings: + + param_name = mapping.find("param-name", javaee) + param_value = mapping.find("param-value", javaee) + if param_name is not None and param_name.text == "metacat.properties.path": + param_value.text = str(param_value.text.replace("/{}/WEB-INF".format(old_context), "/{}/WEB-INF".format(new_context))) + break + + # Overwrite the web xml file + tree.write(metacat_index_web_xml) diff --git a/docker/build.sh b/docker/build.sh new file mode 100755 index 000000000..9a86ac070 --- /dev/null +++ b/docker/build.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +set -e +TAG=$1 +DEFAULT_TAG="2.19.0" + +if [ -z "$1" ] || [[ "$1" == "-devtools" ]]; then + TAG=${DEFAULT_TAG} + echo + echo "Usage: ${0} [tag] [-devtools]" + echo " or: ${0} [-devtools] (omitting tag defaults it to ${DEFAULT_TAG})" + echo + echo "where: tag is typically set to the metacat version #. Setting to default: ${TAG}" + echo " -devtools is FOR DEV/DEBUGGING ONLY - NOT FOR PRODUCTION USE! Installs dev tools:" + echo " vim, procps, lsof, and telnet in the container (see Dockerfile for" + echo " complete list), thus REDUCING ITS SECURITY!" + echo +fi + +DEVTOOLS="false" +DEVBUILDOPTS="" +if [[ "$1" == "-devtools" ]] || [[ "$2" == "-devtools" ]]; then + DEVTOOLS="true" + DEVBUILDOPTS="--no-cache --progress=plain" +fi +echo "Building with \"DEVTOOLS\" set to: ${DEVTOOLS}" +echo + +RELEASE="metacat-bin-${TAG}.tar.gz" + +# Grab the Metacat release +if [ ! -f "../${RELEASE}" ]; then + echo "Could not find ../${RELEASE}" + echo "You must first build the metacat release with 'ant distbin'. Exiting..." + exit 1 +fi + +cp ../"${RELEASE}" . + +docker image build \ + $DEVBUILDOPTS --tag metacat:"$TAG" --build-arg TAG="$TAG" --build-arg DEVTOOLS="$DEVTOOLS" . 
diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh new file mode 100755 index 000000000..9ccfaddd4 --- /dev/null +++ b/docker/docker-entrypoint.sh @@ -0,0 +1,168 @@ +#!/usr/bin/env bash +set -e + +if [ "$1" = 'catalina.sh' ]; then + + if [ -z "$METACAT_AUTH_ADMINISTRATORS" ] || + [ $(echo "$METACAT_AUTH_ADMINISTRATORS" | grep -c ":") -ne 0 ]; then + echo "ERROR: The admin user ($METACAT_AUTH_ADMINISTRATORS) environment variable was either" + echo " not set, or it included a colon (:). It should contain a single username or" + echo " LDAP-style Distinguished Name, not a colon-delimited list of administrators" + echo " (despite its name indicating otherwise - sorry! :-)" + exit 2 + else + METACAT_ADMINISTRATOR_USERNAME="$METACAT_AUTH_ADMINISTRATORS" + fi + + # Expand the metacat-index.war + if [ ! -d webapps/metacat-index ]; then + unzip webapps/metacat-index.war -d webapps/metacat-index + fi + + # set the env vars. Note that TC_HOME and METACAT_APP_CONTEXT are set in Dockerfile + METACAT_DEFAULT_WAR=${TC_HOME}/webapps/metacat.war + METACAT_DIR=${TC_HOME}/webapps/${METACAT_APP_CONTEXT} + METACAT_WAR=${METACAT_DIR}.war + + # Check the context + if [ "${METACAT_WAR}" != "${METACAT_DEFAULT_WAR}" ] && [ -f "$METACAT_DEFAULT_WAR" ]; then + # Move the application to match the context + echo "Changing metacat context to ${METACAT_APP_CONTEXT}" + mv "$METACAT_DEFAULT_WAR" "$METACAT_WAR" + else + echo "Installing metacat to context ${METACAT_APP_CONTEXT}" + fi + + # Expand the WAR file + if [ ! -d "$METACAT_DIR" ]; then + unzip "$METACAT_WAR" -d "$METACAT_DIR" + fi + + # change the context in the web.xml file + apply_context.py "$METACAT_DIR"/WEB-INF/web.xml metacat "${METACAT_APP_CONTEXT}" + + # Show KNB skin if nothing else configured. 
+ # TODO: deploy metacatui separately, or make this work with props config later + mkdir "${TC_HOME}"/webapps/config + { + echo "MetacatUI.AppConfig = {" + echo " theme: \"knb\"," + echo " root: \"/metacatui\"," + echo " metacatContext: \"/${METACAT_APP_CONTEXT}\"," + echo " baseUrl: \"http://localhost:8080\"" + echo "}" + } > "${TC_HOME}"/webapps/config/config.js + + + # Make sure all default directories are available + mkdir -p /var/metacat/data \ + /var/metacat/inline-data \ + /var/metacat/documents \ + /var/metacat/temporary \ + /var/metacat/logs \ + /var/metacat/config \ + /var/metacat/.metacat + + # if METACAT_DEBUG, set the root log level accordingly + if [[ "$METACAT_DEBUG" == "true" ]]; then + sed -i 's/rootLogger\.level[^\n]*/rootLogger\.level=DEBUG/g' \ + "${TC_HOME}"/webapps/metacat/WEB-INF/classes/log4j2.properties; + echo "* * * * * * set Log4J rootLogger level to DEBUG * * * * * *" + fi + + # TODO: need a more-elegant way to handle this, without manipulating files + # If env has an admin/password set, but it does not exist in the passwords file, then add it + if [ -n "$METACAT_ADMINISTRATOR_USERNAME" ]; then + USER_PWFILE="/var/metacat/users/password.xml" + + if [ -z "$METACAT_ADMINISTRATOR_PASSWORD" ]; then + echo "ERROR: The admin user (METACAT_ADMINISTRATOR_USERNAME) environment variable was" + echo " set, but no password value was set." + echo " You may use the METACAT_ADMINISTRATOR_PASSWORD environment variable to" + echo " set the administrator password" + exit 2 + fi + # look for the user password file, as it is expected if the configuration is completed + if [ ! 
-s "$USER_PWFILE" ] || + [ $(grep -c "$METACAT_ADMINISTRATOR_USERNAME" "$USER_PWFILE") -eq 0 ]; then + # Note: the Java bcrypt library only supports '2a' format hashes, so override the + # default python behavior so that the hashes created start with '2a' rather than '2y' + cd "${METACAT_DIR}"/WEB-INF/scripts/bash + PASS=$(python3 -c "import bcrypt;print bcrypt.hashpw('$METACAT_ADMINISTRATOR_PASSWORD',\ + bcrypt.gensalt(10,prefix='2a'))") + bash ./authFileManager.sh useradd -h "$PASS" -dn "$METACAT_ADMINISTRATOR_USERNAME" + cd "$TC_HOME" + echo + echo '*************************************' + echo 'Added administrator to passwords file' + echo '*************************************' + fi + fi + + # Start tomcat + "$@" > /dev/null 2>&1 + + # Give time for tomcat to start + echo + echo '**************************************' + echo "Waiting for Tomcat to start before" + echo "checking upgrade/initialization status" + echo '**************************************' + while ! nc -z localhost 8080; do + echo -n "." 
+ sleep 1 + done + echo + + # + # TODO: Replace DB config check with internal metacat check for an "autoconfig" flag at startup + # + # Login to Metacat Admin and start a session (cookie.txt) + echo "doing curl -X POST to localhost admin page" + if [[ "$METACAT_DEBUG" == "true" ]]; then + echo "using password=${METACAT_ADMINISTRATOR_PASSWORD}\ + & username=${METACAT_ADMINISTRATOR_USERNAME}" + fi + curl -X POST --data "loginAction=Login&configureType=login&processForm=true&password=\ +${METACAT_ADMINISTRATOR_PASSWORD}&username=${METACAT_ADMINISTRATOR_USERNAME}" \ + --cookie-jar ./cookie.txt http://localhost:8080/"${METACAT_APP_CONTEXT}"/admin >\ + /tmp/login_result.txt 2>&1 + echo + echo '**************************************' + echo "admin login result from /tmp/login_result.txt:" + # following lines use "|| true" because grep exits script (-1) if no matches found + grep 'You must log in' /tmp/login_result.txt || true + grep 'You are logged in' /tmp/login_result.txt || true + echo '**************************************' + echo + echo '**************************************' + echo "Checking if Database is configured..." + + ## If the DB needs to be updated run the migration scripts + DB_CONFIGURED=$(grep -c "configureType=database" /tmp/login_result.txt || true) + if [ "$DB_CONFIGURED" -ne 0 ]; then + echo "Database needs configuring..." 
+ # Run the database initialization to create or upgrade tables + # /${METACAT_APP_CONTEXT}/admin?configureType=database must have an + # authenticated session, then run: + curl -X POST --cookie ./cookie.txt \ + --data "configureType=database&processForm=true" \ + http://localhost:8080/"${METACAT_APP_CONTEXT}"/admin > /dev/null 2>&1 + + # Validate the database should be configured + curl -X POST --cookie ./cookie.txt \ + --data "configureType=configure&processForm=false" \ + http://localhost:8080/"${METACAT_APP_CONTEXT}"/admin > /dev/null 2>&1 + else + echo "Database is already configured" + fi + echo '**************************************' +fi + +if [[ "$DEVTOOLS" == "true" ]]; then + echo "Container dev tools mode -- starting infinite loop -- ctrl-c to interrupt..." + sh -c 'trap "exit" TERM; while true; do sleep 1; done' +else + echo "tailing logs in: $TC_HOME/logs/*" + exec tail -f "$TC_HOME"/logs/* +fi diff --git a/docker/tomcat_DEBUG/README.md b/docker/tomcat_DEBUG/README.md new file mode 100644 index 000000000..9674951a6 --- /dev/null +++ b/docker/tomcat_DEBUG/README.md @@ -0,0 +1,5 @@ +# INSECURE - DO NOT USE IN PRODUCTION! + +These files are for development debugging purposes only. 
+ +Each file contains instructions/explanation in comments diff --git a/docker/tomcat_DEBUG/metacat.xml b/docker/tomcat_DEBUG/metacat.xml new file mode 100644 index 000000000..f59552c52 --- /dev/null +++ b/docker/tomcat_DEBUG/metacat.xml @@ -0,0 +1,6 @@ + + + + + diff --git a/docker/tomcat_DEBUG/metacatui.xml b/docker/tomcat_DEBUG/metacatui.xml new file mode 100644 index 000000000..e2837405c --- /dev/null +++ b/docker/tomcat_DEBUG/metacatui.xml @@ -0,0 +1,6 @@ + + + + + diff --git a/docker/tomcat_DEBUG/setenv.sh b/docker/tomcat_DEBUG/setenv.sh new file mode 100644 index 000000000..cd15d7edd --- /dev/null +++ b/docker/tomcat_DEBUG/setenv.sh @@ -0,0 +1,3 @@ +# this file goes in $TOMCAT_HOME/bin/ and tells tomcat to allow remote +# debugging connections to the port listed as "address=" +export CATALINA_OPTS="$CATALINA_OPTS -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005" \ No newline at end of file diff --git a/helm/.helmignore b/helm/.helmignore new file mode 100644 index 000000000..0e8a0eb36 --- /dev/null +++ b/helm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm/Chart.yaml b/helm/Chart.yaml new file mode 100644 index 000000000..708171bf8 --- /dev/null +++ b/helm/Chart.yaml @@ -0,0 +1,36 @@ +apiVersion: v2 +name: metacat +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. 
They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 +icon: https://camo.githubusercontent.com/a41e704e6cbd198afed0a892f2dccfc43aef29fb7631407048f531aa31956e72/68747470733a2f2f6b6e622e65636f696e666f726d61746963732e6f72672f6b6e622f646f63732f5f696d616765732f6d6574616361742d6c6f676f2d6461726b677261792e706e67 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "2.19.0" + +# Chart dependencies +# dependencies: +# - name: postgres +# repository: https://charts.bitnami.com/bitnami +# version: 14.6.0 +# condition: postgres.enabled, global.postgres.enabled +# - name: solr +# repository: https://charts.bitnami.com/bitnami +# version: 6.2.3 +# condition: solr.enabled, global.solr.enabled diff --git a/helm/README.md b/helm/README.md new file mode 100644 index 000000000..aed6cd966 --- /dev/null +++ b/helm/README.md @@ -0,0 +1,103 @@ +# Metacat Helm Chart + +Metacat is repository software for preserving data and metadata +(documentation about data) that helps scientists find, understand and +effectively use data sets they manage or that have been created by +others. For more details, see https://github.com/NCEAS/metacat + +## TL;DR +For now, you need to have existing instances of **postgres** and **solr** running and configured for +metacat. 
Starting in the root directory of the metacat repo: +```console +# 1. build metacat's binary distribution +$ ant distbin + +# 2. build the docker image +$ pushd docker ; ./build.sh ; popd + +# 3. First time only: add your credentials to helm/admin/secrets.yaml, and add to cluster. +$ vim helm/admin/secrets.yaml ## follow the instructions in this file + +# 4. deploy and enjoy! Assuming your release name is "my-release": +$ helm install my-release ./helm +``` +This `helm install` command will also print out instructions on how to access the application +via a url! +Note you should not need to edit anything in [values.yaml](./values.yaml), if your dev setup is +fairly standard, but it's worth checking, particularly the values in the `metacat` section + + +## Introduction + +This chart deploys a Metacat deployment on a Kubernetes cluster using the Helm package +manager. + +## Prerequisites + +- Kubernetes 1.19+ +- Helm 3.2.0+ +- PV provisioner support in the underlying infrastructure +- An existing instance of solr, configured to be accessed by Metacat +- An existing postgres database, configured to be accessed by Metacat + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```console +helm install my-release ./helm +``` + +The command deploys Metacat on the Kubernetes cluster in the default configuration. The +[Parameters](#parameters) section lists the parameters that can be configured during +installation. Parameters may be provided on the command line to override those in values.yaml; e.g. + +```console +helm install my-release ./helm --set image.debug=true +``` + +> **Tip**: List all releases using `helm list` + +## Uninstalling the Chart + +To uninstall/delete the `my-release` deployment: + +```console +helm delete my-release +``` + +The command removes all the Kubernetes components associated with the chart and deletes the release. 
+ +## Parameters + +(( TODO: See to create the table )) + +The above parameters map to the env variables defined in Metacat. For more information please +refer to the [Metacat Administrators' Guide](https://knb.ecoinformatics.org/knb/docs/). + +Specify non-secret parameters in the default [values.yaml](values.yaml), which will be used +automatically each time you deploy. + +**Tip**: You can also reference your own version of values.yaml as follows: +```console +helm install my-release -f myValues.yaml ./helm +``` + +> NOTE: Once this chart is deployed, it is not possible to change the application's access credentials, such as usernames or passwords, using Helm. To change these application credentials after deployment, delete any persistent volumes (PVs) used by the chart and re-deploy it, or use the application's built-in administrative tools if available. + +## Configuration and installation details + +## Secrets + +Secret parameters (such as login credentials, certificates etc.) should be installed as +kubernetes Secrets in the cluster. The file [admin/secrets.yaml](./admin/secrets.yaml) provides a +template that you can complete and apply using `kubectl` - see file comments for details. Please +remember to NEVER ADD SECRETS TO GITHUB! + +## Persistence + +The Metacat image stores the Metacat data and configurations at the `/var/metacat` path of the +container. Persistent Volume Claims are used to keep the data across deployments. With the +default setup in values.yaml, a persistent volume will be provisioned automatically. 
If you +want to have the application use a specific directory on the host machine, for example, see the +documentation in the [admin/pv-hostpath.yaml](./admin/pv-hostpath.yaml) file diff --git a/helm/admin/pv-hostpath.yaml b/helm/admin/pv-hostpath.yaml new file mode 100644 index 000000000..1ef428fe8 --- /dev/null +++ b/helm/admin/pv-hostpath.yaml @@ -0,0 +1,57 @@ +# For development use: +# Create a persistent volume using the filesystem on the local machine. +# +# 1. Edit this file to set the "spec.hostPath.path" parameter below + +# 2. Then install in your cluster by one of the following methods. Assuming your release name is +# "my-release": (NOTE THE RELEASE NAME MUST MATCH THE ONE YOU USE WITH HELM!) +# +# a. Define the release name on the command line: +# $ RELEASE_NAME=my-release envsubst < pv-hostpath.yaml | kubectl apply -n <namespace> -f - +# +# If this results in "command not found: envsubst", you can install envsubst; e.g. on Mac OS: +# $ brew install gettext && brew link --force gettext +# +# b. Alternatively, you can simply replace "${RELEASE_NAME}" in the file below and do: +# $ kubectl apply -n <namespace> -f pv-hostpath.yaml +# +apiVersion: v1 +kind: PersistentVolume +metadata: + name: ${RELEASE_NAME}-pv +spec: + storageClassName: local-path + capacity: + storage: 1Gi + volumeMode: Filesystem + accessModes: + # ReadWriteOnce: + # the volume can be mounted as read-write by a single node. ReadWriteOnce access mode still + # can allow multiple pods to access the volume when the pods are running on the same node. + # + - ReadWriteOnce + # ReadOnlyMany: + # the volume can be mounted as read-only by multiple nodes. + # + - ReadOnlyMany + # persistentVolumeReclaimPolicy defines what happens to a persistent volume when released from its + # claim. 
Valid options are: + # Retain -- after the PersistentVolumeClaim is released, keep this PersistentVolume and its + # contents (default for manually created PVs) + # Delete -- after the PersistentVolumeClaim is released, delete this PersistentVolume and + # delete the underlying storage (default for dynamically provisioned PVs) + # Recycle -- (DEPRECATED) after the PersistentVolumeClaim is released, delete the contents of + # this PersistentVolume and make it available for another claim. "Recycle" must be + # supported by the volume plugin underlying this PV + # + persistentVolumeReclaimPolicy: Retain + # hostPath.path hostPath represents a directory on the host. Provisioned by a developer or tester. + # This is useful for single-node development and testing only! On-host storage is not supported + # in any way and WILL NOT WORK in a multi-node cluster. + # Note: if using Rancher Desktop on a Mac, k8s runs inside a virtual machine, so the directory + # used by the persistent volume mount is generally not shared with the corresponding location on + # the mac's hard drive. However the user's home directory IS shared with the VM, so use a path + # within /Users/yourname/... if you need access outside of k8s. It may be necessary to chmod 777 + # so the metacat user can write to it - so be careful! Again, this is for testing/dev only! + hostPath: + path: /Users/yourname/your-pv-directory diff --git a/helm/admin/pvclaim.yaml b/helm/admin/pvclaim.yaml new file mode 100644 index 000000000..0c3097fef --- /dev/null +++ b/helm/admin/pvclaim.yaml @@ -0,0 +1,23 @@ +# +# * * * NOT REQUIRED FOR A REGULAR StatefulSet DEPLOYMENT OF METACAT! * * * * +# +# Creates a persistent volume claim named "metacat-pvc". +# +# This should be done only one time, separately from the helm install/upgrade cycle, +# +# NOTE: ensure that a PersistentVolume has already been created before creating +# the PersistentVolumeClaim! 
(See pv-hostpath.yaml for details) +# +# Create by running the command: kubectl apply -n <namespace> -f pvclaim.yaml +# +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: metacat-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi + storageClassName: "" diff --git a/helm/admin/secrets.yaml b/helm/admin/secrets.yaml new file mode 100644 index 000000000..5e24e5492 --- /dev/null +++ b/helm/admin/secrets.yaml @@ -0,0 +1,34 @@ +###################################################################################### +# D O N O T C H E C K C R E D E N T I A L S I N T O G I T H U B ! ! ! +###################################################################################### +# +# Edit this file to add your passwords, then install them in your cluster by one of the +# following methods. Assuming your release name is "my-release": (NOTE THE RELEASE NAME MUST +# MATCH THE ONE YOU USE WITH HELM!) +# +# 1. Define the release name on the command line: +# $ RELEASE_NAME=my-release envsubst < secrets.yaml | kubectl apply -n <namespace> -f - +# +# # if this results in "command not found: envsubst", you can install envsubst (e.g. on Mac OS: +# $ brew install gettext && brew link --force gettext +# +# 2. Alternatively, you can simply replace "${RELEASE_NAME}" in the file below (be sure to retain +# the "-secrets" part), and do: $ kubectl apply -n <namespace> -f secrets.yaml) +# +apiVersion: v1 +kind: Secret +metadata: + name: ${RELEASE_NAME}-secrets +type: Opaque +stringData: # # # EDIT THESE VALUES # do not check into GitHub! + # METACAT_AUTH_ADMINISTRATORS is a single admin username or LDAP-style Distinguished Name used + # to log into the metacat admin pages. 
It is NOT a list of users (despite its name), and must + # not contain any colons (:) + METACAT_AUTH_ADMINISTRATORS: your-value-here # account will be created if not already existing + METACAT_ADMINISTRATOR_PASSWORD: your-value-here # account will be created if not already existing + POSTGRES_PASSWORD: your-value-here # for existing postgres account + POSTGRES_USER: your-value-here # for existing postgres account + METACAT_GUID_DOI_USERNAME: your-value-here # can be ignored if not using DOI + METACAT_GUID_DOI_PASSWORD: your-value-here # can be ignored if not using DOI + METACAT_REPLICATION_PRIVATE_KEY_PASSWORD: "" # can be ignored if not using CN -> CN replication +data: {} diff --git a/helm/config/metacat-site.properties b/helm/config/metacat-site.properties new file mode 100644 index 000000000..df840b0e3 --- /dev/null +++ b/helm/config/metacat-site.properties @@ -0,0 +1,44 @@ +## +## YOU SHOULD NOT NEED TO EDIT THIS FILE (unless you are doing some specialist customizations and +## you really know what you are doing). +## Instead, you can provide metacat properties as needed in the metacat section of values.yaml +## +## The set of properties in this file will be made available to the metacat instance at runtime +## as a read-only version of the expected "metacat-site.properties" file. +## +#################################################################################################### +# This section will pull any arbitrary number of key: value pairs from .Values.metacat and write +# them here as key=value pairs. 
These will then be used by metacat to override any default +# properties with the same keys at runtime +#################################################################################################### +{{- range $key, $value := .Values.metacat }} +{{ $key }}={{ $value }} +{{- end }} + +# TODO - required, not yet templatized ############################################################# +# Could any move permanently to metacat.properties as over-writeable defaults? + +database.adapter=edu.ucsb.nceas.dbadapter.PostgresqlAdapter +database.driver=org.postgresql.Driver + +#################################################################################################### +## specific to (and constant for) k8s deployments ################################################ +#################################################################################################### + +application.backupDir=/var/metacat/.metacat +application.deployDir=/usr/local/tomcat/webapps +server.internalPort=8080 + +# Pretend the config is all done. TODO - handle this more elegantly? +configutil.propertiesConfigured=true +configutil.authConfigured=true +configutil.skinsConfigured=true +configutil.databaseConfigured=true +configutil.solrserverConfigured=true +configutil.dataoneConfigured=bypassed +configutil.ezidConfigured=true +configutil.quotaConfigured=bypassed +configutil.upgrade.status=success +configutil.upgrade.database.status=success +configutil.upgrade.java.status=success +configutil.upgrade.solr.status=success diff --git a/helm/templates/NOTES.txt b/helm/templates/NOTES.txt new file mode 100644 index 000000000..469304b25 --- /dev/null +++ b/helm/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. 
Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "metacat.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "metacat.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "metacat.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "metacat.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl new file mode 100644 index 000000000..69b44aef5 --- /dev/null +++ b/helm/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "metacat.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "metacat.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "metacat.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "metacat.labels" -}} +helm.sh/chart: {{ include "metacat.chart" . }} +{{ include "metacat.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "metacat.selectorLabels" -}} +app.kubernetes.io/name: {{ include "metacat.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "metacat.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "metacat.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml new file mode 100644 index 000000000..4fd52276c --- /dev/null +++ b/helm/templates/configmap.yaml @@ -0,0 +1,8 @@ +# Load all files in the "config" directory into a ConfigMap +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-configfiles + labels: + {{- include "metacat.labels" . | nindent 4 }} +data: {{ (tpl (.Files.Glob "config/*").AsConfig . ) | nindent 4 }} diff --git a/helm/templates/ingress.yaml b/helm/templates/ingress.yaml new file mode 100644 index 000000000..89353e094 --- /dev/null +++ b/helm/templates/ingress.yaml @@ -0,0 +1,61 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "metacat.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "metacat.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/templates/service-headless.yaml b/helm/templates/service-headless.yaml new file mode 100644 index 000000000..c9aa9053f --- /dev/null +++ b/helm/templates/service-headless.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "metacat.fullname" . }}-headless-svc + labels: + {{- include "metacat.labels" . | nindent 4 }} +spec: + type: ClusterIP + # The StatefulSet’s governing Service must be headless. + clusterIP: None + publishNotReadyAddresses: true + ports: + {{- toYaml .Values.service.ports | nindent 4 }} + selector: + {{- include "metacat.selectorLabels" . | nindent 4 }} diff --git a/helm/templates/serviceaccount.yaml b/helm/templates/serviceaccount.yaml new file mode 100644 index 000000000..713823743 --- /dev/null +++ b/helm/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "metacat.serviceAccountName" . }} + labels: + {{- include "metacat.labels" . 
| nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/helm/templates/statefulset.yaml b/helm/templates/statefulset.yaml new file mode 100644 index 000000000..dafd854ba --- /dev/null +++ b/helm/templates/statefulset.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "metacat.fullname" . }} + labels: + {{- include "metacat.labels" . | nindent 4 }} +spec: + serviceName: {{ include "metacat.fullname" . }}-headless-svc + replicas: 1 + selector: + matchLabels: + {{- include "metacat.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "metacat.selectorLabels" . | nindent 8 }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "metacat.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - containerPort: 8080 + name: metacat-web + - containerPort: 8009 + name: apache-connect + - containerPort: 5701 + name: hazelcast + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: {{ .Release.Name }}-config-volume + mountPath: /var/metacat/config/metacat-site.properties + subPath: metacat-site.properties + readOnly: true + - name: {{ .Release.Name }}-volume + mountPath: /var/metacat + readOnly: false + env: + - name: METACAT_DEBUG + value: {{ ternary "true" "false" .Values.image.debug | quote }} + envFrom: + - secretRef: + name: {{ .Release.Name }}-secrets + livenessProbe: + httpGet: + path: /{{ index .Values.metacat "application.context" }}/ + port: metacat-web + readinessProbe: + httpGet: + path: /{{ index .Values.metacat "application.context" }}/ + port: metacat-web + volumes: + - name: {{ .Release.Name }}-config-volume + configMap: + name: {{ .Release.Name }}-configfiles + defaultMode: 0644 +{{- if not (.Values.persistence.enabled) }} + - name: {{ .Release.Name }}-volume + emptyDir: {} +{{ else }} + {{- if .Values.persistence.existingClaim }} + - name: {{ .Release.Name }}-volume + persistentVolumeClaim: + claimName: {{ .Values.persistence.existingClaim }} + {{ else }} + volumeClaimTemplates: + - metadata: + name: {{ .Release.Name }}-volume + spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . 
| quote }} + {{- end }} + volumeMode: Filesystem + resources: + requests: + storage: {{ .Values.persistence.size }} + {{ $storageClass := .Values.persistence.storageClass -}} + {{- if $storageClass -}} + {{- if (eq "-" $storageClass) -}} + storageClassName: "" + {{- else }} + storageClassName: {{ $storageClass }} + {{- end }} + {{ end }} + {{ end }} +{{ end }} diff --git a/helm/templates/tests/test-connection.yaml b/helm/templates/tests/test-connection.yaml new file mode 100644 index 000000000..bcf4128b6 --- /dev/null +++ b/helm/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "metacat.fullname" . }}-test-connection" + labels: + {{- include "metacat.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "metacat.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/helm/values.yaml b/helm/values.yaml new file mode 100644 index 000000000..c1c460603 --- /dev/null +++ b/helm/values.yaml @@ -0,0 +1,166 @@ +## Default values for metacat. +## This is a YAML-formatted file. +## +## The metacat section can contain any properties that will override those in +## metacat-site.properties and metacat.properties. The properties that have been pre-populated +## in this section comprise the minimum set of values needed to run the app and the test suite on +## a development machine. 
+## +metacat: + ## @param metacat.application.context + ## the application context to use - for example, if your application is hosted at + ## https://mydomain.org, and you define the context to be "metacat", then the url to access the + ## application will be https://mydomain.org/metacat/ + ## + application.context: metacat + ## @param metacat.database.connectionURI + ## connection URI for the postgres database, in the form: jdbc:postgresql://hostname/database-name + ## host.docker.internal is equivalent to "localhost" + ## + database.connectionURI: jdbc:postgresql://host.docker.internal/metacat + ## Allow users to publish Digital Object Identifiers for the data in this metacat instance? + ## (see doi.org). If true, you will also need to define guid.doi.username and guid.doi.password, + ## and either override or use the defaults in metacat.properties for all the entries that begin + ## with: "guid.doi." + ## + guid.doi.enabled: true + ## @param metacat.server.httpPort + ## + server.httpPort: 8080 + ## @param metacat.server.httpSSLPort + ## + server.httpSSLPort: 8443 + ## @param metacat.server.name + ## + server.name: metacat.example.com + ## @param metacat.solr.baseURL + # host.docker.internal is equivalent to "localhost" + solr.baseURL: http://host.docker.internal:8983/solr + ## + ## @param metacat.replication.logdir + replication.logdir: /var/metacat/logs + +image: + #TODO pull from github container repo, e.g. see indexer: ghcr.io/dataoneorg/dataone-index-worker + repository: metacat + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + ## @param image.debug Specify if debug values should be set + ## Set to true if you would like to see extra information in metacat/tomcat logs. + ## (Sets the Log4J rootLogger level to "DEBUG") + ## * * WARNING - FOR TESTING ONLY! * * May result in secrets being printed to logs in plain text. 
+ ## + debug: false + +imagePullSecrets: [] + +global: + #TODO + #storageClass: csi-rbd-sc + #solrPort: &global-solr-port 8983 + +serviceAccount: + # Specifies whether a service account should be created + create: false + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +podAnnotations: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} +#TODO +# fsGroup: metacat +# runAsUser: metacat +# capabilities: +# readOnlyRootFilesystem: true +# runAsNonRoot: true + # drop: + # - ALL + +service: + type: ClusterIP # Headless ClusterIP required for StatefulSet + ports: + - port: 8080 + name: metacat-web + - port: 8009 + name: apache-connect + - port: 5701 + name: hazelcast + +ingress: + #TODO + enabled: false + className: "" + annotations: {} + tls: [] + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with limited + # resources, such as Minikube. + +autoscaling: + enabled: false + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +persistence: + ## @param persistence.enabled + ## Enable metacat data persistence using Persistent Volume Claims + ## + ## Always set to 'enabled: true' for production deployments. + ## + ## For development/testing ONLY: Setting 'enabled: false' will result in the use of a temporary + ## 'emptyDir' for saving metacat's data. This means the data saved by metacat WILL BE LOST when + ## the pod is deleted! 
+ ## + enabled: true + ## @param persistence.storageClass Storage class of backing PVC + ## + ## If <storageClass> is defined -- storageClassName: <storageClass> + ## + ## If set to "-" -- storageClassName: "" -- which disables dynamic PV provisioning + ## (meaning claim can only be bound to an existing PV, not a dynamically-provisioned one) with + ## no class (no annotation, or one set equal to "") + ## + ## NOTE when using StatefulSet with volumeClaimTemplates - leaving storageClass unset/null does + ## NOT choose the default provisioner for dynamic provisioning of the underlying PV, as might be expected! + ## Instead, inspect your cluster to see what storageClass is set as default: + ## $ kubectl get storageclass + ## ...and then explicitly set storageClass to match the name of the default storageClass + ## (e.g. for Rancher Desktop -- storageClass: local-path) + ## + storageClass: local-path + ## @param persistence.existingClaim + ## Set a value for 'existingClaim' only if you want to re-use a Persistent Volume Claim that has + ## already been set up by a k8s admin ahead of time. + ## Leaving it blank will cause a pvc to be created dynamically using volumeClaimTemplates. + ## + existingClaim: "" + ## @param persistence.accessModes PVC Access Mode for metacat volume + ## Example: + ## accessModes: + ## - ReadWriteOnce # allow only one node to mount in read/write mode + ## - ReadOnlyMany # allow many nodes to mount in read-only mode + ## ReadWriteOnce is always required by metacat. ReadOnlyMany is useful for giving other + ## services (e.g. metadig) read-only access to metacat data. + ## Note that the underlying PersistentVolume (or pv auto-provisioner) must be able to provide + ## these modes, in order for the PVC to bind successfully. (For Rancher Desktop, this means + ## setting only ReadWriteOnce, not ReadOnlyMany) + ## + accessModes: + - ReadWriteOnce + ## @param persistence.size PVC Storage Request for metacat volume + ## + size: 1Gi