From ce8de9590b92e2b6b0c72fe0841fafb50e05ecd1 Mon Sep 17 00:00:00 2001 From: Matthew de Detrich Date: Fri, 18 Feb 2022 12:17:32 +0100 Subject: [PATCH] Add sbt-paradox docs --- .github/workflows/ci.yml | 59 +++++++++++++ .jvmopts | 3 + CONTRIBUTING.md | 17 +++- README.md | 12 ++- build.sbt | 88 ++++++++++++++++++- .../kafka/backup/BackupClientInterface.scala | 31 +++---- .../backup/configs/TimeConfiguration.scala | 10 +-- .../{application.conf => reference.conf} | 0 core-s3/src/main/resources/reference.conf | 1 + docs/src/main/paradox/application/index.md | 49 +++++++++++ docs/src/main/paradox/backup/configuration.md | 24 +++++ docs/src/main/paradox/backup/index.md | 31 +++++++ docs/src/main/paradox/ci.md | 42 +++++++++ docs/src/main/paradox/doc-generation.md | 42 +++++++++ docs/src/main/paradox/index.md | 26 ++++++ docs/src/main/paradox/overview.md | 19 ++++ docs/src/main/paradox/persistence/design.md | 26 ++++++ docs/src/main/paradox/persistence/index.md | 12 +++ .../paradox/persistence/s3/configuration.md | 15 ++++ docs/src/main/paradox/persistence/s3/index.md | 12 +++ .../src/main/paradox/restore/configuration.md | 13 +++ docs/src/main/paradox/restore/index.md | 13 +++ docs/src/main/paradox/security.md | 29 ++++++ project/plugins.sbt | 33 ++++--- project/project-info.conf | 57 ++++++++++++ 25 files changed, 626 insertions(+), 38 deletions(-) create mode 100644 .jvmopts rename core-restore/src/main/resources/{application.conf => reference.conf} (100%) create mode 100644 docs/src/main/paradox/application/index.md create mode 100644 docs/src/main/paradox/backup/configuration.md create mode 100644 docs/src/main/paradox/backup/index.md create mode 100644 docs/src/main/paradox/ci.md create mode 100644 docs/src/main/paradox/doc-generation.md create mode 100644 docs/src/main/paradox/index.md create mode 100644 docs/src/main/paradox/overview.md create mode 100644 docs/src/main/paradox/persistence/design.md create mode 100644 docs/src/main/paradox/persistence/index.md create mode 100644 docs/src/main/paradox/persistence/s3/configuration.md create mode 100644 docs/src/main/paradox/persistence/s3/index.md create mode 100644 docs/src/main/paradox/restore/configuration.md create mode 100644 docs/src/main/paradox/restore/index.md create mode 100644 docs/src/main/paradox/security.md create mode 100644 project/project-info.conf diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35e04efa..8f4cfa6b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,8 +64,67 @@ jobs: - name: Build project run: sbt ++${{ matrix.scala }} clean coverage test + - name: Compile docs + run: sbt ++${{ matrix.scala }} docs/makeSite + - name: Upload coverage data to Coveralls env: COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} COVERALLS_FLAG_NAME: Scala ${{ matrix.scala }} run: sbt ++${{ matrix.scala }} coverageReport coverageAggregate coveralls + + - name: Compress target directories + run: tar cf targets.tar target cli-compaction/target compaction-gcs/target backup-s3/target compaction-s3/target docs/target cli-backup/target core-restore/target restore-s3/target core-gcs/target core-compaction/target core-s3/target core-backup/target core-cli/target cli-restore/target core/target restore-gcs/target backup-gcs/target project/target + + - name: Upload target directories + uses: actions/upload-artifact@v2 + with: + name: target-${{ matrix.os }}-${{ matrix.scala }}-${{ matrix.java }} + path: targets.tar + + publish: + name: Publish Artifacts + needs: [build] + if: github.event_name != 
'pull_request' && (github.ref == 'refs/heads/main') + strategy: + matrix: + os: [ubuntu-latest] + scala: [2.13.8] + java: [temurin@11] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout current branch (full) + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Java (temurin@11) + if: matrix.java == 'temurin@11' + uses: actions/setup-java@v2 + with: + distribution: temurin + java-version: 11 + + - name: Cache sbt + uses: actions/cache@v2 + with: + path: | + ~/.sbt + ~/.ivy2/cache + ~/.coursier/cache/v1 + ~/.cache/coursier/v1 + ~/AppData/Local/Coursier/Cache/v1 + ~/Library/Caches/Coursier/v1 + key: ${{ runner.os }}-sbt-cache-v2-${{ hashFiles('**/*.sbt') }}-${{ hashFiles('project/build.properties') }} + + - name: Download target directories (2.13.8) + uses: actions/download-artifact@v2 + with: + name: target-${{ matrix.os }}-2.13.8-${{ matrix.java }} + + - name: Inflate target directories (2.13.8) + run: | + tar xf targets.tar + rm targets.tar + + - run: sbt ++${{ matrix.scala }} docs/ghpagesPushSite diff --git a/.jvmopts b/.jvmopts new file mode 100644 index 00000000..74196af4 --- /dev/null +++ b/.jvmopts @@ -0,0 +1,3 @@ +-XX:+IgnoreUnrecognizedVMOptions +--add-opens java.base/java.lang=ALL-UNNAMED +--illegal-access=permit diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cc7c13f7..41f357bc 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -67,8 +67,8 @@ the [scalafmt installation guide][scalafmt-installation-link] for more details * There are native builds of Scalafmt that let you run a `scalafmt` as a CLI tool, see the CLI section in [scalafmt installation guide][scalafmt-installation-link] -Note that a github action exists which will check that your PR is formatted when you create it. The check runs -separately ad in parallel to the main build/tests +Note that a github action exists which will check that your code is formatted whenever you create a PR. For more details +read the [documentation](https://aiven.github.io/guardian-for-apache-kafka/ci.html#scalafmt) ## sbt - Compiling, Building and Testing @@ -81,6 +81,19 @@ it will start a REPL session where you can type in commands, i.e. * `core/compile` will only compile the `core` project. See [build.sbt](build.sbt) to get a reference for how the projects are named * `publishLocal` will publish the project into the local `~/.m2` repository +* `clean` will clean all builds targets (including documentation) from the project. Note that sbt stores build +in sub-directories named `target` +* `reload` will reload sbt which is used when the [sbt][sbt-link] build definition is changed + +## sbt - documentation + +Documentation is also built within SBT, i.e. + +* `docs/makeSite` will compile documentation +* `docs/previewSite` will compile documentation (if needed) and open the result in your system's default browser + +For details about how the document generation works go +[here](https://aiven.github.io/guardian-for-apache-kafka/doc-generation.html) [adopt-openjdk-link]: https://adoptopenjdk.net/ [metals-link]: https://scalameta.org/metals/ diff --git a/README.md b/README.md index 74d65e93..82c3c11e 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,14 @@ # Guardian for Apache Kafka® -Guardian is a backup and restore tool for Apache Kafka clusters. It is designed to continuously stream kafka topics -into persistent/object storages such as S3 and also provides tools for restoring said backups. +Guardian is a backup and restore tool for Apache Kafka clusters. 
It is designed to continuously stream kafka topics into +persistent/object storages such as S3 and also provides tools for restoring said backups. + +## Documentation + +* [Guardian reference](https://aiven.github.io/guardian-for-apache-kafka/) documentation. ## Trademarks -Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or other countries. + +Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or +other countries. diff --git a/build.sbt b/build.sbt index cd56e065..730e955d 100644 --- a/build.sbt +++ b/build.sbt @@ -1,4 +1,5 @@ import com.jsuereth.sbtpgp.PgpKeys.publishSigned +import com.lightbend.paradox.apidoc.ApidocPlugin.autoImport.apidocRootPackage ThisBuild / scalaVersion := "2.13.8" ThisBuild / organization := "aiven.io" @@ -75,6 +76,33 @@ val cliSettings = Seq( val baseName = "guardian-for-apache-kafka" +lazy val guardian = project + .in(file(".")) + .enablePlugins(ScalaUnidocPlugin) + .disablePlugins(SitePlugin) + .aggregate( + core, + coreCli, + coreS3, + coreGCS, + coreBackup, + backupS3, + backupGCS, + cliBackup, + coreCompaction, + compactionS3, + compactionGCS, + cliCompaction, + coreRestore, + restoreS3, + restoreGCS, + cliRestore + ) + .settings( + publish / skip := true, + crossScalaVersions := List() // workaround for https://github.com/sbt/sbt/issues/3465 + ) + lazy val core = project .in(file("core")) .settings( @@ -268,12 +296,65 @@ lazy val cliRestore = project ) .enablePlugins(JavaAppPackaging) +def binaryVersion(key: String): String = key.substring(0, key.lastIndexOf('.')) + +lazy val docs = project + .enablePlugins(ParadoxPlugin, ParadoxSitePlugin, PreprocessPlugin, GhpagesPlugin) + .settings( + Compile / paradox / name := "Guardian for Apache Kafka", + publish / skip := true, + makeSite := makeSite.dependsOn(LocalRootProject / ScalaUnidoc / doc).value, + previewPath := (Paradox / siteSubdirName).value, + paradoxTheme := Some(builtinParadoxTheme("generic")), + apidocRootPackage := "io.aiven.guardian", + Preprocess / siteSubdirName := s"api/${projectInfoVersion.value}", + Preprocess / sourceDirectory := (LocalRootProject / ScalaUnidoc / unidoc / target).value, + git.remoteRepo := scmInfo.value.get.connection.replace("scm:git:", ""), + paradoxGroups := Map("Language" -> Seq("Scala")), + paradoxProperties ++= Map( + "akka.version" -> akkaVersion, + "akka-http.version" -> akkaHttpVersion, + "akka-streams-json.version" -> akkaStreamsJson, + "pure-config.version" -> pureConfigVersion, + "decline.version" -> declineVersion, + "scala-logging.version" -> scalaLoggingVersion, + "extref.akka.base_url" -> s"https://doc.akka.io/docs/akka/${binaryVersion(akkaVersion)}/%s", + "extref.akka-stream-json.base_url" -> s"https://github.com/mdedetrich/akka-streams-json", + "extref.alpakka.base_url" -> s"https://doc.akka.io/api/alpakka/${binaryVersion(alpakkaVersion)}/%s", + "extref.alpakka-docs.base_url" -> s"https://docs.akka.io/docs/alpakka/${binaryVersion(alpakkaVersion)}/%s", + "extref.pureconfig.base_url" -> s"https://pureconfig.github.io/docs/", + "scaladoc.io.aiven.guardian.base_url" -> s"/guardian-for-apache-kafka/${(Preprocess / siteSubdirName).value}/" + ) + ) + +ThisBuild / homepage := Some(url("https://github.com/aiven/akka-streams-json")) + +ThisBuild / scmInfo := Some( + ScmInfo(url("https://github.com/aiven/guardian-for-apache-kafka"), + "scm:git:git@github.com:aiven/guardian-for-apache-kafka.git" + ) +) + +ThisBuild / developers := List( 
+ Developer("jlprat", "Josep Prat", "josep.prat@aiven.io", url("https://github.com/jlprat")), + Developer("mdedetrich", "Matthew de Detrich", "matthew.dedetrich@aiven.io", url("https://github.com/mdedetrich")), + Developer("reta", "Andriy Redko", "andriy.redko@aiven.io", url("https://github.com/reta")) +) + +maintainer := "matthew.dedetrich@aiven.io" + +ThisBuild / licenses += ("Apache-2.0", url("https://opensource.org/licenses/Apache-2.0")) + // This is currently causing problems, see https://github.com/djspiewak/sbt-github-actions/issues/74 ThisBuild / githubWorkflowUseSbtThinClient := false -ThisBuild / githubWorkflowTargetBranches := Seq("main") // Once we have branches per version, add the pattern here +ThisBuild / githubWorkflowTargetBranches := Seq("main") + +// Once we have branches per version, add the pattern here, see +// https://github.com/djspiewak/sbt-github-actions#integration-with-sbt-ci-release +ThisBuild / githubWorkflowPublishTargetBranches := Seq(RefPredicate.Equals(Ref.Branch("main"))) -ThisBuild / githubWorkflowPublishTargetBranches := Seq() +ThisBuild / githubWorkflowPublish := Seq(WorkflowStep.Sbt(List("docs/ghpagesPushSite"))) ThisBuild / githubWorkflowBuildPreamble := Seq( WorkflowStep.Sbt(List("scalafixAll --check"), name = Some("Linter: Scalafix checks")) @@ -300,7 +381,8 @@ ThisBuild / githubWorkflowEnv ++= Map( ThisBuild / githubWorkflowJavaVersions := List(JavaSpec.temurin("11")) ThisBuild / githubWorkflowBuild := Seq( - WorkflowStep.Sbt(List("clean", "coverage", "test"), name = Some("Build project")) + WorkflowStep.Sbt(List("clean", "coverage", "test"), name = Some("Build project")), + WorkflowStep.Sbt(List("docs/makeSite"), name = Some("Compile docs")) ) ThisBuild / githubWorkflowBuildPostamble ++= Seq( diff --git a/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupClientInterface.scala b/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupClientInterface.scala index c57904e2..39f30c75 100644 --- a/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupClientInterface.scala +++ b/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/BackupClientInterface.scala @@ -69,38 +69,39 @@ trait BackupClientInterface[T <: KafkaClientInterface] extends StrictLogging { * @param key * The object key or filename for what is currently being backed up * @return - * A [[Future]] with a [[UploadStateResult]] data structure that optionally contains the state associated with - * `key` along with the previous latest state before `key` (if it exists) + * A [[scala.concurrent.Future]] with a [[UploadStateResult]] data structure that optionally contains the state + * associated with `key` along with the previous latest state before `key` (if it exists) */ def getCurrentUploadState(key: String): Future[UploadStateResult] /** A sink that is executed whenever a previously existing Backup needs to be terminated and closed. Generally - * speaking this [[Sink]] is similar to the [[backupToStorageSink]] except that - * [[kafkaClientInterface.CursorContext]] is not required since no Kafka messages are being written. + * speaking this [[akka.stream.scaladsl.Sink]] is similar to the `backupToStorageSink` except that + * `kafkaClientInterface.CursorContext` is not required since no Kafka messages are being written. 
* - * Note that the terminate refers to the fact that this Sink is executed with a `null]` [[Source]] which when written - * to an already existing unfinished backup terminates the containing JSON array so that it becomes valid parsable - * JSON. + * Note that the terminate refers to the fact that this Sink is executed with a `null]` + * [[akka.stream.scaladsl.Source]] which when written to an already existing unfinished backup terminates the + * containing JSON array so that it becomes valid parsable JSON. * @param previousState * A data structure containing both the [[State]] along with the associated key which you can refer to in order to - * define your [[Sink]] + * define your [[akka.stream.scaladsl.Sink]] * @return - * A [[Sink]] that points to an existing key defined by `previousState.previousKey` + * A [[akka.stream.scaladsl.Sink]] that points to an existing key defined by `previousState.previousKey` */ def backupToStorageTerminateSink(previousState: PreviousState): Sink[ByteString, Future[BackupResult]] - /** Override this method to define how to backup a `ByteString` combined with Kafka + /** Override this method to define how to backup a [[akka.util.ByteString]] combined with Kafka * `kafkaClientInterface.CursorContext` to a `DataSource` * @param key * The object key or filename for what is being backed up * @param currentState * The current state if it exists. If this is empty then a new backup is being created with the associated `key` - * otherwise if this contains a [[State]] then the defined [[Sink]] needs to handle resuming a previously - * unfinished backup with that `key` by directly appending the [[ByteString]] data. + * otherwise if this contains a [[State]] then the defined [[akka.stream.scaladsl.Sink]] needs to handle resuming a + * previously unfinished backup with that `key` by directly appending the [[akka.util.ByteString]] data. * @return - * A [[Sink]] that given a [[ByteString]] (containing a single Kafka [[ReducedConsumerRecord]]) along with its - * [[kafkaClientInterface.CursorContext]] backs up the data to your data storage. The [[Sink]] is also responsible - * for executing [[kafkaClientInterface.commitCursor]] when the data is successfully backed up + * A [[akka.stream.scaladsl.Sink]] that given a [[akka.util.ByteString]] (containing a single Kafka + * [[io.aiven.guardian.kafka.models.ReducedConsumerRecord]]) along with its `kafkaClientInterface.CursorContext` + * backs up the data to your data storage. The [[akka.stream.scaladsl.Sink]] is also responsible for executing + * `kafkaClientInterface.commitCursor` when the data is successfully backed up */ def backupToStorageSink(key: String, currentState: Option[State] diff --git a/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala b/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala index 89dd41f4..3b87f30b 100644 --- a/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala +++ b/core-backup/src/main/scala/io/aiven/guardian/kafka/backup/configs/TimeConfiguration.scala @@ -13,11 +13,11 @@ sealed trait TimeConfiguration */ final case class PeriodFromFirst(duration: FiniteDuration) extends TimeConfiguration -/** Backs up objects/files by collecting received Kafka messages into a single time slice based on a [[ChronoUnit]]. - * When suspending/resuming the backup client, this option will reuse existing objects/files if they fall into the - * currently configured `chronoUnit`. 
+/** Backs up objects/files by collecting received Kafka messages into a single time slice based on a
+ *   [[java.time.temporal.ChronoUnit]]. When suspending/resuming the backup client, this option will reuse existing
+ *   objects/files if they fall into the currently configured `chronoUnit`.
  * @param chronoUnit
- *   Timestamps for kafka messages that are contained within the configured [[ChronoUnit]] will be placed into the same
- *   object/file.
+ *   Timestamps for kafka messages that are contained within the configured [[java.time.temporal.ChronoUnit]] will be
+ *   placed into the same object/file.
  */
 final case class ChronoUnitSlice(chronoUnit: ChronoUnit) extends TimeConfiguration
diff --git a/core-restore/src/main/resources/application.conf b/core-restore/src/main/resources/reference.conf
similarity index 100%
rename from core-restore/src/main/resources/application.conf
rename to core-restore/src/main/resources/reference.conf
diff --git a/core-s3/src/main/resources/reference.conf b/core-s3/src/main/resources/reference.conf
index f2f26bc1..19f46913 100644
--- a/core-s3/src/main/resources/reference.conf
+++ b/core-s3/src/main/resources/reference.conf
@@ -6,6 +6,7 @@ alpakka.s3 {
    scheme = ${?ALPAKKA_S3_FORWARD_PROXY_SCHEME}
    host = ${?ALPAKKA_S3_FORWARD_PROXY_HOST}
    port = ${?ALPAKKA_S3_FORWARD_PROXY_PORT}
+
    credentials {
      username = ${?ALPAKKA_S3_FORWARD_PROXY_CREDENTIALS_USERNAME}
      password = ${?ALPAKKA_S3_FORWARD_PROXY_CREDENTIALS_PASSWORD}
diff --git a/docs/src/main/paradox/application/index.md b/docs/src/main/paradox/application/index.md
new file mode 100644
index 00000000..f118fd0d
--- /dev/null
+++ b/docs/src/main/paradox/application/index.md
@@ -0,0 +1,49 @@
+# Application
+
+Guardian is also packaged as a set of applications that let you run it via a CLI interface. Currently, the
+binaries provided are
+
+* restore: A binary which, when executed, allows you to restore an existing backup.
+* backup: A continuously running binary that performs the backup operation.
+
+The CLI follows POSIX guidelines, which means you can use `--help` as an argument to get information on all of the
+parameters.
+
+## Package formats
+
+Guardian is currently packaged using [sbt-native-packager](https://github.com/sbt/sbt-native-packager) to provide the
+following formats by using the sbt shell.
+
+* `rpm`
+  * restore: `cliRestore/rpm:packageBin`. Created `rpm` file will be contained
+    in `cli-restore/target/rpm/RPMS/noarch/`
+  * backup: `cliBackup/rpm:packageBin`. Created `rpm` file will be contained in `cli-backup/target/rpm/RPMS/noarch/`
+    NOTE: In order to build packages you need to have the [rpm-tools](https://rpm.org/) (specifically `rpmbuild`)
+    installed and available on `PATH`. Please consult your Linux distribution for more info
+* `zip`
+  * restore: `cliRestore/universal:packageBin`. Created `zip` file will be contained
+    in `cli-restore/target/universal/`
+  * backup: `cliBackup/universal:packageBin`. Created `zip` file will be contained in `cli-backup/target/universal/`
+* `tar`
+  * restore: `cliRestore/universal:packageZipTarball`. Created `tar` file will be contained
+    in `cli-restore/target/universal/`
+  * backup: `cliBackup/universal:packageZipTarball`. Created `tar` file will be contained
+    in `cli-backup/target/universal/`
+* `xz`
+  * restore: `cliRestore/universal:packageXzTarball`. Created `xz` file will be contained
+    in `cli-restore/target/universal/`
+  * backup: `cliBackup/universal:packageXzTarball`.
Created `xz` file will be contained
+    in `cli-backup/target/universal/`
+
+Note that for these package formats you need to have a JRE installed on your system to run the packaged application.
+For more details about packaging read the [docs](https://sbt-native-packager.readthedocs.io/en/latest/)
+
+## Design
+
+Each application is contained within a corresponding sbt submodule, i.e. the application for `backup` is contained
+within the `cli-backup` sbt submodule. The `core-cli` sbt submodule contains common cli arguments (i.e. `kafka-topics`).
+
+Scala packaging has been disabled for these submodules, which means that publishing/packaging Guardian won't push any
+built `.jar` files for them. This is because it's unnecessary: these applications are meant to be run as binaries and
+not included as a library. By the same token this also means that the cli modules are built with global inlining
+using `"-opt-inline-from:**"`, see [here](https://www.lightbend.com/blog/scala-inliner-optimizer) for more info.
diff --git a/docs/src/main/paradox/backup/configuration.md b/docs/src/main/paradox/backup/configuration.md
new file mode 100644
index 00000000..7f83a312
--- /dev/null
+++ b/docs/src/main/paradox/backup/configuration.md
@@ -0,0 +1,24 @@
+# Configuration
+
+## Reference
+
+@@snip (/core-backup/src/main/resources/reference.conf)
+
+Scala API doc @apidoc[kafka.backup.configs.Backup]
+
+## Explanation
+
+* `kafka-group-id`: The group id for the Kafka consumer that's used in the backup tool
+* `time-configuration`: How to slice the persisted keys/files based on time
+  * `type`: The type of time configuration. Either `period-from-first` or `chrono-unit-slice`
+    * `period-from-first`: Guardian will split up the backup keys/files determined by the `duration` specified. The
+      key/filename will be determined by the timestamp of the first message received from the Kafka consumer, with
+      each further key/filename being incremented by the configured `duration`. If Guardian is shut down then it
+      will terminate and complete the stream, with the final element in the JSON array being a `null`
+      * This is done so it's possible to determine whether a backup has been terminated by a shutdown of Guardian and
+        also because it's not really possible to resume using arbitrary durations.
+    * `chrono-unit-slice`: Guardian will split up the backup keys/files determined by the `chrono-unit`, which
+      represents intervals such as days and weeks. As such, when using this setting it's possible for Guardian to
+      resume from a previous uncompleted backup.
+  * `duration`: If the time configuration is `period-from-first` then this determines the maximum period of time for
+    each time slice.
+  * `chrono-unit`: If the time configuration is `chrono-unit-slice` then the `chrono-unit` determines the interval
+    (e.g. days, weeks) that each time slice covers.
diff --git a/docs/src/main/paradox/backup/index.md b/docs/src/main/paradox/backup/index.md
new file mode 100644
index 00000000..25402fb6
--- /dev/null
+++ b/docs/src/main/paradox/backup/index.md
@@ -0,0 +1,31 @@
+# Backup
+
+The backup module is responsible for backing up a specific set of Kafka topics into persistent storage. The backup
+runs as a continuous stream that is split into configurable time buckets. The format for backups is JSON, consisting
+of a large JSON array filled with JSON objects that have the following format.
+
+```json
+{
+  "topic": "kafka topic",
+  "partition": 0,
+  "offset": 0,
+  "key": "a2V5",
+  "value": "dmFsdWU=",
+  "timestamp": 0,
+  "timestamp_type": 0
+}
+```
+
+The `key` and `value` are Base64 encoded byte arrays (in the above example `"a2V5"` decodes to the string `key`
+and `"dmFsdWU="` decodes to the string `value`). This is due to the fact that the backup tool can make no assumptions
+about the format of the key or value, so we encode the raw byte arrays.
+
+One thing to note is that it's possible for the last JSON object in the JSON array to be `null`, see the
+[configuration](configuration.md) page for more info.
+
+@@toc { depth=2 }
+
+@@@ index
+
+* [configuration](configuration.md)
+
+@@@
diff --git a/docs/src/main/paradox/ci.md b/docs/src/main/paradox/ci.md
new file mode 100644
index 00000000..47d3c9a4
--- /dev/null
+++ b/docs/src/main/paradox/ci.md
@@ -0,0 +1,42 @@
+# CI - Continuous Integration
+
+Guardian uses github actions to perform CI whenever a pull request is made and when a pull request is merged into
+the main branch. CI is also responsible for publishing the documentation to GitHub Pages. The integration with github
+actions for the main build is performed using [sbt-github-actions][sbt-github-actions-link].
+
+## Design
+
+One thing to note about [sbt-github-actions][sbt-github-actions-link] is that it generates the github workflow files
+directly from the sbt [build definition file](https://github.com/aiven/guardian-for-apache-kafka/blob/main/build.sbt).
+This means that `build.sbt` is the source of truth and hence [sbt-github-actions][sbt-github-actions-link] also
+checks that the github workflow is in sync with `build.sbt` as part of the CI process.
+
+Essentially that means any changes to `build.sbt` (such as updating Scala versions) can also cause changes in github
+workflow actions. Likewise, if you need to make any custom changes to
+the [ci.yaml](https://github.com/aiven/guardian-for-apache-kafka/blob/main/.github/workflows/ci.yml) you need to do this
+in `build.sbt` using the [sbt-github-actions][sbt-github-actions-link] sbt DSL.
+
+To regenerate the relevant github workflow files after changes to `build.sbt` are done you need to run
+
+```
+githubWorkflowGenerate
+```
+
+in the sbt shell. For more information go [here](https://github.com/djspiewak/sbt-github-actions#generative-plugin)
+
+## Scalafmt
+
+In addition and separately to [sbt-github-actions][sbt-github-actions-link], Guardian also has a [scalafmt][scalafmt-link]
+pipeline that checks that the code is correctly formatted on each PR. This allows
+the [scalafmt pipeline](https://github.com/aiven/guardian-for-apache-kafka/blob/main/.github/workflows/format.yml) to
+run at the same time the main build does. Furthermore, it
+uses [scalafmt-native](https://scalameta.org/scalafmt/docs/installation.html#native-image) for improved runtime
+performance (typically it takes 5-10 seconds to check that the entire project is formatted).
+
+This means that if you ever update the scalafmt version in
+the [configuration file](https://github.com/aiven/guardian-for-apache-kafka/blob/main/.scalafmt.conf#L1) you also need
+to update it in
+the [scalafmt-pipeline](https://github.com/aiven/guardian-for-apache-kafka/blob/main/.github/workflows/format.yml#L26).
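+
+As a concrete illustration of the "`build.sbt` is the source of truth" approach described in the Design section above,
+the following excerpt (taken from this change) shows the sbt-github-actions DSL that produces the `Build project` and
+`Compile docs` steps of the generated workflow:
+
+```scala
+// build.sbt: each WorkflowStep.Sbt entry below is rendered into one step of
+// .github/workflows/ci.yml when the githubWorkflowGenerate task is run
+ThisBuild / githubWorkflowBuild := Seq(
+  WorkflowStep.Sbt(List("clean", "coverage", "test"), name = Some("Build project")),
+  WorkflowStep.Sbt(List("docs/makeSite"), name = Some("Compile docs"))
+)
+```
+
+Running `githubWorkflowGenerate` afterwards regenerates the workflow files from this definition.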
+ +[sbt-github-actions-link]: https://github.com/djspiewak/sbt-github-actions +[scalafmt-link]: https://scalameta.org/scalafmt/ diff --git a/docs/src/main/paradox/doc-generation.md b/docs/src/main/paradox/doc-generation.md new file mode 100644 index 00000000..ab0a3149 --- /dev/null +++ b/docs/src/main/paradox/doc-generation.md @@ -0,0 +1,42 @@ +# Document Generation + +Guardian uses [sbt-paradox][sbt-paradox-link] as the main plugin for generating documentation which is hosted +using [github pages][github-pages-link]. In addition various other plugins are used which are noted below + +* [sbt-paradox-api-doc](https://github.com/lightbend/sbt-paradox-apidoc): Allows you to directly link to Scala + documentation using the `@@apidoc` directive +* [sbt-paradox-project-info](https://github.com/lightbend/sbt-paradox-project-info): Provides an `@@projectInfo` + directive that derives common information about the project (such as dependencies, project info etc etc) +* [sbt-site](https://github.com/sbt/sbt-site): Used in conjunction with [sbt-paradox][sbt-paradox-link] to generate the + final site structure +* [sbt-ghpages](https://github.com/sbt/sbt-ghpages): Used for uploading the final site + to [github-pages][github-pages-link]. +* [sbt-unidoc](https://github.com/sbt/sbt-unidoc): Used to aggregate/concatenate documentation Scala API documentation + from various sbt modules into a single documentation result + +## Design + +[sbt-paradox][sbt-paradox-link] generates documentation using standard [Markdown](https://www.markdownguide.org/). The +documentation can be found in the [docs-folder](https://github.com/aiven/guardian-for-apache-kafka/tree/main/docs). Note +that this folder also corresponds to a sbt-module which is also named `docs` which also means that commands related to +documentation are run in that sbt sub-project (i.e. `docs/makeSite` generates the documentation site). + +Guardian also uses [scaladoc][scaladoc-link] which is already included within Scala compiler/SBT to generate Scala API +documentation. +[scaladoc][scaladoc-link] is analogous to Java's own [javadoc](https://en.wikipedia.org/wiki/Javadoc) which generates +API documentation that is written within the code itself. + +One advantage of using [sbt-paradox][sbt-paradox-link] and its various plugins as the main driver for documentation +generation is it that checks at document generation (i.e. compile time) that the docs are well-formed. This checking +includes + +* references to other links +* references to specific Scala API documentation directly using Scala classes/objects/traits +* TOC (table of contents) are well-formed (e.g. you don't have markdown files in `docs` which aren't referenced + anywhere) +* references to versions from Guardians various Scala submodules are always up-to-date +* references to code snippets + +[sbt-paradox-link]: https://github.com/lightbend/paradox +[github-pages-link]: https://pages.github.com/ +[scaladoc-link]: https://docs.scala-lang.org/style/scaladoc.html diff --git a/docs/src/main/paradox/index.md b/docs/src/main/paradox/index.md new file mode 100644 index 00000000..c0bf2e1a --- /dev/null +++ b/docs/src/main/paradox/index.md @@ -0,0 +1,26 @@ +# Guardian for Apache Kafka Documentation + +Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. 
+It is built using [Scala](https://www.scala-lang.org/) entirely +with [Akka-Streams](https://doc.akka.io/docs/akka/current/stream/index.html) +to ensure that the tool runs reliably and as desired with large datasets in different scenarios. + +@@toc { depth=2 } + +@@@ index + +* [overview](overview.md) +* [security](security.md) +* [ci](ci.md) +* [doc-generation](doc-generation.md) +* [application](application/index.md) +* [backup](backup/index.md) +* [persistence](persistence/index.md) +* [restore](restore/index.md) + +@@@ + +## Trademarks + +Apache Kafka is either a registered trademark or trademark of the Apache Software Foundation in the United States and/or +other countries. diff --git a/docs/src/main/paradox/overview.md b/docs/src/main/paradox/overview.md new file mode 100644 index 00000000..40593802 --- /dev/null +++ b/docs/src/main/paradox/overview.md @@ -0,0 +1,19 @@ +# Overview + +Guardian for Apache Kafka is an open source utility for backing up [Apache Kafka](https://kafka.apache.org/) clusters. +It is built using [Scala](https://www.scala-lang.org/) entirely +with [Akka-Streams](https://doc.akka.io/docs/akka/current/stream/index.html) +to ensure that the tool runs as desired with large datasets in different scenarios. + +## Versions + +The core modules are compiled against: + +* Akka Streams $akka.version$+ (@extref:[Reference](akka:stream/index.html), [Github](https://github.com/akka/akka)) +* Akka Streams Json $akka-streams-json.version$+ ([Github](https://github.com/mdedetrich/akka-streams-json)) +* PureConfig $pure-config.version$+ ([Reference](https://pureconfig.github.io/docs/), [Github](https://github.com/pureconfig/pureconfig)) +* ScalaLogging $scala-logging.version$+ ([Github](https://github.com/lightbend/scala-logging)) + +The cli modules are compiled against: + +* Decline $decline.version$+ ([Reference](https://ben.kirw.in/decline/), [Github](https://github.com/bkirwi/decline)) diff --git a/docs/src/main/paradox/persistence/design.md b/docs/src/main/paradox/persistence/design.md new file mode 100644 index 00000000..c282b13d --- /dev/null +++ b/docs/src/main/paradox/persistence/design.md @@ -0,0 +1,26 @@ +# Design + +Storage mechanisms are implemented via the @apidoc[BackupClientInterface] and @apidoc[RestoreClientInterface]. To add +custom storage mechanisms you need to implement these methods. These interfaces are designed to be as simple as possible +while being completely abstract to allow for any theoretical storage mechanism. + +## BackupClientInterface + +The @apidoc[BackupClientInterface] implements the entire backup flow including the resuming from a previously terminated +backup. Of note is the @apidoc[BackupClientInterface.State](BackupClientInterface) which is the data structure that is +returned when any previously existing backup for that key exists. This is provided to +@apidoc[BackupClientInterface.backupToStorageSink](BackupClientInterface) indicating whether the backup being performed +is a new backup or resuming from a previous one with the retrieval of the current state being defined by +@apidoc[BackupClientInterface.getCurrentUploadState](BackupClientInterface). + +Note that when implementing @apidoc[BackupClientInterface] you do not need to handle the corner cases regarding the +contents of the byte string when resuming/suspending/terminating, this is automatically handled for you. Essentially you +just need to handle how to store/push `ByteString` into the storage of your choice. 
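+
+To make the contract more concrete, below is a deliberately simplified sketch (not Guardian's actual S3/GCS
+implementation; the local file path is purely hypothetical) of the kind of `Sink` a `backupToStorageSink`
+implementation could hand back, simply appending the emitted `ByteString` chunks to a file named after the backup key:
+
+```scala
+import java.nio.file.{Paths, StandardOpenOption}
+
+import scala.concurrent.Future
+
+import akka.stream.IOResult
+import akka.stream.scaladsl.{FileIO, Sink}
+import akka.util.ByteString
+
+object LocalFileBackupExample {
+  // Append every incoming ByteString chunk to a local file derived from the backup key.
+  // A real implementation would also commit the Kafka cursor once the data is safely stored.
+  def appendToFileSink(key: String): Sink[ByteString, Future[IOResult]] =
+    FileIO.toPath(
+      Paths.get(s"/tmp/guardian-backup/$key"),
+      Set(StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.APPEND)
+    )
+}
+```
+
+A real persistence module such as the S3 one instead streams these chunks to object storage and uses the [[State]]
+returned by @apidoc[BackupClientInterface.getCurrentUploadState](BackupClientInterface) to resume unfinished uploads.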
+## RestoreClientInterface
+
+The @apidoc[RestoreClientInterface] implements restoration from an existing backup. Implementing this is quite simple:
+you need to define @apidoc[RestoreClientInterface.retrieveBackupKeys](RestoreClientInterface), which returns all valid
+keys to restore (i.e. don't include currently in progress backup keys), and
+@apidoc[RestoreClientInterface.downloadFlow](RestoreClientInterface), which is an akka-stream `Flow` that takes
+a `String` which is the key and outputs the content of that key.
diff --git a/docs/src/main/paradox/persistence/index.md b/docs/src/main/paradox/persistence/index.md
new file mode 100644
index 00000000..3d1f949e
--- /dev/null
+++ b/docs/src/main/paradox/persistence/index.md
@@ -0,0 +1,12 @@
+# Persistence Modules
+
+Guardian for Apache Kafka has a modular architecture that provides support for different persistence backends.
+
+@@toc { depth=2 }
+
+@@@ index
+
+* [design](design.md)
+* [S3](s3/index.md)
+
+@@@
\ No newline at end of file
diff --git a/docs/src/main/paradox/persistence/s3/configuration.md b/docs/src/main/paradox/persistence/s3/configuration.md
new file mode 100644
index 00000000..ba270dbd
--- /dev/null
+++ b/docs/src/main/paradox/persistence/s3/configuration.md
@@ -0,0 +1,15 @@
+# S3
+
+## Reference
+
+@@snip (/core-s3/src/main/resources/reference.conf)
+
+Scala API doc @apidoc[kafka.s3.configs.S3]
+
+## Explanation
+
+* `s3-headers`: See @extref:[documentation](alpakka:akka/stream/alpakka/s3/headers/index.html)
+* `alpakka.s3`: See @extref:[documentation](alpakka-docs:s3.html#configuration)
+* `s3-config`: Core S3 configuration about where to persist the data
+  * `data-bucket`: The main S3 bucket where data is backed up and where to restore data from
+  * `data-bucket-prefix`: S3 prefix configuration to be used when searching for the bucket
diff --git a/docs/src/main/paradox/persistence/s3/index.md b/docs/src/main/paradox/persistence/s3/index.md
new file mode 100644
index 00000000..0b97c2eb
--- /dev/null
+++ b/docs/src/main/paradox/persistence/s3/index.md
@@ -0,0 +1,12 @@
+# S3
+
+The S3 persistence module allows you to store kafka backups on [AWS S3 Cloud Storage](https://aws.amazon.com/s3/).
+
+@@toc { depth=2 }
+
+@@@ index
+
+* [configuration](configuration.md)
+
+@@@
+
diff --git a/docs/src/main/paradox/restore/configuration.md b/docs/src/main/paradox/restore/configuration.md
new file mode 100644
index 00000000..853a3328
--- /dev/null
+++ b/docs/src/main/paradox/restore/configuration.md
@@ -0,0 +1,13 @@
+# Configuration
+
+## Reference
+
+@@snip (/core-restore/src/main/resources/reference.conf)
+
+Scala API doc @apidoc[kafka.restore.configs.Restore]
+
+## Explanation
+
+* `from-when`: An `ISO-8601` time that specifies from when topics need to be restored. Note that the time used is based
+  on the original Kafka timestamp and **NOT** the current time.
+* `override-topics`: A mapping of currently backed up topics to a new topic in the destination Kafka cluster
diff --git a/docs/src/main/paradox/restore/index.md b/docs/src/main/paradox/restore/index.md
new file mode 100644
index 00000000..931bb71d
--- /dev/null
+++ b/docs/src/main/paradox/restore/index.md
@@ -0,0 +1,13 @@
+# Restore
+
+The restore module is responsible for streaming data from a backup storage location into a fresh cluster in the
+event of disaster recovery. The restore is able to work with any format of backed up files created by Guardian's
+backup tool.
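+
+Persistence modules plug into the restore stream through `RestoreClientInterface` (see the persistence design page):
+restore only needs the list of valid backup keys and a `Flow` that downloads each key's content. A minimal sketch,
+assuming purely for illustration that backups are plain files on the local filesystem rather than objects in S3/GCS:
+
+```scala
+import java.nio.file.Paths
+
+import akka.NotUsed
+import akka.stream.scaladsl.{FileIO, Flow}
+import akka.util.ByteString
+
+object LocalFileRestoreExample {
+  // For every backup key emitted upstream, stream back the contents of the
+  // corresponding local file as ByteString chunks for the restore stream to parse.
+  val downloadFlow: Flow[String, ByteString, NotUsed] =
+    Flow[String].flatMapConcat { key =>
+      FileIO.fromPath(Paths.get(s"/tmp/guardian-backup/$key"))
+    }
+}
+```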
+@@toc { depth=2 }
+
+@@@ index
+
+* [configuration](configuration.md)
+
+@@@
diff --git a/docs/src/main/paradox/security.md b/docs/src/main/paradox/security.md
new file mode 100644
index 00000000..8eb764c3
--- /dev/null
+++ b/docs/src/main/paradox/security.md
@@ -0,0 +1,29 @@
+# Security
+
+## OWASP Report
+
+Guardian uses [sbt-dependency-check](https://github.com/albuch/sbt-dependency-check) to generate
+a [dependency-check-report][dependency-check-report-link], which checks direct and transitive dependencies for
+vulnerabilities against the [NVD](https://nvd.nist.gov/). The report is an HTML file that can be viewed in a standard
+browser.
+
+### Generating a report
+
+You can use the sbt shell to generate a report at any time using
+
+```
+dependencyCheckAggregate
+```
+
+This will overwrite the [current report file][dependency-check-report-link].
+
+### Suppressing false positives
+
+Sometimes it is possible that a false positive gets generated in the report. To suppress a false positive, first open
+the [report file][dependency-check-report-link] in a supported browser. In the list of found vulnerabilities there
+should be a suppress button which, when clicked, displays a popup containing an XML suppression entry. You then add
+that `<suppress>` tag entry to the
+existing [suppression-file](https://github.com/aiven/guardian-for-apache-kafka/edit/main/dependency-check/suppression.xml).
+Finally, regenerate the report using sbt's `dependencyCheckAggregate`
+
+[dependency-check-report-link]: https://github.com/aiven/guardian-for-apache-kafka/blob/main/dependency-check/dependency-check-report.html
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 323c250b..5ee3a87a 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1,10 +1,23 @@
-addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.6")
-addSbtPlugin("com.lightbend.paradox" % "sbt-paradox" % "0.9.2")
-addSbtPlugin("com.github.sbt" % "sbt-native-packager" % "1.9.9")
-addSbtPlugin("com.codecommit" % "sbt-github-actions" % "0.14.2")
-addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2")
-addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0")
-addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.9.34")
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3")
-addSbtPlugin("org.scoverage" % "sbt-coveralls" % "1.3.2")
-addSbtPlugin("net.vonbuchholtz" % "sbt-dependency-check" % "4.0.0")
+addSbtPlugin("org.scalameta"                     % "sbt-scalafmt"             % "2.4.6")
+addSbtPlugin("com.lightbend.paradox"             % "sbt-paradox"              % "0.9.2")
+addSbtPlugin("com.lightbend.paradox"             % "sbt-paradox-apidoc"       % "0.10+8-1685fc09")
+addSbtPlugin("com.lightbend.paradox"             % "sbt-paradox-project-info" % "1.1.3")
+addSbtPlugin("com.github.sbt"                    % "sbt-unidoc"               % "0.5.0")
+addSbtPlugin("com.typesafe.sbt"                  % "sbt-ghpages"              % "0.6.3")
+addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings"         % "3.0.0+82-b1fe858b")
+addSbtPlugin("com.typesafe.sbt"                  % "sbt-site"                 % "1.4.1")
+addSbtPlugin("com.github.sbt"                    % "sbt-native-packager"      % "1.9.9")
+addSbtPlugin("com.codecommit"                    % "sbt-github-actions"       % "0.14.2")
+addSbtPlugin("com.github.sbt"                    % "sbt-pgp"                  % "2.1.2")
+addSbtPlugin("com.github.sbt"                    % "sbt-release"              % "1.1.0")
+addSbtPlugin("ch.epfl.scala"                     % "sbt-scalafix"             % "0.9.34")
+addSbtPlugin("org.scoverage"                     % "sbt-scoverage"            % "1.9.3")
+addSbtPlugin("org.scoverage"                     % "sbt-coveralls"            % "1.3.2")
+addSbtPlugin("net.vonbuchholtz"                  % "sbt-dependency-check"     % "4.0.0")
+
+// This is here to bump dependencies for sbt-paradox/sbt-site, see
https://github.com/sirthias/parboiled/issues/175 and https://github.com/sirthias/parboiled/issues/128 +libraryDependencies ++= Seq( + "org.parboiled" %% "parboiled-scala" % "1.4.0", + "org.parboiled" % "parboiled-java" % "1.4.0" +) diff --git a/project/project-info.conf b/project/project-info.conf new file mode 100644 index 00000000..234e62b3 --- /dev/null +++ b/project/project-info.conf @@ -0,0 +1,57 @@ +project-info { + version: "current" + labels: "https://github.com/aiven/guardian-for-apache-kafka/labels/p%3A" + scaladoc: "https://doc.akka.io/api/alpakka/"${project-info.version}"/akka/stream/alpakka/" + shared-info { + jdk-versions: ["Adopt OpenJDK 11", "Adopt OpenJDK 17"] + snapshots: { + url: "other-docs/snapshots.html" + text: "Snapshots are available" + new-tab: false + } + issues: { + url: "https://github.com/aiven/guardian-for-apache-kafka/issues" + text: "Github issues" + } + release-notes: { + url: "https://github.com/aiven/guardian-for-apache-kafka/releases" + text: "GitHub releases" + } + } + backupS3: ${project-info.shared-info} { + title: "Backup S3" + jpms-name: "io.aiven.guardian.kafka.backup.s3" + } + cliBackup: ${project-info.shared-info} { + title: "CLI Backup" + jpms-name: "io.aiven.guardian.kafka.backup" + } + cliRestore: ${project-info.shared-info} { + title: "CLI Restore" + jpms-name: "io.aiven.guardian.kafka.restore" + } + core: ${project-info.shared-info} { + title: "Core" + jpms-name: "io.aiven.guardian.kafka" + } + coreBackup: ${project-info.shared-info} { + title: "Core Backup" + jpms-name: "io.aiven.guardian.kafka.backup" + } + coreCli: ${project-info.shared-info} { + title: "Core CLI" + jpms-name: "io.aiven.guardian.cli" + } + coreRestore: ${project-info.shared-info} { + title: "Core Restore" + jpms-name: "io.aiven.guardian.kafka.restore" + } + coreS3: ${project-info.shared-info} { + title: "Core S3" + jpms-name: "io.aiven.guardian.kafka.restore" + } + restoreS3: ${project-info.shared-info} { + title: "Restore S3" + jpms-name: "io.aiven.guardian.kafka.restore.s3" + } +}