Skip to content

Commit

Permalink
Merge pull request #1 from RMI-PACTA/feature/initial-setup
Browse files Browse the repository at this point in the history
Feature/initial setup
  • Loading branch information
AlexAxthelm authored Jan 30, 2024
2 parents a8aaaef + 3c82205 commit d2236fd
Show file tree
Hide file tree
Showing 54 changed files with 1,565 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Dockerfile
docker-compose.yml

1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
12 changes: 12 additions & 0 deletions .github/workflows/build-Docker-image-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Scheduled rebuild of the Docker image, published under the "nightly" tag.
on:
schedule:
# Cron fields: minute 0, hour 0, any day of month, any month, weekdays 1-5
# (Monday through Friday). GitHub Actions evaluates schedules in UTC.
- cron: '0 0 * * 1,2,3,4,5'

jobs:
build_docker_image:
name: "Call build and push action"
# Delegates the actual build to the reusable workflow in this repository.
uses: ./.github/workflows/build-and-push-Docker-image.yml
# Forward this workflow's secrets to the called workflow.
secrets: inherit
with:
image-name: workflow.portfolio.parsing
image-tag: nightly
12 changes: 12 additions & 0 deletions .github/workflows/build-Docker-image-on-push-to-main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Rebuilds the Docker image on every push to main and publishes it under the
# "main" tag.
on:
push:
branches: [main]

jobs:
build_docker_image:
name: "Call build and push action"
# Delegates the actual build to the reusable workflow in this repository.
uses: ./.github/workflows/build-and-push-Docker-image.yml
# Forward this workflow's secrets to the called workflow.
secrets: inherit
with:
image-name: workflow.portfolio.parsing
image-tag: main
37 changes: 37 additions & 0 deletions .github/workflows/build-Docker-image-on-push-to-pr.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Builds a Docker image for each pull request and posts (or refreshes) a PR
# comment containing the `docker pull` command for that image.
on:
pull_request:

jobs:
build_docker_image:
name: "Call build and push action"
# Delegates the actual build to the reusable workflow in this repository.
uses: ./.github/workflows/build-and-push-Docker-image.yml
secrets: inherit
with:
image-name: workflow.portfolio.parsing
# Tag is "pr" followed by the pull request number, e.g. pr12.
image-tag: pr${{ github.event.pull_request.number }}

# Runs only after the image build job has finished (see `needs`).
# NOTE(review): commenting on a PR requires a token with write access to
# pull requests; no `permissions` block is set here, so this relies on the
# repository's default token permissions - confirm.
add_comment:
needs: build_docker_image
runs-on: ubuntu-latest
steps:
- name: Find Comment
# https://github.com/peter-evans/find-comment
uses: peter-evans/find-comment@v3
id: fc
with:
issue-number: ${{ github.event.pull_request.number }}
# Look for an earlier bot comment whose body contains the marker text
# below, so that the next step can update it.
comment-author: 'github-actions[bot]'
body-includes: Docker image from this PR

- name: Create or update comment
# https://github.com/peter-evans/create-or-update-comment
uses: peter-evans/create-or-update-comment@v4
with:
# Id of the comment found by step `fc`; empty when none was found.
comment-id: ${{ steps.fc.outputs.comment-id }}
issue-number: ${{ github.event.pull_request.number }}
body: |
Docker image from this PR (${{ github.event.pull_request.head.sha }}) created
```
docker pull ${{ needs.build_docker_image.outputs.full-image-name }}
```
edit-mode: replace
67 changes: 67 additions & 0 deletions .github/workflows/build-and-push-Docker-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
---
# Reusable workflow: builds the repository's Docker image, pushes it to the
# GitHub Container Registry (ghcr.io), then checks the image's R system
# dependencies.
name: Build and push docker image

on:
workflow_call:
# Both inputs are required; they are combined into the pushed image
# reference (see the "Define image name" step).
inputs:
image-name:
required: true
type: string
image-tag:
required: true
type: string
# Exposes the computed image reference to the calling workflow.
outputs:
full-image-name:
description: "Full pushed image name including host/registry, name, and tag"
value: ${{ jobs.docker.outputs.full-image-name }}

jobs:
docker:
runs-on: ubuntu-latest
# Token permissions for this job: write packages, read repository contents.
permissions:
packages: write
contents: read
timeout-minutes: 25
outputs:
full-image-name: ${{ steps.image-name.outputs.full-image-name }}

steps:

# Composes ghcr.io/<repository owner>/<image-name>:<image-tag>, lower-cases
# it with `tr`, publishes it as the step output `full-image-name`, and also
# writes it to a file called `full-image-name` in the working directory.
- name: Define image name
id: image-name
run: |
full_image_name="ghcr.io/${{ github.repository_owner }}/${{ inputs.image-name }}:${{ inputs.image-tag }}"
full_image_name=$(echo $full_image_name | tr '[A-Z]' '[a-z]')
echo "full-image-name=$full_image_name" >> "$GITHUB_OUTPUT"
echo "$full_image_name" > full-image-name
- uses: actions/upload-artifact@v4
# Uploads the whole working directory (path: .) as the artifact
# `full-image-name`. No checkout step runs in this job, so presumably the
# directory holds only the file written above - confirm.
with:
name: full-image-name
path: .

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3

- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push
uses: docker/build-push-action@v5
with:
push: true
tags: ${{ steps.image-name.outputs.full-image-name }}
# Read from and write to the GitHub Actions build cache.
cache-from: type=gha
cache-to: type=gha,mode=min
# NOTE(review): this names a build stage `install-pacta`, but the Dockerfile
# in this change has a single unnamed stage - confirm the filter is needed.
no-cache-filters: install-pacta

# Runs the reusable system-dependency check against the image pushed above.
check-system-dependencies:
name: "Check System Dependencies"
needs: docker
uses: ./.github/workflows/check-R-sysdeps.yml
with:
image: ${{ needs.docker.outputs.full-image-name }}
32 changes: 32 additions & 0 deletions .github/workflows/check-R-sysdeps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
# Reusable workflow: pulls a Docker image and fails unless every R system
# requirement reported by pak is installed inside it.
name: Check R system dependencies

on:
workflow_call:
inputs:
# Image reference to pull and inspect (passed to `docker pull` / `docker run`).
image:
required: true
type: string

jobs:

check-system-dependencies:
runs-on: ubuntu-latest
steps:
# Echo the image reference for the job log, then pull it.
- name: 'Pull image'
run: |
echo ${{ inputs.image }}
docker pull ${{ inputs.image }}
# Overrides the image entrypoint with /bin/sh and runs an R snippet that
# prints pak::sysreqs_check_installed() and stops with an error unless every
# entry in its `installed` column is TRUE.
- name: 'Run pak::sysreqs_check_installed()'
run: |
docker run \
--rm \
--entrypoint "/bin/sh" \
${{ inputs.image }} \
-c "Rscript -e '
x <- pak::sysreqs_check_installed()
print(x)
is_installed <- as.data.frame(x)[[\"installed\"]]
stopifnot(all(is_installed))
'"
32 changes: 32 additions & 0 deletions .github/workflows/lint-package.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
# Lints the R package with lintr on pushes and pull requests that target
# main or master.
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]

name: lint

jobs:
lint:
runs-on: ubuntu-latest
env:
# Exposes the workflow token as GITHUB_PAT for the steps below.
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
# Installs lintr plus the package in this repository (local::.).
extra-packages: any::lintr, local::.
# NOTE(review): presumably also installs packages listed under
# Config/Needs/lint in DESCRIPTION; the DESCRIPTION in this change has no
# such field - confirm against the r-lib/actions documentation.
needs: lint

- name: Lint
# The `run` line is executed as R code because of `shell: Rscript {0}`.
run: lintr::lint_package()
shell: Rscript {0}
env:
# Presumably makes lint_package() fail the step when lints are found -
# confirm against the lintr documentation.
LINTR_ERROR_ON_LINT: true
11 changes: 11 additions & 0 deletions .github/workflows/run-hadolint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Lints the repository's Dockerfile with hadolint on every push and pull
# request.
on: [push, pull_request]

jobs:
hadolint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# NOTE(review): the action reference below looks mangled by e-mail
# obfuscation in this capture ("[email protected]"); confirm that the real
# file pins a valid hadolint action version.
- uses: hadolint/[email protected]
with:
dockerfile: Dockerfile
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
output/
40 changes: 32 additions & 8 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,38 @@
Package: workflow.portfolio.parsing
Title: reads, cleans, and reexports portfolios for PACTA
Title: Reads, cleans, and reexports portfolios for PACTA
Version: 0.0.0.9000
Authors@R:
person(given = "Alex",
family = "Axthelm",
role = c("aut", "ctr"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8579-8565")),
Description: What the package does (one paragraph).
c(
person(
given = "Alex",
family = "Axthelm",
role = c("aut", "ctr", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0001-8579-8565")
),
person(
given = "RMI",
role = c("cph", "fnd"),
email = "[email protected]"
)
)
Description: Processes and sanitizes portfolios for use with PACTA. Can accept
a directory of portfolio CSVs, and re-export them, separated into logical
portfolios, along with a JSON file providing descriptive metadata about the
exported files.
License: MIT + file LICENSE
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Imports:
digest,
dplyr,
logger,
pacta.portfolio.import,
uuid
Remotes:
RMI-PACTA/pacta.portfolio.import
Suggests:
testthat (>= 3.0.0),
withr
Config/testthat/edition: 3
48 changes: 48 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Image that installs the workflow.portfolio.parsing R package and, by
# default, processes the portfolios found in /mnt/input.
FROM rocker/r-ver:4.3.2

# set Docker image labels
LABEL org.opencontainers.image.source=https://github.com/RMI-PACTA/workflow.portfolio.parsing
LABEL org.opencontainers.image.description="Prepare portfolios to be processed by PACTA"
LABEL org.opencontainers.image.licenses=MIT
# NOTE(review): the labels below are set to empty strings - presumably
# placeholders to be filled in at build time; confirm.
LABEL org.opencontainers.image.title=""
LABEL org.opencontainers.image.revision=""
LABEL org.opencontainers.image.version=""
LABEL org.opencontainers.image.vendor=""
LABEL org.opencontainers.image.base.name=""
LABEL org.opencontainers.image.ref.name=""
LABEL org.opencontainers.image.authors=""

# Install the ICU development library (pinned to version 70.*), open up
# permissions on /root for all users, and remove the apt package lists.
# NOTE(review): the chmod is presumably needed so the non-root user created at
# the end of this file can use files under /root - confirm.
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libicu-dev=70.* \
&& chmod -R a+rwX /root \
&& rm -rf /var/lib/apt/lists/*

# set frozen CRAN repo
# The line appended to Rprofile.site also sets the option pkg.sysreqs = FALSE.
ARG CRAN_REPO="https://packagemanager.posit.co/cran/__linux__/jammy/2023-10-30"
ARG R_HOME="/usr/local/lib/R"
RUN echo "options(repos = c(CRAN = '$CRAN_REPO'), pkg.sysreqs = FALSE)" >> "${R_HOME}/etc/Rprofile.site"

# Install R dependencies
# Only DESCRIPTION is copied at this point, so the dependency installation
# below depends on that file alone and not on the rest of the source tree.
COPY DESCRIPTION /workflow.portfolio.parser/DESCRIPTION

# install pak, find dependencies from DESCRIPTION, and install them.
# Only rows of pak::local_deps() whose `direct` column is FALSE are installed
# (presumably this excludes the local package itself - confirm).
RUN Rscript -e "\
install.packages('pak'); \
deps <- pak::local_deps(root = '/workflow.portfolio.parser'); \
pkg_deps <- deps[!deps[['direct']], 'ref']; \
pak::pak(pkg_deps); \
"

# copy in everything from this repo
COPY . /workflow.portfolio.parser

# Install the package itself from the copied source tree.
RUN Rscript -e "pak::pak('local::/workflow.portfolio.parser')"

# set default run behavior
# The log threshold is read from the LOG_LEVEL environment variable (default
# 'INFO'); then process_directory() is called on /mnt/input.
CMD ["Rscript", "-e", "logger::log_threshold(Sys.getenv('LOG_LEVEL', 'INFO'));workflow.portfolio.parsing::process_directory('/mnt/input')"]

# Don't run as root
RUN useradd -m portfolio-parser
USER portfolio-parser
WORKDIR /home/portfolio-parser
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
# Generated by roxygen2: do not edit by hand

export(process_directory)
export(reexport_portfolio)
75 changes: 75 additions & 0 deletions R/export_portfolio.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#' Export a cleaned portfolio to CSV and describe the written file
#'
#' Keeps only the `isin`, `market_value` and `currency` columns of
#' `portfolio_data`, writes them to a new CSV file with a UUID-based name in
#' `output_directory`, and returns metadata describing that file.
#'
#' @param portfolio_data A data frame containing at least the columns `isin`,
#'   `market_value` and `currency`. Any other columns are dropped, with a
#'   warning.
#' @param group_data Values identifying the portfolio. They are converted with
#'   `as.list()` and appended to the returned metadata.
#' @param output_directory Path of the directory in which the CSV file is
#'   written.
#'
#' @return A list with the elements `output_digest` (MD5 digest of the written
#'   file), `output_filename` and `output_rows`, followed by the elements of
#'   `group_data`.
#'
#' @noRd
export_portfolio <- function(
  portfolio_data,
  group_data,
  output_directory
) {

  # logger's default glue formatter only accepts length-one unnamed arguments
  # and interpolates any `{...}` it finds in them. Values that originate from
  # user data are therefore collapsed to one string and their braces doubled,
  # which glue renders as literal braces. (If a non-glue formatter has been
  # configured, the doubled braces are logged verbatim.)
  as_log_text <- function(x) {
    text <- toString(x)
    text <- gsub("{", "{{", text, fixed = TRUE)
    gsub("}", "}}", text, fixed = TRUE)
  }

  logger::log_trace("cleaning and rearranging data prior to export")
  output_cols <- c("isin", "market_value", "currency")

  extra_cols <- setdiff(colnames(portfolio_data), output_cols)
  if (length(extra_cols) > 0L) {
    logger::log_warn(
      "Extra columns detected in portfolio data: ",
      as_log_text(extra_cols),
      " Discarding."
    )
    warning("Extra columns detected in portfolio data. Discarding.")
  }

  missing_cols <- setdiff(output_cols, colnames(portfolio_data))
  if (length(missing_cols) > 0L) {
    # Logged at error level because execution stops immediately afterwards.
    logger::log_error(
      "Missing columns detected in portfolio data: ",
      as_log_text(missing_cols)
    )
    stop("Missing columns detected in portfolio data.")
  }

  portfolio_data <- dplyr::select(
    .data = portfolio_data,
    dplyr::all_of(output_cols)
  )

  output_rows <- nrow(portfolio_data)

  # A fresh UUID per export gives every written file a unique name.
  output_filename <- paste0(
    uuid::UUIDgenerate(),
    ".csv"
  )

  # Write the portfolio data to a file
  output_filepath <- file.path(
    output_directory,
    output_filename
  )

  logger::log_trace(
    "Writing portfolio data to file: ",
    as_log_text(output_filepath)
  )
  write.csv(
    x = portfolio_data,
    file = output_filepath,
    row.names = FALSE,
    na = "",
    fileEncoding = "UTF-8"
  )
  logger::log_debug(
    "Portfolio data written to file: ",
    as_log_text(output_filepath)
  )

  # `file = TRUE` makes digest hash the contents of the file at this path
  # rather than the path string itself.
  output_digest <- digest::digest(
    object = output_filepath,
    file = TRUE,
    algo = "md5",
    serialize = FALSE
  )
  logger::log_trace("Portfolio data digest: ", output_digest)

  # File-level metadata first, followed by the caller-supplied group values.
  c(
    list(
      output_digest = output_digest,
      output_filename = output_filename,
      output_rows = output_rows
    ),
    as.list(group_data)
  )
}
Loading

0 comments on commit d2236fd

Please sign in to comment.