Skip to content

Commit

Permalink
Add some example actions for r/python/stata
Browse files Browse the repository at this point in the history
  • Loading branch information
bloodearnest committed Mar 15, 2022
1 parent 4ff67ff commit b2bd262
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 3 deletions.
13 changes: 13 additions & 0 deletions analysis/example.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Stata cannot read a gzip-compressed csv directly, so decompress it to a
// plain csv file before importing.
shell gunzip output/input.csv.gz

// Load the decompressed csv with import delimited.
import delimited using output/input.csv


// your analysis code goes here


// Save every dta output with gzsave and a .dta.gz extension; subsequent
// actions should load those files with gzuse.
gzsave output/stata.dta.gz
10 changes: 10 additions & 0 deletions analysis/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pandas as pd
import pyarrow.feather

# Load the gzip-compressed cohort extract.
cohort = pd.read_csv("output/input.csv.gz")


# Write the same frame twice: once through pandas (feather files are
# compressed by default in python) and once through pyarrow with
# compression explicitly turned off.
cohort.to_feather("output/python.feather")
pyarrow.feather.write_feather(
    cohort,
    "output/python.feather.raw",
    compression="uncompressed",
)
6 changes: 6 additions & 0 deletions analysis/example.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Load the gzip-compressed cohort extract.
cohort <- readr::read_csv(file = "output/input.csv.gz")

# Save the data as feather twice: once with the default settings and once
# with compression explicitly switched off.
arrow::write_feather(x = cohort, sink = "output/r.feather")
arrow::write_feather(
  x = cohort,
  sink = "output/r.feather.raw",
  compression = "uncompressed"
)
Empty file added analysis/stata.do
Empty file.
7 changes: 7 additions & 0 deletions analysis/study_definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,11 @@
population=patients.registered_with_one_practice_between(
"2019-02-01", "2020-02-01"
),
age=patients.age_as_of(
"2019-09-01",
return_expectations={
"rate": "universal",
"int": {"distribution": "population_ages"},
},
),
)
27 changes: 24 additions & 3 deletions project.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,33 @@
version: '3.0'

expectations:
population_size: 1000
population_size: 10000

actions:

generate_study_population:
run: cohortextractor:latest generate_cohort --study-definition study_definition
run: cohortextractor:latest generate_cohort --output-format csv.gz --study-definition study_definition
outputs:
highly_sensitive:
cohort: output/input.csv
cohort: output/input.csv.gz

python_example:
run: python:latest analysis/example.py
needs: [generate_study_population]
outputs:
highly_sensitive:
cohort: output/python.feather*

stata_example:
run: stata-mp:latest analysis/example.do
needs: [generate_study_population]
outputs:
highly_sensitive:
cohort: output/stata.dta.gz

r_example:
run: r:latest analysis/example.r
needs: [generate_study_population]
outputs:
highly_sensitive:
cohort: output/r.feather*

0 comments on commit b2bd262

Please sign in to comment.