From fa8bde46dc983f29e9d32a2591241f575f82a17c Mon Sep 17 00:00:00 2001 From: wjchulme <25637345+wjchulme@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:35:31 +0000 Subject: [PATCH 1/5] add script to report frequency of covid-19 vaccine product names --- analysis/extract_covid_vaccination_name_frequency.sql | 8 ++++++++ project.yaml | 9 +++++++++ 2 files changed, 17 insertions(+) create mode 100644 analysis/extract_covid_vaccination_name_frequency.sql diff --git a/analysis/extract_covid_vaccination_name_frequency.sql b/analysis/extract_covid_vaccination_name_frequency.sql new file mode 100644 index 0000000..f8bacdd --- /dev/null +++ b/analysis/extract_covid_vaccination_name_frequency.sql @@ -0,0 +1,8 @@ +SELECT + VaccinationName_ID, + VaccinationName, + CEILING(COUNT(VaccinationName)/100.0)*100 as Frequency, +FROM Vaccination +WHERE VaccinationName_ID IN (SELECT VaccinationName_ID FROM VaccinationReference WHERE VaccinationContent = 'SARS-2 Coronavirus') +GROUP BY VaccinationName_ID, VaccinationName +ORDER BY Frequency DESC; \ No newline at end of file diff --git a/project.yaml b/project.yaml index ce07f09..96a16a2 100644 --- a/project.yaml +++ b/project.yaml @@ -26,3 +26,12 @@ actions: outputs: moderately_sensitive: html: output/vaccination_names.html + + extract_covid_vaccination_name_frequency: + run: > + sqlrunner:latest + --output output/covid_vaccination_name_frequency.csv + analysis/extract_covid_vaccination_name_frequency.sql + outputs: + moderately_sensitive: + rows: output/covid_vaccination_name_frequency.csv From d1e285a6f8d6ae06d65e83a56d0e4d750c386854 Mon Sep 17 00:00:00 2001 From: wjchulme <25637345+wjchulme@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:53:19 +0000 Subject: [PATCH 2/5] linting; remove trailing comma --- analysis/extract_covid_vaccination_name_frequency.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analysis/extract_covid_vaccination_name_frequency.sql b/analysis/extract_covid_vaccination_name_frequency.sql index f8bacdd..4bd8fca 100644 --- a/analysis/extract_covid_vaccination_name_frequency.sql +++ b/analysis/extract_covid_vaccination_name_frequency.sql @@ -1,7 +1,7 @@ -SELECT +SELECT VaccinationName_ID, VaccinationName, - CEILING(COUNT(VaccinationName)/100.0)*100 as Frequency, + CEILING(COUNT(VaccinationName) / 100.0) * 100 AS Frequency FROM Vaccination WHERE VaccinationName_ID IN (SELECT VaccinationName_ID FROM VaccinationReference WHERE VaccinationContent = 'SARS-2 Coronavirus') GROUP BY VaccinationName_ID, VaccinationName From 276521f2cd3684a35af967a65e0dc4761d7110a0 Mon Sep 17 00:00:00 2001 From: wjchulme <25637345+wjchulme@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:59:49 +0000 Subject: [PATCH 3/5] one line --- analysis/extract_covid_vaccination_name_frequency.sql | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/analysis/extract_covid_vaccination_name_frequency.sql b/analysis/extract_covid_vaccination_name_frequency.sql index 4bd8fca..d5fd730 100644 --- a/analysis/extract_covid_vaccination_name_frequency.sql +++ b/analysis/extract_covid_vaccination_name_frequency.sql @@ -1,8 +1,5 @@ -SELECT - VaccinationName_ID, - VaccinationName, - CEILING(COUNT(VaccinationName) / 100.0) * 100 AS Frequency +SELECT VaccinationName_ID, VaccinationName, CEILING(COUNT(VaccinationName) / 100.0) * 100 AS Frequency FROM Vaccination WHERE VaccinationName_ID IN (SELECT VaccinationName_ID FROM VaccinationReference WHERE VaccinationContent = 'SARS-2 Coronavirus') GROUP BY VaccinationName_ID, VaccinationName -ORDER BY Frequency DESC; \ No newline at end of file +ORDER BY Frequency DESC; From 8ebc050a5bf9d7995b9b3ebc7b19698aea62327d Mon Sep 17 00:00:00 2001 From: Iain Dillingham Date: Tue, 7 Feb 2023 15:13:19 +0000 Subject: [PATCH 4/5] Placate sqlfluff --- .../extract_covid_vaccination_name_frequency.sql | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/analysis/extract_covid_vaccination_name_frequency.sql b/analysis/extract_covid_vaccination_name_frequency.sql index d5fd730..6bb7891 100644 --- a/analysis/extract_covid_vaccination_name_frequency.sql +++ b/analysis/extract_covid_vaccination_name_frequency.sql @@ -1,5 +1,14 @@ -SELECT VaccinationName_ID, VaccinationName, CEILING(COUNT(VaccinationName) / 100.0) * 100 AS Frequency +SELECT + VaccinationName_ID, + VaccinationName, + CEILING(COUNT(VaccinationName) / 100.0) * 100 AS Frequency FROM Vaccination -WHERE VaccinationName_ID IN (SELECT VaccinationName_ID FROM VaccinationReference WHERE VaccinationContent = 'SARS-2 Coronavirus') +WHERE + VaccinationName_ID IN ( + SELECT VaccinationName_ID + FROM + VaccinationReference + WHERE VaccinationContent = 'SARS-2 Coronavirus' + ) GROUP BY VaccinationName_ID, VaccinationName ORDER BY Frequency DESC; From aa685b2e15fa3be8c313c9bc743158c239facbcd Mon Sep 17 00:00:00 2001 From: Iain Dillingham Date: Tue, 7 Feb 2023 15:25:02 +0000 Subject: [PATCH 5/5] Pass dummy data Dummy data is only used when not running in production. It's a convenience for downstream actions when running locally and in CI. --- analysis/dummy_vaccination_name_frequency.csv | 1 + project.yaml | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 analysis/dummy_vaccination_name_frequency.csv diff --git a/analysis/dummy_vaccination_name_frequency.csv b/analysis/dummy_vaccination_name_frequency.csv new file mode 100644 index 0000000..3595900 --- /dev/null +++ b/analysis/dummy_vaccination_name_frequency.csv @@ -0,0 +1 @@ +VaccinationName_ID,VaccinationName,Frequency diff --git a/project.yaml b/project.yaml index 96a16a2..d3b248b 100644 --- a/project.yaml +++ b/project.yaml @@ -8,6 +8,7 @@ actions: run: > sqlrunner:latest --output output/vaccination_names.csv + --dummy-data-file analysis/dummy.csv analysis/extract_vaccination_names.sql outputs: moderately_sensitive: @@ -31,6 +32,7 @@ actions: run: > sqlrunner:latest --output output/covid_vaccination_name_frequency.csv + --dummy-data-file analysis/dummy_vaccination_name_frequency.csv analysis/extract_covid_vaccination_name_frequency.sql outputs: moderately_sensitive: