Skip to content

Commit

Permalink
Merge pull request #62 from jsbrittain/main
Browse files Browse the repository at this point in the history
Add additional settings and fix validation checks
  • Loading branch information
jsbrittain authored Jul 30, 2023
2 parents 3834d59 + a228165 commit 3130e4e
Show file tree
Hide file tree
Showing 29 changed files with 1,230 additions and 226 deletions.
27 changes: 24 additions & 3 deletions builder/builder/builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import copy
import json
import pathlib
import re
Expand Down Expand Up @@ -84,7 +85,7 @@ def _GetConfigFileinfo(self) -> Union[str, dict]:
return filename
if isinstance(self.snakefile, dict):
# Remote file
c = self.snakefile.copy()
c = copy.deepcopy(self.snakefile)
c["kwargs"]["path"] = c["kwargs"]["path"].replace(
workflow_filename, config_filename
)
Expand Down Expand Up @@ -126,6 +127,11 @@ class Model:
def __init__(self) -> None:
"""Initialise the model"""
self.nodes: List[Node] = [] # List of Node objects
self.partial_build: bool = False

def SetPartialBuild(self, partial_build: bool) -> None:
"""Sets the partial build flag (does not throw if a node is missing)"""
self.partial_build = partial_build

def BuildSnakefile(
self,
Expand Down Expand Up @@ -292,13 +298,19 @@ def AddConnector(self, name, connector) -> None:
Example: Connect the output of module2 to the named input on module1
connector = [ {"input1": "module2"}, "module1" ]
Error behaviour depends on the partial_build flag. If False then an
error is thrown if a node is not found. If True then the connector is
ignored and None returned.
Args:
name (str): Name of the connector
connector (list): Connector definition
"""
mapping = connector.get("map", None)
node_to = self.GetNodeByName(mapping[1])
if not node_to:
if self.partial_build:
return
raise ValueError(
"No matching node found for connector source: "
"Requested '" + mapping[1] + "'"
Expand All @@ -310,13 +322,17 @@ def AddConnector(self, name, connector) -> None:
for k, v in mapping[0].items():
incoming_node = self.GetNodeByName(v)
if not incoming_node:
if self.partial_build:
return
raise ValueError(
"No matching node found for connector source: " + v
)
node_to.input_namespace[k] = incoming_node.output_namespace
else:
node_from = self.GetNodeByName(mapping[0])
if not node_from:
if self.partial_build:
return
raise ValueError(
"No matching node found for connector destination: " + mapping[0]
)
Expand Down Expand Up @@ -579,7 +595,10 @@ def parse_struct(yl: dict):
c += f'["{key}"]={{}}\n' # Create empty dict
c += "\n".join([f'["{key}"]{v}' for v in vv]) + "\n"
elif isinstance(value, list):
raise Exception("Lists not supported in config")
c += f'["{key}"]=[]\n' # Create empty list
for item in value:
c += f'["{key}"].append("{item}")\n'
# raise Exception("Lists not supported in config")
elif not value:
# Null
c += f'["{key}"]="None"\n'
Expand Down Expand Up @@ -620,8 +639,9 @@ def BuildFromJSON(
config: dict,
singlefile: bool = False,
expand: bool = True,
build_path: str = "",
build_path: str = "build",
clean_build: bool = True,
partial_build: bool = False, # Don't throw an error if node is missing
) -> Tuple[Union[Tuple[str, str], bytes], Model]:
"""Builds a workflow from a JSON specification
Expand All @@ -630,6 +650,7 @@ def BuildFromJSON(
With singlefile=False the workflow is a (zipped) directory structure.
"""
m = Model()
m.SetPartialBuild(partial_build)
# Add modules first to ensure all namespaces are defined before connectors
for item in config:
if item["type"].casefold() in ["module", "source", "terminal"]:
Expand Down
29 changes: 26 additions & 3 deletions builder/builder/builder_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,12 @@ def GetLocalModules(path: str) -> List[dict]:
config = yaml.safe_load(file)
except FileNotFoundError:
print(f"Config file not found - assuming blank: {file}")
module_classification = GetModuleClassification(config)
modules.append(
{
"name": f"({org}) {FormatName(workflow)}",
"type": module_type[:-1], # remove plural
# "type": module_type[:-1], # remove plural
"type": module_classification,
"config": {
"snakefile": abspath(url_workflow),
"config": config,
Expand Down Expand Up @@ -159,10 +161,13 @@ def GetRemoteModulesGithubDirectoryListing(repo: str) -> List[dict]:
"Github API request failed (getting workflow config file)."
)
config = yaml.safe_load(r_config.text)
# Determine module type by config file, rather than directory name
module_classification = GetModuleClassification(config)
modules.append(
{
"name": f"({org['name']}) {FormatName(workflow['name'])}",
"type": module_type["name"][:-1], # remove plural
# "type": module_type["name"][:-1], # remove plural
"type": module_classification,
"config": {
"snakefile": {
"function": "github",
Expand Down Expand Up @@ -220,10 +225,12 @@ def GetRemoteModulesGithubBranchListing(repo: str) -> List[dict]:
if r_config.status_code != 200:
raise Exception("Github API request failed (getting workflow config file).")
config = yaml.safe_load(r_config.text)
module_classification = GetModuleClassification(config)
modules.append(
{
"name": branch["name"],
"type": module_types[module_type],
# "type": module_types[module_type],
"type": module_classification,
"config": {
"snakefile": {
"function": "github",
Expand All @@ -241,6 +248,22 @@ def GetRemoteModulesGithubBranchListing(repo: str) -> List[dict]:
return modules


def GetModuleClassification(config: dict) -> str:
    """Determine the module classification from the config file

    A module is classified as a "source" only when its config explicitly
    declares an ``input_namespace`` key whose value is None (null). In every
    other case (key absent, key set to any non-None value, or no usable
    config at all) it is classified as a "module".

    Args:
        config: module config file contents (typically the result of
            yaml.safe_load). None is accepted, as is any non-dict value,
            both of which are treated as carrying no namespace information.

    Returns:
        Either "source" or "module".
    """
    # A blank config (None) or a config that did not parse to a mapping
    # (e.g. a bare string or list) cannot declare an input namespace, so
    # default to module rather than failing on the lookup below
    if not isinstance(config, dict):
        return "module"
    # Only an explicit null input namespace marks a source; a missing key
    # must not be confused with an explicit null, so test membership first
    if "input_namespace" in config and config["input_namespace"] is None:
        return "source"
    return "module"


def GetWorkflowFiles(
load_command: str,
) -> Tuple[str, str]:
Expand Down
39 changes: 20 additions & 19 deletions docs/getting_started/builder.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Quickstart

## GRAPEVNE Builder
# GRAPEVNE Builder

GRAPEVNE Builder is the graphical interface that assists you in graphing,
manipulating and building GRAPEVNE workflows.
Expand All @@ -12,27 +10,26 @@ run a simple workflow using several modules that are already available online.
For this tutorial we will download, process and visualise some publicly
available Covid-19 data.

### Loading modules
## Loading modules

To begin, open GRAPEVNE. This should start the
application in the 'Builder' screen, where you will construct and test-run
your workflows.

There are various options available at the top of the screen.
There are various options available at the top of the screen.
For this tutorial we will make use of pre-constructed modules available through
our online repository. To access the repository ensure that 'Directory
Listing (github)' is selected from the repository drop-down box at the top of
the screen, and that `kraemer-lab/vneyard` (note the spelling) is displayed in
the repository field. Then, click `Get Module List`. A message (`Loading remote
modules`) will display while the listing loads. This will take a few seconds
the repository field. Then, click `Get Module List`. A message (`Loading remote modules`) will display while the listing loads. This will take a few seconds
and you will know when it has completed as the list of available modules will
display on the left-hand side of the screen.

The module list is filterable - for this session we are interested in only the
`Tutorial Builder` modules, so select this from the filter drop-down box (
which is currently displaying `(all)`).

### Adding your first module
## Adding your first module

In this tutorial we are interested in the number of new cases of Covid-19 that
were reported to the World Health Organisation (WHO) at different times and across
Expand All @@ -50,7 +47,7 @@ appeared on the graph you can click on the node/module to edit its parameters.
This module has been set up (by default) to download the `snakemake` logo from
their website. However, we want to change this to download the WHO data.

### Modify module parameters
## Modify module parameters

Click on the module and take a moment to look through the parameters list that
is displayed on the right-hand side of the screen. Note that there is
Expand All @@ -60,6 +57,7 @@ which appears under `config-params-url`. Hovering your mouse over the URL text
will change the background colour, indicating that it can be edited. Clicking
on the text will open an edit box. Do this now, select all of the text and
replace it with the location of the WHO data:

```
https://covid19.who.int/WHO-COVID-19-global-data.csv
```
Expand All @@ -76,7 +74,7 @@ In addition to changing the URL location, we also want to rename the file
after download for convenience. We can do this by editing the `filename` field
(`config-params-filename`) and changing it to `data.csv`.

### Connect modules together into workflows
## Connect modules together into workflows

We will now add two more nodes to the graph. First, add the `Filter` node.
The WHO dataset contains information on many countries, but we want to limit
Expand Down Expand Up @@ -107,7 +105,7 @@ screen into the canvas area, and connect the `Filter` node to it. Leave its
parameters set to the defaults (for now). Now we are ready to (test) run our
first workflow...

### Test run a workflow
## Test run a workflow

GRAPEVNE is designed to build workflows into self-contained files that can be
shared with others. However, we don't currently want to do that. Instead, we
Expand All @@ -132,7 +130,7 @@ Try this now: click `Build and Test`. Remember to open the `Terminal` in order
to monitor progress. This is the perfect time to grab a fresh cup of tea while
the environment loads - we will discuss why this takes so long shortly...

### Your first graph
## Your first graph

You can monitor the status of your workflow from the `Terminal`, but if all
goes well then you should see a figure pop up on your screen displaying
Expand All @@ -148,7 +146,7 @@ visualisation package. Visualising data is useful as a tutorial example, and
can be very useful to periodically inspect the results of analyses, or to
manually-inspect pre-processed inputs prior to starting long jobs.

### Extending the graph
## Extending the graph

Let us now extend the analysis by considering seasonal variations. With your
previous graph intact, add a new node `AggregateByMonth` and connect the
Expand All @@ -166,7 +164,7 @@ Your graph should now look (very similar) to this:
:alt: Complete layout for the Builder Tutorial
```

### Run your extended graph
## Run your extended graph

Click on `Build and Test` to run your extended graph. Since a lot of the
set-up process was completed on the first run (although not all - we have
Expand All @@ -183,7 +181,7 @@ and after a few more seconds of processing time, the second graph will appear,
revealing the seasonal fluctuations in new Covid-19 cases by month (numbered
1-12 in this case) for our selected country.

### Changing parameters
## Changing parameters

As you have seen already, changing the parameters of this analysis is extremely
easy, you simply have to edit them by clicking on the relevant modules. Try this
Expand All @@ -192,12 +190,12 @@ if you are not sure of your country code). Click `Build and Test` to launch the
analysis.

```{note}
Snakemake will know that the parameters of the analysis have changed
Snakemake will know that the parameters of the analysis have changed
and will automatically re-process the necessary steps to produce the
required analysis.
```

### Building workflows for use outside of GRAPEVNE
## Building workflows for use outside of GRAPEVNE

Although this extends slightly beyond the basic usage of GRAPEVNE, it is useful
to consider two scenarios where we may want to build a workflow and distribute
Expand All @@ -210,16 +208,18 @@ run outside of GRAPEVNE.

For example, to run your newly created workflow called `build.zip` outside of
GRAPEVNE, simply unzip it, move into the `build` folder and type:

```
snakemake --cores 1 $(snakemake --list)
```

in the terminal (or command prompt). This will launch the same series of steps
as you executed with the `Build and Test` button, but without needing the
GRAPEVNE application. These can therefore be run locally, or remotely, as
required and demonstrates another principle of GRAPEVNE: that workflows remain
entirely compatible with modern Snakemake.

### Building workflows for use as modules
## Building workflows for use as modules

The second basic scenario of workflow usage is using GRAPEVNE to create
workflows composed of hierarchies of modules. This requires us to be able to
Expand All @@ -236,14 +236,15 @@ exposed in the editor for use.
## Summary

You are now able to build a workflow to either

1. distribute to others for use, or
2. use as a module in another workflow

Indeed, it is this form of hierarchical modularization that makes GRAPEVNE
so powerful. However, to demonstrate this we will want to set-up our own
repository for testing (which we discuss in the next set of tutorials).

### Wait, but why is the workflow so slow on a first run?
## Wait, but why is the workflow so slow on a first run?

You may be wondering why the workflow was so slow to execute on its first run.
After all, it was simply reading a file, filtering it, and plotting the result.
Expand Down
67 changes: 67 additions & 0 deletions docs/getting_started/challenge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Challenge

This challenge is designed to allow you to engage with GRAPEVNE using your
preferred programming or scripting language. For example, if you are comfortable
programming in R, then try to complete the challenge in R.

Build the following workflow using GRAPEVNE modules.

## Outline

Create a set of modules, and then combine that set in a hierarchy / composition
to achieve the following: download a given file and calculate the following
statistic for each letter of the alphabet:
"number of words beginning with each letter of the alphabet, minus the number
of words ending with that letter". For example,
if there were 20 words in a given file that began with the letter 'a', and 25
that ended with the letter 'a', then the metric for 'a' would be 20 - 25 = -5.
The output file should list each letter of the alphabet accompanied by its
calculated metric, so the first line (for 'a') would read "-5" in this case.
The file should contain one line for every letter of the alphabet.

As extended exercises:

1. limit the analysis to only consider words of a
specified length (i.e. words between, say, 4-8 letters long)
2. plot the results as a bar graph with a separate bar for each letter of the
alphabet.

In order to make best use of GRAPEVNE modules and hierarchies, it is recommended
to take the following approach:

### Module 1: Download a words list

Create a module (that runs in a conda environment) to download a list of words.

Here is a list of English words: [https://github.com/dwyl/english-words/blob/master/words_alpha.txt](https://github.com/dwyl/english-words/blob/master/words_alpha.txt) (credit to: [https://github.com/dwyl/english-words](https://github.com/dwyl/english-words)).

### Module 2: Count the number of words beginning with each letter of the alphabet

Create a module that takes as input a text file, and produces as output a file
listing each letter of the alphabet along with the number of words in the input
file that began with that letter.

### Module 3: Reverse words

Create a module that reverses the text on each line of an input file, and
produces as output a file containing the reversed words.

### Module 4: Subtract two numeric files from one another

Create a module that takes as input _two_ files containing lists of numbers,
and produces as output a single file containing the difference (i.e. `a-b`)
in row-wise fashion (i.e. if file 1 contained lines `1 2 3 4 5` and file two
contained lines `3 1 5 2 3` then the output would be `-2 1 -2 2 2`).

### Extended Module 1: Filter by word length

Create a module that takes a text file as input and produces another text file
as output, where the output contains only those words that are within a
specified word length. The parameters should be adjustable but could be, for
example, larger than or equal to 4 letters and shorter than or equal to 8
letters.

### Extended Module 2: Bar graph

Create a module that produces a bar graph given an input file consisting of a
list of numbers.
File renamed without changes.
Loading

0 comments on commit 3130e4e

Please sign in to comment.