From 5b5c1a948cc6f222af88d0cb666826b07f0f80bb Mon Sep 17 00:00:00 2001 From: John Brittain Date: Fri, 28 Jul 2023 19:57:13 +0100 Subject: [PATCH 01/11] Module type determined by config, not folder, when loading and expanding --- builder/builder/builder_web.py | 29 ++++++++++-- .../src/NodeMap/scene/NodeMapEngine.tsx | 44 ++++++++++++++++++- nodemapper/src/gui/Builder/BuilderEngine.tsx | 29 ------------ nodemapper/src/gui/Builder/Header.tsx | 11 ++--- nodemapper/src/redux/globals.ts | 2 +- nodemapper/src/redux/reducers/builder.ts | 10 ++--- 6 files changed, 77 insertions(+), 48 deletions(-) diff --git a/builder/builder/builder_web.py b/builder/builder/builder_web.py index d145005e..bf87afec 100644 --- a/builder/builder/builder_web.py +++ b/builder/builder/builder_web.py @@ -59,10 +59,12 @@ def GetLocalModules(path: str) -> List[dict]: config = yaml.safe_load(file) except FileNotFoundError: print(f"Config file not found - assuming blank: {file}") + module_classification = GetModuleClassification(config) modules.append( { "name": f"({org}) {FormatName(workflow)}", - "type": module_type[:-1], # remove plural + # "type": module_type[:-1], # remove plural + "type": module_classification, "config": { "snakefile": abspath(url_workflow), "config": config, @@ -159,10 +161,13 @@ def GetRemoteModulesGithubDirectoryListing(repo: str) -> List[dict]: "Github API request failed (getting workflow config file)." ) config = yaml.safe_load(r_config.text) + # Determine module type by config file, rather than directory name + module_classification = GetModuleClassification(config) modules.append( { "name": f"({org['name']}) {FormatName(workflow['name'])}", - "type": module_type["name"][:-1], # remove plural + # "type": module_type["name"][:-1], # remove plural + "type": module_classification, "config": { "snakefile": { "function": "github", @@ -220,10 +225,12 @@ def GetRemoteModulesGithubBranchListing(repo: str) -> List[dict]: if r_config.status_code != 200: raise Exception("Github API request failed (getting workflow config file).") config = yaml.safe_load(r_config.text) + module_classification = GetModuleClassification(config) modules.append( { "name": branch["name"], - "type": module_types[module_type], + # "type": module_types[module_type], + "type": module_classification, "config": { "snakefile": { "function": "github", @@ -241,6 +248,22 @@ def GetRemoteModulesGithubBranchListing(repo: str) -> List[dict]: return modules +def GetModuleClassification(config: dict) -> str: + """Determine the module classification from the config file + + Args: + config: module config file + """ + # If config is None, then default to module + if config is None: + return "module" + # If the input namespace exists and is anything other than None, then it is + # a module + if config.get("input_namespace", "blank") is None: + return "source" + return "module" + + def GetWorkflowFiles( load_command: str, ) -> Tuple[str, str]: diff --git a/nodemapper/src/NodeMap/scene/NodeMapEngine.tsx b/nodemapper/src/NodeMap/scene/NodeMapEngine.tsx index 037a71c6..3300d9b8 100644 --- a/nodemapper/src/NodeMap/scene/NodeMapEngine.tsx +++ b/nodemapper/src/NodeMap/scene/NodeMapEngine.tsx @@ -338,10 +338,12 @@ export default class NodeMapEngine { offset += 5.0; // Call AddNodeToGraph with uniquenames = false to prevent node renaming // (at least until after the graph is expanded) + const module_type = NodeMapEngine.GetModuleType( + data.config.config as Record); const newnode = this.AddNodeToGraph( data, newpoint, - "rgb(192,255,255)", + 
NodeMapEngine.GetModuleTypeColor(module_type), false ); newnodes.push(newnode); @@ -461,4 +463,44 @@ export default class NodeMapEngine { this.engine.repaintCanvas(); return newnodes; } + + public static GetModuleType(config: Record): string { + for (const key in config) { + console.log(key + " = " + config[key]); + if (key === "input_namespace") { + const value = config[key]; + if (value === null) { + return "source"; + } + } + } + return "module"; + } + + public static GetModuleTypeColor(type: string): string { + let color = ""; + switch (type) { + case "source": { + color = "rgb(192,255,0)"; + break; + } + case "module": { + color = "rgb(0,192,255)"; + break; + } + case "connector": { + color = "rgb(0,255,192)"; + break; + } + case "terminal": { + color = "rgb(192,0,255)"; + break; + } + default: { + color = "rgb(128,128,128)"; + break; + } + } + return color; + } } diff --git a/nodemapper/src/gui/Builder/BuilderEngine.tsx b/nodemapper/src/gui/Builder/BuilderEngine.tsx index b3035c0a..cb07a3fb 100644 --- a/nodemapper/src/gui/Builder/BuilderEngine.tsx +++ b/nodemapper/src/gui/Builder/BuilderEngine.tsx @@ -7,33 +7,4 @@ export default class BuilderEngine extends NodeMapEngine { public static get Instance(): BuilderEngine { return BuilderEngine._Instance || (this._Instance = new this()); } - - // BUILDER specific methods - - public static GetModuleTypeColor(type: string): string { - let color = ""; - switch (type) { - case "source": { - color = "rgb(192,255,0)"; - break; - } - case "module": { - color = "rgb(0,192,255)"; - break; - } - case "connector": { - color = "rgb(0,255,192)"; - break; - } - case "terminal": { - color = "rgb(192,0,255)"; - break; - } - default: { - color = "rgb(128,128,128)"; - break; - } - } - return color; - } } diff --git a/nodemapper/src/gui/Builder/Header.tsx b/nodemapper/src/gui/Builder/Header.tsx index 394c225b..da96b6e4 100644 --- a/nodemapper/src/gui/Builder/Header.tsx +++ b/nodemapper/src/gui/Builder/Header.tsx @@ -123,7 +123,7 @@ const Header = () => { repo = { type: "github", listing_type: "DirectoryListing", - repo: "jsbrittain/snakeshack", + repo: "kraemer-lab/vneyard", }; break; case "BranchListing": @@ -170,6 +170,7 @@ const Header = () => { + | @@ -179,16 +180,12 @@ const Header = () => { - {/* - - */} + | handleChange(e.target.value)} - /> - ); -}; - const Header = () => { const dispatch = useAppDispatch(); + /* // Load nodemap from file const btnLoadScene = () => { BuilderEngine.Instance.LoadScene(); @@ -66,6 +44,7 @@ const Header = () => { const btnSaveScene = () => { BuilderEngine.Instance.SaveScene(); }; + */ // Load nodemap from file const btnClearScene = () => { @@ -104,39 +83,9 @@ const Header = () => { dispatch(builderGetRemoteModules()); }; - const btnImportModule = () => { - //dispatch(builderImportModule()); - console.error("Import of individual modules not currently implemented."); - }; - - const selectRepositoryTarget = (target) => { - let repo = {}; - switch (target) { - case "LocalFilesystem": - repo = { - type: "local", - listing_type: "DirectoryListing", - repo: "../../snakeshack", - }; - break; - case "DirectoryListing": - repo = { - type: "github", - listing_type: "DirectoryListing", - repo: "kraemer-lab/vneyard", - }; - break; - case "BranchListing": - repo = { - type: "github", - listing_type: "BranchListing", - repo: "jsbrittain/snakeshack", - }; - break; - default: - console.error("Unknown repository type selected: ", target); - } - dispatch(builderSetRepositoryTarget(repo)); + // Open settings pane + const btnSettings = () => 
{ + dispatch(builderToggleSettingsVisibility()); }; return ( @@ -184,15 +133,9 @@ const Header = () => { - - + diff --git a/nodemapper/src/gui/Builder/components/BodyWidget.tsx b/nodemapper/src/gui/Builder/components/BodyWidget.tsx index 006125a0..e14d7599 100644 --- a/nodemapper/src/gui/Builder/components/BodyWidget.tsx +++ b/nodemapper/src/gui/Builder/components/BodyWidget.tsx @@ -9,6 +9,7 @@ import { DiagramEngine } from "@projectstorm/react-diagrams"; import TerminalWindow from "./TerminalWindow"; import BuilderEngine from "../BuilderEngine"; import NodeInfoRenderer from "./NodeInfoRenderer"; +import BuilderSettings from "./BuilderSettings"; import { TrayWidget } from "./TrayWidget"; import { useAppDispatch } from "redux/store/hooks"; @@ -147,15 +148,33 @@ export const BodyWidget = (props: BodyWidgetProps) => { overflowY: "auto", }} > - +
+ +
{trayitems} @@ -174,6 +193,7 @@ export const BodyWidget = (props: BodyWidgetProps) => { + ); diff --git a/nodemapper/src/gui/Builder/components/BuilderSettings.tsx b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx new file mode 100644 index 00000000..f7a9c9f9 --- /dev/null +++ b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx @@ -0,0 +1,110 @@ +import React from "react"; +import { useState } from "react"; +import { useAppDispatch } from "redux/store/hooks"; +import { useAppSelector } from "redux/store/hooks"; +import { builderSetRepositoryTarget } from "redux/actions"; +import { builderSetSnakemakeArgs } from "redux/actions"; + +const default_input_size = 35; + +const RepoOptions: React.FC = () => { + const dispatch = useAppDispatch(); + const repoSettings = JSON.parse( + useAppSelector((state) => state.builder.repo) + ); + const [repoURL, setRepoURL] = useState(repoSettings.repo); + + const handleChange = (url) => { + const repo_settings = { ...repoSettings }; + repo_settings.repo = url; + setRepoURL(url); + dispatch(builderSetRepositoryTarget(repo_settings)); + }; + + return ( + handleChange(e.target.value)} + /> + ); +}; + +const BuilderSettings = () => { + const dispatch = useAppDispatch(); + const isvisible = useAppSelector((state) => state.builder.settings_visible); + const listing_type = JSON.parse( + useAppSelector((state) => state.builder.repo) + ).listing_type; + const snakemake_args = useAppSelector((state) => state.builder.snakemake_args); + + const selectRepositoryTarget = (target) => { + let repo = {}; + switch (target) { + case "LocalFilesystem": + repo = { + type: "local", + listing_type: "DirectoryListing", + repo: "../../snakeshack", + }; + break; + case "DirectoryListing": + repo = { + type: "github", + listing_type: "DirectoryListing", + repo: "kraemer-lab/vneyard", + }; + break; + case "BranchListing": + repo = { + type: "github", + listing_type: "BranchListing", + repo: "jsbrittain/snakeshack", + }; + break; + default: + console.error("Unknown repository type selected: ", target); + } + dispatch(builderSetRepositoryTarget(repo)); + }; + + const SetSnakemakeArgs = (args: string) => { + dispatch(builderSetSnakemakeArgs(args)); + }; + + return ( + <> +
+

Repository

+
+ +
+
+ +
+
+

Snakemake arguments

+ SetSnakemakeArgs(e.target.value)} + /> +
+ + ); +}; + +export default BuilderSettings; diff --git a/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx b/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx index c498e8f0..78e46bf2 100644 --- a/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx +++ b/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx @@ -14,6 +14,7 @@ import { DefaultNodeModel } from "NodeMap"; import { builderNodeSelected } from "redux/actions"; import { builderNodeDeselected } from "redux/actions"; import { builderUpdateNodeInfoName } from "redux/actions"; +import { builderCheckNodeDependencies } from "redux/actions"; interface IPayload { id: string; @@ -23,6 +24,24 @@ interface ExpandProps { nodeinfo: Record; } +interface ValidateButtonProps { + nodename: string; +} + +const ValidateButton = (props: ValidateButtonProps) => { + const dispatch = useAppDispatch(); + + const btnValidate = () => { + dispatch(builderCheckNodeDependencies(props.nodename)); + } + + return ( + + ); +}; + const ExpandButton = (props: ExpandProps) => { const [newnodes, setNewNodes] = React.useState(null); const dispatch = useAppDispatch(); @@ -124,11 +143,10 @@ const NodeInfoRenderer = (props) => { />
+ {PermitNodeExpand(nodeinfo) ? ( - ) : ( - <> - )} + ) : null}
( "builder/update-modules-list" ); +export const builderCheckNodeDependencies = createAction( + "builder/check-node-dependencies" +); + export const builderUpdateStatusText = createAction( "builder/update-status-text" ); @@ -44,6 +52,10 @@ export const builderUpdateNodeInfoName = createAction( "builder/update-node-info-name" ); +export const builderSetSnakemakeArgs = createAction( + "builder/set-snakemake-args" +); + export const builderSetRepositoryTarget = createAction>( // eslint-disable-line @typescript-eslint/no-explicit-any "builder/set-repository-target" ); diff --git a/nodemapper/src/redux/globals.ts b/nodemapper/src/redux/globals.ts index 9cf99e14..17a0a16d 100644 --- a/nodemapper/src/redux/globals.ts +++ b/nodemapper/src/redux/globals.ts @@ -6,5 +6,5 @@ export const getBackend = () => { // The backend was originally written using a Flask-based REST server. // The REST server is being migrated to an Electron-based application. // However, both backends are still supported for now. - return "electron"; //"rest"; // | electron + return "rest"; //"rest"; // | electron }; diff --git a/nodemapper/src/redux/middleware/builder.ts b/nodemapper/src/redux/middleware/builder.ts index 5564e93d..6c7dd953 100644 --- a/nodemapper/src/redux/middleware/builder.ts +++ b/nodemapper/src/redux/middleware/builder.ts @@ -35,7 +35,7 @@ export const builderMiddleware = ({ getState, dispatch }) => { CompileToJSON(); break; case "builder/build-and-run": - BuildAndRun(dispatch); + BuildAndRun(dispatch, getState().builder.snakemake_args); break; case "builder/clean-build-folder": CleanBuildFolder(dispatch); @@ -46,6 +46,9 @@ export const builderMiddleware = ({ getState, dispatch }) => { case "builder/add-link": AddLink(action, dispatch); break; + case "builder/check-node-dependencies": + CheckNodeDependencies(action.payload, dispatch); + break; case "builder/node-selected": NodeSelected(action, dispatch, getState); break; @@ -135,7 +138,10 @@ const CompileToJSON = async () => { } }; -const BuildAndRun = async (dispatchString: TPayloadString) => { +const BuildAndRun = async ( + dispatchString: TPayloadString, + snakemake_args: string +) => { const app = BuilderEngine.Instance; const query: Record = { query: "builder/build-and-run", @@ -143,6 +149,7 @@ const BuildAndRun = async (dispatchString: TPayloadString) => { format: "Snakefile", content: app.GetModuleListJSON(), targets: app.GetLeafNodeNames(), + args: snakemake_args, }, }; const callback = (result) => { @@ -209,8 +216,18 @@ const AddLink = async (action: IPayloadLink, dispatch: TPayloadString) => { const node = targetPort.getParent(); const nodename = JSON.parse(node.getOptions().extras)["name"]; + // Check node dependencies + CheckNodeDependencies(nodename, dispatch); +} + +const CheckNodeDependencies = async ( + nodename: string, + dispatch: TPayloadString, +) => { // Identify all incoming connections to the Target node and build // a JSON Builder object, given it's immediate dependencies + const app = BuilderEngine.Instance; + const node = app.getNodeByName(nodename) as DefaultNodeModel; const inputNodes = app.nodeScene.getNodeInputNodes(node); const depNodeNames = Object.values(inputNodes) as string[]; depNodeNames.unshift(nodename); @@ -224,12 +241,17 @@ const AddLink = async (action: IPayloadLink, dispatch: TPayloadString) => { content: JSON.stringify(jsDeps), }, }; + // Set node grey to indicate checking + const node_type = app.getProperty(node, "type"); + node.getOptions().color = "rgb(192,192,192)"; + app.engine.repaintCanvas(); + const 
callback = (data: Record) => { dispatch(builderUpdateStatusText("")); console.log(data); switch (data["body"]["status"]) { case "ok": - node.getOptions().color = "rgb(0,192,255)"; + node.getOptions().color = BuilderEngine.GetModuleTypeColor(node_type); break; case "missing": node.getOptions().color = "red"; @@ -237,6 +259,7 @@ const AddLink = async (action: IPayloadLink, dispatch: TPayloadString) => { default: console.error("Unexpected response: ", data["body"]); } + app.engine.repaintCanvas(); dispatch(builderRedraw()); }; switch (backend as string) { @@ -348,7 +371,7 @@ const GetRemoteModules = async ( repo: string ) => { // Get list of remote modules - dispatchString(builderUpdateStatusText("Loading remote modules...")); + dispatchString(builderUpdateStatusText("Loading modules...")); const app = BuilderEngine.Instance; const query: Record = { query: "builder/get-remote-modules", diff --git a/nodemapper/src/redux/reducers/builder.ts b/nodemapper/src/redux/reducers/builder.ts index 7e52dde9..b41fb96c 100644 --- a/nodemapper/src/redux/reducers/builder.ts +++ b/nodemapper/src/redux/reducers/builder.ts @@ -9,6 +9,8 @@ interface IBuilderState { nodeinfo: string; can_selected_expand: boolean; terminal_visibile: boolean; + settings_visible: boolean; + snakemake_args: string; } // State @@ -23,6 +25,8 @@ const builderStateInit: IBuilderState = { nodeinfo: "{}", // {} requires to be a valid JSON string can_selected_expand: true, terminal_visibile: false, + settings_visible: false, + snakemake_args: "--cores 1 --use-conda $(snakemake --list)", }; // Nodemap @@ -78,6 +82,14 @@ const builderReducer = createReducer(builderStateInit, (builder) => { .addCase(actions.builderToggleTerminalVisibility, (state, action) => { state.terminal_visibile = !state.terminal_visibile; console.info("[Reducer] " + action.type); + }) + .addCase(actions.builderToggleSettingsVisibility, (state, action) => { + state.settings_visible = !state.settings_visible; + console.info("[Reducer] " + action.type); + }) + .addCase(actions.builderSetSnakemakeArgs, (state, action) => { + state.snakemake_args = action.payload; + console.info("[Reducer] " + action.type); }); }); diff --git a/runner/runner/runner.py b/runner/runner/runner.py index e403f4b7..70b5bd76 100644 --- a/runner/runner/runner.py +++ b/runner/runner/runner.py @@ -49,10 +49,16 @@ def Launch(data: dict, **kwargs) -> dict: return launch_response -def Launch_cmd(data: dict, **kwargs) -> dict: +def Launch_cmd(data: dict, *args, **kwargs) -> dict: """Returns the launch command for a workflow""" if data["format"] == "Snakefile": - cmd, workdir = snakefile.Launch_cmd(data["content"], **kwargs) + snakemake_args = data.get("args", "").split(" ") + cmd, workdir = snakefile.Launch_cmd( + data["content"], + *snakemake_args, + *args, + **kwargs, + ) launch_response: dict = {"command": cmd, "workdir": workdir} else: raise ValueError(f"Format not supported: {data['format']}") diff --git a/runner/runner/snakemake_runner/snakefile.py b/runner/runner/snakemake_runner/snakefile.py index e58cd7f1..e78fb40b 100644 --- a/runner/runner/snakemake_runner/snakefile.py +++ b/runner/runner/snakemake_runner/snakefile.py @@ -44,21 +44,20 @@ def DeleteAllOutput(filename: str) -> dict: } -def Launch_cmd(filename: str, **kwargs) -> Tuple[List[str], str]: +def Launch_cmd(filename: str, *args, **kwargs) -> Tuple[List[str], str]: """Return the snakemake launch command and working directory""" filename, workdir = GetFileAndWorkingDirectory(filename) return snakemake_cmd( filename, - "--nolock", - 
"$(snakemake --list)", workdir=workdir, + *args, **kwargs, ) -def Launch(filename: str, **kwargs) -> dict: +def Launch(filename: str, *args, **kwargs) -> dict: """Launch snakemake workflow given a [locally accessible] location""" - cmd, workdir = Launch_cmd(filename, **kwargs) + cmd, workdir = Launch_cmd(filename, *args, **kwargs) stdout, stderr = snakemake_run(cmd, workdir) return { "status": "ok" if not stderr else "error", @@ -474,14 +473,8 @@ def snakemake_cmd(filename: str, *args, **kwargs) -> Tuple[List[str], str]: pass # Collate arguments list arglist = list(args) - # Ensure conda is enabled - if "--use-conda" not in arglist: - arglist.append("--use-conda") for k, v in kwargs.items(): arglist.extend([k, v]) - # Default set a single core if none specified - if "--cores" not in kwargs.keys() and "--cores" not in args: - arglist.extend(["--cores", "1"]) # Launch process and wait for return cmd = [ "snakemake", From b0a2e8677873e589b93d1476e492a2a045758648 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 16:58:14 +0100 Subject: [PATCH 06/11] Open terminal on build+test; Add sub-modules and validation docs --- docs/getting_started/quickstart.md | 1 + docs/getting_started/submodules.md | 148 ++++++++++++++++++++++- docs/getting_started/validation.md | 86 +++++++++++++ nodemapper/src/gui/Builder/Header.tsx | 2 + nodemapper/src/redux/actions/builder.ts | 2 + nodemapper/src/redux/globals.ts | 2 +- nodemapper/src/redux/reducers/builder.ts | 4 + 7 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 docs/getting_started/validation.md diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md index 202f0f07..ba8ec71b 100644 --- a/docs/getting_started/quickstart.md +++ b/docs/getting_started/quickstart.md @@ -18,4 +18,5 @@ maxdepth: 1 --- builder submodules +validation ``` diff --git a/docs/getting_started/submodules.md b/docs/getting_started/submodules.md index a2b602cc..0eb6bee9 100644 --- a/docs/getting_started/submodules.md +++ b/docs/getting_started/submodules.md @@ -1,5 +1,149 @@ -# Hierarchy of modules +# Sub-modules One of the great benefits of modular construction is the capacity to (re)-use components in a hierarchy - providing high level modules that make use of lower -level modules to provide abstraction, accessibility and reuse. +level modules to provide abstraction, accessibility and reuse. We can +demonstrate this principle by making use of the previous tutorial modules. + +So far we have made use of pre-constructed modules that are published online. +Now, we want to make use of these modules to construct module grouping, and to +make use of those groupings in a hierarchical fashion. This requires us to be +able to store our modules somewhere - and specifically, in a repository. + +Let us begin by taking the `build.zip` file that you constructed in the previous +tutorial and unzipping it into a folder somewhere (most likely in your +`Downloads` folder). You will now have a folder called `build` that contains +the Snakemake workflow files and configuration for the previous tutorial. +Rename the folder to something more descriptive, such as +`PlotCovidData`. + +## Module repositories + +Repositories in GRAPEVNE follow a strict folder structure, which we must now +make in order for GRAPEVNE to recognise your module. Create the following folder +structure in the directory of your choice (e.g. 
in `Downloads`) and copy your
+module into the `modules` folder:
+```
+vneyard                      <--- root repository folder
+└── workflows                <--- workflows folder (required)
+    └── My Modules           <--- project name
+        └── modules          <--- modules folder
+            └── PlotCovidData
+```
+
+The folders `workflows` and `modules` are required names, whereas the names of
+the base repository folder (`vneyard`), the project name (`My Modules`) and the
+list of modules themselves (`PlotCovidData`) can be changed
+(but should be present).
+
+## Navigating to a local repository
+
+To navigate to a local repository in GRAPEVNE, open the `Settings` menu,
+select `Local filesystem` from the `Repository` drop-down box, and enter the
+path to the root repository folder in the url text box (e.g.
+`/Users/home/MyUserName/Downloads/vneyard`). Now close the settings menu (by
+clicking on the `Settings` button again) and refresh the modules list by
+clicking `Get Module List`. After a few seconds you should see the modules list
+change to include only one module: `PlotCovidData`.
+
+## Editing modules
+
+Before we go any further, clear the graph (this will also delete the test build
+folder to provide a clean build environment). Now, drag your
+`PlotCovidData` module into the main graph area. Notice that it
+appears as a 'source' module, i.e. one without any inputs. This is because,
+when you built the module, all input ports were connected. Remember that this
+module now represents all of the previous modules that we built, and we can test
+build the module in order to demonstrate this (doing so will require the build
+environment to download again, so this may take some time).
+
+We now have a choice - are we happy with the module as it stands, or do we want
+to make it more generalizable?
+
+### Altering parameters
+
+If all we want to do is change a parameter of the analysis, then we can
+accomplish this by clicking on the node. Notice when you do this that the
+node information contains all of the configuration settings for the previous
+five modules, combined in a structured fashion. Due to this, it is incredibly
+simple to locate the url of the file that is downloaded, or the name of the
+country that we are filtering for, and to change them directly in this
+interface. **Doing so will not change the module configuration in your
+repository**. GRAPEVNE does not write changes to repositories. If you make
+parameter changes to your workflows then you can execute them directly in
+GRAPEVNE and/or build them to a new workflow file that can be shared with others.
+The original module specification remains unchanged.
+
+### Expanding modules
+
+If we want to take a closer look at a module, or if we want to change something
+more fundamental in the workflow - for example, we may want to remove the
+`Download` function to generalize the module - then we need
+to expand the module into its constituent parts.
+
+Click on the `PlotCovidData` module so that its information
+pane appears on the right. Since this is a module made up of other modules,
+you will notice a new button has appeared in the top-right-hand corner, named
+`Expand`. Click this button now, then tidy up the graph (click `Arrange graph`)
+and you should see a familiar sight!
+
+```{note}
+Before we go any further, notice that although we are currently browsing our
+own (local) repository, the module is constructed from nodes that are part of
+another repository. This allows for easy sharing and reuse of modules.
Although
+we reference the modules by a github path and `branch` (which provides the
+latest updates), you can also reference modules by `commit` in order to ensure
+that you are always using the same version.
+```
+
+### Modules with dependencies
+
+Delete the `Download` module from your graph, as well as the two `Plot` modules.
+Your graph should now contain only two modules: `Filter` and `AggregateByMonth`.
+Build the workflow (`Build / Zip`) and save the resulting zip. Unzip it, name
+the folder `FilterAndAggregateByMonth`, then place it into the `modules`
+folder in your local repository. Refresh GRAPEVNE by clicking `Get Module List`.
+You should now see two modules in your repository: `PlotCovidData` and
+`FilterAndAggregateByMonth`.
+
+Clear the graph (`Clear Graph`), then drag `FilterAndAggregateByMonth` into the
+main graph area. Notice that the module appears as a standard module with a
+single input port (whose name reflects the sub-module [and specific port] to
+which it connects).
+
+Drag a `Download` module into the scene, change its parameters to:
+```
+url: https://covid19.who.int/WHO-COVID-19-global-data.csv
+filename: data.csv
+```
+then connect it to the input port of
+`FilterAndAggregateByMonth`. Drag a `Plot` module into the scene and
+connect it to the output of `FilterAndAggregateByMonth`. If you `Build and Test`
+the workflow at this point then you should see our familiar aggregated data
+graph.
+
+```{note}
+At present each module is limited to a single output, meaning that modules do
+not currently support branching workflows, so we cannot fully
+recreate our previous graph (which plotted directly from the `Filter`
+module also).
+```
+
+This module is much more useful as it provides composite functionality, while
+permitting input and output to and from any other compatible modules.
+
+### Expanding a connected module
+
+Click on the `FilterAndAggregateByMonth` module to bring up its information
+panel. Notice that the `Expand` button is visible as this module is constructed
+from sub-modules. Click on `Expand`, then tidy up the graph by clicking
+`Arrange graph`.
+
+Notice that both the input and output connection(s) are preserved. This allows
+you to expand modules in your graph without losing continuity. Such
+functionality would allow you to, for example, expand the node and replace
+a sub-node (let us say the `AggregateByMonth` module in this case) with
+another module. Although the example given here is straightforward, we envisage
+these modules / sub-modules representing a rich hierarchical architecture of
+processes that can be easily swapped in and out (and even tested) within the
+editor.
diff --git a/docs/getting_started/validation.md b/docs/getting_started/validation.md
new file mode 100644
index 00000000..153db288
--- /dev/null
+++ b/docs/getting_started/validation.md
@@ -0,0 +1,86 @@
+# Validation checks
+
+There are many forms of validation that can be applied to a workflow. These range
+from assessing
+whether modules are compatible with one another, to data-dependent validation
+procedures.
+The latter (data-dependent validation) can generally only be assessed when data
+become available in the workflow, and as such are best handled by Utility
+modules that are designed to verify file formats, file contents, etc.
+
+Module compatibility, on the other hand, can be assessed during construction of
+the workflows directly in the GRAPEVNE Builder. This is
+achieved by checking which dependencies are being fulfilled - and which are not -
+for a particular module.
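The note further down this page describes how the check is implemented behind the scenes; as a rough, self-contained sketch of the idea only (the `build_graph` helper and the graph methods below are hypothetical stand-ins, not the actual GRAPEVNE backend API), the logic amounts to:

```python
# Illustrative sketch only - build_graph and its graph methods are
# hypothetical stand-ins, not the real GRAPEVNE backend API.


def missing_dependencies(build_graph, modules):
    """Inputs that the workflow graph built from `modules` still expects,
    but which no rule in that graph produces."""
    graph = build_graph(modules)
    return set(graph.required_inputs()) - set(graph.produced_outputs())


def dependencies_met(build_graph, target, parents):
    """A module's dependencies are met when its first-order parent modules
    provide everything the module was missing in isolation."""
    missing_alone = missing_dependencies(build_graph, [target])
    missing_with_parents = missing_dependencies(build_graph, [target, *parents])
    # Anything in this intersection is an input of the target that even its
    # parents do not provide (the parents' own missing inputs are reported
    # separately and do not count against the target).
    return not (missing_alone & missing_with_parents)
```

In the Builder, the outcome of this check is what drives the module colouring described below (red for unmet dependencies, blue once they are met).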
+
+We can demonstrate this process by making use of the `Download` and `Filter`
+modules from the `Tutorial Builder` set.
+
+Open the GRAPEVNE Builder, or clear the current graph (performing either action
+will automatically delete any previous `Test` builds). Ensure that your
+repository is set to `Directory Listing (github)` and `kraemer-lab/vneyard`,
+then click `Get Module List`. Once the modules load, filter the list to include
+only those belonging to the `Tutorial Builder` group.
+
+Now, drag the `Download` module into the main graph building area, but this
+time leave the default values as they are. Next, drag a `Filter` module into
+the graph and connect the `Out` port of the `Download` module to the `In` port
+of the `Filter` module.
+
+_Notice that when you draw this connection, the `Filter` module will turn grey
+for several seconds and, in this case, will then turn red._
+
+A red module indicates that its dependencies are not being met given its current
+inputs. However, we constructed exactly this graph earlier without issue, so
+what is the problem?
+
+The problem lies in the fact that the `Filter` module is (by default) set to
+look for (and read from) a file called `data.csv`. As the `Download` module is
+currently set to retrieve the Snakemake logo (a `.png` file), this dependency
+is not being fulfilled, and so this is indicated by the module turning red.
+
+To resolve the problem, in this case, we want to change the output filename
+from the `Download` module so that it remains compatible with the `Filter`
+module. To do this, click on the `Download` module and change the `filename`
+parameter (`config-params-filename`) to `data.csv`. Note that the module remains
+red for the time being.
+
+Module dependencies are assessed when new links are made between modules - but
+this process is quite computationally intensive, so to re-run the validation
+check it is necessary to open the module of interest (by clicking on it; click
+on the `Filter` module now),
+and then click on the `Validate` button that appears at the top of the
+information panel. Do this and you should see the module first turn grey
+(indicating that the validation check has started), and then turn blue,
+indicating that all is well and the module dependencies are being met through
+its inputs.
+
+```{note}
+In practice, these checks are implemented (in the background) by attempting to
+build a workflow graph for the specific module in isolation. This produces a
+list of dependencies that are not currently being fulfilled. We then build
+another workflow graph, this time including its parent connections (its
+immediate, first-order inputs). If those modules fulfill all of the original
+missing dependencies (acknowledging that they will report their own [separate]
+dependencies that may be missing), then we know that the target module has its
+dependencies fulfilled. By applying this philosophy iteratively throughout the
+graph, we can assess whether each module is being provided with appropriate data
+and can highlight the specific modules that are not.
+```
+
+It is also worth noting that although the modules are now seen as compatible by
+GRAPEVNE, we did not actually change the url of the file that we are downloading,
+therefore if we ran this workflow now we would receive an error since the
+`Filter` module would attempt to open `data.csv` as a comma-separated values
+file, whereas it is in fact just a renamed `png` image.
This highlights the +fundamental difference between +a dependency that we can assess during construction (basic syntax and +requirements checking), versus a data-dependent check, that requires the +workflow to run in order to load and validate data files, to ensure that the +data being received is of the correct type and in the correct format. + +We could in-fact place a very simple data-validation module into our pipeline +in-between the `Download` module and the `Filter` module. Or (as would perhaps +be more fitting in this example) we could apply data validation checks during +the reading of the `csv` file itself, as part of the `Filter` module. Either +option is available depending on the requirements of the situation. diff --git a/nodemapper/src/gui/Builder/Header.tsx b/nodemapper/src/gui/Builder/Header.tsx index d578cc00..31780526 100644 --- a/nodemapper/src/gui/Builder/Header.tsx +++ b/nodemapper/src/gui/Builder/Header.tsx @@ -11,6 +11,7 @@ import { builderLoadNodemap } from "redux/actions"; import { builderSaveNodemap } from "redux/actions"; import { builderImportModule } from "redux/actions"; import { builderBuildAndRun } from "redux/actions"; +import { builderOpenTerminal } from "redux/actions"; import { builderCompileToJson } from "redux/actions"; import { builderNodeDeselected } from "redux/actions"; import { builderCleanBuildFolder } from "redux/actions"; @@ -56,6 +57,7 @@ const Header = () => { // Run - build and run the workflow const btnRun = () => { dispatch(builderBuildAndRun()); + dispatch(builderOpenTerminal()); }; // Clean build folder diff --git a/nodemapper/src/redux/actions/builder.ts b/nodemapper/src/redux/actions/builder.ts index 3c95abf8..4e504482 100644 --- a/nodemapper/src/redux/actions/builder.ts +++ b/nodemapper/src/redux/actions/builder.ts @@ -6,6 +6,8 @@ export const builderSaveNodemap = createAction("builder/save-nodemap"); export const builderCompileToJson = createAction("builder/compile-to-json"); +export const builderOpenTerminal = createAction("builder/open-terminal"); + export const builderBuildAndRun = createAction("builder/build-and-run"); export const builderToggleTerminalVisibility = createAction( diff --git a/nodemapper/src/redux/globals.ts b/nodemapper/src/redux/globals.ts index 17a0a16d..9cf99e14 100644 --- a/nodemapper/src/redux/globals.ts +++ b/nodemapper/src/redux/globals.ts @@ -6,5 +6,5 @@ export const getBackend = () => { // The backend was originally written using a Flask-based REST server. // The REST server is being migrated to an Electron-based application. // However, both backends are still supported for now. 
- return "rest"; //"rest"; // | electron + return "electron"; //"rest"; // | electron }; diff --git a/nodemapper/src/redux/reducers/builder.ts b/nodemapper/src/redux/reducers/builder.ts index b41fb96c..2c77b786 100644 --- a/nodemapper/src/redux/reducers/builder.ts +++ b/nodemapper/src/redux/reducers/builder.ts @@ -83,6 +83,10 @@ const builderReducer = createReducer(builderStateInit, (builder) => { state.terminal_visibile = !state.terminal_visibile; console.info("[Reducer] " + action.type); }) + .addCase(actions.builderOpenTerminal, (state, action) => { + state.terminal_visibile = true; + console.info("[Reducer] " + action.type); + }) .addCase(actions.builderToggleSettingsVisibility, (state, action) => { state.settings_visible = !state.settings_visible; console.info("[Reducer] " + action.type); From 5c7e5c6ab83291b0d3d124790845c3966898ac74 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 19:41:56 +0100 Subject: [PATCH 07/11] (docs) add tutorial module descriptions and tutorial challenge --- docs/getting_started/challenge.md | 67 ++++ .../{tutorial-5checkpoints.md => datadeps.md} | 0 docs/getting_started/inside-modules.md | 367 ++++++++++++++++++ docs/getting_started/modules.md | 2 +- docs/getting_started/quickstart.md | 6 +- docs/getting_started/tutorial-4challenge.md | 18 - docs/getting_started/tutorial.md | 2 - docs/index.md | 1 - 8 files changed, 440 insertions(+), 23 deletions(-) create mode 100644 docs/getting_started/challenge.md rename docs/getting_started/{tutorial-5checkpoints.md => datadeps.md} (100%) create mode 100644 docs/getting_started/inside-modules.md delete mode 100644 docs/getting_started/tutorial-4challenge.md diff --git a/docs/getting_started/challenge.md b/docs/getting_started/challenge.md new file mode 100644 index 00000000..3ff42dd5 --- /dev/null +++ b/docs/getting_started/challenge.md @@ -0,0 +1,67 @@ +# Challenge + +This challenge is designed to allow you to engage with GRAPEVNE using your +prefered programming or scripting language. For example, if you are comfortable +programming in R, then try to complete the challenge in R. + +Build the following workflow using GRAPEVNE modules. + +## Outline + +Create a set of modules, and then combine that set in a hierarchy / composition +to achieve the following: download a given file and calculate the following +statistic for each letter of the alphabet: +"number of words beginning with each letter of the alphabet, minus the number +of words ending with that letter". For example, +if there were 20 words in a given file that began with the letter 'a', and 25 +that ended with the letter 'a', then the output file should contain a list with +each letter of the alphabet, accompanied by the calculated metric, so the first +line would read "-5" in this case. The file should contain one line for every +letter of the alphabet. + +As extended exercises: +1) limit the analysis to only consider words of a +specified length (i.e. words between, say, 4-8 letters long) +2) plot the results as a bar graph with a separate bar for each letter of the +alphabet. + +In order to make best use of GRAPEVNE modules and hierarchies, it is recommended +to take the following approach: + +### Module 1: Download a words list + +Create a module (that runs in a conda environment) to download a list of words. 
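If you would like a starting point, a minimal skeleton for such a module (mirroring the tutorial `Download` module described on the "Inside the tutorial modules" page) could look something like the sketch below; the rule name, namespaces, `url` and `filename` values are placeholders for you to adapt to the word list linked below:

```python
# Sketch of a possible Snakefile for Module 1 - all values are placeholders
configfile: "config/config.yaml"
from snakemake.remote import AUTO

outdir = config["output_namespace"]
params = config["params"]

rule download_words:
    input:
        url=AUTO.remote(params["url"])  # e.g. the words_alpha.txt link below
    output:
        expand(
            "results/{outdir}/{filename}",
            outdir=outdir,
            filename=params["filename"],
        )
    conda:
        "envs/conda.yaml"  # conda environment that provides wget
    shell:
        """
        wget -c {input.url} -O {output}
        """
```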
+
+Here is a list of English words: [https://github.com/dwyl/english-words/blob/master/words_alpha.txt](https://github.com/dwyl/english-words/blob/master/words_alpha.txt) (credit to: [https://github.com/dwyl/english-words](https://github.com/dwyl/english-words)).
+
+
+### Module 2: Count the number of words beginning with each letter of the alphabet
+
+Create a module that takes as input a text file, and produces as output a file
+listing each letter of the alphabet along with the number of words in the input
+file that began with that letter.
+
+### Module 3: Reverse words
+
+Create a module that reverses the text on each line of an input file, and
+produces as output a file containing the reversed words.
+
+### Module 4: Subtract two numeric files from one another
+
+Create a module that takes as input *two* files containing lists of numbers,
+and produces as output a single file containing the difference (i.e. `a-b`)
+in row-wise fashion (i.e. if file 1 contained lines `1 2 3 4 5` and file 2
+contained lines `3 1 5 2 3` then the output would be `-2 1 -2 2 2`).
+
+### Extended Module 1: Filter by word length
+
+Create a module that takes a text file as input and produces another text file
+as output, where the output contains only those words that are within a
+specified word length. The parameters should be adjustable but could be, for
+example, larger than or equal to 4 letters and shorter than or equal to 8
+letters.
+
+### Extended Module 2: Bar graph
+
+Create a module that produces a bar graph given an input file consisting of a
+list of numbers.
diff --git a/docs/getting_started/tutorial-5checkpoints.md b/docs/getting_started/datadeps.md
similarity index 100%
rename from docs/getting_started/tutorial-5checkpoints.md
rename to docs/getting_started/datadeps.md
diff --git a/docs/getting_started/inside-modules.md b/docs/getting_started/inside-modules.md
new file mode 100644
index 00000000..0cc7051f
--- /dev/null
+++ b/docs/getting_started/inside-modules.md
@@ -0,0 +1,367 @@
+# Inside the tutorial modules
+
+So what's inside the pre-prepared modules that we have already come across?
+Let's take a look...
+
+## Tutorial Builder
+
+### Download
+
+The `Download` module is both the first rule in our tutorial pipeline, and the
+simplest to explain. That being said, we will also discuss the folder structure
+and accompanying files while introducing this module.
+
+`Download` makes use of a command called `wget` in order to download (and rename)
+a file from a url address. But how do we ensure that this particular command
+is available on any computer? For this we make use of virtual environments, in
+particular [`conda`](https://docs.conda.io/projects/conda/en/latest/index.html)
+environments. These are natively supported by Snakemake and so provide a natural
+mechanism to handle application dependencies.
+
+The file structure for this module looks like this:
+```
+Download
+├── config
+│   └── config.yaml
+└── workflow
+    ├── Snakefile
+    └── envs
+        └── conda.yaml
+```
+
+There are three files of interest:
+- `Snakefile`: contains the rules for the module,
+- `config.yaml`: contains the configuration information, which is displayed in
+- `conda.yaml`: contains information concerning the conda environment + +The `config.yaml` file contains the following: +```yaml +/config/config.yaml +input_namespace: null +output_namespace: out +params: + url: "https://snakemake.github.io/img/jk/logo.png" + filename: "snakemake.png" +``` + +The `Snakefile` file contains the following: +```python +configfile: "config/config.yaml" +from snakemake.remote import AUTO + +indir=config["input_namespace"] +outdir=config["output_namespace"] +params=config["params"] + +rule target: + input: + url=AUTO.remote(params["url"]) + output: + expand( + "results/{outdir}/{filename}", + outdir=outdir, + filename=params["filename"] + ) + log: + f"benchmark/{outdir}.txt" + benchmark: + f"benchmark/{outdir}" + conda: + "envs/conda.yaml" + shell: + """ + wget -c {input.url} -O {output} + """ +``` + +The `conda.yaml` file contains the following: +```yaml +channels: + - bioconda +``` + +The contents of `config.yaml` should look familiar, as it reflects the +module configuration as seen in GRAPEVNE (you can double-check this by +dragging the `Download` module into the main graph area and clicking on it). +Note that the `input_namespace` is explicitely declared `null` to reflect the +fact that this module does not take any inputs, and instead operates a 'source' +module. + +The `conda.yaml` file is a minimial example that simply declares a channel, +but does not list any additional requirements or dependencies. These will +become more interesting in the next module. + +The `Snakefile` contains the rules (or 'rule' in this case) for the module. +The file begins by specifying the location of the `config.yaml` file. +Note that we declare `indir`, `outdir` and `params` as convenience variables +so that we do not have to type `config["input_namespace"]`, etc. throughout +the rest of the file. Also note that Snakefile's may contain python code, and +we take advantage of this to import `AUTO` from `snakemake.remote` to assist +us in declaring the url file as a remote dependency (see below). + +The main rule itself `target` declares an `input` (the location pointed to +by the url), an `output file` (the destination filename to use; note that this +resides in `results/{outdir}`, and a `shell` command that uses `wget` to +retrieve the file at the given url. The use of `AUTO.remote` tells Snakemake +that this file is a remote file and should be downloaded. +There are some additional directives: +`conda` provides the path to the conda environment file; `log` provides the +path to store log-files related to this rule; `benchmark` provides the path +to store benchmark timings for rule execution. + +And that's it for a first module! + +### Filter + +The filter module is more interesting in several respects: it takes an input +namespace from another module, declares application dependencies, and runs +a custom script to process data. 
The folder structure +of the `Filter` module is similar to that of `Download`, with the addition of a +`resources` folder that contains the custom script, and a dummy input file that +we use for testing, in the `results` folder): +``` +Filter +├── config +│   └── config.yaml +├── resources +│   └── scripts +│   └── filter.py +├── results +│   └── in +│   └── data.csv +└── workflow + ├── Snakefile + └── envs + └── conda.yaml +``` + +The `config.yaml` file contains the following: +```yaml +input_namespace: in +output_namespace: out +params: + Source: "data.csv" + Filters: + Country_code: "ZA" +``` + +The `Snakemake` file contains the following: +```python +configfile: "config/config.yaml" +import json + +indir = config["input_namespace"] +outdir = config["output_namespace"] +params = config["params"] + +rule target: + input: + source=expand( + "results/{indir}/{filename}", + indir=indir, + filename=params["Source"], + ), + script=srcdir("../resources/scripts/filter.py"), + output: + expand( + "results/{outdir}/{filename}", + outdir=outdir, + filename=params["Source"], + ), + log: + "logs/test.log" + params: + filters=json.dumps(params["Filters"]).replace("\"", "\'"), + conda: + "envs/conda.yaml" + shell: + """ + python {input.script} \ + --source="{input.source}" \ + --dest="{output}" \ + --filters="{params.filters}" \ + """ +``` + +Here we see the main Snakefile and associated configuration file. The Snakefile +declares all of the directives that we saw previously, with the addition of +`params` which is used to pass parameters from the configuration file into +the `shell` directive. Also note that the `input` directive associates names +with the two inputs (one is a 'source' file, to be provided by another module; +the other is the script to execute [it is not strictly necessary to declare +these here]). The `shell` directive shows that the rule itself will launch a +`python` script to process the data. Despite the fact that Snakefile's are +natively pythonic themselves, we could launch any application, such as R-scripts +or shell commands from here - we simply use a python script as a convenient +example in this case. + +That filtering script itself is located in `resources/scripts/filter.py` and +contains the following (note that it is not necessary to fully comprehend this +file if you are not familiar with python - just note that we `import` several +dependencies at the top of the file and will need to tell Snakemake where to +find these in a moment...): +```python +import json +import argparse +import pandas as pd + + +def Filter( + source: str, + dest: str, + filters: dict = {}, +) -> None: + # Read data + df = pd.read_csv(source) + + # Filter data + for k, v in filters.items(): + df = df.loc[df[k] == v] + + df.to_csv(dest, index=False) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--source", type=str, default="") + parser.add_argument("--dest", type=str, default="") + parser.add_argument("--filters", type=str, default="") + + Filter( + source=parser.parse_args().source, + dest=parser.parse_args().dest, + filters=json.loads(parser.parse_args().filters.replace("\'", "\"")), + ) +``` + +As mentioned, this file processes the data, but depends on several applications +and libraries - not least of which is `python` itself, which may not be +installed on the host computer. 
To declare these dependencies (and allow
+Snakemake to initialise a conda environment in which to download and prepare
+these requirements), we make use of the `conda.yaml` file, which is set up as
+follows:
+```yaml
+channels:
+  - bioconda
+dependencies:
+  - python
+  - pip:
+    - numpy
+    - pandas
+```
+
+The file states that in order for this rule to execute correctly, we need
+`python` and `pip` (the third-party package installer for python), as well as
+several python packages, installed through pip (`numpy` and `pandas`). These
+are quite `heavy` dependencies and this module could almost certainly be
+written in a more succinct way, but it provides an informative example.
+
+When the `target` rule is run, the conda environment is set up and launched.
+Then, the shell command is executed in that environment, and the environment is
+finally closed down once the command finishes executing. The output of the
+command is a file, written to `{output}` (or, in this case, `results/out/data.csv`).
+
+This module demonstrates the broad applicability and functionality of Snakemake.
+Finally, we note that in order to develop and test the module (outside of
+GRAPEVNE) it is useful to specify default values for the input and output
+namespaces (usually "in" and "out", respectively), as well as providing a
+surrogate file to simulate incoming data (in this case a small `data.csv` file
+is placed in the `results/in` folder). These do not contribute during normal
+workflow execution.
+
+### Aggregate By Month
+
+The `AggregateByMonth` module follows the same pattern as the `Filter` module,
+except for the specifics of the script file and associated dependencies. To
+view these files, see the corresponding github folder: [AggregateByMonth](https://github.com/kraemer-lab/vneyard/tree/main/workflows/Tutorial%20Builder/modules/AggregateByMonth).
+
+### Select
+
+Likewise the `Select` module follows a similar pattern to both the `Filter`
+and `AggregateByMonth` modules, excepting the specifics of their implementation
+in the script file, and associated dependencies. To view these files, see the
+corresponding github folder: [Select](https://github.com/kraemer-lab/vneyard/tree/main/workflows/Tutorial%20Builder/modules/Select)
+
+### Plot
+
+The `Plot` module also follows a similar pattern to the above modules, but
+provides a graphical output. While we have (again) utilised `python` (and
+the `matplotlib` library in this case), we could have just as easily implemented
+these scripts in R (using packages such as `ggplot2`), or made use of any
+other languages or packages as needed. We simply need to ensure that the
+ +For demonstration purposes, the contents of the `resources/scripts/plotcol.py` +file are: +```python +import json +import argparse +import pandas as pd +import matplotlib.pyplot as plt + + +def PlotColumn( + source: str, + col_x: str, + col_y: str, +) -> None: + # Read data + df = pd.read_csv(source) + + df.index = df[col_x] + series = df[col_y] + + # Plot data + fig, ax = plt.subplots() + ax.plot(series) + if len(series) > 12: + ax.xaxis.set_major_locator(plt.MaxNLocator(12)) + ax.set(xlabel=col_x, ylabel=col_y) + plt.xticks(rotation=45) + plt.show() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--source", type=str, default="") + parser.add_argument("--col_x", type=str, default="") + parser.add_argument("--col_y", type=str, default="") + + PlotColumn( + source=parser.parse_args().source, + col_x=parser.parse_args().col_x, + col_y=parser.parse_args().col_y, + ) +``` + +The contents of the `conda.yaml` file are: +```yaml +channels: + - bioconda +dependencies: + - python + - pip: + - numpy + - pandas + - matplotlib + - lxml +``` +noting the inclusion of `matplotlib` to produce the graphical output, which is +installed by `pip` as a dependency. + +## Summary + +Here we have seen that constructing basic modules is incredibly simple, and +each module can contain as much (or as little) sophistication as required. +That is not to say that all modules are straightforward to implement, in +particular modules with data-dependencies offer unique challenges that we +cover elsewhere in the tutorial, but for the vast majority of cases the module +constuction process is flexible and can make use of the extensive resources +and applications already developed to overcome most challenges. + +As a final note, while all of the modules shown here contain only a single +`rule`, this is not a restricton and multi-rule Snakefiles can be included +as modules. With this in mind also note that the `conda` directive applies at +the `rule` level, meaning multiple conda environment files can be provided, +each associated with a separate rule. diff --git a/docs/getting_started/modules.md b/docs/getting_started/modules.md index 9bfc5f66..3521015c 100644 --- a/docs/getting_started/modules.md +++ b/docs/getting_started/modules.md @@ -1,4 +1,4 @@ -# Modules +# Inside modules This page provides a description of the basic block of GRAPEVNE - the Module specification. GRAPEVNE works by connecting 'Modules' together. These modules diff --git a/docs/getting_started/quickstart.md b/docs/getting_started/quickstart.md index ba8ec71b..c41d44fe 100644 --- a/docs/getting_started/quickstart.md +++ b/docs/getting_started/quickstart.md @@ -1,4 +1,4 @@ -# Quickstart +# Tutorial The quickstart guide begins with a walkthrough of a basic workflow, constructed from modules that are available through our online repositories. 
You will be @@ -19,4 +19,8 @@ maxdepth: 1 builder submodules validation +modules +inside-modules +challenge +datadeps ``` diff --git a/docs/getting_started/tutorial-4challenge.md b/docs/getting_started/tutorial-4challenge.md deleted file mode 100644 index f84f13d5..00000000 --- a/docs/getting_started/tutorial-4challenge.md +++ /dev/null @@ -1,18 +0,0 @@ -# Challenge - -Build the following workflow in Phyloflow modules: - -(graph and rule specifications here) - -Outline: - -- Node 1 (mock input; use `pip` `Random-Word` in `envs` to build a list) -- Node 2 (preprocess input from node 1; filter by word length) -- Node 3 (input from Node1 and Node2; count words beginning with each letter) - -Once complete, change parameters on Node 2 and re-run. - -Once complete, build a replacement for Node 2 (reverse letter sequence) and -make the substitution in the graph. - -Provide expandable solutions. diff --git a/docs/getting_started/tutorial.md b/docs/getting_started/tutorial.md index b9ea7a50..cc631628 100644 --- a/docs/getting_started/tutorial.md +++ b/docs/getting_started/tutorial.md @@ -17,7 +17,5 @@ tutorial-0intro tutorial-1seeds tutorial-2alignment tutorial-3inputs -tutorial-4challenge -tutorial-5checkpoints tutorial-6pipeline ``` diff --git a/docs/index.md b/docs/index.md index 551ae61c..8e18c829 100644 --- a/docs/index.md +++ b/docs/index.md @@ -26,7 +26,6 @@ maxdepth: 1 --- getting_started/installation getting_started/quickstart -getting_started/modules getting_started/tutorial ``` From 5bbc3f0ff260326113fda118caebaa37b00eab78 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 19:42:43 +0100 Subject: [PATCH 08/11] Enable installer builds --- electron-app/forge.config.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/electron-app/forge.config.js b/electron-app/forge.config.js index e1be506a..8b231b0a 100644 --- a/electron-app/forge.config.js +++ b/electron-app/forge.config.js @@ -11,7 +11,7 @@ module.exports = { }, // Installers - /*{ + { name: "@electron-forge/maker-squirrel", config: {}, }, @@ -22,7 +22,7 @@ module.exports = { { name: "@electron-forge/maker-rpm", config: {}, - },*/ + }, ], publishers: [ { From 4417c8ff96e2fd53328991d35d2fb373dda9c0a5 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 21:23:10 +0100 Subject: [PATCH 09/11] Fix misdirected snakefile dict (copy->deepcopy for snakefile->config structure during build) --- builder/builder/builder.py | 5 +++-- electron-app/src/main.ts | 3 +-- nodemapper/src/redux/globals.ts | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builder/builder/builder.py b/builder/builder/builder.py index 9707e668..57ab3b47 100644 --- a/builder/builder/builder.py +++ b/builder/builder/builder.py @@ -1,6 +1,7 @@ import argparse import json import pathlib +import copy import re import shutil from typing import List @@ -84,7 +85,7 @@ def _GetConfigFileinfo(self) -> Union[str, dict]: return filename if isinstance(self.snakefile, dict): # Remote file - c = self.snakefile.copy() + c = copy.deepcopy(self.snakefile) c["kwargs"]["path"] = c["kwargs"]["path"].replace( workflow_filename, config_filename ) @@ -635,7 +636,7 @@ def BuildFromJSON( config: dict, singlefile: bool = False, expand: bool = True, - build_path: str = "", + build_path: str = "build", clean_build: bool = True, partial_build: bool = False, # Don't throw an error if node is missing ) -> Tuple[Union[Tuple[str, str], bytes], Model]: diff --git a/electron-app/src/main.ts b/electron-app/src/main.ts index 127defe2..d8de0259 
100644 --- a/electron-app/src/main.ts +++ b/electron-app/src/main.ts @@ -4,8 +4,6 @@ import path from "path"; import * as os from "node:os"; import * as pty from "node-pty"; -const shell = os.platform() === "win32" ? "powershell.exe" : "bash"; - const createWindow = () => { const win = new BrowserWindow({ width: 800, @@ -38,6 +36,7 @@ app.whenReady().then(() => { // Setup pseudo terminal //////////////////////// + const shell = os.platform() === "win32" ? "powershell.exe" : (process.env.SHELL || "bash"); const ptyProcess = pty.spawn(shell, [], { name: "xterm-color", cols: 80, diff --git a/nodemapper/src/redux/globals.ts b/nodemapper/src/redux/globals.ts index 9cf99e14..17a0a16d 100644 --- a/nodemapper/src/redux/globals.ts +++ b/nodemapper/src/redux/globals.ts @@ -6,5 +6,5 @@ export const getBackend = () => { // The backend was originally written using a Flask-based REST server. // The REST server is being migrated to an Electron-based application. // However, both backends are still supported for now. - return "electron"; //"rest"; // | electron + return "rest"; //"rest"; // | electron }; From 4d7bfe0b49684fcaeae63720c61087ee58971802 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 22:14:54 +0100 Subject: [PATCH 10/11] Add auto-validation option; fix issue with validation when lists exist in config file --- builder/builder/builder.py | 5 ++- .../src/gui/Builder/components/BodyWidget.tsx | 1 - .../Builder/components/BuilderSettings.tsx | 33 +++++++++++++++---- nodemapper/src/redux/actions/builder.ts | 4 +++ nodemapper/src/redux/middleware/builder.ts | 21 ++++++++++-- nodemapper/src/redux/reducers/builder.ts | 10 ++++++ 6 files changed, 62 insertions(+), 12 deletions(-) diff --git a/builder/builder/builder.py b/builder/builder/builder.py index 57ab3b47..a106fdc2 100644 --- a/builder/builder/builder.py +++ b/builder/builder/builder.py @@ -595,7 +595,10 @@ def parse_struct(yl: dict): c += f'["{key}"]={{}}\n' # Create empty dict c += "\n".join([f'["{key}"]{v}' for v in vv]) + "\n" elif isinstance(value, list): - raise Exception("Lists not supported in config") + c += f'["{key}"]=[]\n' # Create empty list + for item in value: + c += f'["{key}"].append("{item}")\n' + # raise Exception("Lists not supported in config") elif not value: # Null c += f'["{key}"]="None"\n' diff --git a/nodemapper/src/gui/Builder/components/BodyWidget.tsx b/nodemapper/src/gui/Builder/components/BodyWidget.tsx index e14d7599..853fd359 100644 --- a/nodemapper/src/gui/Builder/components/BodyWidget.tsx +++ b/nodemapper/src/gui/Builder/components/BodyWidget.tsx @@ -167,7 +167,6 @@ export const BodyWidget = (props: BodyWidgetProps) => { background: "rgb(20, 20, 20)", flexGrow: "0", flexShrink: "0", - width: "95%", boxSizing: "border-box", }} onChange={onChangeOrgList} diff --git a/nodemapper/src/gui/Builder/components/BuilderSettings.tsx b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx index f7a9c9f9..13ff1ca1 100644 --- a/nodemapper/src/gui/Builder/components/BuilderSettings.tsx +++ b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx @@ -2,8 +2,9 @@ import React from "react"; import { useState } from "react"; import { useAppDispatch } from "redux/store/hooks"; import { useAppSelector } from "redux/store/hooks"; -import { builderSetRepositoryTarget } from "redux/actions"; import { builderSetSnakemakeArgs } from "redux/actions"; +import { builderSetRepositoryTarget } from "redux/actions"; +import { builderSetAutoValidateConnections } from "redux/actions"; const default_input_size = 35; @@ 
-38,6 +39,7 @@ const BuilderSettings = () => { useAppSelector((state) => state.builder.repo) ).listing_type; const snakemake_args = useAppSelector((state) => state.builder.snakemake_args); + const auto_validate_connections = useAppSelector((state) => state.builder.auto_validate_connections); const selectRepositoryTarget = (target) => { let repo = {}; @@ -73,6 +75,10 @@ const BuilderSettings = () => { dispatch(builderSetSnakemakeArgs(args)); }; + const SetAutoValidateConnections = (value: boolean) => { + dispatch(builderSetAutoValidateConnections(value)); + }; + return ( <>
{

Snakemake arguments

- SetSnakemakeArgs(e.target.value)} - /> +

+ SetSnakemakeArgs(e.target.value)} + /> +

+
+

Validation

+

+ SetAutoValidateConnections(e.target.checked)} + /> + +

); diff --git a/nodemapper/src/redux/actions/builder.ts b/nodemapper/src/redux/actions/builder.ts index 4e504482..9d0d88f0 100644 --- a/nodemapper/src/redux/actions/builder.ts +++ b/nodemapper/src/redux/actions/builder.ts @@ -4,6 +4,10 @@ export const builderLoadNodemap = createAction("builder/load-nodemap"); export const builderSaveNodemap = createAction("builder/save-nodemap"); +export const builderSetAutoValidateConnections = createAction("builder/set-auto-validate-connections"); + +export const builderToggleAutoValidateConnections = createAction("builder/toggle-auto-validate-connections"); + export const builderCompileToJson = createAction("builder/compile-to-json"); export const builderOpenTerminal = createAction("builder/open-terminal"); diff --git a/nodemapper/src/redux/middleware/builder.ts b/nodemapper/src/redux/middleware/builder.ts index 6c7dd953..77a91297 100644 --- a/nodemapper/src/redux/middleware/builder.ts +++ b/nodemapper/src/redux/middleware/builder.ts @@ -44,10 +44,17 @@ export const builderMiddleware = ({ getState, dispatch }) => { Redraw(); break; case "builder/add-link": - AddLink(action, dispatch); + AddLink( + action, + getState().builder.auto_validate_connections, + dispatch, + ); break; case "builder/check-node-dependencies": - CheckNodeDependencies(action.payload, dispatch); + CheckNodeDependencies( + action.payload, + dispatch, + ); break; case "builder/node-selected": NodeSelected(action, dispatch, getState); @@ -203,7 +210,15 @@ const Redraw = () => { interface IPayloadLink { payload: DefaultLinkModel; // Non-serialisable object; consider alternatives } -const AddLink = async (action: IPayloadLink, dispatch: TPayloadString) => { +const AddLink = async ( + action: IPayloadLink, + auto_validate_connections: boolean, + dispatch: TPayloadString, +) => { + // Skip check if auto-validation is disabled + if (!auto_validate_connections) { + return; + } // Determine which is the input (vs output) port (ordering is drag-dependent) const app = BuilderEngine.Instance; const link = action.payload; diff --git a/nodemapper/src/redux/reducers/builder.ts b/nodemapper/src/redux/reducers/builder.ts index 2c77b786..6140e095 100644 --- a/nodemapper/src/redux/reducers/builder.ts +++ b/nodemapper/src/redux/reducers/builder.ts @@ -11,6 +11,7 @@ interface IBuilderState { terminal_visibile: boolean; settings_visible: boolean; snakemake_args: string; + auto_validate_connections: boolean; } // State @@ -27,6 +28,7 @@ const builderStateInit: IBuilderState = { terminal_visibile: false, settings_visible: false, snakemake_args: "--cores 1 --use-conda $(snakemake --list)", + auto_validate_connections: false, }; // Nodemap @@ -94,6 +96,14 @@ const builderReducer = createReducer(builderStateInit, (builder) => { .addCase(actions.builderSetSnakemakeArgs, (state, action) => { state.snakemake_args = action.payload; console.info("[Reducer] " + action.type); + }) + .addCase(actions.builderSetAutoValidateConnections, (state, action) => { + state.auto_validate_connections = action.payload; + console.info("[Reducer] " + action.type); + }) + .addCase(actions.builderToggleAutoValidateConnections, (state, action) => { + state.auto_validate_connections = !state.auto_validate_connections; + console.info("[Reducer] " + action.type); }); }); From a228165c45389a907e7afbc1569364c870c695f7 Mon Sep 17 00:00:00 2001 From: John Brittain Date: Sun, 30 Jul 2023 22:17:12 +0100 Subject: [PATCH 11/11] Code formatting --- builder/builder/builder.py | 2 +- docs/getting_started/challenge.md | 12 +-- 
docs/getting_started/inside-modules.md | 29 ++++-- docs/getting_started/submodules.md | 3 + electron-app/src/main.ts | 5 +- .../Builder/components/BuilderSettings.tsx | 90 ++++++++++--------- .../Builder/components/NodeInfoRenderer.tsx | 4 +- nodemapper/src/redux/actions/builder.ts | 8 +- nodemapper/src/redux/middleware/builder.ts | 15 ++-- 9 files changed, 97 insertions(+), 71 deletions(-) diff --git a/builder/builder/builder.py b/builder/builder/builder.py index a106fdc2..ac1663d8 100644 --- a/builder/builder/builder.py +++ b/builder/builder/builder.py @@ -1,7 +1,7 @@ import argparse +import copy import json import pathlib -import copy import re import shutil from typing import List diff --git a/docs/getting_started/challenge.md b/docs/getting_started/challenge.md index 3ff42dd5..13185ba8 100644 --- a/docs/getting_started/challenge.md +++ b/docs/getting_started/challenge.md @@ -20,10 +20,11 @@ line would read "-5" in this case. The file should contain one line for every letter of the alphabet. As extended exercises: -1) limit the analysis to only consider words of a -specified length (i.e. words between, say, 4-8 letters long) -2) plot the results as a bar graph with a separate bar for each letter of the -alphabet. + +1. limit the analysis to only consider words of a + specified length (i.e. words between, say, 4-8 letters long) +2. plot the results as a bar graph with a separate bar for each letter of the + alphabet. In order to make best use of GRAPEVNE modules and hierarchies, it is recommended to take the following approach: @@ -33,7 +34,6 @@ to take the following approach: Create a module (that runs in a conda environment) to download a list of words. Here is a list of English words: [https://github.com/dwyl/english-words/blob/master/words_alpha.txt](https://github.com/dwyl/english-words/blob/master/words_alpha.txt) (credit to: [https://github.com/dwyl/english-words](https://github.com/dwyl/english-words)). - ### Module 2: Count the number of words beginning with each letter of the alphabet @@ -48,7 +48,7 @@ produces as output a file containing the reversed words. ### Module 4: Subtract two numeric files from one another -Create a module that takes as input *two* files containing lists of numbers, +Create a module that takes as input _two_ files containing lists of numbers, and produces as output a single file containing the difference (i.e. `a-b`) in row-wise fashion (i.e. if file 1 contained lines `1 2 3 4 5` and file two contained lines `3 1 5 2 3` then the output would be `-2 1 -2 2 2`). diff --git a/docs/getting_started/inside-modules.md b/docs/getting_started/inside-modules.md index 0cc7051f..a4f6996e 100644 --- a/docs/getting_started/inside-modules.md +++ b/docs/getting_started/inside-modules.md @@ -19,6 +19,7 @@ environments. These are natively supported by Snakemake so provide a natural mechanism to handle application dependencies. The file structure for this module looks like this: + ``` Download ├── config @@ -30,12 +31,14 @@ Download ``` There are three files of interest: -- `Snakemake`: contains the rules for the module, + +- `Snakemake`: contains the rules for the module, - `config.yaml`: contains the configuration information, which is displayed in -GRAPEVNE when you click on a module. + GRAPEVNE when you click on a module. 
- `conda.yaml`: contains information concerning the conda environment The `config.yaml` file contains the following: + ```yaml /config/config.yaml input_namespace: null @@ -46,6 +49,7 @@ params: ``` The `Snakefile` file contains the following: + ```python configfile: "config/config.yaml" from snakemake.remote import AUTO @@ -76,6 +80,7 @@ rule target: ``` The `conda.yaml` file contains the following: + ```yaml channels: - bioconda @@ -120,6 +125,7 @@ a custom script to process data. The folder structure of the `Filter` module is similar to that of `Download`, with the addition of a `resources` folder that contains the custom script, and a dummy input file that we use for testing, in the `results` folder): + ``` Filter ├── config @@ -137,6 +143,7 @@ Filter ``` The `config.yaml` file contains the following: + ```yaml input_namespace: in output_namespace: out @@ -147,6 +154,7 @@ params: ``` The `Snakemake` file contains the following: + ```python configfile: "config/config.yaml" import json @@ -201,6 +209,7 @@ contains the following (note that it is not necessary to fully comprehend this file if you are not familiar with python - just note that we `import` several dependencies at the top of the file and will need to tell Snakemake where to find these in a moment...): + ```python import json import argparse @@ -241,14 +250,15 @@ installed on the host computer. To declare these dependencies (and allow Snakemake to initialise a conda environment in which to download and prepare these requirements), we make use of the `conda.yaml` file, which is set-up as follows: + ```yaml channels: - bioconda dependencies: - python - pip: - - numpy - - pandas + - numpy + - pandas ``` The file states that in order for this rule to execute correctly, we need @@ -294,6 +304,7 @@ correct dependencies are listed in the `conda.yaml` environment file. For demonstration purposes, the contents of the `resources/scripts/plotcol.py` file are: + ```python import json import argparse @@ -336,17 +347,19 @@ if __name__ == "__main__": ``` The contents of the `conda.yaml` file are: + ```yaml channels: - bioconda dependencies: - python - pip: - - numpy - - pandas - - matplotlib - - lxml + - numpy + - pandas + - matplotlib + - lxml ``` + noting the inclusion of `matplotlib` to produce the graphical output, which is installed by `pip` as a dependency. diff --git a/docs/getting_started/submodules.md b/docs/getting_started/submodules.md index 0eb6bee9..c16b6285 100644 --- a/docs/getting_started/submodules.md +++ b/docs/getting_started/submodules.md @@ -23,6 +23,7 @@ Repositories in GRAPEVNE follow a strict folder structure, which we must now make in order for GRAPEVNE to recognise your module. Create the following folder structure in the directory of your choice (e.g. in `Downloads`) and copy your module into the `modules` folder: + ``` vneyard <--- root repository folder └── workflows <--- workflows folder (required) @@ -112,10 +113,12 @@ single input port (whose name reflects the sub-module [and specific port] to which it connects). Drag a `Download` module into the scene, change its parameters to: + ``` url: https://covid19.who.int/WHO-COVID-19-global-data.csv filename: data.csv ``` + then connect it to the input port of `FilterAndAggregateByMonth`. Drag a `Plot` module into the scene and connect it to the output of `FilterAndAggregateByMonth`. 
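For a concrete (if simplified) picture of what the `FilterAndAggregateByMonth` step does to the downloaded file, the following pandas sketch shows the general idea. The column names (`Country`, `Date_reported`, `New_cases`), the choice of country and the file paths are assumptions based on the WHO dataset layout — this is not the module's own code.

```python
# Illustrative only: roughly the kind of transformation a
# "FilterAndAggregateByMonth" step performs on the downloaded data.csv.
# Column names, the country filter and the paths are assumptions, not
# values taken from the actual module.
import pandas as pd

df = pd.read_csv("results/in/data.csv")

# Filter step: keep a single country
df = df[df["Country"] == "France"]

# Aggregate step: sum new cases per calendar month
df["month"] = pd.to_datetime(df["Date_reported"]).dt.to_period("M")
monthly = df.groupby("month")["New_cases"].sum()

monthly.to_csv("results/out/monthly.csv")
```

In practice such a module would read these choices from its `params` block (as described in the module documentation above) rather than hard-coding them.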
If you `Build and Test` diff --git a/electron-app/src/main.ts b/electron-app/src/main.ts index d8de0259..c69df657 100644 --- a/electron-app/src/main.ts +++ b/electron-app/src/main.ts @@ -36,7 +36,8 @@ app.whenReady().then(() => { // Setup pseudo terminal //////////////////////// - const shell = os.platform() === "win32" ? "powershell.exe" : (process.env.SHELL || "bash"); + const shell = + os.platform() === "win32" ? "powershell.exe" : process.env.SHELL || "bash"; const ptyProcess = pty.spawn(shell, [], { name: "xterm-color", cols: 80, @@ -54,7 +55,7 @@ app.whenReady().then(() => { win.webContents.send("terminal/receive-data", data); }); // Set PS1 prompt (to show current folder) - terminal_sendData("export PS1=\"\\e[0;32m\\W > \\e[m\"\n"); + terminal_sendData('export PS1="\\e[0;32m\\W > \\e[m"\n'); //////////////////// // Setup IPC handles diff --git a/nodemapper/src/gui/Builder/components/BuilderSettings.tsx b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx index 13ff1ca1..654667a7 100644 --- a/nodemapper/src/gui/Builder/components/BuilderSettings.tsx +++ b/nodemapper/src/gui/Builder/components/BuilderSettings.tsx @@ -38,9 +38,13 @@ const BuilderSettings = () => { const listing_type = JSON.parse( useAppSelector((state) => state.builder.repo) ).listing_type; - const snakemake_args = useAppSelector((state) => state.builder.snakemake_args); - const auto_validate_connections = useAppSelector((state) => state.builder.auto_validate_connections); - + const snakemake_args = useAppSelector( + (state) => state.builder.snakemake_args + ); + const auto_validate_connections = useAppSelector( + (state) => state.builder.auto_validate_connections + ); + const selectRepositoryTarget = (target) => { let repo = {}; switch (target) { @@ -81,47 +85,51 @@ const BuilderSettings = () => { return ( <> -
-

Repository

-
- -
-
- -
-
-

Snakemake arguments

-

- SetSnakemakeArgs(e.target.value)} - /> -

-
-

Validation

-

- SetAutoValidateConnections(e.target.checked)} - /> - -

+ > +

Repository

+
+ +
+
+
+
+

Snakemake arguments

+

+ SetSnakemakeArgs(e.target.value)} + /> +

+
+

Validation

+

+ SetAutoValidateConnections(e.target.checked)} + /> + +

+
); }; diff --git a/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx b/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx index 78e46bf2..783bff77 100644 --- a/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx +++ b/nodemapper/src/gui/Builder/components/NodeInfoRenderer.tsx @@ -33,7 +33,7 @@ const ValidateButton = (props: ValidateButtonProps) => { const btnValidate = () => { dispatch(builderCheckNodeDependencies(props.nodename)); - } + }; return (