diff --git a/docs/publications/biohackathon_2024/paper.bib b/docs/publications/biohackathon_2024/paper.bib new file mode 100644 index 00000000..ee26e914 --- /dev/null +++ b/docs/publications/biohackathon_2024/paper.bib @@ -0,0 +1,158 @@ +@article{10.1093/nar/gkac247, + author = {{The Galaxy Community}}, + title = "{The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2022 update}", + journal = {Nucleic Acids Research}, + volume = {50}, + number = {W1}, + pages = {W345--W351}, + year = {2022}, + month = apr, + abstract = "{Galaxy is a mature, browser accessible workbench for scientific computing. It enables scientists to share, analyze and visualize their own data, with minimal technical impediments. A thriving global community continues to use, maintain and contribute to the project, with support from multiple national infrastructure providers that enable freely accessible analysis and training services. The Galaxy Training Network supports free, self-directed, virtual training with $>$230 integrated tutorials. Project engagement metrics have continued to grow over the last 2 years, including source code contributions, publications, software packages wrapped as tools, registered users and their daily analysis jobs, and new independent specialized servers. Key Galaxy technical developments include an improved user interface for launching large-scale analyses with many files, interactive tools for exploratory data analysis, and a complete suite of machine learning tools. 
Important scientific developments enabled by Galaxy include Vertebrate Genome Project (VGP) assembly workflows and global SARS-CoV-2 collaborations.}", + issn = {0305-1048}, + doi = {10.1093/nar/gkac247}, + url = {https://doi.org/10.1093/nar/gkac247}, + eprint = {https://academic.oup.com/nar/article-pdf/50/W1/W345/45189566/gkac247.pdf}, +} + +@article{black2021edam, + title={EDAM: The bioscientific data analysis ontology (update 2021)[version 1; not peer reviewed]}, + author={Black, Melissa and Lamothe, Lucie and {Hager Eldakroury} and Kierkegaard, Mads and {Ankita Priya} and Machinda, Anne and Khanduja, Uttam Singh and {Drashti Patoliya} and {Rashika Rathi} and {Tawah Peggy Che Nico} and Umutesi, Gloria and Blankenburg, Claudia and Op, Anita and Chieke, Precious and {Omodolapo Babatunde} and Laurie, Steve and Neumann, Steffen and Schw\"{a}mmle, Veit and Kuzmin, Ivan and Hunter, Chris and Karr, Jonathan and Ison, Jon and Gaignard, Alban and Brancotte, Bryan and Ménager, Hervé and {Matúš Kalaš}}, + year={2022}, + doi={10.7490/f1000research.1118900.1}, + journal={F1000Research}, + publisher={F1000} +} + +@article{biotoolsSchema, + author = {Ison, Jon and Ienasescu, Hans and Rydza, Emil and Chmura, Piotr and Rapacki, Kristoffer and Gaignard, Alban and Schwämmle, Veit and van Helden, Jacques and Kala{\v{s}}, Mat{\'u}{\v{s}} and M{\'e}nager, Herv{\'e}}, + title = "{biotoolsSchema: a formalized schema for bioinformatics software description}", + journal = {GigaScience}, + volume = {10}, + number = {1}, + pages = {giaa157}, + year = {2021}, + month = {01}, + issn = {2047-217X}, + doi = {10.1093/gigascience/giaa157}, + url = {https://doi.org/10.1093/gigascience/giaa157}, + eprint = {https://academic.oup.com/gigascience/article-pdf/10/1/giaa157/36126150/giaa157.pdf}, +} + +@article{Ison2019, + title = {The bio.tools registry of software tools and data resources for the life sciences}, + volume = {20}, + ISSN = {1474-760X}, + url = {http://doi.org/10.1186/s13059-019-1772-6}, 
+ DOI = {10.1186/s13059-019-1772-6}, + number = {1}, + journal = {Genome Biology}, + publisher = {Springer Science and Business Media LLC}, + author = {Ison, Jon and Ienasescu, Hans and Chmura, Piotr and Rydza, Emil and M{\'e}nager, Herv{\'e} and Kala{\v{s}}, Mat{\'u}{\v{s}} and Schw\"{a}mmle, Veit and Gr\"{u}ning, Bj\"{o}rn and Beard, Niall and Lopez, Rodrigo and Duvaud, Severine and Stockinger, Heinz and Persson, Bengt and Vařeková, Radka Svobodová and Raček, Tomáš and Vondrášek, Jiří and Peterson, Hedi and Salumets, Ahto and Jonassen, Inge and Hooft, Rob and Nyr\"{o}nen, Tommi and Valencia, Alfonso and Capella, Salvador and Gelpí, Josep and Zambelli, Federico and Savakis, Babis and Leskošek, Brane and Rapacki, Kristoffer and Blanchet, Christophe and Jimenez, Rafael and Oliveira, Arlindo and Vriend, Gert and Collin, Olivier and van Helden, Jacques and Løngreen, Peter and Brunak, Søren}, + year = {2019}, + month = aug +} + +@article {Bray2022.03.13.483965, + author = {Simon Bray and Matthias Bernt and Nicola Soranzo and Marius van den Beek and B{\'e}r{\'e}nice Batut and Helena Rasche and Martin {\v C}ech and Peter Cock and Anton Nekrutenko and Bj{\"o}rn Gr{\"u}ning and John Chilton}, + title = {Planemo: a command-line toolkit for developing, deploying, and executing scientific data analyses}, + elocation-id = {2022.03.13.483965}, + year = {2022}, + doi = {10.1101/2022.03.13.483965}, + publisher = {Cold Spring Harbor Laboratory}, + abstract = {There are thousands of well-maintained high-quality open-source software utilities for all aspects of scientific data analysis. For over a decade, the Galaxy Project has been providing computational infrastructure and a unified user interface for these tools to make them accessible to a wide range of researchers. In order to streamline the process of integrating tools and constructing workflows as much as possible, we have developed Planemo, a software development kit for tool and workflow developers and Galaxy power users. 
Here we outline Planemo{\textquoteright}s implementation and describe its broad range of functionality for designing, testing and executing Galaxy tools, workflows and training material. In addition, we discuss the philosophy underlying Galaxy tool and workflow development, and how Planemo encourages the use of development best practices, such as test-driven development, by its users, including those who are not professional software developers. Planemo is a mature project widely used within the Galaxy community which has been downloaded over 80,000 times. Competing Interest Statement: The authors have declared no competing interest.}, + URL = {https://www.biorxiv.org/content/early/2022/03/14/2022.03.13.483965}, + eprint = {https://www.biorxiv.org/content/early/2022/03/14/2022.03.13.483965.full.pdf}, + journal = {bioRxiv} +} + +@misc{datatables, + title = {{DataTables} {\textbar} {Table} plug-in for {jQuery}}, + url = {https://datatables.net/}, + urldate = {2023-11-28}, +} + +@misc{conda, + title = {Anaconda Software Distribution}, + url = {https://anaconda.com}, + urldate = {2016-11-01} +} + +@article{edamBrowser, doi = {10.21105/joss.00698}, url = {https://doi.org/10.21105/joss.00698}, year = {2018}, publisher = {The Open Journal}, volume = {3}, number = {27}, pages = {698}, author = {Bryan Brancotte and Christophe Blanchet and Hervé Ménager}, title = {A reusable tree-based web-visualization to browse EDAM ontology, and contribute to it.}, journal = {Journal of Open Source Software} } + +@misc{edamBrowserCode, + doi = {10.5281/zenodo.5808818}, + url = {https://zenodo.org/doi/10.5281/zenodo.5808818}, + author = {Eldakroury, Hager and Dhamija, Sakshi and Rathi, Rashika and Patoliya, Drashti and Nkwuda, Sunday Cletus and Singh, Guneet and Yadav, Pooja and D'oleo, Kelly and Cherop, Marlene and Che Nico, Tawah Peggy and Kalaš, Matúš and Ménager, Hervé and Brancotte, Bryan}, + keywords = {Ontology browser, Biosciences, Imaging, Machine learning, Domain ontology, EDAM}, + 
language = {en}, + title = {{EDAM Browser 2.0.0: Browsing multiple versions of EDAM}}, + publisher = {Zenodo}, + year = {2021}, + copyright = {MIT License} +} + +@article{batut_community-driven_2018, + title = {Community-{Driven} {Data} {Analysis} {Training} for {Biology}}, + volume = {6}, + issn = {24054712}, + url = {https://linkinghub.elsevier.com/retrieve/pii/S2405471218302308}, + doi = {10.1016/j.cels.2018.05.012}, + language = {en}, + number = {6}, + urldate = {2024-02-26}, + journal = {Cell Systems}, + author = {Batut, Bérénice and Hiltemann, Saskia and Bagnacani, Andrea and Baker, Dannon and Bhardwaj, Vivek and Blank, Clemens and Bretaudeau, Anthony and Brillet-Guéguen, Loraine and Čech, Martin and Chilton, John and Clements, Dave and Doppelt-Azeroual, Olivia and Erxleben, Anika and Freeberg, Mallory Ann and Gladman, Simon and Hoogstrate, Youri and Hotz, Hans-Rudolf and Houwaart, Torsten and Jagtap, Pratik and Larivière, Delphine and Le Corguillé, Gildas and Manke, Thomas and Mareuil, Fabien and Ramírez, Fidel and Ryan, Devon and Sigloch, Florian Christoph and Soranzo, Nicola and Wolff, Joachim and Videm, Pavankumar and Wolfien, Markus and Wubuli, Aisanjiang and Yusuf, Dilmurat and Taylor, James and Backofen, Rolf and Nekrutenko, Anton and Grüning, Björn}, + month = jun, + year = {2018}, + pages = {752--758.e1}, +} + +@misc{RSEc, + doi = {10.7490/f1000research.1119604.1}, + url = {https://f1000research.com/slides/12-1044}, + author = {Ienasescu, Hans and Capella-Gutiérrez, Salvador and Coppens, Frederik and Fernández, José María and Gaignard, Alban and Goble, Carole and Gr\"{u}ning, Bj\"{o}rn and Gustafsson, Johan and Gelpi, Josep Ll and Harrow, Jennifer and Manos, Steven and Miura, Kota and M\"{o}ller, Steffen and Owen, Stuart and Paul-Gilloteaux, Perrine and Peterson, Hedi and Pitoulias, Manthos and Tedds, Jonathan and Repchevsky, Dmitri and Zambelli, Federico and Zharkov, Oleg and Kala\v{s}, Mat\'{u}\v{s} and Ménager, Hervé}, + title = {The ELIXIR 
research software ecosystem: an open software metadata commons (BOSC track) [version 1; not peer reviewed]}, + publisher = {F1000 Research}, + year = {2023} +} + +@article{blankenberg2014dissemination, + title={{Dissemination of scientific software with Galaxy ToolShed}}, + author={Blankenberg, Daniel and Von Kuster, Gregory and Bouvier, Emil and Baker, Dannon and Afgan, Enis and Stoler, Nicholas and {Galaxy Team} and Taylor, James and Nekrutenko, Anton}, + journal={Genome Biology}, + volume={15}, + pages={1--3}, + year={2014}, + publisher={Springer} +} + +@misc{dev_community_tool_table, + author = {Bérénice Batut}, + title = {{Creation of an interactive Galaxy tools table for your community (Galaxy Training Materials)}}, + year = {2024}, + url = {https://training.galaxyproject.org/training-material/topics/dev/tutorials/community-tool-table/tutorial.html}, + note = {Online; accessed Thu Mar 14 2024} +} + +@article{Hiltemann_2023, + doi = {10.1371/journal.pcbi.1010752}, + url = {https://doi.org/10.1371/journal.pcbi.1010752}, + year = 2023, + month = jan, + publisher = {Public Library of Science ({PLoS})}, + volume = {19}, + number = {1}, + pages = {e1010752}, + author = {Saskia Hiltemann and Helena Rasche and Simon Gladman and Hans-Rudolf Hotz and Delphine Larivi{\`{e}}re and Daniel Blankenberg and Pratik D. Jagtap and Thomas Wollmann and Anthony Bretaudeau and Nadia Gou{\'{e}} and Timothy J. Griffin and Coline Royaux and Le Bras, Yvan and Subina Mehta and Anna Syme and Frederik Coppens and Bert Droesbeke and Nicola Soranzo and Wendi Bacon and Fotis Psomopoulos and Crist{\'{o}}bal Gallardo-Alba and John Davis and Melanie Christine Föll and Matthias Fahrner and Maria A. 
Doyle and Beatriz Serrano-Solano and Anne Claire Fouilloux and Peter van Heusden and Wolfgang Maier and Dave Clements and Florian Heyl and Björn Grüning and B{\'{e}}r{\'{e}}nice Batut}, + editor = {Francis Ouellette}, + title = {{Galaxy Training: A powerful framework for teaching!}}, + journal = {PLoS Computational Biology} +} + +@misc{dev_tool_annotation, + author = {Bérénice Batut and Johan Gustafsson and Paul Zierep}, + title = {{Adding and updating best practice metadata for Galaxy tools using the bio.tools registry (Galaxy Training Materials)}}, + year = {2024}, + url = {https://training.galaxyproject.org/training-material/topics/dev/tutorials/tool-annotation/tutorial.html}, + note = {Online; accessed Thu Mar 14 2024} +} diff --git a/docs/publications/biohackathon_2024/paper.md b/docs/publications/biohackathon_2024/paper.md new file mode 100644 index 00000000..49506c0a --- /dev/null +++ b/docs/publications/biohackathon_2024/paper.md @@ -0,0 +1,104 @@ +--- +title: 'Galaxy CoDex - Ensuring Galaxy community sustainability through resource aggregation and annotation' +title_short: 'BH24EU project 11: Galaxy CoDex' +tags: + - Findability + - Galaxy + - Community-specific Galaxy tools + - Tools + - EDAM + - bio.tools + - Metadata + - biohackeu24 +authors: + - name: Bérénice Batut + orcid: 0000-0001-9852-1987 + affiliation: 1, 2, a + - name: Wendi Bacon + orcid: 0000-0002-8170-8806 + affiliation: 3, a + - name: Paul Zierep + orcid: 0000-0003-2982-388X + affiliation: 1, a + - name: Matúš Kalaš + orcid: 0000-0002-1509-4981 + affiliation: 4 + - name: Wai Cheng Thang + orcid: 0000-0002-1480-3563 + affiliation: 5, 6 + - name: Ove Johan Ragnar Gustafsson + orcid: 0000-0002-2977-5032 + affiliation: 7 +affiliations: + - name: Bioinformatics Group, Department of Computer Science, University of Freiburg, Freiburg, Germany + index: 1 + - name: Institut Français de Bioinformatique, CNRS UAR 3601, Évry, France & Mésocentre Clermont-Auvergne, Université Clermont Auvergne, 
Aubière, France + index: 2 + - name: The Open University, Milton Keynes, United Kingdom + index: 3 + - name: Department of Informatics, University of Bergen, Norway; and ELIXIR Norway + index: 4 + - name: Queensland Cyber Infrastructure Foundation (QCIF), Australia + index: 5 + - name: Institute of Molecular Bioscience, University of Queensland, St Lucia, Australia + index: 6 + - name: Australian BioCommons, University of Melbourne, Melbourne, Victoria, Australia + index: 7 + - name: These authors contributed equally to this work + index: a +date: 8 November 2024 +bibliography: paper.bib +event: BH24EU +biohackathon_name: "ELIXIR BioHackathon Europe 2024" +biohackathon_url: "https://biohackathon-europe.org/" +biohackathon_location: "Barcelona, Spain, 2024" +group: Project 11 - Galaxy CoDex - Ensuring Galaxy community sustainability through resource aggregation and annotation +git_url: +authors_short: Bérénice Batut, Wendi Bacon, \emph{et al.} +--- + + +# Introduction + +Galaxy hosts a vast array of tools, tutorials, and workflows, with the exact number of workflows remaining uncertain. To address the challenge of enhancing tool visibility within this expansive ecosystem, a pipeline called the Galaxy Tool Metadata Extractor was created during BioHackathon Europe 2023. This pipeline aggregates Galaxy tool suites from various sources, automatically extracts metadata such as bio.tools identifiers and EDAM ontology terms, and presents the information in an interactive table. Users can filter this table to find tools relevant to their research community. Throughout development, it was noted that many tools lack EDAM annotations. Efforts by the microbial community during both BioHackathon Europe 2023 and a subsequent community-hosted online hackathon in 2024 have improved EDAM annotations for over 200 tools. However, Galaxy communities also offer training materials and workflows, which, like software, may be scattered across different platforms and lack EDAM annotations. 
+ +Building upon the achievements of BioHackathon Europe 2023, this new initiative seeks to expand the capabilities of the existing Galaxy tool list table by introducing the Galaxy Communities Dock (**Galaxy CoDex**). Galaxy CoDex will involve enhancing and implementing webpage templates and files that enable domain communities to efficiently gather, organize, integrate, and deploy pertinent tools, workflows, and training materials across various Galaxy servers. Concurrently, best practices for resource annotation will be developed and integrated into different levels of the Galaxy ecosystem. + +In essence, the growth of Galaxy Communities necessitates the adoption of sustainable practices to ensure their continued advancement. + +This project aims to achieve three main objectives: + +1. **Establishing the infrastructure for Galaxy CoDex** to enhance the discoverability of tools, workflows, and training materials within the Galaxy ecosystem, +2. **Ensuring the sustainability of Galaxy CoDex** by implementing comprehensive resource annotations for communities (e.g. microGalaxy, single-cells), and +3. **Establishing ongoing resource annotation best practices within the Galaxy ecosystem.** + + +# Methods + + +## CoDex + + +## Community curation + + +## Website + + + + +# Outcomes and results + + +# Conclusion and outlook + + +# Acknowledgements + +This work was developed as part of BioHackathon Europe 2024. +This work was supported by [ELIXIR](https://elixir-europe.org), the research infrastructure for life science data. +This work was supported by the Australian BioCommons which is enabled by NCRIS via Bioplatforms Australia funding. + + +# References +