Skip to content

Commit

Permalink
Merge pull request #541 from jGaboardi/spgh_paper
Browse files Browse the repository at this point in the history
preparing JOSS paper for submission
  • Loading branch information
jGaboardi authored Oct 30, 2020
2 parents 1add1ee + a3951a1 commit e7af786
Show file tree
Hide file tree
Showing 5 changed files with 310 additions and 3 deletions.
Binary file added paper/figs/pysal_logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added paper/figs/spaghetti_network.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions paper/main.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
% Preview wrapper: typesets paper.md through LaTeX so the content can be reviewed.
\documentclass{article}
% The markdown package provides \markdownInput, used below to include paper.md.
\usepackage{markdown}

\begin{document}
% Disclaimer for readers of this preview, printed ahead of the paper body.
\section{NOTE}
\begin{itemize}
\item JOSS requires \texttt{paper.md} submissions. Things will look \textbf{super} messy here in \LaTeX{} (e.g. figures in weird places, bizarre citations, etc.). This document is for content \textbf{NOT} formatting.
\item See https://joss.readthedocs.io/en/latest/submitting.html\#example-paper-and-bibliography for formatting guidelines
\end{itemize}

% Paper body: the markdown source is pulled in verbatim under this heading.
\section{spaghetti: spatial network analysis in PySAL}
\markdownInput{paper.md}
\end{document}
209 changes: 209 additions & 0 deletions paper/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
% Rey and Anselin (2007): the original PySAL library paper.
@article{pysal2007,
  author   = {Rey, Sergio J. and Anselin, Luc},
  title    = {{PySAL: A Python Library of Spatial Analytical Methods}},
  journal  = {The Review of Regional Studies},
  year     = {2007},
  volume   = {37},
  number   = {1},
  pages    = {5--27},
  keywords = {Open Source; Software; Spatial},
  url      = {https://rrs.scholasticahq.com/article/8285.pdf},
}

% Rey et al. (2015): PySAL overview in the ISPRS International Journal of Geo-Information.
@article{pysal2015,
  author   = {Rey, Sergio J. and Anselin, Luc and Li, Xun and Pahle, Robert and Laura, Jason and Li, Wenwen and Koschinsky, Julia},
  title    = {Open {Geospatial} {Analytics} with {PySAL}},
  journal  = {{ISPRS International Journal of Geo-Information}},
  year     = {2015},
  volume   = {4},
  number   = {2},
  pages    = {815--836},
  keywords = {open science},
  doi      = {10.3390/ijgi4020815},
}

% Gaboardi, Folch and Horner (2020): connecting points to spatial networks.
@article{gaboardi2020a,
  author  = {Gaboardi, James D. and Folch, David C. and Horner, Mark W.},
  title   = {{Connecting Points to Spatial Networks: Effects on Discrete Optimization Models}},
  journal = {Geographical Analysis},
  year    = {2020},
  volume  = {52},
  number  = {2},
  pages   = {299--322},
  doi     = {10.1111/gean.12211},
}

% Software record for the pysal/spaghetti package.
@misc{gaboardi2018,
  author   = {Gaboardi, James D. and Laura, Jay and Rey, Sergio and
              Wolf, Levi John and Folch, David C. and Kang, Wei and
              Stephens, Philip and Schmidt, Charles},
  title    = {pysal/spaghetti},
  month    = oct,
  year     = {2018},
  url      = {https://github.com/pysal/spaghetti},
  doi      = {10.5281/zenodo.1343650},
  keywords = {graph-theory,network-analysis,python,spatial-networks,topology},
}

% Okabe, Okunuki and Shiode (2006): the SANET toolbox paper.
% NOTE(review): the second serial number was exported under isbn, which does not
% apply to a journal article; it is kept as eissn on the assumption that it is
% the journal's online ISSN -- confirm.
@article{okabe2006a,
  author  = {Okabe, Atsuyuki and Okunuki, Keiichi and Shiode, Shino},
  title   = {{SANET: A Toolbox for Spatial Analysis on a Network}},
  journal = {Geographical Analysis},
  year    = {2006},
  volume  = {38},
  pages   = {57--66},
  doi     = {10.1111/j.0016-7363.2005.00674.x},
  issn    = {0016-7363},
  eissn   = {1538-4632},
}

% Okabe and Sugihara (2012): book on spatial analysis along networks.
@book{okabe2012,
  author    = {Okabe, Atsuyuki and Sugihara, Kokichi},
  title     = {{Spatial Analysis Along Networks}},
  publisher = {John Wiley {\&} Sons, Inc.},
  address   = {West Sussex, UK},
  year      = {2012},
  doi       = {10.1002/9781119967101},
}

% Hagberg, Schult and Swart (2008): the NetworkX paper from SciPy 2008.
% NOTE(review): the issn value came from a reference-manager export -- verify.
@inproceedings{hagberg2008,
  author    = {Hagberg, Aric A. and Schult, Daniel A. and Swart, Pieter J.},
  title     = {{Exploring Network Structure, Dynamics, and Function using NetworkX}},
  booktitle = {Proceedings of the 7th Python in Science Conference (SciPy 2008)},
  editor    = {Varoquaux, Ga{\"e}l and Vaught, Travis and Millman, Jarrod},
  address   = {Pasadena, CA USA},
  pages     = {11--15},
  year      = {2008},
  issn      = {1540-9295},
}

% Boeing (2017): the OSMnx paper.
@article{boeing2017,
  author    = {Boeing, Geoff},
  title     = {{OSMnx: New Methods for Acquiring, Constructing, Analyzing, and Visualizing Complex Street Networks}},
  journal   = {Computers, Environment and Urban Systems},
  year      = {2017},
  volume    = {65},
  pages     = {126--139},
  publisher = {Elsevier Ltd},
  doi       = {10.1016/j.compenvurbsys.2017.05.004},
  issn      = {0198-9715},
}

% Zenodo software record for snkit, release v1.6.0.
@misc{russell2019,
  author    = {Russell, Tom and Koks, Elco},
  title     = {{tomalrussell/snkit: v1.6.0}},
  version   = {v1.6.0},
  publisher = {Zenodo},
  year      = {2019},
  month     = aug,
  doi       = {10.5281/zenodo.3379659}
}
% Zenodo software record for geopandas, release v0.7.0.
% The compound surname is braced and given in comma form so that it is not
% split into separate name parts.
@misc{geopandas2020,
  author    = {Kelsey Jordahl and
               {Van den Bossche}, Joris and
               Jacob Wasserman and
               James McBride and
               Martin Fleischmann and
               Jeffrey Gerard and
               Jeff Tratner and
               Matthew Perry and
               Carson Farmer and
               Geir Arne Hjelle and
               Sean Gillies and
               Micah Cochran and
               Matt Bartos and
               Lucas Culbertson and
               Nick Eubank and
               Aleksey Bilogur and
               maxalbert},
  title     = {geopandas/geopandas: v0.7.0},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {v0.7.0},
  doi       = {10.5281/zenodo.3669853},
  url       = {https://doi.org/10.5281/zenodo.3669853},
}

% Zenodo software record for pandas.
% The corporate author is double-braced so it is treated as one indivisible name.
@misc{reback2020pandas,
  author    = {{The pandas development team}},
  title     = {pandas-dev/pandas: Pandas},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {latest},
  doi       = {10.5281/zenodo.3509134},
  url       = {https://doi.org/10.5281/zenodo.3509134},
}

% McKinney (2010): the pandas data-structures paper from the 9th SciPy conference.
% Capitalised words are braced whole so the title keeps the casing it had
% under the original per-letter protection.
@inproceedings{mckinney-proc-scipy-2010,
  author    = {McKinney, Wes},
  title     = {{Data} {Structures} for {Statistical} {Computing} in {Python}},
  booktitle = {{Proceedings} of the 9th {Python} in {Science} {Conference}},
  editor    = {van der Walt, St{\'e}fan and Millman, Jarrod},
  pages     = {56--61},
  year      = {2010},
  doi       = {10.25080/Majora-92bf1922-00a},
}

% Zenodo software record for libpysal, release v4.2.2.
% Two names are given in comma form so the parser keeps the intended surname:
% a compound surname (braced) and an all-lowercase name.
@misc{libpysal2020,
  author    = {Sergio Rey and
               Philip Stephens and
               Levi John Wolf and
               Charles Schmidt and
               jlaura and
               Taylor Oshan and
               Dani Arribas-Bel and
               James Gaboardi and
               David C. Folch and
               mhwang4 and
               Wei Kang and
               Nicholas Malizia and
               Pedro Amaral and
               Luc Anselin and
               knaap, eli and
               Hu Shao and
               Marynia and
               Andrew Winslow and
               Conceptron and
               Jotham Apaloo and
               Martin Fleischmann and
               Andy Eschbacher and
               Stefanie Lumnitz and
               Siddharth S and
               Forest Gregg and
               {Sales de Andrade}, Elliott and
               Bas Couwenberg and
               Josh Kalderimis and
               Hannes and
               Andy Reagan},
  title     = {pysal/libpysal: v4.2.2},
  month     = feb,
  year      = {2020},
  publisher = {Zenodo},
  version   = {v4.2.2},
  doi       = {10.5281/zenodo.1472807},
  url       = {https://doi.org/10.5281/zenodo.1472807},
}

% Marshall et al. (2018): street network studies, models and representations.
@article{Marshall2018,
  author   = {Marshall, Stephen and Gil, Jorge and Kropf, Karl and Tomko, Martin and Figueiredo, Lucas},
  title    = {{Street Network Studies: from Networks to Models and their Representations}},
  journal  = {Networks and Spatial Economics},
  year     = {2018},
  volume   = {18},
  number   = {3},
  pages    = {735--749},
  doi      = {10.1007/s11067-018-9427-9},
  issn     = {1572-9427},
  keywords = {Graph representation,Modelling,Street networks},
}

% Foti, Waddell and Luxen (2012): accessibility framework, cited in the paper for pandana.
% The sponsoring body is split out of booktitle into its own field.
@inproceedings{foti2012generalized,
  author       = {Foti, Fletcher and Waddell, Paul and Luxen, Dennis},
  title        = {A generalized computational framework for accessibility: from the pedestrian to the metropolitan scale},
  booktitle    = {Proceedings of the 4th {TRB} Conference on Innovations in Travel Modeling},
  organization = {Transportation Research Board},
  year         = {2012},
}
91 changes: 88 additions & 3 deletions paper/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,103 @@ authors:
- name: James D. Gaboardi
orcid: 0000-0002-4776-6826
affiliation: 1
- name: Sergio Rey
orcid: 0000-0001-5857-9762
affiliation: 2
- name: Stefanie Lumnitz
orcid: 0000-0002-7007-5812
affiliation: 3
affiliations:
- name: Pennsylvania State University
index: 1
date: 03 June 2020
- name: Center for Geospatial Sciences, University of California Riverside
index: 2
- name: Directorate of Earth Observation Programs, ESRIN, European Space Agency
index: 3
date: 25 October 2020
bibliography: paper.bib

---


# Summary

`spahgetti`
The role spatial networks, such as streets, play in the human experience cannot be overstated. All of our daily activities fall along, or in close proximity to, roads, bike paths, and subway systems, to name a few. Therefore, in many cases, performing spatial analysis in network space, as opposed to Euclidean space, allows for a more precise representation of daily human action and movement patterns. For example, people generally cannot get to work by driving in a straight line directly from their home, but move along paths within networks. To this end, `spaghetti` ("**spa**tial **g**rap**h**s: n**et**works, **t**opology, & **i**nference"), a sub-module in the wider PySAL ecosystem, was developed to address network-centric research questions with a strong focus on spatial analysis [@pysal2007; @pysal2015; @gaboardi2018].

---

![The PySAL logo.](figs/pysal_logo.png)Figure 1 --- The PySAL logo.

---

# Related Work

The most well-known network analysis package within the Python scientific stack is [NetworkX](https://networkx.github.io) [@hagberg2008], which can be used for modelling any type of complex network (e.g. social, spatial, etc.). [OSMnx](https://osmnx.readthedocs.io/en/stable/) [@boeing2017] is built on top of NetworkX and queries [OpenStreetMap](https://openstreetmap.org) for modelling street networks with resultant network objects returned within a `geopandas.GeoDataFrame` [@geopandas2020]. Another package, [`pandana`](https://github.com/UDST/pandana) [@foti2012generalized], is built on top of `pandas` [@mckinney-proc-scipy-2010; @reback2020pandas] with a focus on shortest path calculation and accessibility measures. Within the realm of Python, the functionality provided by [`snkit`](https://github.com/tomalrussell/snkit) [@russell2019] is most comparable to `spaghetti`, though its main purpose is the processing of raw line data into clean network objects. Outside of Python, [SANET](http://sanet.csis.u-tokyo.ac.jp) [@okabe2006a] is the most closely related project to `spaghetti`; however, it is not written in Python and provides a GUI plugin for GIS software such as QGIS. Moreover, SANET is not fully open source.

# Current Functionality

Considering the related projects detailed above, `spaghetti` fills a niche for not only the processing of spatial network objects, but also post-processing analysis. In other words, this package can be used to study the network *itself* or provide the foundation for studying network-based phenomena, such as crimes along city streets, all within a fully open-source environment. Considering this, the primary purpose of `spaghetti` is creating network objects: collections of vertices and arcs, and their topological relationships. The creation of a network object is realized through the following general steps:

1. read in line data or create features (regular lattices)
1. generate the network representation
1. extract contiguity weights (if desired)
1. identify connected components (if desired)
1. extract graph representation of the network (if desired)

After the creation of a base network object it can be manipulated, analyzed, and utilized as the input for subsequent modelling scenarios. The following are several such examples:

* allocating observation point patterns to the network
* calculating all neighbor distance matrices
* point type A to point type A (auto)
* point type A to point type B (cross)
* simulating point patterns that can be used within the [*K* function](https://pysal.org/spaghetti/generated/spaghetti.Network.html#spaghetti.Network.GlobalAutoK) for cluster analysis [@okabe2012]
* splitting the network into (nearly) uniform segments
* extracting features as `geopandas.GeoDataFrame` objects:
* network arcs, vertices and point patterns
* largest/longest components
* shortest paths
* minimum/maximum spanning trees

The following demonstrates several functionalities mentioned above, including feature creation, network instantiation, and feature extraction, along with a supplementary plot (see Figure 2).

-------

```python
import spaghetti
%matplotlib inline
# generate network
lattice = spaghetti.regular_lattice((0,0,3,3), 2, exterior=True)
ntw = spaghetti.Network(in_data=lattice)
# extract network elements
vertices_df, arcs_df = spaghetti.element_as_gdf(ntw, vertices=True, arcs=True)
# plot
base_kws = {"figsize":(12, 12), "lw":5, "color":"k", "zorder":0}
base = arcs_df.plot(**base_kws, alpha=.35)
node_kws, edge_kws = {"s":100, "zorder":2}, {"zorder":1}
w_kws = {"edge_kws":edge_kws, "node_kws":node_kws}
ntw.w_network.plot(arcs_df, indexed_on="id", ax=base, **w_kws)
vertices_df.plot(ax=base, fc="r", ec="k", markersize=50, zorder=2)
```

-------

![spaghetti 5 network](figs/spaghetti_network.png)Figure 2 --- A 4x4 regular lattice with network arcs in gray and vertices in red. Connectivity is demonstrated with `libpysal` spatial weights, which are plotted over the network in black [@libpysal2020].

-------

The overview presented here provides a high-level summary of functionality. More detailed examples and applications can be found in the *Tutorials* section of the `spaghetti` [documentation](https://pysal.org/spaghetti/tutorials.html).

# Planned Enhancements

As with any software project, there are always plans for further improvements and additional functionality. Three such major enhancements are described here. The first addition will likely be network partitioning through use of Voronoi diagrams generated in network space. Network-constrained Voronoi diagrams can be utilized as tools for analysis in and of themselves and can also be input for further analysis, such as the Voronoi extension of the Network *K* function [@okabe2012]. Second, the current algorithm for allocating observations to a network within `spaghetti` allows for points to be snapped to a single location along the nearest network segment. While this is ideal for concrete observations, such as individual crime incidents, multiple network connections for abstract network events, such as census tract centroids, may be more appropriate [@gaboardi2020a]. Finally, the core functionality of `spaghetti` is nearly entirely written with pure Python data structures, which are excellent for code readability and initial development but generally suffer in terms of performance. There are currently several functions that can be utilized with an optional `geopandas` installation; however, further integration with the `pandas` stack has the potential to greatly improve performance.

# Concluding Remarks

Network-constrained spatial analysis is an important facet of scientific inquiry, especially within the social and geographic sciences [@Marshall2018]. Being able to perform this type of spatial analysis with a well-documented and tested open-source software package further facilitates fully reproducible and open science. With these motivations and core values, the `spaghetti` developers and wider PySAL team look forward to creating and supporting research into the future.

# Acknowledgements

Firstly, we would like to thank all the contributors to, and users of, this package. We would also like to acknowledge Jay Laura, who was the original lead developer of this package (`pysal.network`) prior to the introduction of the PySAL 2.0 ecosystem. The development of this package was partially supported by the [Atlanta Research Data Center](https://atlantardc.wordpress.com) and National Science Foundation Award [#1825768](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1825768).


# References
...

0 comments on commit e7af786

Please sign in to comment.