% pubs_pre_prints.bib

@article{alignment2019,
 author = {Xu, Ting and Nenning, Karl-Heinz and Schwartz, Ernst and Hong, Seok-Jun and Vogelstein, Joshua T. and Fair, Damien A. and Schroeder, Charles E. and Margulies, Daniel S. and Smallwood, Jonny and Milham, Michael P. and Langs, Georg},
 author+an = {5=highlight},
 doi = {10.1101/692616},
 journal = {bioRxiv},
 month = jul,
 title = {Cross-species Functional Alignment Reveals Evolutionary Hierarchy Within the Connectome},
 url = {https://doi.org/10.1101/692616},
 year = {2019}
}

@article{bagging2019,
 author = {Nikolaidis, Aki and Heinsfeld, Anibal Solon and Xu, Ting and Bellec, Pierre and Vogelstein, Joshua T. and Milham, Michael},
 author+an = {5=highlight},
 doi = {10.1101/343392},
 journal = {bioRxiv},
 month = jul,
 title = {Bagging Improves Reproducibility of Functional Parcellation of the Human Brain},
 url = {https://www.biorxiv.org/content/10.1101/343392v3},
 year = {2019}
}

@article{Branch639674,
 abstract = {The advent of whole brain clearing and imaging methods have extended the breadth and depth at which neural populations can be studied. However, these methods have yet to be applied to larger tissues, such as the brains of the common laboratory rat, despite the importance of these models in behavioral neuroscience research. Here we introduce an optimized iDISCO+ based immunolabeling and clearing methodology for application to adult rat brain hemispheres. We validate this methodology through the testing of common antibodies. In order to extend the accessibility of this methodology for general use, we have developed an open source platform for the registration of rat brain volumes to standard brain atlases for high throughput analysis.},
 author = {Branch, Audrey and Tward, Daniel and Vogelstein, Joshua T. and Wu, Zhuhao and Gallagher, Michela},
 author+an = {1=trainee;3=highlight},
 doi = {10.1101/639674},
 eprint = {https://www.biorxiv.org/content/early/2019/05/17/639674.full.pdf},
 journal = {bioRxiv},
 publisher = {Cold Spring Harbor Laboratory},
 title = {An optimized protocol for {iDISCO+} rat brain clearing, imaging, and analysis},
 url = {https://www.biorxiv.org/content/early/2019/05/17/639674},
 year = {2019}
}

@article{deeplearning2019,
 abstract = {In recent years, deep learning has unlocked unprecedented success in various domains, especially in image, text, and speech processing. These breakthroughs may hold promise for neuroscience and especially for brain-imaging investigators who start to analyze thousands of participants. However, deep learning is only beneficial if the data have nonlinear relationships and if they are exploitable at currently available sample sizes. We systematically profiled the performance of deep models, kernel models, and linear models as a function of sample size on UK Biobank brain images against established machine learning references. On MNIST and Zalando Fashion, prediction accuracy consistently improved when escalating from linear models to shallow-nonlinear models, and further improved when switching to deep-nonlinear models. The more observations were available for model training, the greater the performance gain we saw. In contrast, using structural or functional brain scans, simple linear models performed on par with more complex, highly parameterized models in age/sex prediction across increasing sample sizes. In fact, linear models kept improving as the sample size approached $\sim$10,000 participants. Our results indicate that the increase in performance of linear models with additional data does not saturate at the limit of current feasibility. Yet, nonlinearities of common brain scans remain largely inaccessible to both kernel and deep learning methods at any examined scale.},
 author = {Schulz, Marc-Andre and Yeo, B. T. Thomas and Vogelstein, Joshua T. and Mourao-Miranda, Janaina and Kather, Jakob N. and Kording, Konrad and Richards, Blake and Bzdok, Danilo},
 author+an = {3=highlight},
 doi = {10.1101/757054},
 journal = {bioRxiv},
 month = sep,
 publisher = {Cold Spring Harbor Laboratory},
 title = {Deep learning for brains?: Different linear and nonlinear scaling in {UK Biobank} brain images vs. machine-learning datasets},
 url = {https://www.biorxiv.org/content/early/2019/09/06/757054},
 year = {2019}
}

@article{estimatingforests2019,
 author = {Guo, Richard and Shen, Cencheng and Vogelstein, Joshua T.},
 author+an = {1=trainee;3=highlight},
 journal = {arXiv},
 month = jul,
 title = {Estimating Information-Theoretic Quantities with Random Forests},
 url = {https://arxiv.org/abs/1907.00325},
 year = {2019}
}

% NOTE(review): arXiv ID 1710.09859 implies an October 2017 first submission, which conflicts with month = aug / year = 2017 -- confirm the intended date.
@article{francca2017kernel,
 author = {França, Guilherme and Rizzo, Maria L. and Vogelstein, Joshua T.},
 author+an = {1=trainee;3=highlight},
 journal = {arXiv},
 month = aug,
 title = {Kernel k-Groups via {Hartigan's} Method},
 url = {https://arxiv.org/abs/1710.09859},
 year = {2017}
}

@article{geodesic2019,
 author = {Madhyastha, Meghana and Li, Percy and Browne, James and Strnadova-Neely, Veronika and Priebe, Carey E. and Burns, Randal and Vogelstein, Joshua T.},
 author+an = {1=trainee;3=trainee;7=highlight},
 journal = {arXiv},
 month = jul,
 title = {Geodesic Learning via Unsupervised Decision Forests},
 url = {https://arxiv.org/abs/1907.02844},
 year = {2019}
}

@article{graphindependence2019,
 author = {Xiong, Junhao and Shen, Cencheng and Arroyo, Jesús and Vogelstein, Joshua T.},
 author+an = {3=trainee;4=highlight},
 journal = {arXiv},
 month = jun,
 title = {Graph Independence Testing},
 url = {https://arxiv.org/abs/1906.03661},
 year = {2019}
}

@article{graphyti2019,
 author = {Mhembere, Disa and Zheng, Da and Vogelstein, Joshua T. and Priebe, Carey E. and Burns, Randal},
 author+an = {1=trainee;2=trainee;3=highlight},
 journal = {arXiv},
 month = jul,
 title = {Graphyti: A Semi-External Memory Graph Library for {FlashGraph}},
 url = {https://arxiv.org/abs/1907.03335},
 year = {2019}
}

@article{Greenberg479055,
 abstract = {Multiphoton imaging of genetically encoded calcium indicators is routinely used to report activity from populations of spatially resolved neurons in vivo. However, since the relationship between fluorescence and action potentials (APs) is nonlinear and varies over neurons, quantitatively inferring AP discharge is problematic. To address this we developed a biophysical model of calcium binding kinetics for the indicator GCaMP6s that accurately describes AP-evoked fluorescence changes in vivo. The model's physical interpretation allowed the same parameters to describe GCaMP6s binding kinetics for both in vitro binding assays and in vivo imaging. Using this model, we developed an algorithm to infer APs from fluorescence and measured its accuracy with cell-attached electrical recordings. This approach consistently inferred more accurate AP counts and times than alternative methods for firing rates from 0 to >20 Hz, while requiring less training data. These results demonstrate the utility of quantitative, biophysically grounded models for complex biological data.},
 author = {Greenberg, David S. and Wallace, Damian J. and Voit, Kay-Michael and Wuertenberger, Silvia and Czubayko, Uwe and Monsees, Arne and Handa, Takashi and Vogelstein, Joshua T. and Seifert, Reinhard and Groemping, Yvonne and Kerr, Jason N. D.},
 author+an = {8=highlight},
 doi = {10.1101/479055},
 eprint = {https://www.biorxiv.org/content/early/2018/11/29/479055.full.pdf},
 journal = {bioRxiv},
 month = nov,
 publisher = {Cold Spring Harbor Laboratory},
 title = {Accurate action potential inference from a calcium sensor protein through biophysical modeling},
 url = {https://www.biorxiv.org/content/early/2018/11/29/479055},
 year = {2018}
}

@article{heritability2019,
 author = {Wang, Nian and Anderson, Robert J. and Ashbrook, David G. and Gopalakrishnan, Vivek and Park, Youngser and Priebe, Carey E. and Qi, Yi and Vogelstein, Joshua T. and Williams, Robert W. and Johnson, G. Allan},
 author+an = {4=trainee;8=highlight},
 doi = {10.1101/701755},
 journal = {bioRxiv},
 month = jul,
 title = {Node-Specific Heritability in the Mouse Connectome},
 url = {https://www.biorxiv.org/content/10.1101/701755v1},
 year = {2019}
}

% NOTE(review): bioRxiv ID 188706 is also the DOI suffix of entry Kiar2018 -- these may be two versions of one preprint; confirm both should be listed.
@article{kiar2017comprehensive,
 author = {Kiar, Gregory and Bridgeford, Eric and Chandrashekhar, Vikram and Mhembere, Disa and Burns, Randal and Gray Roncal, William R. and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;3=trainee;4=trainee;6=trainee;7=highlight},
 journal = {bioRxiv},
 month = sep,
 pages = {188706},
 title = {A comprehensive cloud framework for accurate and reliable human connectome estimation and meganalysis},
 url = {https://www.biorxiv.org/content/early/2017/09/14/188706},
 year = {2017}
}

@article{Kiar2018,
 abstract = {Modern scientific discovery depends on collecting large heterogeneous datasets with many sources of variability, and applying domain-specific pipelines from which one can draw insight or clinical utility. For example, macroscale connectomics studies require complex pipelines to process raw functional or diffusion data and estimate connectomes. Individual studies tend to customize pipelines to their needs, raising concerns about their reproducibility, and adding to a longer list of factors that may differ across studies (including sampling, experimental design, and data acquisition protocols), resulting in failures to replicate. Mitigating these issues requires multi-study datasets and the development of pipelines that can be applied across them. We developed NeuroData's MRI to Graphs (NDMG) pipeline using several functional and diffusion studies, including the Consortium for Reliability and Reproducibility, to estimate connectomes. Without any manual intervention or parameter tuning, NDMG ran on 25 different studies ($\sim$6,000 scans) from 15 sites, with each scan resulting in a biologically plausible connectome (as assessed by multiple quality assurance metrics at each processing stage). For each study, the connectomes from NDMG are more similar within than across individuals, indicating that NDMG is preserving biological variability. Moreover, the connectomes exhibit near perfect consistency for certain connectional properties across every scan, individual, study, site, and modality; these include stronger ipsilateral than contralateral connections and stronger homotopic than heterotopic connections. Yet, the magnitude of the differences varied across individuals and studies - much more so when pooling data across sites, even after controlling for study, site, and basic demographic variables (i.e., age, sex, and ethnicity). This indicates that other experimental variables (possibly those not measured or reported) are contributing to this variability, which if not accounted for can limit the value of aggregate datasets, as well as expectations regarding the accuracy of findings and likelihood of replication. We, therefore, provide a set of principles to guide the development of pipelines capable of pooling data across studies while maintaining biological variability and minimizing measurement error. This open science approach provides us with an opportunity to understand and eventually mitigate spurious results for both past and future studies.},
 author = {Kiar, Gregory and Bridgeford, Eric and Gray Roncal, Will and {(CoRR)} and Chandrashekhar, Vikram and Mhembere, Disa and Ryman, Sephira and Zuo, Xi-Nian and Margulies, Daniel S. and Craddock, R. Cameron and Priebe, Carey E. and Jung, Rex and Calhoun, Vince and Caffo, Brian and Burns, Randal and Milham, Michael P. and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;3=trainee;5=trainee;6=trainee;17=highlight},
 doi = {10.1101/188706},
 journal = {bioRxiv},
 month = apr,
 publisher = {Cold Spring Harbor Laboratory},
 title = {A High-Throughput Pipeline Identifies Robust Connectomes But Troublesome Variability},
 url = {https://www.biorxiv.org/content/early/2018/04/24/188706},
 year = {2018}
}

@article{Lyzinski2014,
 abstract = {We present a novel approximate graph matching algorithm that incorporates seeded data into the graph matching paradigm. Our Joint Optimization of Fidelity and Commensurability (JOFC) algorithm embeds two graphs into a common Euclidean space where the matching inference task can be performed. Through real and simulated data examples, we demonstrate the versatility of our algorithm in matching graphs with various characteristics--weightedness, directedness, loopiness, many-to-one and many-to-many matchings, and soft seedings.},
 author = {Patsolic, Heather and Adali, Sancar and Vogelstein, Joshua T. and Park, Youngser and Priebe, Carey E. and Li, Gongkai and Lyzinski, Vince},
 author+an = {3=highlight},
 eprint = {1401.3813},
 eprinttype = {arXiv},
 journal = {arXiv},
 month = jan,
 title = {Seeded Graph Matching Via Joint Optimization of Fidelity and Commensurability},
 url = {https://arxiv.org/abs/1401.3813},
 year = {2014}
}

@article{mehta2019,
 author = {Mehta, Ronak and Shen, Cencheng and Xu, Ting and Vogelstein, Joshua T.},
 author+an = {1=trainee;4=highlight},
 journal = {arXiv},
 month = oct,
 title = {A Consistent Independence Test for Multivariate Time-Series},
 url = {https://arxiv.org/abs/1908.06486},
 year = {2019}
}

@article{mgcpy2019,
 author = {Panda, Sambit and Palaniappan, Satish and Xiong, Junhao and Swaminathan, Ananya and Ramachandran, Sandhya and Bridgeford, Eric W. and Shen, Cencheng and Vogelstein, Joshua T.},
 author+an = {1=trainee;6=trainee;8=highlight},
 journal = {arXiv},
 month = jul,
 title = {{mgcpy}: A Comprehensive High Dimensional Independence Testing {Python} Package},
 url = {https://arxiv.org/abs/1907.02088},
 year = {2019}
}

@article{mhembere2019,
 author = {Mhembere, Disa and Zheng, Da and Priebe, Carey E. and Vogelstein, Joshua T. and Burns, Randal},
 author+an = {1=trainee;2=trainee;4=highlight},
 journal = {arXiv},
 month = feb,
 title = {{clusterNOR}: A {NUMA}-Optimized Clustering Framework},
 url = {https://arxiv.org/abs/1902.09527},
 year = {2019}
}

@article{networkinference2019,
 author = {Arroyo, Jesús and Athreya, Avanti and Cape, Joshua and Chen, Guodong and Priebe, Carey E. and Vogelstein, Joshua T.},
 author+an = {1=trainee;6=highlight},
 journal = {arXiv},
 month = jun,
 title = {Inference for multiple heterogeneous networks with a common invariant subspace},
 url = {https://arxiv.org/abs/1906.10026},
 year = {2019}
}

@article{neurosubtypes2019,
 author = {Hong, Seok-Jun and Vogelstein, Joshua T. and Gozzi, G. and Bernhardt, Boris C. and Yeo, B. T. Thomas and Milham, Michael P. and Di Martino, Adriana},
 author+an = {2=highlight},
 journal = {bioRxiv},
 month = jul,
 pubstate = {inpress},
 title = {Towards Neurosubtypes in Autism},
 year = {2019}
}

@article{optimal2019,
 author = {Bridgeford, Eric W. and Wang, Shangsi and Yang, Zhi and Wang, Zeyi and Xu, Ting and Craddock, Cameron and Kiar, Gregory and Gray-Roncal, William and Priebe, Carey E. and Caffo, Brian and Milham, Michael and Zuo, Xi-Nian and {(CoRR)} and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;7=trainee;14=highlight},
 doi = {10.1101/802629},
 journal = {bioRxiv},
 month = oct,
 title = {Optimal Experimental Design for Big Data: Applications in Brain Imaging},
 url = {https://doi.org/10.1101/802629},
 year = {2019}
}

@article{perry2019manifold,
 author = {Perry, Ronan and Tomita, Tyler M. and Patsolic, Jesse and Falk, Benjamin and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;5=highlight},
 journal = {arXiv},
 month = sep,
 title = {Manifold Forests: Closing the Gap on Neural Networks},
 url = {https://arxiv.org/abs/1909.11799},
 year = {2019}
}

@article{ShenDecision2018,
 abstract = {Decision forests are popular tools for classification and regression. These forests naturally produce proximity matrices measuring how often each pair of observations lies in the same leaf node. Recently it has been demonstrated that these proximity matrices can be thought of as kernels, connecting the decision forest literature to the extensive kernel machine literature. While other kernels are known to have strong theoretical properties, such as being characteristic kernels, no similar result is available for any decision forest based kernel. We show that a decision forest induced proximity can be made into a characteristic kernel, which can be used within an independence test to obtain a universally consistent test. We therefore empirically evaluate this kernel on a suite of 12 high-dimensional independence test settings: the decision forest induced kernel is shown to typically achieve substantially higher power than other methods.},
 author = {Shen, Cencheng and Vogelstein, Joshua T.},
 author+an = {2=highlight},
 journal = {arXiv},
 month = dec,
 title = {Decision Forests Induce Characteristic Kernels},
 url = {https://arxiv.org/abs/1812.00029},
 year = {2018}
}

@article{Tang2017,
 abstract = {Estimation of graph parameters based on a collection of
graphs is essential for a wide range of graph inference
tasks. In practice, weighted graphs are generally observed
with edge contamination. We consider a weighted latent
position graph model contaminated via an edge weight gross
error model and propose an estimation methodology based on
robust Lq estimation followed by low-rank adjacency spectral
decomposition. We demonstrate that, under appropriate
conditions, our estimator both maintains Lq robustness and
wins the bias-variance tradeoff by exploiting low-rank graph
structure. We illustrate the improvement offered by our
estimator via both simulations and a human connectome data
experiment.},
 author = {Tang, Runze and Tang, Minh and Vogelstein, Joshua T. and Priebe, Carey E.},
 author+an = {3=highlight},
 eprint = {1707.03487},
 eprinttype = {arXiv},
 journal = {arXiv},
 keywords = {embedding,low-rank,network,weighted},
 month = jul,
 title = {Robust Estimation from Multiple Graphs under Gross Error Contamination},
 url = {https://arxiv.org/abs/1707.03487},
 year = {2017}
}

% NOTE(review): citation key says 2018, the arXiv ID (1506.03410) dates from June 2015, and year/month say Sep 2019 -- confirm which version date is intended.
@article{Tomita2018,
 author = {Tomita, Tyler M. and Browne, James and Shen, Cencheng and Chung, Jaewon and Patsolic, Jesse L. and Falk, Benjamin and Yim, Jason and Priebe, Carey E. and Burns, Randal and Maggioni, Mauro and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;4=trainee;11=highlight},
 journal = {arXiv},
 month = sep,
 title = {Sparse Projection Oblique Randomer Forests},
 url = {https://arxiv.org/abs/1506.03410},
 year = {2019}
}

@article{vertex2019,
 author = {Helm, Hayden and Vogelstein, Joshua T. and Priebe, Carey E.},
 author+an = {1=trainee;2=highlight},
 journal = {arXiv},
 month = jun,
 title = {Vertex Classification on Weighted Networks},
 url = {https://arxiv.org/abs/1906.02881},
 year = {2019}
}

@article{Vogelstein2018equivalence,
 author = {Shen, Cencheng and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=highlight},
 journal = {arXiv},
 month = jul,
 title = {The Exact Equivalence of Distance and Kernel Methods for Hypothesis Testing},
 url = {https://arxiv.org/abs/1806.05514},
 year = {2018}
}

@article{vogelstein2018geometric,
 author = {Vogelstein, Joshua T. and Bridgeford, Eric and Tang, Minh and Zheng, Da and Burns, Randal and Maggioni, Mauro},
 author+an = {1=highlight;2=trainee},
 journal = {arXiv},
 month = nov,
 title = {Geometric Dimensionality Reduction for Subsequent Classification},
 url = {https://arxiv.org/abs/1709.01233},
 year = {2018}
}

@article{Wang2018,
 author = {Wang, Shangsi and Shen, Cencheng and Badea, Alexandra and Priebe, Carey E. and Vogelstein, Joshua T.},
 author+an = {1=trainee;2=trainee;5=highlight},
 eprint = {1801.07683},
 eprinttype = {arXiv},
 journal = {arXiv},
 month = jan,
 title = {Signal Subgraph Estimation Via Vertex Screening},
 url = {https://arxiv.org/abs/1801.07683},
 year = {2018}
}

@article{wang2018statistical,
 author = {Wang, Zeyi and Sair, Haris and Crainiceanu, Ciprian and Lindquist, Martin and Landman, Bennett A. and Resnick, Susan and Vogelstein, Joshua T. and Caffo, Brian Scott},
 author+an = {1=trainee;7=highlight},
 journal = {bioRxiv},
 month = oct,
 publisher = {Cold Spring Harbor Laboratory},
 title = {On statistical tests of functional connectome fingerprinting},
 url = {https://www.biorxiv.org/content/early/2018/10/15/443556},
 year = {2018}
}