diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 00000000..919f73bc --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,28 @@ +name: Draft PDF +on: + push: + paths: + - paper/** + - .github/workflows/draft-pdf.yml + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf \ No newline at end of file diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 00000000..0e901a1a --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,74 @@ +@article{vyas2020computation, + title={Computation through neural population dynamics}, + author={Vyas, Saurabh and Golub, Matthew D and Sussillo, David and Shenoy, Krishna V}, + journal={Annual review of neuroscience}, + volume={43}, + number={1}, + pages={249--275}, + year={2020}, + publisher={Annual Reviews} +} + + @book{murphy2023probabilistic, + author = "Kevin P. 
Murphy", + title = "Probabilistic Machine Learning: Advanced Topics", + publisher = "MIT Press", + year = 2023, + url = "http://probml.github.io/book2" +} + +@book{sarkka2023bayesian, + title={Bayesian filtering and smoothing}, + author={S{\"a}rkk{\"a}, Simo and Svensson, Lennart}, + volume={17}, + year={2023}, + publisher={Cambridge University Press} +} + + +@misc{jax, + author = {James Bradbury and Roy Frostig and Peter Hawkins and Matthew James Johnson and Chris Leary and Dougal Maclaurin and George Necula and Adam Paszke and Jake Vander{P}las and Skye Wanderman-{M}ilne and Qiao Zhang}, + title = {{JAX}: composable transformations of {P}ython+{N}um{P}y programs}, + url = {http://github.com/google/jax}, + version = {0.3.13}, + year = {2018}, +} + +@inproceedings{zhao2023revisiting, + title={Revisiting structured variational autoencoders}, + author={Zhao, Yixiu and Linderman, Scott}, + booktitle={International Conference on Machine Learning}, + pages={42046--42057}, + year={2023}, + organization={PMLR} +} + +@article{lee2023switching, + title={Switching autoregressive low-rank tensor models}, + author={Lee, Hyun Dong and Warrington, Andrew and Glaser, Joshua and Linderman, Scott}, + journal={Advances in Neural Information Processing Systems}, + volume={36}, + pages={57976--58010}, + year={2023} +} + +@article{chang2023low, + title={Low-rank extended {K}alman filtering for online learning of neural networks from streaming data}, + author={Chang, Peter G and Dur{\'a}n-Mart{\'\i}n, Gerardo and Shestopaloff, Alexander Y and Jones, Matt and Murphy, Kevin}, + journal={arXiv preprint arXiv:2305.19535}, + year={2023} +} + +@article{weinreb2024keypoint, + author = {Weinreb, Caleb and Pearl, Jonah E. and Lin, Sherry and Osman, Mohammed Abdal Monium and Zhang, Libby and Annapragada, Sidharth and Conlin, Eli and Hoffmann, Red and Makowska, Sofia and Gillis, Winthrop F. and Jay, Maya and Ye, Shaokai and Mathis, Alexander and Mathis, Mackenzie W. 
and Pereira, Talmo and Linderman, Scott W. and Datta, Sandeep Robert}, + date = {2024/07/01}, + doi = {10.1038/s41592-024-02318-2}, + id = {Weinreb2024}, + isbn = {1548-7105}, + journal = {Nature Methods}, + number = {7}, + pages = {1329--1339}, + title = {Keypoint-{M}o{S}eq: parsing behavior by linking point tracking to pose dynamics}, + volume = {21}, + year = {2024}, +} \ No newline at end of file diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 00000000..0f75111f --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,117 @@ +--- +title: 'Dynamax: A Python package for probabilistic state space models (SSMs) written +in JAX' +tags: + - Python + - State space models + - dynamics + - JAX + +# Author order: Peter Chang, Giles Harper-Donnelly, Aleyna Kara, Xinglong Li, Scott Linderman, Kevin Murphy. + +authors: + - name: Scott W. Linderman + orcid: 0000-0002-3878-9073 + affiliation: "1, 2" # (Multiple affiliations must be quoted) + corresponding: true # (This is how to denote the corresponding author) + - name: Peter Chang + affiliation: "3" + - name: Giles Harper-Donnelly + affiliation: "4" + - name: Aleyna Kara + affiliation: "5" + - name: Xinglong Li + affiliation: "6" + - name: Kevin Murphy + affiliation: "2" +affiliations: + - name: Department of Statistics and Wu Tsai Neurosciences Institute, Stanford University, USA + index: 1 + - name: Google Research, USA + index: 2 + - name: CSAIL, Massachusetts Institute of Technology, USA + index: 3 + - name: Cambridge University, UK + index: 4 + - name: Boğaziçi University, Turkey + index: 5 + - name: University of British Columbia, Canada + index: 6 + +date: 12 July 2024 +bibliography: paper.bib + +--- + +# Summary + +Probabilistic state space models (SSMs) are fundamental tools for modeling +sequential data, and they are broadly used in many engineering and scientific +disciplines. Let $y_1, \ldots, y_T$ denote a sequence of observations where +$y_t$ denotes the observation at time $t$. 
In an SSM, the observations are +generated by a latent state, $z_t$, which evolves according to a transition +(aka dynamics) model. An SSM may also use inputs (aka controls or covariates), +$u_t$, to steer the latent state dynamics and influence the observations. + +For example, SSMs are often used in neuroscience to model the dynamics of +neural spike train recordings [@vyas2020computation]. Here, $y_t$ is a vector of spike +counts from each of, say, 100 measured neurons. The activity of nearby neurons +is often correlated, and SSMs can capture that correlation through a lower +dimensional latent state, $z_t$. Finally, if we know that certain sensory inputs +may drive the neural activity, we can encode them in $u_t$. A common goal +in neuroscience is to infer the latent states $z_t$ that best explain the +observed neural spike train; this is called _state inference_. Another goal +is to estimate the dynamics that govern how latent states evolve over time; this is +called _parameter estimation_. `Dynamax` provides algorithms for state inference +and parameter estimation in a variety of SSMs. + +The key design choices when constructing an SSM include the type of latent state +(is $z_t$ a continuous or discrete random variable?), the dynamics that govern +how latent states evolve over time (are they linear or nonlinear?), and the +link between latent states, inputs, and observations. Canonical examples of SSMs +include hidden Markov models (HMM), which have discrete latent states, and +linear dynamical systems (LDS), which have continuous latent states with +linear dynamics and additive Gaussian noise. `Dynamax` supports these canonical +examples as well as more complex models. + +More information about state space models and algorithms for state inference +and parameter estimation can be found in @murphy2023probabilistic and @sarkka2023bayesian. + + +# Statement of need + +`Dynamax` is an open-source Python package for state space modeling. 
Since it +is built with `JAX` [@jax], it automatically supports just-in-time (JIT) +compilation for hardware acceleration on CPU, GPU, and TPU machines. +It also supports automatic differentiation for gradient-based model learning. +While other libraries exist for state space modeling in Python, and some also +use `JAX`, this library provides a combination of low-level inference +algorithms and high-level modeling objects that can support a wide range of +research applications. + +The API for `Dynamax` is divided into two parts: a set of core, functionally +pure, low-level inference algorithms, and a high-level, object oriented module +for constructing and fitting probabilistic SSMs. +The low-level inference API provides message passing algorithms for several +common types of SSMs. For example, `Dynamax` provides `JAX` implementations for: +- Forward-Backward algorithms for discrete-state hidden Markov models (HMMs), +- Kalman filtering and smoothing algorithms for linear Gaussian SSMs, +- Extended and unscented Kalman filtering and smoothing for nonlinear Gaussian SSMs, +- Conditional moment filtering and smoothing algorithms for models with non-Gaussian emissions, and +- Parallel message passing routines that take advantage of GPU or TPU acceleration to perform message passing in sublinear time. +The high-level model API makes it easy to construct, fit, and inspect HMMs and +linear Gaussian SSMs. + +`Dynamax` has supported several publications. The low-level API has been used +in machine learning research [@zhao2023revisiting; @lee2023switching; @chang2023low]. +More sophisticated, special purpose models have been built on top of `Dynamax`, like the +Keypoint-MoSeq library for modeling postural dynamics +of animals [@weinreb2024keypoint]. Finally, the `Dynamax` tutorials are used as reference +examples in a major machine learning textbook [@murphy2023probabilistic]. + +# Acknowledgements + +Most of this library was developed while S.W.L. 
was a Visiting Faculty Researcher +at Google and P.C., G.H.D., A.K., and X.L. were Google Summer of Code participants. + +# References \ No newline at end of file