From 9394d30d217286add6032eb0694f11c12628032d Mon Sep 17 00:00:00 2001
From: "Timothy Fraser, PhD" <timothy.fraser.1@gmail.com>
Date: Fri, 20 Sep 2024 15:32:11 -0400
Subject: [PATCH] Create 00_why_distributions.R

---
 code/00_why_distributions.R | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 code/00_why_distributions.R

diff --git a/code/00_why_distributions.R b/code/00_why_distributions.R
new file mode 100644
index 0000000..89e56e3
--- /dev/null
+++ b/code/00_why_distributions.R
@@ -0,0 +1,35 @@
+# 00_why_distributions.R
+
+
+# A quick example summarizing why we use hypothetical distributions
+
+# If we have the observed data, great! Let's use that
+hours = c(1000, 2000,3000, 2500, 3200, 500, 3000, 5000, 600, 4000, 3000)
+# Make observed density function
+dobs = hours %>% density() %>% approxfun()
+
+# If we DON'T have the observed data, we could use exponential.
+dexp = function(t, lambda){  lambda * exp(-lambda* t)  }
+
+# Let's get a crap ton of probabilities
+dat = tibble(
+  t = 1:1000, 
+  prob = dexp(t = t, lambda = 0.001), 
+  prob2 = dexp(t, lambda = 0.002),
+  prob3 = dexp(t, lambda = 0.003),
+  prob4 = dobs(t),
+  prob5 = dexp(t, lambda = 1 / mean(hours)))
+
+# Key finding: don't use the exponential to model this particular data
+# it doesn't look right AT ALL!
+ggplot() +
+  geom_line(data = dat, mapping = aes(x = t, y = prob)) + 
+  geom_line(data = dat, mapping = aes(x = t, y = prob2)) +
+  geom_line(data = dat, mapping = aes(x = t, y = prob3))  +
+  geom_line(data = dat, mapping = aes(x = t, y = prob4), color = "red") +
+  geom_line(data = dat, mapping = aes(x = t, y = prob5), color = "blue")
+
+
+
+
+