Skip to content

Commit

Permalink
rm pymc code glm
Browse files Browse the repository at this point in the history
  • Loading branch information
juanitorduz committed Nov 13, 2023
1 parent 0b98259 commit b45ddd7
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 166 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -534,65 +534,6 @@ <h2>Retention - GLM in PyMC</h2>
<p><a href="https://juanitorduz.github.io/retention/">A Simple Cohort Retention Analysis in PyMC</a></p>
</div>
</section>
<section id="retention---glm-in-pymc-1" class="slide level2">
<h2>Retention - GLM in PyMC</h2>
<div class="sourceCode" id="cb1" data-code-line-numbers="|1|2-18|20-31|33-44|46-53"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1"></a><span class="cf">with</span> pm.Model(coords<span class="op">=</span>coords) <span class="im">as</span> model:</span>
<span id="cb1-2"><a href="#cb1-2"></a> <span class="co"># --- Data ---</span></span>
<span id="cb1-3"><a href="#cb1-3"></a> model.add_coord(name<span class="op">=</span><span class="st">"obs"</span>, values<span class="op">=</span>train_obs_idx, mutable<span class="op">=</span><span class="va">True</span>)</span>
<span id="cb1-4"><a href="#cb1-4"></a> age_scaled <span class="op">=</span> pm.MutableData(</span>
<span id="cb1-5"><a href="#cb1-5"></a> name<span class="op">=</span><span class="st">"age_scaled"</span>, value<span class="op">=</span>train_age_scaled, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb1-6"><a href="#cb1-6"></a> )</span>
<span id="cb1-7"><a href="#cb1-7"></a> cohort_age_scaled <span class="op">=</span> pm.MutableData(</span>
<span id="cb1-8"><a href="#cb1-8"></a> name<span class="op">=</span><span class="st">"cohort_age_scaled"</span>, value<span class="op">=</span>train_cohort_age_scaled, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb1-9"><a href="#cb1-9"></a> )</span>
<span id="cb1-10"><a href="#cb1-10"></a> period_month_idx <span class="op">=</span> pm.MutableData(</span>
<span id="cb1-11"><a href="#cb1-11"></a> name<span class="op">=</span><span class="st">"period_month_idx"</span>, value<span class="op">=</span>train_period_month_idx, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb1-12"><a href="#cb1-12"></a> )</span>
<span id="cb1-13"><a href="#cb1-13"></a> n_users <span class="op">=</span> pm.MutableData(</span>
<span id="cb1-14"><a href="#cb1-14"></a> name<span class="op">=</span><span class="st">"n_users"</span>, value<span class="op">=</span>train_n_users, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb1-15"><a href="#cb1-15"></a> )</span>
<span id="cb1-16"><a href="#cb1-16"></a> n_active_users <span class="op">=</span> pm.MutableData(</span>
<span id="cb1-17"><a href="#cb1-17"></a> name<span class="op">=</span><span class="st">"n_active_users"</span>, value<span class="op">=</span>train_n_active_users, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb1-18"><a href="#cb1-18"></a> )</span>
<span id="cb1-19"><a href="#cb1-19"></a></span>
<span id="cb1-20"><a href="#cb1-20"></a> <span class="co"># --- Priors ---</span></span>
<span id="cb1-21"><a href="#cb1-21"></a> intercept <span class="op">=</span> pm.Normal(name<span class="op">=</span><span class="st">"intercept"</span>, mu<span class="op">=</span><span class="dv">0</span>, sigma<span class="op">=</span><span class="dv">1</span>)</span>
<span id="cb1-22"><a href="#cb1-22"></a> b_age_scaled <span class="op">=</span> pm.Normal(name<span class="op">=</span><span class="st">"b_age_scaled"</span>, mu<span class="op">=</span><span class="dv">0</span>, sigma<span class="op">=</span><span class="dv">1</span>)</span>
<span id="cb1-23"><a href="#cb1-23"></a> b_cohort_age_scaled <span class="op">=</span> pm.Normal(</span>
<span id="cb1-24"><a href="#cb1-24"></a> name<span class="op">=</span><span class="st">"b_cohort_age_scaled"</span>, mu<span class="op">=</span><span class="dv">0</span>, sigma<span class="op">=</span><span class="dv">1</span></span>
<span id="cb1-25"><a href="#cb1-25"></a> )</span>
<span id="cb1-26"><a href="#cb1-26"></a> b_period_month <span class="op">=</span> pm.ZeroSumNormal(</span>
<span id="cb1-27"><a href="#cb1-27"></a> name<span class="op">=</span><span class="st">"b_period_month"</span>, sigma<span class="op">=</span><span class="dv">1</span>, dims<span class="op">=</span><span class="st">"period_month"</span></span>
<span id="cb1-28"><a href="#cb1-28"></a> )</span>
<span id="cb1-29"><a href="#cb1-29"></a> b_age_cohort_age_interaction <span class="op">=</span> pm.Normal(</span>
<span id="cb1-30"><a href="#cb1-30"></a> name<span class="op">=</span><span class="st">"b_age_cohort_age_interaction"</span>, mu<span class="op">=</span><span class="dv">0</span>, sigma<span class="op">=</span><span class="dv">1</span></span>
<span id="cb1-31"><a href="#cb1-31"></a> )</span>
<span id="cb1-32"><a href="#cb1-32"></a></span>
<span id="cb1-33"><a href="#cb1-33"></a> <span class="co"># --- Parametrization ---</span></span>
<span id="cb1-34"><a href="#cb1-34"></a> mu <span class="op">=</span> pm.Deterministic(</span>
<span id="cb1-35"><a href="#cb1-35"></a> name<span class="op">=</span><span class="st">"mu"</span>,</span>
<span id="cb1-36"><a href="#cb1-36"></a> var<span class="op">=</span>intercept</span>
<span id="cb1-37"><a href="#cb1-37"></a> <span class="op">+</span> b_age_scaled <span class="op">*</span> age_scaled</span>
<span id="cb1-38"><a href="#cb1-38"></a> <span class="op">+</span> b_cohort_age_scaled <span class="op">*</span> cohort_age_scaled</span>
<span id="cb1-39"><a href="#cb1-39"></a> <span class="op">+</span> b_age_cohort_age_interaction <span class="op">*</span> age_scaled <span class="op">*</span> cohort_age_scaled</span>
<span id="cb1-40"><a href="#cb1-40"></a> <span class="op">+</span> b_period_month[period_month_idx],</span>
<span id="cb1-41"><a href="#cb1-41"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb1-42"><a href="#cb1-42"></a> )</span>
<span id="cb1-43"><a href="#cb1-43"></a></span>
<span id="cb1-44"><a href="#cb1-44"></a> p <span class="op">=</span> pm.Deterministic(name<span class="op">=</span><span class="st">"p"</span>, var<span class="op">=</span>pm.math.invlogit(mu), dims<span class="op">=</span><span class="st">"obs"</span>)</span>
<span id="cb1-45"><a href="#cb1-45"></a></span>
<span id="cb1-46"><a href="#cb1-46"></a> <span class="co"># --- Likelihood ---</span></span>
<span id="cb1-47"><a href="#cb1-47"></a> pm.Binomial(</span>
<span id="cb1-48"><a href="#cb1-48"></a> name<span class="op">=</span><span class="st">"likelihood"</span>,</span>
<span id="cb1-49"><a href="#cb1-49"></a> n<span class="op">=</span>n_users,</span>
<span id="cb1-50"><a href="#cb1-50"></a> p<span class="op">=</span>p,</span>
<span id="cb1-51"><a href="#cb1-51"></a> observed<span class="op">=</span>n_active_users,</span>
<span id="cb1-52"><a href="#cb1-52"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb1-53"><a href="#cb1-53"></a> )</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="footer">
<p><a href="https://juanitorduz.github.io/retention/">A Simple Cohort Retention Analysis in PyMC</a></p>
</div>
</section>
<section id="posterior-distribution" class="slide level2">
<h2>Posterior Distribution</h2>

Expand Down Expand Up @@ -658,20 +599,20 @@ <h2>BART Retention Model</h2>
N_{\text{active}} &amp; \sim \text{Binomial}(N_{\text{total}}, p) \\
\textrm{logit}(p) &amp; = \text{BART}(\text{cohort age}, \text{age}, \text{month})
\end{align*}\]</span></p>
<div class="sourceCode" id="cb2"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1"></a><span class="im">import</span> pymc <span class="im">as</span> pm</span>
<span id="cb2-2"><a href="#cb2-2"></a><span class="im">import</span> pymc_bart <span class="im">as</span> pmb</span>
<span id="cb2-3"><a href="#cb2-3"></a></span>
<span id="cb2-4"><a href="#cb2-4"></a><span class="cf">with</span> pm.Model() <span class="im">as</span> model:</span>
<span id="cb2-5"><a href="#cb2-5"></a> ...</span>
<span id="cb2-6"><a href="#cb2-6"></a> mu <span class="op">=</span> pmb.BART(</span>
<span id="cb2-7"><a href="#cb2-7"></a> name<span class="op">=</span><span class="st">"mu"</span>,</span>
<span id="cb2-8"><a href="#cb2-8"></a> X<span class="op">=</span>x,</span>
<span id="cb2-9"><a href="#cb2-9"></a> Y<span class="op">=</span>train_retention_logit,</span>
<span id="cb2-10"><a href="#cb2-10"></a> m<span class="op">=</span><span class="dv">100</span>,</span>
<span id="cb2-11"><a href="#cb2-11"></a> response<span class="op">=</span><span class="st">"mix"</span>,</span>
<span id="cb2-12"><a href="#cb2-12"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb2-13"><a href="#cb2-13"></a> )</span>
<span id="cb2-14"><a href="#cb2-14"></a> ...</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode" id="cb1"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1"></a><span class="im">import</span> pymc <span class="im">as</span> pm</span>
<span id="cb1-2"><a href="#cb1-2"></a><span class="im">import</span> pymc_bart <span class="im">as</span> pmb</span>
<span id="cb1-3"><a href="#cb1-3"></a></span>
<span id="cb1-4"><a href="#cb1-4"></a><span class="cf">with</span> pm.Model() <span class="im">as</span> model:</span>
<span id="cb1-5"><a href="#cb1-5"></a> ...</span>
<span id="cb1-6"><a href="#cb1-6"></a> mu <span class="op">=</span> pmb.BART(</span>
<span id="cb1-7"><a href="#cb1-7"></a> name<span class="op">=</span><span class="st">"mu"</span>,</span>
<span id="cb1-8"><a href="#cb1-8"></a> X<span class="op">=</span>x,</span>
<span id="cb1-9"><a href="#cb1-9"></a> Y<span class="op">=</span>train_retention_logit,</span>
<span id="cb1-10"><a href="#cb1-10"></a> m<span class="op">=</span><span class="dv">100</span>,</span>
<span id="cb1-11"><a href="#cb1-11"></a> response<span class="op">=</span><span class="st">"mix"</span>,</span>
<span id="cb1-12"><a href="#cb1-12"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb1-13"><a href="#cb1-13"></a> )</span>
<span id="cb1-14"><a href="#cb1-14"></a> ...</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</div><div class="column" style="width:30%;">
<p><img data-src="revenue_retention_presentation_files/images/revenue_retention_presentation_files/retention_bart_files/retention_bart_17_1.svg" class="absolute" style="top: 0px; right: 0px; width: 300px; height: 700px; "></p>
</div>
Expand Down Expand Up @@ -740,38 +681,38 @@ <h3 id="revenue-component">Revenue Component</h3>
</section>
<section id="cohot-revenue-retention-model" class="slide level2">
<h2>Cohort Revenue-Retention Model</h2>
<div class="sourceCode" id="cb3" data-code-line-numbers="|1-5|7-16|18-32"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1"></a>mu <span class="op">=</span> pmb.BART(</span>
<span id="cb3-2"><a href="#cb3-2"></a> name<span class="op">=</span><span class="st">"mu"</span>, X<span class="op">=</span>x, Y<span class="op">=</span>train_retention_logit, m<span class="op">=</span><span class="dv">100</span>, response<span class="op">=</span><span class="st">"mix"</span>, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb3-3"><a href="#cb3-3"></a>)</span>
<span id="cb3-4"><a href="#cb3-4"></a></span>
<span id="cb3-5"><a href="#cb3-5"></a>p <span class="op">=</span> pm.Deterministic(name<span class="op">=</span><span class="st">"p"</span>, var<span class="op">=</span>pm.math.invlogit(mu), dims<span class="op">=</span><span class="st">"obs"</span>)</span>
<span id="cb3-6"><a href="#cb3-6"></a></span>
<span id="cb3-7"><a href="#cb3-7"></a>lam_log <span class="op">=</span> pm.Deterministic(</span>
<span id="cb3-8"><a href="#cb3-8"></a> name<span class="op">=</span><span class="st">"lam_log"</span>,</span>
<span id="cb3-9"><a href="#cb3-9"></a> var<span class="op">=</span>intercept</span>
<span id="cb3-10"><a href="#cb3-10"></a> <span class="op">+</span> b_age_scaled <span class="op">*</span> age_scaled</span>
<span id="cb3-11"><a href="#cb3-11"></a> <span class="op">+</span> b_cohort_age_scaled <span class="op">*</span> cohort_age_scaled</span>
<span id="cb3-12"><a href="#cb3-12"></a> <span class="op">+</span> b_age_cohort_age_interaction <span class="op">*</span> age_scaled <span class="op">*</span> cohort_age_scaled,</span>
<span id="cb3-13"><a href="#cb3-13"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb3-14"><a href="#cb3-14"></a>)</span>
<span id="cb3-15"><a href="#cb3-15"></a></span>
<span id="cb3-16"><a href="#cb3-16"></a>lam <span class="op">=</span> pm.Deterministic(name<span class="op">=</span><span class="st">"lam"</span>, var<span class="op">=</span>pm.math.exp(lam_log), dims<span class="op">=</span><span class="st">"obs"</span>)</span>
<span id="cb3-17"><a href="#cb3-17"></a></span>
<span id="cb3-18"><a href="#cb3-18"></a>n_active_users_estimated <span class="op">=</span> pm.Binomial(</span>
<span id="cb3-19"><a href="#cb3-19"></a> name<span class="op">=</span><span class="st">"n_active_users_estimated"</span>,</span>
<span id="cb3-20"><a href="#cb3-20"></a> n<span class="op">=</span>n_users,</span>
<span id="cb3-21"><a href="#cb3-21"></a> p<span class="op">=</span>p,</span>
<span id="cb3-22"><a href="#cb3-22"></a> observed<span class="op">=</span>n_active_users,</span>
<span id="cb3-23"><a href="#cb3-23"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb3-24"><a href="#cb3-24"></a>)</span>
<span id="cb3-25"><a href="#cb3-25"></a></span>
<span id="cb3-26"><a href="#cb3-26"></a>x <span class="op">=</span> pm.Gamma(</span>
<span id="cb3-27"><a href="#cb3-27"></a> name<span class="op">=</span><span class="st">"revenue_estimated"</span>,</span>
<span id="cb3-28"><a href="#cb3-28"></a> alpha<span class="op">=</span>n_active_users_estimated <span class="op">+</span> eps,</span>
<span id="cb3-29"><a href="#cb3-29"></a> beta<span class="op">=</span>lam,</span>
<span id="cb3-30"><a href="#cb3-30"></a> observed<span class="op">=</span>revenue,</span>
<span id="cb3-31"><a href="#cb3-31"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb3-32"><a href="#cb3-32"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<div class="sourceCode" id="cb2" data-code-line-numbers="|1-5|7-16|18-32"><pre class="sourceCode numberSource python number-lines code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1"></a>mu <span class="op">=</span> pmb.BART(</span>
<span id="cb2-2"><a href="#cb2-2"></a> name<span class="op">=</span><span class="st">"mu"</span>, X<span class="op">=</span>x, Y<span class="op">=</span>train_retention_logit, m<span class="op">=</span><span class="dv">100</span>, response<span class="op">=</span><span class="st">"mix"</span>, dims<span class="op">=</span><span class="st">"obs"</span></span>
<span id="cb2-3"><a href="#cb2-3"></a>)</span>
<span id="cb2-4"><a href="#cb2-4"></a></span>
<span id="cb2-5"><a href="#cb2-5"></a>p <span class="op">=</span> pm.Deterministic(name<span class="op">=</span><span class="st">"p"</span>, var<span class="op">=</span>pm.math.invlogit(mu), dims<span class="op">=</span><span class="st">"obs"</span>)</span>
<span id="cb2-6"><a href="#cb2-6"></a></span>
<span id="cb2-7"><a href="#cb2-7"></a>lam_log <span class="op">=</span> pm.Deterministic(</span>
<span id="cb2-8"><a href="#cb2-8"></a> name<span class="op">=</span><span class="st">"lam_log"</span>,</span>
<span id="cb2-9"><a href="#cb2-9"></a> var<span class="op">=</span>intercept</span>
<span id="cb2-10"><a href="#cb2-10"></a> <span class="op">+</span> b_age_scaled <span class="op">*</span> age_scaled</span>
<span id="cb2-11"><a href="#cb2-11"></a> <span class="op">+</span> b_cohort_age_scaled <span class="op">*</span> cohort_age_scaled</span>
<span id="cb2-12"><a href="#cb2-12"></a> <span class="op">+</span> b_age_cohort_age_interaction <span class="op">*</span> age_scaled <span class="op">*</span> cohort_age_scaled,</span>
<span id="cb2-13"><a href="#cb2-13"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb2-14"><a href="#cb2-14"></a>)</span>
<span id="cb2-15"><a href="#cb2-15"></a></span>
<span id="cb2-16"><a href="#cb2-16"></a>lam <span class="op">=</span> pm.Deterministic(name<span class="op">=</span><span class="st">"lam"</span>, var<span class="op">=</span>pm.math.exp(lam_log), dims<span class="op">=</span><span class="st">"obs"</span>)</span>
<span id="cb2-17"><a href="#cb2-17"></a></span>
<span id="cb2-18"><a href="#cb2-18"></a>n_active_users_estimated <span class="op">=</span> pm.Binomial(</span>
<span id="cb2-19"><a href="#cb2-19"></a> name<span class="op">=</span><span class="st">"n_active_users_estimated"</span>,</span>
<span id="cb2-20"><a href="#cb2-20"></a> n<span class="op">=</span>n_users,</span>
<span id="cb2-21"><a href="#cb2-21"></a> p<span class="op">=</span>p,</span>
<span id="cb2-22"><a href="#cb2-22"></a> observed<span class="op">=</span>n_active_users,</span>
<span id="cb2-23"><a href="#cb2-23"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb2-24"><a href="#cb2-24"></a>)</span>
<span id="cb2-25"><a href="#cb2-25"></a></span>
<span id="cb2-26"><a href="#cb2-26"></a>x <span class="op">=</span> pm.Gamma(</span>
<span id="cb2-27"><a href="#cb2-27"></a> name<span class="op">=</span><span class="st">"revenue_estimated"</span>,</span>
<span id="cb2-28"><a href="#cb2-28"></a> alpha<span class="op">=</span>n_active_users_estimated <span class="op">+</span> eps,</span>
<span id="cb2-29"><a href="#cb2-29"></a> beta<span class="op">=</span>lam,</span>
<span id="cb2-30"><a href="#cb2-30"></a> observed<span class="op">=</span>revenue,</span>
<span id="cb2-31"><a href="#cb2-31"></a> dims<span class="op">=</span><span class="st">"obs"</span>,</span>
<span id="cb2-32"><a href="#cb2-32"></a>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="cohort-revenue-retention-model-1" class="slide level2">
<h2>Cohort Revenue-Retention Model</h2>
Expand Down
Loading

0 comments on commit b45ddd7

Please sign in to comment.