forked from ermaker/thinkstats2chap3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
chap03soln.py
108 lines (78 loc) · 2.25 KB
/
chap03soln.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""This file contains code for use with "Think Stats",
by Allen B. Downey, available from greenteapress.com
Copyright 2014 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
from __future__ import print_function
import numpy as np
import sys
import nsfg
import first
import thinkstats2
import thinkplot
def PmfMean(pmf):
    """Computes the mean of a PMF.

    Args:
        pmf: object whose `d` attribute maps each value to its probability

    Returns:
        float mean (0.0 when the PMF has no entries)
    """
    total = 0.0
    # Probability-weighted sum of the values, accumulated in dict order.
    for value, prob in pmf.d.items():
        total = total + prob * value
    return total
def PmfVar(pmf, mu=None):
    """Computes the variance of a PMF.

    Args:
        pmf: object whose `d` attribute maps each value to its probability
        mu: the point around which the variance is computed;
            if omitted (None), pmf.Mean() is used

    Returns:
        float variance
    """
    # Only None triggers the fallback; an explicit mu of 0 is honoured.
    center = pmf.Mean() if mu is None else mu

    total = 0.0
    for value, prob in pmf.d.items():
        deviation = value - center
        total += prob * deviation ** 2
    return total
def Diffs(t):
    """List of differences between the first element and the others.

    t: sequence of numbers (list or array)

    returns: list of numbers, one per element after the first; empty when
             t has fewer than two elements
    """
    # len() rather than truthiness: a multi-element numpy array has no
    # unambiguous truth value.
    if len(t) == 0:
        return []

    # Named `head` so the local does not shadow the imported `first` module.
    head = t[0]
    return [head - x for x in t[1:]]
def PairWiseDifferences(live):
    """Summarize pairwise differences for children of the same mother.

    Prints the mean of the differences and shows a histogram of their PMF.

    live: DataFrame of pregnancy records for live births
    """
    # Keep only records whose pregnancy length is at least 37 weeks.
    subset = live[live.prglngth >= 37]

    # Presumably maps each caseid to the row indices of that respondent's
    # pregnancies -- confirm against nsfg.MakePregMap.
    by_case = nsfg.MakePregMap(subset)

    all_diffs = []
    for indices in by_case.values():
        weeks = subset.loc[indices].prglngth.values
        # A single record has nothing to be compared against.
        if len(weeks) < 2:
            continue
        all_diffs.extend(Diffs(weeks))

    mean = thinkstats2.Mean(all_diffs)
    print('Mean difference between pairs', mean)

    pmf = thinkstats2.Pmf(all_diffs)
    thinkplot.Hist(pmf, align='center')
    thinkplot.Show(xlabel='Difference in weeks', ylabel='PMF')
def main(script):
    """Tests the functions in this module.

    Runs PairWiseDifferences on the live-birth records, then checks PmfMean
    and PmfVar against the Pmf object's own Mean() and Var().

    script: string script name

    Raises AssertionError if either check fails.
    """
    # Only the first frame is needed; the other two are discarded.
    live, _, _ = first.MakeFrames()
    PairWiseDifferences(live)

    # test PmfMean and PmfVar
    prglngth = live.prglngth
    pmf = thinkstats2.Pmf(prglngth)
    mean = PmfMean(pmf)
    var = PmfVar(pmf)

    # Compare with a tolerance: two independently written float summations
    # need not agree to the last bit, so exact == is a fragile check.
    assert np.isclose(mean, pmf.Mean())
    assert np.isclose(var, pmf.Var())

    print('mean/var preg length', mean, var)

    print('%s: All tests passed.' % script)
if __name__ == "__main__":
    # Forward the command line as positional arguments; main() accepts
    # exactly one (the script name), so extra arguments raise TypeError.
    main(*sys.argv)