Skip to content

Commit

Permalink
package structure reorg
Browse files Browse the repository at this point in the history
  • Loading branch information
s3alfisc committed Oct 14, 2022
1 parent e752f03 commit 3d91549
Show file tree
Hide file tree
Showing 9 changed files with 311 additions and 21 deletions.
Empty file removed __init__.py
Empty file.
256 changes: 256 additions & 0 deletions development_notebook.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
---
title: "development notebook"
author: "Alexander Fischer"
date: "2022-09-16"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

## in R

Create all input parameters required by the `boot_algo()` functions.

```{r, warning=FALSE, message = FALSE}
# Load the modelling package, the R <-> Python bridge, and a local development
# checkout of fwildclusterboot (so that its internal helpers are callable).
library(fixest)
library(reticulate)
path_to_fwildclusterboot <- "C:/Users/alexa/Dropbox/fwildclusterboot"
devtools::load_all(path_to_fwildclusterboot)

# Create the data set ----
N_G1 <- 50
# NOTE(review): `N` is not defined in this chunk, so `weights = 1:N / N` only
# works if `create_data()` never evaluates its `weights` argument -- confirm.
data2 <- fwildclusterboot:::create_data(
  N = 1000,
  N_G1 = N_G1,
  icc1 = 0.8,
  N_G2 = N_G1,
  icc2 = 0.8,
  numb_fe1 = 10,
  numb_fe2 = 5,
  seed = 41224,
  # seed = 123,
  weights = 1:N / N
)
data2$group_id1 <- as.factor(data2$group_id1)

# Small-sample-correction settings ----
# Both objects switch every adjustment off; after these two lines the names
# `ssc` and `boot_ssc` refer to the settings objects as well as the functions.
ssc <- ssc(
  adj = FALSE, cluster.adj = FALSE, cluster.df = "min", fixef.K = "none"
)
boot_ssc <- boot_ssc(
  adj = FALSE, cluster.adj = FALSE, cluster.df = "min", fixef.K = "none"
)

# Arguments normally passed to `boottest()` ----
# They are defined as globals so that the body of `boottest.fixest()` can be
# replayed line by line in the next chunk.
boot_algo <- "R"
clustid <- cluster <- c("group_id2")
param <- "log_income"
R <- NULL
r <- 0
beta0 <- NULL
B <- boot_iter <- 9999
bootcluster <- "min"
fe <- NULL
conf_int <- NULL
seed <- NULL
# beta0 <- 0.1
type <- "rademacher"
impose_null <- TRUE
tol <- 1e-6
maxiter <- 10
na_omit <- TRUE
nthreads <- 1
sign_level <- 0.05
floattype <- "Float64"
p_val_type <- "two-tailed"
getauxweights <- FALSE
turbo <- FALSE
bootstrapc <- FALSE
maxmatsize <- NULL
engine <- "R"
bootstrap_type <- "11"

# Reference fit and reference bootstrap ----
# The model is estimated with `group_id1` as cluster, while the bootstrap
# below clusters on `group_id2`.
object <- feols(
  proposition_vote ~ treatment + log_income + as.factor(group_id1),
  cluster = ~group_id1,
  data = data2
)
etable(object)
system.time(
  boot_res <- boottest(object, param, B = 99999, clustid = ~group_id2)
)
pval(boot_res)
```

run the first part of `boottest.fixest()`

```{r, warning = FALSE, message = FALSE}
# Replays the first part of `boottest.fixest()` on the globals defined in the
# setup chunk: argument checks, preprocessing, and the derived quantities
# (N, k, G, small-sample correction, point estimate) used further below.

# `beta0` is deprecated in favour of `r`; abort if it is still supplied.
if (!is.null(beta0)) {
stop(
"The function argument 'beta0' is deprecated. Please use the
function argument 'r' instead, by which it is replaced."
)
}
# Arguments may be given as one-sided formulas; convert each of them to a
# character vector of term labels.
if (inherits(clustid, "formula")) {
clustid <- attr(terms(clustid), "term.labels")
}
if (inherits(bootcluster, "formula")) {
bootcluster <- attr(terms(bootcluster), "term.labels")
}
if (inherits(param, "formula")) {
param <- attr(terms(param), "term.labels")
}
if (inherits(fe, "formula")) {
fe <- attr(terms(fe), "term.labels")
}
# Package helper; presumably derives the seed used internally from `seed`,
# `engine` and `type` -- TODO confirm against the package source.
internal_seed <- set_seed(
seed = seed,
engine = engine,
type = type
)
# Abort when feols() dropped fixed effects: the preprocessing below does not
# account for such deletions (see the error message).
if (!is.null(object$fixef_removed)) {
stop(
paste(
"feols() removes fixed effects with the following values: ",
object$fixef_removed,
". Currently, boottest()'s internal pre-processing does not
account for this deletion. Therefore, please exclude such fixed
effects prior to estimation with feols(). You can find them listed
under '$fixef_removed' of your fixest object."
)
)
}
# --------------------------------------------
# check appropriateness of nthreads
nthreads <- check_set_nthreads(nthreads)
# Without a cluster variable the test is treated as heteroskedastic, and the
# "R" engine is switched to "R-lean".
if (is.null(clustid)) {
heteroskedastic <- TRUE
if (engine == "R") {
# heteroskedastic models should always be run through R-lean
engine <- "R-lean"
}
} else {
heteroskedastic <- FALSE
}
# Package helper that builds `R_long` from `R` and `param`; presumably the
# constraint vector expanded to all tested parameters -- TODO confirm.
R_long <- process_R(
R = R,
param = param
)
# These argument checks are skipped for the WildBootTests.jl engine.
if (engine != "WildBootTests.jl") {
r_algo_checks(
R = R_long,
p_val_type = p_val_type,
conf_int = conf_int,
B = B
)
}
# check_params_in_model(object = object, param = param)
check_boottest_args_plus(
object = object,
R = R_long,
param = param,
sign_level = sign_level,
B = B,
fe = fe
)
# preprocess the data: Y, X, weights, fixed_effect
preprocess <- preprocess2.fixest(
object = object,
clustid = clustid,
R = R_long,
param = param,
bootcluster = bootcluster,
fe = fe,
engine = engine
)
# Package helper returning `full_enumeration` and a (possibly updated) `B`,
# both of which are read back below.
enumerate <-
check_set_full_enumeration(
preprocess = preprocess,
heteroskedastic = heteroskedastic,
B = B,
type = type,
engine = engine
)
full_enumeration <- enumerate$full_enumeration
B <- enumerate$B
N <- preprocess$N
k <- preprocess$k
# Number of distinct values in each element of `preprocess$clustid`.
G <-
vapply(preprocess$clustid, function(x) {
length(unique(x))
}, numeric(1))
vcov_sign <- preprocess$vcov_sign
# NOTE(review): `ssc` is the object created with `ssc()` in the setup chunk,
# not the `boot_ssc` object defined right after it -- confirm this is intended.
small_sample_correction <-
get_ssc(
boot_ssc_object = ssc,
N = N,
k = k,
G = G,
vcov_sign = vcov_sign,
heteroskedastic = heteroskedastic
)
# clustermin, clusteradj
clustid_dims <- preprocess$clustid_dims
# R*beta;
# Product of the coefficients selected by `param` and the matching entries of
# `preprocess$R0`, flattened to a plain vector.
point_estimate <-
as.vector(object$coefficients[param] %*% preprocess$R0[param])
boot_vcov <- boot_coef <- NULL
```

Make all elements of the `preprocess` object available in R's global environment.

```{r, warning = FALSE, message = FALSE}
# Copy every element of `preprocess` into the global environment under its
# own name, so that the Python chunk can read them as `r.<name>`. `res` keeps
# the assigned values, as returned by `assign()`.
res <- lapply(
  names(preprocess),
  function(element_name) {
    assign(element_name, preprocess[[element_name]], envir = .GlobalEnv)
  }
)
# Reduce `bootcluster` to its first element.
bootcluster <- as.vector(bootcluster)[[1]]
```

## pass all values to python

via the `reticulate` package

```{python}
import numpy as np

# Pull the objects prepared in the R chunks into Python through reticulate's
# `r` object and convert the array-like ones to numpy arrays.
X = np.array(r.X)
y = np.array(r.Y)
#clustid_df = r.clustid_df
bootstrap_type = r.bootstrap_type
N_G_bootcluster = r.N_G_bootcluster
bootcluster = np.array(r.bootcluster)
# The bootstrap cluster is reused as the error cluster here.
cluster = np.array(bootcluster)
R = np.array(r.R0)
impose_null = True
B = int(r.boot_iter)
# The small-sample correction is a real-valued scaling factor; converting it
# with int() would silently truncate it, so keep it as a float.
ssc = float(r.small_sample_correction)
pval_type = r.p_val_type

# Quick sanity checks on the converted design matrix.
type(X)
X[0:10, 0:10]
```
develop ...



33 changes: 16 additions & 17 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ If you'd like to cooperate, either send us an
Note: everything is still very much work in progress, and there are multiple errors in the code that I am aware of. Still, I believe that the implementation of the WCR11 is more or less correct.

```
import wildboottest
import wildboottest.wildboottest as wb
import numpy as np
import timeit
import time
Expand All @@ -26,29 +27,27 @@ X = np.random.normal(0, 1, N * k).reshape((N,k))
beta = np.random.normal(0,1,k)
beta[0] = 0.005
u = np.random.normal(0,1,N)
y = 1 + X @ beta + u
Y = 1 + X @ beta + u
cluster = np.random.choice(list(range(0,G)), N)
bootcluster = cluster
R = np.zeros(k)
R[0] = 1
B = 99999
start_time = timeit.default_timer()
wb = Wildboottest(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = 12341)
wb.get_scores(bootstrap_type = "11", impose_null = True)
wb.get_numer()
wb.get_denom()
wb.numer
wb.denom
wb.get_tboot()
wb.t_boot
wb.get_vcov()
wb.get_tstat()
wb.get_pvalue(pval_type = "two-tailed")
wcr = wb.Wildboottest(X = X, Y = Y, cluster = cluster, bootcluster = bootcluster, R = R, B = B, seed = 12341)
wcr.get_scores(bootstrap_type = "11", impose_null = True)
wcr.get_numer()
wcr.get_denom()
wcr.numer
wcr.denom
wcr.get_tboot()
wcr.t_boot
wcr.get_vcov()
wcr.get_tstat()
wcr.get_pvalue(pval_type = "two-tailed")
print("estimation time:", timeit.default_timer() - start_time)
# >>> 0.1981981981981982
print("p value:", wb.pvalue)
# >>> 0.9225496 seconds
print("p value:", wcr.pvalue)
# >>> p value: 0.15258152581525816
```
34 changes: 34 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import pathlib
from setuptools import setup, find_packages

# Directory containing this setup.py; used to locate the readme.
HERE = pathlib.Path(__file__).parent

VERSION = '0.1.0'
PACKAGE_NAME = 'wildboottest'
AUTHOR = ['Alexander Fischer', 'Aleksandr Michuda']
AUTHOR_EMAIL = ['[email protected]', '[email protected]']
URL = 'https://github.com/s3alfisc/wildboottest'

LICENSE = 'MIT'
DESCRIPTION = 'Wild Cluster Bootstrap Inference for Linear Models in Python'
# Read the readme with an explicit encoding so the build does not depend on
# the platform's default locale.
LONG_DESCRIPTION = (HERE / "readme.md").read_text(encoding="utf-8")
LONG_DESC_TYPE = "text/markdown"

INSTALL_REQUIRES = [
    'numpy',
    'pandas',
    'numba'
]

setup(
    name=PACKAGE_NAME,
    version=VERSION,
    description=DESCRIPTION,
    long_description=LONG_DESCRIPTION,
    long_description_content_type=LONG_DESC_TYPE,
    # The author metadata fields are plain strings; several people are given
    # as one comma-separated string, not as a Python list.
    author=', '.join(AUTHOR),
    license=LICENSE,
    author_email=', '.join(AUTHOR_EMAIL),
    url=URL,
    install_requires=INSTALL_REQUIRES,
    packages=find_packages()
)
1 change: 1 addition & 0 deletions wildboottest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Binary file added wildboottest/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file not shown.
File renamed without changes.
8 changes: 4 additions & 4 deletions src/Wildboottest-method.py → wildboottest/wildboottest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def __init__(self, X, Y, cluster, bootcluster, R, B, seed = None):

if isinstance(X, pd.DataFrame):
self.X = X.values
if isinstance(y, pd.DataFrame):
self.y = y.values
if isinstance(Y, pd.DataFrame):
self.Y = Y.values
if isinstance(cluster, pd.DataFrame):
clustid = cluster.unique()
self.cluster = cluster.values
Expand Down Expand Up @@ -74,7 +74,7 @@ def __init__(self, X, Y, cluster, bootcluster, R, B, seed = None):

# split X and Y by (boot)cluster
X_g = X[np.where(bootcluster == g)]
Y_g = y[np.where(bootcluster == g)]
Y_g = Y[np.where(bootcluster == g)]
tXgXg = np.transpose(X_g) @ X_g
tXgyg = np.transpose(X_g) @ Y_g
X_list.append(X_g)
Expand Down Expand Up @@ -228,7 +228,7 @@ def compute_denom(Cg, H, bootclustid, B, G, v, ssc):
def get_tboot(self):

t_boot = self.numer / np.sqrt(self.denom)
self.t_boot = t_boot[1:(B+1)] # drop first element - might be useful for comp. of
self.t_boot = t_boot[1:(self.B+1)] # drop first element - might be useful for comp. of

def get_vcov(self):

Expand Down

0 comments on commit 3d91549

Please sign in to comment.