Skip to content

Commit

Permalink
Merge pull request #3 from MLD3/hotfix-2021-feb
Browse files: browse the repository at this point in the history
Hotfix 2021 feb (v0.2.2)
  • Loading branch information
shengpu-tang authored Feb 21, 2021
2 parents f260174 + 7bece88 commit 0150afb
Show file tree
Hide file tree
Showing 6 changed files with 372 additions and 429 deletions.
8 changes: 4 additions & 4 deletions FIDDLE/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def smart_qcut_dummify(x, bin_edges, use_ordinal_encoding=False):
out = pd.get_dummies(x, prefix=x.name)
else:
if use_ordinal_encoding:
col_names = ['{}>={}'.format(z.name, bin_edge) for bin_edge in bin_edges[:-1]]
col_names = ['{}>{}'.format(z.name, bin_edge) for bin_edge in bin_edges[:-1]]
out = pd.DataFrame(0, z.index, col_names)
for i, bin_edge in enumerate(bin_edges[:-1]):
out.loc[m, col_names[i]] = (z.loc[m] >= bin_edge).astype(int)
out.loc[m, col_names[i]] = (z.loc[m] > bin_edge).astype(int)
out = pd.concat([out, pd.get_dummies(z.where(~m, np.nan), prefix=z.name)], axis=1)
else:
z.loc[m] = pd.cut(z.loc[m].astype(float).to_numpy(), bin_edges, duplicates='drop', include_lowest=True)
Expand All @@ -179,15 +179,15 @@ def smart_dummify_impute(x):
def make_float(v):
try:
return float(v)
except ValueError:
except (ValueError, TypeError):
return v
assert False

def is_numeric(v):
try:
float(v)
return True
except ValueError:
except (ValueError, TypeError):
return False
assert False

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ s = sparse.load_npz('{data_path}/s.npz'.format(data_path=...)).todense()
Example usage:
```bash
python -m FIDDLE.run \
--data_path='./test/small_test/' \
--population='./test/small_test/pop.csv' \
--data_path='./tests/small_test/' \
--population='./tests/small_test/pop.csv' \
--T=24 --dt=5 \
--theta_1=0.001 --theta_2=0.001 --theta_freq=1 \
--stats_functions 'min' 'max' 'mean'
Expand Down
45 changes: 17 additions & 28 deletions tests/icd_test/Run.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,7 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"zsh:1: no matches found: output-*/\n"
]
}
],
"outputs": [],
"source": [
"!rm -rf output-*/"
]
Expand Down Expand Up @@ -111,25 +103,24 @@
"--------------------------------------------------------------------------------\n",
"(N × ^d) table :\t (200, 19)\n",
"number of missing entries :\t 2816 out of 3800 total\n",
"Time elapsed: 0.025395 seconds\n",
"Time elapsed: 0.012328 seconds\n",
"\n",
"Output\n",
"S_all, binary features :\t (200, 21)\n",
"Time elapsed: 0.171098 seconds\n",
"Time elapsed: 0.101575 seconds\n",
"\n",
"--------------------------------------------------------------------------------\n",
"3-A) Post-filter time-invariant data\n",
"--------------------------------------------------------------------------------\n",
"Original : 21\n",
"Nearly-constant: 0\n",
"Correlated : 0\n",
"Time elapsed: 0.178303 seconds\n",
"Time elapsed: 0.104368 seconds\n",
"\n",
"Output\n",
"S: shape=(200, 21), density=0.234\n",
"Total time: 0.180898 seconds\n",
"\n",
"\u001b[0m"
"Total time: 0.106207 seconds\n",
"\n"
]
}
],
Expand Down Expand Up @@ -732,25 +723,24 @@
"--------------------------------------------------------------------------------\n",
"(N × ^d) table :\t (200, 129)\n",
"number of missing entries :\t 23337 out of 25800 total\n",
"Time elapsed: 0.057711 seconds\n",
"Time elapsed: 0.034244 seconds\n",
"\n",
"Output\n",
"S_all, binary features :\t (200, 129)\n",
"Time elapsed: 0.830818 seconds\n",
"Time elapsed: 0.558320 seconds\n",
"\n",
"--------------------------------------------------------------------------------\n",
"3-A) Post-filter time-invariant data\n",
"--------------------------------------------------------------------------------\n",
"Original : 129\n",
"Nearly-constant: 0\n",
"Correlated : 2\n",
"Time elapsed: 0.840801 seconds\n",
"Time elapsed: 0.564048 seconds\n",
"\n",
"Output\n",
"S: shape=(200, 127), density=0.097\n",
"Total time: 0.844234 seconds\n",
"\n",
"\u001b[0m"
"Total time: 0.567541 seconds\n",
"\n"
]
}
],
Expand Down Expand Up @@ -1353,25 +1343,24 @@
"--------------------------------------------------------------------------------\n",
"(N × ^d) table :\t (200, 455)\n",
"number of missing entries :\t 86795 out of 91000 total\n",
"Time elapsed: 0.112510 seconds\n",
"Time elapsed: 0.092419 seconds\n",
"\n",
"Output\n",
"S_all, binary features :\t (200, 455)\n",
"Time elapsed: 2.377939 seconds\n",
"Time elapsed: 1.690453 seconds\n",
"\n",
"--------------------------------------------------------------------------------\n",
"3-A) Post-filter time-invariant data\n",
"--------------------------------------------------------------------------------\n",
"Original : 455\n",
"Nearly-constant: 0\n",
"Correlated : 87\n",
"Time elapsed: 2.428499 seconds\n",
"Time elapsed: 1.715216 seconds\n",
"\n",
"Output\n",
"S: shape=(200, 368), density=0.055\n",
"Total time: 2.435949 seconds\n",
"\n",
"\u001b[0m"
"Total time: 1.719981 seconds\n",
"\n"
]
}
],
Expand Down Expand Up @@ -1916,7 +1905,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
"version": "3.8.5"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 0150afb

Please sign in to comment.