Skip to content

Commit

Permalink
refactor: add doc string for seq_missing and block_missing;
Browse files Browse the repository at this point in the history
  • Loading branch information
WenjieDu committed Jun 30, 2024
1 parent f16f143 commit 6cf07c0
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
32 changes: 30 additions & 2 deletions pygrinder/block_missing/block_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,40 @@ def block_missing(
feature_idx: list = None,
step_idx: list = None,
) -> Union[np.ndarray, torch.Tensor]:
"""Create block missing data.
Parameters
----------
X :
Data vector. If X has any missing values, they should be numpy.nan.
factor :
The actual missing rate of block_missing is hard to control strictly.
Hence, ``factor`` is used to help adjust the final missing rate.
block_len :
The length of the mask block.
block_width :
The width of the mask block.
feature_idx :
The indices of features for a missing block to start with.
step_idx :
The indices of steps for a missing block to start with.
Returns
-------
corrupted_X :
Original X with artificial missing values.
Both originally-missing and artificially-missing values are left as NaN.
"""
if isinstance(X, list):
X = np.asarray(X)
n_samples, n_steps, n_features = X.shape

# assert 0 < p <= 1, f"p must be in range (0, 1), but got {p}"

assert isinstance(
block_len, int
), f"`block_len` must be type of int, but got {type(block_len)}"
Expand Down
26 changes: 26 additions & 0 deletions pygrinder/sequential_missing/seq_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,32 @@ def seq_missing(
feature_idx: list = None,
step_idx: list = None,
) -> Union[np.ndarray, torch.Tensor]:
"""Create subsequence missing data.
Parameters
----------
X :
Data vector. If X has any missing values, they should be numpy.nan.
p :
The probability that values may be masked as missing completely at random.
seq_len :
The length of the missing sequence.
feature_idx :
The indices of features for missing sequences to be corrupted.
step_idx :
The indices of steps for a missing sequence to start with.
Returns
-------
corrupted_X :
Original X with artificial missing values.
Both originally-missing and artificially-missing values are left as NaN.
"""
if isinstance(X, list):
X = np.asarray(X)
n_samples, n_steps, n_features = X.shape
Expand Down

0 comments on commit 6cf07c0

Please sign in to comment.