Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overload constructor trait for all model types #27

Merged
merged 5 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJBalancing"
uuid = "45f359ea-796d-4f51-95a5-deb1a414c586"
authors = ["Essam Wisam <[email protected]>", "Anthony Blaom <[email protected]> and contributors"]
version = "0.1.4"
version = "0.1.5"

[deps]
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
Expand All @@ -12,9 +12,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
MLJBase = "1"
MLJBase = "1.4"
OrderedCollections = "1.6"
MLJModelInterface = "1.9"
MLJModelInterface = "1.10"
MLUtils = "0.4"
StatsBase = "0.34"
julia = "1.6"
Expand Down
3 changes: 2 additions & 1 deletion src/balanced_bagging.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ MMI.metadata_pkg(
MMI.metadata_model(
BalancedBaggingClassifier,
target_scitype = AbstractVector{<:Finite},
load_path = "MLJBalancing." * string(BalancedBaggingClassifier),
load_path = "MLJBalancing.BalancedBaggingClassifier",
)
MMI.constructor(::Type{<:BalancedBaggingClassifier}) = BalancedBaggingClassifier

MMI.iteration_parameter(::Type{<:BalancedBaggingClassifier{<:Any,<:Any,P}}) where P =
MLJBase.prepend(:model, iteration_parameter(P))
Expand Down
26 changes: 17 additions & 9 deletions src/balanced_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ struct BalancedModel <:ProbabilisticNetworkComposite
model::Probabilistic # get rid of abstract types
end

BalancedModel(;model=nothing, balancer=nothing) = BalancedModel(model, balancer)
BalancedModel(model; kwargs...) = BalancedModel(; model, kwargs...)
BalancedModel(;model=nothing, balancer=nothing) = BalancedModel(model, balancer)
BalancedModel(model; kwargs...) = BalancedModel(; model, kwargs...)

In the following, we use macros to automate code generation of these for all model
types
Expand Down Expand Up @@ -66,15 +66,15 @@ const ERR_UNSUPPORTED_MODEL(model) = ErrorException(
"$PRETTY_SUPPORTED_MODEL_TYPES.\n"*
"Model provided has type `$(typeof(model))`. "
)
const ERR_NUM_ARGS_BM = "`BalancedModel` can at most have one non-keyword argument where the model is passed."
const ERR_NUM_ARGS_BM = "`BalancedModel` can at most have one non-keyword argument where the model is passed."


"""
BalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)
BalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)

Given a classification model, and one or more balancer models that all implement the `MLJModelInterface`,
`BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models
`BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models
and a classifier together in a sequential pipeline.

# Operation
Expand All @@ -83,7 +83,7 @@ Given a classification model, and one or more balancer models that all implement
- During prediction, the balancers have no effect.

# Arguments
- `model::Supervised`: A classification model that implements the `MLJModelInterface`.
- `model::Supervised`: A classification model that implements the `MLJModelInterface`.
- `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.
- `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.
- and so on for an arbitrary number of balancers.
Expand Down Expand Up @@ -216,14 +216,20 @@ MMI.package_name(::Type{<:UNION_COMPOSITE_TYPES}) = "MLJBalancing"
MMI.package_license(::Type{<:UNION_COMPOSITE_TYPES}) = "MIT"
MMI.package_uuid(::Type{<:UNION_COMPOSITE_TYPES}) = "45f359ea-796d-4f51-95a5-deb1a414c586"
MMI.is_wrapper(::Type{<:UNION_COMPOSITE_TYPES}) = true
MMI.package_url(::Type{<:UNION_COMPOSITE_TYPES}) ="https://github.com/JuliaAI/MLJBalancing.jl"
MMI.package_url(::Type{<:UNION_COMPOSITE_TYPES}) =
"https://github.com/JuliaAI/MLJBalancing.jl"

# load path should point to constructor:
MMI.load_path(::Type{<:UNION_COMPOSITE_TYPES}) = "MLJBalancing.BalancedModel"
MMI.constructor(::Type{<:UNION_COMPOSITE_TYPES}) = BalancedModel

# All the composite types BalancedModelProbabilistic, BalancedModelDeterministic, etc.
const COMPOSITE_TYPES = values(MODELTYPE_TO_COMPOSITETYPE)
for composite_type in COMPOSITE_TYPES
quote
MMI.iteration_parameter(::Type{<:$composite_type{balancernames, M}}) where {balancernames, M} =
MLJBase.prepend(:model, iteration_parameter(M))
MMI.iteration_parameter(
::Type{<:$composite_type{balancernames, M}},
) where {balancernames, M} = MLJBase.prepend(:model, iteration_parameter(M))
end |> eval
for trait in [
:input_scitype,
Expand All @@ -241,7 +247,9 @@ for composite_type in COMPOSITE_TYPES
:is_supervised,
:prediction_type]
quote
MMI.$trait(::Type{<:$composite_type{balancernames, M}}) where {balancernames, M} = MMI.$trait(M)
MMI.$trait(
::Type{<:$composite_type{balancernames, M}},
) where {balancernames, M} = MMI.$trait(M)
end |> eval
end
end
12 changes: 9 additions & 3 deletions test/balanced_bagging.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

@testset "group_inds and get_majority_minority_inds_counts" begin
y = [0, 0, 0, 0, 1, 1, 1, 0]
@test MLJBalancing.group_inds(y) == Dict(0 => [1, 2, 3, 4, 8], 1 => [5, 6, 7])
Expand Down Expand Up @@ -111,7 +110,11 @@ end
pred_manual = mean([pred1, pred2])

## using BalancedBagging
modelo = BalancedBaggingClassifier(model = model, T = 2, rng = Random.MersenneTwister(42))
modelo = BalancedBaggingClassifier(
model = model,
T = 2,
rng = Random.MersenneTwister(42),
)
mach = machine(modelo, X, y)
fit!(mach)
pred_auto = MLJBase.predict(mach, Xt)
Expand All @@ -123,7 +126,10 @@ end

## traits
@test fit_data_scitype(modelo) == fit_data_scitype(model)
@test is_wrapper(modelo)
@test is_wrapper(modelo)
@test constructor(modelo) == BalancedBaggingClassifier
@test package_name(modelo) == "MLJBalancing"
@test load_path(modelo) == "MLJBalancing.BalancedBaggingClassifier"
end


Expand Down
166 changes: 84 additions & 82 deletions test/balanced_model.jl
Original file line number Diff line number Diff line change
@@ -1,93 +1,95 @@
@testset "BalancedModel" begin
### end-to-end test
# Create and split data
X, y = generate_imbalanced_data(100, 5; class_probs = [0.2, 0.3, 0.5], rng=Random.MersenneTwister(42))
X = DataFrame(X)
train_inds, test_inds =
partition(eachindex(y), 0.8, shuffle = true, stratify = y, rng = Random.MersenneTwister(42))
X_train, X_test = X[train_inds, :], X[test_inds, :]
y_train, y_test = y[train_inds], y[test_inds]

# Load models and balancers
DeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels
LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels

# Here are a probabilistic and a deterministic model
model_prob = LogisticClassifier()
model_det = DeterministicConstantClassifier()
# And here are three resamplers from Imbalance.
# The package should actually work with any `Static` transformer of the form `(X, y) -> (Xout, yout)`
# provided that it implements the MLJ interface. Here, the balancer is the transformer
balancer1 = Imbalance.MLJ.RandomOversampler(ratios = 1.0, rng = Random.MersenneTwister(42))
balancer2 = Imbalance.MLJ.SMOTENC(k = 10, ratios = 1.2, rng = Random.MersenneTwister(42))
balancer3 = Imbalance.MLJ.ROSE(ratios = 1.3, rng = Random.MersenneTwister(42))

### 1. Make a pipeline of the three balancers and a probablistic model
## ordinary way
mach = machine(balancer1)
Xover, yover = MLJBase.transform(mach, X_train, y_train)
mach = machine(balancer2)
Xover, yover = MLJBase.transform(mach, Xover, yover)
mach = machine(balancer3)
Xover, yover = MLJBase.transform(mach, Xover, yover)

mach = machine(model_prob, Xover, yover)
fit!(mach)
y_pred = MLJBase.predict(mach, X_test)

# with MLJ balancing
@test_throws MLJBalancing.ERR_MODEL_UNSPECIFIED begin
BalancedModel(b1 = balancer1, b2 = balancer2, b3 = balancer3)
end
@test_throws(
MLJBalancing.ERR_UNSUPPORTED_MODEL(1),
BalancedModel(model = 1, b1 = balancer1, b2 = balancer2, b3 = balancer3),
)
@test_logs (:warn, MLJBalancing.WRN_BALANCER_UNSPECIFIED) begin
BalancedModel(model = model_prob)
end
balanced_model =
BalancedModel(model = model_prob, b1 = balancer1, b2 = balancer2, b3 = balancer3)
mach = machine(balanced_model, X_train, y_train)
fit!(mach)
y_pred2 = MLJBase.predict(mach, X_test)
### end-to-end test
# Create and split data
X, y = generate_imbalanced_data(100, 5; class_probs = [0.2, 0.3, 0.5], rng=Random.MersenneTwister(42))
X = DataFrame(X)
train_inds, test_inds =
partition(eachindex(y), 0.8, shuffle = true, stratify = y, rng = Random.MersenneTwister(42))
X_train, X_test = X[train_inds, :], X[test_inds, :]
y_train, y_test = y[train_inds], y[test_inds]

# Load models and balancers
DeterministicConstantClassifier = @load DeterministicConstantClassifier pkg=MLJModels
LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels

# Here are a probabilistic and a deterministic model
model_prob = LogisticClassifier()
model_det = DeterministicConstantClassifier()
# And here are three resamplers from Imbalance.
# The package should actually work with any `Static` transformer of the form `(X, y) -> (Xout, yout)`
# provided that it implements the MLJ interface. Here, the balancer is the transformer
balancer1 = Imbalance.MLJ.RandomOversampler(ratios = 1.0, rng = Random.MersenneTwister(42))
balancer2 = Imbalance.MLJ.SMOTENC(k = 10, ratios = 1.2, rng = Random.MersenneTwister(42))
balancer3 = Imbalance.MLJ.ROSE(ratios = 1.3, rng = Random.MersenneTwister(42))

### 1. Make a pipeline of the three balancers and a probablistic model
## ordinary way
mach = machine(balancer1)
Xover, yover = MLJBase.transform(mach, X_train, y_train)
mach = machine(balancer2)
Xover, yover = MLJBase.transform(mach, Xover, yover)
mach = machine(balancer3)
Xover, yover = MLJBase.transform(mach, Xover, yover)

mach = machine(model_prob, Xover, yover)
fit!(mach)
y_pred = MLJBase.predict(mach, X_test)

# with MLJ balancing
@test_throws MLJBalancing.ERR_MODEL_UNSPECIFIED begin
BalancedModel(b1 = balancer1, b2 = balancer2, b3 = balancer3)
end
@test_throws(
MLJBalancing.ERR_UNSUPPORTED_MODEL(1),
BalancedModel(model = 1, b1 = balancer1, b2 = balancer2, b3 = balancer3),
)
@test_logs (:warn, MLJBalancing.WRN_BALANCER_UNSPECIFIED) begin
BalancedModel(model = model_prob)
end
balanced_model =
BalancedModel(model = model_prob, b1 = balancer1, b2 = balancer2, b3 = balancer3)
@test constructor(balanced_model) == BalancedModel

mach = machine(balanced_model, X_train, y_train)
fit!(mach)
y_pred2 = MLJBase.predict(mach, X_test)

@test y_pred ≈ y_pred2

# traits:
@test fit_data_scitype(balanced_model) == fit_data_scitype(model_prob)
@test is_wrapper(balanced_model)

### 2. Make a pipeline of the three balancers and a deterministic model
## ordinary way
mach = machine(balancer1)
Xover, yover = MLJBase.transform(mach, X_train, y_train)
mach = machine(balancer2)
Xover, yover = MLJBase.transform(mach, Xover, yover)
mach = machine(balancer3)
Xover, yover = MLJBase.transform(mach, Xover, yover)

mach = machine(model_det, Xover, yover)
fit!(mach)
y_pred = MLJBase.predict(mach, X_test)

# with MLJ balancing
balanced_model =
BalancedModel(model = model_det, b1 = balancer1, b2 = balancer2, b3 = balancer3)
mach = machine(balanced_model, X_train, y_train)
fit!(mach)
y_pred2 = MLJBase.predict(mach, X_test)

@test y_pred == y_pred2

### check that setpropertyname and getpropertyname work
Base.getproperty(balanced_model, :b1) == balancer1
Base.setproperty!(balanced_model, :b1, balancer2)
Base.getproperty(balanced_model, :b1) == balancer2


### 2. Make a pipeline of the three balancers and a deterministic model
## ordinary way
mach = machine(balancer1)
Xover, yover = MLJBase.transform(mach, X_train, y_train)
mach = machine(balancer2)
Xover, yover = MLJBase.transform(mach, Xover, yover)
mach = machine(balancer3)
Xover, yover = MLJBase.transform(mach, Xover, yover)

mach = machine(model_det, Xover, yover)
fit!(mach)
y_pred = MLJBase.predict(mach, X_test)

# with MLJ balancing
balanced_model =
BalancedModel(model = model_det, b1 = balancer1, b2 = balancer2, b3 = balancer3)
mach = machine(balanced_model, X_train, y_train)
fit!(mach)
y_pred2 = MLJBase.predict(mach, X_test)

@test y_pred == y_pred2

### check that setpropertyname and getpropertyname work
Base.getproperty(balanced_model, :b1) == balancer1
Base.setproperty!(balanced_model, :b1, balancer2)
Base.getproperty(balanced_model, :b1) == balancer2
@test_throws(
MLJBalancing.ERR_NO_PROP,
Base.setproperty!(balanced_model, :name11, balancer2),
Base.setproperty!(balanced_model, :name11, balancer2),
)
end

Expand All @@ -96,7 +98,7 @@ end
## setup parameters
R = Random.MersenneTwister(42)
LogisticClassifier = @load LogisticClassifier pkg = MLJLinearModels verbosity = 0
balancer1 = Imbalance.MLJ.RandomOversampler(ratios = 1.0, rng = Random.MersenneTwister(42))
balancer1 = Imbalance.MLJ.RandomOversampler(ratios = 1.0, rng = Random.MersenneTwister(42))
model = LogisticClassifier()
BalancedModel(model=model, balancer1=balancer1) == BalancedModel(model; balancer1=balancer1)

Expand Down
Loading