Skip to content

Commit

Permalink
feat: added hdbscan and optics learners (#56)
Browse files Browse the repository at this point in the history
* feat: added hdbscan and optics learners

* docs: news update
  • Loading branch information
m-muecke authored Feb 29, 2024
1 parent 15ecf93 commit 0134af3
Show file tree
Hide file tree
Showing 36 changed files with 647 additions and 86 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
^LICENSE$
.ignore
.editorconfig
.gitignore
Expand Down
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ Collate:
'LearnerClustFanny.R'
'LearnerClustFarthestFirst.R'
'LearnerClustFeatureless.R'
'LearnerClustHDBSCAN.R'
'LearnerClustHclust.R'
'LearnerClustKKMeans.R'
'LearnerClustKMeans.R'
'LearnerClustMclust.R'
'LearnerClustMeanShift.R'
'LearnerClustMiniBatchKMeans.R'
'LearnerClustOPTICS.R'
'LearnerClustPAM.R'
'LearnerClustSimpleKMeans.R'
'LearnerClustXMeans.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ export(LearnerClustEM)
export(LearnerClustFanny)
export(LearnerClustFarthestFirst)
export(LearnerClustFeatureless)
export(LearnerClustHDBSCAN)
export(LearnerClustHclust)
export(LearnerClustKKMeans)
export(LearnerClustKMeans)
export(LearnerClustMclust)
export(LearnerClustMeanShift)
export(LearnerClustMiniBatchKMeans)
export(LearnerClustOPTICS)
export(LearnerClustPAM)
export(LearnerClustSimpleKMeans)
export(LearnerClustXMeans)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# mlr3cluster (development version)

* Add DBSCAN learner from 'fpc' package
* Add HDBSCAN learner from 'dbscan' package
* Add OPTICS learner from 'dbscan' package

# mlr3cluster 0.1.8

Expand Down
5 changes: 2 additions & 3 deletions R/LearnerClustAffinityPropagation.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Affinity Propagation Clustering Learner
#'
#' @name mlr_learners_clust.ap
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Affinity Propagation clustering implemented in [apcluster::apcluster()].
Expand All @@ -26,7 +24,7 @@ LearnerClustAP = R6Class("LearnerClustAP",
initialize = function() {
param_set = ps(
s = p_uty(tags = c("required", "train")),
p = p_uty(default = NA, tags = "train", custom_check = crate(function(x) check_numeric(x))),
p = p_uty(default = NA, tags = "train", custom_check = check_numeric),
q = p_dbl(0, 1, tags = "train"),
maxits = p_int(1L, default = 1000L, tags = "train"),
convits = p_int(1L, default = 100L, tags = "train"),
Expand Down Expand Up @@ -79,4 +77,5 @@ LearnerClustAP = R6Class("LearnerClustAP",
)
)

#' @include aaa.R
learners[["clust.ap"]] = LearnerClustAP
3 changes: 1 addition & 2 deletions R/LearnerClustAgnes.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.agnes
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [cluster::agnes()].
Expand Down Expand Up @@ -83,4 +81,5 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes",
)
)

#' @include aaa.R
learners[["clust.agnes"]] = LearnerClustAgnes
5 changes: 2 additions & 3 deletions R/LearnerClustCMeans.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Fuzzy C-Means Clustering Learner
#'
#' @name mlr_learners_clust.cmeans
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [e1071::cmeans()].
Expand All @@ -24,7 +22,7 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
initialize = function() {
param_set = ps(
centers = p_uty(
tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)
tags = c("required", "train"), default = 2L, custom_check = check_centers
),
iter.max = p_int(1L, default = 100L, tags = "train"),
verbose = p_lgl(default = FALSE, tags = "train"),
Expand Down Expand Up @@ -81,4 +79,5 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
)
)

#' @include aaa.R
learners[["clust.cmeans"]] = LearnerClustCMeans
3 changes: 1 addition & 2 deletions R/LearnerClustCobweb.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Cobweb Clustering Learner
#'
#' @name mlr_learners_clust.cobweb
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Cobweb clustering implemented in [RWeka::Cobweb()].
Expand Down Expand Up @@ -57,4 +55,5 @@ LearnerClustCobweb = R6Class("LearnerClustCobweb",
)
)

#' @include aaa.R
learners[["clust.cobweb"]] = LearnerClustCobweb
25 changes: 12 additions & 13 deletions R/LearnerClustDBSCAN.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#' @title Density-Based Clustering Learner
#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.dbscan
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for density-based clustering implemented in [dbscan::dbscan()].
#' The predict method uses [dbscan::predict.dbscan_fast()] to compute the
#' cluster memberships for new data.
#' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
#' Calls [dbscan::dbscan()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.dbscan
#' @template learner
#' @template example
#'
#' @references
#' `r format_bib("ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
inherit = LearnerClust,
public = list(
Expand All @@ -24,7 +25,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
eps = p_dbl(0, tags = c("required", "train")),
minPts = p_int(0L, default = 5L, tags = "train"),
borderPoints = p_lgl(default = TRUE, tags = "train"),
weights = p_uty(tags = "train", custom_check = crate(function(x) check_numeric(x))),
weights = p_uty(tags = "train", custom_check = check_numeric),
search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
bucketSize = p_int(1L, default = 10L, tags = "train"),
splitRule = p_fct(
Expand Down Expand Up @@ -52,10 +53,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
.train = function(task) {
pv = self$param_set$get_values(tags = "train")
m = invoke(dbscan::dbscan, x = task$data(), .args = pv)
m = set_class(
list(cluster = m$cluster, eps = m$eps, minPts = m$minPts, data = task$data(), dist = m$dist),
c("dbscan_fast", "dbscan")
)
m = insert_named(m, list(data = task$data()))
if (self$save_assignments) {
self$assignments = m$cluster
}
Expand All @@ -64,10 +62,11 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
},

.predict = function(task) {
partition = predict(self$model, newdata = task$data(), self$model$data)
partition = invoke(predict, self$model, newdata = task$data(), data = self$model$data)
PredictionClust$new(task = task, partition = partition)
}
)
)

#' @include aaa.R
learners[["clust.dbscan"]] = LearnerClustDBSCAN
21 changes: 11 additions & 10 deletions R/LearnerClustDBSCANfpc.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
#' @title Density-Based Clustering Learner with fpc
#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.dbscan_fpc
#'
#' @description
#' A [LearnerClust] for density-based clustering implemented in [fpc::dbscan()].
#' The predict method uses [fpc::predict.dbscan()] to compute the
#' cluster memberships for new data.
#' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
#' Calls [fpc::dbscan()] from \CRANpkg{fpc}.
#'
#' @templateVar id clust.dbscan_fpc
#' @template learner
#' @template example
#'
#' @references
#' `r format_bib("ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
inherit = LearnerClust,
public = list(
Expand Down Expand Up @@ -60,10 +63,7 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
.train = function(task) {
pars = self$param_set$get_values(tags = "train")
m = invoke(fpc::dbscan, data = task$data(), .args = pars)
m = set_class(
list(cluster = m$cluster, eps = m$eps, MinPts = m$MinPts, isseed = m$isseed, data = task$data()),
"dbscan"
)
m = insert_named(m, list(data = task$data()))
if (self$save_assignments) {
self$assignments = m$cluster
}
Expand All @@ -72,10 +72,11 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
},

.predict = function(task) {
  # Assigns every row of `task` to a cluster of the fitted fpc DBSCAN model
  # and wraps the integer labels in a PredictionClust.
  # `self$model$data` is the training data that `.train()` attached to the
  # model; it is passed to predict() as `data`.
  #
  # The two assignments below appear to be the before/after pair of this diff
  # hunk (direct predict() call vs. the invoke() form); both are equivalent.
  partition = as.integer(predict(self$model, data = self$model$data, newdata = task$data()))
  # `newdata` must be an argument of predict(), not of as.integer():
  # as.integer() silently swallows extra arguments via `...`, and predict()
  # without `newdata` would return the training assignments instead of
  # predictions for `task`.
  partition = as.integer(invoke(predict, self$model, data = self$model$data, newdata = task$data()))
  PredictionClust$new(task = task, partition = partition)
}
)
)

#' @include aaa.R
learners[["clust.dbscan_fpc"]] = LearnerClustDBSCANfpc
3 changes: 1 addition & 2 deletions R/LearnerClustDiana.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Divisive Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.diana
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for divisive hierarchical clustering implemented in [cluster::diana()].
Expand Down Expand Up @@ -64,4 +62,5 @@ LearnerClustDiana = R6Class("LearnerClustDiana",
)
)

#' @include aaa.R
learners[["clust.diana"]] = LearnerClustDiana
3 changes: 1 addition & 2 deletions R/LearnerClustEM.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Expectation-Maximization Clustering Learner
#'
#' @name mlr_learners_clust.em
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Expectation-Maximization clustering implemented in
Expand Down Expand Up @@ -68,4 +66,5 @@ LearnerClustEM = R6Class("LearnerClustEM",
)
)

#' @include aaa.R
learners[["clust.em"]] = LearnerClustEM
3 changes: 1 addition & 2 deletions R/LearnerClustFanny.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Fuzzy Analysis Clustering Learner
#'
#' @name mlr_learners_clust.fanny
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [cluster::fanny()].
Expand Down Expand Up @@ -71,4 +69,5 @@ LearnerClustFanny = R6Class("LearnerClustFanny",
)
)

#' @include aaa.R
learners[["clust.fanny"]] = LearnerClustFanny
3 changes: 1 addition & 2 deletions R/LearnerClustFarthestFirst.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Farthest First Clustering Learner
#'
#' @name mlr_learners_clust.ff
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Farthest First clustering implemented in [RWeka::FarthestFirst()].
Expand Down Expand Up @@ -67,4 +65,5 @@ LearnerClustFarthestFirst = R6Class("LearnerClustFF",
)
)

#' @include aaa.R
learners[["clust.ff"]] = LearnerClustFarthestFirst
3 changes: 1 addition & 2 deletions R/LearnerClustFeatureless.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Featureless Clustering Learner
#'
#' @name mlr_learners_clust.featureless
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A simple [LearnerClust] which randomly (but evenly) assigns observations to
Expand Down Expand Up @@ -80,4 +78,5 @@ LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
)
)

#' @include aaa.R
learners[["clust.featureless"]] = LearnerClustFeatureless
63 changes: 63 additions & 0 deletions R/LearnerClustHDBSCAN.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' @title Hierarchical DBSCAN (HDBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.hdbscan
#'
#' @description
#' HDBSCAN (Hierarchical DBSCAN) clustering.
#' Calls [dbscan::hdbscan()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.hdbscan
#' @template learner
#'
#' @references
#' `r format_bib("campello2013density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      # Hyperparameters handed to dbscan::hdbscan() during training;
      # `minPts` is the only one tagged as required.
      hyperparams = ps(
        minPts = p_int(0L, tags = c("required", "train")),
        gen_hdbscan_tree = p_lgl(default = FALSE, tags = "train"),
        gen_simplified_tree = p_lgl(default = FALSE, tags = "train")
      )

      super$initialize(
        id = "clust.hdbscan",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        param_set = hyperparams,
        properties = c("partitional", "exclusive", "complete"),
        packages = "dbscan",
        man = "mlr3cluster::mlr_learners_clust.hdbscan",
        label = "HDBSCAN Clustering"
      )
    }
  ),
  private = list(
    # Fits dbscan::hdbscan() on the task data using the "train"-tagged
    # parameter values and returns the fit with the training data added
    # under `data`, where `.predict()` reads it back.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      fit = invoke(dbscan::hdbscan, x = task$data(), .args = train_args)
      model = insert_named(fit, list(data = task$data()))

      # Keep the training cluster labels on the learner when requested.
      if (self$save_assignments) {
        self$assignments = model$cluster
      }

      model
    },

    # Predicts cluster labels for the rows of `task` from the stored model
    # and its training data, coerces them to integer and wraps them in a
    # PredictionClust.
    .predict = function(task) {
      labels = invoke(predict, self$model, newdata = task$data(), data = self$model$data)
      PredictionClust$new(task = task, partition = as.integer(labels))
    }
  )
)

# Register the HDBSCAN learner class under the key "clust.hdbscan" in the
# `learners` collection. NOTE(review): `learners` is not defined in this file;
# presumably it is created in aaa.R, which the @include tag below asks
# roxygen2 to collate before this file -- confirm.
#' @include aaa.R
learners[["clust.hdbscan"]] = LearnerClustHDBSCAN
3 changes: 1 addition & 2 deletions R/LearnerClustHclust.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.hclust
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [stats::hclust()].
Expand Down Expand Up @@ -82,4 +80,5 @@ LearnerClustHclust = R6Class("LearnerClustHclust",
)
)

#' @include aaa.R
learners[["clust.hclust"]] = LearnerClustHclust
Loading

0 comments on commit 0134af3

Please sign in to comment.