
Tsvd fix #3

Merged: 79 commits, Oct 27, 2024

Changes from all commits (79 commits):
6438349 · Remove redundant sorting (#220) · msluszniak, Dec 14, 2023
99a5286 · Fix cubic spline when x isn't sorted (#219) · msluszniak, Dec 14, 2023
f651fdc · Standard Scaler fit-transform interface (#179) · santiago-imelio, Dec 14, 2023
c82c22a · Fix benchmarks · josevalim, Dec 11, 2023
b9ff291 · Simplify standard_scale delegate · josevalim, Dec 15, 2023
e8fee47 · Only provide defn version of KDTree · josevalim, Dec 18, 2023
f3b382b · Remove unused second argument · josevalim, Dec 18, 2023
2a0ef38 · No need for stable sorting once unique · josevalim, Dec 18, 2023
6c60968 · Replace second order argsort with permutation inverse (#223) · jonatanklosko, Dec 18, 2023
00f3de3 · Add other non-encoding preprocessing utilities as separate modules (#… · msluszniak, Dec 20, 2023
d030f00 · Add one line summaries back · josevalim, Dec 20, 2023
410ced4 · Improve docs · josevalim, Dec 20, 2023
d541516 · Add Random Projection Forests (#215) · krstopro, Dec 24, 2023
9819798 · Introduce encoders in separate modules (#225) · msluszniak, Dec 29, 2023
64d1840 · Add normalizer (#227) · msluszniak, Dec 29, 2023
0e99c60 · Move iota inside loop · josevalim, Dec 31, 2023
58ee5e3 · Upgrade to tucan 0.3.0 (#229) · pnezis, Jan 3, 2024
db9efd7 · Random Projection Forest improvements (#231) · krstopro, Jan 16, 2024
cd64e15 · LargeVis (#232) · krstopro, Jan 21, 2024
4dccc0a · Add Mean Pinball Loss function (#235) · JoaquinIglesiasTurina, Mar 4, 2024
ce39654 · Nn descent (#233) · msluszniak, Mar 5, 2024
fe5be67 · Update deps and changelog (#240) · josevalim, Mar 7, 2024
c5614a9 · Trimap (#236) · msluszniak, Mar 8, 2024
e037b24 · Update README.md (#243) · lkarthee, Mar 11, 2024
0619dc5 · Update NNDescent (#245) · msluszniak, Apr 5, 2024
c314152 · Multinomial Naive Bayes Improvements (#248) · krstopro, Apr 7, 2024
0d1bcc1 · Added ndcg metric (#251) · norm4nn, Apr 7, 2024
5c1786f · Brute k-NN (#257) · krstopro, Apr 16, 2024
15c2eb5 · Add det curve (#258) · srzeszut, Apr 17, 2024
f64e65a · Add bayesian ridge (#247) · JoaquinIglesiasTurina, Apr 21, 2024
3e83bbb · Fix bug with floating point data (#261) · msluszniak, Apr 24, 2024
eb63b68 · Update maintainers and ExDoc · josevalim, Apr 25, 2024
70a85ff · Properly cast mode to u8 in KDTree · josevalim, Apr 25, 2024
9a931aa · Update erts on CI · josevalim, Apr 25, 2024
06e43dd · Use functions for constants · josevalim, Apr 27, 2024
2489482 · Improvements to notebook titles · josevalim, May 7, 2024
e0e92d0 · Add livebook (#262) · msluszniak, May 8, 2024
ebdae8f · Output distances in kdtree (#264) · msluszniak, May 13, 2024
96c4e5b · Remove unecessary type merges · josevalim, May 14, 2024
ffaac87 · K-NN Classifier (#263) · krstopro, May 14, 2024
f358b24 · Unify neighbors metrics · josevalim, May 14, 2024
582b220 · Add a test for custom metric on BruteKNN · josevalim, May 14, 2024
322687a · Rename predict_proba to predict_probability · josevalim, May 14, 2024
74ed5fe · K-NN Regressor (#268) · krstopro, May 16, 2024
09d500a · Unify weight handling and refactor linear models' helper functions (#… · JoaquinIglesiasTurina, May 16, 2024
af3b8eb · Update knn notebooks (#269) · msluszniak, May 28, 2024
422cfea · Update notebooks · josevalim, May 28, 2024
92cb3d2 · More notebook fixes · josevalim, May 28, 2024
1e4a01c · More updates · josevalim, May 28, 2024
0b8214e · Use latest ExDoc · josevalim, May 28, 2024
d3a4f64 · Add pending modules to sidebar · josevalim, May 28, 2024
2493110 · Update CHANGELOG · josevalim, May 28, 2024
e1897fa · Rename `RandomForestTree` to `RandomProjectionForest` in CHANGELOG.md… · krstopro, May 28, 2024
0698d44 · Update NN files (#271) · msluszniak, May 28, 2024
a66a3ad · Release v0.3.0 · josevalim, May 29, 2024
6fdf3a7 · Update ExDoc · josevalim, May 30, 2024
32a5b56 · use Nx.BinaryBackend for cv notebook (#274) · santiago-imelio, Jun 1, 2024
341301b · Added d2_pinball_score and d2_absolute_error_score (#277) · norm4nn, Jun 7, 2024
09c5ac6 · Update EXGBoost version in notebook (#279) · acalejos, Jun 8, 2024
433041f · Make nn algorithm configurable (#281) · msluszniak, Jun 14, 2024
accb6b7 · Manifold learning notebooks (#278) · msluszniak, Jun 16, 2024
fbe2089 · Update mix.exs (#282) · msluszniak, Jun 18, 2024
e0ada5e · Release v0.3.1 · josevalim, Jun 18, 2024
d570f48 · Remove Tucan.layers/1 with a single layer (#283) · jonatanklosko, Jun 20, 2024
8712e96 · Fix typo in dimensionality reduction notebook (#285) · krstopro, Jun 24, 2024
e08a802 · Fix various typos and improve language (#292) · preciz, Jul 30, 2024
66ec4c8 · Add partial_fit/2 and incremental_fit/2 to PCA (#291) · krstopro, Jul 30, 2024
e8a45a3 · Fix/linear shapes (#288) · JoaquinIglesiasTurina, Jul 30, 2024
7050d32 · Improvements to OrdinalEncoder, OneHotEncoder, NaiveBayes, LogisticRe… · krstopro, Aug 1, 2024
2dca4aa · Bug fix. (#299) · krstopro, Sep 4, 2024
1765930 · Split RadiusNearestNeighbors module into RNNClassifier and RNNRegress… · norm4nn, Sep 10, 2024
4dec8ba · Fix doctests by avoiding nesting #Nx.Tensor · josevalim, Sep 10, 2024
017e29b · RNN -> RadiusNN · josevalim, Sep 10, 2024
cea4657 · Add OPTICS clustering algorithm (#295) · norm4nn, Sep 12, 2024
975938a · Add batching to regression metrics (#297) · norm4nn, Sep 13, 2024
2a601cc · Add TruncatedSVD module (#302) · norm4nn, Oct 4, 2024
157beb8 · fixed tsvd bug · norm4nn, Oct 26, 2024
9e7c645 · added test · norm4nn, Oct 27, 2024
1231c13 · mix format · norm4nn, Oct 27, 2024
.github/workflows/ci.yml (2 changes: 1 addition, 1 deletion)

@@ -15,7 +15,7 @@ jobs:
       otp: "26.1"
       lint: true
     - elixir: "1.14.5"
-      otp: "25.3"
+      otp: "26.1"
     steps:
       - uses: actions/checkout@v2
CHANGELOG.md (35 changes: 33 additions, 2 deletions)

@@ -1,6 +1,37 @@
 # Changelog

-## v0.2.2-dev
+## v0.3.1 (2024-06-18)
+
+### Enhancements
+
+* Add a notebook about manifold learning
+* Make knn algorithm configurable on Trimap
+* Add `d2_pinball_score` and `d2_absolute_error_score`
+
+## v0.3.0 (2024-05-29)
+
+### Enhancements
+
+* Add LargeVis for visualization of large-scale and high-dimensional data in a low-dimensional (typically 2D or 3D) space
+* Add `Scholar.Neighbors.KDTree` and `Scholar.Neighbors.RandomProjectionForest`
+* Add `Scholar.Metrics.Neighbors`
+* Add `Scholar.Linear.BayesianRidgeRegression`
+* Add `Scholar.Cluster.Hierarchical`
+* Add `Scholar.Manifold.Trimap`
+* Add Mean Pinball Loss function
+* Add Matthews Correlation Coefficient function
+* Add D2 Tweedie Score function
+* Add Mean Tweedie Deviance function
+* Add Discounted Cumulative Gain function
+* Add Precision Recall f-score function
+* Add f-beta score function
+* Add convergence check to AffinityPropagation
+* Default Affinity Propagation preference to `reduce_min` and make it customizable
+* Move preprocessing functionality to their own modules with `fit` and `fit_transform` callbacks
+
+### Breaking changes
+
+* Split `KNearestNeighbors` into `KNNClassifier` and `KNNRegressor` with custom algorithm support
+
 ## v0.2.1 (2023-08-30)

@@ -21,7 +52,7 @@ This version requires Elixir v1.14+.
 * Add `t-SNE`
 * Add `Polynomial Regression`
 * Replace seeds with `Random.key`
-* Add 'unrolling loops' option
+* Add 'unrolling loops' option
 * Add support for custom optimizers in `Logistic Regression`
 * Add `Trapezoidal Integration`
 * Add `AUC-ROC`, `AUC`, and `ROC Curve`
README.md (8 changes: 4 additions, 4 deletions)

@@ -23,7 +23,7 @@ Add to your `mix.exs`:
 ```elixir
 def deps do
   [
-    {:scholar, "~> 0.2.1"}
+    {:scholar, "~> 0.3.0"}
   ]
 end
 ```

@@ -34,7 +34,7 @@ such as EXLA:
 ```elixir
 def deps do
   [
-    {:scholar, "~> 0.2.1"},
+    {:scholar, "~> 0.3.0"},
     {:exla, ">= 0.0.0"}
   ]
 end

@@ -64,12 +64,12 @@ To use Scholar inside code notebooks, run:

 ```elixir
 Mix.install([
-  {:scholar, "~> 0.2.1"},
+  {:scholar, "~> 0.3.0"},
   {:exla, ">= 0.0.0"}
 ])

 Nx.global_default_backend(EXLA.Backend)
-# Client can also be set to :cuda / :romc
+# Client can also be set to :cuda / :rocm
 Nx.Defn.global_default_options(compiler: EXLA, client: :host)
 ```
benchmarks/kd_tree.exs (15 changes: 0 additions, 15 deletions)

This file was deleted.
benchmarks/knn.exs (9 changes: 7 additions, 2 deletions)

@@ -13,11 +13,16 @@ inputs_knn = %{
 Benchee.run(
   %{
     "kdtree" => fn x ->
-      kdtree = Scholar.Neighbors.KDTree.fit_bounded(x, Nx.axis_size(x, 0))
+      kdtree = Scholar.Neighbors.KDTree.fit(x)
       Scholar.Neighbors.KDTree.predict(kdtree, x, k: 4)
     end,
     "brute force knn" => fn x ->
-      model = Scholar.Neighbors.KNearestNeighbors.fit(x, Nx.broadcast(1, {Nx.axis_size(x, 0)}), num_classes: 2, num_neighbors: 4)
+      model =
+        Scholar.Neighbors.KNearestNeighbors.fit(x, Nx.broadcast(1, {Nx.axis_size(x, 0)}),
+          num_classes: 2,
+          num_neighbors: 4
+        )
+
       Scholar.Neighbors.KNearestNeighbors.k_neighbors(model, x)
     end
   },
lib/scholar/cluster/affinity_propagation.ex (8 changes: 4 additions, 4 deletions)

@@ -1,9 +1,9 @@
 defmodule Scholar.Cluster.AffinityPropagation do
   @moduledoc """
   Model representing affinity propagation clustering. The first dimension
-  of `:clusters_centers` is set to the number of samples in the dataset.
-  The artificial centers are filled with `:infinity` values. To fillter
-  them out use `prune` function.
+  of `:cluster_centers` is set to the number of samples in the dataset.
+  The artificial centers are filled with `:infinity` values. To filter
+  them out use the `prune` function.

   The algorithm has a time complexity of the order $O(N^2T)$, where $N$ is
   the number of samples and $T$ is the number of iterations until convergence.

@@ -91,7 +91,7 @@ defmodule Scholar.Cluster.AffinityPropagation do

   The function returns a struct with the following parameters:

-    * `:clusters_centers` - Cluster centers from the initial data.
+    * `:cluster_centers` - Cluster centers from the initial data.

     * `:cluster_centers_indices` - Indices of cluster centers.
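The `prune/1` step referenced in the corrected moduledoc, as a hedged sketch; the `fit/2` option names are assumed from the Scholar docs rather than from this diff:

```elixir
# Sketch: fit affinity propagation, then drop the artificial :infinity
# rows from :cluster_centers with prune/1 before predicting.
key = Nx.Random.key(7)
{x, _key} = Nx.Random.uniform(key, shape: {30, 2})

model = Scholar.Cluster.AffinityPropagation.fit(x, key: key)
pruned = Scholar.Cluster.AffinityPropagation.prune(model)
Scholar.Cluster.AffinityPropagation.predict(pruned, x)
```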
lib/scholar/cluster/dbscan.ex (18 changes: 10 additions, 8 deletions)

@@ -32,15 +32,17 @@ defmodule Scholar.Cluster.DBSCAN do
         type: :integer
       ],
       metric: [
-        type: {:custom, Scholar.Options, :metric, []},
-        default: {:minkowski, 2},
+        type: {:custom, Scholar.Neighbors.Utils, :pairwise_metric, []},
+        default: &Scholar.Metrics.Distance.pairwise_minkowski/2,
         doc: ~S"""
-        Name of the metric. Possible values:
+        The function that measures the pairwise distance between two points. Possible values:

         * `{:minkowski, p}` - Minkowski metric. By changing value of `p` parameter (a positive number or `:infinity`)
-        we can set Manhattan (`1`), Euclidean (`2`), Chebyshev (`:infinity`), or any arbitrary $L_p$ metric.
+          we can set Manhattan (`1`), Euclidean (`2`), Chebyshev (`:infinity`), or any arbitrary $L_p$ metric.

         * `:cosine` - Cosine metric.
+
+        * Anonymous function of arity 2 that takes two rank-2 tensors.
         """
       ],
       weights: [

@@ -96,17 +98,17 @@ defmodule Scholar.Cluster.DBSCAN do
     y_dummy = Nx.broadcast(Nx.tensor(0), {num_samples})

     neighbor_model =
-      Scholar.Neighbors.RadiusNearestNeighbors.fit(x, y_dummy,
+      Scholar.Neighbors.RadiusNNClassifier.fit(x, y_dummy,
         num_classes: 1,
         radius: opts[:eps],
         metric: opts[:metric]
       )

     {_dist, indices} =
-      Scholar.Neighbors.RadiusNearestNeighbors.radius_neighbors(neighbor_model, x)
+      Scholar.Neighbors.RadiusNNClassifier.radius_neighbors(neighbor_model, x)

-    n_neigbors = Nx.sum(indices * weights, axes: [1])
-    core_samples = n_neigbors >= opts[:min_samples]
+    n_neighbors = Nx.sum(indices * weights, axes: [1])
+    core_samples = n_neighbors >= opts[:min_samples]
     labels = dbscan_inner(core_samples, indices)

     %__MODULE__{
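To illustrate the new `:metric` contract above, a minimal sketch: any arity-2 function over rank-2 tensors is accepted, alongside the `pairwise_minkowski/2` default (data values invented for illustration):

```elixir
x = Nx.tensor([[1.0, 2.0], [1.5, 1.8], [8.0, 8.0], [8.2, 7.9]])

# Explicit default: pairwise Minkowski with p = 2 (Euclidean).
Scholar.Cluster.DBSCAN.fit(x,
  eps: 1.0,
  min_samples: 2,
  metric: &Scholar.Metrics.Distance.pairwise_minkowski/2
)

# Custom pairwise metric: squared Euclidean, built by broadcasting
# {n, 1, d} against {1, m, d} and summing over the feature axis.
pairwise_sq_euclidean = fn a, b ->
  a
  |> Nx.new_axis(1)
  |> Nx.subtract(Nx.new_axis(b, 0))
  |> Nx.pow(2)
  |> Nx.sum(axes: [-1])
end

Scholar.Cluster.DBSCAN.fit(x, eps: 1.0, min_samples: 2, metric: pairwise_sq_euclidean)
```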
lib/scholar/cluster/hierarchical.ex (3 changes: 1 addition, 2 deletions)

@@ -158,8 +158,7 @@ defmodule Scholar.Cluster.Hierarchical do

     dendrogram_fun =
       case linkage do
-        # TODO: :centroid, :median
-        # TODO: :ward
+        # TODO: :centroid, :median, :ward
         l when l in [:average, :complete, :single, :weighted] ->
          &parallel_nearest_neighbor/3
       end
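A quick sketch of the four linkages the `case` above currently dispatches (`:centroid`, `:median`, and `:ward` remain TODO); `fit/2` taking a `:linkage` option is an assumption from the Scholar docs:

```elixir
# Sketch: exercise each supported linkage on a toy dataset.
x = Nx.tensor([[1.0, 1.0], [1.5, 1.2], [8.0, 8.0], [8.5, 8.1]])

for linkage <- [:average, :complete, :single, :weighted] do
  Scholar.Cluster.Hierarchical.fit(x, linkage: linkage)
end
```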
lib/scholar/cluster/k_means.ex (2 changes: 1 addition, 1 deletion)

@@ -2,7 +2,7 @@ defmodule Scholar.Cluster.KMeans do
   @moduledoc """
   K-Means Algorithm.

-  K-Means is simple clustering method that works iteratively [1]. In the first iteration,
+  K-Means is a simple clustering method that works iteratively [1]. In the first iteration,
   centroids are chosen randomly from input data. It turned out that some initializations
   are especially effective. In 2007 David Arthur and Sergei Vassilvitskii proposed initialization
   called k-means++ which speed up convergence of algorithm drastically [2]. After initialization, from each centroid
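A minimal sketch of the k-means++ flow the moduledoc describes; the option names (`:num_clusters`, `:init`, `:key`) are assumed from the Scholar docs, not from this diff:

```elixir
# Sketch: fit two clusters with k-means++ initialization, then predict.
key = Nx.Random.key(0)
x = Nx.tensor([[1.0, 1.0], [1.2, 0.9], [0.8, 1.1], [8.0, 8.0], [8.1, 7.9]])

model =
  Scholar.Cluster.KMeans.fit(x,
    num_clusters: 2,
    init: :k_means_plus_plus,
    key: key
  )

Scholar.Cluster.KMeans.predict(model, Nx.tensor([[0.9, 1.0]]))
```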