From 8c09f7fad193bdb853325ea618b63d2c80b144e0 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Fri, 16 Feb 2024 13:36:02 -0500
Subject: [PATCH 01/29] init split condition injection

---
 sklearn/tree/_splitter.pxd | 5 +++++
 sklearn/tree/_splitter.pyx | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index f1434f5d05cc9..3169a9198d3f1 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -19,6 +19,8 @@ from ._utils cimport UINT32_t

 from ._criterion cimport BaseCriterion, Criterion

+ctypedef bint (*SplitCondition)(Splitter*)
+
 cdef struct SplitRecord:
     # Data to track sample split
     intp_t feature              # Which feature to split on.
@@ -112,6 +114,9 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

+    cdef SplitCondition[:] pre_split_conditions
+    cdef SplitCondition[:] post_split_conditions
+
     cdef int init(
         self,
         object X,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 1f781e55350d2..2352862e67f48 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -155,6 +155,8 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
+        SplitCondition[:] pre_split_conditions=[],
+        SplitCondition[:] post_split_conditions=[],
         *argv
     ):
         """
@@ -195,6 +197,9 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

+        self.pre_split_conditions = pre_split_conditions
+        self.post_split_conditions = post_split_conditions
+
     def __reduce__(self):
         return (type(self), (self.criterion,
                              self.max_features,

From ecfc9b1d1e6f89c476dc2231d9cda3a484c456e9 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Fri, 16 Feb 2024 14:50:27 -0500
Subject: [PATCH 02/29] wip

---
 sklearn/tree/_splitter.pxd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 3169a9198d3f1..04929e679b024 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -19,7 +19,7 @@ from ._utils cimport UINT32_t

 from ._criterion cimport BaseCriterion, Criterion

-ctypedef bint (*SplitCondition)(Splitter*)
+ctypedef bint (*SplitCondition)(Splitter splitter)

 cdef struct SplitRecord:
     # Data to track sample split

From 0c3d5c0f2a1ac6c8ec8ab9a7fa8bb1af8e721797 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Fri, 16 Feb 2024 15:11:51 -0500
Subject: [PATCH 03/29] wip

---
 sklearn/tree/_splitter.pxd | 4 ++--
 sklearn/tree/_splitter.pyx | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 04929e679b024..b8f8d9cfb19f4 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -114,8 +114,8 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

-    cdef SplitCondition[:] pre_split_conditions
-    cdef SplitCondition[:] post_split_conditions
+    cdef SplitCondition[] pre_split_conditions
+    cdef SplitCondition[] post_split_conditions

     cdef int init(
         self,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 2352862e67f48..beb0ebae3136d 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -155,8 +155,8 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
-        SplitCondition[:] pre_split_conditions=[],
-        SplitCondition[:] post_split_conditions=[],
+        SplitCondition[] pre_split_conditions=[],
+        SplitCondition[] post_split_conditions=[],
         *argv
     ):
         """
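The `ctypedef` introduced in patch 01 is the core of the design: a `SplitCondition` is a plain C function pointer, so it can be stored and invoked without the GIL, unlike a Python callable. A minimal self-contained sketch of the pattern, independent of the sklearn code (all names here are hypothetical):

    # condition_sketch.pyx -- illustrative only; compile with cythonize
    ctypedef bint (*Condition)(int n_samples) noexcept nogil

    cdef bint has_samples(int n_samples) noexcept nogil:
        return n_samples > 0

    cdef bint not_too_many(int n_samples) noexcept nogil:
        return n_samples < 10

    cdef bint all_hold(Condition* conditions, int n_conditions, int n_samples) noexcept nogil:
        # Function pointers can be called here; Python callables could not be.
        cdef int i
        for i in range(n_conditions):
            if not conditions[i](n_samples):
                return False
        return True

    def check(int n_samples):
        cdef Condition conditions[2]
        conditions[0] = has_samples
        conditions[1] = not_too_many
        return all_hold(conditions, 2, n_samples)

One wrinkle worth flagging: `SplitCondition[:]` declares a typed memoryview, and a function pointer is not a supported memoryview item type in Cython, which is presumably why patches 02 through 07 keep reworking the storage.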
From 5fd12a2c42db768aaffbd73801fe5e0a2b477089 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Tue, 20 Feb 2024 11:52:26 -0500
Subject: [PATCH 04/29] wip

---
 sklearn/tree/_splitter.pxd | 3 ---
 sklearn/tree/_splitter.pyx | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index b8f8d9cfb19f4..2e277e0b1d13f 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -114,9 +114,6 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

-    cdef SplitCondition[] pre_split_conditions
-    cdef SplitCondition[] post_split_conditions
-
     cdef int init(
         self,
         object X,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index beb0ebae3136d..1f781e55350d2 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -155,8 +155,6 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
-        SplitCondition[] pre_split_conditions=[],
-        SplitCondition[] post_split_conditions=[],
         *argv
     ):
         """
@@ -197,9 +195,6 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

-        self.pre_split_conditions = pre_split_conditions
-        self.post_split_conditions = post_split_conditions
-
     def __reduce__(self):
         return (type(self), (self.criterion,
                              self.max_features,

From b593ee024ad932a93bbc8fb2797a54a981c35604 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 26 Feb 2024 19:09:10 -0500
Subject: [PATCH 05/29] injection progress

---
 sklearn/tree/_splitter.pxd |  9 ++++++++-
 sklearn/tree/_splitter.pyx | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 2e277e0b1d13f..3cd2d1dd3898a 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -19,7 +19,11 @@ from ._utils cimport UINT32_t

 from ._criterion cimport BaseCriterion, Criterion

-ctypedef bint (*SplitCondition)(Splitter splitter)
+ctypedef bint (*SplitCondition)(Splitter splitter) noexcept nogil
+
+cdef class SplitConditions:
+    cdef vector[SplitCondition] value
+

 cdef struct SplitRecord:
     # Data to track sample split
@@ -114,6 +118,9 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

+    cdef public SplitConditions presplit_conditions
+    cdef public SplitConditions postsplit_conditions
+
     cdef int init(
         self,
         object X,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 1f781e55350d2..260d571f71392 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -43,6 +43,23 @@ cdef float32_t FEATURE_THRESHOLD = 1e-7
 # in SparsePartitioner
 cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

+cdef bint condition1(Splitter splitter) noexcept nogil:
+    cdef bint bar = splitter.n_samples > 0
+
+    return 1
+
+cdef class SplitConditions:
+    def __init__(self, n):
+        self.value.resize(n)
+
+def foo():
+    presplit_conditions = SplitConditions(2)
+    presplit_conditions.value[0] = condition1
+    presplit_conditions.value[1] = condition1
+
+    postsplit_conditions = SplitConditions(1)
+    postsplit_conditions = condition1
+
 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:
     self.impurity_left = INFINITY
     self.impurity_right = INFINITY
@@ -155,6 +172,8 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
+        SplitConditions presplit_conditions=None,
+        SplitConditions postsplit_conditions=None,
         *argv
     ):
         """
@@ -195,6 +214,9 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

+        self.presplit_conditions = presplit_conditions
+        self.postsplit_conditions = postsplit_conditions
+
     def __reduce__(self):
         return (type(self), (self.criterion,
                              self.max_features,
@@ -602,6 +624,11 @@ cdef inline intp_t node_split_best(
                 n_right = end_non_missing - current_split.pos + n_missing
             if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
                 continue
+
+            if splitter.presplit_conditions is not None:
+                for condition in splitter.presplit_conditions.value:
+                    if condition(splitter):
+                        continue

             criterion.update(current_split.pos)

@@ -620,6 +647,11 @@ cdef inline intp_t node_split_best(
             # Reject if min_weight_leaf is not satisfied
             if splitter.check_postsplit_conditions() == 1:
                 continue
+
+            if splitter.postsplit_conditions is not None:
+                for condition in splitter.postsplit_conditions.value:
+                    if condition(splitter):
+                        continue

             current_proxy_improvement = criterion.proxy_impurity_improvement()

From 180fac32308195301e80d574b9b026fc66fece8b Mon Sep 17 00:00:00 2001
From: scarliles
Date: Tue, 27 Feb 2024 13:51:32 -0500
Subject: [PATCH 06/29] injection progress

---
 sklearn/tree/_splitter.pyx | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 260d571f71392..fd65568963a43 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -44,9 +44,7 @@ cdef float32_t FEATURE_THRESHOLD = 1e-7
 cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

 cdef bint condition1(Splitter splitter) noexcept nogil:
-    cdef bint bar = splitter.n_samples > 0
-
-    return 1
+    return splitter.n_samples > 0

 cdef class SplitConditions:
     def __init__(self, n):
@@ -58,7 +56,7 @@ def foo():
     presplit_conditions.value[1] = condition1

     postsplit_conditions = SplitConditions(1)
-    postsplit_conditions = condition1
+    postsplit_conditions.value[0] = condition1

From c207c3e220f6bf7bb699660da9a28a96834f01bc Mon Sep 17 00:00:00 2001
From: scarliles
Date: Tue, 27 Feb 2024 14:45:32 -0500
Subject: [PATCH 07/29] split injection refactoring

---
 sklearn/tree/_splitter.pxd |  7 ++-----
 sklearn/tree/_splitter.pyx | 34 ++++++++++++++--------------------
 2 files changed, 16 insertions(+), 25 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 3cd2d1dd3898a..37e3554f36dd4 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -21,9 +21,6 @@ from ._criterion cimport BaseCriterion, Criterion

 ctypedef bint (*SplitCondition)(Splitter splitter) noexcept nogil

-cdef class SplitConditions:
-    cdef vector[SplitCondition] value
-

 cdef struct SplitRecord:
     # Data to track sample split
@@ -118,8 +115,8 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

-    cdef public SplitConditions presplit_conditions
-    cdef public SplitConditions postsplit_conditions
+    cdef vector[SplitCondition] presplit_conditions
+    cdef vector[SplitCondition] postsplit_conditions

     cdef int init(
         self,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index fd65568963a43..92c7a082283fe 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -46,17 +46,17 @@ cdef float32_t EXTRACT_NNZ_SWITCH = 0.1
 cdef bint condition1(Splitter splitter) noexcept nogil:
     return splitter.n_samples > 0

-cdef class SplitConditions:
-    def __init__(self, n):
-        self.value.resize(n)
+cdef bint condition2(Splitter splitter) noexcept nogil:
+    return splitter.n_samples < 10

 def foo():
-    presplit_conditions = SplitConditions(2)
-    presplit_conditions.value[0] = condition1
-    presplit_conditions.value[1] = condition1
+    splitter = Splitter()
+
+    splitter.presplit_conditions.push_back(condition1)
+    splitter.presplit_conditions.push_back(condition2)
+
+    splitter.postsplit_conditions.push_back(condition1)

-    postsplit_conditions = SplitConditions(1)
-    postsplit_conditions.value[0] = condition1

 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:
     self.impurity_left = INFINITY
@@ -170,8 +170,6 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
-        SplitConditions presplit_conditions=None,
-        SplitConditions postsplit_conditions=None,
         *argv
     ):
         """
@@ -212,8 +210,6 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

-        self.presplit_conditions = presplit_conditions
-        self.postsplit_conditions = postsplit_conditions

     def __reduce__(self):
         return (type(self), (self.criterion,
@@ -623,10 +619,9 @@ cdef inline intp_t node_split_best(
             if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
                 continue

-            if splitter.presplit_conditions is not None:
-                for condition in splitter.presplit_conditions.value:
-                    if condition(splitter):
-                        continue
+            for condition in splitter.presplit_conditions:
+                if condition(splitter):
+                    continue

             criterion.update(current_split.pos)

@@ -646,10 +641,9 @@ cdef inline intp_t node_split_best(
             if splitter.check_postsplit_conditions() == 1:
                 continue

-            if splitter.postsplit_conditions is not None:
-                for condition in splitter.postsplit_conditions.value:
-                    if condition(splitter):
-                        continue
+            for condition in splitter.postsplit_conditions:
+                if condition(splitter):
+                    continue

             current_proxy_improvement = criterion.proxy_impurity_improvement()
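Patch 07 lands on the storage that survives the rest of the series: a `libcpp.vector` of function pointers held directly on the splitter, which Cython can iterate in performance-critical code. A compilable sketch of that idiom in isolation (hypothetical names; requires a C++ build):

    # distutils: language = c++
    # vector_sketch.pyx -- illustrative only, not part of the patch series
    from libcpp.vector cimport vector

    ctypedef bint (*Condition)(int x) noexcept nogil

    cdef bint positive(int x) noexcept nogil:
        return x > 0

    cdef bint small(int x) noexcept nogil:
        return x < 10

    def passes_all(int x):
        cdef vector[Condition] conditions
        conditions.push_back(positive)
        conditions.push_back(small)

        cdef bint ok = True
        cdef size_t i
        with nogil:                      # the vector is usable without the GIL
            for i in range(conditions.size()):
                if not conditions[i](x):
                    ok = False
                    break
        return ok

Note that in `node_split_best` the injected `continue` only advances the inner `for condition in ...` loop, not the surrounding split-position loop, so a failing condition does not yet reject the candidate split; patch 13 later addresses this with an explicit `conditions_hold` flag.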
From 7cc71c10c49265cf581efb1637b17af142bb7d29 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Thu, 29 Feb 2024 11:04:19 -0800
Subject: [PATCH 08/29] added condition parameter passthrough prototype

---
 sklearn/tree/_splitter.pxd | 25 ++++++++++++++++++++++---
 sklearn/tree/_splitter.pyx | 33 ++++++++++++++++++++-------------
 2 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 37e3554f36dd4..9eec9dd9afad8 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -19,7 +19,26 @@ from ._utils cimport UINT32_t

 from ._criterion cimport BaseCriterion, Criterion

-ctypedef bint (*SplitCondition)(Splitter splitter) noexcept nogil
+ctypedef void *SplitConditionParameters
+ctypedef bint (*SplitCondition)(Splitter splitter, void* split_condition_parameters) noexcept nogil
+
+cdef struct SplitConditionTuple:
+    SplitCondition f
+    SplitConditionParameters p
+
+cdef struct DummyParameters:
+    int dummy
+
+cdef struct Condition1Parameters:
+    int some_number
+
+cdef inline bint condition1(Splitter splitter, void* split_condition_parameters) noexcept nogil:
+    cdef Condition1Parameters* p = <Condition1Parameters*>split_condition_parameters
+
+    return splitter.n_samples > 0 and p.some_number < 1000
+
+cdef inline bint condition2(Splitter splitter, void* split_condition_parameters) noexcept nogil:
+    return splitter.n_samples < 10

 cdef struct SplitRecord:
     # Data to track sample split
@@ -115,8 +134,8 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

-    cdef vector[SplitCondition] presplit_conditions
-    cdef vector[SplitCondition] postsplit_conditions
+    cdef vector[SplitConditionTuple] presplit_conditions
+    cdef vector[SplitConditionTuple] postsplit_conditions

     cdef int init(
         self,
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 92c7a082283fe..cc047ac605749 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -19,7 +19,7 @@ from cython cimport final

 from libc.math cimport isnan
-from libc.stdlib cimport qsort
+from libc.stdlib cimport qsort, malloc, free
 from libc.string cimport memcpy

 cimport numpy as cnp
@@ -43,19 +43,26 @@ cdef float32_t FEATURE_THRESHOLD = 1e-7
 # in SparsePartitioner
 cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

-cdef bint condition1(Splitter splitter) noexcept nogil:
-    return splitter.n_samples > 0
+from ._tree cimport Tree
+cdef class FooTree(Tree):
+    cdef Condition1Parameters* c1p
+    cdef DummyParameters* dummy_params

-cdef bint condition2(Splitter splitter) noexcept nogil:
-    return splitter.n_samples < 10
+    def __init__(self):
+        splitter = Splitter()
+        self.c1p = <Condition1Parameters*>malloc(sizeof(Condition1Parameters))
+        self.c1p.some_number = 5

-def foo():
-    splitter = Splitter()
+        self.dummy_params = <DummyParameters*>malloc(sizeof(DummyParameters))

-    splitter.presplit_conditions.push_back(condition1)
-    splitter.presplit_conditions.push_back(condition2)
-
-    splitter.postsplit_conditions.push_back(condition1)
+        splitter.presplit_conditions.push_back(SplitConditionTuple(condition1, self.c1p))
+        splitter.presplit_conditions.push_back(SplitConditionTuple(condition2, self.dummy_params))
+
+    def __dealloc__(self):
+        if self.c1p is not NULL:
+            free(self.c1p)
+        if self.dummy_params is not NULL:
+            free(self.dummy_params)

 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:
@@ -620,7 +627,7 @@ cdef inline intp_t node_split_best(
                 continue

             for condition in splitter.presplit_conditions:
-                if condition(splitter):
+                if not condition.f(splitter, condition.p):
                     continue

             criterion.update(current_split.pos)
@@ -642,7 +649,7 @@ cdef inline intp_t node_split_best(
                 continue

             for condition in splitter.postsplit_conditions:
-                if condition(splitter):
+                if not condition.f(splitter, condition.p):
                     continue

             current_proxy_improvement = criterion.proxy_impurity_improvement()
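The `(function pointer, void*)` pair introduced in patch 08 is the classic C callback idiom: the `void*` lets each condition carry its own parameter struct through a uniform signature, at the cost of a cast inside the callback. A standalone sketch of the pattern (all names hypothetical):

    # void_star_params.pyx -- illustrative only
    from libc.stdlib cimport malloc, free

    ctypedef void* Params
    ctypedef bint (*Condition)(int n, Params p) noexcept nogil

    cdef struct MinCountParams:
        int min_count

    cdef bint min_count_condition(int n, Params p) noexcept nogil:
        # Recover the typed view of the opaque parameter block.
        return n >= (<MinCountParams*>p).min_count

    def demo(int n):
        cdef MinCountParams* p = <MinCountParams*>malloc(sizeof(MinCountParams))
        if p == NULL:
            raise MemoryError()
        p.min_count = 5
        try:
            return min_count_condition(n, p)
        finally:
            free(p)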
From 2470d492c6cf52b5cad1bbeec7e272e56c4470cd Mon Sep 17 00:00:00 2001
From: scarliles
Date: Thu, 29 Feb 2024 11:32:42 -0800
Subject: [PATCH 09/29] some tidying

---
 sklearn/tree/_splitter.pxd | 21 ++++++++++++++++++---
 sklearn/tree/_splitter.pyx | 15 +++++++--------
 2 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 9eec9dd9afad8..6b20fec2a56dc 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -13,6 +13,7 @@ cimport numpy as cnp

 from libcpp.vector cimport vector
+from libc.stdlib cimport malloc

 from ..utils._typedefs cimport float32_t, float64_t, intp_t, int32_t
 from ._utils cimport UINT32_t
@@ -20,7 +21,7 @@ from ._criterion cimport BaseCriterion, Criterion

 ctypedef void *SplitConditionParameters
-ctypedef bint (*SplitCondition)(Splitter splitter, void* split_condition_parameters) noexcept nogil
+ctypedef bint (*SplitCondition)(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil

 cdef struct SplitConditionTuple:
     SplitCondition f
     SplitConditionParameters p

 cdef struct DummyParameters:
     int dummy

+cdef inline DummyParameters* create_dummy_parameters(int dummy):
+    cdef DummyParameters* result = <DummyParameters*>malloc(sizeof(DummyParameters))
+    if result == NULL:
+        return NULL
+    result.dummy = dummy
+    return result
+
 cdef struct Condition1Parameters:
     int some_number

+cdef inline Condition1Parameters* create_condition1_parameters(int some_number):
+    cdef Condition1Parameters* result = <Condition1Parameters*>malloc(sizeof(Condition1Parameters))
+    if result == NULL:
+        return NULL
+    result.some_number = some_number
+    return result
+
-cdef inline bint condition1(Splitter splitter, void* split_condition_parameters) noexcept nogil:
+cdef inline bint condition1(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
     cdef Condition1Parameters* p = <Condition1Parameters*>split_condition_parameters

     return splitter.n_samples > 0 and p.some_number < 1000

-cdef inline bint condition2(Splitter splitter, void* split_condition_parameters) noexcept nogil:
+cdef inline bint condition2(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
     return splitter.n_samples < 10

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index cc047ac605749..d6d191462bff3 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -19,7 +19,7 @@ from cython cimport final

 from libc.math cimport isnan
-from libc.stdlib cimport qsort, malloc, free
+from libc.stdlib cimport qsort, free
 from libc.string cimport memcpy

 cimport numpy as cnp
@@ -45,18 +45,17 @@ cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

 from ._tree cimport Tree
 cdef class FooTree(Tree):
+    cdef Splitter splitter
     cdef Condition1Parameters* c1p
     cdef DummyParameters* dummy_params

     def __init__(self):
-        splitter = Splitter()
-        self.c1p = <Condition1Parameters*>malloc(sizeof(Condition1Parameters))
-        self.c1p.some_number = 5
+        self.c1p = create_condition1_parameters(5)
+        self.dummy_params = create_dummy_parameters(0)

-        self.dummy_params = <DummyParameters*>malloc(sizeof(DummyParameters))
-
-        splitter.presplit_conditions.push_back(SplitConditionTuple(condition1, self.c1p))
-        splitter.presplit_conditions.push_back(SplitConditionTuple(condition2, self.dummy_params))
+        self.splitter = Splitter()
+        self.splitter.presplit_conditions.push_back(SplitConditionTuple(condition1, self.c1p))
+        self.splitter.presplit_conditions.push_back(SplitConditionTuple(condition2, self.dummy_params))

     def __dealloc__(self):
         if self.c1p is not NULL:

From ee3399faf3e2d01f0ccf05e3b7083fe7cbd287c6 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Thu, 29 Feb 2024 12:45:48 -0800
Subject: [PATCH 10/29] more tidying

---
 sklearn/tree/_splitter.pxd | 30 ++++++++++--------------------
 sklearn/tree/_splitter.pyx | 16 ++++++----------
 2 files changed, 16 insertions(+), 30 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 6b20fec2a56dc..1620d744d75c0 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -27,33 +27,23 @@ cdef struct SplitConditionTuple:
     SplitCondition f
     SplitConditionParameters p

-cdef struct DummyParameters:
-    int dummy
-
-cdef inline DummyParameters* create_dummy_parameters(int dummy):
-    cdef DummyParameters* result = <DummyParameters*>malloc(sizeof(DummyParameters))
-    if result == NULL:
-        return NULL
-    result.dummy = dummy
-    return result
+cdef inline bint has_data_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+    return splitter.n_samples < 10

-cdef struct Condition1Parameters:
-    int some_number
+cdef struct AlphaRegularityParameters:
+    float64_t alpha

-cdef inline Condition1Parameters* create_condition1_parameters(int some_number):
-    cdef Condition1Parameters* result = <Condition1Parameters*>malloc(sizeof(Condition1Parameters))
+cdef inline AlphaRegularityParameters* create_alpha_regularity_parameters(float64_t alpha):
+    cdef AlphaRegularityParameters* result = <AlphaRegularityParameters*>malloc(sizeof(AlphaRegularityParameters))
     if result == NULL:
         return NULL
-    result.some_number = some_number
+    result.alpha = alpha
     return result

-cdef inline bint condition1(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
-    cdef Condition1Parameters* p = <Condition1Parameters*>split_condition_parameters
+cdef inline bint alpha_regularity_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+    cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters

-    return splitter.n_samples > 0 and p.some_number < 1000
-
-cdef inline bint condition2(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
-    return splitter.n_samples < 10
+    return 1

 cdef struct SplitRecord:
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index d6d191462bff3..40c20dad96042 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -46,22 +46,18 @@ cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

 from ._tree cimport Tree
 cdef class FooTree(Tree):
     cdef Splitter splitter
-    cdef Condition1Parameters* c1p
-    cdef DummyParameters* dummy_params
+    cdef AlphaRegularityParameters* p_alpha

     def __init__(self):
-        self.c1p = create_condition1_parameters(5)
-        self.dummy_params = create_dummy_parameters(0)
+        self.p_alpha = create_alpha_regularity_parameters(0.2)

         self.splitter = Splitter()
-        self.splitter.presplit_conditions.push_back(SplitConditionTuple(condition1, self.c1p))
-        self.splitter.presplit_conditions.push_back(SplitConditionTuple(condition2, self.dummy_params))
+        self.splitter.presplit_conditions.push_back(SplitConditionTuple(alpha_regularity_condition, self.p_alpha))
+        self.splitter.presplit_conditions.push_back(SplitConditionTuple(has_data_condition, NULL))

     def __dealloc__(self):
-        if self.c1p is not NULL:
-            free(self.c1p)
-        if self.dummy_params is not NULL:
-            free(self.dummy_params)
+        if self.p_alpha is not NULL:
+            free(self.p_alpha)

From a079e4fdac4f24367686bb1398dcfa6bc2d7d115 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Sat, 9 Mar 2024 22:12:39 -0500
Subject: [PATCH 11/29] splitter injection refactoring

---
 sklearn/tree/_splitter.pxd | 25 +++---------
 sklearn/tree/_splitter.pyx | 80 ++++++++++++++++++++++++++++++--------
 2 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 1620d744d75c0..f552101ae40b2 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -20,30 +20,15 @@ from ._criterion cimport BaseCriterion, Criterion

-ctypedef void *SplitConditionParameters
-ctypedef bint (*SplitCondition)(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil
+ctypedef void* SplitConditionParameters
+ctypedef bint (*SplitConditionFunction)(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil

 cdef struct SplitConditionTuple:
-    SplitCondition f
+    SplitConditionFunction f
     SplitConditionParameters p

-cdef inline bint has_data_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
-    return splitter.n_samples < 10
-
-cdef struct AlphaRegularityParameters:
-    float64_t alpha
-
-cdef inline AlphaRegularityParameters* create_alpha_regularity_parameters(float64_t alpha):
-    cdef AlphaRegularityParameters* result = <AlphaRegularityParameters*>malloc(sizeof(AlphaRegularityParameters))
-    if result == NULL:
-        return NULL
-    result.alpha = alpha
-    return result
-
-cdef inline bint alpha_regularity_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
-    cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters
-
-    return 1
+cdef class SplitCondition:
+    cdef SplitConditionTuple t

 cdef struct SplitRecord:
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 40c20dad96042..22dbb995dd3f6 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -43,21 +43,56 @@ cdef float32_t FEATURE_THRESHOLD = 1e-7
 # in SparsePartitioner
 cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

+cdef struct HasDataParameters:
+    int min_samples
+
+cdef bint has_data_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+    cdef HasDataParameters* p = <HasDataParameters*>split_condition_parameters
+    return splitter.n_samples >= p.min_samples
+
+cdef class HasDataCondition(SplitCondition):
+    def __cinit__(self, int min_samples):
+        self.t.f = has_data_condition
+        self.t.p = malloc(sizeof(HasDataParameters))
+        (<HasDataParameters*>self.t.p).min_samples = min_samples
+
+    def __dealloc__(self):
+        if self.t.p is not NULL:
+            free(self.t.p)
+
+        super.__dealloc__(self)
+
+cdef struct AlphaRegularityParameters:
+    float64_t alpha
+
+cdef bint alpha_regularity_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+    cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters
+
+    return 1
+
+cdef class AlphaRegularityCondition(SplitCondition):
+    def __cinit__(self, float64_t alpha):
+        self.t.f = alpha_regularity_condition
+        self.t.p = malloc(sizeof(AlphaRegularityParameters))
+        (<AlphaRegularityParameters*>self.t.p).alpha = alpha
+
+    def __dealloc__(self):
+        if self.t.p is not NULL:
+            free(self.t.p)
+
+        super.__dealloc__(self)
+
 from ._tree cimport Tree
 cdef class FooTree(Tree):
     cdef Splitter splitter
-    cdef AlphaRegularityParameters* p_alpha

     def __init__(self):
-        self.p_alpha = create_alpha_regularity_parameters(0.2)
-
-        self.splitter = Splitter()
-        self.splitter.presplit_conditions.push_back(SplitConditionTuple(alpha_regularity_condition, self.p_alpha))
-        self.splitter.presplit_conditions.push_back(SplitConditionTuple(has_data_condition, NULL))
-
-    def __dealloc__(self):
-        if self.p_alpha is not NULL:
-            free(self.p_alpha)
+        self.splitter = Splitter(
+            presplit_conditions = [HasDataCondition(10)],
+            postsplit_conditions = [AlphaRegularityCondition(0.1)],
+        )

 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:
@@ -200,6 +207,8 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
+        SplitCondition[:] presplit_conditions,
+        SplitCondition[:] postsplit_conditions,
         *argv
     ):
         """
@@ -212,6 +249,14 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

+        if presplit_conditions is not None:
+            for condition in presplit_conditions:
+                self.presplit_conditions.push_back((<SplitCondition>condition).t)
+
+        if postsplit_conditions is not None:
+            for condition in postsplit_conditions:
+                self.postsplit_conditions.push_back((<SplitCondition>condition).t)
+
     def __reduce__(self):
         return (type(self), (self.criterion,
@@ -618,13 +663,14 @@ cdef inline intp_t node_split_best(
             else:
                 n_left = current_split.pos - splitter.start
                 n_right = end_non_missing - current_split.pos + n_missing
-            if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
-                continue
-
+
             for condition in splitter.presplit_conditions:
                 if not condition.f(splitter, condition.p):
                     continue

+            if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
+                continue
+
             criterion.update(current_split.pos)

             # Reject if monotonicity constraints are not satisfied
             if (
                 with_monotonic_cst and
                 monotonic_cst[current_split.feature] != 0 and
                 not criterion.check_monotonicity(
                     monotonic_cst[current_split.feature],
                     lower_bound,
                     upper_bound,
                 )
             ):
                 continue

-            # Reject if min_weight_leaf is not satisfied
-            if splitter.check_postsplit_conditions() == 1:
-                continue
-
             for condition in splitter.postsplit_conditions:
                 if not condition.f(splitter, condition.p):
                     continue

+            # Reject if min_weight_leaf is not satisfied
+            if splitter.check_postsplit_conditions() == 1:
+                continue
+
             current_proxy_improvement = criterion.proxy_impurity_improvement()
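Patch 11 wraps each `(f, p)` tuple in a cdef extension class so that `__cinit__`/`__dealloc__` tie the lifetime of the heap-allocated parameter struct to a Python object. One caveat in the patch as written: `super.__dealloc__(self)` references the `super` builtin without calling it and would raise at deallocation time (where exceptions are ignored); Cython already chains a parent's `__dealloc__` automatically, so the pattern reduces to the following sketch (hypothetical names):

    # condition_classes.pyx -- illustrative only
    from libc.stdlib cimport malloc, free

    ctypedef void* Params
    ctypedef bint (*ConditionFunction)(int n, Params p) noexcept nogil

    cdef struct ConditionTuple:
        ConditionFunction f
        Params p

    cdef class Condition:
        cdef ConditionTuple t

    cdef struct MinCountParams:
        int min_count

    cdef bint min_count_condition(int n, Params p) noexcept nogil:
        return n >= (<MinCountParams*>p).min_count

    cdef class MinCountCondition(Condition):
        def __cinit__(self, int min_count):
            self.t.f = min_count_condition
            self.t.p = malloc(sizeof(MinCountParams))
            if self.t.p == NULL:
                raise MemoryError()
            (<MinCountParams*>self.t.p).min_count = min_count

        def __dealloc__(self):
            # Cython calls any parent __dealloc__ automatically; freeing our
            # own allocation is all that is needed here.
            if self.t.p is not NULL:
                free(self.t.p)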
From 5397b666fe21025c113d30e8eb39c50556b0fca7 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Fri, 15 Mar 2024 17:46:16 -0400
Subject: [PATCH 12/29] cython injection due diligence, converted min_sample
 and monotonic_cst to injections

---
 sklearn/tree/_splitter.pxd |  22 ++++-
 sklearn/tree/_splitter.pyx | 191 +++++++++++++++++++++++++++++--------
 2 files changed, 173 insertions(+), 40 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index f552101ae40b2..9a400f3954b13 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -6,6 +6,7 @@
 #          Jacob Schreiber
 #          Adam Li
 #          Jong Shin
+#          Samuel Carliles
 #
 # License: BSD 3 clause
@@ -20,8 +21,27 @@ from ._utils cimport UINT32_t

 from ._criterion cimport BaseCriterion, Criterion

+# NICE IDEAS THAT DON'T APPEAR POSSIBLE
+# - accessing elements of a memory view of cython extension types in a nogil block/function
+# - storing cython extension types in cpp vectors
+#
+# despite the fact that we can access scalar extension type properties in such a context,
+# as for instance node_split_best does with Criterion and Partition,
+# and we can access the elements of a memory view of primitive types in such a context
+#
+# SO WHERE DOES THAT LEAVE US
+# - we can transform these into cpp vectors of structs
+# and with some minor casting irritations everything else works ok
 ctypedef void* SplitConditionParameters
-ctypedef bint (*SplitConditionFunction)(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil
+ctypedef bint (*SplitConditionFunction)(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil

 cdef struct SplitConditionTuple:
     SplitConditionFunction f
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 22dbb995dd3f6..bb21548ef4b31 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -44,10 +44,99 @@ cdef float32_t FEATURE_THRESHOLD = 1e-7
 cdef float32_t EXTRACT_NNZ_SWITCH = 0.1

+cdef bint min_sample_leaf_condition(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil:
+    cdef intp_t min_samples_leaf = splitter.min_samples_leaf
+    cdef intp_t end_non_missing = splitter.end - n_missing
+    cdef intp_t n_left, n_right
+
+    if missing_go_to_left:
+        n_left = current_split.pos - splitter.start + n_missing
+        n_right = end_non_missing - current_split.pos
+    else:
+        n_left = current_split.pos - splitter.start
+        n_right = end_non_missing - current_split.pos + n_missing
+
+    # Reject if min_samples_leaf is not guaranteed
+    if n_left < min_samples_leaf or n_right < min_samples_leaf:
+        return 0
+
+    return 1
+
+cdef class MinSamplesLeafCondition(SplitCondition):
+    def __cinit__(self):
+        self.t.f = min_sample_leaf_condition
+        self.t.p = NULL  # min_samples is stored in splitter, which is already passed to f
+
+cdef bint min_weight_leaf_condition(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil:
+    cdef float64_t min_weight_leaf = splitter.min_weight_leaf
+
+    # Reject if min_weight_leaf is not satisfied
+    if ((splitter.criterion.weighted_n_left < min_weight_leaf) or
+            (splitter.criterion.weighted_n_right < min_weight_leaf)):
+        return 0
+
+    return 1
+
+cdef class MinWeightLeafCondition(SplitCondition):
+    def __cinit__(self):
+        self.t.f = min_weight_leaf_condition
+        self.t.p = NULL  # min_weight_leaf is stored in splitter, which is already passed to f
+
+cdef bint monotonic_constraint_condition(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil:
+    if (
+        splitter.with_monotonic_cst and
+        splitter.monotonic_cst[current_split.feature] != 0 and
+        not splitter.criterion.check_monotonicity(
+            splitter.monotonic_cst[current_split.feature],
+            lower_bound,
+            upper_bound,
+        )
+    ):
+        return 0
+
+    return 1
+
+cdef class MonotonicConstraintCondition(SplitCondition):
+    def __cinit__(self):
+        self.t.f = monotonic_constraint_condition
+        self.t.p = NULL
+
 cdef struct HasDataParameters:
     int min_samples

-cdef bint has_data_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+cdef bint has_data_condition(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil:
     cdef HasDataParameters* p = <HasDataParameters*>split_condition_parameters
     return splitter.n_samples >= p.min_samples

 cdef class HasDataCondition(SplitCondition):
@@ -155,7 +244,15 @@ cdef class HasDataCondition(SplitCondition):
 cdef struct AlphaRegularityParameters:
     float64_t alpha

-cdef bint alpha_regularity_condition(Splitter splitter, SplitConditionParameters split_condition_parameters) noexcept nogil:
+cdef bint alpha_regularity_condition(
+    Splitter splitter,
+    SplitRecord* current_split,
+    intp_t n_missing,
+    bint missing_go_to_left,
+    float64_t lower_bound,
+    float64_t upper_bound,
+    SplitConditionParameters split_condition_parameters
+) noexcept nogil:
     cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters

     return 1
@@ -304,8 +346,24 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

+        self.min_samples_leaf_condition = MinSamplesLeafCondition()
+        self.min_weight_leaf_condition = MinWeightLeafCondition()
+
+        self.presplit_conditions.push_back((<SplitCondition>self.min_samples_leaf_condition).t)
         if presplit_conditions is not None:
             for condition in presplit_conditions:
                 self.presplit_conditions.push_back((<SplitCondition>condition).t)

+        self.postsplit_conditions.push_back((<SplitCondition>self.min_weight_leaf_condition).t)
         if postsplit_conditions is not None:
             for condition in postsplit_conditions:
                 self.postsplit_conditions.push_back((<SplitCondition>condition).t)

+        if(self.with_monotonic_cst):
+            self.monotonic_constraint_condition = MonotonicConstraintCondition()
+            self.presplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+            self.postsplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+
     def __reduce__(self):
         return (type(self), (self.criterion,
@@ -644,54 +751,60 @@ cdef inline intp_t node_split_best(

                 current_split.pos = p

-                # Reject if monotonicity constraints are not satisfied
-                if (
-                    with_monotonic_cst and
-                    monotonic_cst[current_split.feature] != 0 and
-                    not criterion.check_monotonicity(
-                        monotonic_cst[current_split.feature],
-                        lower_bound,
-                        upper_bound,
-                    )
-                ):
-                    continue
-
-                # Reject if min_samples_leaf is not guaranteed
-                if missing_go_to_left:
-                    n_left = current_split.pos - splitter.start + n_missing
-                    n_right = end_non_missing - current_split.pos
-                else:
-                    n_left = current_split.pos - splitter.start
-                    n_right = end_non_missing - current_split.pos + n_missing
+                # # Reject if monotonicity constraints are not satisfied
+                # if (
+                #     with_monotonic_cst and
+                #     monotonic_cst[current_split.feature] != 0 and
+                #     not criterion.check_monotonicity(
+                #         monotonic_cst[current_split.feature],
+                #         lower_bound,
+                #         upper_bound,
+                #     )
+                # ):
+                #     continue
+
+                # # Reject if min_samples_leaf is not guaranteed
+                # if missing_go_to_left:
+                #     n_left = current_split.pos - splitter.start + n_missing
+                #     n_right = end_non_missing - current_split.pos
+                # else:
+                #     n_left = current_split.pos - splitter.start
+                #     n_right = end_non_missing - current_split.pos + n_missing

                 for condition in splitter.presplit_conditions:
-                    if not condition.f(splitter, condition.p):
+                    if not condition.f(
+                        splitter, &current_split, n_missing, missing_go_to_left,
+                        lower_bound, upper_bound, condition.p
+                    ):
                         continue

-                if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
-                    continue
+                # if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
+                #     continue

                 criterion.update(current_split.pos)

-                # Reject if monotonicity constraints are not satisfied
-                if (
-                    with_monotonic_cst and
-                    monotonic_cst[current_split.feature] != 0 and
-                    not criterion.check_monotonicity(
-                        monotonic_cst[current_split.feature],
-                        lower_bound,
-                        upper_bound,
-                    )
-                ):
-                    continue
+                # # Reject if monotonicity constraints are not satisfied
+                # if (
+                #     with_monotonic_cst and
+                #     monotonic_cst[current_split.feature] != 0 and
+                #     not criterion.check_monotonicity(
+                #         monotonic_cst[current_split.feature],
+                #         lower_bound,
+                #         upper_bound,
+                #     )
+                # ):
+                #     continue

                 for condition in splitter.postsplit_conditions:
-                    if not condition.f(splitter, condition.p):
+                    if not condition.f(
+                        splitter, &current_split, n_missing, missing_go_to_left,
+                        lower_bound, upper_bound, condition.p
+                    ):
                         continue

-                # Reject if min_weight_leaf is not satisfied
-                if splitter.check_postsplit_conditions() == 1:
-                    continue
+                # # Reject if min_weight_leaf is not satisfied
+                # if splitter.check_postsplit_conditions() == 1:
+                #     continue

                 current_proxy_improvement = criterion.proxy_impurity_improvement()
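With the widened signature of patch 12, a condition sees the proposed split position plus the missing-value routing and monotonicity bounds, so the built-ins like `min_sample_leaf_condition` can reproduce the previously inline checks exactly. The left/right bookkeeping is easy to sanity-check in pure Python (the numbers below are made up for illustration):

    # Pure-Python check of the n_left / n_right arithmetic used by
    # min_sample_leaf_condition.
    def leaf_sizes(start, end, pos, n_missing, missing_go_to_left):
        end_non_missing = end - n_missing
        if missing_go_to_left:
            return pos - start + n_missing, end_non_missing - pos
        return pos - start, end_non_missing - pos + n_missing

    assert leaf_sizes(0, 100, 40, 10, True) == (50, 50)   # missing samples go left
    assert leaf_sizes(0, 100, 40, 10, False) == (40, 60)  # missing samples go right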
From 44f1d570fd0ba0503737c3f705e83f2ec7b8836a Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 18 Mar 2024 14:53:58 -0400
Subject: [PATCH 13/29] tree tests pass huzzah!

---
 sklearn/tree/_splitter.pxd |  4 ++++
 sklearn/tree/_splitter.pyx | 36 ++++++++++++++++++++------------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 9a400f3954b13..0edd4eb40231c 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -144,6 +144,10 @@ cdef class Splitter(BaseSplitter):
     cdef const cnp.int8_t[:] monotonic_cst
     cdef bint with_monotonic_cst

+    cdef SplitCondition min_samples_leaf_condition
+    cdef SplitCondition min_weight_leaf_condition
+    cdef SplitCondition monotonic_constraint_condition
+
     cdef vector[SplitConditionTuple] presplit_conditions
     cdef vector[SplitConditionTuple] postsplit_conditions

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index bb21548ef4b31..983a6f89b4a43 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -66,9 +66,9 @@ cdef bint min_sample_leaf_condition(
     # Reject if min_samples_leaf is not guaranteed
     if n_left < min_samples_leaf or n_right < min_samples_leaf:
-        return 0
+        return False

-    return 1
+    return True

 cdef class MinSamplesLeafCondition(SplitCondition):
     def __cinit__(self):
@@ -89,9 +89,9 @@ cdef bint min_weight_leaf_condition(
     # Reject if min_weight_leaf is not satisfied
     if ((splitter.criterion.weighted_n_left < min_weight_leaf) or
             (splitter.criterion.weighted_n_right < min_weight_leaf)):
-        return 0
+        return False

-    return 1
+    return True

 cdef class MinWeightLeafCondition(SplitCondition):
     def __cinit__(self):
@@ -116,9 +116,9 @@ cdef bint monotonic_constraint_condition(
             upper_bound,
         )
     ):
-        return 0
+        return False

-    return 1
+    return True

 cdef class MonotonicConstraintCondition(SplitCondition):
     def __cinit__(self):
@@ -166,7 +166,7 @@ cdef bint alpha_regularity_condition(
 ) noexcept nogil:
     cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters

-    return 1
+    return True

 cdef class AlphaRegularityCondition(SplitCondition):
     def __cinit__(self, float64_t alpha):
@@ -304,8 +304,8 @@ cdef class Splitter(BaseSplitter):
         float64_t min_weight_leaf,
         object random_state,
         const cnp.int8_t[:] monotonic_cst,
-        SplitCondition[:] presplit_conditions,
-        SplitCondition[:] postsplit_conditions,
+        SplitCondition[:] presplit_conditions = None,
+        SplitCondition[:] postsplit_conditions = None,
         *argv
     ):
         """
@@ -657,6 +657,8 @@ cdef inline intp_t node_split_best(
     # n_total_constants = n_known_constants + n_found_constants
     cdef intp_t n_total_constants = n_known_constants

+    cdef bint conditions_hold = True
+
     _init_split(&best_split, end)

     partitioner.init_node_split(start, end)
@@ -771,12 +773,17 @@ cdef inline intp_t node_split_best(
                 #     n_left = current_split.pos - splitter.start
                 #     n_right = end_non_missing - current_split.pos + n_missing

+                conditions_hold = True
                 for condition in splitter.presplit_conditions:
                     if not condition.f(
                         splitter, &current_split, n_missing, missing_go_to_left,
                         lower_bound, upper_bound, condition.p
                     ):
-                        continue
+                        conditions_hold = False
+                        break
+
+                if not conditions_hold:
+                    continue

                 # if splitter.check_presplit_conditions(&current_split, n_missing, missing_go_to_left) == 1:
                 #     continue
@@ -795,13 +802,18 @@ cdef inline intp_t node_split_best(
                 # ):
                 #     continue

+                conditions_hold = True
                 for condition in splitter.postsplit_conditions:
                     if not condition.f(
                         splitter, &current_split, n_missing, missing_go_to_left,
                         lower_bound, upper_bound, condition.p
                     ):
-                        continue
+                        conditions_hold = False
+                        break
+
+                if not conditions_hold:
+                    continue

                 # # Reject if min_weight_leaf is not satisfied
                 # if splitter.check_postsplit_conditions() == 1:
                 #     continue

From 4f19d53c1a57fd2e37739d5028f550eb5ba88ba4 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 18 Mar 2024 16:19:33 -0400
Subject: [PATCH 14/29] added some splitconditions to header

---
 sklearn/tree/_splitter.pxd | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 0edd4eb40231c..6c9d0d676142a 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -50,6 +50,15 @@ cdef struct SplitConditionTuple:
 cdef class SplitCondition:
     cdef SplitConditionTuple t

+cdef class MinSamplesLeafCondition(SplitCondition):
+    pass
+
+cdef class MinWeightLeafCondition(SplitCondition):
+    pass
+
+cdef class MonotonicConstraintCondition(SplitCondition):
+    pass
+
 cdef struct SplitRecord:
     # Data to track sample split
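Declaring the concrete condition classes in the `.pxd` with empty `pass` bodies is what makes them cimportable: the header fixes the C-level layout, while all behavior stays in the `.pyx`. Schematically, under hypothetical module names:

    # conditions.pxd -- header only; fixes the C layout of each class
    ctypedef void* Params
    ctypedef bint (*ConditionFunction)(int n, Params p) noexcept nogil

    cdef struct ConditionTuple:
        ConditionFunction f
        Params p

    cdef class Condition:
        cdef ConditionTuple t

    # The empty body only declares the type; __cinit__/__dealloc__ and the
    # condition function itself live in conditions.pyx.
    cdef class MinCountCondition(Condition):
        pass

Any other Cython module (for example a tree implementation) can then `from conditions cimport MinCountCondition` and construct or subclass it at the C level.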
From cb71be0cdb8be46b19bbdd91d6c5da4897359ff3 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Thu, 21 Mar 2024 10:33:33 -0400
Subject: [PATCH 15/29] commented out some sample code that was substantially
 increasing peak memory utilization in asv

---
 sklearn/tree/_splitter.pyx | 116 ++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 983a6f89b4a43..6b0a6950b7739 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -125,71 +125,71 @@ cdef class MonotonicConstraintCondition(SplitCondition):
         self.t.f = monotonic_constraint_condition
         self.t.p = NULL

-cdef struct HasDataParameters:
-    int min_samples
-
-cdef bint has_data_condition(
-    Splitter splitter,
-    SplitRecord* current_split,
-    intp_t n_missing,
-    bint missing_go_to_left,
-    float64_t lower_bound,
-    float64_t upper_bound,
-    SplitConditionParameters split_condition_parameters
-) noexcept nogil:
-    cdef HasDataParameters* p = <HasDataParameters*>split_condition_parameters
-    return splitter.n_samples >= p.min_samples
-
-cdef class HasDataCondition(SplitCondition):
-    def __cinit__(self, int min_samples):
-        self.t.f = has_data_condition
-        self.t.p = malloc(sizeof(HasDataParameters))
-        (<HasDataParameters*>self.t.p).min_samples = min_samples
-
-    def __dealloc__(self):
-        if self.t.p is not NULL:
-            free(self.t.p)
-
-        super.__dealloc__(self)
-
-cdef struct AlphaRegularityParameters:
-    float64_t alpha
-
-cdef bint alpha_regularity_condition(
-    Splitter splitter,
-    SplitRecord* current_split,
-    intp_t n_missing,
-    bint missing_go_to_left,
-    float64_t lower_bound,
-    float64_t upper_bound,
-    SplitConditionParameters split_condition_parameters
-) noexcept nogil:
-    cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters
-
-    return True
-
-cdef class AlphaRegularityCondition(SplitCondition):
-    def __cinit__(self, float64_t alpha):
-        self.t.f = alpha_regularity_condition
-        self.t.p = malloc(sizeof(AlphaRegularityParameters))
-        (<AlphaRegularityParameters*>self.t.p).alpha = alpha
-
-    def __dealloc__(self):
-        if self.t.p is not NULL:
-            free(self.t.p)
-
-        super.__dealloc__(self)
-
-from ._tree cimport Tree
-cdef class FooTree(Tree):
-    cdef Splitter splitter
-
-    def __init__(self):
-        self.splitter = Splitter(
-            presplit_conditions = [HasDataCondition(10)],
-            postsplit_conditions = [AlphaRegularityCondition(0.1)],
-        )
+# cdef struct HasDataParameters:
+#     int min_samples
+
+# cdef bint has_data_condition(
+#     Splitter splitter,
+#     SplitRecord* current_split,
+#     intp_t n_missing,
+#     bint missing_go_to_left,
+#     float64_t lower_bound,
+#     float64_t upper_bound,
+#     SplitConditionParameters split_condition_parameters
+# ) noexcept nogil:
+#     cdef HasDataParameters* p = <HasDataParameters*>split_condition_parameters
+#     return splitter.n_samples >= p.min_samples

+# cdef class HasDataCondition(SplitCondition):
+#     def __cinit__(self, int min_samples):
+#         self.t.f = has_data_condition
+#         self.t.p = malloc(sizeof(HasDataParameters))
+#         (<HasDataParameters*>self.t.p).min_samples = min_samples

+#     def __dealloc__(self):
+#         if self.t.p is not NULL:
+#             free(self.t.p)

+#         super.__dealloc__(self)

+# cdef struct AlphaRegularityParameters:
+#     float64_t alpha

+# cdef bint alpha_regularity_condition(
+#     Splitter splitter,
+#     SplitRecord* current_split,
+#     intp_t n_missing,
+#     bint missing_go_to_left,
+#     float64_t lower_bound,
+#     float64_t upper_bound,
+#     SplitConditionParameters split_condition_parameters
+# ) noexcept nogil:
+#     cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters

+#     return True

+# cdef class AlphaRegularityCondition(SplitCondition):
+#     def __cinit__(self, float64_t alpha):
+#         self.t.f = alpha_regularity_condition
+#         self.t.p = malloc(sizeof(AlphaRegularityParameters))
+#         (<AlphaRegularityParameters*>self.t.p).alpha = alpha

+#     def __dealloc__(self):
+#         if self.t.p is not NULL:
+#             free(self.t.p)

+#         super.__dealloc__(self)

+# from ._tree cimport Tree
+# cdef class FooTree(Tree):
+#     cdef Splitter splitter

+#     def __init__(self):
+#         self.splitter = Splitter(
+#             presplit_conditions = [HasDataCondition(10)],
+#             postsplit_conditions = [AlphaRegularityCondition(0.1)],
+#         )

 cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil:

From e34be5c58a6f26ed38634b2a7b53a95ed0aabe67 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Tue, 9 Apr 2024 15:05:29 -0400
Subject: [PATCH 16/29] added vector resize

---
 sklearn/tree/_splitter.pyx | 43 ++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 11 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 6b0a6950b7739..80cf902c5af07 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -349,20 +349,41 @@ cdef class Splitter(BaseSplitter):
         self.min_samples_leaf_condition = MinSamplesLeafCondition()
         self.min_weight_leaf_condition = MinWeightLeafCondition()

-        self.presplit_conditions.push_back((<SplitCondition>self.min_samples_leaf_condition).t)
-        if presplit_conditions is not None:
-            for condition in presplit_conditions:
-                self.presplit_conditions.push_back((<SplitCondition>condition).t)
-
-        self.postsplit_conditions.push_back((<SplitCondition>self.min_weight_leaf_condition).t)
-        if postsplit_conditions is not None:
-            for condition in postsplit_conditions:
-                self.postsplit_conditions.push_back((<SplitCondition>condition).t)
-
-        if(self.with_monotonic_cst):
-            self.monotonic_constraint_condition = MonotonicConstraintCondition()
-            self.presplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
-            self.postsplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+        self.presplit_conditions.resize(
+            (len(presplit_conditions) if presplit_conditions is not None else 0)
+            + (2 if self.with_monotonic_cst else 1)
+        )
+        self.postsplit_conditions.resize(
+            (len(postsplit_conditions) if postsplit_conditions is not None else 0)
+            + (2 if self.with_monotonic_cst else 1)
+        )
+
+        offset = 0
+        self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
+        self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
+        offset += 1
+
+        if(self.with_monotonic_cst):
+            self.monotonic_constraint_condition = MonotonicConstraintCondition()
+            # self.presplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+            # self.postsplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+            self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
+            self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
+            offset += 1
+
+        # self.presplit_conditions.push_back((<SplitCondition>self.min_samples_leaf_condition).t)
+        if presplit_conditions is not None:
+            # for condition in presplit_conditions:
+            #     self.presplit_conditions.push_back((<SplitCondition>condition).t)
+            for i in range(len(presplit_conditions)):
+                self.presplit_conditions[i + offset] = presplit_conditions[i].t
+
+        # self.postsplit_conditions.push_back((<SplitCondition>self.min_weight_leaf_condition).t)
+        if postsplit_conditions is not None:
+            # for condition in postsplit_conditions:
+            #     self.postsplit_conditions.push_back((<SplitCondition>condition).t)
+            for i in range(len(postsplit_conditions)):
+                self.postsplit_conditions[i + offset] = postsplit_conditions[i].t
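`resize` followed by indexed assignment replaces the earlier `push_back` calls: the vector is sized exactly once up front, and the built-in conditions occupy the leading slots before any user-supplied ones. The idiom in isolation (hypothetical names; requires a C++ build):

    # distutils: language = c++
    # resize_sketch.pyx -- illustrative only
    from libcpp.vector cimport vector

    cdef struct Entry:
        int a
        int b

    def fill(int n):
        cdef vector[Entry] v
        v.resize(n)                   # single allocation up front
        cdef int i
        for i in range(n):
            v[i] = Entry(i, i * i)    # indexed assignment into pre-sized storage
        return v.size()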
From aac802e5d1cc4710dfb63ea14b9ef02a58da6a64 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Wed, 10 Apr 2024 15:10:43 -0400
Subject: [PATCH 17/29] wip

---
 sklearn/tree/_splitter.pyx | 92 +++++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 35 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 80cf902c5af07..0afe0afe52ad6 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -19,6 +19,7 @@ from cython cimport final

 from libc.math cimport isnan
+from libc.stdint cimport uintptr_t
 from libc.stdlib cimport qsort, free
 from libc.string cimport memcpy
 cimport numpy as cnp
@@ -346,44 +347,65 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

-        self.min_samples_leaf_condition = MinSamplesLeafCondition()
-        self.min_weight_leaf_condition = MinWeightLeafCondition()
+        self._presplit_conditions = presplit_conditions
+        self._postsplit_conditions = postsplit_conditions

-        self.presplit_conditions.resize(
-            (len(presplit_conditions) if presplit_conditions is not None else 0)
-            + (2 if self.with_monotonic_cst else 1)
-        )
-        self.postsplit_conditions.resize(
-            (len(postsplit_conditions) if postsplit_conditions is not None else 0)
-            + (2 if self.with_monotonic_cst else 1)
-        )
+        self._presplit_conditions.append(MinSamplesLeafCondition())
+        self._postsplit_conditions.append(MinWeightLeafCondition())

-        offset = 0
-        self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
-        self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
-        offset += 1
+        if self.with_monotonic_cst:
+            self._presplit_conditions.append(MonotonicConstraintCondition())
+            self._postsplit_conditions.append(MonotonicConstraintCondition())
+
+        self.presplit_conditions.resize(len(self._presplit_conditions))
+        self.postsplit_conditions.resize(len(self._postsplit_conditions))

-        if(self.with_monotonic_cst):
-            self.monotonic_constraint_condition = MonotonicConstraintCondition()
-            # self.presplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
-            # self.postsplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
-            self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
-            self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
-            offset += 1
-
-        # self.presplit_conditions.push_back((<SplitCondition>self.min_samples_leaf_condition).t)
-        if presplit_conditions is not None:
-            # for condition in presplit_conditions:
-            #     self.presplit_conditions.push_back((<SplitCondition>condition).t)
-            for i in range(len(presplit_conditions)):
-                self.presplit_conditions[i + offset] = presplit_conditions[i].t
-
-        # self.postsplit_conditions.push_back((<SplitCondition>self.min_weight_leaf_condition).t)
-        if postsplit_conditions is not None:
-            # for condition in postsplit_conditions:
-            #     self.postsplit_conditions.push_back((<SplitCondition>condition).t)
-            for i in range(len(postsplit_conditions)):
-                self.postsplit_conditions[i + offset] = postsplit_conditions[i].t
+        for i in range(len(self._presplit_conditions)):
+            self.presplit_conditions[i].f = self._presplit_conditions[i].t.f
+            self.presplit_conditions[i].p = self._presplit_conditions[i].t.p
+
+        for i in range(len(self._postsplit_conditions)):
+            self.postsplit_conditions[i].f = self._postsplit_conditions[i].t.f
+            self.postsplit_conditions[i].p = self._postsplit_conditions[i].t.p
+
+        # self.min_samples_leaf_condition = MinSamplesLeafCondition()
+        # self.min_weight_leaf_condition = MinWeightLeafCondition()
+
+        # self.presplit_conditions.resize(
+        #     (len(presplit_conditions) if presplit_conditions is not None else 0)
+        #     + (2 if self.with_monotonic_cst else 1)
+        # )
+        # self.postsplit_conditions.resize(
+        #     (len(postsplit_conditions) if postsplit_conditions is not None else 0)
+        #     + (2 if self.with_monotonic_cst else 1)
+        # )
+
+        # offset = 0
+        # self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
+        # self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
+        # offset += 1
+
+        # if(self.with_monotonic_cst):
+        #     self.monotonic_constraint_condition = MonotonicConstraintCondition()
+        #     # self.presplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+        #     # self.postsplit_conditions.push_back((<SplitCondition>self.monotonic_constraint_condition).t)
+        #     self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
+        #     self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
+        #     offset += 1
+
+        # # self.presplit_conditions.push_back((<SplitCondition>self.min_samples_leaf_condition).t)
+        # if presplit_conditions is not None:
+        #     # for condition in presplit_conditions:
+        #     #     self.presplit_conditions.push_back((<SplitCondition>condition).t)
+        #     for i in range(len(presplit_conditions)):
+        #         self.presplit_conditions[i + offset] = presplit_conditions[i].t
+
+        # # self.postsplit_conditions.push_back((<SplitCondition>self.min_weight_leaf_condition).t)
+        # if postsplit_conditions is not None:
+        #     # for condition in postsplit_conditions:
+        #     #     self.postsplit_conditions.push_back((<SplitCondition>condition).t)
+        #     for i in range(len(postsplit_conditions)):
+        #         self.postsplit_conditions[i + offset] = postsplit_conditions[i].t

From 7a70a0b6e076bd7e4f54674ea2148697f80916f4 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 15 Apr 2024 14:13:27 -0400
Subject: [PATCH 18/29] settling injection memory management for now

---
 sklearn/tree/_splitter.pyx | 81 ++++++++++++--------------------------
 1 file changed, 26 insertions(+), 55 deletions(-)

diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index 2143aa3a5d742..ff707817d3d60 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -340,65 +340,36 @@ cdef class Splitter(BaseSplitter):
         self.monotonic_cst = monotonic_cst
         self.with_monotonic_cst = monotonic_cst is not None

-        self._presplit_conditions = presplit_conditions
-        self._postsplit_conditions = postsplit_conditions
-
-        self._presplit_conditions.append(MinSamplesLeafCondition())
-        self._postsplit_conditions.append(MinWeightLeafCondition())
-
-        if self.with_monotonic_cst:
-            self._presplit_conditions.append(MonotonicConstraintCondition())
-            self._postsplit_conditions.append(MonotonicConstraintCondition())
-
-        self.presplit_conditions.resize(len(self._presplit_conditions))
-        self.postsplit_conditions.resize(len(self._postsplit_conditions))
-
-        for i in range(len(self._presplit_conditions)):
-            self.presplit_conditions[i].f = self._presplit_conditions[i].t.f
-            self.presplit_conditions[i].p = self._presplit_conditions[i].t.p
-
-        for i in range(len(self._postsplit_conditions)):
-            self.postsplit_conditions[i].f = self._postsplit_conditions[i].t.f
-            self.postsplit_conditions[i].p = self._postsplit_conditions[i].t.p
-
-        # self.min_samples_leaf_condition = MinSamplesLeafCondition()
-        # self.min_weight_leaf_condition = MinWeightLeafCondition()
-
-        # self.presplit_conditions.resize(
-        #     (len(presplit_conditions) if presplit_conditions is not None else 0)
-        #     + (2 if self.with_monotonic_cst else 1)
-        # )
-        # self.postsplit_conditions.resize(
-        #     (len(postsplit_conditions) if postsplit_conditions is not None else 0)
-        #     + (2 if self.with_monotonic_cst else 1)
-        # )
-
-        # offset = 0
-        # self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
-        # self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
-        # offset += 1
-
-        # if(self.with_monotonic_cst):
-        #     self.monotonic_constraint_condition = MonotonicConstraintCondition()
-        #     self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
-        #     self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
-        #     offset += 1
-
-        # if presplit_conditions is not None:
-        #     for i in range(len(presplit_conditions)):
-        #         self.presplit_conditions[i + offset] = presplit_conditions[i].t
-
-        # if postsplit_conditions is not None:
-        #     for i in range(len(postsplit_conditions)):
-        #         self.postsplit_conditions[i + offset] = postsplit_conditions[i].t
+        self.min_samples_leaf_condition = MinSamplesLeafCondition()
+        self.min_weight_leaf_condition = MinWeightLeafCondition()
+
+        self.presplit_conditions.resize(
+            (len(presplit_conditions) if presplit_conditions is not None else 0)
+            + (2 if self.with_monotonic_cst else 1)
+        )
+        self.postsplit_conditions.resize(
+            (len(postsplit_conditions) if postsplit_conditions is not None else 0)
+            + (2 if self.with_monotonic_cst else 1)
+        )
+
+        offset = 0
+        self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
+        self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
+        offset += 1
+
+        if(self.with_monotonic_cst):
+            self.monotonic_constraint_condition = MonotonicConstraintCondition()
+            self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
+            self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
+            offset += 1
+
+        if presplit_conditions is not None:
+            for i in range(len(presplit_conditions)):
+                self.presplit_conditions[i + offset] = presplit_conditions[i].t
+
+        if postsplit_conditions is not None:
+            for i in range(len(postsplit_conditions)):
+                self.postsplit_conditions[i + offset] = postsplit_conditions[i].t

     def __reduce__(self):
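The constructor logic the series settles on composes the condition tables as: slot 0 holds the built-in `min_samples_leaf` (pre-split) or `min_weight_leaf` (post-split) check, an optional monotonicity slot follows, and caller-supplied conditions fill the remainder, matching the `resize(... + (2 if self.with_monotonic_cst else 1))` arithmetic. A pure-Python rendering of that layout (illustrative only):

    # Models the presplit table; the postsplit table swaps in min_weight_leaf.
    def compose(user_conditions, with_monotonic_cst):
        size = len(user_conditions) + (2 if with_monotonic_cst else 1)
        table = ["min_samples_leaf"]
        if with_monotonic_cst:
            table.append("monotonic_cst")
        table.extend(user_conditions)
        assert len(table) == size
        return table

    assert compose(["user_a"], True) == ["min_samples_leaf", "monotonic_cst", "user_a"]
    assert compose(["user_a"], False) == ["min_samples_leaf", "user_a"]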
From 7a70a0b6e076bd7e4f54674ea2148697f80916f4 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 22 Apr 2024 18:54:41 -0400
Subject: [PATCH 19/29] added regression forest benchmark

---
 asv_benchmarks/benchmarks/ensemble.py | 45 ++++++++++++++++++++++++++-
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index c336d1e5f8805..a519cece3ac27 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -2,6 +2,7 @@
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
     RandomForestClassifier,
+    RandomForestRegressor
 )
 
 from .common import Benchmark, Estimator, Predictor
@@ -9,8 +10,50 @@
     _20newsgroups_highdim_dataset,
     _20newsgroups_lowdim_dataset,
     _synth_classification_dataset,
+    _synth_regression_dataset,
+    _synth_regression_sparse_dataset
 )
-from .utils import make_gen_classif_scorers
+from .utils import make_gen_classif_scorers, make_gen_reg_scorers
+
+
+class RandomForestRegressorBenchmark(Predictor, Estimator, Benchmark):
+    """
+    Benchmarks for RandomForestRegressor.
+    """
+
+    param_names = ["representation", "n_jobs"]
+    params = (["dense", "sparse"], Benchmark.n_jobs_vals)
+
+    def setup_cache(self):
+        super().setup_cache()
+
+    def make_data(self, params):
+        representation, n_jobs = params
+
+        if representation == "sparse":
+            data = _synth_regression_sparse_dataset()
+        else:
+            data = _synth_regression_dataset()
+
+        return data
+
+    def make_estimator(self, params):
+        representation, n_jobs = params
+
+        n_estimators = 500 if Benchmark.data_size == "large" else 100
+
+        estimator = RandomForestRegressor(
+            n_estimators=n_estimators,
+            min_samples_split=10,
+            max_features="log2",
+            n_jobs=n_jobs,
+            random_state=0,
+        )
+
+        return estimator
+
+    def make_scorers(self):
+        make_gen_reg_scorers(self)
 
 
 class RandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
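With this benchmark in place, regressor-side changes to the splitter can be timed the same way the classifier already is. Assuming the standard airspeed-velocity setup in asv_benchmarks/ (the exact refs and flags below are an illustration, not taken from this series), something like `asv continuous -b RandomForestRegressorBenchmark upstream/main HEAD` compares the condition-injection branch against a baseline, and `Benchmark.data_size` switches between the 100- and 500-estimator configurations chosen in make_estimator above.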
From 893d588bccabbd063d1d385a6da7e2d52556c3a6 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Mon, 22 Apr 2024 21:30:25 -0400
Subject: [PATCH 20/29] ran black for linting check

---
 .github/scripts/label_title_regex.py | 1 +
 asv_benchmarks/benchmarks/ensemble.py | 4 +-
 benchmarks/bench_glm.py | 1 +
 benchmarks/bench_glmnet.py | 1 +
 benchmarks/bench_isotonic.py | 1 +
 ...kernel_pca_solvers_time_vs_n_components.py | 1 +
 ...ch_kernel_pca_solvers_time_vs_n_samples.py | 1 +
 benchmarks/bench_lasso.py | 1 +
 benchmarks/bench_plot_lasso_path.py | 1 +
 benchmarks/bench_plot_neighbors.py | 1 +
 benchmarks/bench_plot_nmf.py | 7 ++-
 benchmarks/bench_plot_omp_lars.py | 1 +
 ...ch_plot_polynomial_kernel_approximation.py | 1 +
 benchmarks/bench_plot_svd.py | 1 +
 benchmarks/bench_random_projections.py | 1 +
 benchmarks/bench_saga.py | 5 +-
 .../bench_sample_without_replacement.py | 1 +
 benchmarks/bench_text_vectorizers.py | 1 +
 benchmarks/bench_tree.py | 1 +
 benchmarks/bench_tsne_mnist.py | 6 ++-
 build_tools/generate_authors_table.py | 1 +
 build_tools/get_comment.py | 3 +-
 build_tools/github/check_wheels.py | 1 +
 build_tools/github/vendor.py | 1 -
 .../update_environments_and_lock_files.py | 33 ++++++++-----
 doc/sphinxext/doi_role.py | 26 +++++-----
 doc/sphinxext/sphinx_issues.py | 1 +
 .../applications/plot_face_recognition.py | 1 +
 examples/calibration/plot_calibration.py | 1 +
 examples/cluster/plot_affinity_propagation.py | 1 +
 examples/cluster/plot_bisect_kmeans.py | 1 +
 .../covariance/plot_covariance_estimation.py | 1 -
 .../ensemble/plot_feature_transformation.py | 1 -
 .../plot_gradient_boosting_early_stopping.py | 1 +
 .../ensemble/plot_monotonic_constraints.py | 1 +
 .../linear_model/plot_quantile_regression.py | 12 +++--
 examples/manifold/plot_swissroll.py | 1 +
 .../plot_kernel_ridge_regression.py | 1 +
 .../miscellaneous/plot_metadata_routing.py | 1 +
 examples/mixture/plot_gmm_init.py | 1 -
 .../plot_semi_supervised_newsgroups.py | 1 -
 examples/tree/plot_iris_dtc.py | 1 +
 maint_tools/check_pxd_in_installation.py | 8 ++-
 sklearn/__check_build/__init__.py | 10 ++--
 sklearn/_build_utils/__init__.py | 1 +
 sklearn/_build_utils/openmp_helpers.py | 12 +++--
 sklearn/_build_utils/pre_build_helpers.py | 6 ++-
 sklearn/_build_utils/version.py | 3 +-
 sklearn/_config.py | 4 +-
 sklearn/_distributor_init.py | 2 +-
 sklearn/_loss/link.py | 1 +
 sklearn/_loss/loss.py | 1 +
 sklearn/_min_dependencies.py | 1 +
 sklearn/base.py | 5 +-
 sklearn/cluster/_agglomerative.py | 1 +
 sklearn/cluster/_bicluster.py | 1 +
 sklearn/cluster/_bisect_k_means.py | 1 +
 sklearn/cluster/_feature_agglomeration.py | 1 +
 sklearn/cluster/_hdbscan/hdbscan.py | 1 +
 sklearn/cluster/_spectral.py | 3 +-
 .../tests/test_feature_agglomeration.py | 1 +
 sklearn/cluster/tests/test_hdbscan.py | 1 +
 sklearn/cluster/tests/test_hierarchical.py | 1 +
 sklearn/cluster/tests/test_k_means.py | 1 +
 sklearn/cluster/tests/test_spectral.py | 1 +
 sklearn/covariance/_robust_covariance.py | 1 +
 .../covariance/tests/test_graphical_lasso.py | 4 +-
 sklearn/datasets/__init__.py | 7 ++-
 sklearn/datasets/_arff_parser.py | 1 +
 sklearn/datasets/_california_housing.py | 1 +
 sklearn/datasets/_samples_generator.py | 4 +-
 sklearn/datasets/tests/test_20news.py | 1 +
 sklearn/datasets/tests/test_arff_parser.py | 24 ++++++---
 .../datasets/tests/test_california_housing.py | 1 +
 sklearn/datasets/tests/test_common.py | 1 +
 sklearn/datasets/tests/test_covtype.py | 1 +
 sklearn/datasets/tests/test_openml.py | 4 +-
 sklearn/decomposition/__init__.py | 1 -
 sklearn/decomposition/_dict_learning.py | 4 +-
 sklearn/decomposition/_nmf.py | 7 ++-
 sklearn/decomposition/_pca.py | 3 +-
 sklearn/decomposition/_sparse_pca.py | 1 +
 sklearn/decomposition/_truncated_svd.py | 3 +-
 sklearn/decomposition/tests/test_fastica.py | 1 +
 .../tests/test_incremental_pca.py | 1 +
 sklearn/ensemble/__init__.py | 1 +
 sklearn/ensemble/_forest.py | 3 +-
 sklearn/ensemble/_gb.py | 6 +--
 .../_hist_gradient_boosting/binning.py | 1 +
 .../_hist_gradient_boosting/grower.py | 1 +
 .../_hist_gradient_boosting/predictor.py | 1 +
 .../ensemble/_hist_gradient_boosting/utils.py | 1 +
 .../ensemble/tests/test_gradient_boosting.py | 1 +
 .../enable_hist_gradient_boosting.py | 1 +
 sklearn/feature_extraction/text.py | 6 +--
 sklearn/feature_selection/_sequential.py | 1 +
 .../tests/test_feature_select.py | 1 +
 sklearn/gaussian_process/_gpr.py | 8 +--
 sklearn/gaussian_process/kernels.py | 4 +-
 sklearn/gaussian_process/tests/test_gpc.py | 14 ++----
 sklearn/gaussian_process/tests/test_gpr.py | 14 ++----
 sklearn/impute/__init__.py | 1 +
 sklearn/impute/_base.py | 5 +-
 sklearn/inspection/__init__.py | 1 -
 .../tests/test_partial_dependence.py | 1 +
 .../tests/test_permutation_importance.py | 4 +-
 sklearn/linear_model/_glm/_newton_solver.py | 3 +-
 sklearn/linear_model/_glm/tests/test_glm.py | 3 +-
 sklearn/linear_model/_least_angle.py | 4 +-
 sklearn/linear_model/_linear_loss.py | 1 +
 sklearn/linear_model/_logistic.py | 9 ++--
 sklearn/linear_model/_omp.py | 3 +-
 sklearn/linear_model/_stochastic_gradient.py | 3 +-
 .../linear_model/tests/test_linear_loss.py | 1 +
 sklearn/manifold/_spectral_embedding.py | 3 +-
 sklearn/metrics/__init__.py | 1 -
 sklearn/metrics/_base.py | 1 +
 sklearn/metrics/_classification.py | 3 +-
 sklearn/metrics/cluster/__init__.py | 1 +
 sklearn/metrics/tests/test_classification.py | 15 ++----
 sklearn/mixture/_bayesian_mixture.py | 1 +
 sklearn/model_selection/_search.py | 3 +-
 sklearn/model_selection/tests/test_split.py | 1 +
 .../model_selection/tests/test_validation.py | 1 +
 sklearn/neighbors/_base.py | 10 ++--
 sklearn/neighbors/_kde.py | 1 +
 sklearn/neighbors/_unsupervised.py | 1 +
 .../neighbors/tests/test_nearest_centroid.py | 1 +
 sklearn/neural_network/_base.py | 3 +-
 .../neural_network/_multilayer_perceptron.py | 6 +--
 sklearn/neural_network/_rbm.py | 3 +-
 .../neural_network/_stochastic_optimizers.py | 3 +-
 sklearn/neural_network/tests/test_mlp.py | 3 +-
 sklearn/pipeline.py | 1 +
 sklearn/preprocessing/_polynomial.py | 1 +
 sklearn/random_projection.py | 1 +
 .../tests/test_label_propagation.py | 2 +-
 sklearn/svm/_base.py | 6 +--
 sklearn/svm/_bounds.py | 1 +
 sklearn/svm/tests/test_svm.py | 1 +
 sklearn/tests/random_seed.py | 1 +
 sklearn/tests/test_build.py | 6 ++-
 sklearn/tests/test_common.py | 6 ++-
 sklearn/tests/test_metaestimators.py | 1 +
 sklearn/tests/test_pipeline.py | 1 +
 sklearn/tree/tests/test_export.py | 49 +++++++++++++------
 sklearn/utils/_response.py | 1 +
 sklearn/utils/_show_versions.py | 1 +
 sklearn/utils/estimator_checks.py | 9 ++--
 sklearn/utils/extmath.py | 1 +
 sklearn/utils/fixes.py | 1 +
 sklearn/utils/optimize.py | 1 +
 sklearn/utils/tests/test_extmath.py | 4 +-
 sklearn/utils/tests/test_fast_dict.py | 4 +-
 154 files changed, 309 insertions(+), 222 deletions(-)

diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py
index a022c3c4dd2a7..9a689b8db09b4 100644
--- a/.github/scripts/label_title_regex.py
+++ b/.github/scripts/label_title_regex.py
@@ -1,5 +1,6 @@
 """Labels PRs based on title. Must be run in a github action with the
 pull_request_target event."""
+
 import json
 import os
 import re

diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py
index a519cece3ac27..877fcdb09fe68 100644
--- a/asv_benchmarks/benchmarks/ensemble.py
+++ b/asv_benchmarks/benchmarks/ensemble.py
@@ -2,7 +2,7 @@
     GradientBoostingClassifier,
     HistGradientBoostingClassifier,
     RandomForestClassifier,
-    RandomForestRegressor
+    RandomForestRegressor,
 )
 
 from .common import Benchmark, Estimator, Predictor
@@ -11,7 +11,7 @@
     _20newsgroups_lowdim_dataset,
     _synth_classification_dataset,
     _synth_regression_dataset,
-    _synth_regression_sparse_dataset
+    _synth_regression_sparse_dataset,
 )
 from .utils import make_gen_classif_scorers, make_gen_reg_scorers

diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
index 803043398d1ac..84cf31858afa7 100644
--- a/benchmarks/bench_glm.py
+++ b/benchmarks/bench_glm.py
@@ -4,6 +4,7 @@
 Data comes from a random square matrix.
 
 """
+
 from datetime import datetime
 
 import numpy as np

diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
index 7b111f95044e2..1aaad99c10587 100644
--- a/benchmarks/bench_glmnet.py
+++ b/benchmarks/bench_glmnet.py
@@ -16,6 +16,7 @@
 In both cases, only 10% of the features are informative.
 """
+
 import gc
 from time import time
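Nearly every hunk that follows is one of two mechanical rewrites from the black 24.x style: a blank line is now required between a module docstring and the first statement, and string literals that were split only to push a trailing `%` or `+` operand onto its own line are joined back onto one line. A minimal self-contained sketch of both rules (an illustrative file, not taken from the patch):

    """Example module docstring: black >=24 wants a blank line after this."""

    import gc  # the blank line above is what most of the hunks below add

    # the string-joining rule, by contrast, turns
    #     "Maximum number of iterations must be a positive "
    #     "integer; got (max_iter=%r)"
    #     % self.max_iter
    # into a single '... %r)" % self.max_iter' line inside the call.
    print("collected %d objects" % gc.collect())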
""" + import argparse import gc from datetime import datetime diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 6551cb74ff86e..26789c173688f 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -35,6 +35,7 @@ You can also set `arpack_all=True` to activate arpack solver for large number of components (this takes more time). """ + # Authors: Sylvain MARIE, Schneider Electric import time diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index 26a45ca9f09ca..cae74c6f442ff 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -37,6 +37,7 @@ Solvers comparison benchmark: time vs n_components", where this time the number of examples is fixed, and the desired number of components varies. """ + # Author: Sylvain MARIE, Schneider Electric import time diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 1c49c6f5cabdf..9bae570505a75 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -11,6 +11,7 @@ In both cases, only 10% of the features are informative. """ + import gc from time import time diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index c996c9c09520f..3b46e447401cb 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,6 +2,7 @@ The input data is mostly low rank but is a fat infinite tail. """ + import gc import sys from collections import defaultdict diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index 2d9cf2b08b71d..2cedb19fb23c4 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -1,6 +1,7 @@ """ Plot the scaling of the nearest neighbors algorithms with k, D, and N """ + from time import time import matplotlib.pyplot as plt diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 3484850011c1f..f05ede117191b 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -1,6 +1,7 @@ """ Benchmarks of Non-Negative Matrix Factorization """ + # Authors: Tom Dupre la Tour (benchmark) # Chih-Jen Linn (original projected gradient NMF implementation) # Anthony Di Franco (projected gradient, Python and NumPy port) @@ -258,8 +259,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if not isinstance(self.max_iter, numbers.Integral) or self.max_iter < 0: raise ValueError( "Maximum number of iterations must be a positive " - "integer; got (max_iter=%r)" - % self.max_iter + "integer; got (max_iter=%r)" % self.max_iter ) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError( @@ -305,8 +305,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iteration %d reached. Increase it" - " to improve convergence." - % self.max_iter, + " to improve convergence." % self.max_iter, ConvergenceWarning, ) diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index ec1bf3281f3a4..8a4bc9b1a34fe 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -3,6 +3,7 @@ The input data is mostly low rank but is a fat infinite tail. 
""" + import gc import sys from time import time diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index 1cd9f70a38f44..a80455e21c255 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -38,6 +38,7 @@ (https://people.cs.rutgers.edu/~farach/pubs/FrequentStream.pdf) """ + # Author: Daniel Lopez-Sanchez # License: BSD 3 clause diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index abd2c6fe9d4d4..ed99d1c44e2fd 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -2,6 +2,7 @@ The data is mostly low rank but is a fat infinite tail. """ + import gc from collections import defaultdict from time import time diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index bd8c62ecba484..6551de690994b 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,6 +6,7 @@ Benchmarks for random projections. """ + import collections import gc import optparse diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index dc2ed093f11d0..c5b3e7728e2ec 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -3,6 +3,7 @@ Benchmarks of sklearn SAGA vs lightning SAGA vs Liblinear. Shows the gain in using multinomial logistic regression in term of learning time. """ + import json import os import time @@ -118,9 +119,7 @@ def fit_single( # Lightning predict_proba is not implemented for n_classes > 2 y_pred = _predict_proba(lr, X) score = log_loss(y, y_pred, normalize=False) / n_samples - score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum( - np.abs(lr.coef_) - ) + score += 0.5 * alpha * np.sum(lr.coef_**2) + beta * np.sum(np.abs(lr.coef_)) scores.append(score) train_score, test_score = tuple(scores) diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 743292ca5fa61..39cf1a11ffed6 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -2,6 +2,7 @@ Benchmarks for sampling without replacement of integer. """ + import gc import operator import optparse diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 31d4141d1af97..2eab7071544f9 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,6 +8,7 @@ * psutil (optional, but recommended) """ + import itertools import timeit diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index 29cd7584432b7..c522bcb39e994 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,6 +13,7 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" + import gc from datetime import datetime diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index dfd4c4e92f848..813fffcf29141 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -130,7 +130,8 @@ def sanitize(filename): try: from bhtsne.bhtsne import run_bh_tsne except ImportError as e: - raise ImportError("""\ + raise ImportError( + """\ If you want comparison with the reference implementation, build the binary from source (https://github.com/lvdmaaten/bhtsne) in the folder benchmarks/bhtsne and add an empty `__init__.py` file in the folder: @@ -140,7 +141,8 @@ def sanitize(filename): $ g++ sptree.cpp tsne.cpp tsne_main.cpp -o bh_tsne -O2 $ touch __init__.py $ cd .. -""") from e +""" + ) from e def bhtsne(X): """Wrapper for the reference lvdmaaten/bhtsne implementation.""" diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index f438927772619..28bb267b6f721 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,6 +6,7 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ + import getpass import sys import time diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py index 64c5784e0cd06..466396b640302 100644 --- a/build_tools/get_comment.py +++ b/build_tools/get_comment.py @@ -88,8 +88,7 @@ def get_message(log_file, repo, pr_number, sha, run_id, details, versions): "https://scikit-learn.org/dev/developers/contributing.html" "#how-to-contribute)) and push the changes. If you already have done " "that, please send an empty commit with `git commit --allow-empty` " - "and push the changes to trigger the CI.\n\n" - + sub_text + "and push the changes to trigger the CI.\n\n" + sub_text ) message = "" diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 2289709fdc037..5579d86c5ce3e 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,5 +1,6 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" + import sys from pathlib import Path diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 3bc1aceb3437c..28b44be3c9aa9 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -1,6 +1,5 @@ """Embed vcomp140.dll and msvcp140.dll.""" - import os import os.path as op import shutil diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index ab0f3e590d560..fd77cfd3c0721 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -102,7 +102,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "pytorch", "pytorch-cpu", @@ -123,7 +124,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "osx-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies + [ + "conda_dependencies": common_dependencies + + [ "ccache", "compilers", "llvm-openmp", @@ -160,7 +162,8 @@ def remove_from(alist, to_remove): "channel": "defaults", "conda_dependencies": remove_from( common_dependencies, ["pandas", "cython", "pip", "ninja", "meson-python"] - ) + ["ccache"], + ) + + ["ccache"], 
"package_constraints": { "python": "3.9", "blas": "[build=openblas]", @@ -268,7 +271,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/azure", "platform": "win-64", "channel": "conda-forge", - "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + [ + "conda_dependencies": remove_from(common_dependencies, ["pandas", "pyamg"]) + + [ "wheel", "pip", ], @@ -284,7 +288,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", @@ -324,7 +329,8 @@ def remove_from(alist, to_remove): "folder": "build_tools/circle", "platform": "linux-64", "channel": "conda-forge", - "conda_dependencies": common_dependencies_without_coverage + [ + "conda_dependencies": common_dependencies_without_coverage + + [ "scikit-image", "seaborn", "memory_profiler", @@ -353,7 +359,8 @@ def remove_from(alist, to_remove): "channel": "conda-forge", "conda_dependencies": remove_from( common_dependencies_without_coverage, ["pandas", "pyamg"] - ) + ["pip", "ccache"], + ) + + ["pip", "ccache"], "package_constraints": { "python": "3.9", }, @@ -460,7 +467,8 @@ def get_package_with_constraint(package_name, build_metadata, uses_pip=False): def get_conda_environment_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py @@ -476,7 +484,8 @@ def get_conda_environment_content(build_metadata): {% for pip_dep in build_metadata.get('pip_dependencies', []) %} - {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} {% endfor %} - {% endif %}""".strip()) + {% endif %}""".strip() + ) return template.render(build_metadata=build_metadata) @@ -532,13 +541,15 @@ def write_all_conda_lock_files(build_metadata_list): def get_pip_requirements_content(build_metadata): - template = environment.from_string(""" + template = environment.from_string( + """ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py {% for pip_dep in build_metadata['pip_dependencies'] %} {{ pip_dep | get_package_with_constraint(build_metadata, uses_pip=True) }} -{% endfor %}""".strip()) +{% endfor %}""".strip() + ) return template.render(build_metadata=build_metadata) diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index 32e905fe650ea..9f117b07fa6a3 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -1,17 +1,17 @@ """ - doilinks - ~~~~~~~~ - Extension to add links to DOIs. With this extension you can use e.g. - :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will - create a link to a DOI resolver - (``https://doi.org/10.1016/S0022-2836(05)80360-2``). - The link caption will be the raw DOI. - You can also give an explicit caption, e.g. - :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`. - - :copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by - the Sphinx team. - :license: BSD. +doilinks +~~~~~~~~ +Extension to add links to DOIs. With this extension you can use e.g. 
diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py
index 32e905fe650ea..9f117b07fa6a3 100644
--- a/doc/sphinxext/doi_role.py
+++ b/doc/sphinxext/doi_role.py
@@ -1,17 +1,17 @@
 """
-    doilinks
-    ~~~~~~~~
-    Extension to add links to DOIs. With this extension you can use e.g.
-    :doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will
-    create a link to a DOI resolver
-    (``https://doi.org/10.1016/S0022-2836(05)80360-2``).
-    The link caption will be the raw DOI.
-    You can also give an explicit caption, e.g.
-    :doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`.
-
-    :copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by
-        the Sphinx team.
-    :license: BSD.
+doilinks
+~~~~~~~~
+Extension to add links to DOIs. With this extension you can use e.g.
+:doi:`10.1016/S0022-2836(05)80360-2` in your documents. This will
+create a link to a DOI resolver
+(``https://doi.org/10.1016/S0022-2836(05)80360-2``).
+The link caption will be the raw DOI.
+You can also give an explicit caption, e.g.
+:doi:`Basic local alignment search tool <10.1016/S0022-2836(05)80360-2>`.
+
+:copyright: Copyright 2015 Jon Lund Steffensen. Based on extlinks by
+    the Sphinx team.
+:license: BSD.
 """
 
 from docutils import nodes, utils

diff --git a/doc/sphinxext/sphinx_issues.py b/doc/sphinxext/sphinx_issues.py
index 5cd532319cbd7..206359a1bd703 100644
--- a/doc/sphinxext/sphinx_issues.py
+++ b/doc/sphinxext/sphinx_issues.py
@@ -18,6 +18,7 @@
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.
 """
+
 import re
 
 from docutils import nodes, utils

diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py
index 1ff4399d60739..97a67fad52776 100644
--- a/examples/applications/plot_face_recognition.py
+++ b/examples/applications/plot_face_recognition.py
@@ -11,6 +11,7 @@
 .. _LFW: http://vis-www.cs.umass.edu/lfw/
 """
+
 # %%
 from time import time

diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py
index f928ae631b78b..91dca761d1fe3 100644
--- a/examples/calibration/plot_calibration.py
+++ b/examples/calibration/plot_calibration.py
@@ -22,6 +22,7 @@
 Brier score.
 """
+
 # Authors:
 #    Mathieu Blondel
 #    Alexandre Gramfort

diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py
index 5816ae298f419..e286104636d67 100644
--- a/examples/cluster/plot_affinity_propagation.py
+++ b/examples/cluster/plot_affinity_propagation.py
@@ -8,6 +8,7 @@
 Between Data Points", Science Feb. 2007
 """
+
 import numpy as np
 
 from sklearn import metrics

diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py
index 3aebdffddaf63..a562ebbc96ba5 100644
--- a/examples/cluster/plot_bisect_kmeans.py
+++ b/examples/cluster/plot_bisect_kmeans.py
@@ -13,6 +13,7 @@
 present for regular K-Means.
 """
+
 import matplotlib.pyplot as plt
 
 from sklearn.cluster import BisectingKMeans, KMeans

diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py
index df9af8ea330ba..04baa0fd98bc0 100644
--- a/examples/covariance/plot_covariance_estimation.py
+++ b/examples/covariance/plot_covariance_estimation.py
@@ -15,7 +15,6 @@
 trade-off.
 """
 
-
 # %%
 # Generate sample data
 # --------------------

diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py
index de6f92bad9dfe..d492de07fec87 100644
--- a/examples/ensemble/plot_feature_transformation.py
+++ b/examples/ensemble/plot_feature_transformation.py
@@ -20,7 +20,6 @@
 """
 
-
 # Author: Tim Head
 #
 # License: BSD 3 clause

diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py
index 1eaba2e852f28..6c239e97d66ee 100644
--- a/examples/ensemble/plot_gradient_boosting_early_stopping.py
+++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py
@@ -31,6 +31,7 @@
 License: BSD 3 clause
 """
+
 # %%
 # Data Preparation
 # ----------------

diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py
index 15ad8e9524243..dcd5f05af626c 100644
--- a/examples/ensemble/plot_monotonic_constraints.py
+++ b/examples/ensemble/plot_monotonic_constraints.py
@@ -19,6 +19,7 @@
 `_.
 """
+
 # %%
 import matplotlib.pyplot as plt
 import numpy as np

diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py
index 715e6129cdef8..70dda86fabd60 100644
--- a/examples/linear_model/plot_quantile_regression.py
+++ b/examples/linear_model/plot_quantile_regression.py
@@ -261,14 +261,16 @@
 y_pred_lr = linear_regression.fit(X, y_pareto).predict(X)
 y_pred_qr = quantile_regression.fit(X, y_pareto).predict(X)
 
-print(f"""Training error (in-sample performance)
+print(
+    f"""Training error (in-sample performance)
 {linear_regression.__class__.__name__}:
 MAE = {mean_absolute_error(y_pareto, y_pred_lr):.3f}
 MSE = {mean_squared_error(y_pareto, y_pred_lr):.3f}
 {quantile_regression.__class__.__name__}:
 MAE = {mean_absolute_error(y_pareto, y_pred_qr):.3f}
 MSE = {mean_squared_error(y_pareto, y_pred_qr):.3f}
-    """)
+    """
+)
 
 # %%
 # On the training set, we see that MAE is lower for
@@ -298,14 +300,16 @@
     cv=3,
     scoring=["neg_mean_absolute_error", "neg_mean_squared_error"],
 )
-print(f"""Test error (cross-validated performance)
+print(
+    f"""Test error (cross-validated performance)
 {linear_regression.__class__.__name__}:
 MAE = {-cv_results_lr["test_neg_mean_absolute_error"].mean():.3f}
 MSE = {-cv_results_lr["test_neg_mean_squared_error"].mean():.3f}
 {quantile_regression.__class__.__name__}:
 MAE = {-cv_results_qr["test_neg_mean_absolute_error"].mean():.3f}
 MSE = {-cv_results_qr["test_neg_mean_squared_error"].mean():.3f}
-    """)
+    """
+)
 
 # %%
 # We reach similar conclusions on the out-of-sample evaluation.

diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py
index fe17d9f80030f..65df88588efef 100644
--- a/examples/manifold/plot_swissroll.py
+++ b/examples/manifold/plot_swissroll.py
@@ -8,6 +8,7 @@
 Then, we will explore how they both deal with the addition of a hole
 in the data.
 """
+
 # %%
 # Swiss Roll
 # ---------------------------------------------------
""" + # %% # Authors: Jan Hendrik Metzen # License: BSD 3 clause diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index 9984bb6183348..9cad255b763af 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -20,6 +20,7 @@ First a few imports and some random data for the rest of the script. """ + # %% import warnings diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index aa0266c98ff7a..410a843cf78db 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -33,7 +33,6 @@ time to initialize and low number of GaussianMixture iterations to converge. """ - # Author: Gordon Walsh # Data generation code from Jake Vanderplas diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py index 58c7f6e42f408..19bcb13c5a99b 100644 --- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py +++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py @@ -11,7 +11,6 @@ """ - import numpy as np from sklearn.datasets import fetch_20newsgroups diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py index b3d834da5d067..4c54a4119ced3 100644 --- a/examples/tree/plot_iris_dtc.py +++ b/examples/tree/plot_iris_dtc.py @@ -14,6 +14,7 @@ We also show the tree structure of a model built on all of the features. """ + # %% # First load the copy of the Iris dataset shipped with scikit-learn: from sklearn.datasets import load_iris diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py index 996d45d64d42a..380edbd6350b6 100644 --- a/maint_tools/check_pxd_in_installation.py +++ b/maint_tools/check_pxd_in_installation.py @@ -36,7 +36,9 @@ # We set the language to c++ and we use numpy.get_include() because # some modules require it. with open(tmpdir / "setup_tst.py", "w") as f: - f.write(textwrap.dedent(""" + f.write( + textwrap.dedent( + """ from setuptools import setup, Extension from Cython.Build import cythonize import numpy @@ -47,7 +49,9 @@ include_dirs=[numpy.get_include()])] setup(ext_modules=cythonize(extensions)) - """)) + """ + ) + ) subprocess.run( ["python", "setup_tst.py", "build_ext", "-i"], check=True, cwd=tmpdir diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py index 3895a0e430082..ad1a3a818b14d 100644 --- a/sklearn/__check_build/__init__.py +++ b/sklearn/__check_build/__init__.py @@ -1,6 +1,7 @@ -""" Module to give helpful messages to the user that did not +"""Module to give helpful messages to the user that did not compile scikit-learn properly. """ + import os INPLACE_MSG = """ @@ -28,7 +29,8 @@ def raise_build_error(e): dir_content.append(filename.ljust(26)) else: dir_content.append(filename + "\n") - raise ImportError("""%s + raise ImportError( + """%s ___________________________________________________________________________ Contents of %s: %s @@ -38,7 +40,9 @@ def raise_build_error(e): If you have installed scikit-learn from source, please do not forget to build the package before using it: run `python setup.py install` or `make` in the source directory. 
-%s""" % (e, local_dir, "".join(dir_content).strip(), msg)) +%s""" + % (e, local_dir, "".join(dir_content).strip(), msg) + ) try: diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index a8ced8aa9d292..ceb72441000c3 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -1,6 +1,7 @@ """ Utilities useful during the build. """ + # author: Andy Mueller, Gael Varoquaux # license: BSD diff --git a/sklearn/_build_utils/openmp_helpers.py b/sklearn/_build_utils/openmp_helpers.py index 9172d40830bb9..ed9bf0ea3eea0 100644 --- a/sklearn/_build_utils/openmp_helpers.py +++ b/sklearn/_build_utils/openmp_helpers.py @@ -38,7 +38,8 @@ def check_openmp_support(): # Pyodide doesn't support OpenMP return False - code = textwrap.dedent("""\ + code = textwrap.dedent( + """\ #include #include int main(void) { @@ -46,7 +47,8 @@ def check_openmp_support(): printf("nthreads=%d\\n", omp_get_num_threads()); return 0; } - """) + """ + ) extra_preargs = os.getenv("LDFLAGS", None) if extra_preargs is not None: @@ -94,7 +96,8 @@ def check_openmp_support(): "Failed to build scikit-learn with OpenMP support" ) from openmp_exception else: - message = textwrap.dedent(""" + message = textwrap.dedent( + """ *********** * WARNING * @@ -117,7 +120,8 @@ def check_openmp_support(): parallelism. *** - """) + """ + ) warnings.warn(message) return openmp_supported diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index f3eb054bb037e..b73fa8658739f 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -64,10 +64,12 @@ def basic_check_build(): # The following check won't work in pyodide return - code = textwrap.dedent("""\ + code = textwrap.dedent( + """\ #include int main(void) { return 0; } - """) + """ + ) compile_test_program(code) diff --git a/sklearn/_build_utils/version.py b/sklearn/_build_utils/version.py index 1f8688a008e9d..49a3cfb82bebd 100644 --- a/sklearn/_build_utils/version.py +++ b/sklearn/_build_utils/version.py @@ -1,6 +1,5 @@ #!/usr/bin/env python -""" Extract version number from __init__.py -""" +"""Extract version number from __init__.py""" import os diff --git a/sklearn/_config.py b/sklearn/_config.py index d4ccaca0a98f7..fc9392de68df6 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,5 +1,5 @@ -"""Global configuration state and functions for management -""" +"""Global configuration state and functions for management""" + import os import threading from contextlib import contextmanager as contextmanager diff --git a/sklearn/_distributor_init.py b/sklearn/_distributor_init.py index a0142ac80878f..f0901034e83e4 100644 --- a/sklearn/_distributor_init.py +++ b/sklearn/_distributor_init.py @@ -1,4 +1,4 @@ -""" Distributor init file +"""Distributor init file Distributors: you can add custom code here to support particular distributions of scikit-learn. diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 9459844f6b89a..a6560d58d91e6 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -1,6 +1,7 @@ """ Module contains classes for invertible (and differentiable) link functions. """ + # Author: Christian Lorentzen from abc import ABC, abstractmethod diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index a3b205ed10687..96863cc00fe01 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -5,6 +5,7 @@ Specific losses are used for regression, binary classification or multiclass classification. 
""" + # Goals: # - Provide a common private module for loss functions/classes. # - To be used in: diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index a7b9c48466a5d..b015a375b2bb0 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,4 +1,5 @@ """All minimum dependencies for scikit-learn.""" + import argparse from collections import defaultdict diff --git a/sklearn/base.py b/sklearn/base.py index e73ae4c8a180e..d6014332f7cc0 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -1353,9 +1353,8 @@ class _UnstableArchMixin: def _more_tags(self): return { - "non_deterministic": _IS_32BIT or platform.machine().startswith( - ("ppc", "powerpc") - ) + "non_deterministic": _IS_32BIT + or platform.machine().startswith(("ppc", "powerpc")) } diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index 2da9d8c5a0f43..fcecacc9ca57c 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -7,6 +7,7 @@ Gael Varoquaux License: BSD 3 clause """ + import warnings from heapq import heapify, heappop, heappush, heappushpop from numbers import Integral, Real diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index 18c98ad5348b5..b22f6a369fcc1 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -1,4 +1,5 @@ """Spectral biclustering algorithms.""" + # Authors : Kemal Eren # License: BSD 3 clause diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index a1f7716ced822..1d4a9e1d84c26 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -1,4 +1,5 @@ """Bisecting K-means clustering.""" + # Author: Michal Krawczyk import warnings diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index f84f18c1c18b3..218db48ad2331 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -2,6 +2,7 @@ Feature agglomeration. Base classes and functions for performing feature agglomeration. """ + # Author: V. Michel, A. 
Gramfort # License: BSD 3 clause diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py index 380448f1f8589..e77baaf4b1146 100644 --- a/sklearn/cluster/_hdbscan/hdbscan.py +++ b/sklearn/cluster/_hdbscan/hdbscan.py @@ -2,6 +2,7 @@ HDBSCAN: Hierarchical Density-Based Spatial Clustering of Applications with Noise """ + # Authors: Leland McInnes # Steve Astels # John Healy diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index d323a6b8afd03..91606056c17aa 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -793,7 +793,8 @@ def fit_predict(self, X, y=None): def _more_tags(self): return { - "pairwise": self.affinity in [ + "pairwise": self.affinity + in [ "precomputed", "precomputed_nearest_neighbors", ] diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 121e8f2cfe400..abeb81dca50aa 100644 --- a/sklearn/cluster/tests/test_feature_agglomeration.py +++ b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -1,6 +1,7 @@ """ Tests for sklearn.cluster._feature_agglomeration """ + # Authors: Sergul Aydore 2017 import warnings diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index 6db2d4387de18..d586d203747c2 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -2,6 +2,7 @@ Tests for HDBSCAN clustering algorithm Based on the DBSCAN test code """ + import numpy as np import pytest from scipy import stats diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 3c99dd50ea85f..0a139bf3c4571 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -2,6 +2,7 @@ Several basic tests for hierarchical clustering procedures """ + # Authors: Vincent Michel, 2010, Gael Varoquaux 2012, # Matteo Visconti di Oleggio Castello 2014 # License: BSD 3 clause diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 4a112a30b29ed..1f2f8c390c909 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -1,4 +1,5 @@ """Testing for K-means""" + import re import sys from io import StringIO diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 682df64044bf9..689a159851f50 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -1,4 +1,5 @@ """Testing for Spectral Clustering methods""" + import pickle import re diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index c90e855ca6768..980bf964e6dfa 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -4,6 +4,7 @@ Here are implemented estimators that are resistant to outliers. """ + # Author: Virgile Fritsch # # License: BSD 3 clause diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index a7d251a5bbdfe..c0e2deb20de16 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -1,5 +1,5 @@ -""" Test the graphical_lasso module. 
-""" +"""Test the graphical_lasso module.""" + import sys from io import StringIO diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 7ae7902f3365c..6f61e027dceaa 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -3,6 +3,7 @@ including methods to load and fetch popular reference datasets. It also features some artificial data generators. """ + import textwrap from ._base import ( @@ -106,7 +107,8 @@ def __getattr__(name): if name == "load_boston": - msg = textwrap.dedent(""" + msg = textwrap.dedent( + """ `load_boston` has been removed from scikit-learn since version 1.2. The Boston housing prices dataset has an ethical problem: as @@ -153,7 +155,8 @@ def __getattr__(name): "Hedonic housing prices and the demand for clean air." Journal of environmental economics and management 5.1 (1978): 81-102. - """) + """ + ) raise ImportError(msg) try: return globals()[name] diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index 5c427441012d6..86dfeb37a6ef5 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -1,4 +1,5 @@ """Implementation of ARFF parsers: via LIAC-ARFF and pandas.""" + import itertools import re from collections import OrderedDict diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index e94996ccdec65..a1e4b911f1bef 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -18,6 +18,7 @@ Statistics and Probability Letters, 33 (1997) 291-297. """ + # Authors: Peter Prettenhofer # License: BSD 3 clause diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index 396e4af9389e6..224978bd70770 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -221,9 +221,7 @@ def make_classification( msg = "n_classes({}) * n_clusters_per_class({}) must be" msg += " smaller or equal 2**n_informative({})={}" raise ValueError( - msg.format( - n_classes, n_clusters_per_class, n_informative, 2**n_informative - ) + msg.format(n_classes, n_clusters_per_class, n_informative, 2**n_informative) ) if weights is not None: diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 4072d9c8ec67f..84e7c91d3176f 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -1,6 +1,7 @@ """Test the 20news downloader, if the data is available, or if specifically requested via environment variable (e.g. 
for CI jobs).""" + from functools import partial from unittest.mock import patch diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py index b675439cd2e9d..c4f9e3eb00ffd 100644 --- a/sklearn/datasets/tests/test_arff_parser.py +++ b/sklearn/datasets/tests/test_arff_parser.py @@ -83,7 +83,9 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): """Check that we properly strip single quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_single_quote' {'A', 'B', 'C'} @attribute 'str_single_quote' string @@ -91,7 +93,9 @@ def test_pandas_arff_parser_strip_single_quotes(parser_func): @attribute 'class' numeric @data 'A','some text','\"expect double quotes\"',0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_single_quote": { @@ -150,7 +154,9 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): """Check that we properly strip double quotes from the data.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_double_quote' {"A", "B", "C"} @attribute 'str_double_quote' string @@ -158,7 +164,9 @@ def test_pandas_arff_parser_strip_double_quotes(parser_func): @attribute 'class' numeric @data "A","some text","\'expect double quotes\'",0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_double_quote": { @@ -217,7 +225,9 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): """Check that we properly parse with no quotes characters.""" pd = pytest.importorskip("pandas") - arff_file = BytesIO(textwrap.dedent(""" + arff_file = BytesIO( + textwrap.dedent( + """ @relation 'toy' @attribute 'cat_without_quote' {A, B, C} @attribute 'str_without_quote' string @@ -225,7 +235,9 @@ def test_pandas_arff_parser_strip_no_quotes(parser_func): @attribute 'class' numeric @data A,some text,'internal' quote,0 - """).encode("utf-8")) + """ + ).encode("utf-8") + ) columns_info = { "cat_without_quote": { diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index ef6fc95db80bf..b24fb5bd66a56 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,6 +1,7 @@ """Test the california_housing loader, if the data is available, or if specifically requested via environment variable (e.g. for CI jobs).""" + from functools import partial import pytest diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py index 8048a31041ddc..5bed37837718b 100644 --- a/sklearn/datasets/tests/test_common.py +++ b/sklearn/datasets/tests/test_common.py @@ -1,4 +1,5 @@ """Test loaders for common functionality.""" + import inspect import os diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index e44fdaae69ec3..018505bc4fa05 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -1,6 +1,7 @@ """Test the covtype loader, if the data is available, or if specifically requested via environment variable (e.g. 
for CI jobs).""" + from functools import partial import pytest diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index e48e361909603..70bb33e22adb7 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -1,4 +1,5 @@ """Test the openml loader.""" + import gzip import json import os @@ -1457,8 +1458,7 @@ def _mock_urlopen_raise(request, *args, **kwargs): raise ValueError( "This mechanism intends to test correct cache" "handling. As such, urlopen should never be " - "accessed. URL: %s" - % request.get_full_url() + "accessed. URL: %s" % request.get_full_url() ) data_id = 61 diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index 1f9cfe07dc0e8..3d33938a755a7 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -4,7 +4,6 @@ this module can be regarded as dimensionality reduction techniques. """ - from ..utils.extmath import randomized_svd from ._dict_learning import ( DictionaryLearning, diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 177d6960033da..267e1cbfe756b 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -1,5 +1,5 @@ -""" Dictionary learning. -""" +"""Dictionary learning.""" + # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index db46540e26708..75266c5f64b2b 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -1,5 +1,5 @@ -""" Non-negative matrix factorization. -""" +"""Non-negative matrix factorization.""" + # Author: Vlad Niculae # Lars Buitinck # Mathieu Blondel @@ -1769,8 +1769,7 @@ def _fit_transform(self, X, y=None, W=None, H=None, update_H=True): if n_iter == self.max_iter and self.tol > 0: warnings.warn( "Maximum number of iterations %d reached. Increase " - "it to improve convergence." - % self.max_iter, + "it to improve convergence." % self.max_iter, ConvergenceWarning, ) diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index abd2fda2d5d2f..4c49337e88093 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -1,5 +1,4 @@ -""" Principal Component Analysis. -""" +"""Principal Component Analysis.""" # Author: Alexandre Gramfort # Olivier Grisel diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index b14df8c5f4d22..fa711ce8c0703 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -1,4 +1,5 @@ """Matrix factorization with Sparse PCA.""" + # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index 725683e8d46c6..d238f35cb2167 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -1,5 +1,4 @@ -"""Truncated SVD for sparse matrices, aka latent semantic analysis (LSA). -""" +"""Truncated SVD for sparse matrices, aka latent semantic analysis (LSA).""" # Author: Lars Buitinck # Olivier Grisel diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 6a376b01ecb19..bd7a35bb8a96f 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -1,6 +1,7 @@ """ Test the fastica algorithm. 
""" + import itertools import os import warnings diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index 5d7c8aa03f174..646aad2db795d 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -1,4 +1,5 @@ """Tests for Incremental PCA.""" + import warnings import numpy as np diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index f4a3756bdaf1d..8ddf05084f1be 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -2,6 +2,7 @@ The :mod:`sklearn.ensemble` module includes ensemble-based methods for classification, regression and anomaly detection. """ + from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble from ._forest import ( diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index b5ee64b6e708c..6e5a7e47b0c10 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -1198,8 +1198,7 @@ def _validate_y_class_weight(self, y, classes=None): raise ValueError( "Valid presets for class_weight include " '"balanced" and "balanced_subsample".' - 'Given "%s".' - % self.class_weight + 'Given "%s".' % self.class_weight ) if self.warm_start: warn( diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 49575cefa5090..bd11e373d3915 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -741,8 +741,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): if ( "pass parameters to specific steps of " "your pipeline using the " - "stepname__parameter" - in str(e) + "stepname__parameter" in str(e) ): # pipeline raise ValueError(msg) from e else: # regular estimator whose input checking failed @@ -1060,8 +1059,7 @@ def _compute_partial_dependence_recursion(self, grid, target_features): warnings.warn( "Using recursion method with a non-constant init predictor " "will lead to incorrect partial dependence values. " - "Got init=%s." - % self.init, + "Got init=%s." % self.init, UserWarning, ) grid = np.asarray(grid, dtype=DTYPE, order="C") diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index 98d01ea5cb9f2..d23f6e7b00a82 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -5,6 +5,7 @@ Bin thresholds are computed with the quantiles so that each bin contains approximately the same number of samples. """ + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index 15f92cd324768..c9b1b56bc7999 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -4,6 +4,7 @@ TreeGrower builds a regression tree fitting a Newton-Raphson step, based on the gradients and hessians of the training data. """ + # Author: Nicolas Hug import numbers diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index b939712d18893..799c25aadcec3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -1,6 +1,7 @@ """ This module contains the TreePredictor class which is used for prediction. 
""" + # Author: Nicolas Hug import numpy as np diff --git a/sklearn/ensemble/_hist_gradient_boosting/utils.py b/sklearn/ensemble/_hist_gradient_boosting/utils.py index 12f49b6cdce50..1ff17217164c8 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/utils.py +++ b/sklearn/ensemble/_hist_gradient_boosting/utils.py @@ -1,4 +1,5 @@ """This module contains utility routines.""" + from ...base import is_classifier from .binning import _BinMapper diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 4bfbf7c2ff6ee..f13f5983d1f4b 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1,6 +1,7 @@ """ Testing for the gradient boosting module (sklearn.ensemble.gradient_boosting). """ + import re import warnings diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index d287400c7999f..6fa4512ce39c6 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -6,6 +6,7 @@ :term:`experimental`, but these estimators are now stable and can be imported normally from `sklearn.ensemble`. """ + # Don't remove this file, we don't want to break users code just because the # feature isn't experimental anymore. diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ea6686ef45eaa..d50c489e6b852 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -409,8 +409,7 @@ def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): "Your stop_words may be inconsistent with " "your preprocessing. Tokenizing the stop " "words generated tokens %r not in " - "stop_words." - % sorted(inconsistent) + "stop_words." % sorted(inconsistent) ) return not inconsistent except Exception: @@ -516,8 +515,7 @@ def _validate_ngram_range(self): if min_n > max_m: raise ValueError( "Invalid value for ngram_range=%s " - "lower boundary larger than the upper boundary." - % str(self.ngram_range) + "lower boundary larger than the upper boundary." 
% str(self.ngram_range) ) def _warn_for_unused_params(self): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 5a90d46c9758b..9c393724f9cea 100644 --- a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -1,6 +1,7 @@ """ Sequential feature selection """ + from numbers import Integral, Real import numpy as np diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index 3815a88c374e8..d7bffec5159bf 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -1,6 +1,7 @@ """ Todo: cross-check the F-value with stats model """ + import itertools import warnings diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index d3723016be127..67bba2e29c857 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -456,9 +456,7 @@ def predict(self, X, return_std=False, return_cov=False): y_cov = self.kernel_(X) - V.T @ V # undo normalisation - y_cov = np.outer(y_cov, self._y_train_std**2).reshape( - *y_cov.shape, -1 - ) + y_cov = np.outer(y_cov, self._y_train_std**2).reshape(*y_cov.shape, -1) # if y_cov has shape (n_samples, n_samples, 1), reshape to # (n_samples, n_samples) if y_cov.shape[2] == 1: @@ -483,9 +481,7 @@ def predict(self, X, return_std=False, return_cov=False): y_var[y_var_negative] = 0.0 # undo normalisation - y_var = np.outer(y_var, self._y_train_std**2).reshape( - *y_var.shape, -1 - ) + y_var = np.outer(y_var, self._y_train_std**2).reshape(*y_var.shape, -1) # if y_var has shape (n_samples, 1), reshape to (n_samples,) if y_var.shape[1] == 1: diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 3b995c48b1f71..c31335696944c 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -1750,9 +1750,7 @@ def __call__(self, X, Y=None, eval_gradient=False): # We need to recompute the pairwise dimension-wise distances if self.anisotropic: - D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / ( - length_scale**2 - ) + D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (length_scale**2) else: D = squareform(dists**2)[:, :, np.newaxis] diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index 842159f13ac04..bd8bd39e1cc01 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -1,4 +1,4 @@ -"""Testing for Gaussian process classification """ +"""Testing for Gaussian process classification""" # Author: Jan Hendrik Metzen # License: BSD 3 clause @@ -218,8 +218,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k1__noise_level is close to the " "specified upper bound 0.001. " @@ -229,8 +228,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k2__length_scale is close to the " "specified lower bound 1000.0. 
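The assertion rewrites in the two Gaussian-process test files below lean on implicit concatenation of adjacent string literals: moving the first fragment up onto the `==` line changes only the layout, not the compared value, because adjacent literals are joined before the comparison is evaluated. A quick self-contained check of that equivalence (string content invented for illustration):

    expected = (
        "The optimal value found for "
        "dimension 0 of parameter k1__noise_level"
    )
    # adjacent literals concatenate before `==` is evaluated
    assert expected == "The optimal value found for " "dimension 0 of parameter k1__noise_level"
    print("layout change is behavior-preserving")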
" @@ -250,8 +248,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "length_scale is close to the " "specified upper bound 100.0. " @@ -261,8 +258,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 1 of parameter " "length_scale is close to the " "specified upper bound 100.0. " diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index d890dc05d9f02..e280827926d28 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -1,4 +1,4 @@ -"""Testing for Gaussian process regression """ +"""Testing for Gaussian process regression""" # Author: Jan Hendrik Metzen # Modified by: Pete Green @@ -493,8 +493,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k1__noise_level is close to the " "specified upper bound 0.001. " @@ -504,8 +503,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "k2__length_scale is close to the " "specified lower bound 1000.0. " @@ -525,8 +523,7 @@ def test_warning_bounds(): assert issubclass(record[0].category, ConvergenceWarning) assert ( - record[0].message.args[0] - == "The optimal value found for " + record[0].message.args[0] == "The optimal value found for " "dimension 0 of parameter " "length_scale is close to the " "specified lower bound 10.0. " @@ -536,8 +533,7 @@ def test_warning_bounds(): assert issubclass(record[1].category, ConvergenceWarning) assert ( - record[1].message.args[0] - == "The optimal value found for " + record[1].message.args[0] == "The optimal value found for " "dimension 1 of parameter " "length_scale is close to the " "specified lower bound 10.0. 
" diff --git a/sklearn/impute/__init__.py b/sklearn/impute/__init__.py index e305bc2a657dc..380bcecaf65b5 100644 --- a/sklearn/impute/__init__.py +++ b/sklearn/impute/__init__.py @@ -1,4 +1,5 @@ """Transformers for missing value imputation""" + import typing from ._base import MissingIndicator, SimpleImputer diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index af298ae8c380e..04a4dffd10e68 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -701,9 +701,8 @@ def inverse_transform(self, X): def _more_tags(self): return { - "allow_nan": is_pandas_na(self.missing_values) or is_scalar_nan( - self.missing_values - ) + "allow_nan": is_pandas_na(self.missing_values) + or is_scalar_nan(self.missing_values) } def get_feature_names_out(self, input_features=None): diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index f8e08785e8358..f254967f96166 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,6 +1,5 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" - from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index b052609a85a2b..3cb4999eb0833 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -1,6 +1,7 @@ """ Testing for the partial dependence module. """ + import warnings import numpy as np diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index 2869e84c78bf8..8b3ed78cdd368 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -437,9 +437,7 @@ def test_permutation_importance_sample_weight(): # the second half of the samples approaches to infinity, the ratio of # the two features importance should equal to 2 on expectation (when using # mean absolutes error as the loss function). - w = np.hstack( - [np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)] - ) + w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)]) lr.fit(x, y, w) pi = permutation_importance( lr, diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index fa9b431fd2377..0b6adbe44e686 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -502,8 +502,7 @@ def inner_solve(self, X, y, sample_weight): "Further options are to use another solver or to avoid such situation " "in the first place. Possible remedies are removing collinear features" " of X or increasing the penalization strengths.\n" - "The original Linear Algebra message was:\n" - + str(e), + "The original Linear Algebra message was:\n" + str(e), scipy.linalg.LinAlgWarning, ) # Possible causes: diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 5256a5f370272..26f6bdc08d254 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -1107,6 +1107,5 @@ def test_newton_solver_verbosity(capsys, verbose): if verbose >= 1: assert ( "The inner solver detected a pointwise Hessian with many negative values" - " and resorts to lbfgs instead." 
- in captured.out + " and resorts to lbfgs instead." in captured.out ) diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index efea6c6b4c5f9..4e038ecb28da9 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -2,6 +2,7 @@ Least Angle Regression algorithm. See the documentation on the Generalized Linear Model for a complete discussion. """ + # Author: Fabian Pedregosa # Alexandre Gramfort # Gael Varoquaux @@ -1737,8 +1738,7 @@ def fit(self, X, y, **params): if hasattr(Gram, "__array__"): warnings.warn( 'Parameter "precompute" cannot be an array in ' - '%s. Automatically switch to "auto" instead.' - % self.__class__.__name__ + '%s. Automatically switch to "auto" instead.' % self.__class__.__name__ ) Gram = "auto" diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index 4255706e284f1..e8c1466b30623 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -1,6 +1,7 @@ """ Loss functions for linear models with raw_prediction = X @ coef """ + import numpy as np from scipy import sparse diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 259ce54d3f11e..a8ecc29715886 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -1246,8 +1246,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes_[0] + " class: %r" % classes_[0] ) if len(self.classes_) == 2: @@ -1787,8 +1786,7 @@ def fit(self, X, y, sample_weight=None, **params): ): raise ValueError( "l1_ratios must be a list of numbers between " - "0 and 1; got (l1_ratios=%r)" - % self.l1_ratios + "0 and 1; got (l1_ratios=%r)" % self.l1_ratios ) l1_ratios_ = self.l1_ratios else: @@ -1856,8 +1854,7 @@ def fit(self, X, y, sample_weight=None, **params): raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes[0] + " class: %r" % classes[0] ) if n_classes == 2: diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index efac0508963ba..2d6fe48869742 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -1,5 +1,4 @@ -"""Orthogonal matching pursuit algorithms -""" +"""Orthogonal matching pursuit algorithms""" # Author: Vlad Niculae # diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index 67187bbdb5934..e0fad5d8be8b8 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -1358,8 +1358,7 @@ def predict_proba(self, X): raise NotImplementedError( "predict_(log_)proba only supported when" " loss='log_loss' or loss='modified_huber' " - "(%r given)" - % self.loss + "(%r given)" % self.loss ) @available_if(_check_proba) diff --git a/sklearn/linear_model/tests/test_linear_loss.py b/sklearn/linear_model/tests/test_linear_loss.py index 659ff134198db..230966db1ceaf 100644 --- a/sklearn/linear_model/tests/test_linear_loss.py +++ b/sklearn/linear_model/tests/test_linear_loss.py @@ -4,6 +4,7 @@ Note that correctness of losses (which compose LinearModelLoss) is already well covered in the _loss module. 
""" + import numpy as np import pytest from numpy.testing import assert_allclose diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index f1707fad1c950..2e2e262183a17 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -650,7 +650,8 @@ def __init__( def _more_tags(self): return { - "pairwise": self.affinity in [ + "pairwise": self.affinity + in [ "precomputed", "precomputed_nearest_neighbors", ] diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 713c5fe651dbb..8a818c885043c 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -3,7 +3,6 @@ and pairwise metrics and distance computations. """ - from . import cluster from ._classification import ( accuracy_score, diff --git a/sklearn/metrics/_base.py b/sklearn/metrics/_base.py index 53ff14b039e0c..c344008755004 100644 --- a/sklearn/metrics/_base.py +++ b/sklearn/metrics/_base.py @@ -2,6 +2,7 @@ Common code for all metrics. """ + # Authors: Alexandre Gramfort # Mathieu Blondel # Olivier Grisel diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 999d3795b8dd9..c5290fd39eb7e 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -583,8 +583,7 @@ def multilabel_confusion_matrix( raise ValueError( "All labels must be in [0, n labels) for " "multilabel targets. " - "Got %d < 0" - % np.min(labels) + "Got %d < 0" % np.min(labels) ) if n_labels is not None: diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index a332997a84414..44da911061bc8 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -5,6 +5,7 @@ - supervised, which uses a ground truth class values for each sample. - unsupervised, which does not and measures the 'quality' of the model itself. """ + from ._bicluster import consensus_score from ._supervised import ( adjusted_mutual_info_score, diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ec26ef7dcd399..bbebe2cba2197 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -2217,8 +2217,7 @@ def test_recall_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "Recall is ill-defined and " + str(record.pop().message) == "Recall is ill-defined and " "being set to 0.0 due to no true samples." " Use `zero_division` parameter to control" " this behavior." @@ -2229,8 +2228,7 @@ def test_recall_warnings(zero_division): recall_score([0, 0], [0, 0]) if zero_division == "warn": assert ( - str(record.pop().message) - == "Recall is ill-defined and " + str(record.pop().message) == "Recall is ill-defined and " "being set to 0.0 due to no true samples." " Use `zero_division` parameter to control" " this behavior." @@ -2249,8 +2247,7 @@ def test_precision_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "Precision is ill-defined and " + str(record.pop().message) == "Precision is ill-defined and " "being set to 0.0 due to no predicted samples." " Use `zero_division` parameter to control" " this behavior." 
@@ -2261,8 +2258,7 @@ def test_precision_warnings(zero_division): precision_score([0, 0], [0, 0]) if zero_division == "warn": assert ( - str(record.pop().message) - == "Precision is ill-defined and " + str(record.pop().message) == "Precision is ill-defined and " "being set to 0.0 due to no predicted samples." " Use `zero_division` parameter to control" " this behavior." @@ -2307,8 +2303,7 @@ def test_fscore_warnings(zero_division): ) if zero_division == "warn": assert ( - str(record.pop().message) - == "F-score is ill-defined and " + str(record.pop().message) == "F-score is ill-defined and " "being set to 0.0 due to no true nor predicted " "samples. Use `zero_division` parameter to " "control this behavior." diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index e361ce8f61a1c..fda1a83702bbf 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -1,4 +1,5 @@ """Bayesian Gaussian Mixture Model.""" + # Author: Wei Xue # Thierry Guillemot # License: BSD 3 clause diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 6b546c6bc9441..9b9072f1491a2 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -484,8 +484,7 @@ def score(self, X, y=None, **params): if self.scorer_ is None: raise ValueError( "No score function explicitly defined, " - "and the estimator doesn't provide one %s" - % self.best_estimator_ + "and the estimator doesn't provide one %s" % self.best_estimator_ ) if isinstance(self.scorer_, dict): if self.multimetric_: diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 2afb9ae6adce7..fa425a5e6a18b 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -1,4 +1,5 @@ """Test the split module""" + import re import warnings from itertools import combinations, combinations_with_replacement, permutations diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 22306d88e021f..43916d8cecb2e 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -1,4 +1,5 @@ """Test the validation module""" + import os import re import sys diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index e1e8bdbb09d7c..776d462928fbb 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -1,4 +1,5 @@ """Base and mixin classes for nearest neighbors.""" + # Authors: Jake Vanderplas # Fabian Pedregosa # Alexandre Gramfort @@ -444,8 +445,7 @@ def _check_algorithm_metric(self): raise ValueError( "kd_tree does not support callable metric '%s'" "Function call overhead will result" - "in very poor performance." - % self.metric + "in very poor performance." % self.metric ) elif self.metric not in VALID_METRICS[alg_check] and not isinstance( self.metric, DistanceMetric @@ -898,8 +898,7 @@ class from an array representing our data set and ask who's if issparse(X): raise ValueError( "%s does not work with sparse matrices. Densify the data, " - "or set algorithm='brute'" - % self._fit_method + "or set algorithm='brute'" % self._fit_method ) chunked_results = Parallel(n_jobs, prefer="threads")( delayed(_tree_query_parallel_helper)( @@ -1253,8 +1252,7 @@ class from an array representing our data set and ask who's if issparse(X): raise ValueError( "%s does not work with sparse matrices. 
Densify the data, " - "or set algorithm='brute'" - % self._fit_method + "or set algorithm='brute'" % self._fit_method ) n_jobs = effective_n_jobs(self.n_jobs) diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index 8885fb4c8c5d0..a9e5fe011150a 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -2,6 +2,7 @@ Kernel Density Estimation ------------------------- """ + # Author: Jake Vanderplas import itertools from numbers import Integral, Real diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index a4ff66786340a..4185bbe15826b 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,4 +1,5 @@ """Unsupervised nearest neighbors learner""" + from ..base import _fit_context from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index ee548d8017810..09c2501818fd3 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -1,6 +1,7 @@ """ Testing for the nearest centroid module. """ + import numpy as np import pytest from numpy.testing import assert_array_equal diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py index 73d62f9543e98..60ef660ef917d 100644 --- a/sklearn/neural_network/_base.py +++ b/sklearn/neural_network/_base.py @@ -1,5 +1,4 @@ -"""Utilities for the neural network modules -""" +"""Utilities for the neural network modules""" # Author: Issam H. Laradji # License: BSD 3 clause diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index cc419b57f2410..f56f68ac852c2 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -1,5 +1,4 @@ -"""Multi-layer Perceptron -""" +"""Multi-layer Perceptron""" # Authors: Issam H. Laradji # Andreas Mueller @@ -755,8 +754,7 @@ def _check_solver(self): if self.solver not in _STOCHASTIC_SOLVERS: raise AttributeError( "partial_fit is only available for stochastic" - " optimizers. %s is not stochastic." - % self.solver + " optimizers. %s is not stochastic." % self.solver ) return True diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index e3814f45d3633..4b7f0f9422625 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -1,5 +1,4 @@ -"""Restricted Boltzmann Machine -""" +"""Restricted Boltzmann Machine""" # Authors: Yann N. Dauphin # Vlad Niculae diff --git a/sklearn/neural_network/_stochastic_optimizers.py b/sklearn/neural_network/_stochastic_optimizers.py index d9fbaec0098d0..ab87300aff110 100644 --- a/sklearn/neural_network/_stochastic_optimizers.py +++ b/sklearn/neural_network/_stochastic_optimizers.py @@ -1,5 +1,4 @@ -"""Stochastic optimization methods for MLP -""" +"""Stochastic optimization methods for MLP""" # Authors: Jiyuan Qian # License: BSD 3 clause diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 6b94e2703f7e1..64ad4c5edc019 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -732,8 +732,7 @@ def test_warm_start(): message = ( "warm_start can only be used where `y` has the same " "classes as in the previous call to fit." 
- " Previously got [0 1 2], `y` has %s" - % np.unique(y_i) + " Previously got [0 1 2], `y` has %s" % np.unique(y_i) ) with pytest.raises(ValueError, match=re.escape(message)): clf.fit(X, y_i) diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 4ee0622c699b7..b26b83e66510f 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -2,6 +2,7 @@ The :mod:`sklearn.pipeline` module implements utilities to build a composite estimator, as a chain of transforms and estimators. """ + # Author: Edouard Duchesnay # Gael Varoquaux # Virgile Fritsch diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index 2512f411a5a9c..f4c9fb032cfb0 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -1,6 +1,7 @@ """ This file contains preprocessing tools based on polynomials. """ + import collections from itertools import chain, combinations from itertools import combinations_with_replacement as combinations_w_r diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index c8c0193ac9b0b..886a805960d52 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -22,6 +22,7 @@ and can even be taken to be an orthogonal projection. """ + # Authors: Olivier Grisel , # Arnaud Joly # License: BSD 3 clause diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 8812c3c352a03..4b046aa111250 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -1,4 +1,4 @@ -""" test the label propagation module """ +"""test the label propagation module""" import warnings diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index 6d154c99dc669..47d4027c50754 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -297,8 +297,7 @@ def _warn_from_fit_status(self): warnings.warn( "Solver terminated early (max_iter=%i)." " Consider pre-processing your data with" - " StandardScaler or MinMaxScaler." - % self.max_iter, + " StandardScaler or MinMaxScaler." 
% self.max_iter, ConvergenceWarning, ) @@ -1174,8 +1173,7 @@ def _fit_liblinear( raise ValueError( "This solver needs samples of at least 2 classes" " in the data, but the data contains only one" - " class: %r" - % classes_[0] + " class: %r" % classes_[0] ) class_weight_ = compute_class_weight(class_weight, classes=classes_, y=y) diff --git a/sklearn/svm/_bounds.py b/sklearn/svm/_bounds.py index d14297230af4c..b02720637c03b 100644 --- a/sklearn/svm/_bounds.py +++ b/sklearn/svm/_bounds.py @@ -1,4 +1,5 @@ """Determination of parameter bounds""" + # Author: Paolo Losi # License: BSD 3 clause diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index e1c6e36af28fb..f728136b0f98c 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -3,6 +3,7 @@ TODO: remove hard coded numerical results when possible """ + import re import numpy as np diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py index 0fffd57a1016d..ecda17e36d2bf 100644 --- a/sklearn/tests/random_seed.py +++ b/sklearn/tests/random_seed.py @@ -8,6 +8,7 @@ https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed """ + from os import environ from random import Random diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index 72cab1dfcb174..40a960cba6283 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -15,7 +15,8 @@ def test_openmp_parallelism_enabled(): pytest.skip("test explicitly skipped (SKLEARN_SKIP_OPENMP_TEST)") base_url = "dev" if __version__.endswith(".dev0") else "stable" - err_msg = textwrap.dedent(""" + err_msg = textwrap.dedent( + """ This test fails because scikit-learn has been built without OpenMP. This is not recommended since some estimators will run in sequential mode instead of leveraging thread-based parallelism. @@ -27,6 +28,7 @@ def test_openmp_parallelism_enabled(): You can skip this test by setting the environment variable SKLEARN_SKIP_OPENMP_TEST to any value. - """).format(base_url) + """ + ).format(base_url) assert _openmp_parallelism_enabled(), err_msg diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index fccc58f9fa2a5..ea84eec258d83 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -255,11 +255,13 @@ def test_all_tests_are_importable(): # Ensure that for each contentful subpackage, there is a test directory # within it that is also a subpackage (i.e. a directory with __init__.py) - HAS_TESTS_EXCEPTIONS = re.compile(r"""(?x) + HAS_TESTS_EXCEPTIONS = re.compile( + r"""(?x) \.externals(\.|$)| \.tests(\.|$)| \._ - """) + """ + ) resource_modules = { "sklearn.datasets.data", "sklearn.datasets.descr", diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index b3c6820faefc2..e06d2f59a6c10 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -1,4 +1,5 @@ """Common tests for metaestimators""" + import functools from inspect import signature diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index f5ed64a094063..150dcc287e651 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1,6 +1,7 @@ """ Test the pipeline module. 
""" + import itertools import re import shutil diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index f8c612b6029c2..cd4a106ee7606 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -1,6 +1,7 @@ """ Testing for export functions of decision trees (sklearn.tree.export). """ + from io import StringIO from re import finditer, search from textwrap import dedent @@ -375,12 +376,14 @@ def test_export_text(): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf) == expected_report # testing that leaves at level 1 are not truncated @@ -388,32 +391,38 @@ def test_export_text(): # testing that the rest of the tree is truncated assert export_text(clf, max_depth=10) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- weights: [3.00, 0.00] class: -1 |--- feature_1 > 0.00 | |--- weights: [0.00, 3.00] class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, show_weights=True) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |- feature_1 <= 0.00 | |- class: -1 |- feature_1 > 0.00 | |- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, spacing=1) == expected_report X_l = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-1, 1]] y_l = [-1, -1, -1, 1, 1, 1, 2] clf = DecisionTreeClassifier(max_depth=4, random_state=0) clf.fit(X_l, y_l) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: -1 |--- feature_1 > 0.00 | |--- truncated branch of depth 2 - """).lstrip() + """ + ).lstrip() assert export_text(clf, max_depth=0) == expected_report X_mo = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] @@ -422,12 +431,14 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_mo, y_mo) - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.0 | |--- value: [-1.0, -1.0] |--- feature_1 > 0.0 | |--- value: [1.0, 1.0] - """).lstrip() + """ + ).lstrip() assert export_text(reg, decimals=1) == expected_report assert export_text(reg, decimals=1, show_weights=True) == expected_report @@ -435,12 +446,14 @@ def test_export_text(): reg = DecisionTreeRegressor(max_depth=2, random_state=0) reg.fit(X_single, y_mo) - expected_report = dedent(""" + expected_report = dedent( + """ |--- first <= 0.0 | |--- value: [-1.0, -1.0] |--- first > 0.0 | |--- value: [1.0, 1.0] - """).lstrip() + """ + ).lstrip() assert export_text(reg, decimals=1, feature_names=["first"]) == expected_report assert ( export_text(reg, decimals=1, show_weights=True, feature_names=["first"]) @@ -455,20 +468,24 @@ def test_export_text_feature_class_names_array_support(constructor): clf = DecisionTreeClassifier(max_depth=2, random_state=0) clf.fit(X, y) - expected_report = dedent(""" + expected_report = dedent( + """ |--- b <= 0.00 | |--- class: -1 |--- b > 0.00 | |--- class: 1 - """).lstrip() + """ + ).lstrip() assert export_text(clf, feature_names=constructor(["a", "b"])) == expected_report - expected_report = dedent(""" + expected_report = dedent( + """ |--- feature_1 <= 0.00 | |--- class: cat |--- feature_1 > 0.00 | |--- class: dog - """).lstrip() + """ + ).lstrip() assert export_text(clf, class_names=constructor(["cat", "dog"])) == 
expected_report diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py index e647ba3a4f009..0207cc1205120 100644 --- a/sklearn/utils/_response.py +++ b/sklearn/utils/_response.py @@ -2,6 +2,7 @@ It allows to make uniform checks and validation. """ + import numpy as np from ..base import is_classifier diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index 89052e88b65fe..1431108477263 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -3,6 +3,7 @@ adapted from :func:`pandas.show_versions` """ + # License: BSD 3 clause import platform diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index d2559cb66b2ad..b466a7765b819 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1461,8 +1461,7 @@ def check_dont_overwrite_parameters(name, estimator_orig): " the fit method." " Estimators are only allowed to add private attributes" " either started with _ or ended" - " with _ but %s added" - % ", ".join(attrs_added_by_fit) + " with _ but %s added" % ", ".join(attrs_added_by_fit) ) # check that fit doesn't change any public attribute @@ -1477,8 +1476,7 @@ def check_dont_overwrite_parameters(name, estimator_orig): " the fit method. Estimators are only allowed" " to change attributes started" " or ended with _, but" - " %s changed" - % ", ".join(attrs_changed_by_fit) + " %s changed" % ", ".join(attrs_changed_by_fit) ) @@ -2927,8 +2925,7 @@ def check_supervised_y_2d(name, estimator_orig): assert len(w) > 0, msg assert ( "DataConversionWarning('A column-vector y" - " was passed when a 1d array was expected" - in msg + " was passed when a 1d array was expected" in msg ) assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index be93464353832..2fe7dbc3cc179 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -2,6 +2,7 @@ The :mod:`sklearn.utils.extmath` module includes utilities to perform optimal mathematical operations in scikit-learn that are not available in SciPy. """ + # Authors: Gael Varoquaux # Alexandre Gramfort # Alexandre T. Passos diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 8eca047b1a844..33be9f4ab3473 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -3,6 +3,7 @@ If you add content to this file, please give the version of the package at which the fix is no longer needed. """ + # Authors: Emmanuelle Gouillart # Gael Varoquaux # Fabian Pedregosa diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 024b0bcaf95ee..d79f514aae778 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -8,6 +8,7 @@ regression with large design matrix), this approach gives very significant speedups. 
""" + # This is a modified file from scipy.optimize # Original authors: Travis Oliphant, Eric Jones # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index c167a7e9d8f59..5ec962433d7c0 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -703,9 +703,7 @@ def test_incremental_weighted_mean_and_variance_simple(rng, dtype): mean, var, _ = _incremental_mean_and_var(X, 0, 0, 0, sample_weight=sample_weight) expected_mean = np.average(X, weights=sample_weight, axis=0) - expected_var = ( - np.average(X**2, weights=sample_weight, axis=0) - expected_mean**2 - ) + expected_var = np.average(X**2, weights=sample_weight, axis=0) - expected_mean**2 assert_almost_equal(mean, expected_mean) assert_almost_equal(var, expected_var) diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 8fada45db3f52..c44250c36daac 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -1,5 +1,5 @@ -""" Test fast_dict. -""" +"""Test fast_dict.""" + import numpy as np from numpy.testing import assert_allclose, assert_array_equal From 87c90fd861c97872ab1f247c82ca47efada282e4 Mon Sep 17 00:00:00 2001 From: scarliles Date: Thu, 23 May 2024 19:24:31 -0400 Subject: [PATCH 21/29] initial pass at refactoring DepthFirstTreeBuilder.build --- sklearn/tree/_tree.pxd | 75 +++++++ sklearn/tree/_tree.pyx | 442 +++++++++++++++++++++-------------------- 2 files changed, 301 insertions(+), 216 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 2267b4306e261..635d3c5fece07 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -43,6 +43,81 @@ cdef struct ParentInfo: float64_t impurity # the impurity of the parent intp_t n_constant_features # the number of constant features found in parent +ctypedef intp_t (*AddOrUpdateNodeFunc)( + Tree tree, + intp_t parent, + bint is_left, + bint is_leaf, + SplitRecord* split_node, + float64_t impurity, + intp_t n_node_samples, + float64_t weighted_n_node_samples, + unsigned char missing_go_to_left +) except -1 nogil + +# A record on the stack for depth-first tree growing +cdef struct StackRecord: + intp_t start + intp_t end + intp_t depth + intp_t parent + bint is_left + float64_t impurity + intp_t n_constant_features + float64_t lower_bound + float64_t upper_bound + +cdef extern from "" namespace "std" nogil: + cdef cppclass stack[T]: + ctypedef T value_type + stack() except + + bint empty() + void pop() + void push(T&) except + # Raise c++ exception for bad_alloc -> MemoryError + T& top() + +cdef struct BuildEnv: + # Parameters + intp_t max_depth + intp_t min_samples_leaf + float64_t min_weight_leaf + intp_t min_samples_split + float64_t min_impurity_decrease + + unsigned char store_leaf_values + + # Initial capacity + intp_t init_capacity + bint first + + intp_t start + intp_t end + intp_t depth + intp_t parent + bint is_left + intp_t n_node_samples + float64_t weighted_n_node_samples + intp_t node_id + float64_t right_child_min, left_child_min, right_child_max, left_child_max + + SplitRecord* split_ptr + + float64_t middle_value + bint is_leaf + intp_t max_depth_seen + + intp_t rc + + stack[StackRecord] builder_stack + stack[StackRecord] update_stack + stack[StackRecord]* target_stack + StackRecord stack_record + + ParentInfo parent_record + + AddOrUpdateNodeFunc add_or_update_node + + cdef class BaseTree: # Inner structures: values 
are stored separately from node structure, diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 418eae57e4995..4efb0db5f09c6 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -48,14 +48,6 @@ cdef extern from "numpy/arrayobject.h": void* data, intp_t flags, object obj) intp_t PyArray_SetBaseObject(cnp.ndarray arr, PyObject* obj) -cdef extern from "" namespace "std" nogil: - cdef cppclass stack[T]: - ctypedef T value_type - stack() except + - bint empty() - void pop() - void push(T&) except + # Raise c++ exception for bad_alloc -> MemoryError - T& top() # ============================================================================= # Types and constants @@ -161,19 +153,44 @@ cdef class TreeBuilder: # Depth first builder --------------------------------------------------------- -# A record on the stack for depth-first tree growing -cdef struct StackRecord: - intp_t start - intp_t end - intp_t depth - intp_t parent - bint is_left - float64_t impurity - intp_t n_constant_features - float64_t lower_bound - float64_t upper_bound +cdef intp_t tree_add_node( + Tree tree, + intp_t parent, + bint is_left, + bint is_leaf, + SplitRecord* split_node, + float64_t impurity, + intp_t n_node_samples, + float64_t weighted_n_node_samples, + unsigned char missing_go_to_left +) except -1 nogil: + return tree._add_node( + parent, is_left, is_leaf, + split_node, impurity, + n_node_samples, weighted_n_node_samples, + missing_go_to_left + ) + +cdef intp_t tree_update_node( + Tree tree, + intp_t parent, + bint is_left, + bint is_leaf, + SplitRecord* split_node, + float64_t impurity, + intp_t n_node_samples, + float64_t weighted_n_node_samples, + unsigned char missing_go_to_left +) except -1 nogil: + return tree._update_node( + parent, is_left, is_leaf, + split_node, impurity, + n_node_samples, weighted_n_node_samples, + missing_go_to_left + ) + cdef class DepthFirstTreeBuilder(TreeBuilder): """Build a decision tree in depth-first fashion.""" @@ -285,31 +302,32 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # check input X, y, sample_weight = self._check_input(X, y, sample_weight) - # Parameters cdef Splitter splitter = self.splitter - cdef intp_t max_depth = self.max_depth - cdef intp_t min_samples_leaf = self.min_samples_leaf - cdef float64_t min_weight_leaf = self.min_weight_leaf - cdef intp_t min_samples_split = self.min_samples_split - cdef float64_t min_impurity_decrease = self.min_impurity_decrease - - cdef unsigned char store_leaf_values = self.store_leaf_values + cdef SplitRecord split cdef cnp.ndarray initial_roots = self.initial_roots + cdef BuildEnv e + e.max_depth = self.max_depth + e.min_samples_leaf = self.min_samples_leaf + e.min_weight_leaf = self.min_weight_leaf + e.min_samples_split = self.min_samples_split + e.min_impurity_decrease = self.min_impurity_decrease + + e.store_leaf_values = self.store_leaf_values + # Initial capacity - cdef intp_t init_capacity - cdef bint first = 0 + e.first = 0 if initial_roots is None: # Recursive partition (without actual recursion) splitter.init(X, y, sample_weight, missing_values_in_feature_mask) if tree.max_depth <= 10: - init_capacity = (2 ** (tree.max_depth + 1)) - 1 + e.init_capacity = (2 ** (tree.max_depth + 1)) - 1 else: - init_capacity = 2047 + e.init_capacity = 2047 - tree._resize(init_capacity) - first = 1 + tree._resize(e.init_capacity) + e.first = 1 else: # convert numpy array back to dict false_roots = {} @@ -319,39 +337,24 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # reset the root array self.initial_roots = 
None - cdef intp_t start = 0 - cdef intp_t end = 0 - cdef intp_t depth - cdef intp_t parent - cdef bint is_left - cdef intp_t n_node_samples = splitter.n_samples - cdef float64_t weighted_n_node_samples - cdef intp_t node_id - cdef float64_t right_child_min, left_child_min, right_child_max, left_child_max - - cdef SplitRecord split - cdef SplitRecord* split_ptr = malloc(splitter.pointer_size()) + e.start = 0 + e.end = 0 + e.n_node_samples = splitter.n_samples + e.split_ptr = malloc(splitter.pointer_size()) - cdef float64_t middle_value - cdef bint is_leaf - cdef intp_t max_depth_seen = -1 if first else tree.max_depth + e.max_depth_seen = -1 if e.first else tree.max_depth - cdef intp_t rc = 0 + e.rc = 0 - cdef stack[StackRecord] builder_stack - cdef stack[StackRecord] update_stack - cdef StackRecord stack_record + _init_parent_record(&e.parent_record) - cdef ParentInfo parent_record - _init_parent_record(&parent_record) - - if not first: + if not e.first: # push reached leaf nodes onto stack for key, value in reversed(sorted(false_roots.items())): - end += value[0] - update_stack.push({ - "start": start, - "end": end, + e.end += value[0] + e.update_stack.push({ + "start": e.start, + "end": e.end, "depth": value[1], "parent": key[0], "is_left": key[1], @@ -360,12 +363,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): "lower_bound": -INFINITY, "upper_bound": INFINITY, }) - start += value[0] + e.start += value[0] else: # push root node onto stack - builder_stack.push({ + e.builder_stack.push({ "start": 0, - "end": n_node_samples, + "end": e.n_node_samples, "depth": 0, "parent": _TREE_UNDEFINED, "is_left": 0, @@ -376,72 +379,75 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): }) with nogil: - while not update_stack.empty(): - stack_record = update_stack.top() - update_stack.pop() - - start = stack_record.start - end = stack_record.end - depth = stack_record.depth - parent = stack_record.parent - is_left = stack_record.is_left - parent_record.impurity = stack_record.impurity - parent_record.n_constant_features = stack_record.n_constant_features - parent_record.lower_bound = stack_record.lower_bound - parent_record.upper_bound = stack_record.upper_bound - - n_node_samples = end - start - splitter.node_reset(start, end, &weighted_n_node_samples) - - is_leaf = (depth >= max_depth or - n_node_samples < min_samples_split or - n_node_samples < 2 * min_samples_leaf or - weighted_n_node_samples < 2 * min_weight_leaf) - - if first: - parent_record.impurity = splitter.node_impurity() - first = 0 + e.target_stack = &e.update_stack + e.add_or_update_node = tree_update_node + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first = 0 # impurity == 0 with tolerance due to rounding errors - 
is_leaf = is_leaf or parent_record.impurity <= EPSILON + e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - if not is_leaf: + if not e.is_leaf: splitter.node_split( - &parent_record, - split_ptr, + &e.parent_record, + e.split_ptr, ) # assign local copy of SplitRecord to assign # pos, improvement, and impurity scores - split = deref(split_ptr) + split = deref(e.split_ptr) # If EPSILON=0 in the below comparison, float precision # issues stop splitting, producing trees that are # dissimilar to v0.18 - is_leaf = (is_leaf or split.pos >= end or + e.is_leaf = (e.is_leaf or split.pos >= e.end or (split.improvement + EPSILON < - min_impurity_decrease)) + e.min_impurity_decrease)) - node_id = tree._update_node(parent, is_left, is_leaf, split_ptr, - parent_record.impurity, - n_node_samples, weighted_n_node_samples, - split.missing_go_to_left) + e.node_id = e.add_or_update_node( + tree, e.parent, e.is_left, e.is_leaf, e.split_ptr, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + split.missing_go_to_left + ) - if node_id == INTPTR_MAX: - rc = -1 + if e.node_id == INTPTR_MAX: + e.rc = -1 break # Store value for all nodes, to facilitate tree/model # inspection and interpretation - splitter.node_value(tree.value + node_id * tree.value_stride) + splitter.node_value(tree.value + e.node_id * tree.value_stride) if splitter.with_monotonic_cst: splitter.clip_node_value( - tree.value + node_id * tree.value_stride, - parent_record.lower_bound, - parent_record.upper_bound + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound ) - if not is_leaf: + if not e.is_leaf: if ( not splitter.with_monotonic_cst or splitter.monotonic_cst[split.feature] == 0 @@ -451,126 +457,130 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Current bounds must always be propagated to both children. # If a monotonic constraint is active, bounds are used in # node value clipping. - left_child_min = right_child_min = parent_record.lower_bound - left_child_max = right_child_max = parent_record.upper_bound + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound elif splitter.monotonic_cst[split.feature] == 1: # Split on a feature with monotonic increase constraint - left_child_min = parent_record.lower_bound - right_child_max = parent_record.upper_bound + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound # Lower bound for right child and upper bound for left child # are set to the same value. - middle_value = splitter.criterion.middle_value() - right_child_min = middle_value - left_child_max = middle_value + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value else: # i.e. splitter.monotonic_cst[split.feature] == -1 # Split on a feature with monotonic decrease constraint - right_child_min = parent_record.lower_bound - left_child_max = parent_record.upper_bound + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound # Lower bound for left child and upper bound for right child # are set to the same value. 
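# A compact view of the interval bookkeeping in these branches, assuming a
# criterion whose middle_value() lies between the two child node values
# (an illustrative summary of the surrounding logic, not new behavior):
#
#     mid = splitter.criterion.middle_value()
#     monotonic_cst[feature] == +1: left child gets  [lower_bound, mid]
#                                   right child gets [mid, upper_bound]
#     monotonic_cst[feature] == -1: left child gets  [mid, upper_bound]
#                                   right child gets [lower_bound, mid]
#
# Clipping each child's value into its interval is what keeps the fitted
# values monotone in the constrained feature across the whole subtree.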
- middle_value = splitter.criterion.middle_value() - left_child_min = middle_value - right_child_max = middle_value + e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value # Push right child on stack - builder_stack.push({ + e.builder_stack.push({ "start": split.pos, - "end": end, - "depth": depth + 1, - "parent": node_id, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, "is_left": 0, "impurity": split.impurity_right, - "n_constant_features": parent_record.n_constant_features, - "lower_bound": right_child_min, - "upper_bound": right_child_max, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, }) # Push left child on stack - builder_stack.push({ - "start": start, + e.builder_stack.push({ + "start": e.start, "end": split.pos, - "depth": depth + 1, - "parent": node_id, + "depth": e.depth + 1, + "parent": e.node_id, "is_left": 1, "impurity": split.impurity_left, - "n_constant_features": parent_record.n_constant_features, - "lower_bound": left_child_min, - "upper_bound": left_child_max, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, }) - elif store_leaf_values and is_leaf: + elif e.store_leaf_values and e.is_leaf: # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[node_id]) - - if depth > max_depth_seen: - max_depth_seen = depth - - while not builder_stack.empty(): - stack_record = builder_stack.top() - builder_stack.pop() - - start = stack_record.start - end = stack_record.end - depth = stack_record.depth - parent = stack_record.parent - is_left = stack_record.is_left - parent_record.impurity = stack_record.impurity - parent_record.n_constant_features = stack_record.n_constant_features - parent_record.lower_bound = stack_record.lower_bound - parent_record.upper_bound = stack_record.upper_bound - - n_node_samples = end - start - splitter.node_reset(start, end, &weighted_n_node_samples) - - is_leaf = (depth >= max_depth or - n_node_samples < min_samples_split or - n_node_samples < 2 * min_samples_leaf or - weighted_n_node_samples < 2 * min_weight_leaf) - - if first: - parent_record.impurity = splitter.node_impurity() - first=0 + splitter.node_samples(tree.value_samples[e.node_id]) + + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth + + e.target_stack = &e.builder_stack + e.add_or_update_node = tree_add_node + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first=0 # impurity == 0 with tolerance due to rounding errors - is_leaf = is_leaf or parent_record.impurity <= EPSILON + e.is_leaf = 
e.is_leaf or e.parent_record.impurity <= EPSILON - if not is_leaf: + if not e.is_leaf: splitter.node_split( - &parent_record, - split_ptr, + &e.parent_record, + e.split_ptr, ) # assign local copy of SplitRecord to assign # pos, improvement, and impurity scores - split = deref(split_ptr) + split = deref(e.split_ptr) # If EPSILON=0 in the below comparison, float precision # issues stop splitting, producing trees that are # dissimilar to v0.18 - is_leaf = (is_leaf or split.pos >= end or + e.is_leaf = (e.is_leaf or split.pos >= e.end or (split.improvement + EPSILON < - min_impurity_decrease)) + e.min_impurity_decrease)) - node_id = tree._add_node(parent, is_left, is_leaf, split_ptr, - parent_record.impurity, n_node_samples, - weighted_n_node_samples, split.missing_go_to_left) + e.node_id = e.add_or_update_node( + tree, e.parent, e.is_left, e.is_leaf, e.split_ptr, + e.parent_record.impurity, e.n_node_samples, + e.weighted_n_node_samples, split.missing_go_to_left + ) - if node_id == INTPTR_MAX: - rc = -1 + if e.node_id == INTPTR_MAX: + e.rc = -1 break # Store value for all nodes, to facilitate tree/model # inspection and interpretation - splitter.node_value(tree.value + node_id * tree.value_stride) + splitter.node_value(tree.value + e.node_id * tree.value_stride) if splitter.with_monotonic_cst: splitter.clip_node_value( - tree.value + node_id * tree.value_stride, - parent_record.lower_bound, - parent_record.upper_bound + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound ) - if not is_leaf: + if not e.is_leaf: if ( not splitter.with_monotonic_cst or splitter.monotonic_cst[split.feature] == 0 @@ -580,71 +590,71 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Current bounds must always be propagated to both children. # If a monotonic constraint is active, bounds are used in # node value clipping. - left_child_min = right_child_min = parent_record.lower_bound - left_child_max = right_child_max = parent_record.upper_bound + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound elif splitter.monotonic_cst[split.feature] == 1: # Split on a feature with monotonic increase constraint - left_child_min = parent_record.lower_bound - right_child_max = parent_record.upper_bound + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound # Lower bound for right child and upper bound for left child # are set to the same value. - middle_value = splitter.criterion.middle_value() - right_child_min = middle_value - left_child_max = middle_value + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value else: # i.e. splitter.monotonic_cst[split.feature] == -1 # Split on a feature with monotonic decrease constraint - right_child_min = parent_record.lower_bound - left_child_max = parent_record.upper_bound + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound # Lower bound for left child and upper bound for right child # are set to the same value. 
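# Both stack drains share this loop body; the only moving parts are the
# BuildEnv fields bound just before each `while` (taken from the code
# above):
#
#     e.target_stack = &e.update_stack        # first pass over prior leaves
#     e.add_or_update_node = tree_update_node
#
#     e.target_stack = &e.builder_stack       # growth pass for new nodes
#     e.add_or_update_node = tree_add_node
#
# tree_update_node and tree_add_node are thin nogil wrappers with the
# AddOrUpdateNodeFunc signature that forward to Tree._update_node and
# Tree._add_node, so the add-vs-update choice is made through a function
# pointer instead of by editing the loop body.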
- middle_value = splitter.criterion.middle_value() - left_child_min = middle_value - right_child_max = middle_value + e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value # Push right child on stack - builder_stack.push({ + e.builder_stack.push({ "start": split.pos, - "end": end, - "depth": depth + 1, - "parent": node_id, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, "is_left": 0, "impurity": split.impurity_right, - "n_constant_features": parent_record.n_constant_features, - "lower_bound": right_child_min, - "upper_bound": right_child_max, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, }) # Push left child on stack - builder_stack.push({ - "start": start, + e.builder_stack.push({ + "start": e.start, "end": split.pos, - "depth": depth + 1, - "parent": node_id, + "depth": e.depth + 1, + "parent": e.node_id, "is_left": 1, "impurity": split.impurity_left, - "n_constant_features": parent_record.n_constant_features, - "lower_bound": left_child_min, - "upper_bound": left_child_max, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, }) - elif store_leaf_values and is_leaf: + elif e.store_leaf_values and e.is_leaf: # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[node_id]) + splitter.node_samples(tree.value_samples[e.node_id]) - if depth > max_depth_seen: - max_depth_seen = depth + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth - if rc >= 0: - rc = tree._resize_c(tree.node_count) + if e.rc >= 0: + e.rc = tree._resize_c(tree.node_count) - if rc >= 0: - tree.max_depth = max_depth_seen + if e.rc >= 0: + tree.max_depth = e.max_depth_seen # free the memory created for the SplitRecord pointer - free(split_ptr) + free(e.split_ptr) - if rc == -1: + if e.rc == -1: raise MemoryError() # Best first builder ---------------------------------------------------------- From 51da5864a6b3a6f95c4293fc3ed7f57ed124d328 Mon Sep 17 00:00:00 2001 From: scarliles Date: Tue, 28 May 2024 15:08:57 -0400 Subject: [PATCH 22/29] some renaming to make closure pattern more obvious --- sklearn/tree/_splitter.pxd | 14 ++++---- sklearn/tree/_splitter.pyx | 68 +++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd index 0aeb07c9606d4..66c83283f677d 100644 --- a/sklearn/tree/_splitter.pxd +++ b/sklearn/tree/_splitter.pxd @@ -30,7 +30,7 @@ from ..utils._typedefs cimport float32_t, float64_t, intp_t, int8_t, int32_t, ui # SO WHERE DOES THAT LEAVE US # - we can transform these into cpp vectors of structs # and with some minor casting irritations everything else works ok -ctypedef void* SplitConditionParameters +ctypedef void* SplitConditionEnv ctypedef bint (*SplitConditionFunction)( Splitter splitter, SplitRecord* current_split, @@ -38,15 +38,15 @@ ctypedef bint (*SplitConditionFunction)( bint missing_go_to_left, float64_t lower_bound, float64_t upper_bound, - SplitConditionParameters split_condition_parameters + SplitConditionEnv split_condition_env ) noexcept nogil -cdef struct SplitConditionTuple: +cdef struct SplitConditionClosure: SplitConditionFunction f - SplitConditionParameters p + SplitConditionEnv e cdef class SplitCondition: - cdef SplitConditionTuple t + cdef SplitConditionClosure c cdef class 
MinSamplesLeafCondition(SplitCondition): pass @@ -150,8 +150,8 @@ cdef class Splitter(BaseSplitter): cdef SplitCondition min_weight_leaf_condition cdef SplitCondition monotonic_constraint_condition - cdef vector[SplitConditionTuple] presplit_conditions - cdef vector[SplitConditionTuple] postsplit_conditions + cdef vector[SplitConditionClosure] presplit_conditions + cdef vector[SplitConditionClosure] postsplit_conditions cdef int init( self, diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index ff707817d3d60..c2f092bc18954 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -51,7 +51,7 @@ cdef bint min_sample_leaf_condition( bint missing_go_to_left, float64_t lower_bound, float64_t upper_bound, - SplitConditionParameters split_condition_parameters + SplitConditionEnv split_condition_env ) noexcept nogil: cdef intp_t min_samples_leaf = splitter.min_samples_leaf cdef intp_t end_non_missing = splitter.end - n_missing @@ -72,8 +72,8 @@ cdef bint min_sample_leaf_condition( cdef class MinSamplesLeafCondition(SplitCondition): def __cinit__(self): - self.t.f = min_sample_leaf_condition - self.t.p = NULL # min_samples is stored in splitter, which is already passed to f + self.c.f = min_sample_leaf_condition + self.c.e = NULL # min_samples is stored in splitter, which is already passed to f cdef bint min_weight_leaf_condition( Splitter splitter, @@ -82,7 +82,7 @@ cdef bint min_weight_leaf_condition( bint missing_go_to_left, float64_t lower_bound, float64_t upper_bound, - SplitConditionParameters split_condition_parameters + SplitConditionEnv split_condition_env ) noexcept nogil: cdef float64_t min_weight_leaf = splitter.min_weight_leaf @@ -95,8 +95,8 @@ cdef bint min_weight_leaf_condition( cdef class MinWeightLeafCondition(SplitCondition): def __cinit__(self): - self.t.f = min_weight_leaf_condition - self.t.p = NULL # min_weight_leaf is stored in splitter, which is already passed to f + self.c.f = min_weight_leaf_condition + self.c.e = NULL # min_weight_leaf is stored in splitter, which is already passed to f cdef bint monotonic_constraint_condition( Splitter splitter, @@ -105,7 +105,7 @@ cdef bint monotonic_constraint_condition( bint missing_go_to_left, float64_t lower_bound, float64_t upper_bound, - SplitConditionParameters split_condition_parameters + SplitConditionEnv split_condition_env ) noexcept nogil: if ( splitter.with_monotonic_cst and @@ -122,10 +122,10 @@ cdef bint monotonic_constraint_condition( cdef class MonotonicConstraintCondition(SplitCondition): def __cinit__(self): - self.t.f = monotonic_constraint_condition - self.t.p = NULL + self.c.f = monotonic_constraint_condition + self.c.e = NULL -# cdef struct HasDataParameters: +# cdef struct HasDataEnv: # int min_samples # cdef bint has_data_condition( @@ -135,24 +135,24 @@ cdef class MonotonicConstraintCondition(SplitCondition): # bint missing_go_to_left, # float64_t lower_bound, # float64_t upper_bound, -# SplitConditionParameters split_condition_parameters +# SplitConditionEnv split_condition_env # ) noexcept nogil: -# cdef HasDataParameters* p = split_condition_parameters -# return splitter.n_samples >= p.min_samples +# cdef HasDataEnv* e = split_condition_env +# return splitter.n_samples >= e.min_samples # cdef class HasDataCondition(SplitCondition): # def __cinit__(self, int min_samples): -# self.t.f = has_data_condition -# self.t.p = malloc(sizeof(HasDataParameters)) -# (self.t.p).min_samples = min_samples +# self.c.f = has_data_condition +# self.c.e = malloc(sizeof(HasDataEnv)) +# 
 
 #     def __dealloc__(self):
-#         if self.t.p is not NULL:
-#             free(self.t.p)
+#         if self.c.e is not NULL:
+#             free(self.c.e)
 
 #         super.__dealloc__(self)
 
-# cdef struct AlphaRegularityParameters:
+# cdef struct AlphaRegularityEnv:
 #     float64_t alpha
 
 # cdef bint alpha_regularity_condition(
@@ -162,21 +162,21 @@ cdef class MonotonicConstraintCondition(SplitCondition):
 #     bint missing_go_to_left,
 #     float64_t lower_bound,
 #     float64_t upper_bound,
-#     SplitConditionParameters split_condition_parameters
+#     SplitConditionEnv split_condition_env
 # ) noexcept nogil:
-#     cdef AlphaRegularityParameters* p = <AlphaRegularityParameters*>split_condition_parameters
+#     cdef AlphaRegularityEnv* e = <AlphaRegularityEnv*>split_condition_env
 
 #     return True
 
 # cdef class AlphaRegularityCondition(SplitCondition):
 #     def __cinit__(self, float64_t alpha):
-#         self.t.f = alpha_regularity_condition
-#         self.t.p = malloc(sizeof(AlphaRegularityParameters))
-#         (<AlphaRegularityParameters*>self.t.p).alpha = alpha
+#         self.c.f = alpha_regularity_condition
+#         self.c.e = malloc(sizeof(AlphaRegularityEnv))
+#         (<AlphaRegularityEnv*>self.c.e).alpha = alpha
 
 #     def __dealloc__(self):
-#         if self.t.p is not NULL:
-#             free(self.t.p)
+#         if self.c.e is not NULL:
+#             free(self.c.e)
 
 #         super.__dealloc__(self)
 
@@ -353,23 +353,23 @@ cdef class Splitter(BaseSplitter):
         )
 
         offset = 0
-        self.presplit_conditions[offset] = self.min_samples_leaf_condition.t
-        self.postsplit_conditions[offset] = self.min_weight_leaf_condition.t
+        self.presplit_conditions[offset] = self.min_samples_leaf_condition.c
+        self.postsplit_conditions[offset] = self.min_weight_leaf_condition.c
         offset += 1
 
         if(self.with_monotonic_cst):
             self.monotonic_constraint_condition = MonotonicConstraintCondition()
-            self.presplit_conditions[offset] = self.monotonic_constraint_condition.t
-            self.postsplit_conditions[offset] = self.monotonic_constraint_condition.t
+            self.presplit_conditions[offset] = self.monotonic_constraint_condition.c
+            self.postsplit_conditions[offset] = self.monotonic_constraint_condition.c
             offset += 1
 
         if presplit_conditions is not None:
             for i in range(len(presplit_conditions)):
-                self.presplit_conditions[i + offset] = presplit_conditions[i].t
+                self.presplit_conditions[i + offset] = presplit_conditions[i].c
 
         if postsplit_conditions is not None:
             for i in range(len(postsplit_conditions)):
-                self.postsplit_conditions[i + offset] = postsplit_conditions[i].t
+                self.postsplit_conditions[i + offset] = postsplit_conditions[i].c
 
 
     def __reduce__(self):
@@ -789,7 +789,7 @@ cdef inline intp_t node_split_best(
             for condition in splitter.presplit_conditions:
                 if not condition.f(
                     splitter, &current_split, n_missing, missing_go_to_left,
-                    lower_bound, upper_bound, condition.p
+                    lower_bound, upper_bound, condition.e
                 ):
                     conditions_hold = False
                     break
@@ -818,7 +818,7 @@ cdef inline intp_t node_split_best(
             for condition in splitter.postsplit_conditions:
                 if not condition.f(
                     splitter, &current_split, n_missing, missing_go_to_left,
-                    lower_bound, upper_bound, condition.p
+                    lower_bound, upper_bound, condition.e
                 ):
                     conditions_hold = False
                     break

From 6c117a22efbe0caf90a856c51a8cacbbe122b721 Mon Sep 17 00:00:00 2001
From: scarliles
Date: Tue, 28 May 2024 15:52:33 -0400
Subject: [PATCH 23/29] added SplitRecordFactory

---
 sklearn/tree/_splitter.pxd | 10 ++++++++++
 sklearn/tree/_splitter.pyx | 14 ++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/sklearn/tree/_splitter.pxd b/sklearn/tree/_splitter.pxd
index 66c83283f677d..0f16f10538a62 100644
--- a/sklearn/tree/_splitter.pxd
+++ b/sklearn/tree/_splitter.pxd
@@ -71,6 +71,13 @@ cdef struct SplitRecord:
     unsigned char missing_go_to_left  # Controls if missing values go to the left node.
     intp_t n_missing                  # Number of missing values for the feature being split on
 
+ctypedef void* SplitRecordFactoryEnv
+ctypedef SplitRecord* (*SplitRecordFactory)(SplitRecordFactoryEnv env) except NULL nogil
+
+cdef struct SplitRecordFactoryClosure:
+    SplitRecordFactory f
+    SplitRecordFactoryEnv e
+
 cdef class BaseSplitter:
     """Abstract interface for splitter."""
 
@@ -100,6 +107,8 @@ cdef class BaseSplitter:
 
     cdef const float64_t[:] sample_weight
 
+    cdef SplitRecordFactoryClosure split_record_factory
+
     # The samples vector `samples` is maintained by the Splitter object such
     # that the samples contained in a node are contiguous. With this setting,
     # `node_split` reorganizes the node samples `samples[start:end]` in two
@@ -131,6 +140,7 @@ cdef class BaseSplitter:
     cdef void node_value(self, float64_t* dest) noexcept nogil
     cdef float64_t node_impurity(self) noexcept nogil
     cdef intp_t pointer_size(self) noexcept nogil
+    cdef SplitRecord* create_split_record(self) except NULL nogil
 
 cdef class Splitter(BaseSplitter):
     """Base class for supervised splitters."""
diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx
index c2f092bc18954..66776e8bc5b38 100644
--- a/sklearn/tree/_splitter.pyx
+++ b/sklearn/tree/_splitter.pyx
@@ -20,7 +20,7 @@ from cython cimport final
 
 from libc.math cimport isnan
 from libc.stdint cimport uintptr_t
-from libc.stdlib cimport qsort, free
+from libc.stdlib cimport qsort, free, malloc
 from libc.string cimport memcpy
 
 from ._criterion cimport Criterion
@@ -202,6 +202,9 @@ cdef inline void _init_split(SplitRecord* self, intp_t start_pos) noexcept nogil
     self.missing_go_to_left = False
     self.n_missing = 0
 
+cdef SplitRecord* _base_split_record_factory(SplitRecordFactoryEnv env) except NULL nogil:
+    return <SplitRecord*>malloc(sizeof(SplitRecord));
+
 cdef class BaseSplitter:
     """This is an abstract interface for splitters.
 
@@ -286,6 +289,9 @@ cdef class BaseSplitter:
         `SplitRecord`.
""" return sizeof(SplitRecord) + + cdef SplitRecord* create_split_record(self) except NULL nogil: + return self.split_record_factory.f(self.split_record_factory.e) cdef class Splitter(BaseSplitter): """Abstract interface for supervised splitters.""" @@ -352,7 +358,7 @@ cdef class Splitter(BaseSplitter): + (2 if self.with_monotonic_cst else 1) ) - offset = 0 + cdef int offset = 0 self.presplit_conditions[offset] = self.min_samples_leaf_condition.c self.postsplit_conditions[offset] = self.min_weight_leaf_condition.c offset += 1 @@ -363,6 +369,7 @@ cdef class Splitter(BaseSplitter): self.postsplit_conditions[offset] = self.monotonic_constraint_condition.c offset += 1 + cdef int i if presplit_conditions is not None: for i in range(len(presplit_conditions)): self.presplit_conditions[i + offset] = presplit_conditions[i].c @@ -370,6 +377,9 @@ cdef class Splitter(BaseSplitter): if postsplit_conditions is not None: for i in range(len(postsplit_conditions)): self.postsplit_conditions[i + offset] = postsplit_conditions[i].c + + self.split_record_factory.f = _base_split_record_factory + self.split_record_factory.e = NULL def __reduce__(self): From 9e7b1313bd8656ab0d3dddcd507fd468b8bccc62 Mon Sep 17 00:00:00 2001 From: scarliles Date: Tue, 28 May 2024 16:10:42 -0400 Subject: [PATCH 24/29] SplitRecordFactory progress --- sklearn/tree/_tree.pxd | 2 +- sklearn/tree/_tree.pyx | 61 ++++++++++++++++++------------------------ 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index 635d3c5fece07..dd0ebcd0aa251 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -100,7 +100,7 @@ cdef struct BuildEnv: intp_t node_id float64_t right_child_min, left_child_min, right_child_max, left_child_max - SplitRecord* split_ptr + SplitRecord* split float64_t middle_value bint is_leaf diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 4efb0db5f09c6..2dfad80df4204 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -303,7 +303,6 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): X, y, sample_weight = self._check_input(X, y, sample_weight) cdef Splitter splitter = self.splitter - cdef SplitRecord split cdef cnp.ndarray initial_roots = self.initial_roots cdef BuildEnv e @@ -340,7 +339,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): e.start = 0 e.end = 0 e.n_node_samples = splitter.n_samples - e.split_ptr = malloc(splitter.pointer_size()) + e.split = self.splitter.create_split_record() e.max_depth_seen = -1 if e.first else tree.max_depth @@ -413,24 +412,20 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): if not e.is_leaf: splitter.node_split( &e.parent_record, - e.split_ptr, + e.split, ) - # assign local copy of SplitRecord to assign - # pos, improvement, and impurity scores - split = deref(e.split_ptr) - # If EPSILON=0 in the below comparison, float precision # issues stop splitting, producing trees that are # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or split.pos >= e.end or - (split.improvement + EPSILON < + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < e.min_impurity_decrease)) e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split_ptr, + tree, e.parent, e.is_left, e.is_leaf, e.split, e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - split.missing_go_to_left + e.split.missing_go_to_left ) if e.node_id == INTPTR_MAX: @@ -450,7 +445,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): if not e.is_leaf: if ( not 
splitter.with_monotonic_cst or - splitter.monotonic_cst[split.feature] == 0 + splitter.monotonic_cst[e.split.feature] == 0 ): # Split on a feature with no monotonicity constraint @@ -459,7 +454,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # node value clipping. e.left_child_min = e.right_child_min = e.parent_record.lower_bound e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[split.feature] == 1: + elif splitter.monotonic_cst[e.split.feature] == 1: # Split on a feature with monotonic increase constraint e.left_child_min = e.parent_record.lower_bound e.right_child_max = e.parent_record.upper_bound @@ -469,7 +464,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): e.middle_value = splitter.criterion.middle_value() e.right_child_min = e.middle_value e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[split.feature] == -1 + else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 # Split on a feature with monotonic decrease constraint e.right_child_min = e.parent_record.lower_bound e.left_child_max = e.parent_record.upper_bound @@ -482,12 +477,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Push right child on stack e.builder_stack.push({ - "start": split.pos, + "start": e.split.pos, "end": e.end, "depth": e.depth + 1, "parent": e.node_id, "is_left": 0, - "impurity": split.impurity_right, + "impurity": e.split.impurity_right, "n_constant_features": e.parent_record.n_constant_features, "lower_bound": e.right_child_min, "upper_bound": e.right_child_max, @@ -496,11 +491,11 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Push left child on stack e.builder_stack.push({ "start": e.start, - "end": split.pos, + "end": e.split.pos, "depth": e.depth + 1, "parent": e.node_id, "is_left": 1, - "impurity": split.impurity_left, + "impurity": e.split.impurity_left, "n_constant_features": e.parent_record.n_constant_features, "lower_bound": e.left_child_min, "upper_bound": e.left_child_max, @@ -546,24 +541,20 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): if not e.is_leaf: splitter.node_split( &e.parent_record, - e.split_ptr, + e.split, ) - # assign local copy of SplitRecord to assign - # pos, improvement, and impurity scores - split = deref(e.split_ptr) - # If EPSILON=0 in the below comparison, float precision # issues stop splitting, producing trees that are # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or split.pos >= e.end or - (split.improvement + EPSILON < + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < e.min_impurity_decrease)) e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split_ptr, + tree, e.parent, e.is_left, e.is_leaf, e.split, e.parent_record.impurity, e.n_node_samples, - e.weighted_n_node_samples, split.missing_go_to_left + e.weighted_n_node_samples, e.split.missing_go_to_left ) if e.node_id == INTPTR_MAX: @@ -583,7 +574,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): if not e.is_leaf: if ( not splitter.with_monotonic_cst or - splitter.monotonic_cst[split.feature] == 0 + splitter.monotonic_cst[e.split.feature] == 0 ): # Split on a feature with no monotonicity constraint @@ -592,7 +583,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # node value clipping. 
e.left_child_min = e.right_child_min = e.parent_record.lower_bound e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[split.feature] == 1: + elif splitter.monotonic_cst[e.split.feature] == 1: # Split on a feature with monotonic increase constraint e.left_child_min = e.parent_record.lower_bound e.right_child_max = e.parent_record.upper_bound @@ -602,7 +593,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): e.middle_value = splitter.criterion.middle_value() e.right_child_min = e.middle_value e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[split.feature] == -1 + else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 # Split on a feature with monotonic decrease constraint e.right_child_min = e.parent_record.lower_bound e.left_child_max = e.parent_record.upper_bound @@ -615,12 +606,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Push right child on stack e.builder_stack.push({ - "start": split.pos, + "start": e.split.pos, "end": e.end, "depth": e.depth + 1, "parent": e.node_id, "is_left": 0, - "impurity": split.impurity_right, + "impurity": e.split.impurity_right, "n_constant_features": e.parent_record.n_constant_features, "lower_bound": e.right_child_min, "upper_bound": e.right_child_max, @@ -629,11 +620,11 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # Push left child on stack e.builder_stack.push({ "start": e.start, - "end": split.pos, + "end": e.split.pos, "depth": e.depth + 1, "parent": e.node_id, "is_left": 1, - "impurity": split.impurity_left, + "impurity": e.split.impurity_left, "n_constant_features": e.parent_record.n_constant_features, "lower_bound": e.left_child_min, "upper_bound": e.left_child_max, @@ -652,7 +643,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): tree.max_depth = e.max_depth_seen # free the memory created for the SplitRecord pointer - free(e.split_ptr) + free(e.split) if e.rc == -1: raise MemoryError() From a0176696d929268ee68db33f1a5a75016494b01d Mon Sep 17 00:00:00 2001 From: scarliles Date: Wed, 29 May 2024 13:04:23 -0400 Subject: [PATCH 25/29] build loop refactor --- sklearn/tree/_tree.pxd | 2 +- sklearn/tree/_tree.pyx | 431 +++++++++++++---------------------------- 2 files changed, 140 insertions(+), 293 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index dd0ebcd0aa251..e7627f0a9ab79 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -115,7 +115,7 @@ cdef struct BuildEnv: ParentInfo parent_record - AddOrUpdateNodeFunc add_or_update_node + bint add_or_update cdef class BaseTree: diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 2dfad80df4204..18c7e06b4e6fe 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -153,44 +153,6 @@ cdef class TreeBuilder: # Depth first builder --------------------------------------------------------- - - -cdef intp_t tree_add_node( - Tree tree, - intp_t parent, - bint is_left, - bint is_leaf, - SplitRecord* split_node, - float64_t impurity, - intp_t n_node_samples, - float64_t weighted_n_node_samples, - unsigned char missing_go_to_left -) except -1 nogil: - return tree._add_node( - parent, is_left, is_leaf, - split_node, impurity, - n_node_samples, weighted_n_node_samples, - missing_go_to_left - ) - -cdef intp_t tree_update_node( - Tree tree, - intp_t parent, - bint is_left, - bint is_leaf, - SplitRecord* split_node, - float64_t impurity, - intp_t n_node_samples, - float64_t weighted_n_node_samples, - unsigned char missing_go_to_left -) except -1 nogil: - return 
tree._update_node( - parent, is_left, is_leaf, - split_node, impurity, - n_node_samples, weighted_n_node_samples, - missing_go_to_left - ) - cdef class DepthFirstTreeBuilder(TreeBuilder): """Build a decision tree in depth-first fashion.""" @@ -289,6 +251,141 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # convert dict to numpy array and store value self.initial_roots = np.array(list(false_roots.items())) + cdef intp_t _build_body(self, Tree tree, Splitter splitter, BuildEnv* e) except -1 nogil: + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first = 0 + + # impurity == 0 with tolerance due to rounding errors + e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON + + if not e.is_leaf: + splitter.node_split( + &e.parent_record, + e.split, + ) + + # If EPSILON=0 in the below comparison, float precision + # issues stop splitting, producing trees that are + # dissimilar to v0.18 + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < + e.min_impurity_decrease)) + + e.node_id = tree._add_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) if e.add_or_update else tree._update_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) + + if e.node_id == INTPTR_MAX: + e.rc = -1 + break + + # Store value for all nodes, to facilitate tree/model + # inspection and interpretation + splitter.node_value(tree.value + e.node_id * tree.value_stride) + if splitter.with_monotonic_cst: + splitter.clip_node_value( + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound + ) + + if not e.is_leaf: + if ( + not splitter.with_monotonic_cst or + splitter.monotonic_cst[e.split.feature] == 0 + ): + # Split on a feature with no monotonicity constraint + + # Current bounds must always be propagated to both children. + # If a monotonic constraint is active, bounds are used in + # node value clipping. + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound + elif splitter.monotonic_cst[e.split.feature] == 1: + # Split on a feature with monotonic increase constraint + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound + + # Lower bound for right child and upper bound for left child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value + else: # i.e. 
splitter.monotonic_cst[e.split.feature] == -1 + # Split on a feature with monotonic decrease constraint + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound + + # Lower bound for left child and upper bound for right child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value + + # Push right child on stack + e.builder_stack.push({ + "start": e.split.pos, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 0, + "impurity": e.split.impurity_right, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, + }) + + # Push left child on stack + e.builder_stack.push({ + "start": e.start, + "end": e.split.pos, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 1, + "impurity": e.split.impurity_left, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, + }) + elif e.store_leaf_values and e.is_leaf: + # copy leaf values to leaf_values array + splitter.node_samples(tree.value_samples[e.node_id]) + + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth + + return 0 + + cpdef build( self, Tree tree, @@ -379,262 +476,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): with nogil: e.target_stack = &e.update_stack - e.add_or_update_node = tree_update_node - while not e.target_stack.empty(): - e.stack_record = e.target_stack.top() - e.target_stack.pop() - - e.start = e.stack_record.start - e.end = e.stack_record.end - e.depth = e.stack_record.depth - e.parent = e.stack_record.parent - e.is_left = e.stack_record.is_left - e.parent_record.impurity = e.stack_record.impurity - e.parent_record.n_constant_features = e.stack_record.n_constant_features - e.parent_record.lower_bound = e.stack_record.lower_bound - e.parent_record.upper_bound = e.stack_record.upper_bound - - e.n_node_samples = e.end - e.start - splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) - - e.is_leaf = (e.depth >= e.max_depth or - e.n_node_samples < e.min_samples_split or - e.n_node_samples < 2 * e.min_samples_leaf or - e.weighted_n_node_samples < 2 * e.min_weight_leaf) - - if e.first: - e.parent_record.impurity = splitter.node_impurity() - e.first = 0 - - # impurity == 0 with tolerance due to rounding errors - e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - - if not e.is_leaf: - splitter.node_split( - &e.parent_record, - e.split, - ) - - # If EPSILON=0 in the below comparison, float precision - # issues stop splitting, producing trees that are - # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or e.split.pos >= e.end or - (e.split.improvement + EPSILON < - e.min_impurity_decrease)) - - e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) - - if e.node_id == INTPTR_MAX: - e.rc = -1 - break - - # Store value for all nodes, to facilitate tree/model - # inspection and interpretation - splitter.node_value(tree.value + e.node_id * tree.value_stride) - if splitter.with_monotonic_cst: - splitter.clip_node_value( - tree.value + e.node_id * tree.value_stride, - e.parent_record.lower_bound, - e.parent_record.upper_bound - ) - - if not e.is_leaf: - if ( - not splitter.with_monotonic_cst or - splitter.monotonic_cst[e.split.feature] == 0 - ): - # Split on a 
feature with no monotonicity constraint - - # Current bounds must always be propagated to both children. - # If a monotonic constraint is active, bounds are used in - # node value clipping. - e.left_child_min = e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[e.split.feature] == 1: - # Split on a feature with monotonic increase constraint - e.left_child_min = e.parent_record.lower_bound - e.right_child_max = e.parent_record.upper_bound - - # Lower bound for right child and upper bound for left child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.right_child_min = e.middle_value - e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 - # Split on a feature with monotonic decrease constraint - e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.parent_record.upper_bound - - # Lower bound for left child and upper bound for right child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.left_child_min = e.middle_value - e.right_child_max = e.middle_value - - # Push right child on stack - e.builder_stack.push({ - "start": e.split.pos, - "end": e.end, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 0, - "impurity": e.split.impurity_right, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.right_child_min, - "upper_bound": e.right_child_max, - }) - - # Push left child on stack - e.builder_stack.push({ - "start": e.start, - "end": e.split.pos, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 1, - "impurity": e.split.impurity_left, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.left_child_min, - "upper_bound": e.left_child_max, - }) - elif e.store_leaf_values and e.is_leaf: - # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[e.node_id]) - - if e.depth > e.max_depth_seen: - e.max_depth_seen = e.depth + e.add_or_update = 0 + self._build_body(tree, splitter, &e) e.target_stack = &e.builder_stack - e.add_or_update_node = tree_add_node - while not e.target_stack.empty(): - e.stack_record = e.target_stack.top() - e.target_stack.pop() - - e.start = e.stack_record.start - e.end = e.stack_record.end - e.depth = e.stack_record.depth - e.parent = e.stack_record.parent - e.is_left = e.stack_record.is_left - e.parent_record.impurity = e.stack_record.impurity - e.parent_record.n_constant_features = e.stack_record.n_constant_features - e.parent_record.lower_bound = e.stack_record.lower_bound - e.parent_record.upper_bound = e.stack_record.upper_bound - - e.n_node_samples = e.end - e.start - splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) - - e.is_leaf = (e.depth >= e.max_depth or - e.n_node_samples < e.min_samples_split or - e.n_node_samples < 2 * e.min_samples_leaf or - e.weighted_n_node_samples < 2 * e.min_weight_leaf) - - if e.first: - e.parent_record.impurity = splitter.node_impurity() - e.first=0 - - # impurity == 0 with tolerance due to rounding errors - e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - - if not e.is_leaf: - splitter.node_split( - &e.parent_record, - e.split, - ) - - # If EPSILON=0 in the below comparison, float precision - # issues stop splitting, producing trees that are - # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or e.split.pos >= e.end or - (e.split.improvement + EPSILON < - e.min_impurity_decrease)) - - 
e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, - e.weighted_n_node_samples, e.split.missing_go_to_left - ) - - if e.node_id == INTPTR_MAX: - e.rc = -1 - break - - # Store value for all nodes, to facilitate tree/model - # inspection and interpretation - splitter.node_value(tree.value + e.node_id * tree.value_stride) - if splitter.with_monotonic_cst: - splitter.clip_node_value( - tree.value + e.node_id * tree.value_stride, - e.parent_record.lower_bound, - e.parent_record.upper_bound - ) - - if not e.is_leaf: - if ( - not splitter.with_monotonic_cst or - splitter.monotonic_cst[e.split.feature] == 0 - ): - # Split on a feature with no monotonicity constraint - - # Current bounds must always be propagated to both children. - # If a monotonic constraint is active, bounds are used in - # node value clipping. - e.left_child_min = e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[e.split.feature] == 1: - # Split on a feature with monotonic increase constraint - e.left_child_min = e.parent_record.lower_bound - e.right_child_max = e.parent_record.upper_bound - - # Lower bound for right child and upper bound for left child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.right_child_min = e.middle_value - e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 - # Split on a feature with monotonic decrease constraint - e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.parent_record.upper_bound - - # Lower bound for left child and upper bound for right child - # are set to the same value. 
- e.middle_value = splitter.criterion.middle_value() - e.left_child_min = e.middle_value - e.right_child_max = e.middle_value - - # Push right child on stack - e.builder_stack.push({ - "start": e.split.pos, - "end": e.end, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 0, - "impurity": e.split.impurity_right, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.right_child_min, - "upper_bound": e.right_child_max, - }) - - # Push left child on stack - e.builder_stack.push({ - "start": e.start, - "end": e.split.pos, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 1, - "impurity": e.split.impurity_left, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.left_child_min, - "upper_bound": e.left_child_max, - }) - elif e.store_leaf_values and e.is_leaf: - # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[e.node_id]) - - if e.depth > e.max_depth_seen: - e.max_depth_seen = e.depth + e.add_or_update = 1 + self._build_body(tree, splitter, &e) if e.rc >= 0: e.rc = tree._resize_c(tree.node_count) From 4325b0a101ea34c8193e21d003ee381fa9695b70 Mon Sep 17 00:00:00 2001 From: scarliles Date: Wed, 29 May 2024 13:43:46 -0400 Subject: [PATCH 26/29] add_or_update tweak --- sklearn/tree/_tree.pyx | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 18c7e06b4e6fe..ee0d979aad858 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -294,15 +294,18 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): (e.split.improvement + EPSILON < e.min_impurity_decrease)) - e.node_id = tree._add_node( - e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) if e.add_or_update else tree._update_node( - e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) + if e.add_or_update: + e.node_id = tree._add_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) + else: + e.node_id = tree._update_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) if e.node_id == INTPTR_MAX: e.rc = -1 From 78c3a1b8352ab901cb07dcba0e6795103b3ced67 Mon Sep 17 00:00:00 2001 From: scarliles Date: Thu, 30 May 2024 10:18:12 -0400 Subject: [PATCH 27/29] reverted to back out build body refactor --- sklearn/tree/_tree.pxd | 2 +- sklearn/tree/_tree.pyx | 434 +++++++++++++++++++++++++++-------------- 2 files changed, 293 insertions(+), 143 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index e7627f0a9ab79..dd0ebcd0aa251 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -115,7 +115,7 @@ cdef struct BuildEnv: ParentInfo parent_record - bint add_or_update + AddOrUpdateNodeFunc add_or_update_node cdef class BaseTree: diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index ee0d979aad858..2dfad80df4204 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -153,6 +153,44 @@ cdef class TreeBuilder: # Depth first builder --------------------------------------------------------- + + +cdef intp_t tree_add_node( + Tree tree, + intp_t parent, + bint is_left, + bint is_leaf, + SplitRecord* split_node, + float64_t 
impurity, + intp_t n_node_samples, + float64_t weighted_n_node_samples, + unsigned char missing_go_to_left +) except -1 nogil: + return tree._add_node( + parent, is_left, is_leaf, + split_node, impurity, + n_node_samples, weighted_n_node_samples, + missing_go_to_left + ) + +cdef intp_t tree_update_node( + Tree tree, + intp_t parent, + bint is_left, + bint is_leaf, + SplitRecord* split_node, + float64_t impurity, + intp_t n_node_samples, + float64_t weighted_n_node_samples, + unsigned char missing_go_to_left +) except -1 nogil: + return tree._update_node( + parent, is_left, is_leaf, + split_node, impurity, + n_node_samples, weighted_n_node_samples, + missing_go_to_left + ) + cdef class DepthFirstTreeBuilder(TreeBuilder): """Build a decision tree in depth-first fashion.""" @@ -251,144 +289,6 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # convert dict to numpy array and store value self.initial_roots = np.array(list(false_roots.items())) - cdef intp_t _build_body(self, Tree tree, Splitter splitter, BuildEnv* e) except -1 nogil: - while not e.target_stack.empty(): - e.stack_record = e.target_stack.top() - e.target_stack.pop() - - e.start = e.stack_record.start - e.end = e.stack_record.end - e.depth = e.stack_record.depth - e.parent = e.stack_record.parent - e.is_left = e.stack_record.is_left - e.parent_record.impurity = e.stack_record.impurity - e.parent_record.n_constant_features = e.stack_record.n_constant_features - e.parent_record.lower_bound = e.stack_record.lower_bound - e.parent_record.upper_bound = e.stack_record.upper_bound - - e.n_node_samples = e.end - e.start - splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) - - e.is_leaf = (e.depth >= e.max_depth or - e.n_node_samples < e.min_samples_split or - e.n_node_samples < 2 * e.min_samples_leaf or - e.weighted_n_node_samples < 2 * e.min_weight_leaf) - - if e.first: - e.parent_record.impurity = splitter.node_impurity() - e.first = 0 - - # impurity == 0 with tolerance due to rounding errors - e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - - if not e.is_leaf: - splitter.node_split( - &e.parent_record, - e.split, - ) - - # If EPSILON=0 in the below comparison, float precision - # issues stop splitting, producing trees that are - # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or e.split.pos >= e.end or - (e.split.improvement + EPSILON < - e.min_impurity_decrease)) - - if e.add_or_update: - e.node_id = tree._add_node( - e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) - else: - e.node_id = tree._update_node( - e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) - - if e.node_id == INTPTR_MAX: - e.rc = -1 - break - - # Store value for all nodes, to facilitate tree/model - # inspection and interpretation - splitter.node_value(tree.value + e.node_id * tree.value_stride) - if splitter.with_monotonic_cst: - splitter.clip_node_value( - tree.value + e.node_id * tree.value_stride, - e.parent_record.lower_bound, - e.parent_record.upper_bound - ) - - if not e.is_leaf: - if ( - not splitter.with_monotonic_cst or - splitter.monotonic_cst[e.split.feature] == 0 - ): - # Split on a feature with no monotonicity constraint - - # Current bounds must always be propagated to both children. - # If a monotonic constraint is active, bounds are used in - # node value clipping. 
- e.left_child_min = e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[e.split.feature] == 1: - # Split on a feature with monotonic increase constraint - e.left_child_min = e.parent_record.lower_bound - e.right_child_max = e.parent_record.upper_bound - - # Lower bound for right child and upper bound for left child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.right_child_min = e.middle_value - e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 - # Split on a feature with monotonic decrease constraint - e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.parent_record.upper_bound - - # Lower bound for left child and upper bound for right child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.left_child_min = e.middle_value - e.right_child_max = e.middle_value - - # Push right child on stack - e.builder_stack.push({ - "start": e.split.pos, - "end": e.end, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 0, - "impurity": e.split.impurity_right, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.right_child_min, - "upper_bound": e.right_child_max, - }) - - # Push left child on stack - e.builder_stack.push({ - "start": e.start, - "end": e.split.pos, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 1, - "impurity": e.split.impurity_left, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.left_child_min, - "upper_bound": e.left_child_max, - }) - elif e.store_leaf_values and e.is_leaf: - # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[e.node_id]) - - if e.depth > e.max_depth_seen: - e.max_depth_seen = e.depth - - return 0 - - cpdef build( self, Tree tree, @@ -479,12 +379,262 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): with nogil: e.target_stack = &e.update_stack - e.add_or_update = 0 - self._build_body(tree, splitter, &e) + e.add_or_update_node = tree_update_node + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first = 0 + + # impurity == 0 with tolerance due to rounding errors + e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON + + if not e.is_leaf: + splitter.node_split( + &e.parent_record, + e.split, + ) + + # If EPSILON=0 in the below comparison, float precision + # issues stop splitting, producing trees that are + # dissimilar to v0.18 + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < + e.min_impurity_decrease)) + + e.node_id = e.add_or_update_node( + tree, e.parent, 
e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) + + if e.node_id == INTPTR_MAX: + e.rc = -1 + break + + # Store value for all nodes, to facilitate tree/model + # inspection and interpretation + splitter.node_value(tree.value + e.node_id * tree.value_stride) + if splitter.with_monotonic_cst: + splitter.clip_node_value( + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound + ) + + if not e.is_leaf: + if ( + not splitter.with_monotonic_cst or + splitter.monotonic_cst[e.split.feature] == 0 + ): + # Split on a feature with no monotonicity constraint + + # Current bounds must always be propagated to both children. + # If a monotonic constraint is active, bounds are used in + # node value clipping. + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound + elif splitter.monotonic_cst[e.split.feature] == 1: + # Split on a feature with monotonic increase constraint + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound + + # Lower bound for right child and upper bound for left child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value + else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 + # Split on a feature with monotonic decrease constraint + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound + + # Lower bound for left child and upper bound for right child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value + + # Push right child on stack + e.builder_stack.push({ + "start": e.split.pos, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 0, + "impurity": e.split.impurity_right, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, + }) + + # Push left child on stack + e.builder_stack.push({ + "start": e.start, + "end": e.split.pos, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 1, + "impurity": e.split.impurity_left, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, + }) + elif e.store_leaf_values and e.is_leaf: + # copy leaf values to leaf_values array + splitter.node_samples(tree.value_samples[e.node_id]) + + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth e.target_stack = &e.builder_stack - e.add_or_update = 1 - self._build_body(tree, splitter, &e) + e.add_or_update_node = tree_add_node + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < 
e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first=0 + + # impurity == 0 with tolerance due to rounding errors + e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON + + if not e.is_leaf: + splitter.node_split( + &e.parent_record, + e.split, + ) + + # If EPSILON=0 in the below comparison, float precision + # issues stop splitting, producing trees that are + # dissimilar to v0.18 + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < + e.min_impurity_decrease)) + + e.node_id = e.add_or_update_node( + tree, e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, + e.weighted_n_node_samples, e.split.missing_go_to_left + ) + + if e.node_id == INTPTR_MAX: + e.rc = -1 + break + + # Store value for all nodes, to facilitate tree/model + # inspection and interpretation + splitter.node_value(tree.value + e.node_id * tree.value_stride) + if splitter.with_monotonic_cst: + splitter.clip_node_value( + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound + ) + + if not e.is_leaf: + if ( + not splitter.with_monotonic_cst or + splitter.monotonic_cst[e.split.feature] == 0 + ): + # Split on a feature with no monotonicity constraint + + # Current bounds must always be propagated to both children. + # If a monotonic constraint is active, bounds are used in + # node value clipping. + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound + elif splitter.monotonic_cst[e.split.feature] == 1: + # Split on a feature with monotonic increase constraint + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound + + # Lower bound for right child and upper bound for left child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value + else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 + # Split on a feature with monotonic decrease constraint + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound + + # Lower bound for left child and upper bound for right child + # are set to the same value. 
+ e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value + + # Push right child on stack + e.builder_stack.push({ + "start": e.split.pos, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 0, + "impurity": e.split.impurity_right, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, + }) + + # Push left child on stack + e.builder_stack.push({ + "start": e.start, + "end": e.split.pos, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 1, + "impurity": e.split.impurity_left, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, + }) + elif e.store_leaf_values and e.is_leaf: + # copy leaf values to leaf_values array + splitter.node_samples(tree.value_samples[e.node_id]) + + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth if e.rc >= 0: e.rc = tree._resize_c(tree.node_count) From b8cc636565f14dcbcf4ad912cc1336db25638e30 Mon Sep 17 00:00:00 2001 From: scarliles Date: Thu, 30 May 2024 11:22:37 -0400 Subject: [PATCH 28/29] refactor baby step --- sklearn/tree/_tree.pxd | 14 -- sklearn/tree/_tree.pyx | 306 +++++++++++++++++++---------------------- 2 files changed, 138 insertions(+), 182 deletions(-) diff --git a/sklearn/tree/_tree.pxd b/sklearn/tree/_tree.pxd index dd0ebcd0aa251..930a21ad05783 100644 --- a/sklearn/tree/_tree.pxd +++ b/sklearn/tree/_tree.pxd @@ -43,18 +43,6 @@ cdef struct ParentInfo: float64_t impurity # the impurity of the parent intp_t n_constant_features # the number of constant features found in parent -ctypedef intp_t (*AddOrUpdateNodeFunc)( - Tree tree, - intp_t parent, - bint is_left, - bint is_leaf, - SplitRecord* split_node, - float64_t impurity, - intp_t n_node_samples, - float64_t weighted_n_node_samples, - unsigned char missing_go_to_left -) except -1 nogil - # A record on the stack for depth-first tree growing cdef struct StackRecord: intp_t start @@ -114,8 +102,6 @@ cdef struct BuildEnv: StackRecord stack_record ParentInfo parent_record - - AddOrUpdateNodeFunc add_or_update_node cdef class BaseTree: diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 2dfad80df4204..5dff8ed049921 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -153,44 +153,6 @@ cdef class TreeBuilder: # Depth first builder --------------------------------------------------------- - - -cdef intp_t tree_add_node( - Tree tree, - intp_t parent, - bint is_left, - bint is_leaf, - SplitRecord* split_node, - float64_t impurity, - intp_t n_node_samples, - float64_t weighted_n_node_samples, - unsigned char missing_go_to_left -) except -1 nogil: - return tree._add_node( - parent, is_left, is_leaf, - split_node, impurity, - n_node_samples, weighted_n_node_samples, - missing_go_to_left - ) - -cdef intp_t tree_update_node( - Tree tree, - intp_t parent, - bint is_left, - bint is_leaf, - SplitRecord* split_node, - float64_t impurity, - intp_t n_node_samples, - float64_t weighted_n_node_samples, - unsigned char missing_go_to_left -) except -1 nogil: - return tree._update_node( - parent, is_left, is_leaf, - split_node, impurity, - n_node_samples, weighted_n_node_samples, - missing_go_to_left - ) - cdef class DepthFirstTreeBuilder(TreeBuilder): """Build a decision tree in depth-first fashion.""" @@ -289,6 +251,141 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): # convert dict to numpy array and store value 
self.initial_roots = np.array(list(false_roots.items())) + cdef void _build_body(self, Tree tree, Splitter splitter, BuildEnv* e, bint update) noexcept nogil: + while not e.target_stack.empty(): + e.stack_record = e.target_stack.top() + e.target_stack.pop() + + e.start = e.stack_record.start + e.end = e.stack_record.end + e.depth = e.stack_record.depth + e.parent = e.stack_record.parent + e.is_left = e.stack_record.is_left + e.parent_record.impurity = e.stack_record.impurity + e.parent_record.n_constant_features = e.stack_record.n_constant_features + e.parent_record.lower_bound = e.stack_record.lower_bound + e.parent_record.upper_bound = e.stack_record.upper_bound + + e.n_node_samples = e.end - e.start + splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) + + e.is_leaf = (e.depth >= e.max_depth or + e.n_node_samples < e.min_samples_split or + e.n_node_samples < 2 * e.min_samples_leaf or + e.weighted_n_node_samples < 2 * e.min_weight_leaf) + + if e.first: + e.parent_record.impurity = splitter.node_impurity() + e.first = 0 + + # impurity == 0 with tolerance due to rounding errors + e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON + + if not e.is_leaf: + splitter.node_split( + &e.parent_record, + e.split, + ) + + # If EPSILON=0 in the below comparison, float precision + # issues stop splitting, producing trees that are + # dissimilar to v0.18 + e.is_leaf = (e.is_leaf or e.split.pos >= e.end or + (e.split.improvement + EPSILON < + e.min_impurity_decrease)) + + if update == 1: + e.node_id = tree._update_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) + else: + e.node_id = tree._add_node( + e.parent, e.is_left, e.is_leaf, e.split, + e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, + e.split.missing_go_to_left + ) + + if e.node_id == INTPTR_MAX: + e.rc = -1 + break + + # Store value for all nodes, to facilitate tree/model + # inspection and interpretation + splitter.node_value(tree.value + e.node_id * tree.value_stride) + if splitter.with_monotonic_cst: + splitter.clip_node_value( + tree.value + e.node_id * tree.value_stride, + e.parent_record.lower_bound, + e.parent_record.upper_bound + ) + + if not e.is_leaf: + if ( + not splitter.with_monotonic_cst or + splitter.monotonic_cst[e.split.feature] == 0 + ): + # Split on a feature with no monotonicity constraint + + # Current bounds must always be propagated to both children. + # If a monotonic constraint is active, bounds are used in + # node value clipping. + e.left_child_min = e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.right_child_max = e.parent_record.upper_bound + elif splitter.monotonic_cst[e.split.feature] == 1: + # Split on a feature with monotonic increase constraint + e.left_child_min = e.parent_record.lower_bound + e.right_child_max = e.parent_record.upper_bound + + # Lower bound for right child and upper bound for left child + # are set to the same value. + e.middle_value = splitter.criterion.middle_value() + e.right_child_min = e.middle_value + e.left_child_max = e.middle_value + else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 + # Split on a feature with monotonic decrease constraint + e.right_child_min = e.parent_record.lower_bound + e.left_child_max = e.parent_record.upper_bound + + # Lower bound for left child and upper bound for right child + # are set to the same value. 
+ e.middle_value = splitter.criterion.middle_value() + e.left_child_min = e.middle_value + e.right_child_max = e.middle_value + + # Push right child on stack + e.builder_stack.push({ + "start": e.split.pos, + "end": e.end, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 0, + "impurity": e.split.impurity_right, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.right_child_min, + "upper_bound": e.right_child_max, + }) + + # Push left child on stack + e.builder_stack.push({ + "start": e.start, + "end": e.split.pos, + "depth": e.depth + 1, + "parent": e.node_id, + "is_left": 1, + "impurity": e.split.impurity_left, + "n_constant_features": e.parent_record.n_constant_features, + "lower_bound": e.left_child_min, + "upper_bound": e.left_child_max, + }) + elif e.store_leaf_values and e.is_leaf: + # copy leaf values to leaf_values array + splitter.node_samples(tree.value_samples[e.node_id]) + + if e.depth > e.max_depth_seen: + e.max_depth_seen = e.depth + cpdef build( self, Tree tree, @@ -379,136 +476,9 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): with nogil: e.target_stack = &e.update_stack - e.add_or_update_node = tree_update_node - while not e.target_stack.empty(): - e.stack_record = e.target_stack.top() - e.target_stack.pop() - - e.start = e.stack_record.start - e.end = e.stack_record.end - e.depth = e.stack_record.depth - e.parent = e.stack_record.parent - e.is_left = e.stack_record.is_left - e.parent_record.impurity = e.stack_record.impurity - e.parent_record.n_constant_features = e.stack_record.n_constant_features - e.parent_record.lower_bound = e.stack_record.lower_bound - e.parent_record.upper_bound = e.stack_record.upper_bound - - e.n_node_samples = e.end - e.start - splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) - - e.is_leaf = (e.depth >= e.max_depth or - e.n_node_samples < e.min_samples_split or - e.n_node_samples < 2 * e.min_samples_leaf or - e.weighted_n_node_samples < 2 * e.min_weight_leaf) - - if e.first: - e.parent_record.impurity = splitter.node_impurity() - e.first = 0 - - # impurity == 0 with tolerance due to rounding errors - e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - - if not e.is_leaf: - splitter.node_split( - &e.parent_record, - e.split, - ) - - # If EPSILON=0 in the below comparison, float precision - # issues stop splitting, producing trees that are - # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or e.split.pos >= e.end or - (e.split.improvement + EPSILON < - e.min_impurity_decrease)) - - e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, - e.split.missing_go_to_left - ) - - if e.node_id == INTPTR_MAX: - e.rc = -1 - break - - # Store value for all nodes, to facilitate tree/model - # inspection and interpretation - splitter.node_value(tree.value + e.node_id * tree.value_stride) - if splitter.with_monotonic_cst: - splitter.clip_node_value( - tree.value + e.node_id * tree.value_stride, - e.parent_record.lower_bound, - e.parent_record.upper_bound - ) - - if not e.is_leaf: - if ( - not splitter.with_monotonic_cst or - splitter.monotonic_cst[e.split.feature] == 0 - ): - # Split on a feature with no monotonicity constraint - - # Current bounds must always be propagated to both children. - # If a monotonic constraint is active, bounds are used in - # node value clipping. 
- e.left_child_min = e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[e.split.feature] == 1: - # Split on a feature with monotonic increase constraint - e.left_child_min = e.parent_record.lower_bound - e.right_child_max = e.parent_record.upper_bound - - # Lower bound for right child and upper bound for left child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.right_child_min = e.middle_value - e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 - # Split on a feature with monotonic decrease constraint - e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.parent_record.upper_bound - - # Lower bound for left child and upper bound for right child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.left_child_min = e.middle_value - e.right_child_max = e.middle_value - - # Push right child on stack - e.builder_stack.push({ - "start": e.split.pos, - "end": e.end, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 0, - "impurity": e.split.impurity_right, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.right_child_min, - "upper_bound": e.right_child_max, - }) - - # Push left child on stack - e.builder_stack.push({ - "start": e.start, - "end": e.split.pos, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 1, - "impurity": e.split.impurity_left, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.left_child_min, - "upper_bound": e.left_child_max, - }) - elif e.store_leaf_values and e.is_leaf: - # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[e.node_id]) - - if e.depth > e.max_depth_seen: - e.max_depth_seen = e.depth + self._build_body(tree, splitter, &e, 1) e.target_stack = &e.builder_stack - e.add_or_update_node = tree_add_node while not e.target_stack.empty(): e.stack_record = e.target_stack.top() e.target_stack.pop() @@ -551,8 +521,8 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): (e.split.improvement + EPSILON < e.min_impurity_decrease)) - e.node_id = e.add_or_update_node( - tree, e.parent, e.is_left, e.is_leaf, e.split, + e.node_id = tree._add_node( + e.parent, e.is_left, e.is_leaf, e.split, e.parent_record.impurity, e.n_node_samples, e.weighted_n_node_samples, e.split.missing_go_to_left ) From f2256580d2482e607f40a938f3569f20cec95e95 Mon Sep 17 00:00:00 2001 From: scarliles Date: Thu, 30 May 2024 11:53:46 -0400 Subject: [PATCH 29/29] update node refactor more baby steps --- sklearn/tree/_tree.pyx | 127 +---------------------------------------- 1 file changed, 1 insertion(+), 126 deletions(-) diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index 5dff8ed049921..6e5ad54848b3c 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -479,132 +479,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder): self._build_body(tree, splitter, &e, 1) e.target_stack = &e.builder_stack - while not e.target_stack.empty(): - e.stack_record = e.target_stack.top() - e.target_stack.pop() - - e.start = e.stack_record.start - e.end = e.stack_record.end - e.depth = e.stack_record.depth - e.parent = e.stack_record.parent - e.is_left = e.stack_record.is_left - e.parent_record.impurity = e.stack_record.impurity - e.parent_record.n_constant_features = e.stack_record.n_constant_features - e.parent_record.lower_bound = e.stack_record.lower_bound - 
e.parent_record.upper_bound = e.stack_record.upper_bound - - e.n_node_samples = e.end - e.start - splitter.node_reset(e.start, e.end, &e.weighted_n_node_samples) - - e.is_leaf = (e.depth >= e.max_depth or - e.n_node_samples < e.min_samples_split or - e.n_node_samples < 2 * e.min_samples_leaf or - e.weighted_n_node_samples < 2 * e.min_weight_leaf) - - if e.first: - e.parent_record.impurity = splitter.node_impurity() - e.first=0 - - # impurity == 0 with tolerance due to rounding errors - e.is_leaf = e.is_leaf or e.parent_record.impurity <= EPSILON - - if not e.is_leaf: - splitter.node_split( - &e.parent_record, - e.split, - ) - - # If EPSILON=0 in the below comparison, float precision - # issues stop splitting, producing trees that are - # dissimilar to v0.18 - e.is_leaf = (e.is_leaf or e.split.pos >= e.end or - (e.split.improvement + EPSILON < - e.min_impurity_decrease)) - - e.node_id = tree._add_node( - e.parent, e.is_left, e.is_leaf, e.split, - e.parent_record.impurity, e.n_node_samples, - e.weighted_n_node_samples, e.split.missing_go_to_left - ) - - if e.node_id == INTPTR_MAX: - e.rc = -1 - break - - # Store value for all nodes, to facilitate tree/model - # inspection and interpretation - splitter.node_value(tree.value + e.node_id * tree.value_stride) - if splitter.with_monotonic_cst: - splitter.clip_node_value( - tree.value + e.node_id * tree.value_stride, - e.parent_record.lower_bound, - e.parent_record.upper_bound - ) - - if not e.is_leaf: - if ( - not splitter.with_monotonic_cst or - splitter.monotonic_cst[e.split.feature] == 0 - ): - # Split on a feature with no monotonicity constraint - - # Current bounds must always be propagated to both children. - # If a monotonic constraint is active, bounds are used in - # node value clipping. - e.left_child_min = e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.right_child_max = e.parent_record.upper_bound - elif splitter.monotonic_cst[e.split.feature] == 1: - # Split on a feature with monotonic increase constraint - e.left_child_min = e.parent_record.lower_bound - e.right_child_max = e.parent_record.upper_bound - - # Lower bound for right child and upper bound for left child - # are set to the same value. - e.middle_value = splitter.criterion.middle_value() - e.right_child_min = e.middle_value - e.left_child_max = e.middle_value - else: # i.e. splitter.monotonic_cst[e.split.feature] == -1 - # Split on a feature with monotonic decrease constraint - e.right_child_min = e.parent_record.lower_bound - e.left_child_max = e.parent_record.upper_bound - - # Lower bound for left child and upper bound for right child - # are set to the same value. 
- e.middle_value = splitter.criterion.middle_value() - e.left_child_min = e.middle_value - e.right_child_max = e.middle_value - - # Push right child on stack - e.builder_stack.push({ - "start": e.split.pos, - "end": e.end, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 0, - "impurity": e.split.impurity_right, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.right_child_min, - "upper_bound": e.right_child_max, - }) - - # Push left child on stack - e.builder_stack.push({ - "start": e.start, - "end": e.split.pos, - "depth": e.depth + 1, - "parent": e.node_id, - "is_left": 1, - "impurity": e.split.impurity_left, - "n_constant_features": e.parent_record.n_constant_features, - "lower_bound": e.left_child_min, - "upper_bound": e.left_child_max, - }) - elif e.store_leaf_values and e.is_leaf: - # copy leaf values to leaf_values array - splitter.node_samples(tree.value_samples[e.node_id]) - - if e.depth > e.max_depth_seen: - e.max_depth_seen = e.depth + self._build_body(tree, splitter, &e, 0) if e.rc >= 0: e.rc = tree._resize_c(tree.node_count)
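Aside from the add/update call itself, the bulk of each deleted loop is the monotonicity bookkeeping, which appears three times per loop (no constraint, increasing constraint, decreasing constraint). Its branch logic reduces to a small decision table. The sketch below is plain Python for illustration only, not the Cython in this patch: child_bounds is a hypothetical helper, and the middle argument stands in for the splitter.criterion.middle_value() call used above.

    def child_bounds(cst, lower, upper, middle):
        """Return the (left, right) child bounds for one split.

        cst is the monotonicity constraint on the split feature:
        0 = unconstrained, +1 = monotonic increase, -1 = monotonic decrease.
        lower/upper are the parent's bounds; middle stands in for
        criterion.middle_value() in the patch.
        """
        if cst == 0:
            # No constraint: parent bounds propagate to both children,
            # since they may still be used for node value clipping.
            return (lower, upper), (lower, upper)
        if cst == 1:
            # Increasing: left child capped at middle, right child floored at it.
            return (lower, middle), (middle, upper)
        # Decreasing (cst == -1): mirror image of the increasing case.
        return (middle, upper), (lower, middle)

    left, right = child_bounds(+1, 0.0, 1.0, middle=0.4)
    # left == (0.0, 0.4), right == (0.4, 1.0)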
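The two _build_body call sites also make the overall control flow easier to see: pass 1 drains e.update_stack with the flag set to 1, pass 2 drains e.builder_stack with the flag set to 0, and in both passes child records are pushed onto the builder stack, so subtrees reached while updating are grown fresh in the second pass. Below is a runnable Python sketch of that control flow only; the tuple records, stopping rule, and split position are toy stand-ins for the real splitter calls, EPSILON tolerance, and min_samples_*/min_weight_* checks.

    def _build_body(target_stack, builder_stack, nodes, update):
        # Shared loop body: drain target_stack, recording one node per entry.
        # As in the patch, children always go onto builder_stack, so nodes
        # discovered during the update pass are added during the build pass.
        while target_stack:
            start, end, depth = target_stack.pop()
            # Toy stopping rule; the real code checks max_depth,
            # min_samples_split/leaf, min_weight_leaf, and impurity <= EPSILON.
            is_leaf = (end - start) <= 1 or depth >= 3
            nodes.append(("update" if update else "add", start, end, depth))
            if not is_leaf:
                pos = (start + end) // 2                   # toy split position
                builder_stack.append((pos, end, depth + 1))    # right child
                builder_stack.append((start, pos, depth + 1))  # left child

    nodes, update_stack, builder_stack = [], [(0, 8, 0)], []
    _build_body(update_stack, builder_stack, nodes, update=True)    # the `1` call
    _build_body(builder_stack, builder_stack, nodes, update=False)  # the `0` call
    print(nodes)

This is the same add-versus-update switch that the now-removed e.add_or_update_node function pointer (tree_update_node vs. tree_add_node) used to carry; this patch finishes the deduplication by folding both ~120-line loops into _build_body and passing the choice as a flag.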