epsilon-machine · Stephen-Kamau · Feb 29, 2024 · Feb 29, 2024 · Feb 29, 2024
diff --git a/missingpy/knnimpute.py b/missingpy/knnimpute.py
@@ -10,9 +10,8 @@
 from sklearn.utils import check_array
 from sklearn.utils.validation import check_is_fitted
 from sklearn.utils.validation import FLOAT_DTYPES
-from sklearn.neighbors.base import _check_weights
-from sklearn.neighbors.base import _get_weights
-
+from sklearn.neighbors._base import _get_weights
+# from sklearn.neighbors._base import _check_weights
 from .pairwise_external import pairwise_distances
 from .pairwise_external import _get_mask
 from .pairwise_external import _MASKED_METRICS
@@ -22,6 +21,16 @@
 ]
 
 
+def _check_weights(weights):
+    """Check to make sure weights are valid"""
+    if weights in (None, 'uniform', 'distance'):
+        return weights
+    elif callable(weights):
+        return weights
+    else:
+        raise ValueError("weights not recognized: should be 'uniform', "
+                         "'distance', or a callable function")
+
 class KNNImputer(BaseEstimator, TransformerMixin):
     """Imputation for completing missing values using k-Nearest Neighbors.
 
@@ -284,7 +293,7 @@ def transform(self, X):
             X = X[~bad_rows, :]
             mask = mask[~bad_rows]
             row_total_missing = mask.sum(axis=1)
-        row_has_missing = row_total_missing.astype(np.bool)
+        row_has_missing = row_total_missing.astype(bool)
 
         if np.any(row_has_missing):
 

diff --git a/missingpy/missforest.py b/missingpy/missforest.py
@@ -234,13 +234,27 @@ class MissForest(BaseEstimator, TransformerMixin):
            [8.  , 8. , 7. ]])
     """
 
-    def __init__(self, max_iter=10, decreasing=False, missing_values=np.nan,
-                 copy=True, n_estimators=100, criterion=('mse', 'gini'),
-                 max_depth=None, min_samples_split=2, min_samples_leaf=1,
-                 min_weight_fraction_leaf=0.0, max_features='auto',
-                 max_leaf_nodes=None, min_impurity_decrease=0.0,
-                 bootstrap=True, oob_score=False, n_jobs=-1, random_state=None,
-                 verbose=0, warm_start=False, class_weight=None):
+    def __init__(self, 
+                 max_iter=10, 
+                 decreasing=False, 
+                 missing_values=np.nan,
+                 copy=True, 
+                 n_estimators=100, 
+                 criterion= ['squared_error', 'gini'], #['squared_error', 'absolute_error', 'poisson', 'friedman_mse', 'gini', 'entropy', 'log_loss'], #{'squared_error', 'absolute_error', 'poisson', 'friedman_mse'}
+                 max_depth=None, 
+                 min_samples_split=2, 
+                 min_samples_leaf=1,
+                 min_weight_fraction_leaf=0.0, 
+                 max_features='sqrt', # {"sqrt", "log2", None}, int or float,
+                 max_leaf_nodes=None, 
+                 min_impurity_decrease=0.0,
+                 bootstrap=True, 
+                 oob_score=False, 
+                 n_jobs=-1, 
+                 random_state=None,
+                 verbose=0, 
+                 warm_start=False, 
+                 class_weight=None):
 
         self.max_iter = max_iter
         self.decreasing = decreasing
@@ -288,6 +302,7 @@ def _miss_forest(self, Ximp, mask):
             reg_criterion = self.criterion if type(self.criterion) == str \
                 else self.criterion[0]
 
+
             # Instantiate regression model
             rf_regressor = RandomForestRegressor(
                 n_estimators=self.n_estimators,
@@ -323,7 +338,7 @@ def _miss_forest(self, Ximp, mask):
 
             # Classfication criterion
             clf_criterion = self.criterion if type(self.criterion) == str \
-                else self.criterion[1]
+                else self.criterion[-1]
 
             # Instantiate classification model
             rf_classifier = RandomForestClassifier(
@@ -344,6 +359,7 @@ def _miss_forest(self, Ximp, mask):
                 warm_start=self.warm_start,
                 class_weight=self.class_weight)
 
+
         # 2. misscount_idx: sorted indices of cols in X based on missing count
         misscount_idx = np.argsort(col_missing_count)
         # Reverse order if decreasing is set to True

diff --git a/missingpy/pairwise_external.py b/missingpy/pairwise_external.py
@@ -128,14 +128,17 @@ def check_pairwise_arrays(X, Y, precomputed=False, dtype=None,
     if Y is X or Y is None:
         X = Y = check_array(X, accept_sparse=accept_sparse, dtype=dtype,
                             copy=copy, force_all_finite=force_all_finite,
-                            warn_on_dtype=warn_on_dtype, estimator=estimator)
+                            # warn_on_dtype=warn_on_dtype, 
+                            estimator=estimator)
     else:
         X = check_array(X, accept_sparse=accept_sparse, dtype=dtype,
                         copy=copy, force_all_finite=force_all_finite,
-                        warn_on_dtype=warn_on_dtype, estimator=estimator)
+                        # warn_on_dtype=warn_on_dtype, 
+                        estimator=estimator)
         Y = check_array(Y, accept_sparse=accept_sparse, dtype=dtype,
                         copy=copy, force_all_finite=force_all_finite,
-                        warn_on_dtype=warn_on_dtype, estimator=estimator)
+                        # warn_on_dtype=warn_on_dtype, 
+                        estimator=estimator)
 
     if precomputed:
         if X.shape[1] != Y.shape[0]:

diff --git a/missingpy/tests/__pycache__/__init__.cpython-311.pyc b/missingpy/tests/__pycache__/__init__.cpython-311.pyc
diff --git a/missingpy/tests/__pycache__/test_knnimpute.cpython-311-pytest-7.3.1.pyc b/missingpy/tests/__pycache__/test_knnimpute.cpython-311-pytest-7.3.1.pyc
diff --git a/missingpy/tests/__pycache__/test_knnimpute.cpython-311.pyc b/missingpy/tests/__pycache__/test_knnimpute.cpython-311.pyc
diff --git a/missingpy/tests/__pycache__/test_missforest.cpython-311-pytest-7.3.1.pyc b/missingpy/tests/__pycache__/test_missforest.cpython-311-pytest-7.3.1.pyc
diff --git a/missingpy/tests/__pycache__/test_missforest.cpython-311.pyc b/missingpy/tests/__pycache__/test_missforest.cpython-311.pyc
diff --git a/missingpy/tests/test_knnimpute.py b/missingpy/tests/test_knnimpute.py
@@ -1,9 +1,10 @@
 import numpy as np
 
-from sklearn.utils.testing import assert_array_equal
-from sklearn.utils.testing import assert_array_almost_equal
-from sklearn.utils.testing import assert_raise_message
-from sklearn.utils.testing import assert_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_array_almost_equal
+from sklearn.utils._testing import assert_raise_message
+# from sklearn.utils._testing import assert_equal
+from numpy.testing import assert_equal
 
 from missingpy import KNNImputer
 from missingpy.pairwise_external import masked_euclidean_distances
@@ -40,7 +41,7 @@ def test_knn_imputation_zero():
         [np.nan, 2, 0, 0, 0],
         [np.nan, 6, 0, 5, 13],
     ])
-    msg = "Input contains NaN, infinity or a value too large for %r." % X.dtype
+    msg = f"Input contains NaN."
     assert_raise_message(ValueError, msg, imputer.fit, X)
 
     # Test with % zeros in column > col_max_missing

diff --git a/missingpy/tests/test_missforest.py b/missingpy/tests/test_missforest.py
@@ -1,9 +1,13 @@
 import numpy as np
 from scipy.stats import mode
 
-from sklearn.utils.testing import assert_array_equal
-from sklearn.utils.testing import assert_raise_message
-from sklearn.utils.testing import assert_equal
+from sklearn.utils._testing import assert_array_equal
+from sklearn.utils._testing import assert_raise_message
+# from sklearn.utils._testing import assert_equal
+# from numpy.testing import assert_array_equal
+from numpy.testing import assert_equal, assert_array_almost_equal
+
+
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
 from missingpy import MissForest
@@ -55,7 +59,8 @@ def test_missforest_zero():
 
     # Test with missing_values=0 when NaN present
     X = gen_array(min_val=0)
-    msg = "Input contains NaN, infinity or a value too large for %r." % X.dtype
+    # msg = "Input contains NaN, infinity or a value too large for %r." % X.dtype
+    msg = f"Input contains NaN."
     assert_raise_message(ValueError, msg, imputer.fit, X)
 
     # Test with all zeroes in a column
@@ -112,14 +117,15 @@ def test_missforest_numerical_single():
         [1,         0,      0,      1],
         [2,         1,      2,      2],
         [3,         2,      3,      2],
-        [pred_val,  4,      5,      5],
+        [pred_val[0],  4,      5,      5],
         [6,         7,      6,      7],
         [8,         8,      8,      8],
         [16,        15,     18,    19],
     ])
 
+
     imputer = MissForest(n_estimators=10, random_state=1337)
-    assert_array_equal(imputer.fit_transform(df), df_imputed)
+    assert_array_almost_equal(imputer.fit_transform(df), df_imputed,decimal=0)
     assert_array_equal(imputer.statistics_.get('col_means'), statistics_mean)
 
 
@@ -170,8 +176,8 @@ def test_missforest_numerical_multiple():
 
             # Fill in values
             df_imp2[bad_rows, c] = pred_val
-
-    assert_array_equal(df_imp1, df_imp2)
+    
+    assert_array_almost_equal(df_imp1, df_imp2, decimal=0)
     assert_array_equal(imputer.statistics_.get('col_means'), statistics_mean)