@@ -1381,13 +1381,19 @@ class SMOTEN(SMOTE):
13811381 """
13821382
13831383 def _check_X_y (self , X , y ):
1384+ """Validate X and y, accepting string data but not sparse matrices."""
13841385 y , binarize_y = check_target_type (y , indicate_one_vs_all = True )
13851386 X , y = self ._validate_data (
1386- X , y , reset = True , dtype = None , accept_sparse = ["csr" , "csc" ]
1387+ X ,
1388+ y ,
1389+ reset = True ,
1390+ dtype = None ,
1391+ accept_sparse = False ,
13871392 )
13881393 return X , y , binarize_y
13891394
13901395 def _validate_estimator (self ):
1396+ """Force the use of a precomputed distance matrix."""
13911397 super ()._validate_estimator ()
13921398 self .nn_k_ .set_params (metric = "precomputed" )
13931399
@@ -1427,7 +1433,8 @@ def _fit_resample(self, X, y):
14271433
14281434 X_class_dist = vdm .pairwise (X_class )
14291435 self .nn_k_ .fit (X_class_dist )
1430- # should countain the point itself
1436+ # the kneighbors search will include the sample itself which is
1437+ # expected from the original algorithm
14311438 nn_indices = self .nn_k_ .kneighbors (X_class_dist , return_distance = False )
14321439 X_new , y_new = self ._make_samples (
14331440 X_class , class_sample , y .dtype , nn_indices , n_samples
@@ -1437,10 +1444,7 @@ def _fit_resample(self, X, y):
14371444 X_resampled .append (X_new )
14381445 y_resampled .append (y_new )
14391446
1440- if sparse .issparse (X ):
1441- X_resampled = sparse .vstack (X_resampled , format = X .format )
1442- else :
1443- X_resampled = np .vstack (X_resampled )
1447+ X_resampled = np .vstack (X_resampled )
14441448 y_resampled = np .hstack (y_resampled )
14451449
14461450 return X_resampled , y_resampled
0 commit comments