@@ -727,9 +727,11 @@ def _fit_resample(self, X, y):
727727 return X_resampled , y_resampled
728728
729729
730- # @Substitution(
731- # sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
732- # random_state=_random_state_docstring)
730+ @Substitution (
731+ sampling_strategy = BaseOverSampler ._sampling_strategy_docstring ,
732+ n_jobs = _n_jobs_docstring ,
733+ random_state = _random_state_docstring ,
734+ )
733735class SMOTENC (SMOTE ):
734736 """Synthetic Minority Over-sampling Technique for Nominal and Continuous.
735737
@@ -748,64 +750,17 @@ class SMOTENC(SMOTE):
748750 - mask array of shape (n_features, ) and ``bool`` dtype for which
749751 ``True`` indicates the categorical features.
750752
751- sampling_strategy : float, str, dict or callable, default='auto'
752- Sampling information to resample the data set.
753-
754- - When ``float``, it corresponds to the desired ratio of the number of
755- samples in the minority class over the number of samples in the
756- majority class after resampling. Therefore, the ratio is expressed as
757- :math:`\\alpha_{os} = N_{rm} / N_{M}` where :math:`N_{rm}` is the
758- number of samples in the minority class after resampling and
759- :math:`N_{M}` is the number of samples in the majority class.
760-
761- .. warning::
762- ``float`` is only available for **binary** classification. An
763- error is raised for multi-class classification.
764-
765- - When ``str``, specify the class targeted by the resampling. The
766- number of samples in the different classes will be equalized.
767- Possible choices are:
768-
769- ``'minority'``: resample only the minority class;
770-
771- ``'not minority'``: resample all classes but the minority class;
772-
773- ``'not majority'``: resample all classes but the majority class;
774-
775- ``'all'``: resample all classes;
776-
777- ``'auto'``: equivalent to ``'not majority'``.
778-
779- - When ``dict``, the keys correspond to the targeted classes. The
780- values correspond to the desired number of samples for each targeted
781- class.
782-
783- - When callable, function taking ``y`` and returns a ``dict``. The keys
784- correspond to the targeted classes. The values correspond to the
785- desired number of samples for each class.
786-
787- random_state : int, RandomState instance, default=None
788- Control the randomization of the algorithm.
753+ {sampling_strategy}
789754
790- - If int, ``random_state`` is the seed used by the random number
791- generator;
792- - If ``RandomState`` instance, random_state is the random number
793- generator;
794- - If ``None``, the random number generator is the ``RandomState``
795- instance used by ``np.random``.
755+ {random_state}
796756
797757 k_neighbors : int or object, default=5
798758 If ``int``, number of nearest neighbours used to construct synthetic
799759 samples. If object, an estimator that inherits from
800760 :class:`~sklearn.neighbors.base.KNeighborsMixin` that will be used to
801761 find the k_neighbors.
802762
803- n_jobs : int, default=None
804- Number of CPU cores used during the cross-validation loop.
805- ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
806- ``-1`` means using all processors. See
807- `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
808- for more details.
763+ {n_jobs}
809764
810765 See Also
811766 --------
@@ -846,16 +801,16 @@ class SMOTENC(SMOTE):
846801 >>> X, y = make_classification(n_classes=2, class_sep=2,
847802 ... weights=[0.1, 0.9], n_informative=3, n_redundant=1, flip_y=0,
848803 ... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
849- >>> print('Original dataset shape (%s, %s)' % X.shape)
804+ >>> print(f'Original dataset shape {{X.shape}}')
850805 Original dataset shape (1000, 20)
851- >>> print(f'Original dataset samples per class {Counter(y)}')
852- Original dataset samples per class Counter({1: 900, 0: 100})
806+ >>> print(f'Original dataset samples per class {{Counter(y)}}')
807+ Original dataset samples per class Counter({{1: 900, 0: 100}})
853808 >>> # simulate the 2 last columns to be categorical features
854809 >>> X[:, -2:] = RandomState(10).randint(0, 4, size=(1000, 2))
855810 >>> sm = SMOTENC(random_state=42, categorical_features=[18, 19])
856811 >>> X_res, y_res = sm.fit_resample(X, y)
857- >>> print(f'Resampled dataset samples per class {Counter(y_res)}')
858- Resampled dataset samples per class Counter({0: 900, 1: 900})
812+ >>> print(f'Resampled dataset samples per class {{Counter(y_res)}}')
813+ Resampled dataset samples per class Counter({{0: 900, 1: 900}})
859814 """
860815
861816 _required_parameters = ["categorical_features" ]
0 commit comments