# Christos Aridas
# Guillaume Lemaitre <[email protected]>
# License: BSD
-
from sklearn import pipeline
from sklearn.base import clone
-from sklearn.utils import Bunch, _print_elapsed_time
+from sklearn.utils import _print_elapsed_time
from sklearn.utils.metaestimators import if_delegate_has_method
from sklearn.utils.validation import check_memory

@@ -145,7 +144,8 @@ def _validate_steps(self):
            ):
                raise TypeError(
                    "All intermediate steps of the chain should "
-                   "be estimators that implement fit and transform or sample."
+                   "be estimators that implement fit and transform or "
+                   "fit_resample."
                    " '%s' implements both)" % (t)
                )

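For context, a rough illustration (not part of the commit) of what this check rejects: a hypothetical step that exposes both a transformer interface and `fit_resample` triggers the `TypeError` above at fit time. `BadStep` below is an invented example class.

from imblearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression


class BadStep:  # hypothetical step implementing both interfaces
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X

    def fit_resample(self, X, y):
        return X, y


pipe = Pipeline([("bad", BadStep()), ("clf", LogisticRegression())])
# Calling pipe.fit(X, y) is expected to raise the TypeError above,
# because BadStep "implements both" transform and fit_resample.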
@@ -167,6 +167,21 @@ def _validate_steps(self):
                    % (estimator, type(estimator))
                )

+    def _iter(
+        self, with_final=True, filter_passthrough=True, filter_resample=True
+    ):
+        """Generate (idx, (name, trans)) tuples from self.steps.
+
+        When `filter_passthrough` is `True`, 'passthrough' and None
+        transformers are filtered out. When `filter_resample` is `True`,
+        estimators with a method `fit_resample` are filtered out.
+        """
+        it = super()._iter(with_final, filter_passthrough)
+        if filter_resample:
+            return filter(lambda x: not hasattr(x[-1], "fit_resample"), it)
+        else:
+            return it
+
    # Estimator interface

    def _fit(self, X, y=None, **fit_params):
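A minimal sketch (not from the commit) of the filtering idea behind the new `_iter`: steps exposing `fit_resample` are dropped, so the prediction-time methods inherited from scikit-learn's `Pipeline` only ever see true transformers. `DummySampler` and `DummyScaler` are hypothetical stand-ins.

class DummySampler:  # stands in for an imbalanced-learn sampler
    def fit_resample(self, X, y):
        return X, y


class DummyScaler:  # stands in for a scikit-learn transformer
    def fit_transform(self, X, y=None):
        return X

    def transform(self, X):
        return X


steps = [("scale", DummyScaler()), ("sample", DummySampler())]
# Same predicate as the lambda in _iter: keep only non-samplers.
kept = [name for name, est in steps if not hasattr(est, "fit_resample")]
print(kept)  # ['scale']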
@@ -175,7 +190,7 @@ def _fit(self, X, y=None, **fit_params):
        # Setup the memory
        memory = check_memory(self.memory)

-        fit_transform_one_cached = memory.cache(_fit_transform_one)
+        fit_transform_one_cached = memory.cache(pipeline._fit_transform_one)
        fit_resample_one_cached = memory.cache(_fit_resample_one)

        fit_params_steps = {
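A hedged sketch of the caching used here, assuming joblib is available; `cache_dir` and `fit_transform_one` below are illustrative names, the latter standing in for `sklearn.pipeline._fit_transform_one`.

from joblib import Memory
from sklearn.utils.validation import check_memory

memory = check_memory(Memory(location="cache_dir", verbose=0))


def fit_transform_one(transformer, X, y):
    # Fit the step, then transform the data it was fitted on.
    return transformer.fit(X, y).transform(X), transformer


# memory.cache returns a wrapper that memoizes results on disk, so an
# unchanged step fitted on the same data is not recomputed.
fit_transform_one_cached = memory.cache(fit_transform_one)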
@@ -194,7 +209,8 @@ def _fit(self, X, y=None, **fit_params):
        for (step_idx,
             name,
             transformer) in self._iter(with_final=False,
-                                        filter_passthrough=False):
+                                        filter_passthrough=False,
+                                        filter_resample=False):
            if (transformer is None or transformer == 'passthrough'):
                with _print_elapsed_time('Pipeline',
                                         self._log_message(step_idx)):
@@ -208,7 +224,7 @@ def _fit(self, X, y=None, **fit_params):
                else:
                    cloned_transformer = clone(transformer)
            elif hasattr(memory, "cachedir"):
-                # joblib < 0.11
+                # joblib <= 0.11
                if memory.cachedir is None:
                    # we do not clone when caching is disabled to
                    # preserve backward compatibility
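A hedged illustration of the compatibility check around this hunk: newer joblib `Memory` objects expose a `location` attribute, while joblib <= 0.11 exposes `cachedir`; either being `None` means caching is disabled, so the step need not be cloned. The helper name below is invented for illustration.

def caching_disabled(memory):
    # memory is expected to be a joblib.Memory instance.
    if hasattr(memory, "location"):  # joblib >= 0.12
        return memory.location is None
    if hasattr(memory, "cachedir"):  # joblib <= 0.11
        return memory.cachedir is None
    return True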
@@ -354,38 +370,6 @@ def fit_resample(self, X, y=None, **fit_params):
        elif hasattr(last_step, "fit_resample"):
            return last_step.fit_resample(Xt, yt, **fit_params)

-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict(self, X, **predict_params):
-        """Apply transformers/samplers to the data, and predict with the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        **predict_params : dict of string -> object
-            Parameters to the ``predict`` called at the end of all
-            transformations in the pipeline. Note that while this may be
-            used to return uncertainties from some models with return_std
-            or return_cov, uncertainties that are generated by the
-            transformations in the pipeline are not propagated to the
-            final estimator.
-
-        Returns
-        -------
-        y_pred : array-like
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict(Xt, **predict_params)
-
    @if_delegate_has_method(delegate="_final_estimator")
    def fit_predict(self, X, y=None, **fit_params):
        """Applies fit_predict of last step in pipeline after transforms.
@@ -419,233 +403,6 @@ def fit_predict(self, X, y=None, **fit_params):
        y_pred = self.steps[-1][-1].fit_predict(Xt, yt, **fit_params)
        return y_pred

-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_proba(self, X):
-        """Apply transformers/samplers, and predict_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_proba : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_proba(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def decision_function(self, X):
-        """Apply transformers/samplers, and decision_function of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].decision_function(Xt)
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def predict_log_proba(self, X):
-        """Apply transformers/samplers, and predict_log_proba of the final
-        estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        y_score : array-like, shape = [n_samples, n_classes]
-
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return self.steps[-1][-1].predict_log_proba(Xt)
-
-    @property
-    def transform(self):
-        """Apply transformers/samplers, and transform with the final estimator
-
-        This also works where final estimator is ``None``: all prior
-        transformations are applied.
-
-        Parameters
-        ----------
-        X : iterable
-            Data to transform. Must fulfill input requirements of first step
-            of the pipeline.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-        """
-        # _final_estimator is None or has transform, otherwise attribute error
-        if self._final_estimator != "passthrough":
-            self._final_estimator.transform
-        return self._transform
-
-    def _transform(self, X):
-        Xt = X
-        for _, _, transform in self._iter():
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        return Xt
-
-    @property
-    def inverse_transform(self):
-        """Apply inverse transformations in reverse order
-
-        All estimators in the pipeline must support ``inverse_transform``.
-
-        Parameters
-        ----------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-            Data samples, where ``n_samples`` is the number of samples and
-            ``n_features`` is the number of features. Must fulfill
-            input requirements of last step of pipeline's
-            ``inverse_transform`` method.
-
-        Returns
-        -------
-        Xt : array-like, shape = [n_samples, n_features]
-        """
-        # raise AttributeError if necessary for hasattr behaviour
-        for _, _, transform in self._iter():
-            transform.inverse_transform
-        return self._inverse_transform
-
-    def _inverse_transform(self, X):
-        Xt = X
-        reverse_iter = reversed(list(self._iter()))
-        for _, _, transform in reverse_iter:
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.inverse_transform(Xt)
-        return Xt
-
-    @if_delegate_has_method(delegate="_final_estimator")
-    def score(self, X, y=None, sample_weight=None):
-        """Apply transformers/samplers, and score with the final estimator
-
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-
-        y : iterable, default=None
-            Targets used for scoring. Must fulfill label requirements for all
-            steps of the pipeline.
-
-        sample_weight : array-like, default=None
-            If not None, this argument is passed as ``sample_weight`` keyword
-            argument to the ``score`` method of the final estimator.
-
-        Returns
-        -------
-        score : float
-        """
-        Xt = X
-        for _, _, transform in self._iter(with_final=False):
-            if hasattr(transform, "fit_resample"):
-                pass
-            else:
-                Xt = transform.transform(Xt)
-        score_params = {}
-        if sample_weight is not None:
-            score_params["sample_weight"] = sample_weight
-        return self.steps[-1][-1].score(Xt, y, **score_params)
-
-    @if_delegate_has_method(delegate='_final_estimator')
-    def score_samples(self, X):
-        """Apply transforms, and score_samples of the final estimator.
-        Parameters
-        ----------
-        X : iterable
-            Data to predict on. Must fulfill input requirements of first step
-            of the pipeline.
-        Returns
-        -------
-        y_score : ndarray, shape (n_samples,)
-        """
-        Xt = X
-        for _, _, transformer in self._iter(with_final=False):
-            if hasattr(transformer, "fit_resample"):
-                pass
-            else:
-                Xt = transformer.transform(Xt)
-        return self.steps[-1][-1].score_samples(Xt)
-
-
-def _fit_transform_one(transformer,
-                       X,
-                       y,
-                       weight,
-                       message_clsname='',
-                       message=None,
-                       **fit_params):
-    with _print_elapsed_time(message_clsname, message):
-        if hasattr(transformer, "fit_transform"):
-            res = transformer.fit_transform(X, y, **fit_params)
-        else:
-            res = transformer.fit(X, y, **fit_params).transform(X)
-    # if we have a weight for this transformer, multiply output
-    if weight is None:
-        return res, transformer
-    return res * weight, transformer
-

def _fit_resample_one(sampler,
                      X,
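To close, a hedged end-to-end usage sketch of the behaviour this change relies on: samplers run during `fit`, while `predict` and `score` (now inherited from scikit-learn's `Pipeline` through the filtered `_iter`) skip them. Assumes imbalanced-learn and scikit-learn are installed.

from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

X, y = make_classification(weights=[0.9, 0.1], random_state=0)
pipe = Pipeline([
    ("scale", StandardScaler()),
    ("sample", RandomUnderSampler(random_state=0)),  # used only during fit
    ("clf", LogisticRegression()),
])
pipe.fit(X, y)
print(pipe.score(X, y))  # scoring transforms X but never resamples it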