U
    2d/                     @   s   d Z ddlmZmZ ddlZddlZddlmZ ddl	m
Z
mZmZ ddlmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ G dd deee
ZdS )z
Sequential feature selection
    )IntegralRealN   )SelectorMixin   )BaseEstimatorMetaEstimatorMixinclone)
HasMethodsHiddenInterval
StrOptions)
_safe_tags)check_is_fitted)cross_val_score)get_scorer_namesc                	   @   s   e Zd ZU dZedggeddhdhdeedddd	eedd
dd	e	d
gd
eedd
dd	geddhgd
ee
e egdgd
egdZeed< dd
dd
dd
dddZdddZdd Zdd Zdd Zd
S )SequentialFeatureSelectora  Transformer that performs Sequential Feature Selection.

    This Sequential Feature Selector adds (forward selection) or
    removes (backward selection) features to form a feature subset in a
    greedy fashion. At each stage, this estimator chooses the best feature to
    add or remove based on the cross-validation score of an estimator. In
    the case of unsupervised learning, this Sequential Feature Selector
    looks only at the features (X), not the desired outputs (y).

    Read more in the :ref:`User Guide <sequential_feature_selection>`.

    .. versionadded:: 0.24

    Parameters
    ----------
    estimator : estimator instance
        An unfitted estimator.

    n_features_to_select : "auto", int or float, default='warn'
        If `"auto"`, the behaviour depends on the `tol` parameter:

        - if `tol` is not `None`, then features are selected until the score
          improvement does not exceed `tol`.
        - otherwise, half of the features are selected.

        If integer, the parameter is the absolute number of features to select.
        If float between 0 and 1, it is the fraction of features to select.

        .. versionadded:: 1.1
           The option `"auto"` was added in version 1.1.

        .. deprecated:: 1.1
           The default changed from `None` to `"warn"` in 1.1 and will become
           `"auto"` in 1.3. `None` and `'warn'` will be removed in 1.3.
           To keep the same behaviour as `None`, set
           `n_features_to_select="auto" and `tol=None`.

    tol : float, default=None
        If the score is not incremented by at least `tol` between two
        consecutive feature additions or removals, stop adding or removing.
        `tol` is enabled only when `n_features_to_select` is `"auto"`.

        .. versionadded:: 1.1

    direction : {'forward', 'backward'}, default='forward'
        Whether to perform forward selection or backward selection.

    scoring : str or callable, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        NOTE that when using a custom scorer, it should return a single
        value.

        If None, the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - integer, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

    n_jobs : int, default=None
        Number of jobs to run in parallel. When evaluating a new feature to
        add or remove, the cross-validation procedure is parallel over the
        folds.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_features_to_select_ : int
        The number of features that were selected.

    support_ : ndarray of shape (n_features,), dtype=bool
        The mask of selected features.

    See Also
    --------
    GenericUnivariateSelect : Univariate feature selector with configurable
        strategy.
    RFE : Recursive feature elimination based on importance weights.
    RFECV : Recursive feature elimination based on importance weights, with
        automatic selection of the number of features.
    SelectFromModel : Feature selection based on thresholds of importance
        weights.

    Examples
    --------
    >>> from sklearn.feature_selection import SequentialFeatureSelector
    >>> from sklearn.neighbors import KNeighborsClassifier
    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> knn = KNeighborsClassifier(n_neighbors=3)
    >>> sfs = SequentialFeatureSelector(knn, n_features_to_select=3)
    >>> sfs.fit(X, y)
    SequentialFeatureSelector(estimator=KNeighborsClassifier(n_neighbors=3),
                              n_features_to_select=3)
    >>> sfs.get_support()
    array([ True, False,  True,  True])
    >>> sfs.transform(X).shape
    (150, 3)
    fitautowarn)
deprecatedr   r   right)closedNZneitherforwardbackwardZ	cv_object	estimatorn_features_to_selecttol	directionscoringcvn_jobs_parameter_constraints   )r   r   r   r    r!   r"   c                C   s.   || _ || _|| _|| _|| _|| _|| _d S Nr   )selfr   r   r   r   r    r!   r"    r'   I/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/feature_selection/_sequential.py__init__   s    z"SequentialFeatureSelector.__init__c                 C   s  |    | jdkrtdt |  }| j|dd|dd d}|jd }d	| j }| jdkr| j	d
k	rtt
d|d | _n~| jdkr| j	d
k	r|d | _q|d | _nRt| jtrd| j  k r|k sn t
|| j| _nt| jtrt|| j | _t| j}tj|td}| jdks.| jdkr4| jn|| j }tj }	| j	d
k	oZ| jdk}
t|D ]B}| ||||\}}|
r||	 | j	k r q|}	d||< qd| jdkr| }|| _| j | _| S )a  Learn the features to select from X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of predictors.

        y : array-like of shape (n_samples,), default=None
            Target values. This parameter may be ignored for
            unsupervised learning.

        Returns
        -------
        self : object
            Returns the instance itself.
        )r   Na  Leaving `n_features_to_select` to None is deprecated in 1.0 and will become 'auto' in 1.3. To keep the same behaviour as with None (i.e. select half of the features) and avoid this warning, you should manually set `n_features_to_select='auto'` and set tol=None when creating an instance.Zcscr   	allow_nanT)Zaccept_sparseZensure_min_featuresZforce_all_finiter   zn_features_to_select must be either 'auto', 'warn', None, an integer in [1, n_features - 1] representing the absolute number of features, or a float in (0, 1] representing a percentage of features to select. Got Nz4tol is only enabled if `n_features_to_select='auto'`r   r   )shapeZdtyper   r   )Z_validate_paramsr   warningsr   FutureWarningZ	_get_tagsZ_validate_datagetr+   r   
ValueErrorZn_features_to_select_
isinstancer   r   intr	   r   npzerosboolr   infrange_get_best_new_feature_scoresupport_sum)r&   XytagsZ
n_features	error_msgZcloned_estimatorcurrent_maskZn_iterationsZ	old_scoreZis_auto_select_new_feature_idxZ	new_scorer'   r'   r(   r      sp    









   zSequentialFeatureSelector.fitc           
   	      s   t | }i  |D ]V}| }d||< | jdkr8| }|d d |f }t|||| j| j| jd  |< qt	  fddd}	|	 |	 fS )NTr   )r!   r    r"   c                    s    |  S r%   r'   )feature_idxZscoresr'   r(   <lambda>3      zGSequentialFeatureSelector._get_best_new_feature_score.<locals>.<lambda>key)
r2   Zflatnonzerocopyr   r   r!   r    r"   Zmeanmax)
r&   r   r:   r;   r>   Zcandidate_feature_indicesrA   Zcandidate_maskZX_newr@   r'   rB   r(   r7     s$    
z5SequentialFeatureSelector._get_best_new_feature_scorec                 C   s   t |  | jS r%   )r   r8   r&   r'   r'   r(   _get_support_mask6  s    z+SequentialFeatureSelector._get_support_maskc                 C   s   dt | jddiS )Nr*   rE   )r   r   rI   r'   r'   r(   
_more_tags:  s     z$SequentialFeatureSelector._more_tags)N)__name__
__module____qualname____doc__r
   r   r   r   r   r   setr   callabler#   dict__annotations__r)   r   r7   rJ   rK   r'   r'   r'   r(   r      s2   
 

fr   )rO   Znumbersr   r   Znumpyr2   r,   _baser   baser   r   r	   Zutils._param_validationr
   r   r   r   Zutils._tagsr   Zutils.validationr   Zmodel_selectionr   Zmetricsr   r   r'   r'   r'   r(   <module>   s   