U
    3dV8                     @   sF   d dl Z d dlZddlmZ ddlmZ ddlmZ G dd dZdS )    N   )check_matplotlib_support)check_random_state)_safe_indexingc                   @   sd   e Zd ZdZdd ZdddddddZedd	ddddd
ddZedd	ddddd
ddZdS )PredictionErrorDisplaya  Visualization of the prediction error of a regression model.

    This tool can display "residuals vs predicted" or "actual vs predicted"
    using scatter plots to qualitatively assess the behavior of a regressor,
    preferably on held-out data points.

    See the details in the docstrings of
    :func:`~sklearn.metrics.PredictionErrorDisplay.from_estimator` or
    :func:`~sklearn.metrics.PredictionErrorDisplay.from_predictions` to
    create a visualizer. All parameters are stored as attributes.

    For general information regarding `scikit-learn` visualization tools, read
    more in the :ref:`Visualization Guide <visualizations>`.
    For details regarding interpreting these plots, refer to the
    :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.

    .. versionadded:: 1.2

    Parameters
    ----------
    y_true : ndarray of shape (n_samples,)
        True values.

    y_pred : ndarray of shape (n_samples,)
        Prediction values.

    Attributes
    ----------
    line_ : matplotlib Artist
        Optimal line representing `y_true == y_pred`. Therefore, it is a
        diagonal line for `kind="predictions"` and a horizontal line for
        `kind="residuals"`.

    errors_lines_ : matplotlib Artist or None
        Residual lines. If `with_errors=False`, then it is set to `None`.

    scatter_ : matplotlib Artist
        Scatter data points.

    ax_ : matplotlib Axes
        Axes with the different matplotlib axis.

    figure_ : matplotlib Figure
        Figure containing the scatter and lines.

    See Also
    --------
    PredictionErrorDisplay.from_estimator : Prediction error visualization
        given an estimator and some data.
    PredictionErrorDisplay.from_predictions : Prediction error visualization
        given the true and predicted targets.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.linear_model import Ridge
    >>> from sklearn.metrics import PredictionErrorDisplay
    >>> X, y = load_diabetes(return_X_y=True)
    >>> ridge = Ridge().fit(X, y)
    >>> y_pred = ridge.predict(X)
    >>> display = PredictionErrorDisplay(y_true=y, y_pred=y_pred)
    >>> display.plot()
    <...>
    >>> plt.show()
    c                C   s   || _ || _d S )Ny_truey_pred)selfr   r	    r   D/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/metrics/_plot/regression.py__init__N   s    zPredictionErrorDisplay.__init__Nresidual_vs_predicted)kindscatter_kwargsline_kwargsc                C   s  t | jj d d}||kr:tdd| d|dddlm} |dkrRi }|dkr^i }d	d
d}dddd}||}||}|dkr| \}	}|dkrXtt	| j
t	| j}
tt	| j
t	| j}|j||
g||
gf|d | _| j| j
 }}d\}}|j||f|| _|jddd |t	j||
dd |t	j||
dd nR|jt	| jt	| jgddgf|d | _|j| j| j
| j f|| _d\}}|j||d || _|j| _| S )a  Plot visualization.

        Extra keyword arguments will be passed to matplotlib's ``plot``.

        Parameters
        ----------
        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        kind : {"actual_vs_predicted", "residual_vs_predicted"},                 default="residual_vs_predicted"
            The type of plot to draw:

            - "actual_vs_predicted" draws the the observed values (y-axis) vs.
              the predicted values (x-axis).
            - "residual_vs_predicted" draws the residuals, i.e difference
              between observed and predicted values, (y-axis) vs. the predicted
              values (x-axis).

        scatter_kwargs : dict, default=None
            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
            call.

        line_kwargs : dict, default=None
            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
            call to draw the optimal line.

        Returns
        -------
        display : :class:`~sklearn.metrics.plot.PredictionErrorDisplay`
            Object that stores computed values.
        z.plot)actual_vs_predictedr   z`kind` must be one of z, z. Got z	 instead.r   Nztab:blueg?)coloralphaZblackgffffff?z--)r   r   Z	linestyler   )Predicted valueszActual valuesequalZdatalim)Z
adjustable   )num)r   zResiduals (actual - predicted))xlabelylabel)r   	__class____name__
ValueErrorjoinZmatplotlib.pyplotZpyplotZsubplotsmaxnpr   r	   minplotZline_ZscatterZscatter_Z
set_aspectZ
set_xticksZlinspaceZ
set_ytickssetZax_figureZfigure_)r
   axr   r   r   Zexpected_kindZpltZdefault_scatter_kwargsZdefault_line_kwargs_Z	max_valueZ	min_valueZx_dataZy_datar   r   r   r   r   r"   R   sj    )

  
zPredictionErrorDisplay.ploti  )r   	subsamplerandom_stater%   r   r   c             
   C   s4   t | j d ||}
| j||
||||||	dS )a8  Plot the prediction error given a regressor and some data.

        For general information regarding `scikit-learn` visualization tools,
        read more in the :ref:`Visualization Guide <visualizations>`.
        For details regarding interpreting these plots, refer to the
        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.

        .. versionadded:: 1.2

        Parameters
        ----------
        estimator : estimator instance
            Fitted regressor or a fitted :class:`~sklearn.pipeline.Pipeline`
            in which the last estimator is a regressor.

        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input values.

        y : array-like of shape (n_samples,)
            Target values.

        kind : {"actual_vs_predicted", "residual_vs_predicted"},                 default="residual_vs_predicted"
            The type of plot to draw:

            - "actual_vs_predicted" draws the the observed values (y-axis) vs.
              the predicted values (x-axis).
            - "residual_vs_predicted" draws the residuals, i.e difference
              between observed and predicted values, (y-axis) vs. the predicted
              values (x-axis).

        subsample : float, int or None, default=1_000
            Sampling the samples to be shown on the scatter plot. If `float`,
            it should be between 0 and 1 and represents the proportion of the
            original dataset. If `int`, it represents the number of samples
            display on the scatter plot. If `None`, no subsampling will be
            applied. by default, a 1000 samples or less will be displayed.

        random_state : int or RandomState, default=None
            Controls the randomness when `subsample` is not `None`.
            See :term:`Glossary <random_state>` for details.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        scatter_kwargs : dict, default=None
            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
            call.

        line_kwargs : dict, default=None
            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
            call to draw the optimal line.

        Returns
        -------
        display : :class:`~sklearn.metrics.PredictionErrorDisplay`
            Object that stores the computed values.

        See Also
        --------
        PredictionErrorDisplay : Prediction error visualization for regression.
        PredictionErrorDisplay.from_predictions : Prediction error visualization
            given the true and predicted targets.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_diabetes
        >>> from sklearn.linear_model import Ridge
        >>> from sklearn.metrics import PredictionErrorDisplay
        >>> X, y = load_diabetes(return_X_y=True)
        >>> ridge = Ridge().fit(X, y)
        >>> disp = PredictionErrorDisplay.from_estimator(ridge, X, y)
        >>> plt.show()
        z.from_estimator)r   r	   r   r'   r(   r%   r   r   )r   r   Zpredictfrom_predictions)clsZ	estimatorXyr   r'   r(   r%   r   r   r	   r   r   r   from_estimator   s    Z
z%PredictionErrorDisplay.from_estimatorc                C   s   t | j d t|}t|}	t|tjrF|dkr~td| dn8t|tjr~|dksb|dkrrtd| dt	|	| }|dk	r||	k r|j
t|	|d	}
t||
dd
}t||
dd
}t||d}|j||||dS )a  Plot the prediction error given the true and predicted targets.

        For general information regarding `scikit-learn` visualization tools,
        read more in the :ref:`Visualization Guide <visualizations>`.
        For details regarding interpreting these plots, refer to the
        :ref:`Model Evaluation Guide <visualization_regression_evaluation>`.

        .. versionadded:: 1.2

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            True target values.

        y_pred : array-like of shape (n_samples,)
            Predicted target values.

        kind : {"actual_vs_predicted", "residual_vs_predicted"},                 default="residual_vs_predicted"
            The type of plot to draw:

            - "actual_vs_predicted" draws the the observed values (y-axis) vs.
              the predicted values (x-axis).
            - "residual_vs_predicted" draws the residuals, i.e difference
              between observed and predicted values, (y-axis) vs. the predicted
              values (x-axis).

        subsample : float, int or None, default=1_000
            Sampling the samples to be shown on the scatter plot. If `float`,
            it should be between 0 and 1 and represents the proportion of the
            original dataset. If `int`, it represents the number of samples
            display on the scatter plot. If `None`, no subsampling will be
            applied. by default, a 1000 samples or less will be displayed.

        random_state : int or RandomState, default=None
            Controls the randomness when `subsample` is not `None`.
            See :term:`Glossary <random_state>` for details.

        ax : matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        scatter_kwargs : dict, default=None
            Dictionary with keywords passed to the `matplotlib.pyplot.scatter`
            call.

        line_kwargs : dict, default=None
            Dictionary with keyword passed to the `matplotlib.pyplot.plot`
            call to draw the optimal line.

        Returns
        -------
        display : :class:`~sklearn.metrics.PredictionErrorDisplay`
            Object that stores the computed values.

        See Also
        --------
        PredictionErrorDisplay : Prediction error visualization for regression.
        PredictionErrorDisplay.from_estimator : Prediction error visualization
            given an estimator and some data.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_diabetes
        >>> from sklearn.linear_model import Ridge
        >>> from sklearn.metrics import PredictionErrorDisplay
        >>> X, y = load_diabetes(return_X_y=True)
        >>> ridge = Ridge().fit(X, y)
        >>> y_pred = ridge.predict(X)
        >>> disp = PredictionErrorDisplay.from_predictions(y_true=y, y_pred=y_pred)
        >>> plt.show()
        z.from_predictionsr   zWhen an integer, subsample=z should be positive.   z!When a floating-point, subsample=z should be in the (0, 1) range.N)size)Zaxisr   )r%   r   r   r   )r   r   r   len
isinstancenumbersIntegralr   Realintchoicer    Zaranger   r   r"   )r*   r   r	   r   r'   r(   r%   r   r   Z	n_samplesindicesZvizr   r   r   r)     s8    V

z'PredictionErrorDisplay.from_predictions)N)	r   
__module____qualname____doc__r   r"   classmethodr-   r)   r   r   r   r   r   
   s0   C dhr   )r2   Znumpyr    utilsr   r   r   r   r   r   r   r   <module>   s
   