U
    3dK                     @   s   d dl mZ d dlZddlmZmZ	m
Z ddlmZ dd Zdd	 Zd
d Zdd Zd4ddZddddZdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd5d*d+Zd6d,d-Z d.d/ Z!d0d1 Z"d2d3 Z#dS )7    N   )csr_mean_variance_axis0csc_mean_variance_axis0incr_mean_variance_axis0   )_check_sample_weightc                 C   s,   t | r| jnt| }d| }t|dS )z2Raises a TypeError if X is not a CSR or CSC matrixz,Expected a CSR or CSC sparse matrix, got %s.N)spissparseformattype	TypeError)XZ
input_typeerr r   =/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/utils/sparsefuncs.py_raise_typeerror   s    r   c                 C   s   | dkrt d|  d S )N)r   r   z8Unknown axis value: %d. Use 0 for rows, or 1 for columns)
ValueErroraxisr   r   r   _raise_error_wrong_axis   s    r   c                 C   s6   |j d | j d kst|  j|j| jdd9  _dS )a
  Inplace column scaling of a CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features.
        It should be of CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.
    r   r   Zclip)modeN)shapeAssertionErrordataZtakeindicesr   Zscaler   r   r   inplace_csr_column_scale   s    r   c                 C   s:   |j d | j d kst|  jt|t| j9  _dS )a  Inplace row scaling of a CSR matrix.

    Scale each sample of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR format.

    scale : ndarray of float of shape (n_samples,)
        Array of precomputed sample-wise values to use for scaling.
    r   N)r   r   r   nprepeatdiffindptrr   r   r   r   inplace_csr_row_scale2   s    r!   Fc                 C   s|   t | t| tjr<|dkr*t| ||dS t| j||dS n<t| tjrp|dkr^t| ||dS t| j||dS nt|  dS )a4  Compute mean and variance along an axis on a CSR or CSC matrix.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It can be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    return_sum_weights : bool, default=False
        If True, returns the sum of weights seen for each feature
        if `axis=0` or each sample if `axis=1`.

        .. versionadded:: 0.24

    Returns
    -------

    means : ndarray of shape (n_features,), dtype=floating
        Feature-wise means.

    variances : ndarray of shape (n_features,), dtype=floating
        Feature-wise variances.

    sum_weights : ndarray of shape (n_features,), dtype=floating
        Returned if `return_sum_weights` is `True`.
    r   )weightsreturn_sum_weightsN)	r   
isinstancer   
csr_matrix_csr_mean_var_axis0_csc_mean_var_axis0T
csc_matrixr   )r   r   r"   r#   r   r   r   mean_variance_axisD   s4    $        r*   )r"   c                C   s(  t | t| tjtjfs"t|  t|dkrDtj|j	||j
d}t|t|  krjt|kstn td|dkrt|| j	d krtd| j	d  dt| dn6t|| j	d krtd| j	d  dt| d|dkr| jn| } |d	k	rt|| | j
d}t| ||||d
S )a7	  Compute incremental mean and variance along an axis on a CSR or CSC matrix.

    last_mean, last_var are the statistics computed at the last step by this
    function. Both must be initialized to 0-arrays of the proper size, i.e.
    the number of features in X. last_n is the number of samples encountered
    until now.

    Parameters
    ----------
    X : CSR or CSC sparse matrix of shape (n_samples, n_features)
        Input data.

    axis : {0, 1}
        Axis along which the axis should be computed.

    last_mean : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of means to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_var : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Array of variances to update with the new data X.
        Should be of shape (n_features,) if axis=0 or (n_samples,) if axis=1.

    last_n : float or ndarray of shape (n_features,) or (n_samples,),             dtype=floating
        Sum of the weights seen so far, excluding the current weights
        If not float, it should be of shape (n_features,) if
        axis=0 or (n_samples,) if axis=1. If float it corresponds to
        having same weights for all samples (or features).

    weights : ndarray of shape (n_samples,) or (n_features,), default=None
        If axis is set to 0 shape is (n_samples,) or
        if axis is set to 1 shape is (n_features,).
        If it is set to None, then samples are equally weighted.

        .. versionadded:: 0.24

    Returns
    -------
    means : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise means if axis = 0 or
        sample-wise means if axis = 1.

    variances : ndarray of shape (n_features,) or (n_samples,), dtype=floating
        Updated feature-wise variances if axis = 0 or
        sample-wise variances if axis = 1.

    n : ndarray of shape (n_features,) or (n_samples,), dtype=integral
        Updated number of seen samples per feature if axis=0
        or number of seen features per sample if axis=1.

        If weights is not None, n is a sum of the weights of the seen
        samples or features instead of the actual number of seen
        samples or features.

    Notes
    -----
    NaNs are ignored in the algorithm.
    r   )dtypez8last_mean, last_var, last_n do not have the same shapes.r   zHIf axis=1, then last_mean, last_n, last_var should be of size n_samples z (Got z).zIIf axis=0, then last_mean, last_n, last_var should be of size n_features N)	last_meanlast_varlast_nr"   )r   r$   r   r%   r)   r   r   sizefullr   r+   r   r(   r   _incr_mean_var_axis0)r   r   r,   r-   r.   r"   r   r   r   incr_mean_variance_axis   s4    <(
    r2   c                 C   s>   t | tjrt| j| n t | tjr2t| | nt|  dS )a  Inplace column scaling of a CSC/CSR matrix.

    Scale each feature of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to normalize using the variance of the features. It should be
        of CSC or CSR format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed feature-wise values to use for scaling.
    N)r$   r   r)   r!   r(   r%   r   r   r   r   r   r   inplace_column_scale   s
    r3   c                 C   s>   t | tjrt| j| n t | tjr2t| | nt|  dS )a  Inplace row scaling of a CSR or CSC matrix.

    Scale each row of the data matrix by multiplying with specific scale
    provided by the caller assuming a (n_samples, n_features) shape.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix to be scaled. It should be of CSR or CSC format.

    scale : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Array of precomputed sample-wise values to use for scaling.
    N)r$   r   r)   r   r(   r%   r!   r   r   r   r   r   inplace_row_scale   s
    r4   c                 C   sv   ||fD ]}t |tjrtdq|dk r8|| jd 7 }|dk rN|| jd 7 }| j|k}|| j| j|k< || j|< dS )aK  Swap two rows of a CSC matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
     m and n should be valid integersr   N)r$   r   ndarrayr   r   r   )r   mntZm_maskr   r   r   inplace_swap_row_csc  s    

r:   c              	   C   sx  ||fD ]}t |tjrtdq|dk r8|| jd 7 }|dk rN|| jd 7 }||kr`|| }}| j}|| }||d  }|| }||d  }|| }	|| }
|	|
kr| j|d |  |
|	 7  < ||
 | j|d < ||	 | j|< t| jd| | j|| | j|| | j|| | j|d g| _t| jd| | j|| | j|| | j|| | j|d g| _dS )aK  Swap two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of
        CSR format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    r5   r   r   r   N)	r$   r   r6   r   r   r    Zconcatenater   r   )r   r7   r8   r9   r    Zm_startZm_stopZn_startZn_stopZnz_mZnz_nr   r   r   inplace_swap_row_csr(  sH    

	r;   c                 C   s@   t | tjrt| || n"t | tjr4t| || nt|  dS )a[  
    Swap two rows of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two rows are to be swapped. It should be of CSR or
        CSC format.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    N)r$   r   r)   r:   r%   r;   r   r   r7   r8   r   r   r   inplace_swap_rowg  s
    r=   c                 C   sl   |dk r|| j d 7 }|dk r,|| j d 7 }t| tjrFt| || n"t| tjr`t| || nt|  dS )ag  
    Swap two columns of a CSC/CSR matrix in-place.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Matrix whose two columns are to be swapped. It should be of
        CSR or CSC format.

    m : int
        Index of the column of X to be swapped.

    n : int
        Index of the column of X to be swapped.
    r   r   N)r   r$   r   r)   r;   r%   r:   r   r<   r   r   r   inplace_swap_column  s    r>   c                 C   sL   t t | j}t| | j| j| jf| jd} || j| j| }||fS )N)r   )	r   Zflatnonzeror   r    r   r   r   r   Zreduceat)r   Zufuncmajor_indexvaluer   r   r   _minor_reduce  s    rA   c                 C   s   | j | }|dkrtd| j d|  }|dkr8|  n|  }|  t||\}}t|j| |k }||| d||< |dk}	t	|	|}t	|	|}|dkrt
j|tt||ff| jd|fd}
n(t
j||tt|ff| j|dfd}
|
j S )Nr   &zero-size array to reduction operationr   )r+   r   )r   r   ZtocscZtocsrZsum_duplicatesrA   r   r   r    compressr   Z
coo_matrixzeroslenr+   Aravel)r   r   
min_or_maxNMmatr?   r@   not_fullmaskresr   r   r   _min_or_max_axis  s0    
    rO   c                 C   s   |d krdd| j krtd| jd}| jdkr4|S || j }| jt	| j kr`|||}|S |dk rt|d7 }|dks|dkrt
| ||S tdd S )Nr   rB   r   r   z.invalid axis, use 0 for rows, or 1 for columns)r   r   r+   r   nnzreducer   rG   r   productrO   )r   r   rH   Zzeror7   r   r   r   _sparse_min_or_max  s    


rS   c                 C   s   t | |tjt | |tjfS N)rS   r   Zminimummaximumr   r   r   r   r   _sparse_min_max  s    rW   c                 C   s   t | |tjt | |tjfS rT   )rS   r   ZfminZfmaxrV   r   r   r   _sparse_nan_min_max  s    rX   c                 C   s<   t | tjtjfr0|r"t| |dS t| |dS nt|  dS )a  Compute minimium and maximum along an axis on a CSR or CSC matrix.

     Optionally ignore NaN values.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSR or CSC format.

    axis : {0, 1}
        Axis along which the axis should be computed.

    ignore_nan : bool, default=False
        Ignore or passing through NaN values.

        .. versionadded:: 0.20

    Returns
    -------

    mins : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise minima.

    maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64}
        Feature-wise maxima.
    r   N)r$   r   r%   r)   rX   rW   r   )r   r   Z
ignore_nanr   r   r   min_max_axis  s
    rY   c                 C   s   |dkrd}n(|dkrd}n| j dkr6td | j |dkrb|dkrL| jS tt| j|S n|dkrt| j}|dkr|dS || S |dkr|dkrtj| j	| j
d d	S t|t| j}tj| j	| j
d |d
S ntd |dS )a  A variant of X.getnnz() with extension to weighting on axis 0.

    Useful in efficiently calculating multilabel metrics.

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_labels)
        Input data. It should be of CSR format.

    axis : {0, 1}, default=None
        The axis on which the data is aggregated.

    sample_weight : array-like of shape (n_samples,), default=None
        Weight for each row of X.

    Returns
    -------
    nnz : int, float, ndarray of shape (n_samples,) or ndarray of shape (n_features,)
        Number of non-zero values in the array along a given axis. Otherwise,
        the total number of non-zero values in the array is returned.
    r   r   Zcsrz#Expected CSR sparse format, got {0}NZintp)	minlength)r\   r"   zUnsupported axis: {0})r
   r   rP   r   dotr   r    ZastypeZbincountr   r   r   r   )r   r   Zsample_weightoutr"   r   r   r   count_nonzero  s*    

r_   c                 C   sp   t | | }|stjS t| dk }t|d\}}|   |rLt|| ||S t|d | ||t|| || d S )zCompute the median of data with n_zeros additional zeros.

    This function is used to support sparse matrices; it modifies data
    in-place.
    r   r   r   g       @)rE   r   nanr_   divmodsort_get_elem_at_rank)r   n_zerosZn_elems
n_negativeZmiddleZis_oddr   r   r   _get_median7  s    rf   c                 C   s,   | |k r||  S | | |k r dS || |  S )z@Find the value in data augmented with n_zeros for the given rankr   r   )Zrankr   re   rd   r   r   r   rc   M  s
    rc   c           
      C   s   t | tjstd| j | j}| j\}}t|}t	t
|dd |dd D ]8\}\}}t| j|| }||j }	t||	||< qR|S )aC  Find the median across axis 0 of a CSC matrix.

    It is equivalent to doing np.median(X, axis=0).

    Parameters
    ----------
    X : sparse matrix of shape (n_samples, n_features)
        Input data. It should be of CSC format.

    Returns
    -------
    median : ndarray of shape (n_features,)
        Median.
    z%Expected matrix of CSC format, got %sNrZ   r   )r$   r   r)   r   r
   r    r   r   rD   	enumeratezipcopyr   r/   rf   )
r   r    Z	n_samplesZ
n_featuresZmedianZf_indstartendr   Znzr   r   r   csc_median_axis_0V  s    

*
rl   )NF)F)NN)$Zscipy.sparsesparser   Znumpyr   Zsparsefuncs_fastr   r&   r   r'   r   r1   Zutils.validationr   r   r   r   r!   r*   r2   r3   r4   r:   r;   r=   r>   rA   rO   rS   rW   rX   rY   r_   rf   rc   rl   r   r   r   r   <module>   s2   
<^?
$
6	