U
    3dD                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlZddl	Z	ddl
Z
ddlZddlZddlmZ ddlZddlZddlmZ d	d
lmZ d	dlmZmZ d	dlmZ ddlmZ d	dlmZ d	dlmZ d	dl m!Z!m"Z" d	dl#m$Z$ d	dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 ddlm1Z1 d	dl2m3Z3 ej4Z4ej5Z5ddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-gZ6e
7 d.kZ8d/e9d0 d1kZ:d2d3 Z;d4d5 Z<d6d7 Z=d8d9 Z>d:d; Z?d<d= Z@dmd?d@ZAddAdBdCZBdddDdEdFZCdGdH ZDd>ddddIdJd'ZEdddKdLd(ZFd>dMdNdOZGdPdQ ZHddRdSdTZIddUdVdWZJdXdY ZKdZd[ ZLd\d# ZMd]d^ ZNednd_d`ZOdddadbdcZPddde ZQdfdg ZRdhdi ZSdjd) ZTdkdl ZUdS )oz=
The :mod:`sklearn.utils` module includes various utilities.
    )Sequence)contextmanager)compress)isliceN)suppress)issparse   )murmurhash3_32)compute_class_weightcompute_sample_weight)_joblib   )DataConversionWarning)
deprecated)all_estimators)parse_versionthreadpool_info)estimator_html_repr)as_float_arrayassert_all_finitecheck_random_statecolumn_or_1dcheck_arraycheck_consistent_length	check_X_y	indexablecheck_symmetriccheck_scalar_is_arraylike_not_scalar)
get_config)Bunchr	   r   r   r   r   r
   r   r   r   r   r   r   r   indices_to_maskr   parallel_backendregister_parallel_backendresampleshufflecheck_matplotlib_supportr   r   r   r    PyPy   P    c                  C   s   ddl } ddl}t }tdd |D }|s0dS td}|D ]V}|d dkrNq<|d	}|d
}|dksr|dkrx dS |dkr<t||k r< dS q<dS )z8Return True if in an unstable configuration for OpenBLASr   Nc                 s   s   | ]}|d  dkV  qdS )internal_apiopenblasN ).0infor-   r-   :/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/utils/__init__.py	<genexpr>\   s     z6_in_unstable_openblas_configuration.<locals>.<genexpr>Fz0.3.16r+   r,   versionarchitectureTZ
neoversen1)numpyscipyr   anyr   get)r4   r5   Zmodules_infoZopen_blas_usedZopenblas_arm64_stable_versionr/   Zopenblas_versionZopenblas_architecturer-   r-   r0   #_in_unstable_openblas_configurationS   s(    


r8   c                 C   sD   t |}t |jt jr|S t| dr@t |jd }|| }|S )a  Return a mask which is safe to use on X.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : ndarray
        Mask to be used on X.

    Returns
    -------
    mask : ndarray
        Array that is safe to use on X.
    Ztoarrayr   )npasarrayZ
issubdtypedtypeZsignedintegerhasattrarangeshape)Xmaskindr-   r-   r0   	safe_maskt   s    

rB   c                 C   s4   |dkr| t | |ddf S tjd| jd fdS )a  Return a mask which is safer to use on X than safe_mask.

    This mask is safer than safe_mask since it returns an
    empty array, when a sparse matrix is sliced with a boolean mask
    with all False, instead of raising an unhelpful error in older
    versions of SciPy.

    See: https://github.com/scipy/scipy/issues/5361

    Also note that we can avoid doing the dot product by checking if
    the len_mask is not zero in _huber_loss_and_gradient but this
    is not going to be the bottleneck, since the number of outliers
    and non_outliers are typically non-zero and it makes the code
    tougher to follow.

    Parameters
    ----------
    X : {array-like, sparse matrix}
        Data on which to apply mask.

    mask : ndarray
        Mask to be used on X.

    len_mask : int
        The length of the mask.

    Returns
    -------
    mask : ndarray
        Array that is safe to use on X.
    r   Nr   )r>   )rB   r9   zerosr>   )r?   r@   Zlen_maskr-   r-   r0   axis0_safe_slice   s     rD   c                 C   sL   t | r|dkrt|}t|tr,t|}|dkr<| | S | dd|f S )zAIndex an array or scipy.sparse consistently across NumPy version.boolr   N)r   r9   r:   
isinstancetuplelist)arraykey	key_dtypeaxisr-   r-   r0   _array_indexing   s
    

rM   c                 C   sp   t |rt|}|dkr<t|ts<t|s<| j||dS |dkrJ| jn| j}|rd|dd|f S || S dS )z%Index a pandas dataframe or a series.intrL   N)	r   r9   r:   rF   sliceisscalarZtakeilocloc)r?   rJ   rK   rL   Zindexerr-   r-   r0   _pandas_indexing   s    
rT   c                    sD   t |st|tr | S |dkr2tt |S  fdd|D S )zIndex a Python list.rE   c                    s   g | ]} | qS r-   r-   )r.   idxr?   r-   r0   
<listcomp>   s     z"_list_indexing.<locals>.<listcomp>)r9   rQ   rF   rP   rH   r   )r?   rJ   rK   r-   rV   r0   _list_indexing   s
    rX   Tc           	      C   s|  d}t dtdtdtjdi}ddddddd}| dkr8dS t| t| rvz|t|  W S  t	k
rt   t
|Y nX t| tr|std| jdkr| jdkrdS t| j}t| j}|dk	r|dk	r||krt
||dk	r|S |S t| ttfr6t| }dd	 |D }|sdS t|d
kr.t
|| S t| drpz|| jj W S  t	k
rn   t
|Y nX t
|dS )as  Determine the data type of key.

    Parameters
    ----------
    key : scalar, slice or array-like
        The key from which we want to infer the data type.

    accept_slice : bool, default=True
        Whether or not to raise an error if the key is a slice.

    Returns
    -------
    dtype : {'int', 'str', 'bool', None}
        Returns the data type of key.
    ~No valid specification of the columns. Only a scalar, list or slice of all integers or all strings, or boolean mask is allowedrN   strrE   )iubOUSNzBOnly array-like or scalar are supported. A Python slice was given.c                 S   s   h | ]}t |qS r-   )_determine_key_type)r.   eltr-   r-   r0   	<setcomp>  s     z&_determine_key_type.<locals>.<setcomp>r   r;   )rN   rZ   rE   r9   Zbool_rF   rG   keystypeKeyError
ValueErrorrP   	TypeErrorstartstopra   rH   setlenpopr<   r;   kind)	rJ   Zaccept_sliceerr_msgZdtype_to_strZarray_dtype_to_strZkey_start_typeZkey_stop_typeZ
unique_keyZkey_typer-   r-   r0   ra      s\    	


ra   rO   c                C   s   |dkr| S |dkr"t d|t|}|dkrB|dkrBt d|dkrj| jdkrjt d	t| | j|dkr|dkrt| d
st dt| drt| |||dS t| drt| |||dS t| ||S dS )a  Return rows, items or columns of X using indices.

    .. warning::

        This utility is documented, but **private**. This means that
        backward compatibility might be broken without any deprecation
        cycle.

    Parameters
    ----------
    X : array-like, sparse-matrix, list, pandas.DataFrame, pandas.Series
        Data from which to sample rows, items or columns. `list` are only
        supported when `axis=0`.
    indices : bool, int, str, slice, array-like
        - If `axis=0`, boolean and integer array-like, integer slice,
          and scalar integer are supported.
        - If `axis=1`:
            - to select a single column, `indices` can be of `int` type for
              all `X` types and `str` only for dataframe. The selected subset
              will be 1D, unless `X` is a sparse matrix in which case it will
              be 2D.
            - to select multiples columns, `indices` can be one of the
              following: `list`, `array`, `slice`. The type used in
              these containers can be one of the following: `int`, 'bool' and
              `str`. However, `str` is only supported when `X` is a dataframe.
              The selected subset will be 2D.
    axis : int, default=0
        The axis along which `X` will be subsampled. `axis=0` will select
        rows while `axis=1` will select columns.

    Returns
    -------
    subset
        Subset of X on axis 0 or 1.

    Notes
    -----
    CSR, CSC, and LIL sparse matrices are supported. COO sparse matrices are
    not supported.
    N)r   r   zR'axis' should be either 0 (to index rows) or 1 (to index  column). Got {} instead.r   rZ   z.String indexing is not supported with 'axis=0'r   r   z'X' should be a 2D NumPy array, 2D sparse matrix or pandas dataframe when indexing the columns (i.e. 'axis=1'). Got {} instead with {} dimension(s).rS   LSpecifying the columns using strings is only supported for pandas DataFramesrR   rO   r>   )	rg   formatra   ndimre   r<   rT   rM   rX   )r?   indicesrL   Zindices_dtyper-   r-   r0   _safe_indexing  s6    ) 

rt   )row_indexercolumn_indexerc             	   C   sz   |dkrt dddn|}|dkr,t dddn|}t| drjt   tdt || j||f< W 5 Q R X n|| ||f< dS )an  Safe assignment to a numpy array, sparse matrix, or pandas dataframe.

    Parameters
    ----------
    X : {ndarray, sparse-matrix, dataframe}
        Array to be modified. It is expected to be 2-dimensional.

    values : ndarray
        The values to be assigned to `X`.

    row_indexer : array-like, dtype={int, bool}, default=None
        A 1-dimensional array to select the rows of interest. If `None`, all
        rows are selected.

    column_indexer : array-like, dtype={int, bool}, default=None
        A 1-dimensional array to select the columns of interest. If `None`, all
        columns are selected.
    NrR   ignore)rP   r<   warningscatch_warningssimplefilterFutureWarningrR   )r?   valuesru   rv   r-   r-   r0   _safe_assigni  s    

r}   c              
   C   s  | j d }t|}t|ttfr(|s(g S |dkrztt||}W n8 tk
r| } zt	d
|d ||W 5 d}~X Y nX t| S |dkrz
| j}W n tk
r   t	dY nX t|tr|g}nlt|tr4|j|j }}	|dk	 r||}|	dk	r||	d }	n|d }	ttt|||	S t|}zFg }
|D ]8}||}t|tjsrt	d| d|
| qFW n. tk
r } zt	d	|W 5 d}~X Y nX |
S t	d
dS )zGet feature column indices for input data X and key.

    For accepted values of `key`, see the docstring of
    :func:`_safe_indexing_column`.
    r   )rE   rN   z+all features must be in [0, {}] or [-{}, 0]NrZ   rp   zSelected columns, z, are not unique in dataframez/A given column is not a column of the dataframerY   )r>   ra   rF   rH   rG   rt   r9   r=   
IndexErrorrg   rq   Z
atleast_1dtolistcolumnsAttributeErrorrZ   rP   ri   rj   Zget_locr   rangenumbersIntegralappendrf   )r?   rJ   Z	n_columnsrK   rU   eZall_columnsr   ri   rj   Zcolumn_indicescolZcol_idxr-   r-   r0   _get_column_indices  sd    
 








r   )replace	n_samplesrandom_statestratifyc                    s  |}t |}t|dkrdS |d }t|dr8|jd nt|}|dkrN|}n||krj| sjtd||f t|  |dkr| r|jd||fd n t| |	   d|  nt
|ddd}|jdkrtd	d
 |D }tj|dd\}}	|jd }
t|	}ttj|	ddt|dd }t|||}g  t|
D ](}|j|| || | d} | qB|  dd
 |D } fdd
|D }t|dkr|d S |S dS )aP
  Resample arrays or sparse matrices in a consistent way.

    The default strategy implements one step of the bootstrapping
    procedure.

    Parameters
    ----------
    *arrays : sequence of array-like of shape (n_samples,) or             (n_samples, n_outputs)
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    replace : bool, default=True
        Implements resampling with replacement. If False, this will implement
        (sliced) random permutations.

    n_samples : int, default=None
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.
        If replace is False it should not be larger than the length of
        arrays.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for shuffling
        the data.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    stratify : array-like of shape (n_samples,) or (n_samples, n_outputs),             default=None
        If not None, data is split in a stratified fashion, using this as
        the class labels.

    Returns
    -------
    resampled_arrays : sequence of array-like of shape (n_samples,) or             (n_samples, n_outputs)
        Sequence of resampled copies of the collections. The original arrays
        are not impacted.

    See Also
    --------
    shuffle : Shuffle arrays or sparse matrices in a consistent way.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> import numpy as np
      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import resample
      >>> X, X_sparse, y = resample(X, X_sparse, y, random_state=0)
      >>> X
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse
      <3x2 sparse matrix of type '<... 'numpy.float64'>'
          with 4 stored elements in Compressed Sparse Row format>

      >>> X_sparse.toarray()
      array([[1., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([0, 1, 0])

      >>> resample(y, n_samples=2, random_state=0)
      array([0, 1])

    Example using stratification::

      >>> y = [0, 0, 1, 1, 1, 1, 1, 1, 1]
      >>> resample(y, n_samples=5, replace=False, stratify=y,
      ...          random_state=0)
      [1, 1, 1, 0, 1]
    r   Nr>   z@Cannot sample %d out of arrays with dim %d when replace is False)sizeF)	ensure_2dr;   r   c                 S   s   g | ]}d  |dqS ) rZ   )joinastype)r.   rowr-   r-   r0   rW   H  s     zresample.<locals>.<listcomp>T)Zreturn_inverseZ	mergesort)rn   )r   c                 S   s    g | ]}t |r| n|qS r-   )r   Ztocsrr.   ar-   r-   r0   rW   `  s     c                    s   g | ]}t | qS r-   )rt   r   rs   r-   r0   rW   a  s     r   )r   rl   r<   r>   rg   r   randintr9   r=   r%   r   rr   rI   uniqueZbincountsplitZargsortZcumsum_approximate_moder   choiceextendZpermutation)r   r   r   r   arraysZmax_n_samplesfirstyclassesZ	y_indicesZ	n_classesclass_countsZclass_indicesZn_ir[   Z	indices_iZresampled_arraysr-   r   r0   r$     sT    U




 
)r   r   c                 G   s   t |d|| dS )a  Shuffle arrays or sparse matrices in a consistent way.

    This is a convenience alias to ``resample(*arrays, replace=False)`` to do
    random permutations of the collections.

    Parameters
    ----------
    *arrays : sequence of indexable data-structures
        Indexable data-structures can be arrays, lists, dataframes or scipy
        sparse matrices with consistent first dimension.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for shuffling
        the data.
        Pass an int for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    n_samples : int, default=None
        Number of samples to generate. If left to None this is
        automatically set to the first dimension of the arrays.  It should
        not be larger than the length of arrays.

    Returns
    -------
    shuffled_arrays : sequence of indexable data-structures
        Sequence of shuffled copies of the collections. The original arrays
        are not impacted.

    See Also
    --------
    resample : Resample arrays or sparse matrices in a consistent way.

    Examples
    --------
    It is possible to mix sparse and dense arrays in the same run::

      >>> import numpy as np
      >>> X = np.array([[1., 0.], [2., 1.], [0., 0.]])
      >>> y = np.array([0, 1, 2])

      >>> from scipy.sparse import coo_matrix
      >>> X_sparse = coo_matrix(X)

      >>> from sklearn.utils import shuffle
      >>> X, X_sparse, y = shuffle(X, X_sparse, y, random_state=0)
      >>> X
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> X_sparse
      <3x2 sparse matrix of type '<... 'numpy.float64'>'
          with 3 stored elements in Compressed Sparse Row format>

      >>> X_sparse.toarray()
      array([[0., 0.],
             [2., 1.],
             [1., 0.]])

      >>> y
      array([2, 1, 0])

      >>> shuffle(y, n_samples=2, random_state=0)
      array([0, 1])
    F)r   r   r   )r$   )r   r   r   r-   r-   r0   r%   i  s    B   )copyc                C   sR   t | dddgdd} t| r8|r(|  } |  jdC  _n|rF| d } n| dC } | S )a  Element wise squaring of array-likes and sparse matrices.

    Parameters
    ----------
    X : {array-like, ndarray, sparse matrix}

    copy : bool, default=True
        Whether to create a copy of X and operate on it or to perform
        inplace computation (default behaviour).

    Returns
    -------
    X ** 2 : element wise square
         Return the element-wise square of the input.
    ZcsrZcscZcooF)Zaccept_sparser   r   )r   r   r   data)r?   r   r-   r-   r0   safe_sqr  s    
r   c                 c   s$   t t| |}|r|V  q dS q dS )zzChunk generator, ``gen`` into lists of length ``chunksize``. The last
    chunk may have a length less than ``chunksize``.N)rH   r   )gen	chunksizechunkr-   r-   r0   _chunk_generator  s    r   )min_batch_sizec                c   s   t |tjstd| |dkr,td| d}tt| | D ]*}|| }|| | krZq@t||V  |}q@|| k rt|| V  dS )a,  Generator to create slices containing `batch_size` elements from 0 to `n`.

    The last slice may contain less than `batch_size` elements, when
    `batch_size` does not divide `n`.

    Parameters
    ----------
    n : int
        Size of the sequence.
    batch_size : int
        Number of elements in each batch.
    min_batch_size : int, default=0
        Minimum number of elements in each batch.

    Yields
    ------
    slice of `batch_size` elements

    See Also
    --------
    gen_even_slices: Generator to create n_packs slices going up to n.

    Examples
    --------
    >>> from sklearn.utils import gen_batches
    >>> list(gen_batches(7, 3))
    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
    >>> list(gen_batches(6, 3))
    [slice(0, 3, None), slice(3, 6, None)]
    >>> list(gen_batches(2, 3))
    [slice(0, 2, None)]
    >>> list(gen_batches(7, 3, min_batch_size=0))
    [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]
    >>> list(gen_batches(7, 3, min_batch_size=2))
    [slice(0, 3, None), slice(3, 7, None)]
    z1gen_batches got batch_size=%s, must be an integerr   z/gen_batches got batch_size=%s, must be positiveN)rF   r   r   rh   rg   r   rN   rP   )nZ
batch_sizer   ri   _endr-   r-   r0   gen_batches  s    %r   )r   c                c   sz   d}|dk rt d| t|D ]T}| | }|| | k r@|d7 }|dkr || }|dk	rbt||}t||dV  |}q dS )a  Generator to create `n_packs` evenly spaced slices going up to `n`.

    If `n_packs` does not divide `n`, except for the first `n % n_packs`
    slices, remaining slices may contain fewer elements.

    Parameters
    ----------
    n : int
        Size of the sequence.
    n_packs : int
        Number of slices to generate.
    n_samples : int, default=None
        Number of samples. Pass `n_samples` when the slices are to be used for
        sparse matrix indexing; slicing off-the-end raises an exception, while
        it works for NumPy arrays.

    Yields
    ------
    `slice` representing a set of indices from 0 to n.

    See Also
    --------
    gen_batches: Generator to create slices containing batch_size elements
        from 0 to n.

    Examples
    --------
    >>> from sklearn.utils import gen_even_slices
    >>> list(gen_even_slices(10, 1))
    [slice(0, 10, None)]
    >>> list(gen_even_slices(10, 10))
    [slice(0, 1, None), slice(1, 2, None), ..., slice(9, 10, None)]
    >>> list(gen_even_slices(10, 5))
    [slice(0, 2, None), slice(2, 4, None), ..., slice(8, 10, None)]
    >>> list(gen_even_slices(10, 3))
    [slice(0, 4, None), slice(4, 7, None), slice(7, 10, None)]
    r   r   z+gen_even_slices got n_packs=%s, must be >=1N)rg   r   minrP   )r   Zn_packsr   ri   Zpack_numZthis_nr   r-   r-   r0   gen_even_slices  s    &
r   c                 C   s0   t | tjrt| S t | tr$| S t| S dS )a  Cast iterable x to a Sequence, avoiding a copy if possible.

    Parameters
    ----------
    x : iterable
        The iterable to be converted.

    Returns
    -------
    x : Sequence
        If `x` is a NumPy array, it returns it as a `ndarray`. If `x`
        is a `Sequence`, `x` is returned as-is. If `x` is from any other
        type, `x` is returned casted as a list.
    N)rF   r9   Zndarrayr:   r   rH   xr-   r-   r0   
tosequenceC  s
    

r   c                 C   s"   t jt| td}| |dd< |S )av  Convert sequence to a 1-D NumPy array of object dtype.

    numpy.array constructor has a similar use but it's output
    is ambiguous. It can be 1-D NumPy array of object dtype if
    the input is a ragged array, but if the input is a list of
    equal length arrays, then the output is a 2D numpy.array.
    _to_object_array solves this ambiguity by guarantying that
    the output is a 1-D NumPy array of objects for any input.

    Parameters
    ----------
    sequence : array-like of shape (n_elements,)
        The sequence to be converted.

    Returns
    -------
    out : ndarray of shape (n_elements,), dtype=object
        The converted sequence into a 1-D NumPy array of object dtype.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils import _to_object_array
    >>> _to_object_array([np.array([0]), np.array([1])])
    array([array([0]), array([1])], dtype=object)
    >>> _to_object_array([np.array([0]), np.array([1, 2])])
    array([array([0]), array([1, 2])], dtype=object)
    >>> _to_object_array([np.array([0]), np.array([1, 2])])
    array([array([0]), array([1, 2])], dtype=object)
    r;   N)r9   emptyrl   object)sequenceoutr-   r-   r0   _to_object_arrayZ  s    r   c                 C   s0   |t | krtdt j|td}d|| < |S )aS  Convert list of indices to boolean mask.

    Parameters
    ----------
    indices : list-like
        List of integers treated as indices.
    mask_length : int
        Length of boolean mask to be generated.
        This parameter must be greater than max(indices).

    Returns
    -------
    mask : 1d boolean nd-array
        Boolean array that is True where indices are present, else False.

    Examples
    --------
    >>> from sklearn.utils import indices_to_mask
    >>> indices = [1, 2 , 3, 4]
    >>> indices_to_mask(indices, 5)
    array([False,  True,  True,  True,  True])
    z-mask_length must be greater than max(indices)r   T)r9   maxrg   rC   rE   )rs   Zmask_lengthr@   r-   r-   r0   r!   ~  s
    c                 C   sX   d|  }|dkrd|d  }nd| }d||f }dt | t | }d||d |f S )	zCreate one line message for logging purposes.

    Parameters
    ----------
    source : str
        String indicating the source or the reference of the message.

    message : str
        Short message.

    time : int
        Time in seconds.
    z[%s] <   z%4.1fminz %5.1fsz %s, total=%sF   z%s%s%s.)rl   )sourcemessagetimeZstart_messageZtime_strZend_messageZdots_lenr-   r-   r0   _message_with_time  s    r   c                 c   s:   |dkrdV  n&t  }dV  tt| |t  |  dS )aj  Log elapsed time to stdout when the context is exited.

    Parameters
    ----------
    source : str
        String indicating the source or the reference of the message.

    message : str, default=None
        Short message. If None, nothing will be printed.

    Returns
    -------
    context_manager
        Prints elapsed time upon exit if verbose.
    N)timeitZdefault_timerprintr   )r   r   ri   r-   r-   r0   _print_elapsed_time  s
    r   )
max_n_rowsworking_memoryc                C   s`   |dkrt  d }t|d |  }|dk	r4t||}|dk r\td|t| d f  d}|S )a  Calculate how many rows can be processed within `working_memory`.

    Parameters
    ----------
    row_bytes : int
        The expected number of bytes of memory that will be consumed
        during the processing of each row.
    max_n_rows : int, default=None
        The maximum return value.
    working_memory : int or float, default=None
        The number of rows to fit inside this number of MiB will be
        returned. When None (default), the value of
        ``sklearn.get_config()['working_memory']`` is used.

    Returns
    -------
    int
        The number of rows which can be processed within `working_memory`.

    Warns
    -----
    Issues a UserWarning if `row_bytes exceeds `working_memory` MiB.
    Nr   i   r   zOCould not adhere to working_memory config. Currently %.0fMiB, %.0fMiB required.g      >)r   rN   r   rx   warnr9   ceil)Z	row_bytesr   r   Zchunk_n_rowsr-   r-   r0   get_chunk_n_rows  s    

r   c              
   C   s4   t t" ddlm} | |kW  5 Q R  S Q R X dS )a  Test if x is pandas.NA.

    We intentionally do not use this function to return `True` for `pd.NA` in
    `is_scalar_nan`, because estimators that support `pd.NA` are the exception
    rather than the rule at the moment. When `pd.NA` is more universally
    supported, we may reconsider this decision.

    Parameters
    ----------
    x : any type

    Returns
    -------
    boolean
    r   )NAF)r   ImportErrorpandasr   )r   r   r-   r-   r0   _is_pandas_na  s    
r   c                 C   s   t | tjot| S )a}  Test if x is NaN.

    This function is meant to overcome the issue that np.isnan does not allow
    non-numerical types as input, and that np.nan is not float('nan').

    Parameters
    ----------
    x : any type
        Any scalar value.

    Returns
    -------
    bool
        Returns true if x is NaN, and false otherwise.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils import is_scalar_nan
    >>> is_scalar_nan(np.nan)
    True
    >>> is_scalar_nan(float("nan"))
    True
    >>> is_scalar_nan(None)
    False
    >>> is_scalar_nan("")
    False
    >>> is_scalar_nan([np.nan])
    False
    )rF   r   Realmathisnanr   r-   r-   r0   is_scalar_nan  s    r   c                 C   s   t |}| |   | }t|}t||  }|dkr|| }tt|ddd }|D ]V}t||k\}	tt	|	|}
|j
|	|
dd}	||	  d7  < ||
8 }|dkr` qq`|tS )a   Computes approximate mode of multivariate hypergeometric.

    This is an approximation to the mode of the multivariate
    hypergeometric given by class_counts and n_draws.
    It shouldn't be off by more than one.

    It is the mostly likely outcome of drawing n_draws many
    samples from the population given by class_counts.

    Parameters
    ----------
    class_counts : ndarray of int
        Population per class.
    n_draws : int
        Number of draws (samples to draw) from the overall population.
    rng : random state
        Used to break ties.

    Returns
    -------
    sampled_classes : ndarray of int
        Number of samples drawn from each class.
        np.sum(sampled_classes) == n_draws

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.utils import _approximate_mode
    >>> _approximate_mode(class_counts=np.array([4, 2]), n_draws=3, rng=0)
    array([2, 1])
    >>> _approximate_mode(class_counts=np.array([5, 2]), n_draws=4, rng=0)
    array([3, 1])
    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
    ...                   n_draws=2, rng=0)
    array([0, 1, 1, 0])
    >>> _approximate_mode(class_counts=np.array([2, 2, 2, 1]),
    ...                   n_draws=2, rng=42)
    array([1, 1, 0, 0])
    r   Nr   F)r   r   r   )r   sumr9   floorrN   sortr   wherer   rl   r   r   )r   Zn_drawsrngZ
continuousZflooredZneed_to_add	remainderr|   valueZindsZadd_nowr-   r-   r0   r   5  s     (
r   c              
   C   sD   zddl }W n2 tk
r> } ztd| |W 5 d}~X Y nX dS )aK  Raise ImportError with detailed error message if mpl is not installed.

    Plot utilities like any of the Display's plotting functions should lazily import
    matplotlib and call this helper before any computation.

    Parameters
    ----------
    caller_name : str
        The name of the caller that requires matplotlib.
    r   NzP{} requires matplotlib. You can install matplotlib with `pip install matplotlib`)
matplotlibr   rq   )caller_namer   r   r-   r-   r0   r&   z  s    c              
   C   sF   zddl }|W S  tk
r@ } ztd| |W 5 d}~X Y nX dS )as  Raise ImportError with detailed error message if pandas is not installed.

    Plot utilities like :func:`fetch_openml` should lazily import
    pandas and call this helper before any computation.

    Parameters
    ----------
    caller_name : str
        The name of the caller that requires pandas.

    Returns
    -------
    pandas
        The pandas package.
    r   Nz{} requires pandas.)r   r   rq   )r   r   r   r-   r-   r0   check_pandas_support  s
    r   )T)N)V__doc__collections.abcr   
contextlibr   	itertoolsr   r   r   r   platformstructr   r   rx   r4   r9   Zscipy.sparser   Z
murmurhashr	   Zclass_weightr
   r    r   
exceptionsr   deprecationr   Z	discoveryr   fixesr   r   Z_estimator_html_reprr   Z
validationr   r   r   r   r   r   r   r   r   r   r   r   Z_bunchr    r"   r#   __all__python_implementationZIS_PYPYcalcsizeZ	_IS_32BITr8   rB   rD   rM   rT   rX   ra   rt   r}   r   r$   r%   r   r   r   r   r   r   r!   r   r   r   r   r   r   r&   r   r-   r-   r-   r0   <module>   s   4!%	
FL%E G65$ )"E