U
    2‰d'  ã                   @   sÊ   d dl Zd dlmZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlmZ d!dd„Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ ZdS )"é    N)ÚoptimizeÚsparse)Úassert_almost_equal)Úassert_array_equal)Úassert_array_almost_equal)Úmake_regression)ÚHuberRegressorÚLinearRegressionÚSGDRegressorÚRidge©Ú_huber_loss_and_gradienté2   é   c                 C   sj   t j d¡}t| |ddd\}}td|  ƒ}| d| |¡}d| dd||jd f¡ ||d d …f< ||fS )Nr   gš™™™™™©?)Ú	n_samplesÚ
n_featuresÚrandom_stateÚnoiseçš™™™™™¹?ç       @é   )ÚnpÚrandomÚRandomStater   ÚintÚrandintÚnormalÚshape)r   r   ÚrngÚXÚyZ	num_noiseZrandom_samples© r!   úI/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/linear_model/tests/test_huber.pyÚmake_regression_with_outliers   s       ÿ
(r#   c                  C   sX   t ƒ \} }tƒ }| | |¡ tddd}| | |¡ t|j|jdƒ t|j|jdƒ d S )Ng     @@ç        )ÚepsilonÚalphaé   é   )r#   r	   Úfitr   r   Úcoef_Ú
intercept_)r   r    ÚlrÚhuberr!   r!   r"   Ú%test_huber_equals_lr_for_high_epsilon   s    
r.   c                  C   s4   t ƒ \} }tdd}| | |¡ |j|jks0t‚d S )Nr   )Úmax_iter)r#   r   r)   Ún_iter_r/   ÚAssertionError)r   r    r-   r!   r!   r"   Útest_huber_max_iter)   s    

r2   c            
      C   s®   t j d¡} tƒ \}}|  dd|jd ¡}dd„ }dd„ }tdƒD ]f}|jd d |jd d	 fD ]D}|  |¡}t  |d
 ¡|d
< t	 
|||||dd|¡}	t|	ddƒ qbqBd S )Nr   r'   r   c                 W   s   t | f|žŽ d S )Nr   r   ©ÚxÚargsr!   r!   r"   Ú	loss_func6   s    z&test_huber_gradient.<locals>.loss_funcc                 W   s   t | f|žŽ d S )Nr   r   r3   r!   r!   r"   Ú	grad_func9   s    z&test_huber_gradient.<locals>.grad_funcé   r(   éÿÿÿÿç{®Gáz„?r   gíµ ÷Æ°>é   )r   r   r   r#   r   r   ÚrangeZrandnÚabsr   Z
check_gradr   )
r   r   r    Úsample_weightr6   r7   Ú_r   ÚwZ	grad_samer!   r!   r"   Útest_huber_gradient0   s(    
 
       ÿrA   c               	   C   s”  t ƒ \} }tƒ }| | |¡ |j}|j}tt t |j¡¡t t |j¡¡ƒ}|j| |t 	|j
d ¡d t|j| || ƒ t|j| || ƒ t ddd\} }t | t | d | d | d f¡f¡}t ||d g|d g|d gf¡}| ||¡ |j}|j}t 	| j
d ¡}d|d< d|d< |j| ||d t|j| || ƒ t|j| || ƒ t | ¡}	tƒ }
|
j|	||d t|
j| || ƒ d S )	Nr   )r>   r8   r   ©r   r   r   r'   r(   )r#   r   r)   r*   r+   Úmaxr   Zmeanr=   Zonesr   r   ZvstackZconcatenater   Ú
csr_matrix)r   r    r-   Z
huber_coefZhuber_interceptZscaleZX_newZy_newr>   ÚX_csrÚhuber_sparser!   r!   r"   Útest_huber_sample_weightsH   s2    
&&$
rG   c                  C   s`   t ƒ \} }tdd}| | |¡ t | ¡}tdd}| ||¡ t|j|jƒ t|j|jƒ d S )Nr   ©r&   )	r#   r   r)   r   rD   r   r*   r   Ú	outliers_)r   r    r-   rE   rF   r!   r!   r"   Útest_huber_sparsep   s    



rJ   c                  C   s~   t ƒ \} }tddd}| | |¡ |j}t |¡r6t‚| | d| ¡ |j}t||ƒ | d|  d| ¡ |j}t||ƒ d S )NFr$   )Úfit_interceptr&   r   )r#   r   r)   rI   r   Úallr1   r   )r   r    r-   Zn_outliers_mask_1Zn_outliers_mask_2Zn_outliers_mask_3r!   r!   r"   Útest_huber_scaling_invariant|   s    

rM   c               
   C   s   t ddd\} }tdddd}| | |¡ | |j }||j }| ||¡ t|jdd	ƒ tdd
dddddd d}| ||¡ t|j|jdƒ d S )Né
   r(   rB   Fr$   gš™™™™™õ?)rK   r&   r%   ç      ð?r'   r-   Tr   é'  )r&   ZlossÚshuffler   r/   rK   r%   Útolr   )r#   r   r)   Úscale_r   r
   r   r*   )r   r    r-   ZX_scaleZy_scaleZsgdregr!   r!   r"   Útest_huber_and_sgd_same_results   s&    

ø
rT   c                  C   s\   t ƒ \} }tddddd}| | |¡ |j ¡ }| | |¡ t|j|dƒ |jdksXt‚d S )NrO   rP   Tr   )r&   r/   Z
warm_startrR   r   r   )r#   r   r)   r*   Úcopyr   r0   r1   )r   r    Z
huber_warmZhuber_warm_coefr!   r!   r"   Útest_huber_warm_start©   s    

rV   c            
      C   sØ   t ƒ \} }tdd}| | |¡ t | |j¡|j | }t |¡|j|j	 k }| 
| | || ¡}| 
| |  ||  ¡}tdd}| | |¡ | 
| | || ¡}| 
| |  ||  ¡}	||ksÈt‚|	|ksÔt‚d S )Nr:   rH   )r#   r   r)   r   Údotr*   r+   r=   r%   rS   Zscorer   r1   )
r   r    r-   Zlinear_lossÚmaskZhuber_scoreZhuber_outlier_scoreZridgeZridge_scoreZridge_outlier_scorer!   r!   r"   Útest_huber_better_r2_score¸   s    


rY   c                  C   s.   t ddddd\} }| dk}tƒ  ||¡ d S )NéÈ   r(   g      @r   )r   r   r   r   )r   r   r)   )r   r    ZX_boolr!   r!   r"   Útest_huber_boolÏ   s    r[   )r   r   )Znumpyr   Zscipyr   r   Zsklearn.utils._testingr   r   r   Zsklearn.datasetsr   Zsklearn.linear_modelr   r	   r
   r   Zsklearn.linear_model._huberr   r#   r.   r2   rA   rG   rJ   rM   rT   rV   rY   r[   r!   r!   r!   r"   Ú<module>   s$   
(