U
    2‰dCw  ã                   @   sL  d dl Z d dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$m%Z%m&Z& d dl'm(Z( e%ƒ Z)dd„ Z*dd„ Z+dd„ Z,dd„ Z-dd„ Z.dLdd „Z/dMd!d"„Z0dNd#d$„Z1d%d&„ Z2d'd(„ Z3ej4 5d)¡d*d+„ ƒZ6ej4 5d)¡d,d-„ ƒZ7ej4 5d)¡d.d/„ ƒZ8d0d1„ Z9ej4 :d2e;d3ƒ¡d4d5„ ƒZ<ej4 5d)¡d6d7„ ƒZ=ej4 5d)¡d8d9„ ƒZ>d:d;„ Z?ej4 5d)¡d<d=„ ƒZ@ej4 5d)¡d>d?„ ƒZAd@dA„ ZBdBdC„ ZCdDdE„ ZDdFdG„ ZEej4 :dHd dIg¡dJdK„ ƒZFdS )Oé    N)Ú	logsumexp)ÚHalfMultinomialLoss)ÚLinearModelLoss)Úget_auto_step_size)Ú"_multinomial_grad_loss_all_samples)ÚLogisticRegressionÚRidge)Úmake_dataset)Ú	row_norms)Úassert_almost_equal)Úassert_array_almost_equal)Úassert_allclose)Úcompute_class_weight)Úcheck_random_state)ÚLabelEncoderÚLabelBinarizer)Ú
make_blobsÚ	load_irisÚmake_classification)Úclonec                 C   sD   | | }|dkr"t  | ¡|  S |dk r0| S | t  |¡d  S )Ng      2@g      2Àç      ð?)ÚmathÚexp)ÚpÚyÚz© r   úG/tmp/pip-unpacked-wheel-zrfo1fqw/sklearn/linear_model/tests/test_sag.pyÚ	log_dloss"   s    r   c              	   C   s    t  t  dt  | |  ¡ ¡¡S )Nr   )ÚnpÚmeanÚlogr   ©r   r   r   r   r   Úlog_loss,   s    r#   c                 C   s   | | S )Nr   r"   r   r   r   Úsquared_dloss1   s    r$   c                 C   s   t  d| |  | |  ¡S )Nç      à?)r   r    r"   r   r   r   Úsquared_loss5   s    r&   c                 C   s8   |   ¡ } t || ¡}|||ƒ}|||  | ¡ d 7 }|S )Nç       @)Úravelr   Údot)ÚwÚalphaZmyXZmyyÚlossÚpredr   r   r   r   Úget_pobj:   s
    
r.   é   FTc
                 C   sÊ  | j d | j d  }
}t | j d ¡}t | j d ¡}t |
|f¡}d}d}t |
¡}tj d¡}d}tƒ }|rtd}t|ƒD ]B}t|
ƒD ]2}t| d¡|
 ƒ}| | }| 	|¡ t 
||¡| }|||| ƒ}|d k	ræ||| 9 }|| ||  }|||  }||7 }|||< |	r4||| ddt|ƒ   8 }|rª|||  }|||< ||7 }||ddt|ƒ   9 }|	r’||| t|ƒ | | 8 }n||| t|ƒ | 8 }||| t|ƒ 8 }qŠq|||fS )Nr   r/   ç        éM   r   ç{®Gáz„?)Úshaper   ÚzerosÚrandomÚRandomStateÚsetÚrangeÚintÚrandÚaddr)   Úlen)ÚXr   Ú	step_sizer+   Ún_iterÚdlossÚsparseÚsample_weightÚfit_interceptÚsagaÚ	n_samplesÚ
n_featuresÚweightsÚsum_gradientÚgradient_memoryÚ	interceptÚintercept_sum_gradientZintercept_gradient_memoryÚrngÚdecayÚseenÚepochÚkÚidxÚentryr   ÚgradientÚupdateÚgradient_correctionr   r   r   ÚsagB   sP    

þrV   c           "      C   sÖ  || dkrt dƒ‚| jd | jd  }}t |¡}t |¡}tj|td}t |¡}t|
ƒ}d}d}d}d}tƒ }t || ¡}|rŠd}d}t|ƒD ]¼}t|ƒD ]¬}t| d¡| ƒ}| | }| 	|¡ |dkrNt|ƒD ]l}|| dkr||  ||d  ||  8  < n0||  ||d  ||| d   ||  8  < |||< qà|t 
||¡ | }|||| ƒ}|d k	r†||| 9 }|| } | || |  }!||!7 }|	rèt|ƒD ]2}||  |!| | ddt|ƒ   | 8  < q´|rV|||  }!||!7 }|!|ddt|ƒ   9 }!|	r>||| t|ƒ | |! 8 }n||| t|ƒ | 8 }|||< |d||  9 }|dkrŽ||t|ƒ  |d< n ||d  ||t|ƒ   ||< |dkrJ|dk rJt|ƒD ]j}|| dkrú||  || ||  8  < n,||  || ||| d   ||  8  < |d ||< qÊd||< ||9 }d}|d7 }q¤q–t|ƒD ]f}|| dkr’||  ||d  ||  8  < n0||  ||d  ||| d   ||  8  < q^||9 }||fS )	Nr   z:Sparse sag does not handle the case step_size * alpha == 1r   r/   ©Zdtyper0   r2   g•Ö&è.>)ÚZeroDivisionErrorr3   r   r4   r9   r   r7   r8   r:   r;   r)   r<   )"r=   r   r>   r+   r?   r@   rB   rA   rC   rD   Úrandom_staterE   rF   rG   rH   Zlast_updatedrI   rL   rJ   rK   ZwscalerM   rN   Zc_sumÚcounterrO   rP   rQ   rR   Újr   rS   rT   rU   r   r   r   Ú
sag_sparse‚   s°    ÿ




"þ

ÿþýÿ
þ
 þ"þr\   c                 C   sT   |r,dt  t j| |  dd¡| d|   S dt  t j| |  dd¡| |  S d S )Nç      @r/   ©Zaxisr   )r   ÚmaxÚsum)r=   r+   rC   Úclassificationr   r   r   Úget_step_size÷   s    (rb   c                  C   s*  d} t | dddd\}}d||dk< d}d}t|||ƒ}d	D ]è}|d
krNd}nd}t||dd| |  |ddd}| ||¡ t|||||t||dkd\}	}
t|||||t||dkd\}}t |	¡}	t 	|
¡}
t |¡}t 	|¡}t
|	|jdd t
|
|jdd t
||jdd t
||jdd q<d S )Né   é   r   çš™™™™™¹?©rE   ZcentersrY   Zcluster_stdéÿÿÿÿçš™™™™™ñ?T)rV   rD   rV   éP   é,  ç•dyáý¥=r   é
   Úovr©ÚsolverrC   ÚtolÚCÚmax_iterrY   Úmulti_classrD   )r?   r@   rC   rD   é	   ©Údecimal)r   rb   r   Úfitr\   r   rV   r   Z
atleast_2dZ
atleast_1dr   Úcoef_Ú
intercept_)rE   r=   r   r+   rC   r>   ro   r?   ÚclfrG   rJ   Úweights2Ú
intercept2r   r   r   Útest_classifier_matchingþ   s`    
ù	ø

ø





r}   c               	   C   sà   d} d}t j d¡}|j| |fd}|j|d}| |¡}d}d}d}t|||dd}	t|d	d
||  |d}
|
 ||¡ t|||	||t	|d\}}t
|||	||t	|d\}}t||
jƒ t||
jƒ t||
jƒ t||
jƒ d S )Nrl   é   ©Úsizer   éd   TF©ra   rk   rV   )rC   rp   ro   r+   rr   ©r?   r@   rC   )r   r5   r6   Únormalr)   rb   r   rw   r\   r$   rV   r   rx   ry   )rE   rF   rL   r=   Útrue_wr   r+   r?   rC   r>   rz   Zweights1Ú
intercept1r{   r|   r   r   r   Útest_regressor_matching5  sP    
ûù
	ù

r‡   zignore:The max_iter was reachedc               	   C   sð   d} d}d}t | dddd\}}tdd	d
d| |  |ddd}t|ƒ}td	d
d| |  |ddd}| ||¡ | t |¡|¡ | ||¡ t|j|||tƒ}t|j|||tƒ}	t|j|||tƒ}
t	||	dd t	|	|
dd t	|
|dd dS )z%tests if the sag pobj matches log regr   r   rc   rd   r   re   rf   rV   FgH¯¼šò×z>rl   rm   rn   )rC   rp   rq   rr   rY   rs   é   ru   N)
r   r   r   rw   ÚspÚ
csr_matrixr.   rx   r#   r   )rE   r+   rr   r=   r   Úclf1Úclf2Úclf3Úpobj1Úpobj2Úpobj3r   r   r   Ú)test_sag_pobj_matches_logistic_regressione  s>    
ù	
ú	r‘   c                  C   s  d} d}d}d}d}t j d¡}|j| |fd}|j|d}| |¡}t|dd||dd	}	t|	ƒ}
t|d
d||dd	}|	 ||¡ |
 t 	|¡|¡ | ||¡ t
|	j|||tƒ}t
|
j|||tƒ}t
|j|||tƒ}t||dd t||dd t||dd dS )z'tests if the sag pobj matches ridge regr   rl   r   Fr   rk   rV   é*   ©rC   rp   ro   r+   rr   rY   çñhãˆµøä>Zlsqrrˆ   ru   N)r   r5   r6   r„   r)   r   r   rw   r‰   rŠ   r.   rx   r&   r   )rE   rF   r+   r?   rC   rL   r=   r…   r   r‹   rŒ   r   rŽ   r   r   r   r   r   Ú&test_sag_pobj_matches_ridge_regression  sF    
úú	r•   c                  C   s  d} d}d}d}d}d}t j d¡}|j||fd}|j|d}t  ||¡d	 }	t|| |d
d}
t||d| | ||d}t|ƒ}| ||	¡ | t	 
|¡|	¡ t||	|
| |t||d\}}t||	|
| |td||d	\}}t|j ¡ | ¡ dd t|j|dd dS )z0tests if the sag regressor is computed correctlyre   rl   é(   r   gíµ ÷Æ°>Tr   r   r'   Fr‚   rV   r“   )r?   r@   rC   rY   )r?   r@   rA   rC   rY   é   ru   r/   N)r   r5   r6   r„   r)   rb   r   r   rw   r‰   rŠ   r\   r$   r   rx   r(   r   ry   )r+   rF   rE   rr   rp   rC   rL   r=   r*   r   r>   r‹   rŒ   Ú
spweights1Úspintercept1Ú
spweights2Úspintercept2r   r   r   Ú%test_sag_regressor_computed_correctly¹  sZ    úø
÷
rœ   c               
   C   s‚  t jdddgdddgdddggt jd} d}d}d}t| d	d
 ¡ }| jd }t||dd dD ]ð}dD ]æ}|rÞ|| t|ƒ }|d|  t|ƒ d }td| | |ƒ}	td| | |ƒ}
dd| |	  }dd| |
  }n,d|| t|ƒ  }d|d|  t|ƒ  }t	||d|||d}t	||d|||d}t||dd t||dd qhq`d}t
jt|d t	||d|ƒ W 5 Q R X d S )Nr/   rd   r—   rˆ   rW   ç333333ó?Fé   T)Úsquaredr   ru   )TFr]   r   rŸ   )rE   Zis_sagar!   z:Unknown loss function for SAG solver, got wrong instead of©ÚmatchZwrong)r   ÚarrayÚfloat64r
   r_   r3   r   r9   Úminr   ÚpytestÚraisesÚ
ValueError)r=   r+   rC   Zmax_squared_sumZmax_squared_sum_rE   rD   ZL_sqrZL_logZmun_sqrZmun_logZstep_size_sqrZstep_size_logZstep_size_sqr_Zstep_size_log_Úmsgr   r   r   Útest_get_auto_step_sizeö  sR    (
ÿúú	r©   Úseedr—   c                 C   s@  d\}}d}d}d}d}t j | ¡}t  |||¡ |d¡}d| ¡  }	t|d||| |d	}
t|
ƒ}|
 ||	¡ | t	 
|¡|	¡ |
 ||	¡}| ||	¡}|d
ks¦t‚|d
ks²t‚d| ¡  | |d¡ ¡  }	t|d||| d}
t|
ƒ}|
 ||	¡ | t	 
|¡|	¡ |
 ||	¡}| ||	¡}|dks.t‚|dks<t‚dS )z(tests if the sag regressor performs well)éûÿÿÿr~   rj   gü©ñÒMbP?r   re   r/   r%   rV   )rp   ro   rr   r+   rY   g\Âõ(\ï?)rp   ro   rr   r+   çÍÌÌÌÌÌÜ?N)r   r5   r6   ZlinspaceZreshaper(   r   r   rw   r‰   rŠ   ZscoreÚAssertionErrorÚrandn)rª   ZxminZxmaxrE   rp   rr   r+   rL   r=   r   r‹   rŒ   Zscore1Zscore2r   r   r   Útest_sag_regressor(  s>    ûr¯   c               
   C   s.  d} d}d}d}d}t |dddd\}}t|| |dd}t |¡}t |¡}	d	|	||d
 k< |	}tdd|  | ||d|dd}
t|
ƒ}|
 ||¡ | t 	|¡|¡ t
|||| |t|d\}}t
|||| |td|d\}}t|
j ¡ | ¡ dd t|
j|d
d t|j ¡ | ¡ dd t|j|d
d dS )z4tests if the binary classifier is computed correctlyre   é2   r”   Trd   r   rf   r‚   rg   r/   rV   r   r1   rm   ©ro   rq   rr   rp   rY   rC   rs   rƒ   )r?   r@   rA   rC   ru   N)r   rb   r   ÚuniqueÚonesr   r   rw   r‰   rŠ   r\   r   r   rx   r(   r   ry   )r+   rE   r?   rp   rC   r=   r   r>   ÚclassesÚy_tmpr‹   rŒ   Ú	spweightsÚspinterceptrš   r›   r   r   r   Ú&test_sag_classifier_computed_correctlyR  s\    


ù	ù
	ø
r¸   c                  C   sÄ  d} d}d}d}d}t |dddd\}}t|| |dd	}t |¡}td
d|  | ||d|dd}	t|	ƒ}
|	 ||¡ |
 t |¡|¡ g }g }g }g }|D ]x}t 	|¡}d|||k< t
|||| t||d\}}t
|||| t|d|d\}}| |¡ | |¡ | |¡ | |¡ qšt |¡}t |¡}t |¡}t |¡}t|ƒD ]z\}}t|	j|  ¡ ||  ¡ dd t|	j| || dd t|
j|  ¡ ||  ¡ dd t|
j| || dd qDdS )z8tests if the multiclass classifier is computed correctlyre   rc   r”   r–   Tr—   r   rf   r‚   rV   r   r1   rm   r±   rg   )r@   r?   rC   )r@   r?   rA   rC   rd   ru   r/   N)r   rb   r   r²   r   r   rw   r‰   rŠ   r³   r\   r   ÚappendÚvstackr¢   Ú	enumerater   rx   r(   r   ry   )r+   rE   rp   rr   rC   r=   r   r>   r´   r‹   rŒ   Úcoef1r†   Úcoef2r|   ÚclÚ	y_encodedr˜   r™   rš   r›   Úir   r   r   Ú&test_sag_multiclass_computed_correctlyŠ  sv    

ù	
ù
	ø








  rÁ   c                  C   sÆ   d} d}d}d}d}t j d¡}|j||fd}|j|d}t  ||¡}t  |¡}tdd	|  | ||d
d}	t|	ƒ}
|	 ||¡ |
 t	 
|¡|¡ |	 |¡}|
 |¡}t||dd t||dd dS )z(tests if classifier results match targetre   rc   rl   r2   éÈ   r   r   rV   r   r1   )ro   rq   rr   rp   rY   é   ru   N)r   r5   r6   r„   r)   Úsignr   r   rw   r‰   rŠ   Zpredictr   )r+   rF   rE   rp   rr   rL   r=   r*   r   r‹   rŒ   Zpred1Zpred2r   r   r   Útest_classifier_resultsÒ  s0    

û

rÅ   c                  C   sf  d} d}d}d}d}t |dddd\}}t|| |dd	}t |¡}t |¡}	d
|	||d k< |	}dddœ}
tdd|  | ||d|d|
d}t|ƒ}| ||¡ | t 	|¡|¡ t
ƒ }t|
t |¡|d}|| |¡ }t|||| |t||d\}}t|||| |td||d	\}}t|j ¡ | ¡ dd t|j|dd t|j ¡ | ¡ dd t|j|dd dS )z8tests binary classifier with classweights for each classre   r°   rc   r”   Trd   rl   rf   r‚   rg   r/   r¬   çš™™™™™á?)r/   rg   rV   r   r1   rm   ©ro   rq   rr   rp   rY   rC   rs   Úclass_weight©r´   r   )r?   r@   rB   rC   )r?   r@   rA   rB   rC   ru   N)r   rb   r   r²   r³   r   r   rw   r‰   rŠ   r   r   Úfit_transformr\   r   r   rx   r(   r   ry   )r+   rE   r?   rp   rC   r=   r   r>   r´   rµ   rÈ   r‹   rŒ   ÚleÚclass_weight_rB   r¶   r·   rš   r›   r   r   r   Ú#test_binary_classifier_class_weightï  sj    



ø
ø

÷
rÍ   c                  C   sú  d} d}d}d}ddddœ}d	}t |d
ddd\}}t|| |d	d}t |¡}	tdd|  | ||d|d|d}
t|
ƒ}|
 ||¡ | t |¡|¡ t	ƒ }t
|t |¡|d}|| |¡ }g }g }g }g }|	D ]x}t |¡}d|||k< t|||| |t|d\}}t|||| |t|d	d\}}| |¡ | |¡ | |¡ | |¡ qÐt |¡}t |¡}t |¡}t |¡}t|	ƒD ]z\}}t|
j|  ¡ ||  ¡ dd t|
j| || dd t|j|  ¡ ||  ¡ dd t|j| || dd qzdS )z1tests multiclass with classweights for each classre   rc   r”   r°   r¬   rÆ   g      è?)r   r/   rd   Tr—   r   rf   r‚   rV   r   r1   rm   rÇ   rÉ   rg   )r?   r@   rB   )r?   r@   rB   rA   rd   ru   r/   N)r   rb   r   r²   r   r   rw   r‰   rŠ   r   r   rÊ   r³   r\   r   r¹   rº   r¢   r»   r   rx   r(   r   ry   )r+   rE   rp   rr   rÈ   rC   r=   r   r>   r´   r‹   rŒ   rË   rÌ   rB   r¼   r†   r½   r|   r¾   r¿   r˜   r™   rš   r›   rÀ   r   r   r   Ú'test_multiclass_classifier_class_weight.  s€    

ø

ù
	ø








  rÎ   c               	   C   sL   ddgddgg} ddg}d}t jt|d tdd | |¡ W 5 Q R X d	S )
z1tests if ValueError is thrown with only one classr/   rd   r—   rˆ   z;This solver needs samples of at least 2 classes in the datar    rV   )ro   N)r¥   r¦   r§   r   rw   )r=   r   r¨   r   r   r   Útest_classifier_single_classz  s
    rÏ   c               	   C   sš   ddgddgg} ddg}d}d}t  d¡}tdd| |d}tjt|d	 | | |¡ W 5 Q R X t|d|d
}tjt|d	 | | |¡ W 5 Q R X d S )Nr   r/   rg   Fr   zQCurrent sag implementation does not handle the case step_size * alpha_scaled == 1rV   )ro   rq   rC   r    )rC   ro   r+   )ÚreÚescaper   r¥   r¦   rX   rw   r   )r=   r   rC   r+   r¨   r‹   rŒ   r   r   r   Útest_step_size_alpha_error„  s    ÿrÒ   c                  C   sô   t jt j tj¡ } }| j\}}tt |¡ƒ}t	dƒ}| 
||¡}| 
|¡}| 
|¡}t ||¡ t| ||dd\}	}
t|	|||||ƒ\}}tt|ddd}t ||f¡j}|j|| |d|d\}}|d d …d d…f j}t||ƒ t||ƒ d S )	Nr’   ©rY   ©Ú	n_classesT©Z	base_lossrC   r0   ©Zl2_reg_strengthrB   rg   )ÚirisÚdataÚtargetZastyper   r£   r3   r<   r²   r   r®   Úabsr	   r   r   r   rº   ÚTÚloss_gradientr   r   )r=   r   rE   rF   rÕ   rL   rG   rJ   Úsample_weightsZdatasetÚ_Úloss_1Úgrad_1r,   Úweights_interceptÚloss_2Úgrad_2r   r   r   Útest_multinomial_loss—  s@    


     ÿþ    ÿ

rå   c                  C   s¤  d} t  ddgddgddgddgg¡}t jddd	dgt jd
}tƒ }| |¡}t  dddgdddgg¡}t  dddg¡}t  ddddg¡}t  ||¡| }t|dd}	||	d d …t jf  }
|d d …t jf |
 |  ¡  }|d d …t jf t  	|
¡|  }t  |j
|¡}tt| ddd}t  ||f¡j
}|j|||d|d\}}|d d …d d…f j
}t||ƒ t||ƒ d}t  dddgddd gg¡}t||ƒ t||ƒ d S )!Nr—   rh   gš™™™™™@gš™™™™™Àgffffff
@gš™™™™™Àr   r/   rd   rW   re   gš™™™™™É?g333333Ó?r   gÍÌÌÌÌÌô¿r   gš™™™™™É¿gš™™™™™é?r^   rÔ   TrÖ   r0   r×   rg   gc¦>1X\'@g¯>úîÖá¿g6’á
èù¿gÇØ	/Ái@gúbïÅíì¿g›sô@g<FzQkÀ)r   r¢   r£   r   rÊ   r)   r   Znewaxisr`   r   rÜ   r   r   rº   rÝ   r   r   )rÕ   r=   r   ZlbinZY_binrG   rJ   rÞ   Z
predictionZlogsumexp_predictionr   rà   Zdiffrá   r,   râ   rã   rä   Zloss_gtZgrad_gtr   r   r   Ú"test_multinomial_loss_ground_truth¸  sF    "
  þ    ÿ


ÿ
ræ   ro   rD   c              	   C   sn   t j d¡}t|d\}}t| |dd}| ||¡ t j|jd d …< tj	t
dd | ||¡ W 5 Q R X d S )Nr’   rÓ   T)ro   rY   Z
warm_startzFloating-point under-/overflowr    )r   r5   r6   r   r   rw   Únanrx   r¥   r¦   r§   )ro   rL   r=   r   rz   r   r   r   Ú test_sag_classifier_raises_errorá  s    rè   )r/   NFNTF)r/   NNFTFr   )T)Gr   rÐ   r¥   Znumpyr   Zscipy.sparserA   r‰   Zscipy.specialr   Zsklearn._loss.lossr   Z!sklearn.linear_model._linear_lossr   Zsklearn.linear_model._sagr   Zsklearn.linear_model._sag_fastr   Zsklearn.linear_modelr   r   Zsklearn.linear_model._baser	   Zsklearn.utils.extmathr
   Zsklearn.utils._testingr   r   r   Zsklearn.utilsr   r   Zsklearn.preprocessingr   r   Zsklearn.datasetsr   r   r   Zsklearn.baser   rØ   r   r#   r$   r&   r.   rV   r\   rb   r}   r‡   ÚmarkÚfilterwarningsr‘   r•   rœ   r©   Zparametrizer8   r¯   r¸   rÁ   rÅ   rÍ   rÎ   rÏ   rÒ   rå   ræ   rè   r   r   r   r   Ú<module>   sˆ   
      ö
E       õ
u
70

'

+

<2
)

7

G

>

K
!)