U
    ,‰d¸  ã                   @   sÐ   d dl Zd dlmZmZmZ d dlmZmZm	Z	 d dl
mZ dd„ Zdd„ Zd	d
„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ ZG d!d"„ d"e	ƒZed#krÌe ¡  dS )$é    N)ÚcudaÚint32Úfloat32)Úskip_on_cudasimÚunittestÚCUDATestCase)ÚENABLE_CUDASIMc                 C   s   t  d¡}t  ¡  || |< d S ©Né   )r   ÚgridÚsyncthreads©ÚaryÚi© r   úE/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/tests/cudapy/test_sync.pyÚuseless_syncthreads   s    
r   c                 C   s   t  d¡}t  ¡  || |< d S r	   ©r   r   Úsyncwarpr   r   r   r   Úuseless_syncwarp   s    
r   c                 C   s    t  d¡}t  d¡ || |< d S )Nr
   éÿÿ  r   r   r   r   r   Úuseless_syncwarp_with_mask   s    

r   c                 C   sð   t j dt¡}t  d¡}|||< t  ¡  |dk rR|| ||d   ||< t  d¡ |dk r||| ||d   ||< t  d¡ |dk r¦|| ||d   ||< t  d¡ |d	k rÐ|| ||d	   ||< t  d
¡ |dkrì|d |d  | d< d S )Né    r
   é   r   é   éÿ   é   é   é   é   r   )r   ÚsharedÚarrayr   r   r   )ÚresÚsmr   r   r   r   Úcoop_syncwarp   s$    




r$   c                 C   sR   d}t j |t¡}t  d¡}|dkr:t|ƒD ]}|||< q,t  ¡  || | |< d S )Néd   r
   r   )r   r    r!   r   r   Úranger   )r   ÚNr#   r   Újr   r   r   Úsimple_smem4   s    

r)   c                 C   sT   t  d¡\}}t j dt¡}|d |d  |||f< t  ¡  |||f | ||f< d S )Nr   ©é
   é   r
   ©r   r   r    r!   r   r   )r   r   r(   r#   r   r   r   Úcoop_smem2d?   s
    r.   c                 C   s<   t  d¡}t j dt¡}|d ||< t  ¡  || | |< d S )Nr
   r   r   r-   )r   r   r#   r   r   r   Údyn_shared_memoryG   s
    
r/   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S ©Nr   é{   iA  )r   Zthreadfence©r   r   r   r   Úuse_threadfenceO   s    r3   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S r0   )r   Zthreadfence_blockr2   r   r   r   Úuse_threadfence_blockU   s    r4   c                 C   s,   | d  d7  < t  ¡  | d  d7  < d S r0   )r   Zthreadfence_systemr2   r   r   r   Úuse_threadfence_system[   s    r5   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_count©Úary_inÚary_outr   r   r   r   Úuse_syncthreads_counta   s    
r9   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_andr6   r   r   r   Úuse_syncthreads_andf   s    
r:   c                 C   s    t  d¡}t  | | ¡||< d S r	   )r   r   Zsyncthreads_orr6   r   r   r   Úuse_syncthreads_ork   s    
r;   c                 C   s   t rdS t ¡ j| kS d S )NT)r   r   Zget_current_deviceZcompute_capability)Úccr   r   r   Ú_safe_cc_checkp   s    r=   c                   @   s´   e Zd Zdd„ Zdd„ Zedƒdd„ ƒZedƒe e	dƒd	¡d
d„ ƒƒZ
edƒe e	dƒd	¡dd„ ƒƒZdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd S )!ÚTestCudaSyncc                 C   sT   t  d¡|ƒ}d}tj|tjd}tj|tjd}|d|f |ƒ tj ||¡ d S )Núvoid(int32[::1])r+   ©Zdtyper
   )r   ÚjitÚnpÚemptyr   ÚarangeÚtestingÚassert_equal)ÚselfÚkernelÚcompiledÚnelemr   Úexpr   r   r   Ú_test_uselessx   s    zTestCudaSync._test_uselessc                 C   s   |   t¡ d S ©N)rL   r   ©rG   r   r   r   Útest_useless_syncthreads€   s    z%TestCudaSync.test_useless_syncthreadsz#syncwarp not implemented on cudasimc                 C   s   |   t¡ d S rM   )rL   r   rN   r   r   r   Útest_useless_syncwarpƒ   s    z"TestCudaSync.test_useless_syncwarp)é   r   z'Partial masks require CC 7.0 or greaterc                 C   s   |   t¡ d S rM   )rL   r   rN   r   r   r   Útest_useless_syncwarp_with_mask‡   s    z,TestCudaSync.test_useless_syncwarp_with_maskc                 C   sP   d}d}d}t  d¡tƒ}tjdtjd}|||f |ƒ tj ||d ¡ d S )Nið  r   r
   r?   r@   r   )r   rA   r$   rB   Úzerosr   rE   rF   )rG   ÚexpectedZnthreadsZnblocksrI   r"   r   r   r   Útest_coop_syncwarp   s    zTestCudaSync.test_coop_syncwarpc              	   C   sV   t  d¡tƒ}d}tj|tjd}|d|f |ƒ |  t |tj|tjdk¡¡ d S )Nr?   r%   r@   r
   )	r   rA   r)   rB   rC   r   Ú
assertTrueÚallrD   )rG   rI   rJ   r   r   r   r   Útest_simple_smemœ   s
    zTestCudaSync.test_simple_smemc                 C   s’   t  d¡tƒ}d}tj|tjd}|d|f |ƒ t |¡}t|jd ƒD ]0}t|jd ƒD ]}|d |d  |||f< q\qJ|  	t 
||¡¡ d S )Nzvoid(float32[:,::1])r*   r@   r
   r   )r   rA   r.   rB   rC   r   Z
empty_liker&   ÚshaperV   Zallclose)rG   rI   rY   r   rK   r   r(   r   r   r   Útest_coop_smem2d£   s    
zTestCudaSync.test_coop_smem2dc              
   C   sf   t  d¡tƒ}d}tj|tjd}|d|d|jd f |ƒ |  t |dtj	|jtj
d k¡¡ d S )Nzvoid(float32[::1])é2   r@   r
   r   r   r   )r   rA   r/   rB   rC   r   ÚsizerV   rW   rD   r   )rG   rI   rY   r   r   r   r   Útest_dyn_shared_memory®   s
    z#TestCudaSync.test_dyn_shared_memoryc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   ©r
   r
   é¼  r   z
membar.gl;)
r   r   rA   r3   rB   rS   ÚassertEqualr   ÚassertInÚinspect_asm©rG   ÚsigrI   r   r   r   r   Útest_threadfence_codegenµ   s    z%TestCudaSync.test_threadfence_codegenc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   r^   r_   r   zmembar.cta;)
r   r   rA   r4   rB   rS   r`   r   ra   rb   rc   r   r   r   Útest_threadfence_block_codegen¿   s    z+TestCudaSync.test_threadfence_block_codegenc                 C   sb   t d d … f}t |¡tƒ}tjdtj d}|d |ƒ |  d|d ¡ ts^|  d| 	|¡¡ d S )Nr+   r@   r^   r_   r   zmembar.sys;)
r   r   rA   r5   rB   rS   r`   r   ra   rb   rc   r   r   r   Útest_threadfence_system_codegenÉ   s    z,TestCudaSync.test_threadfence_system_codegenc                 C   sd   t  d¡tƒ}tjdtjd}tjdtjd}d|d< d|d< |d ||ƒ |  t |dk¡¡ d S )	Núvoid(int32[:], int32[:])éH   r@   r   é   é*   )r
   ri   éF   )	r   rA   r9   rB   Úonesr   rS   rV   rW   )rG   rI   r7   r8   r   r   r   Útest_syncthreads_countÓ   s    z#TestCudaSync.test_syncthreads_countc                 C   sŠ   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S ©Nrh   r%   r@   r
   r   rj   )	r   rA   r:   rB   rm   r   rS   rV   rW   ©rG   rI   rJ   r7   r8   r   r   r   Útest_syncthreads_andÜ   s    z!TestCudaSync.test_syncthreads_andc                 C   sŠ   t  d¡tƒ}d}tj|tjd}tj|tjd}|d|f ||ƒ |  t |dk¡¡ d|d< |d|f ||ƒ |  t |dk¡¡ d S ro   )r   rA   r;   rB   rS   r   rV   rW   rp   r   r   r   Útest_syncthreads_orç   s    z TestCudaSync.test_syncthreads_orN)Ú__name__Ú
__module__Ú__qualname__rL   rO   r   rP   r   Z
skipUnlessr=   rR   rU   rX   rZ   r]   re   rf   rg   rn   rq   rr   r   r   r   r   r>   w   s.   

ÿ
ÿ


	r>   Ú__main__)ZnumpyrB   Znumbar   r   r   Znumba.cuda.testingr   r   r   Znumba.core.configr   r   r   r   r$   r)   r.   r/   r3   r4   r5   r9   r:   r;   r=   r>   rs   Úmainr   r   r   r   Ú<module>   s(   |