U
    ,dp                     @   s  d dl Z d dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZmZmZ dd Zdd	 Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Z d&d' Z!d(d) Z"d*d+ Z#d,d- Z$d.d/ Z%d0d1 Z&d2d3 Z'd4d5 Z(d6d7 Z)d8d9 Z*d:d; Z+d<d= Z,ej-d>d?d@dA Z.ej-d>d?dBdC Z/dDdE Z0dFdG Z1dHdI Z2dJdK Z3dLdM Z4dNdO Z5dPdQ Z6dRdS Z7dTdU Z8dVdW Z9dXdY Z:dZd[ Z;d\d] Z<d^d_ Z=d`da Z>dbdc Z?ddde Z@G dfdg dgeZAeBdhkreC  dS )i    N)cudaint64)compile_ptx)f2)unittestCUDATestCaseskip_on_cudasimskip_unless_cc_53c                 C   s   t jj}|| d< d S Nr   r   	threadIdxxaryi r   K/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/tests/cudapy/test_intrinsics.pysimple_threadidx   s    r   c                 C   s   t jj}|| |< d S Nr   r   r   r   r   fill_threadidx   s    r   c                 C   s>   t jj}t jj}t jj}|d |d  |d  | |||f< d S N   )r   r   r   yz)r   r   jkr   r   r   fill3d_threadidx   s    r   c                 C   s   t d}|| |< d S r   r   gridr   r   r   r   simple_grid1d   s    
r   c                 C   s"   t d\}}|| | ||f< d S N   r   )r   r   r   r   r   r   simple_grid2d#   s    r"   c                 C   s(   t d}t d}|dkr$|| d< d S )Nr   r   r   r   gridsize)r   r   r   r   r   r   simple_gridsize1d(   s    

r%   c                 C   s@   t d\}}t d\}}|dkr<|dkr<|| d< || d< d S )Nr!   r   r   r#   )r   r   r   r   r   r   r   r   simple_gridsize2d/   s
    r&   c           	      C   sp   t d\}}t jjt jj }t jjt jj }| j\}}t|||D ]&}t|||D ]}|| | ||f< qTqDd S r    )r   r   gridDimr   blockDimr   shaperange)	cstartXstartYgridXgridYheightwidthr   r   r   r   r   intrinsic_forloop_step7   s    
r2   c                 C   s   t || d< d S r
   )r   Zpopcr   r+   r   r   r   simple_popcB   s    r4   c                 C   s   t |||| d< d S r
   )r   fmar   abr+   r   r   r   
simple_fmaF   s    r9   c                 C   s   t j|d |d | d< d S r
   r   fp16Zhaddr   r7   r8   r   r   r   simple_haddJ   s    r=   c                 C   s   t j||| d< d S r
   r:   r<   r   r   r   simple_hadd_scalarN   s    r>   c                 C   s$   t j|d |d |d | d< d S r
   r   r;   Zhfmar6   r   r   r   simple_hfmaR   s    r@   c                 C   s   t j|||| d< d S r
   r?   r6   r   r   r   simple_hfma_scalarV   s    rA   c                 C   s   t j|d |d | d< d S r
   r   r;   Zhsubr<   r   r   r   simple_hsubZ   s    rC   c                 C   s   t j||| d< d S r
   rB   r<   r   r   r   simple_hsub_scalar^   s    rD   c                 C   s   t j|d |d | d< d S r
   r   r;   Zhmulr<   r   r   r   simple_hmulb   s    rF   c                 C   s   t j||| d< d S r
   rE   r<   r   r   r   simple_hmul_scalarf   s    rG   c                 C   s   t j|d | d< d S r
   r   r;   Zhnegr   r7   r   r   r   simple_hnegj   s    rJ   c                 C   s   t j|| d< d S r
   rH   rI   r   r   r   simple_hneg_scalarn   s    rK   c                 C   s   t j|d | d< d S r
   r   r;   ZhabsrI   r   r   r   simple_habsr   s    rM   c                 C   s   t j|| d< d S r
   rL   rI   r   r   r   simple_habs_scalarv   s    rN   c                 C   s   t j||| d< d S r
   )r   r;   Zheqr<   r   r   r   simple_heq_scalarz   s    rO   c                 C   s   t j||| d< d S r
   )r   r;   hner<   r   r   r   simple_hne_scalar~   s    rQ   c                 C   s   t j||| d< d S r
   )r   r;   hger<   r   r   r   simple_hge_scalar   s    rS   c                 C   s   t j||| d< d S r
   )r   r;   Zhgtr<   r   r   r   simple_hgt_scalar   s    rT   c                 C   s   t j||| d< d S r
   )r   r;   hler<   r   r   r   simple_hle_scalar   s    rV   c                 C   s   t j||| d< d S r
   r   r;   hltr<   r   r   r   simple_hlt_scalar   s    rY   T)Zdevicec                 C   s   t j| |S r   rW   r   r   r   r   r   
hlt_func_1   s    r[   c                 C   s   t j| |S r   rW   rZ   r   r   r   
hlt_func_2   s    r\   c                 C   s   t ||ot||| d< d S r
   )r[   r\   rr7   r8   r+   r   r   r   test_multiple_hcmp_1   s    r_   c                 C   s    t ||otj||| d< d S r
   )r[   r   r;   rX   r]   r   r   r   test_multiple_hcmp_2   s    r`   c                 C   s    t ||otj||| d< d S r
   )r[   r   r;   rR   r]   r   r   r   test_multiple_hcmp_3   s    ra   c                 C   s$   t j||ot j||| d< d S r
   rW   r]   r   r   r   test_multiple_hcmp_4   s    rb   c                 C   s$   t j||ot j||| d< d S r
   )r   r;   rX   rR   r]   r   r   r   test_multiple_hcmp_5   s    rc   c                 C   s   t j||| d< d S r
   )r   r;   Zhmaxr<   r   r   r   simple_hmax_scalar   s    rd   c                 C   s   t j||| d< d S r
   )r   r;   Zhminr<   r   r   r   simple_hmin_scalar   s    re   c                 C   s   t || d< d S r
   )r   ZcbrtrI   r   r   r   simple_cbrt   s    rf   c                 C   s   t || d< d S r
   )r   Zbrevr3   r   r   r   simple_brev   s    rg   c                 C   s   t || d< d S r
   )r   Zclzr3   r   r   r   
simple_clz   s    rh   c                 C   s   t || d< d S r
   )r   Zffsr3   r   r   r   
simple_ffs   s    ri   c                 C   s   t || d< d S r
   roundr3   r   r   r   simple_round   s    rl   c                 C   s   t ||| d< d S r
   rj   )r   r+   ndigitsr   r   r   simple_round_to   s    rn   c                 C   sF   t d}| | dkr:|d dkr0|| | |< qBd| |< nd| |< d S )Nr      r!   r         r   )r7   r8   r+   r   r   r   r   branching_with_ifs   s    

rr   c                 C   sB   t d}t |d dk|| d}t | | dk|d| |< d S )Nr   r!   r   rp   ro   rq   )r   r   selp)r7   r8   r+   r   innerr   r   r   branching_with_selps   s    
ru   c                 C   s   t d}t j| |< d S r   )r   r   Zlaneidr   r   r   r   simple_laneid   s    
rv   c                 C   s   t j| d< d S r
   )r   Zwarpsize)r   r   r   r   simple_warpsize   s    rw   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Ze	ddd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zed d! Zed"d# Ze	d$d%d& Zed'd( Zed)d* Ze	d$d+d, Zed-d. Zed/d0 Ze	d$d1d2 Zed3d4 Zed5d6 Ze	d$d7d8 Zed9d: Z ed;d< Z!e	d$d=d> Z"ed?d@ Z#edAdB Z$e	d$dCdD Z%edEdF Z&edGdH Z'edIdJ Z(edKdL Z)dMdN Z*dOdP Z+dQdR Z,e	dSdTdU Z-dVdW Z.dXdY Z/dZd[ Z0d\d] Z1e	dSd^d_ Z2d`da Z3dbdc Z4ddde Z5dfdg Z6e	dSdhdi Z7djdk Z8dldm Z9dndo Z:dpdq Z;drds Z<e	dtdudv Z=dwdx Z>dydz Z?e	dtd{d| Z@d}d~ ZAdS )TestCudaIntrinsicc                 C   s@   t dt}tjdtjd}|d | | |d dk d S )Nvoid(int32[:])r   dtyper   r   r   )r   jitr   nponesint32
assertTrueselfcompiledr   r   r   r   test_simple_threadidx   s    z'TestCudaIntrinsic.test_simple_threadidxc                 C   sZ   t dt}d}tj|tjd}tj|tjd}|d|f | | t||k d S )Nry   
   rz   r   )	r   r}   r   r~   r   r   aranger   all)r   r   Nr   expr   r   r   test_fill_threadidx   s    z%TestCudaIntrinsic.test_fill_threadidxc                    sN   d\  fdd} fdd}| }| }|  t||k d S )N)ro         c                     s>   t dt} tj ftjd}| d ff | |S )Nzvoid(int32[:,:,::1])rz   r   )r   r}   r   r~   zerosr   r   r   XYZr   r   c_contigous  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.c_contigousc                     sD   t dt} ttj ftjd}| d ff | |S )Nzvoid(int32[::1,:,:])rz   r   )r   r}   r   r~   Zasfortranarrayr   r   r   r   r   r   f_contigous	  s    z<TestCudaIntrinsic.test_fill3d_threadidx.<locals>.f_contigous)r   r~   r   )r   r   r   Zc_resZf_resr   r   r   test_fill3d_threadidx   s    
z'TestCudaIntrinsic.test_fill3d_threadidxc                 C   s\   t dt}d\}}|| }tj|tjd}|||f | | t|t|k d S )Nvoid(int32[::1])rq      rz   )	r   r}   r   r~   emptyr   r   r   r   )r   r   ntidnctaidZnelemr   r   r   r   test_simple_grid1d  s    z$TestCudaIntrinsic.test_simple_grid1dc           	      C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}| }|||f | t|jd D ](}t|jd D ]}|| |||f< q~ql| 	t
||k d S Nzvoid(int32[:,::1])ro   rq   r   r   r   r   rz   )r   r}   r"   r~   r   r   copyr*   r)   r   r   )	r   r   r   r   r)   r   r   r   r   r   r   r   test_simple_grid2d  s     z$TestCudaIntrinsic.test_simple_grid2dc                 C   sN   t dt}d\}}tjdtjd}|||f | | |d ||  d S )Nr   r   r   rz   r   )r   r}   r%   r~   r   r   assertEqualr   r   r   r   r   r   r   r   test_simple_gridsize1d*  s
    z(TestCudaIntrinsic.test_simple_gridsize1dzTests PTX emissionc           
      C   s  t d d  t t d d  f}t|t}t|t}d}d}tjddtj d}| }d|d d< tj|tj d}||df ||| |	|}	| 
d	ttd
|	 tjj||dd tj|tj d}||df ||| |	|}	| 
dttd
|	 tjj||dd d S )N    r      )r)   Z
fill_valuer{   rq   r   rz   r   r!   z	\s+bra\s+Z	branching)err_msgr   rs   )r   r   r}   rr   ru   r~   fullr   r   Zinspect_asmr   lenrefindalltestingZassert_array_equal)
r   sigZcu_branching_with_ifsZcu_branching_with_selpsnr8   r+   expectedr7   ptxr   r   r   	test_selp1  s$    

zTestCudaIntrinsic.test_selpc                 C   sr   t dt}d}d}tjdtjd}|||f | | |d |d |d   | |d |d |d   d S )Nr   r   r   r!   rz   r   r   )r   r}   r&   r~   r   r   r   r   r   r   r   test_simple_gridsize2dJ  s    z(TestCudaIntrinsic.test_simple_gridsize2dc              	   C   s   t dt}d}d}|d |d  |d |d  f}tj|tjd}|||f | |\}}|j\}}	tt|d t|d D ]j\}
}||
 ||  }}t||	|D ]B}t|||D ]0}| 	|||f || k|||f || f qqqd S r   )
r   r}   r2   r~   r   r   r)   zipr*   r   )r   r   r   r   r)   r   r.   r/   r0   r1   r   r   r,   r-   r   r   r   r   r   test_intrinsic_forloop_stepT  s     
"z-TestCudaIntrinsic.test_intrinsic_forloop_stepc                 C   sF   t jdd }tjdtjdddd}|d | tj|d d S )Nc                 S   s:   t d\}}}t d\}}}|| | | |||f< d S Nrq   r#   )outr   r   r   r7   r8   r+   r   r   r   foof  s    z*TestCudaIntrinsic.test_3dgrid.<locals>.fooi  rz   	   )rq   rq   rq   r   )r   r}   r~   r   r   reshaper   Zassert_equal)r   r   arrr   r   r   test_3dgride  s
    
zTestCudaIntrinsic.test_3dgridc                 C   sZ   t jdd }d\}}}tj|| | tjd|||}|d | | t| d S )Nc           	      S   s   t d\}}}t d\}}}|t jjt jjt jj  kor|t jjt jjt jj  kor|t jjt jjt jj  k}|t jjt j	j ko|t jjt j	j ko|t jjt j	j k}|o|| |||f< d S r   )
r   r   r$   r   r   ZblockIdxr(   r   r   r'   )	r   r   r   r   r7   r8   r+   Zgrid_is_rightZgridsize_is_rightr   r   r   r   r  s    z,TestCudaIntrinsic.test_3dgrid_2.<locals>.foo)   r      rz   ))ro   rq   r!   )rq   r!   ro   )r   r}   r~   r   Zbool_r   r   r   )r   r   r   r   r   r   r   r   r   test_3dgrid_2q  s    

"zTestCudaIntrinsic.test_3dgrid_2c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], uint32)r   rz   r|      r   ro   r   r}   r4   r~   r   r   assertEqualsr   r   r   r   test_popc_u4  s    zTestCudaIntrinsic.test_popc_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(int32[:], uint64)r   rz   r|   l        @ r   ro   r   r   r   r   r   test_popc_u8  s    zTestCudaIntrinsic.test_popc_u8c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f4[:], f4, f4, f4)r   rz   r|          @      @      @r   r   )r   r}   r9   r~   r   float32r   assert_allcloser   r   r   r   test_fma_f4  s    zTestCudaIntrinsic.test_fma_f4c                 C   sF   t dt}tjdtjd}|d |ddd tj|d d	 d S )
Nzvoid(f8[:], f8, f8, f8)r   rz   r|   r   r   r   r   r   )r   r}   r9   r~   r   float64r   r   r   r   r   r   test_fma_f8  s    zTestCudaIntrinsic.test_fma_f8c                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S Nzvoid(f2[:], f2[:], f2[:])r   rz   r   r   r|   r   )	r   r}   r=   r~   r   float16arrayr   r   r   r   r   arg1arg2r   r   r   	test_hadd  s    zTestCudaIntrinsic.test_haddc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S )Nvoid(f2[:], f2, f2)r   rz   JM!	@r   r|   r   )r   r}   r>   r~   r   r   r   r   r   r   r   r   r   refr   r   r   test_hadd_scalar  s    

z"TestCudaIntrinsic.test_hadd_scalarz(Compilation unsupported in the simulatorc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   rq   cczadd.f16)r   r   r>   assertInr   argsr   _r   r   r   test_hadd_ptx  s    zTestCudaIntrinsic.test_hadd_ptxc                 C   s   t dt}tjdtjd}tjdgtjd}tjdgtjd}tjdgtjd}|d |||| tj|d || |  d S )	Nz void(f2[:], f2[:], f2[:], f2[:])r   rz   r   r   r   r|   r   )	r   r}   r@   r~   r   r   r   r   r   )r   r   r   r   r   arg3r   r   r   	test_hfma  s    zTestCudaIntrinsic.test_hfmac                 C   sp   t dt}tjdtjd}td}td}td}|d |||| || | }tj|d | d S )	Nzvoid(f2[:], f2, f2, f2)r   rz   r   r   r   r|   r   )r   r}   rA   r~   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   test_hfma_scalar  s    


z"TestCudaIntrinsic.test_hfma_scalarc                 C   s6   t d d  t t t f}tt|dd\}}| d| d S )Nr   r   z
fma.rn.f16)r   r   rA   r   r   r   r   r   test_hfma_ptx  s    zTestCudaIntrinsic.test_hfma_ptxc                 C   sl   t dt}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S r   )	r   r}   rC   r~   r   r   r   r   r   r   r   r   r   	test_hsub  s    zTestCudaIntrinsic.test_hsubc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S Nr   r   rz   r   gQ?r|   r   )r   r}   rD   r~   r   r   r   r   r   r   r   r   test_hsub_scalar  s    

z"TestCudaIntrinsic.test_hsub_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   zsub.f16)r   r   rD   r   r   r   r   r   test_hsub_ptx  s    zTestCudaIntrinsic.test_hsub_ptxc                 C   sj   t  t}tjdtjd}tjdgtjd}tjdgtjd}|d ||| tj|d ||  d S )Nr   rz   r   r   r|   r   )	r   r}   rF   r~   r   r   r   r   r   r   r   r   r   	test_hmul  s    zTestCudaIntrinsic.test_hmulc                 C   s`   t dt}tjdtjd}td}td}|d ||| || }tj|d | d S r   )r   r}   rG   r~   r   r   r   r   r   r   r   r   test_hmul_scalar  s    

z"TestCudaIntrinsic.test_hmul_scalarc                 C   s4   t d d  t t f}tt|dd\}}| d| d S )Nr   r   zmul.f16)r   r   rG   r   r   r   r   r   test_hmul_ptx  s    zTestCudaIntrinsic.test_hmul_ptxc                 C   sV   t dt}tjdtjd}tjdgtjd}|d || tj|d |  d S )Nzvoid(f2[:], f2[:])r   rz   r   r|   r   )	r   r}   rJ   r~   r   r   r   r   r   r   r   r   r   r   r   r   	test_hneg  s
    zTestCudaIntrinsic.test_hnegc                 C   sR   t dt}tjdtjd}td}|d || | }tj|d | d S )Nvoid(f2[:], f2)r   rz   r   r|   r   )r   r}   rK   r~   r   r   r   r   r   r   r   r   r   r   r   r   test_hneg_scalar  s    
z"TestCudaIntrinsic.test_hneg_scalarc                 C   s2   t d d  t f}tt|dd\}}| d| d S )Nr   r   zneg.f16)r   r   rK   r   r   r   r   r   test_hneg_ptx  s    zTestCudaIntrinsic.test_hneg_ptxc                 C   sV   t  t}tjdtjd}tjdgtjd}|d || tj|d t	| d S )Nr   rz         r|   r   )
r   r}   rM   r~   r   r   r   r   r   absr   r   r   r   	test_habs  s
    zTestCudaIntrinsic.test_habsc                 C   sT   t dt}tjdtjd}td}|d || t|}tj|d | d S )Nr   r   rz   gJM!	r|   r   )	r   r}   rN   r~   r   r   r   r   r   r   r   r   r   test_habs_scalar#  s    
z"TestCudaIntrinsic.test_habs_scalarc                 C   sN   t d d  t f}tt|dd\}}tj dk r>| |d n| d| d S )Nr   r   )r   r!   zand\.b16.*0x7FFF;zabs.f16)r   r   rN   r   Zruntimeget_versionassertRegexr   r   r   r   r   test_habs_ptx,  s
    zTestCudaIntrinsic.test_habs_ptxc              
   C   s&  t tttttf}tjtjtj	tj
tjtjf}t||D ]\}}| j|d td|}tjdtjd}tjdtjd}td}td}	td}
|d ||	|	 ||	|	}| ||d	  |d ||	|
 ||	|
}| ||d	  |d ||	| ||	|}| ||d	  W 5 Q R X q6d S )
N)opzvoid(b1[:], f2, f2)r   rz   r!   rq   ro   r|   r   )rO   rQ   rS   rT   rV   rY   operatoreqnegegtleltr   subTestr   r}   r~   r   bool8r   r   )r   fnsopsfnr   kernelr   gotr   r   Zarg4r   r   r   test_fp16_comparison5  s6        





z&TestCudaIntrinsic.test_fp16_comparisonc              
   C   s   t ttttf}|D ]x}| j|db td|}tj	dtj
d}td}td}td}|d |||| | |d	  W 5 Q R X qd S )
N)r  zvoid(b1[:], f2, f2, f2)r   rz   r   r   r   r|   r   )r_   r`   ra   rb   rc   r  r   r}   r~   r   r  r   r   )r   Z	functionsr  r   r   r   r   r   r   r   r   !test_multiple_float16_comparisonsU  s    


z3TestCudaIntrinsic.test_multiple_float16_comparisonsc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S 	Nr   r   rz   r   r   r|   r   g      @)	r   r}   rd   r~   r   r   r   r   floatr   r   r   r   	test_hmaxf  s    


zTestCudaIntrinsic.test_hmaxc                 C   s   t dt}tjdtjd}td}td}|d ||| tj|d | td}|d ||| tj|d | d S r  )	r   r}   re   r~   r   r   r   r   r  r   r   r   r   	test_hminr  s    


zTestCudaIntrinsic.test_hminc                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float32[:], float32)r   rz   r   r|   r   UUUUUU?)r   r}   rf   r~   r   r   r   r   r   r   r   Zcbrt_argr   r   r   test_cbrt_f32~  s
    zTestCudaIntrinsic.test_cbrt_f32c                 C   sJ   t dt}tjdtjd}d}|d || tj|d |d  d S )Nzvoid(float64[:], float64)r   rz   g      @r|   r   r  )r   r}   rf   r~   r   r   r   r   r  r   r   r   test_cbrt_f64  s
    zTestCudaIntrinsic.test_cbrt_f64c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint32[:], uint32)r   rz   r|   i0  r   i  )r   r}   rg   r~   r   Zuint32r   r   r   r   r   test_brev_u4  s    zTestCudaIntrinsic.test_brev_u4z.only get given a Python "int", assumes 32 bitsc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nzvoid(uint64[:], uint64)r   rz   r|   l   0  C r   l       `x)r   r}   rg   r~   r   Zuint64r   r   r   r   r   test_brev_u8  s    zTestCudaIntrinsic.test_brev_u8c                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int32)r   rz   r|      r      r   r}   rh   r~   r   r   r   r   r   r   r   test_clz_i4  s    zTestCudaIntrinsic.test_clz_i4c                 C   s@   t dt}tjdtjd}|d |d | |d d dS )	a  
        Although the CUDA Math API
        (http://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html)
        only says int32 & int64 arguments are supported in C code, the LLVM
        IR input supports i8, i16, i32 & i64 (LLVM doesn't have a concept of
        unsigned integers, just unsigned operations on integers).
        http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics
        r   r   rz   r|   r  r   r  Nr  r   r   r   r   test_clz_u4  s    	zTestCudaIntrinsic.test_clz_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S Nr  r   rz   r|   l    r   r  r   r   r   r   test_clz_i4_1s  s    z TestCudaIntrinsic.test_clz_i4_1sc                 C   sB   t dt}tjdtjd}|d |d | |d dd d S )Nr  r   rz   r|   r   r   CUDA semanticsr  r   r   r   r   test_clz_i4_0s  s    z TestCudaIntrinsic.test_clz_i4_0sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nvoid(int32[:], int64)r   rz   r|      r   /   r  r   r   r   r   test_clz_i8  s    zTestCudaIntrinsic.test_clz_i8c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr  r   rz   r|   r  r              r   r   r}   ri   r~   r   r   r   r   r   r   r   test_ffs_i4  s    zTestCudaIntrinsic.test_ffs_i4c                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr   r   rz   r|   r  r   r$  r%  r   r&  r   r   r   r   test_ffs_u4  s    zTestCudaIntrinsic.test_ffs_u4c                 C   s@   t dt}tjdtjd}|d |d | |d d d S r  r&  r   r   r   r   test_ffs_i4_1s  s    z TestCudaIntrinsic.test_ffs_i4_1sc                 C   s@   t dt}tjdtjd}|d |d | |d d d S )Nr  r   rz   r|   r   r&  r   r   r   r   test_ffs_i4_0s  s    z TestCudaIntrinsic.test_ffs_i4_0sc                 C   s^   t dt}tjdtjd}|d |d | |d d |d |d | |d d	 d S )
Nr   r   rz   r|   r!  r   r   l        !   r&  r   r   r   r   test_ffs_i8  s    zTestCudaIntrinsic.test_ffs_i8c                 C   sj   t dt}d}tj|d tjd}ttjdtjd|}|d|d f | | t	||k d S )Nry   r!   r   rz   r   )
r   r}   rv   r~   r   r   Ztiler   r   r   )r   r   countr   r   r   r   r   test_simple_laneid  s    z$TestCudaIntrinsic.test_simple_laneidc                 C   s@   t dt}tjdtjd}|d | | |d dd d S )Nry   r   rz   r|   r   r   r  )r   r}   rw   r~   r   r   r   r   r   r   r   test_simple_warpsize  s    z&TestCudaIntrinsic.test_simple_warpsizec                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float32)r   rz   r   g      g      g      g      ?g      @g      @g      @r|   r   r   r}   rl   r~   r   r   r   rk   r   r   r   r   r   r   r   test_round_f4  s
    zTestCudaIntrinsic.test_round_f4c                 C   sN   t dt}tjdtjd}dD ]&}|d || | |d t| q"d S )Nzvoid(int64[:], float64)r   rz   r0  r|   r   r1  r2  r   r   r   test_round_f8  s
    zTestCudaIntrinsic.test_round_f8c              
   C   s   t dt}tjdtjd}tjd tjdtj}t	|t
tjtj tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qpd S )N void(float32[:], float32, int32)r   rz   {   r   )r   r   r!   rq   ro   r   rp   valrm   r|   r   singleprec)r   r}   rn   r~   r   r   randomseedZastypeconcatenater   infnan	itertoolsproductr  assertPreciseEqualrk   r   r   r   valsdigitsr=  rm   r   r   r   test_round_to_f4  s    "	z"TestCudaIntrinsic.test_round_to_f4z$Overflow behavior differs on CPythonc                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )Nr5  r   rz   i,  r|   r   )	r   r}   rn   r~   r   r   finfomaxr   r   r   r   r=  rm   r   r   r   test_round_to_f4_overflow  s    z+TestCudaIntrinsic.test_round_to_f4_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
Nr5  r   rz   gQ?rq   r|   r   r>  r?  )r   r}   rn   r~   r   r   rH  rk   rO  r   r   r   test_round_to_f4_halfway+  s    z*TestCudaIntrinsic.test_round_to_f4_halfwayc              
   C   s  t dt}tjdtjd}tjd tjd}t|t	tj
tj
 tjgf d}t||D ]L\}}| j||d0 |d ||| | j|d	 t||d
d W 5 Q R X qhd}d}| j||d0 |d ||| | j|d	 t||dd W 5 Q R X d S )N void(float64[:], float64, int32)r   rz   r6  r   )r7  r8  r9  r:  r;  r   r   r!   rq   ro   r   r<  r|   r   exactr?  g`8p=<   double)r   r}   rn   r~   r   r   rA  rB  rC  r   rD  rE  rF  rG  r  rH  rk   rI  r   r   r   test_round_to_f85  s&    "z"TestCudaIntrinsic.test_round_to_f8c                 C   sT   t dt}tjdtjd}ttjj}d}|d ||| | |d | d S )NrR  r   rz   r   r|   r   )	r   r}   rn   r~   r   r   rM  rN  r   rO  r   r   r   test_round_to_f8_overflowL  s    z+TestCudaIntrinsic.test_round_to_f8_overflowc                 C   sT   t dt}tjdtjd}d}d}|d ||| | j|d t||dd	 d S )
NrR  r   rz   g\(\?rq   r|   r   rU  r?  )r   r}   rn   r~   r   r   rH  rk   rO  r   r   r   test_round_to_f8_halfwayY  s    z*TestCudaIntrinsic.test_round_to_f8_halfwayN)B__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
  r  r  r  r  r  r  r  r  r  r  r  r#  r'  r(  r)  r*  r,  r.  r/  r3  r4  rL  rP  rQ  rV  rW  rX  r   r   r   r   rx      s   



	

	




	


	
















rx   __main__)DrF  Znumpyr~   r   r   Znumbar   r   Z
numba.cudar   Znumba.core.typesr   Znumba.cuda.testingr   r   r   r	   r   r   r   r   r"   r%   r&   r2   r4   r9   r=   r>   r@   rA   rC   rD   rF   rG   rJ   rK   rM   rN   rO   rQ   rS   rT   rV   rY   r}   r[   r\   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rl   rn   rr   ru   rv   rw   rx   rY  mainr   r   r   r   <module>   s~   



    x
