U
    ,‰d	%  ã                   @   s6  d dl Z d dlmZ d dlmZ ddlmZ ddlmZ dd„ Z	d	d
„ Z
dd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zdd „ Zd!d"„ Zd#d$„ Zd%d&„ Zd'd(„ Zd)d*„ Zd+d,„ Zd-d.„ Zd/d0„ Zd1d2„ Zd3d4„ Zd5d6d7d8d9d:d;d<d=d>d?d@dAdBdCœZ dDdE„ Z!G dFdG„ dGe"ƒZ#dHdI„ Z$dS )Jé    N)Úir)Úcgutilsé   )Únvvm)Úcurrent_contextc                 C   sN   dt |ƒ d }t t |¡t t |¡¡t |¡t |¡f¡}t | ||¡S )NZ___numba_atomic_iZ	_cas_hack)Ústrr   ÚFunctionTypeÚIntTypeÚPointerTyper   Úget_or_insert_function)ÚlmodÚisizeÚfnameÚfnty© r   ú8/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/nvvmutils.pyÚdeclare_atomic_cas_int   s    þÿr   c                 C   sD   t  ¡ jr(|  |||dd¡}|  |d¡S |  t||ƒ|||f¡S d S )NÚ	monotonicr   )r   ZNVVMZ	is_nvvm70ZcmpxchgZextract_valueÚcallr   )Úbuilderr   r   ÚptrÚcmpÚvalÚoutr   r   r   Úatomic_cmpxchg   s    
ÿr   c                 C   s6   d}t  t  ¡ t  t  ¡ d¡t  ¡ f¡}t | ||¡S )Nz)llvm.numba_nvvm.atomic.load.add.f32.p0f32r   ©r   r   Z	FloatTyper
   r   r   ©r   r   r   r   r   r   Údeclare_atomic_add_float32   s
    
ÿr   c                 C   sH   t ƒ jjdkrd}nd}t t ¡ t t ¡ ¡t ¡ f¡}t | ||¡S )N)é   r   z)llvm.numba_nvvm.atomic.load.add.f64.p0f64Z___numba_atomic_double_add)	r   ZdeviceZcompute_capabilityr   r   Ú
DoubleTyper
   r   r   r   r   r   r   Údeclare_atomic_add_float64&   s    
ÿr    c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_float_subr   r   r   r   r   Údeclare_atomic_sub_float320   s
    
ÿr!   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_double_sub©r   r   r   r
   r   r   r   r   r   r   Údeclare_atomic_sub_float647   s
    
ÿr#   c                 C   s:   d}t  t  d¡t  t  d¡¡t  d¡f¡}t | ||¡S )Nz"llvm.nvvm.atomic.load.inc.32.p0i32é    ©r   r   r	   r
   r   r   r   r   r   r   Údeclare_atomic_inc_int32>   s
    ÿr&   c                 C   s:   d}t  t  d¡t  t  d¡¡t  d¡f¡}t | ||¡S )NZ___numba_atomic_u64_incé@   r%   r   r   r   r   Údeclare_atomic_inc_int64E   s
    ÿr(   c                 C   s:   d}t  t  d¡t  t  d¡¡t  d¡f¡}t | ||¡S )Nz"llvm.nvvm.atomic.load.dec.32.p0i32r$   r%   r   r   r   r   Údeclare_atomic_dec_int32L   s
    ÿr)   c                 C   s:   d}t  t  d¡t  t  d¡¡t  d¡f¡}t | ||¡S )NZ___numba_atomic_u64_decr'   r%   r   r   r   r   Údeclare_atomic_dec_int64S   s
    ÿr*   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_float_maxr   r   r   r   r   Údeclare_atomic_max_float32Z   s
    
ÿr+   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_double_maxr"   r   r   r   r   Údeclare_atomic_max_float64a   s
    
ÿr,   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_float_minr   r   r   r   r   Údeclare_atomic_min_float32h   s
    
ÿr-   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_double_minr"   r   r   r   r   Údeclare_atomic_min_float64o   s
    
ÿr.   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_float_nanmaxr   r   r   r   r   Údeclare_atomic_nanmax_float32v   s
    
ÿr/   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_double_nanmaxr"   r   r   r   r   Údeclare_atomic_nanmax_float64}   s
    
ÿr0   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_float_nanminr   r   r   r   r   Údeclare_atomic_nanmin_float32„   s
    
ÿr1   c                 C   s4   d}t  t  ¡ t  t  ¡ ¡t  ¡ f¡}t | ||¡S )NZ___numba_atomic_double_nanminr"   r   r   r   r   Údeclare_atomic_nanmin_float64‹   s
    
ÿr2   c                 C   s,   d}t  t  d¡t  d¡f¡}t | ||¡S )NZcudaCGGetIntrinsicHandler'   r$   ©r   r   r	   r   r   r   r   r   r   Ú declare_cudaCGGetIntrinsicHandle’   s
    
ÿr4   c                 C   s4   d}t  t  d¡t  d¡t  d¡f¡}t | ||¡S )NZcudaCGSynchronizer$   r'   r3   r   r   r   r   Údeclare_cudaCGSynchronize™   s
    ÿr5   c           
      C   s€   t jdt jdt jdi| }t|ƒ}dddœ ||¡}d| d }||||f }t |¡}t ||¡}t ||g¡}	t	 
| |	|¡S )	NZsharedÚlocalZconstantZf32Zf64)ÚfloatÚdoublezllvm.nvvm.ptr.z.to.gen.p0%s.p%d%s)r   ZADDRSPACE_SHAREDZADDRSPACE_LOCALÚADDRSPACE_CONSTANTr   Úgetr   r
   r   r   r   )
r   ZelemtypeÚ	addrspaceZaddrspacenameZtynameZs2g_name_fmtZs2g_nameZelem_ptr_tyZelem_ptr_ty_addrspaceZs2g_fntyr   r   r   Úinsert_addrspace_conv    s"       ýü
r<   c           	      C   s†   | j jj}t | d¡d ¡}tj||jdtj	d}d|_
d|_||_t d¡}t |tj	¡}|  ||¡}t||tj	ƒ}|  ||g¡S )Nzutf-8ó    Ú_str)Únamer;   ZinternalTé   )Zbasic_blockÚfunctionÚmoduler   Zmake_bytearrayÚencodeZadd_global_variableÚtyper   r9   ÚlinkageZglobal_constantZinitializerr   r	   r
   Zbitcastr<   r   )	r   Úvaluer   ÚcvalÚglZchartyZconstcharptrtyZcharptrÚconvr   r   r   Údeclare_string°   s    
ÿ
rJ   c                 C   s8   t  t  d¡¡}t  t  d¡||g¡}t | |d¡}|S )Nr@   r$   Úvprintf)r   r
   r	   r   r   r   )r   Z	voidptrtyZ	vprintftyrK   r   r   r   Údeclare_vprintÁ   s    rL   zllvm.nvvm.read.ptx.sreg.tid.xzllvm.nvvm.read.ptx.sreg.tid.yzllvm.nvvm.read.ptx.sreg.tid.zzllvm.nvvm.read.ptx.sreg.ntid.xzllvm.nvvm.read.ptx.sreg.ntid.yzllvm.nvvm.read.ptx.sreg.ntid.zzllvm.nvvm.read.ptx.sreg.ctaid.xzllvm.nvvm.read.ptx.sreg.ctaid.yzllvm.nvvm.read.ptx.sreg.ctaid.zz llvm.nvvm.read.ptx.sreg.nctaid.xz llvm.nvvm.read.ptx.sreg.nctaid.yz llvm.nvvm.read.ptx.sreg.nctaid.zz llvm.nvvm.read.ptx.sreg.warpsizezllvm.nvvm.read.ptx.sreg.laneid)ztid.xztid.yztid.zzntid.xzntid.yzntid.zzctaid.xzctaid.yzctaid.zznctaid.xznctaid.yznctaid.zZwarpsizeZlaneidc                 C   s6   | j }t t d¡d¡}t ||t| ¡}|  |d¡S )Nr$   r   )rB   r   r   r	   r   r   ÚSREG_MAPPINGr   )r   r?   rB   r   Úfnr   r   r   Ú	call_sregâ   s    rO   c                   @   s<   e Zd Zdd„ Zdd„ Zdd„ Zdd„ Zd	d
„ Zdd„ ZdS )ÚSRegBuilderc                 C   s
   || _ d S ©N)r   )Úselfr   r   r   r   Ú__init__ê   s    zSRegBuilder.__init__c                 C   s   t | jd| ƒS )Nztid.%s©rO   r   ©rR   Úxyzr   r   r   Útidí   s    zSRegBuilder.tidc                 C   s   t | jd| ƒS )Nzctaid.%srT   rU   r   r   r   Úctaidð   s    zSRegBuilder.ctaidc                 C   s   t | jd| ƒS )Nzntid.%srT   rU   r   r   r   Úntidó   s    zSRegBuilder.ntidc                 C   s   t | jd| ƒS )Nz	nctaid.%srT   rU   r   r   r   Únctaidö   s    zSRegBuilder.nctaidc                 C   s:   |   |¡}|  |¡}|  |¡}| j | j ||¡|¡}|S rQ   )rW   rY   rX   r   ÚaddÚmul)rR   rV   rW   rY   rZ   Úresr   r   r   Úgetdimù   s
    


zSRegBuilder.getdimN)	Ú__name__Ú
__module__Ú__qualname__rS   rW   rX   rY   rZ   r^   r   r   r   r   rP   é   s   rP   c                    sD   t | ƒ‰ ‡ fdd„dD ƒ}tt |d |¡ƒ}|dkr<|d S |S d S )Nc                 3   s   | ]}ˆ   |¡V  qd S rQ   )r^   )Ú.0rV   ©Zsregr   r   Ú	<genexpr>  s     z get_global_id.<locals>.<genexpr>rV   r   r   )rP   ÚlistÚ	itertoolsÚislice)r   ZdimÚitÚseqr   rc   r   Úget_global_id  s    rj   )%rf   Zllvmliter   Z
numba.corer   Zcudadrvr   Úapir   r   r   r   r    r!   r#   r&   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r4   r5   r<   rJ   rL   rM   rO   ÚobjectrP   rj   r   r   r   r   Ú<module>   sZ   	
î