U
    ,d8                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d	d
lmZ d dlmZmZ G dd dejZe de jZG dd deZG dd deZ dS )    N)ir)typingtypes	debuginfoitanium_manglercgutils)
Dispatcher)cached_property)BaseContext)MinimalCallConv)	cmathdecl   )nvvm)codegen	nvvmutilsc                       s$   e Zd Zdd Z fddZ  ZS )CUDATypingContextc                 C   sp   ddl m}m}m}m} ddlm} | |j | |j | t	j | |j | |j | |j
 d S )Nr   )cudadeclcudamathlibdevicedeclvector_typesr   )enumdecl) r   r   r   r   numba.core.typingr   install_registryregistryr   Ztyping_registry)selfr   r   r   r   r    r   5/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/target.pyload_additional_registries   s    z,CUDATypingContext.load_additional_registriesc                    s   ddl m} t|trt||sz
|j}W nj tk
r   |jsHtd|j	 }d|d< |
dd|d< |
dd|d< ||j|}||_|}Y nX tt| |S )	Nr   )CUDADispatcherz<using cpu function on device but its compilation is disabledTZdevicedebugFopt)Znumba.cuda.dispatcherr   
isinstancer   Z_CUDATypingContext__dispatcherAttributeErrorZ_can_compile
ValueErrortargetoptionscopygetZpy_funcsuperr   resolve_value_type)r   valr   r%   Zdisp	__class__r   r   r)       s"    



z$CUDATypingContext.resolve_value_type)__name__
__module____qualname__r   r)   __classcell__r   r   r+   r   r      s   r   z	[^a-z0-9]c                       s   e Zd ZdZdZd* fdd	Zedd Zedd Zd	d
 Z	dd Z
dd Zdd Zedd Zedd Zedd ZdddddZd+ddZdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Z  ZS ),CUDATargetContextTcudac                    s   t  || d S N)r(   __init__)r   Z	typingctxtargetr+   r   r   r4   D   s    zCUDATargetContext.__init__c                 C   s   t  jrtjS tjS d S r3   )r   NVVM	is_nvvm70r   	DIBuilderZNvvmDIBuilderr   r   r   r   r8   G   s    
zCUDATargetContext.DIBuilderc                 C   s   dS )NFr   r9   r   r   r   enable_boundscheckN   s    z$CUDATargetContext.enable_boundscheckc                 C   s   | j |S r3   )_internal_codegenZ_create_empty_module)r   namer   r   r   create_moduleT   s    zCUDATargetContext.create_modulec                 C   s   t d| _ttj| _d S )Nznumba.cuda.jit)r   ZJITCUDACodegenr;   llZcreate_target_datar   Zdata_layout_target_datar9   r   r   r   initW   s    zCUDATargetContext.initc                 C   s   ddl m}m}m} ddl m}m}m} ddl m}m} ddl m	}	 ddl
m}
 ddl
m} dd	lm}m}m}m}m} | |j | |j | |j | |	j | |j | |j d S )
Nr   )numberstupleobjslicing)rangeobj	iteratorsenumimpl)unicodecharseq)	cmathimpl)arrayobj)
npdatetimer   )cudaimpl	printimpllibdeviceimplmathimplr   )Znumba.cpythonrA   rB   rC   rD   rE   rF   rG   rH   rI   Znumba.nprJ   rK   r   rL   rM   rN   rO   r   r   r   Zimpl_registry)r   rA   rB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   r   r   r   r   r   [   s    z,CUDATargetContext.load_additional_registriesc                 C   s   | j S r3   )r;   r9   r   r   r   r   o   s    zCUDATargetContext.codegenc                 C   s   | j S r3   )r?   r9   r   r   r   target_datar   s    zCUDATargetContext.target_datac                    s*   ddl m  d}t fdd|D }|S )z
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        r   r2   )Z	threadIdxZblockDimZblockIdxZgridDimZlaneidZwarpsizec                    s   g | ]}t  |fqS r   )r   Module).0ZncrQ   r   r   
<listcomp>   s   z;CUDATargetContext.nonconst_module_attrs.<locals>.<listcomp>)Znumbar2   tuple)r   Z	nonconstsZnonconsts_with_modr   rQ   r   nonconst_module_attrsv   s    z'CUDATargetContext.nonconst_module_attrsc                 C   s   t | S r3   )CUDACallConvr9   r   r   r   	call_conv   s    zCUDATargetContext.call_convr   Nabi_tagsuidc                C   s   t j||||dS )NrY   )r   Zmangle)r   r<   argtypesrZ   r[   r   r   r   mangler   s    
zCUDATargetContext.manglerc                 C   sT   t j|jdd}|  j|j d|||d}	|	| | |	|||||}
|	|
fS )a  
        Adapt a code library ``codelib`` with the numba compiled CUDA kernel
        with name ``fname`` and arguments ``argtypes`` for NVVM.
        A new library is created with a wrapper function that can be used as
        the kernel entry point for the given kernel.

        Returns the new code library and the wrapper function.

        Parameters:

        codelib:       The CodeLibrary containing the device function to wrap
                       in a kernel call.
        fndesc:        The FunctionDescriptor of the source function.
        debug:         Whether to compile with debug.
        nvvm_options:  Dict of NVVM options used when compiling the new library.
        filename:      The source filename that the function is contained in.
        linenum:       The source line that the function is on.
        max_registers: The max_registers argument for the code library.
        cudapynsZ_kernel_)Z
entry_namenvvm_optionsmax_registers)r   prepend_namespacellvm_func_namer   Zcreate_libraryr<   Zadd_linking_librarygenerate_kernel_wrapper)r   Zcodelibfndescr    ra   filenamelinenumrb   kernel_namelibrarywrapperr   r   r   prepare_cuda_kernel   s      

  z%CUDATargetContext.prepare_cuda_kernelc           "   
      s  |j }| |}t|j}	tt |	}
| dttd| j	
tjg|	 }t||j}tj|jdd}t|
| t d}|r| j|| d}| ||j|| |||  fdd}|d	}g }g }d
D ](}||d|  ||d|  q|| j}| j	||tj||\}}|rt||j |  W 5 Q R X | |!|j" t#|j$j%d}t&' j(r|)|||j*dd}|+|d}nPt|j$|j$|j$|j$g}d}tj||d}|,||||j*g}|-d||}t./|}| |^ t0d
|D ] \}} |1|}!|2|!|  q(t0d
|D ] \}} |3|}!|2|!|  qTW 5 Q R X W 5 Q R X |  t&4  |5 |r|6  |6  |7 j  S )z
        Generate the kernel wrapper in the given ``library``.
        The function being wrapped is described by ``fndesc``.
        The wrapper function is returned.
        zcuda.kernel.wrapper    r^   r_   r   )modulefilepathZcgctxc                    s4    j |  }ttd|}t|jjd |_|S )Nrm   )	r<   r   add_global_variabler   IntTypeConstanttypepointeeinitializer)Zpostfixr<   gvZwrapfnZwrapper_moduler   r   define_error_gv   s    
zBCUDATargetContext.generate_kernel_wrapper.<locals>.define_error_gvZ__errcode__Zxyzz	__tid%s__z__ctaid%s__N	monotonicr   Z___numba_atomic_i32_cas_hack)r<   z==)8r\   Zget_arg_packerlistZargument_typesr   FunctionTypeZVoidTyper=   rq   rX   Zget_return_typer   ZpyobjectZFunctionrd   r   rc   r<   Z	IRBuilderZappend_basic_blockr8   Zmark_subprogramargsZmark_locationappendZfrom_argumentscall_functionvoidr   Z	if_likelyZis_okZret_voidZif_thennot_Zis_python_excrr   rs   rt   r   r6   r7   ZcmpxchgcodeZextract_valuecallZicmp_unsignedr   ZSRegBuilderziptidstoreZctaidZset_cuda_kernelZadd_ir_modulefinalizeZget_function)"r   rj   rf   ri   r    rg   rh   r\   ZarginfoZargtysZwrapfntyZfntyfuncprefixedbuilderr   rx   Zgv_excZgv_tidZgv_ctaidiZcallargsstatus_oldZxchgchangedZcasfntyZcas_hackZcasfnZsregZdimptrr*   r   rw   r   re      s    


           


$

z)CUDATargetContext.generate_kernel_wrapperc              	      s<  |j } fddt|jddD }ttdt|}t||}tj	}t
j||jd|d}	d|	_d	|	_||	_ |j}
 |
}d
|d   |	_t|td|}|	ttd|}|||g} | |} fdd|jD } fdd|jD } j||||jj|||j|j dd |! S )i
        Unlike the parent version.  This returns a a pointer in the constant
        addrspace.
        c                    s   g | ]}  tj|qS r   )get_constantr   byte)rS   r   r9   r   r   rT     s   z9CUDATargetContext.make_constant_array.<locals>.<listcomp>A)order   Z_cudapy_cmem	addrspaceinternalT   r   c                    s   g | ]}  tj|qS r   r   r   ZintprS   sr9   r   r   rT   0  s     c                    s   g | ]}  tj|qS r   r   r   r9   r   r   rT   1  s     N)datashapestridesitemsizeparentZmeminfo)"rn   itertobytesr   Z	ArrayTyperq   lenrr   r   ADDRSPACE_CONSTANTr   rp   rs   linkageglobal_constantru   Zget_data_typeZdtypeZget_abi_sizeof
bit_lengthalignr   insert_addrspace_convbitcastZPointerTyper   Z
make_arrayr   r   Zpopulate_arrayr   r   r   Z	_getvalue)r   r   ZarytyZarrlmodZ	constvalsZ
constarytyZconstaryr   rv   Zlldtyper   convZaddrspaceptrZgenptrZaryZkshapeZkstridesr   r9   r   make_constant_array  s<    

 z%CUDATargetContext.make_constant_arrayc                 C   s   t |dd }ddt|g}|j|}|dkrdt j||j	|t
jd}d|_d|_||_|j	jj}||t
jS )	r   zutf-8    $Z__conststring__Nr   r   T)r   Zmake_bytearrayencodejoinr   Zmangle_identifierglobalsr'   rp   rs   r   r   r   r   ru   rt   elementr   Z
as_pointer)r   modstringtextr<   rv   Zchartyr   r   r   insert_const_string:  s    
z%CUDATargetContext.insert_const_stringc                 C   s"   |j }| ||}| ||tjS )z
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        )rn   r   r   r   r   )r   r   r   r   rv   r   r   r   insert_string_const_addrspaceP  s
    z/CUDATargetContext.insert_string_const_addrspacec                 C   s*   |j }|jj}t|||}|||gS )zI
        Perform addrspace conversion according to the NVVM spec
        )rn   rs   rt   r   r   r   )r   r   r   r   r   Z	base_typer   r   r   r   r   \  s    z'CUDATargetContext.insert_addrspace_convc                 C   s   dS )zRun O1 function passes
        Nr   )r   r   r   r   r   optimize_functione  s    z#CUDATargetContext.optimize_function)r2   )N)r-   r.   r/   Zimplement_powi_as_math_callZstrict_alignmentr4   propertyr8   r:   r=   r@   r   r   rP   r	   rV   rX   r]   rl   re   r   r   r   r   r   r0   r   r   r+   r   r1   @   s4   




 
"`,	r1   c                   @   s   e Zd ZdS )rW   N)r-   r.   r/   r   r   r   r   rW   s  s   rW   )!reZllvmlite.bindingZbindingr>   Zllvmliter   Z
numba.corer   r   r   r   r   Znumba.core.dispatcherr   Znumba.core.utilsr	   Znumba.core.baser
   Znumba.core.callconvr   r   r   Zcudadrvr   Z
numba.cudar   r   r   compileIZVALID_CHARSr1   rW   r   r   r   r   <module>   s    )  5