U
    ,d=-                     @   s~  d dl mZ d dlmZmZmZmZmZ d dlm	Z	m
Z
mZmZmZmZ d dlmZ d dlmZmZmZmZ d dlmZmZ d dlmZmZmZ d dlmZ d d	lm Z  d
d Z!G dd deZ"G dd deZ#dd Z$edddG dd deZ%edddG dd deZ&edddG dd deZ'G dd de
Z(ed(ddZ)ed)d d!Z*d*d"d#Z+d$d% Z,G d&d' d'e-Z.dS )+    )ConcreteTemplate)typestypingfuncdescconfigcompiler)sanitize_compile_result_entriesCompilerBaseDefaultPassBuilderFlagsOptionCompileResult)global_compiler_lock)LoweringPassAnalysisPassPassManagerregister_pass)NumbaInvalidConfigWarningTypingError)IRLegalizationNativeLoweringAnnotateTypes)warn)get_current_devicec                 C   s"   | d krd S t | tst| S d S N)
isinstancedictAssertionError)x r   7/tmp/pip-unpacked-wheel-eu7e0c37/numba/cuda/compiler.py_nvvm_options_type   s    r!   c                   @   s   e Zd ZeedddZdS )	CUDAFlagsNzNVVM options)typedefaultdoc)__name__
__module____qualname__r   r!   nvvm_optionsr   r   r   r    r"      s
   r"   c                   @   s   e Zd Zedd ZdS )CUDACompileResultc                 C   s   t | S r   )idselfr   r   r    entry_point2   s    zCUDACompileResult.entry_pointN)r&   r'   r(   propertyr.   r   r   r   r    r*   1   s   r*   c                  K   s   t | } tf | S r   )r   r*   )entriesr   r   r    cuda_compile_result7   s    r1   TF)Zmutates_CFGZanalysis_onlyc                   @   s    e Zd ZdZdd Zdd ZdS )CUDABackendZcuda_backendc                 C   s   t |  d S r   r   __init__r,   r   r   r    r4   A   s    zCUDABackend.__init__c              
   C   sJ   |d }t j|jf|j }t|j|j|jj|j	|j
|j||jd|_dS )zH
        Back-end: Packages lowering output in a compile result
        cr)typing_contexttarget_contextZtyping_errortype_annotationlibrarycall_helper	signaturefndescT)r   r;   return_typeargsr1   	typingctx	targetctxstatusZfail_reasonr8   r9   r:   r<   r5   )r-   stateZloweredr;   r   r   r    run_passD   s    
zCUDABackend.run_passNr&   r'   r(   _namer4   rC   r   r   r   r    r2   <   s   r2   c                   @   s$   e Zd ZdZdZdd Zdd ZdS )CreateLibraryz
    Create a CUDACodeLibrary for the NativeLowering pass to populate. The
    NativeLowering pass will create a code library if none exists, but we need
    to set it up with nvvm_options from the flags if they are present.
    create_libraryc                 C   s   t |  d S r   r3   r,   r   r   r    r4   b   s    zCreateLibrary.__init__c                 C   s8   |j  }|jj}|jj}|j||d|_|j  dS )N)r)   T)	r@   codegenZfunc_idZfunc_qualnameflagsr)   rG   r9   Zenable_object_caching)r-   rB   rH   namer)   r   r   r    rC   e   s    

zCreateLibrary.run_passN)r&   r'   r(   __doc__rE   r4   rC   r   r   r   r    rF   X   s   rF   c                   @   s    e Zd ZdZdd Zdd ZdS )CUDALegalizationZcuda_legalizationc                 C   s   t |  d S r   )r   r4   r,   r   r   r    r4   u   s    zCUDALegalization.__init__c                    sX   ddl m} | jrdS |j} fdd | D ]\}t|tjr4 |j q4dS )Nr   )NVVMFc                    sT   t | tjtjfr& d}t|n*t | tjrP| j D ]} |d j q<d S )Nz is a char sequence type. This type is not supported with CUDA toolkit versions < 11.2. To use this type, you need to update your CUDA toolkit - try 'conda install cudatoolkit=11' if you are using conda to manage your environment.   )	r   r   ZUnicodeCharSeqZCharSeqr   ZRecordfieldsitemsr#   )dtypemsgZsubdtypecheck_dtypekr   r    rT      s    

z.CUDALegalization.run_pass.<locals>.check_dtype)	Znumba.cuda.cudadrv.nvvmrM   Z	is_nvvm70typemaprP   r   r   ArrayrQ   )r-   rB   rM   Ztypmapvr   rS   r    rC   x   s    zCUDALegalization.run_passNrD   r   r   r   r    rL   p   s   rL   c                   @   s   e Zd Zdd Zdd ZdS )CUDACompilerc                 C   st   t }td}|| j}|j|j || j}|j|j |td | 	| j}|j|j |
  |gS )NcudazCUDA legalization)r
   r   Zdefine_untyped_pipelinerB   ZpassesextendZdefine_typed_pipelineadd_passrL   define_cuda_lowering_pipelinefinalize)r-   ZdpbpmZuntyped_passesZtyped_passesZlowering_passesr   r   r    define_pipelines   s    zCUDACompiler.define_pipelinesc                 C   sP   t d}|td |td |td |td |td |  |S )NZcuda_loweringz$ensure IR is legal prior to loweringzannotate typeszcreate libraryznative loweringzcuda backend)r   r\   r   r   rF   r   r2   r^   )r-   rB   r_   r   r   r    r]      s    z*CUDACompiler.define_cuda_lowering_pipelineN)r&   r'   r(   r`   r]   r   r   r   r    rY      s   rY   Nc                 C   s   ddl m} |j}	|j}
t }d|_d|_d|_|s8|rFd|_d|_	nd|_	|rVd|_
|r`d|_|rj||_ddlm} |d  tj|	|
| |||i td	}W 5 Q R X |j}|  |S )
NrN   cuda_targetTpythonZnumpyr   )target_overriderZ   )r?   r@   funcr>   r=   rI   localsZpipeline_class)
descriptorrb   r6   r7   r"   Z
no_compileZno_cpython_wrapperZno_cfunc_wrapperZ	debuginfoZerror_modelZforceinlinefastmathr)   Znumba.core.target_extensionrd   r   Zcompile_extrarY   r9   r^   )pyfuncr=   r>   debuglineinfoinlinerh   r)   rb   r?   r@   rI   rd   cresr9   r   r   r    compile_cuda   s>    
	rn   c              	   C   s   |r|rd}t t| ||||r&dndd}	t| d|||||	d}
|
jj}|rX|
j}n6|
j}|
jj}t	|
jj
}||
j|
j||	||\}}|ptj}|j|d}||fS )a  Compile a Python function to PTX for a given set of argument types.

    :param pyfunc: The Python function to compile.
    :param args: A tuple of argument types to compile for.
    :param debug: Whether to include debug info in the generated PTX.
    :type debug: bool
    :param lineinfo: Whether to include a line mapping from the generated PTX
                     to the source code. Usually this is used with optimized
                     code (since debug mode would automatically include this),
                     so we want debug info in the LLVM but only the line
                     mapping in the final PTX.
    :type lineinfo: bool
    :param device: Whether to compile a device function. Defaults to ``False``,
                   to compile global kernel functions.
    :type device: bool
    :param fastmath: Whether to enable fast math flags (ftz=1, prec_sqrt=0,
                     prec_div=, and fma=1)
    :type fastmath: bool
    :param cc: Compute capability to compile for, as a tuple ``(MAJOR, MINOR)``.
               Defaults to ``(5, 3)``.
    :type cc: tuple
    :param opt: Enable optimizations. Defaults to ``True``.
    :type opt: bool
    :return: (ptx, resty): The PTX code and inferred return type
    :rtype: tuple
    z{debug=True with opt=True (the default) is not supported by CUDA. This may result in a crash - set debug=False or opt=False.   r   )rj   rk   rh   optN)rj   rk   rh   r)   )cc)r   r   rn   r;   r=   r9   r7   r8   filenameintlinenumZprepare_cuda_kernelr<   r   ZCUDA_DEFAULT_PTX_CCZget_asm_str)ri   r>   rj   rk   devicerh   rq   rp   rR   r)   rm   ZrestylibZtgtrr   rt   kernelZptxr   r   r    compile_ptx   s4    
  
rx   c              
   C   s    t  j}t| ||||||ddS )zCompile a Python function to PTX for a given set of argument types for
    the current device's compute capabilility. This calls :func:`compile_ptx`
    with an appropriate ``cc`` value for the current device.T)rj   rk   ru   rh   rq   rp   )r   Zcompute_capabilityrx   )ri   r>   rj   rk   ru   rh   rp   rq   r   r   r    compile_ptx_for_current_device  s    
   ry   c                    st   ddl m} |j}|j}tj|f| t|  G  fdddt}tj	| ||d}|
 | |
 |  S )NrN   ra   c                       s   e Zd Z ZgZdS )z9declare_device_function.<locals>.device_function_templateN)r&   r'   r(   keyZcasesr   Zextfnsigr   r    device_function_template0  s   r}   )rJ   restypeargtypes)rg   rb   r6   r7   r   r;   ExternFunctionr   r   ZExternalFunctionDescriptorZinsert_user_function)rJ   r~   r   rb   r?   r@   r}   r<   r   r{   r    declare_device_function)  s    
  r   c                   @   s   e Zd Zdd ZdS )r   c                 C   s   || _ || _d S r   )rJ   r|   )r-   rJ   r|   r   r   r    r4   <  s    zExternFunction.__init__N)r&   r'   r(   r4   r   r   r   r    r   ;  s   r   )FFFFN)FFFFNT)FFFFT)/Znumba.core.typing.templatesr   Z
numba.corer   r   r   r   r   Znumba.core.compilerr   r	   r
   r   r   r   Znumba.core.compiler_lockr   Znumba.core.compiler_machineryr   r   r   r   Znumba.core.errorsr   r   Znumba.core.typed_passesr   r   r   warningsr   Znumba.cuda.apir   r!   r"   r*   r1   r2   rF   rL   rY   rn   rx   ry   r   objectr   r   r   r   r    <module>   sL    	


""      -      ;      

