import numpy as np
import os
import sys
import ctypes
import functools

from numba.core import config, serialize, sigutils, types, typing, utils
from numba.core.caching import Cache, CacheImpl
from numba.core.compiler_lock import global_compiler_lock
from numba.core.dispatcher import Dispatcher
from numba.core.errors import NumbaPerformanceWarning
from numba.core.typing.typeof import Purpose, typeof

from numba.cuda.api import get_current_device
from numba.cuda.args import wrap_arg
from numba.cuda.compiler import compile_cuda, CUDACompiler
from numba.cuda.cudadrv import driver
from numba.cuda.cudadrv.devices import get_context
from numba.cuda.cudadrv.libs import get_cudalib
from numba.cuda.descriptor import cuda_target
from numba.cuda.errors import (missing_launch_config_msg,
                               normalize_kernel_dimensions)
from numba.cuda import types as cuda_types

from numba import cuda
from numba import _dispatcher

from warnings import warn

class _Kernel(serialize.ReduceMixin):
    """
    CUDA Kernel specialized for a given set of argument types. When called, this
    object launches the kernel on the device.
    """

    @global_compiler_lock
    def __init__(self, py_func, argtypes, link=None, debug=False,
                 lineinfo=False, inline=False, fastmath=False, extensions=None,
                 max_registers=None, opt=True, device=False):
        if device:
            raise RuntimeError('Cannot compile a device function as a kernel')

        super().__init__()

        self.objectmode = False
        self.entry_point = None

        self.py_func = py_func
        self.argtypes = argtypes
        self.debug = debug
        self.lineinfo = lineinfo
        self.extensions = extensions or []

        nvvm_options = {
            'debug': self.debug,
            'lineinfo': self.lineinfo,
            'fastmath': fastmath,
            'opt': 3 if opt else 0
        }

        cres = compile_cuda(self.py_func, types.void, self.argtypes,
                            debug=self.debug,
                            lineinfo=self.lineinfo,
                            inline=inline,
                            fastmath=fastmath,
                            nvvm_options=nvvm_options)
        tgt_ctx = cres.target_context
        code = self.py_func.__code__
        filename = code.co_filename
        linenum = code.co_firstlineno
        lib, kernel = tgt_ctx.prepare_cuda_kernel(cres.library, cres.fndesc,
                                                  debug, lineinfo, nvvm_options,
                                                  filename, linenum,
                                                  max_registers)

        if not link:
            link = []

        # A kernel needs a cooperative launch (and linking against cudadevrt)
        # if it uses grid synchronization.
        self.cooperative = 'cudaCGGetIntrinsicHandle' in lib.get_asm_str()
        if self.cooperative:
            lib.add_linking_file(get_cudalib('cudadevrt', static=True))
        for filepath in link:
            lib.add_linking_file(filepath)

        # Populate members
        self.entry_name = kernel.name
        self.signature = cres.signature
        self._type_annotation = cres.type_annotation
        self._codelibrary = lib
        self.call_helper = cres.call_helper

        # The following are referred to by the cache implementation; CUDA
        # kernels have no referenced environments, lifted code, or
        # reload_init entries.
        self.target_context = tgt_ctx
        self.fndesc = cres.fndesc
        self.environment = cres.environment
        self._referenced_environments = []
        self.lifted = []
        self.reload_init = []

    @property
    def library(self):
        return self._codelibrary

    @property
    def type_annotation(self):
        return self._type_annotation

    def _find_referenced_environments(self):
        return self._referenced_environments

    @property
    def codegen(self):
        return self.target_context.codegen()

    @property
    def argument_types(self):
        return tuple(self.signature.args)

    @classmethod
    def _rebuild(cls, cooperative, name, signature, codelibrary, debug,
                 lineinfo, call_helper, extensions):
        """
        Rebuild an instance.
        """
        instance = cls.__new__(cls)
        # Invoke parent constructor
        super(cls, instance).__init__()
        # Populate members
        instance.entry_point = None
        instance.cooperative = cooperative
        instance.entry_name = name
        instance.signature = signature
        instance._type_annotation = None
        instance._codelibrary = codelibrary
        instance.debug = debug
        instance.lineinfo = lineinfo
        instance.call_helper = call_helper
        instance.extensions = extensions
        return instance

    def _reduce_states(self):
        """
        Reduce the instance for serialization.
        Compiled definitions are serialized in PTX form.
        Type annotations are discarded.
        Thread, block and shared memory configuration are serialized.
        Stream information is discarded.
        """
        return dict(cooperative=self.cooperative, name=self.entry_name,
                    signature=self.signature, codelibrary=self._codelibrary,
                    debug=self.debug, lineinfo=self.lineinfo,
                    call_helper=self.call_helper, extensions=self.extensions)

    def bind(self):
        """
        Force binding to current CUDA context
        """
        self._codelibrary.get_cufunc()

    @property
    def device(self):
        """
        Get current active context
        """
        return get_current_device()

    @property
    def regs_per_thread(self):
        """
        The number of registers used by each thread for this kernel.
        """
        return self._codelibrary.get_cufunc().attrs.regs

    def inspect_llvm(self):
        """
        Returns the LLVM IR for this kernel.
        """
        return self._codelibrary.get_llvm_str()

    def inspect_asm(self, cc):
        """
        Returns the PTX code for this kernel.
        """
        return self._codelibrary.get_asm_str(cc=cc)

    def inspect_sass(self):
        """
        Returns the SASS code for this kernel.

        Requires nvdisasm to be available on the PATH.
        """
        return self._codelibrary.get_sass()

    def inspect_types(self, file=None):
        """
        Produce a dump of the Python source of this function annotated with the
        corresponding Numba IR and type information. The dump is written to
        *file*, or *sys.stdout* if *file* is *None*.
        """
        if self._type_annotation is None:
            raise ValueError("Type annotation is not available")

        if file is None:
            file = sys.stdout

        print("%s %s" % (self.entry_name, self.argument_types), file=file)
        print('-' * 80, file=file)
        print(self._type_annotation, file=file)
        print('=' * 80, file=file)
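    # Illustrative usage sketch (an assumption, not recovered source): the
    # inspection helpers above are usually reached through a compiled
    # dispatcher, e.g. for a hypothetical kernel ``k`` that has already been
    # launched once:
    #
    #     kern = next(iter(k.overloads.values()))  # a _Kernel instance
    #     kern.regs_per_thread          # register usage on the current device
    #     kern.inspect_asm(cc=(7, 5))   # PTX for compute capability 7.5
    #     kern.inspect_sass()           # needs nvdisasm on the PATH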
z_Kernel.inspect_typesr   c                 C   sH   t  }| j }t|tr*tdd |}||||}|jj	}|| S )a  
        Calculates the maximum number of blocks that can be launched for this
        kernel in a cooperative grid in the current context, for the given block
        and dynamic shared memory sizes.

        :param blockdim: Block dimensions, either as a scalar for a 1D block, or
                         a tuple for 2D or 3D blocks.
        :param dynsmemsize: Dynamic shared memory size in bytes.
        :return: The maximum number of blocks in the grid.
        """
        ctx = get_context()
        cufunc = self._codelibrary.get_cufunc()

        if isinstance(blockdim, tuple):
            blockdim = functools.reduce(lambda x, y: x * y, blockdim)
        active_per_sm = ctx.get_active_blocks_per_multiprocessor(cufunc,
                                                                 blockdim,
                                                                 dynsmemsize)
        sm_count = ctx.device.MULTIPROCESSOR_COUNT
        return active_per_sm * sm_count
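    # Illustrative usage sketch (assumption): sizing a cooperative launch.
    # For a hypothetical cooperative kernel ``k`` compiled for one signature,
    # the largest grid that still permits ``cuda.cg.this_grid().sync()`` is:
    #
    #     kern = next(iter(k.overloads.values()))
    #     nblocks = kern.max_cooperative_grid_blocks(blockdim=256)
    #     k[nblocks, 256](args)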
    def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
        # Prepare kernel
        cufunc = self._codelibrary.get_cufunc()

        if self.debug:
            excname = cufunc.name + "__errcode__"
            excmem, excsz = cufunc.module.get_global_symbol(excname)
            assert excsz == ctypes.sizeof(ctypes.c_int)
            excval = ctypes.c_int()
            excmem.memset(0, stream=stream)

        # Prepare arguments
        retr = []  # hold functions for writeback to host after the call
        kernelargs = []
        for t, v in zip(self.argument_types, args):
            self._prepare_args(t, v, stream, retr, kernelargs)

        if driver.USE_NV_BINDING:
            zero_stream = driver.binding.CUstream(0)
        else:
            zero_stream = None

        stream_handle = stream and stream.handle or zero_stream

        # Invoke kernel
        driver.launch_kernel(cufunc.handle,
                             *griddim,
                             *blockdim,
                             sharedmem,
                             stream_handle,
                             kernelargs,
                             cooperative=self.cooperative)

        if self.debug:
            driver.device_to_host(ctypes.addressof(excval), excmem, excsz)
            if excval.value != 0:
                # An error occurred in the kernel: raise the corresponding
                # exception, prefixed with the source location and the
                # position of the reporting thread.
                def load_symbol(name):
                    mem, sz = cufunc.module.get_global_symbol(
                        "%s__%s__" % (cufunc.name, name))
                    val = ctypes.c_int()
                    driver.device_to_host(ctypes.addressof(val), mem, sz)
                    return val.value

                tid = [load_symbol("tid" + i) for i in 'zyx']
                ctaid = [load_symbol("ctaid" + i) for i in 'zyx']
                code = excval.value
                exccls, exc_args, loc = self.call_helper.get_exception(code)
                if loc is None:
                    locinfo = ''
                else:
                    sym, filepath, lineno = loc
                    filepath = os.path.abspath(filepath)
                    locinfo = 'In function %r, file %s, line %s, ' % (
                        sym, filepath, lineno)
                prefix = "%stid=%s ctaid=%s" % (locinfo, tid, ctaid)
                if exc_args:
                    exc_args = ("%s: %s" % (prefix, exc_args[0]),) + \
                        exc_args[1:]
                else:
                    exc_args = prefix,
                raise exccls(*exc_args)

        # Retrieve auto-converted arrays
        for wb in retr:
            wb()

    def _prepare_args(self, ty, val, stream, retr, kernelargs):
        """
        Convert arguments to ctypes and append to kernelargs
        """

        # Map the arguments using any extension registered for the dispatcher
        for extension in reversed(self.extensions):
            ty, val = extension.prepare_args(ty, val, stream=stream,
                                             retr=retr)

        if isinstance(ty, types.Array):
            devary = wrap_arg(val).to_device(retr, stream)

            c_intp = ctypes.c_ssize_t

            meminfo = ctypes.c_void_p(0)
            parent = ctypes.c_void_p(0)
            nitems = c_intp(devary.size)
            itemsize = c_intp(devary.dtype.itemsize)

            ptr = driver.device_pointer(devary)
            if driver.USE_NV_BINDING:
                ptr = int(ptr)
            data = ctypes.c_void_p(ptr)

            kernelargs.append(meminfo)
            kernelargs.append(parent)
            kernelargs.append(nitems)
            kernelargs.append(itemsize)
            kernelargs.append(data)
            for ax in range(devary.ndim):
                kernelargs.append(c_intp(devary.shape[ax]))
            for ax in range(devary.ndim):
                kernelargs.append(c_intp(devary.strides[ax]))

        elif isinstance(ty, types.Integer):
            kernelargs.append(getattr(ctypes, "c_%s" % ty)(val))
        elif ty == types.float16:
            kernelargs.append(ctypes.c_uint16(np.float16(val).view(np.uint16)))
        elif ty == types.float64:
            kernelargs.append(ctypes.c_double(val))
        elif ty == types.float32:
            kernelargs.append(ctypes.c_float(val))
        elif ty == types.boolean:
            kernelargs.append(ctypes.c_uint8(int(val)))
        elif ty == types.complex64:
            kernelargs.append(ctypes.c_float(val.real))
            kernelargs.append(ctypes.c_float(val.imag))
        elif ty == types.complex128:
            kernelargs.append(ctypes.c_double(val.real))
            kernelargs.append(ctypes.c_double(val.imag))
        elif isinstance(ty, (types.NPDatetime, types.NPTimedelta)):
            kernelargs.append(ctypes.c_int64(val.view(np.int64)))
        elif isinstance(ty, types.Record):
            devrec = wrap_arg(val).to_device(retr, stream)
            ptr = devrec.device_ctypes_pointer
            if driver.USE_NV_BINDING:
                ptr = ctypes.c_void_p(int(ptr))
            kernelargs.append(ptr)
        elif isinstance(ty, types.BaseTuple):
            assert len(ty) == len(val)
            for t, v in zip(ty, val):
                self._prepare_args(t, v, stream, retr, kernelargs)
        elif isinstance(ty, types.EnumMember):
            try:
                self._prepare_args(ty.dtype, val.value, stream, retr,
                                   kernelargs)
            except NotImplementedError:
                raise NotImplementedError(ty, val)
        else:
            raise NotImplementedError(ty, val)
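
# A minimal launch sketch (illustrative, not recovered source): how a call
# reaches _Kernel.launch. The kernel and data are hypothetical, and running
# this requires a CUDA-capable device.
def _example_explicit_launch():
    @cuda.jit
    def scale(x, factor):
        i = cuda.grid(1)
        if i < x.size:
            x[i] *= factor

    x = np.arange(256, dtype=np.float64)
    # Indexing goes through CUDADispatcher.__getitem__ -> configure() ->
    # _LaunchConfiguration.__call__ -> CUDADispatcher.call -> _Kernel.launch.
    scale[4, 64](x, 2.0)  # 4 blocks of 64 threads; x is copied back on return
    return x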

class ForAll(object):
    def __init__(self, dispatcher, ntasks, tpb, stream, sharedmem):
        if ntasks < 0:
            raise ValueError("Can't create ForAll with negative task count: %s"
                             % ntasks)
        self.dispatcher = dispatcher
        self.ntasks = ntasks
        self.thread_per_block = tpb
        self.stream = stream
        self.sharedmem = sharedmem

    def __call__(self, *args):
        if self.ntasks == 0:
            return

        if self.dispatcher.specialized:
            specialized = self.dispatcher
        else:
            specialized = self.dispatcher.specialize(*args)
        blockdim = self._compute_thread_per_block(specialized)
        griddim = (self.ntasks + blockdim - 1) // blockdim

        return specialized[griddim, blockdim, self.stream,
                           self.sharedmem](*args)

    def _compute_thread_per_block(self, dispatcher):
        tpb = self.thread_per_block
        # Prefer a user-specified block size
        if tpb != 0:
            return tpb
        # Else, ask the driver for a good configuration
        else:
            ctx = get_context()
            # The dispatcher is specialized, so there is exactly one
            # definition - get it so we can reach its code library
            kernel = next(iter(dispatcher.overloads.values()))
            kwargs = dict(
                func=kernel._codelibrary.get_cufunc(),
                b2d_func=0,  # dynamic shared memory is constant with blocksize
                memsize=self.sharedmem,
                blocksizelimit=1024,
            )
            _, tpb = ctx.get_max_potential_block_size(**kwargs)
            return tpb

class _LaunchConfiguration:
    def __init__(self, dispatcher, griddim, blockdim, stream, sharedmem):
        self.dispatcher = dispatcher
        self.griddim = griddim
        self.blockdim = blockdim
        self.stream = stream
        self.sharedmem = sharedmem

        if config.CUDA_LOW_OCCUPANCY_WARNINGS:
            # Warn when the grid has fewer than 128 blocks; below this
            # heuristic threshold the GPU is likely to be under-utilized.
            min_grid_size = 128
            grid_size = griddim[0] * griddim[1] * griddim[2]
            if grid_size < min_grid_size:
                msg = (f"Grid size {grid_size} will likely result in GPU "
                       "under-utilization due to low occupancy.")
                warn(NumbaPerformanceWarning(msg))

    def __call__(self, *args):
        return self.dispatcher.call(args, self.griddim, self.blockdim,
                                    self.stream, self.sharedmem)


class CUDACacheImpl(CacheImpl):
    def reduce(self, kernel):
        return kernel._reduce_states()

    def rebuild(self, target_context, payload):
        return _Kernel._rebuild(**payload)

    def check_cachable(self, cres):
        # CUDA kernels are always cachable; the usual obstacles (lifted
        # loops, dynamic globals) do not apply to them.
        return True


class CUDACache(Cache):
    """
    Implements a cache that saves and loads CUDA kernels and compile results.
    N)r   r   r   r   r   Z_impl_classrL   rL   rL   rM   r     s   r   c                       s  e Zd ZdZdZeZef fdd	Ze	dd Z
dd Zejd	d
d9ddZdd Zd:ddZe	dd Zdd Zdd Zdd Zdd Zdd Ze	dd Zd;d!d"Zd#d$ Zd%d& Zd'd( Zd)d* Zd<d+d,Zd=d-d.Zd>d/d0Zd?d1d2Z d3d4 Z!e"d5d6 Z#d7d8 Z$  Z%S )@CUDADispatchera  
    CUDA Dispatcher object. When configured and called, the dispatcher will
    specialize itself for the given arguments (if no suitable specialized
    version already exists) & compute capability, and launch on the device
    associated with the current context.

    Dispatcher objects are not to be constructed by the user, but instead are
    created using the :func:`numba.cuda.jit` decorator.
    """

    # Whether to fold named arguments and default values; default values are
    # presently unsupported on CUDA, so this stays False.
    _fold_args = False

    targetdescr = cuda_target

    def __init__(self, py_func, targetoptions, pipeline_class=CUDACompiler):
        super().__init__(py_func, targetoptions=targetoptions,
                         pipeline_class=pipeline_class)

        # The following properties are for specialization of CUDADispatchers:
        # a specialized CUDADispatcher is compiled for exactly one set of
        # argument types, and bypasses some argument type checking for faster
        # kernel launches.

        # Whether to specialize the kernel
        self._specialized = False

        # The specializations of this kernel
        self.specializations = {}

    @property
    def _numba_type_(self):
        return cuda_types.CUDADispatcher(self)

    def enable_caching(self):
        self._cache = CUDACache(self.py_func)

    @functools.lru_cache(maxsize=None)
    def configure(self, griddim, blockdim, stream=0, sharedmem=0):
        griddim, blockdim = normalize_kernel_dimensions(griddim, blockdim)
        return _LaunchConfiguration(self, griddim, blockdim, stream, sharedmem)

    def __getitem__(self, args):
        if len(args) not in [2, 3, 4]:
            raise ValueError('must specify at least the griddim and blockdim')
        return self.configure(*args)

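    # Illustrative note (assumption): for a dispatcher ``k``,
    # ``k[griddim, blockdim]`` is shorthand for
    # ``k.configure(griddim, blockdim)``; optional third and fourth entries
    # select a stream and a dynamic shared memory size, e.g.
    # ``k[16, 128, stream, 256](x)``.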
    def forall(self, ntasks, tpb=0, stream=0, sharedmem=0):
        """Returns a 1D-configured dispatcher for a given number of tasks.

        This assumes that:

        - the kernel maps the Global Thread ID ``cuda.grid(1)`` to tasks on a
          1-1 basis.
        - the kernel checks that the Global Thread ID is upper-bounded by
          ``ntasks``, and does nothing if it is not.

        :param ntasks: The number of tasks.
        :param tpb: The size of a block. An appropriate value is chosen if this
                    parameter is not supplied.
        :param stream: The stream on which the configured dispatcher will be
                       launched.
        :param sharedmem: The number of bytes of dynamic shared memory required
                          by the kernel.
        :return: A configured dispatcher, ready to launch on a set of
                 arguments.
        """
        return ForAll(self, ntasks, tpb=tpb, stream=stream,
                      sharedmem=sharedmem)

    @property
    def extensions(self):
        """
        A list of objects that must have a `prepare_args` function. When a
        specialized kernel is called, each argument will be passed through
        to the `prepare_args` (from the last object in this list to the
        first). The arguments to `prepare_args` are:

        - `ty` the numba type of the argument
        - `val` the argument value itself
        - `stream` the CUDA stream used for the current call to the kernel
        - `retr` a list of zero-arg functions that you may want to append
          post-call cleanup work to.

        The `prepare_args` function must return a tuple `(ty, val)`, which
        will be passed in turn to the next right-most `extension`. After all
        the extensions have been called, the resulting `(ty, val)` will be
        passed into Numba's default argument marshalling logic.
        """
        return self.targetoptions.get('extensions')

    def __call__(self, *args, **kwargs):
        # An attempt to launch an unconfigured kernel
        raise ValueError(missing_launch_config_msg)

    def call(self, args, griddim, blockdim, stream, sharedmem):
        """
        Compile if necessary and invoke this kernel with *args*.
        """
        if self.specialized:
            kernel = next(iter(self.overloads.values()))
        else:
            kernel = _dispatcher.Dispatcher._cuda_call(self, *args)

        kernel.launch(args, griddim, blockdim, stream, sharedmem)

    def _compile_for_args(self, *args, **kws):
        assert not kws
        argtypes = [self.typeof_pyval(a) for a in args]
        return self.compile(tuple(argtypes))

    def typeof_pyval(self, val):
        # Differs from the base implementation in order to support the CUDA
        # Array Interface.
        try:
            return typeof(val, Purpose.argument)
        except ValueError:
            if cuda.is_cuda_array(val):
                # When typing, we don't need to synchronize on the array's
                # stream - this is done when the kernel is launched.
                return typeof(cuda.as_cuda_array(val, sync=False),
                              Purpose.argument)
            else:
                raise
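    # Illustrative sketch of the ``prepare_args`` protocol documented on the
    # ``extensions`` property above; the class and jit option usage here are
    # assumptions, not recovered source:
    #
    #     class DoubleInts:
    #         def prepare_args(self, ty, val, stream, retr):
    #             if isinstance(ty, types.Integer):
    #                 return ty, val * 2
    #             return ty, val
    #
    #     @cuda.jit(extensions=[DoubleInts()])
    #     def kernel(x):
    #         ...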
zCUDADispatcher.typeof_pyvalc                    s   t  j}t fdd|D } jr,td j||f}|rD|S  j}t j	|d}|
| |  d|_| j||f< |S )zd
        Create a new instance of this dispatcher specialized for the given
        *args*.
        c                    s   g | ]} j |qS rL   )Z	typingctxZresolve_argument_typer   rO   rL   rM   r     s     z-CUDADispatcher.specialize.<locals>.<listcomp>zDispatcher already specialized)r   T)r   compute_capabilityrR   r   r&   r   r   r   r   r*   r   Zdisable_compiler   )r@   rS   re   r+   Zspecializationr   rL   rO   rM   r     s$    
zCUDADispatcher.specializec                 C   s   | j S )z>
        True if the Dispatcher has been specialized.
        """
        return self._specialized

    def get_regs_per_thread(self, signature=None):
        """
        Returns the number of registers used by each thread in this kernel for
        the device in the current context.

        :param signature: The signature of the compiled kernel to get register
                          usage for. This may be omitted for a specialized
                          kernel.
        :return: The number of registers used by the compiled variant of the
                 kernel for the given signature and current device.
        """
        if signature is not None:
            return self.overloads[signature.args].regs_per_thread
        if self.specialized:
            return next(iter(self.overloads.values())).regs_per_thread
        else:
            return {sig: overload.regs_per_thread
                    for sig, overload in self.overloads.items()}

    def get_call_template(self, args, kws):
        """
        Get a typing.ConcreteTemplate for this dispatcher and the given
        *args* and *kws* types.  This allows resolution of the return type.

        A (template, pysig, args, kws) tuple is returned.
        zCallTemplate({0}))keyZ
signatures)_can_compilecompile_devicerR   r*   r   formatr   Zmake_concrete_templateZnopython_signaturesr   Zpysignature)r@   rS   r   	func_namer7   Zcall_templateZpysigrL   rL   rM   get_call_template  s    
  z CUDADispatcher.get_call_templatec              
   C   s   || j kr| j | jd}| jd}| jd}|| jdrHdnd|d}t| jd|||||d	}|| j |< |j|j|j	|j
g W 5 Q R X n
| j | }|S )
zCompile the device function for the given argument types.

        Each signature is compiled once by caching the compiled function inside
        this object.

        Returns the `CompileResult`.
        r    r$   r"   r#   r   r   )r    r#   r"   N)r    r$   r"   r%   )r   Z_compiling_counterr   r   r   r*   r.   Zinsert_user_functionr)   r3   r2   )r@   rS   r    r$   r"   r%   rC   rL   rL   rM   r    s,    




zCUDADispatcher.compile_devicec                 C   s,   dd |D }| j ||dd || j|< d S )Nc                 S   s   g | ]
}|j qS rL   )_coder   rL   rL   rM   r     s     z/CUDADispatcher.add_overload.<locals>.<listcomp>Tr   )Z_insertr   )r@   rH   r+   Zc_sigrL   rL   rM   add_overload  s    zCUDADispatcher.add_overloadc                 C   s   t |\}}|dks$|tjks$t| jr<tt| j	 S | j
|}|dk	rT|S | j|| j}|dk	r| j|  d7  < nH| j|  d7  < | jstdt| j|f| j}|  | j|| | || |S )z
        Compile and bind to the current context a version of this kernel
        specialized for the given signature.
        """
        argtypes, return_type = sigutils.normalize_signature(sig)
        assert return_type is None or return_type == types.none

        # Do we already have an in-memory compiled kernel?
        if self.specialized:
            return next(iter(self.overloads.values()))
        else:
            kernel = self.overloads.get(argtypes)
        if kernel is not None:
            return kernel

        # Can we load it from the disk cache?
        kernel = self._cache.load_overload(sig, self.targetctx)
        if kernel is not None:
            self._cache_hits[sig] += 1
        else:
            self._cache_misses[sig] += 1
            if not self._can_compile:
                raise RuntimeError("Compilation disabled")

            kernel = _Kernel(self.py_func, argtypes, **self.targetoptions)
            # Calling bind forces codegen, so that there is a cubin to cache
            kernel.bind()
            self._cache.save_overload(sig, kernel)

        self.add_overload(kernel, argtypes)

        return kernel

    def inspect_llvm(self, signature=None):
        """
        Return the LLVM IR for this kernel.

        :param signature: A tuple of argument types.
        :return: The LLVM IR for the given signature, or a dict of LLVM IR
                 for all previously-encountered signatures.

        """
        device = self.targetoptions.get('device')
        if signature is not None:
            if device:
                return self.overloads[signature].library.get_llvm_str()
            else:
                return self.overloads[signature].inspect_llvm()
        else:
            if device:
                return {sig: overload.library.get_llvm_str()
                        for sig, overload in self.overloads.items()}
            else:
                return {sig: overload.inspect_llvm()
                        for sig, overload in self.overloads.items()}

    def inspect_asm(self, signature=None):
        """
        Return this kernel's PTX assembly code for the device in the
        current context.

        :param signature: A tuple of argument types.
        :return: The PTX code for the given signature, or a dict of PTX codes
                 for all previously-encountered signatures.
        """
        cc = get_current_device().compute_capability
        device = self.targetoptions.get('device')
        if signature is not None:
            if device:
                return self.overloads[signature].library.get_asm_str(cc)
            else:
                return self.overloads[signature].inspect_asm(cc)
        else:
            if device:
                return {sig: overload.library.get_asm_str(cc)
                        for sig, overload in self.overloads.items()}
            else:
                return {sig: overload.inspect_asm(cc)
                        for sig, overload in self.overloads.items()}

    def inspect_sass(self, signature=None):
        """
        Return this kernel's SASS assembly code for the device in the
        current context.

        :param signature: A tuple of argument types.
        :return: The SASS code for the given signature, or a dict of SASS codes
                 for all previously-encountered signatures.

        SASS for the device in the current context is returned.

        Requires nvdisasm to be available on the PATH.
        """
        if self.targetoptions.get('device'):
            raise RuntimeError('Cannot inspect SASS of a device function')

        if signature is not None:
            return self.overloads[signature].inspect_sass()
        else:
            return {sig: defn.inspect_sass()
                    for sig, defn in self.overloads.items()}

    def inspect_types(self, file=None):
        """
        Produce a dump of the Python source of this function annotated with the
        corresponding Numba IR and type information. The dump is written to
        *file*, or *sys.stdout* if *file* is *None*.
        """
        if file is None:
            file = sys.stdout

        for _, defn in self.overloads.items():
            defn.inspect_types(file=file)

    def bind(self):
        # Force binding of all compiled definitions to the current context
        for defn in self.overloads.values():
            defn.bind()

    @classmethod
    def _rebuild(cls, py_func, targetoptions):
        """
        Rebuild an instance.
        """
        instance = cls(py_func, targetoptions)
        return instance

    def _reduce_states(self):
        """
        Reduce the instance for serialization.
        Compiled definitions are discarded.
        """
        return dict(py_func=self.py_func, targetoptions=self.targetoptions)
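
# An end-to-end introspection sketch (illustrative, not recovered source):
# eager compilation with an explicit signature, followed by the inspection
# methods above. Requires a CUDA device and toolchain.
def _example_introspection():
    @cuda.jit('void(float32[:])')
    def triple(x):
        i = cuda.grid(1)
        if i < x.size:
            x[i] *= 3

    ptx = triple.inspect_asm()           # dict keyed by argument types
    regs = triple.get_regs_per_thread()  # per-signature register usage
    return ptx, regs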