U
    jgm0                     @   sP  d Z ddlmZ ddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlmZ G dd	 d	eZG d
d dZG dd dZG dd deZG dd deZG dd deZe Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Ze Z G dd deZ!G dd deZ"G dd deZ#edd Z$dS )zf
Implements the cuda module as called from within an executing kernel
(@cuda.jit-decorated function).
    )contextmanagerN)types)numpy_support   )vector_typesc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )Dim3z;
    Used to implement thread/block indices/dimensions
    c                 C   s   || _ || _|| _d S Nxyz)selfr
   r   r    r   B/tmp/pip-unpacked-wheel-qtpwf23r/numba/cuda/simulator/kernelapi.py__init__   s    zDim3.__init__c                 C   s   d| j | j| jf S )Nz(%s, %s, %s)r	   r   r   r   r   __str__   s    zDim3.__str__c                 C   s   d| j | j| jf S )NzDim3(%s, %s, %s)r	   r   r   r   r   __repr__   s    zDim3.__repr__c                 c   s   | j V  | jV  | jV  d S r   r	   r   r   r   r   __iter__!   s    zDim3.__iter__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s
   r   c                   @   s   e Zd ZdZdd ZdS )	GridGroupz+
    Used to implement the grid group.
    c                 C   s   t    d S r   	threadingcurrent_threadsyncthreadsr   r   r   r   sync,   s    zGridGroup.syncN)r   r   r   r   r   r   r   r   r   r   '   s   r   c                   @   s   e Zd ZdZdd ZdS )
FakeCUDACgz!
    CUDA Cooperative Groups
    c                 C   s   t  S r   )r   r   r   r   r   	this_grid7   s    zFakeCUDACg.this_gridN)r   r   r   r   r    r   r   r   r   r   3   s   r   c                   @   s   e Zd ZdZdd ZdS )FakeCUDALocalz
    CUDA Local arrays
    c                 C   s"   t |tjrt|}t||S r   )
isinstancer   Typer   as_dtypenpempty)r   shapedtyper   r   r   array?   s    
zFakeCUDALocal.arrayN)r   r   r   r   r)   r   r   r   r   r!   ;   s   r!   c                   @   s   e Zd ZdZdd ZdS )FakeCUDAConstz
    CUDA Const arrays
    c                 C   s   |S r   r   )r   Zaryr   r   r   
array_likeI   s    zFakeCUDAConst.array_likeN)r   r   r   r   r+   r   r   r   r   r*   E   s   r*   c                   @   s    e Zd ZdZdd Zdd ZdS )FakeCUDAShareda  
    CUDA Shared arrays.

    Limitations: assumes that only one call to cuda.shared.array is on a line,
    and that that line is only executed once per thread. i.e.::

        a = cuda.shared.array(...); b = cuda.shared.array(...)

    will erroneously alias a and b, and::

        for i in range(10):
            sharedarrs[i] = cuda.shared.array(...)

    will alias all arrays created at that point (though it is not certain that
    this would be supported by Numba anyway).
    c                 C   s"   i | _ || _tj|tjd| _d S N)r(   )_allocations_dynshared_sizer%   zerosbyte
_dynshared)r   dynshared_sizer   r   r   r   _   s    zFakeCUDAShared.__init__c                 C   s   t |tjrt|}|dkr>| j|j }tj| j	j
||dS tt }|d dd }| j|}|d krt||}|| j|< |S )Nr   )r(   count   )r"   r   r#   r   r$   r/   itemsizer%   Z
frombufferr2   data	tracebackextract_stacksys	_getframer.   getr&   )r   r'   r(   r4   stackZcallerresr   r   r   r)   d   s    

zFakeCUDAShared.arrayN)r   r   r   r   r   r)   r   r   r   r   r,   M   s   r,   c                   @   s|   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdS )FakeCUDAAtomicc              	   C   s,   t  || }||  |7  < W 5 Q R X |S r   )addlockr   r)   indexvaloldr   r   r   add   s    zFakeCUDAAtomic.addc              	   C   s,   t  || }||  |8  < W 5 Q R X |S r   )sublockrB   r   r   r   sub   s    zFakeCUDAAtomic.subc              	   C   s,   t  || }||  |M  < W 5 Q R X |S r   )andlockrB   r   r   r   and_   s    zFakeCUDAAtomic.and_c              	   C   s,   t  || }||  |O  < W 5 Q R X |S r   )orlockrB   r   r   r   or_   s    zFakeCUDAAtomic.or_c              	   C   s,   t  || }||  |N  < W 5 Q R X |S r   )xorlockrB   r   r   r   xor   s    zFakeCUDAAtomic.xorc              	   C   s>   t 0 || }||kr d||< n||  d7  < W 5 Q R X |S Nr   r   )inclockrB   r   r   r   inc   s    
zFakeCUDAAtomic.incc              	   C   sF   t 8 || }|dks||kr(|||< n||  d8  < W 5 Q R X |S rO   )declockrB   r   r   r   dec   s    
zFakeCUDAAtomic.decc              	   C   s$   t  || }|||< W 5 Q R X |S r   )exchlockrB   r   r   r   exch   s    zFakeCUDAAtomic.exchc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )maxlockmaxrB   r   r   r   rW      s    zFakeCUDAAtomic.maxc              	   C   s*   t  || }t||||< W 5 Q R X |S r   )minlockminrB   r   r   r   rY      s    zFakeCUDAAtomic.minc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rV   r%   nanmaxrB   r   r   r   rZ      s     zFakeCUDAAtomic.nanmaxc              	   C   s2   t $ || }t|| |g||< W 5 Q R X |S r   )rX   r%   nanminrB   r   r   r   r[      s     zFakeCUDAAtomic.nanminc              
   C   sB   t 4 d|j }|| }||kr(|||< |W  5 Q R  S Q R X d S )N)r   )compare_and_swaplockndim)r   r)   rE   rD   rC   loadedr   r   r   compare_and_swap   s    
zFakeCUDAAtomic.compare_and_swapc              
   C   s8   t * || }||kr|||< |W  5 Q R  S Q R X d S r   )caslock)r   r)   rC   rE   rD   r^   r   r   r   cas   s
    zFakeCUDAAtomic.casN)r   r   r   rF   rH   rJ   rL   rN   rQ   rS   rU   rW   rY   rZ   r[   r_   ra   r   r   r   r   r@      s   		r@   c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Z d=S )>FakeCUDAFp16c                 C   s   || S r   r   r   abr   r   r   hadd   s    zFakeCUDAFp16.haddc                 C   s   || S r   r   rc   r   r   r   hsub   s    zFakeCUDAFp16.hsubc                 C   s   || S r   r   rc   r   r   r   hmul   s    zFakeCUDAFp16.hmulc                 C   s   || S r   r   rc   r   r   r   hdiv   s    zFakeCUDAFp16.hdivc                 C   s   || | S r   r   r   rd   re   cr   r   r   hfma   s    zFakeCUDAFp16.hfmac                 C   s   | S r   r   r   rd   r   r   r   hneg   s    zFakeCUDAFp16.hnegc                 C   s   t |S r   )absrm   r   r   r   habs   s    zFakeCUDAFp16.habsc                 C   s   t j|t jdS r-   )r%   sinfloat16r   r
   r   r   r   hsin   s    zFakeCUDAFp16.hsinc                 C   s   t j|t jdS r-   )r%   cosrr   rs   r   r   r   hcos  s    zFakeCUDAFp16.hcosc                 C   s   t j|t jdS r-   )r%   logrr   rs   r   r   r   hlog  s    zFakeCUDAFp16.hlogc                 C   s   t j|t jdS r-   )r%   log2rr   rs   r   r   r   hlog2  s    zFakeCUDAFp16.hlog2c                 C   s   t j|t jdS r-   )r%   log10rr   rs   r   r   r   hlog10  s    zFakeCUDAFp16.hlog10c                 C   s   t j|t jdS r-   )r%   exprr   rs   r   r   r   hexp  s    zFakeCUDAFp16.hexpc                 C   s   t j|t jdS r-   )r%   Zexp2rr   rs   r   r   r   hexp2  s    zFakeCUDAFp16.hexp2c                 C   s   t d| S )N
   r%   rr   rs   r   r   r   hexp10  s    zFakeCUDAFp16.hexp10c                 C   s   t j|t jdS r-   )r%   sqrtrr   rs   r   r   r   hsqrt  s    zFakeCUDAFp16.hsqrtc                 C   s   t |d S )Ng      r   rs   r   r   r   hrsqrt  s    zFakeCUDAFp16.hrsqrtc                 C   s   t j|t jdS r-   r%   ceilrr   rs   r   r   r   hceil  s    zFakeCUDAFp16.hceilc                 C   s   t j|t jdS r-   r   rs   r   r   r   hfloor   s    zFakeCUDAFp16.hfloorc                 C   s   t j|t jdS r-   )r%   Z
reciprocalrr   rs   r   r   r   hrcp#  s    zFakeCUDAFp16.hrcpc                 C   s   t j|t jdS r-   )r%   truncrr   rs   r   r   r   htrunc&  s    zFakeCUDAFp16.htruncc                 C   s   t j|t jdS r-   )r%   Zrintrr   rs   r   r   r   hrint)  s    zFakeCUDAFp16.hrintc                 C   s   ||kS r   r   rc   r   r   r   heq,  s    zFakeCUDAFp16.heqc                 C   s   ||kS r   r   rc   r   r   r   hne/  s    zFakeCUDAFp16.hnec                 C   s   ||kS r   r   rc   r   r   r   hge2  s    zFakeCUDAFp16.hgec                 C   s   ||kS r   r   rc   r   r   r   hgt5  s    zFakeCUDAFp16.hgtc                 C   s   ||kS r   r   rc   r   r   r   hle8  s    zFakeCUDAFp16.hlec                 C   s   ||k S r   r   rc   r   r   r   hlt;  s    zFakeCUDAFp16.hltc                 C   s
   t ||S r   )rW   rc   r   r   r   hmax>  s    zFakeCUDAFp16.hmaxc                 C   s
   t ||S r   )rY   rc   r   r   r   hminA  s    zFakeCUDAFp16.hminN)!r   r   r   rf   rg   rh   ri   rl   rn   rp   rt   rv   rx   rz   r|   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rb      s<   rb   c                   @   s  e Zd ZdZdd Zedd Zedd Zedd	 Zed
d Z	edd Z
edd Zedd Zedd Zedd Zedd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8S )9FakeCUDAModulea7  
    An instance of this class will be injected into the __globals__ for an
    executing function in order to implement calls to cuda.*. This will fail to
    work correctly if the user code does::

        from numba import cuda as something_else

    In other words, the CUDA module must be called cuda.
    c                 C   s   t | | _t | | _t | _t | _t|| _t	 | _
t | _t | _t D ],\}}t| || |jD ]}t| || qhqNd S r   )r   gridDimblockDimr   _cgr!   _localr,   _sharedr*   _constr@   _atomicrb   _fp16r   itemssetattraliases)r   Zgrid_dimZ	block_dimr3   nameZsvtyaliasr   r   r   r   P  s    



zFakeCUDAModule.__init__c                 C   s   | j S r   )r   r   r   r   r   cgc  s    zFakeCUDAModule.cgc                 C   s   | j S r   )r   r   r   r   r   localg  s    zFakeCUDAModule.localc                 C   s   | j S r   )r   r   r   r   r   sharedk  s    zFakeCUDAModule.sharedc                 C   s   | j S r   )r   r   r   r   r   consto  s    zFakeCUDAModule.constc                 C   s   | j S r   )r   r   r   r   r   atomics  s    zFakeCUDAModule.atomicc                 C   s   | j S r   )r   r   r   r   r   fp16w  s    zFakeCUDAModule.fp16c                 C   s
   t  jS r   )r   r   	threadIdxr   r   r   r   r   {  s    zFakeCUDAModule.threadIdxc                 C   s
   t  jS r   )r   r   blockIdxr   r   r   r   r     s    zFakeCUDAModule.blockIdxc                 C   s   dS N    r   r   r   r   r   warpsize  s    zFakeCUDAModule.warpsizec                 C   s   t  jd S r   )r   r   Z	thread_idr   r   r   r   laneid  s    zFakeCUDAModule.laneidc                 C   s   t    d S r   r   r   r   r   r   r     s    zFakeCUDAModule.syncthreadsc                 C   s   d S r   r   r   r   r   r   threadfence  s    zFakeCUDAModule.threadfencec                 C   s   d S r   r   r   r   r   r   threadfence_block  s    z FakeCUDAModule.threadfence_blockc                 C   s   d S r   r   r   r   r   r   threadfence_system  s    z!FakeCUDAModule.threadfence_systemc                 C   s   t  |S r   )r   r   syncthreads_countr   rD   r   r   r   r     s    z FakeCUDAModule.syncthreads_countc                 C   s   t  |S r   )r   r   syncthreads_andr   r   r   r   r     s    zFakeCUDAModule.syncthreads_andc                 C   s   t  |S r   )r   r   syncthreads_orr   r   r   r   r     s    zFakeCUDAModule.syncthreads_orc                 C   s   t |dS )N1)binr4   r   r   r   r   popc  s    zFakeCUDAModule.popcc                 C   s   || | S r   r   rj   r   r   r   fma  s    zFakeCUDAModule.fmac                 C   s   |d S )NgUUUUUU?r   rm   r   r   r   cbrt  s    zFakeCUDAModule.cbrtc                 C   s   t d|d d d dS )N{:032b}r6   )intformatr   r   r   r   brev  s    zFakeCUDAModule.brevc                 C   s    d |}t|t|d S )Nr   0)r   lenlstrip)r   rD   sr   r   r   clz  s    
zFakeCUDAModule.clzc                 C   s,   d |}t|t|d d d }|S )Nr   r   r   !   )r   r   rstrip)r   rD   r   rr   r   r   ffs  s    
zFakeCUDAModule.ffsc                 C   s   |r|S |S r   r   rj   r   r   r   selp  s    zFakeCUDAModule.selpc                 C   s   | j }| j}| j}|j|j |j }|dkr0|S |j|j |j }|dkrR||fS |j|j |j }|dkrv|||fS td| d S )Nr   r6      z*Global ID has 1-3 dimensions. %d requested)r   r   r   r
   r   r   RuntimeError)r   nbdimbidtidr
   r   r   r   r   r   grid  s    
zFakeCUDAModule.gridc                 C   sn   | j }| j}|j|j }|dkr$|S |j|j }|dkr@||fS |j|j }|dkr^|||fS td| d S )Nr   r6   r   z,Global grid has 1-3 dimensions. %d requested)r   r   r
   r   r   r   )r   r   r   Zgdimr
   r   r   r   r   r   gridsize  s    
zFakeCUDAModule.gridsizeN) r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   E  sL   











r   c              	   #   sl   ddl m  | j}t fdd| D }tfdd| D }|| z
d V  W 5 || X d S )Nr   cudac                 3   s"   | ]\}}| kr||fV  qd S r   r   .0kvr   r   r   	<genexpr>  s      z&swapped_cuda_module.<locals>.<genexpr>c                 3   s   | ]\}}| fV  qd S r   r   r   )fake_cuda_moduler   r   r     s     )Znumbar   __globals__dictr   update)fnr   Zfn_globsorigreplr   )r   r   r   swapped_cuda_module  s    

r   )%r   
contextlibr   r;   r   r9   Z
numba.corer   Znumpyr%   Znumba.npr   r   objectr   r   r   r!   r*   r,   LockrA   rG   rI   rK   rM   rV   rX   r\   r`   rP   rR   rT   r@   rb   r   r   r   r   r   r   <module>   s@   
/_\ 