from Kinho.lib import cpu, gpu
import numpy as np
from numba import cuda

# Absolute tolerance used when comparing CPU and GPU results element by element.
EPS = 1e-8

def test_sigmoid2_gpu():
    """Check that ``gpu.sigmoid2`` agrees with ``cpu.sigmoid2`` on random input.

    A random float64 batch is evaluated by the CPU implementation and by the
    GPU implementation (which is handed a preallocated device buffer that is
    then copied back to the host). Every element of the two results must
    differ by strictly less than ``EPS`` in absolute value.
    """
    BATCH = 256
    DIM = 1000
    shape = (BATCH, 1, DIM)

    signals_host = np.random.randn(*shape)

    signals_device = cuda.to_device(signals_host)
    buffer_device = cuda.device_array(shape=shape, dtype=np.float64)

    # np.asarray is a no-op for ndarrays; it only guards against the CPU
    # implementation returning some other array-like.
    ret_cpu = np.asarray(cpu.sigmoid2(signals=signals_host))
    gpu.sigmoid2(signals=signals_device, buffer=buffer_device)
    ret_gpu = buffer_device.copy_to_host()

    # Guard against silent broadcasting in the subtraction below.
    assert ret_cpu.shape == ret_gpu.shape, (
        f"shape mismatch: cpu {ret_cpu.shape} vs gpu {ret_gpu.shape}"
    )

    # One vectorized pass instead of BATCH * DIM Python-level assertions.
    abs_err = np.abs(ret_cpu - ret_gpu)
    # argmax selects a NaN if one is present, so a NaN result still fails
    # the strict comparison below (NaN < EPS is False).
    worst = np.unravel_index(np.argmax(abs_err), abs_err.shape)
    assert abs_err[worst] < EPS, (
        f"max |cpu - gpu| = {abs_err[worst]} at index {worst} "
        f"(cpu={ret_cpu[worst]}, gpu={ret_gpu[worst]}), tolerance {EPS}"
    )
