diff options
Diffstat (limited to 'gdr_test.cu')
-rw-r--r-- | gdr_test.cu | 11 |
1 files changed, 10 insertions, 1 deletions
diff --git a/gdr_test.cu b/gdr_test.cu index 13af482..cc624fc 100644 --- a/gdr_test.cu +++ b/gdr_test.cu @@ -172,6 +172,8 @@ int main(int argc, char *argv[]) { // initAssert (cuMemHostRegister ((void*)((((uintptr_t)bar)/65536)*65536), 65536, CU_MEMHOSTREGISTER_DEVICEMAP)); initAssert (cuMemHostRegister ((void*)bar, 4096, CU_MEMHOSTREGISTER_IOMEMORY)); initAssert (cuMemHostGetDevicePointer(&dBAR, (void*)bar, 0)); + // no effect + //initAssert (cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dBAR)); bar_info = pcilib_get_bar_info(pci, BAR); printf("%p (Phys: 0x%lx, Size: 0x%x)\n", bar_info[BAR].virt_addr, bar_info[BAR].phys_addr, bar_info[BAR].size); @@ -208,6 +210,7 @@ int main(int argc, char *argv[]) { WR32 (REG_UPDATE_THRESHOLD, 0); WR64 (REG_UPDATE_ADDRESS, desc_bus); WR32 (REG_DMA, 1); + WR32 (REG_INTERCONNECT, 0x232); WR32 (REG_COUNTER, 1); #ifdef VERBOSE @@ -244,13 +247,19 @@ int main(int argc, char *argv[]) { double lat = 1000. * kbuf[0] / gpu_props.clockRate; double latk = 1000. * kbuf[1] / gpu_props.clockRate; double latc = ((tsk.tv_sec - tss.tv_sec)*1000000 + 1. * (tsk.tv_nsec - tss.tv_nsec) / 1000.) / GPU_ITERS; +# ifdef USE_HW_CONTER + double lath = 4. * RD32 (0x20) / 1000; +# else + double lath = 0; +# endif #else double lat = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.; double latk = (tsk.tv_sec - tss.tv_sec)*1000000 + 1. * (tsk.tv_nsec - tss.tv_nsec) / 1000.; double latc = 0; + double lath = 0; #endif - printf("Latency: %.3lf us / %.3lf us (%.3lf us) %x %x %x %x\n", lat, latk, latc, kbuf[0], kbuf[1], kbuf[2], kbuf[3]); + printf("hw: % 6.3lf us, sw: % 6.3lf us, +krn: % 6.3lf us, total: % 7.3lf us: %x %x %x %x\n", lath, lat, latk, latc, kbuf[0], kbuf[1], kbuf[2], kbuf[3]); #else if (!i) gettimeofday(&tvs, NULL); #endif /* VERBOSE */ |