summary refs log tree commit diff stats
path: root/gdr_test.cu
diff options
context:
space:
mode:
Diffstat (limited to 'gdr_test.cu')
-rw-r--r-- gdr_test.cu 11
1 files changed, 10 insertions, 1 deletions
diff --git a/gdr_test.cu b/gdr_test.cu
index 13af482..cc624fc 100644
--- a/gdr_test.cu
+++ b/gdr_test.cu
@@ -172,6 +172,8 @@ int main(int argc, char *argv[]) {
// initAssert (cuMemHostRegister ((void*)((((uintptr_t)bar)/65536)*65536), 65536, CU_MEMHOSTREGISTER_DEVICEMAP));
initAssert (cuMemHostRegister ((void*)bar, 4096, CU_MEMHOSTREGISTER_IOMEMORY));
initAssert (cuMemHostGetDevicePointer(&dBAR, (void*)bar, 0));
+ // no effect
+ //initAssert (cuPointerSetAttribute(&flag, CU_POINTER_ATTRIBUTE_SYNC_MEMOPS, dBAR));
bar_info = pcilib_get_bar_info(pci, BAR);
printf("%p (Phys: 0x%lx, Size: 0x%x)\n", bar_info[BAR].virt_addr, bar_info[BAR].phys_addr, bar_info[BAR].size);
@@ -208,6 +210,7 @@ int main(int argc, char *argv[]) {
WR32 (REG_UPDATE_THRESHOLD, 0);
WR64 (REG_UPDATE_ADDRESS, desc_bus);
WR32 (REG_DMA, 1);
+ WR32 (REG_INTERCONNECT, 0x232);
WR32 (REG_COUNTER, 1);
#ifdef VERBOSE
@@ -244,13 +247,19 @@ int main(int argc, char *argv[]) {
double lat = 1000. * kbuf[0] / gpu_props.clockRate;
double latk = 1000. * kbuf[1] / gpu_props.clockRate;
double latc = ((tsk.tv_sec - tss.tv_sec)*1000000 + 1. * (tsk.tv_nsec - tss.tv_nsec) / 1000.) / GPU_ITERS;
+# ifdef USE_HW_CONTER
+ double lath = 4. * RD32 (0x20) / 1000;
+# else
+ double lath = 0;
+# endif
#else
double lat = (tse.tv_sec - tss.tv_sec)*1000000 + 1. * (tse.tv_nsec - tss.tv_nsec) / 1000.;
double latk = (tsk.tv_sec - tss.tv_sec)*1000000 + 1. * (tsk.tv_nsec - tss.tv_nsec) / 1000.;
double latc = 0;
+ double lath = 0;
#endif
- printf("Latency: %.3lf us / %.3lf us (%.3lf us) %x %x %x %x\n", lat, latk, latc, kbuf[0], kbuf[1], kbuf[2], kbuf[3]);
+ printf("hw: % 6.3lf us, sw: % 6.3lf us, +krn: % 6.3lf us, total: % 7.3lf us: %x %x %x %x\n", lath, lat, latk, latc, kbuf[0], kbuf[1], kbuf[2], kbuf[3]);
#else
if (!i) gettimeofday(&tvs, NULL);
#endif /* VERBOSE */