From 7ec4581ad37b88bbb300ac00850603433a8cdfe9 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Thu, 20 Jun 2013 19:15:55 +0200 Subject: Multipage DMA tests for Xilinx --- .bzrignore | 2 + apps/CMakeLists.txt | 3 + apps/load.sh | 3 + apps/xilinx2.c | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++ driver/kmem.c | 19 ++++-- ipecamera/ipecamera.c | 3 +- ipecamera/model.h | 2 +- 7 files changed, 202 insertions(+), 9 deletions(-) create mode 100755 apps/load.sh create mode 100644 apps/xilinx2.c diff --git a/.bzrignore b/.bzrignore index f584400..17e5a11 100644 --- a/.bzrignore +++ b/.bzrignore @@ -19,3 +19,5 @@ Makefile *.so.* install_manifest.txt ./xilinx +apps/xilinx +apps/xilinx2 diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 2f882a3..80a506f 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -6,3 +6,6 @@ link_directories(${UFODECODE_LIBRARY_DIRS}) add_executable(xilinx xilinx.c) target_link_libraries(xilinx pcilib rt) + +add_executable(xilinx2 xilinx2.c) +target_link_libraries(xilinx2 pcilib rt) diff --git a/apps/load.sh b/apps/load.sh new file mode 100755 index 0000000..6456c63 --- /dev/null +++ b/apps/load.sh @@ -0,0 +1,3 @@ +#! /bin/bash + +echo "10ee 6028" > /sys/bus/pci/drivers/pciDriver/new_id diff --git a/apps/xilinx2.c b/apps/xilinx2.c new file mode 100644 index 0000000..6dd1be3 --- /dev/null +++ b/apps/xilinx2.c @@ -0,0 +1,179 @@ +#define _BSD_SOURCE +#define _POSIX_C_SOURCE 199309L +#include +#include +#include +#include +#include +#include +#include + +#include "pcilib.h" +#include "irq.h" +#include "kmem.h" + +#define DEVICE "/dev/fpga0" +#define BAR PCILIB_BAR0 +#define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1) +#define BUFFERS 1 +#define ITERATIONS 16384 +#define HUGE_PAGE 128 // number of pages per huge page +#define PAGE_SIZE 4096 // other values are not supported in the kernel +#define TIMEOUT 100000 + +/* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help, +otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite +much extra time */ +//#define USE_IRQ +//#define CHECK_READY +//#define REALTIME +//#define ADD_DELAYS + +//#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); } +//#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; } +#define WR(addr, value) { *(uint32_t*)(bar + addr) = value; } +#define RD(addr, value) { value = *(uint32_t*)(bar + addr); } + +static void fail(const char *msg, ...) { + va_list va; + + va_start(va, msg); + vprintf(msg, va); + va_end(va); + printf("\n"); + + exit(-1); +} + +void hpsleep(size_t ns) { + struct timespec wait, tv; + + clock_gettime(CLOCK_REALTIME, &wait); + + wait.tv_nsec += ns; + if (wait.tv_nsec > 999999999) { + wait.tv_sec += 1; + wait.tv_nsec = 1000000000 - wait.tv_nsec; + } + + do { + clock_gettime(CLOCK_REALTIME, &tv); + } while ((wait.tv_sec > tv.tv_sec)||((wait.tv_sec == tv.tv_sec)&&(wait.tv_nsec > tv.tv_nsec))); +} + + +int main() { + int err; + int i, j; + pcilib_t *pci; + pcilib_kmem_handle_t *kbuf; + uint32_t status; + struct timeval start, end; + size_t size, run_time; + void* volatile bar; + uintptr_t bus_addr[BUFFERS]; + + pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE; + +#ifdef ADD_DELAYS + long rpt = 0, rpt2 = 0; + size_t best_time; + best_time = 1000000000L * HUGE_PAGE * PAGE_SIZE / (4L * 1024 * 1024 * 1024); +#endif /* ADD_DELAYS */ + +#ifdef REALTIME + pid_t pid; + struct sched_param sched = {0}; + + pid = getpid(); + sched.sched_priority = sched_get_priority_min(SCHED_FIFO); + if (sched_setscheduler(pid, SCHED_FIFO, &sched)) + printf("Warning: not able to get real-time priority\n"); +#endif /* REALTIME */ + + pci = pcilib_open(DEVICE, PCILIB_MODEL_DETECT); + if (!pci) fail("pcilib_open"); + + bar = pcilib_map_bar(pci, BAR); + if (!bar) { + pcilib_close(pci); + fail("map bar"); + } + + // Reset + WR(0x00, 1) + usleep(1000); + WR(0x00, 0) + + pcilib_enable_irq(pci, PCILIB_IRQ_TYPE_ALL, 0); + pcilib_clear_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT); + + pcilib_clean_kernel_memory(pci, USE, clean_flags); + + kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0); + + WR(0x04, 0) + WR(0x0C, 0x20) + WR(0x10, (HUGE_PAGE * (PAGE_SIZE / 0x80))) + WR(0x14, 0x13131313) + + for (j = 0; j < BUFFERS; j++ ) { + bus_addr[j] = pcilib_kmem_get_block_ba(pci, kbuf, j); + } + + gettimeofday(&start, NULL); + + for (i = 0; i < ITERATIONS; i++) { + for (j = 0; j < BUFFERS; j++ ) { +// uintptr_t ba = pcilib_kmem_get_block_ba(pci, kbuf, j); +// WR(0x08, ba) + WR(0x08, bus_addr[j]); + WR(0x04, 0x01) + +#ifdef USE_IRQ + err = pcilib_wait_irq(pci, PCILIB_IRQ_SOURCE_DEFAULT, TIMEOUT, NULL); + if (err) printf("Timeout waiting for IRQ, err: %i\n", err); + + RD(0x04, status); + if ((status&0xFFFF) != 0x101) printf("Invalid status %x\n", status); +// WR(0x04, 0x00); +#else /* USE_IRQ */ +# ifdef ADD_DELAYS +// hpsleep(best_time); + do { + rpt++; + RD(0x04, status); + } while (status != 0x101); +# else /* ADD_DELAYS */ + do { + RD(0x04, status); + } while (status != 0x101); +# endif /* ADD_DELAYS */ +#endif /* USE_IRQ */ + + WR(0x00, 1) +#ifdef CHECK_READY + do { + rpt2++; + RD(0x04, status); + } while (status != 0); +#endif /* CHECK_READY */ + WR(0x00, 0) + } + } + gettimeofday(&end, NULL); + + pcilib_free_kernel_memory(pci, kbuf, 0); + pcilib_disable_irq(pci, 0); + pcilib_unmap_bar(pci, BAR, bar); + pcilib_close(pci); + + run_time = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); + size = (long long int)ITERATIONS * BUFFERS * HUGE_PAGE * PAGE_SIZE; + + printf("%.3lf GB/s: transfered %zu bytes in %zu us using %u buffers\n", 1000000. * size / run_time / 1024 / 1024 / 1024, size, run_time, BUFFERS); + +# ifdef ADD_DELAYS + printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS)); +#endif /* USE_IRQ */ +} diff --git a/driver/kmem.c b/driver/kmem.c index 37a7368..ee64a78 100644 --- a/driver/kmem.c +++ b/driver/kmem.c @@ -130,23 +130,28 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) ); break; case PCILIB_KMEM_TYPE_PAGE: - retptr = (void*)__get_free_pages(GFP_KERNEL, get_order(PAGE_SIZE)); + if (kmem_handle->size == 0) + kmem_handle->size = PAGE_SIZE; + else if (kmem_handle->size%PAGE_SIZE) + goto kmem_alloc_mem_fail; + + retptr = (void*)__get_free_pages(GFP_KERNEL|__GFP_DMA, get_order(kmem_handle->size)); kmem_entry->dma_handle = 0; - kmem_handle->size = PAGE_SIZE; + kmem_handle->size = kmem_handle->size; if (retptr) { if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) { kmem_entry->direction = PCI_DMA_TODEVICE; - kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, PAGE_SIZE, PCI_DMA_TODEVICE); + kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, kmem_handle->size, PCI_DMA_TODEVICE); if (pci_dma_mapping_error(privdata->pdev, kmem_entry->dma_handle)) { - free_page((unsigned long)retptr); + free_pages((unsigned long)retptr, get_order(kmem_handle->size)); goto kmem_alloc_mem_fail; } } else if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_C2S_PAGE) { kmem_entry->direction = PCI_DMA_FROMDEVICE; - kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, PAGE_SIZE, PCI_DMA_FROMDEVICE); + kmem_entry->dma_handle = pci_map_single(privdata->pdev, retptr, kmem_handle->size, PCI_DMA_FROMDEVICE); if (pci_dma_mapping_error(privdata->pdev, kmem_entry->dma_handle)) { - free_page((unsigned long)retptr); + free_pages((unsigned long)retptr, get_order(kmem_handle->size)); goto kmem_alloc_mem_fail; } @@ -435,7 +440,7 @@ int pcidriver_kmem_free_entry(pcidriver_privdata_t *privdata, pcidriver_kmem_ent pci_unmap_single(privdata->pdev, kmem_entry->dma_handle, kmem_entry->size, PCI_DMA_FROMDEVICE); } } - free_page((unsigned long)kmem_entry->cpua); + free_pages((unsigned long)kmem_entry->cpua, get_order(kmem_entry->size)); break; } diff --git a/ipecamera/ipecamera.c b/ipecamera/ipecamera.c index 27bea77..b4bbeb8 100644 --- a/ipecamera/ipecamera.c +++ b/ipecamera/ipecamera.c @@ -123,7 +123,8 @@ pcilib_context_t *ipecamera_init(pcilib_t *pcilib) { ctx->firmware = value; break; default: - pcilib_error("Unsupported version of firmware (%lu)", value); +// pcilib_error("Unsupported version of firmware (%lu)", value); + ; } #ifdef IPECAMERA_BUG_POSTPONED_READ diff --git a/ipecamera/model.h b/ipecamera/model.h index 4c527df..37f9096 100644 --- a/ipecamera/model.h +++ b/ipecamera/model.h @@ -8,7 +8,7 @@ //#define IPECAMERA_DEBUG -#define IPECAMERA_DMA_R3 +//#define IPECAMERA_DMA_R3 #define IPECAMERA_DMA_ADDRESS 1 #define IPECAMERA_DMA_PACKET_LENGTH 4096 -- cgit v1.2.3 From f000eb0d43193ef8225f947226d9cd3deb00115b Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Sun, 14 Jul 2013 05:49:19 +0200 Subject: Support pre-allocated memory with memmap in KMem --- apps/xilinx2.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- driver/kmem.c | 30 +++++++++++++++++++++++++++--- kmem.c | 10 ++++++++-- pcilib_types.h | 5 ++++- 4 files changed, 82 insertions(+), 9 deletions(-) diff --git a/apps/xilinx2.c b/apps/xilinx2.c index 6dd1be3..90a2b6a 100644 --- a/apps/xilinx2.c +++ b/apps/xilinx2.c @@ -2,6 +2,7 @@ #define _POSIX_C_SOURCE 199309L #include #include +#include #include #include #include @@ -15,9 +16,10 @@ #define DEVICE "/dev/fpga0" #define BAR PCILIB_BAR0 #define USE PCILIB_KMEM_USE(PCILIB_KMEM_USE_USER, 1) +#define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters #define BUFFERS 1 -#define ITERATIONS 16384 -#define HUGE_PAGE 128 // number of pages per huge page +#define ITERATIONS 100 +#define HUGE_PAGE 4096 // number of pages per huge page #define PAGE_SIZE 4096 // other values are not supported in the kernel #define TIMEOUT 100000 @@ -28,6 +30,7 @@ much extra time */ //#define CHECK_READY //#define REALTIME //#define ADD_DELAYS +#define CHECK_RESULT //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); } //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; } @@ -64,7 +67,7 @@ void hpsleep(size_t ns) { int main() { int err; - int i, j; + long i, j; pcilib_t *pci; pcilib_kmem_handle_t *kbuf; uint32_t status; @@ -110,7 +113,30 @@ int main() { pcilib_clean_kernel_memory(pci, USE, clean_flags); +#ifdef STATIC_REGION + kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_REGION_C2S, BUFFERS, HUGE_PAGE * PAGE_SIZE, STATIC_REGION, USE, 0); +#else /* STATIC_REGION */ kbuf = pcilib_alloc_kernel_memory(pci, PCILIB_KMEM_TYPE_DMA_C2S_PAGE, BUFFERS, HUGE_PAGE * PAGE_SIZE, 4096, USE, 0); +#endif /* STATIC_REGION */ + + if (!kbuf) { + printf("KMem allocation failed\n"); + exit(0); + } + + +#ifdef CHECK_RESULT + volatile uint32_t *ptr0 = pcilib_kmem_get_block_ua(pci, kbuf, 0); + + memset((void*)ptr0, 0, (HUGE_PAGE * PAGE_SIZE)); + + for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) { + if (ptr0[i] != 0) break; + } + if (i < (HUGE_PAGE * PAGE_SIZE / 4)) { + printf("Initialization error in position %lu, value = %x\n", i * 4, ptr0[i]); + } +#endif /* CHECK_RESULT */ WR(0x04, 0) WR(0x0C, 0x20) @@ -163,6 +189,18 @@ int main() { } gettimeofday(&end, NULL); + +#ifdef CHECK_RESULT + pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0); + + for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) { + if (ptr0[i] != 0x13131313) break; + } + if (i < (HUGE_PAGE * PAGE_SIZE / 4)) { + printf("Error in position %lu, value = %x\n", i * 4, ptr0[i]); + } +#endif /* CHECK_RESULT */ + pcilib_free_kernel_memory(pci, kbuf, 0); pcilib_disable_irq(pci, 0); pcilib_unmap_bar(pci, BAR, bar); @@ -176,4 +214,6 @@ int main() { # ifdef ADD_DELAYS printf("Repeats: %lf, %lf\n",1. * rpt / (ITERATIONS * BUFFERS), 1. * rpt2 / (ITERATIONS * BUFFERS)); #endif /* USE_IRQ */ + + } diff --git a/driver/kmem.c b/driver/kmem.c index ee64a78..f36ff8c 100644 --- a/driver/kmem.c +++ b/driver/kmem.c @@ -129,15 +129,23 @@ int pcidriver_kmem_alloc(pcidriver_privdata_t *privdata, kmem_handle_t *kmem_han case PCILIB_KMEM_TYPE_CONSISTENT: retptr = pci_alloc_consistent( privdata->pdev, kmem_handle->size, &(kmem_entry->dma_handle) ); break; + case PCILIB_KMEM_TYPE_REGION: + retptr = ioremap(kmem_handle->pa, kmem_handle->size); + kmem_entry->dma_handle = kmem_handle->pa; + if (kmem_entry->type == PCILIB_KMEM_TYPE_REGION_S2C) { + kmem_entry->direction = PCI_DMA_TODEVICE; + } else if (kmem_entry->type == PCILIB_KMEM_TYPE_REGION_C2S) { + kmem_entry->direction = PCI_DMA_FROMDEVICE; + } + break; case PCILIB_KMEM_TYPE_PAGE: if (kmem_handle->size == 0) kmem_handle->size = PAGE_SIZE; else if (kmem_handle->size%PAGE_SIZE) goto kmem_alloc_mem_fail; - + retptr = (void*)__get_free_pages(GFP_KERNEL|__GFP_DMA, get_order(kmem_handle->size)); kmem_entry->dma_handle = 0; - kmem_handle->size = kmem_handle->size; if (retptr) { if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) { @@ -432,6 +440,9 @@ int pcidriver_kmem_free_entry(pcidriver_privdata_t *privdata, pcidriver_kmem_ent case PCILIB_KMEM_TYPE_CONSISTENT: pci_free_consistent( privdata->pdev, kmem_entry->size, (void *)(kmem_entry->cpua), kmem_entry->dma_handle ); break; + case PCILIB_KMEM_TYPE_REGION: + iounmap((void *)(kmem_entry->cpua)); + break; case PCILIB_KMEM_TYPE_PAGE: if (kmem_entry->dma_handle) { if (kmem_entry->type == PCILIB_KMEM_TYPE_DMA_S2C_PAGE) { @@ -609,12 +620,25 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v virt_to_phys((void*)kmem_entry->cpua), page_to_pfn(virt_to_page((void*)kmem_entry->cpua))); - ret = remap_pfn_range_cpua_compat( + if ((kmem_entry->type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) { + mod_info("Mapping address %08lx / Size %08lx\n", + kmem_entry->dma_handle, + (vma_size < kmem_entry->size)?vma_size:kmem_entry->size) + + ret = remap_pfn_range_compat( + vma, + vma->vm_start, + kmem_entry->dma_handle, + (vma_size < kmem_entry->size)?vma_size:kmem_entry->size, + vma->vm_page_prot); + } else { + ret = remap_pfn_range_cpua_compat( vma, vma->vm_start, kmem_entry->cpua, (vma_size < kmem_entry->size)?vma_size:kmem_entry->size, vma->vm_page_prot ); + } if (ret) { mod_info("kmem remap failed: %d (%lx)\n", ret,kmem_entry->cpua); diff --git a/kmem.c b/kmem.c index d693b60..64f593a 100644 --- a/kmem.c +++ b/kmem.c @@ -91,13 +91,19 @@ pcilib_kmem_handle_t *pcilib_alloc_kernel_memory(pcilib_t *ctx, pcilib_kmem_type kh.align = alignment; kh.use = use; - if ((type&PCILIB_KMEM_TYPE_MASK) != PCILIB_KMEM_TYPE_PAGE) { + if ((type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) { + kh.align = 0; + } else if ((type&PCILIB_KMEM_TYPE_MASK) != PCILIB_KMEM_TYPE_PAGE) { kh.size += alignment; } - + for ( i = 0; i < nmemb; i++) { kh.item = i; kh.flags = flags; + + if ((type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) { + kh.pa = alignment + i * size; + } ret = ioctl(ctx->handle, PCIDRIVER_IOC_KMEM_ALLOC, &kh); if (ret) { diff --git a/pcilib_types.h b/pcilib_types.h index f4f8f20..52c0879 100644 --- a/pcilib_types.h +++ b/pcilib_types.h @@ -15,7 +15,10 @@ typedef enum { PCILIB_KMEM_TYPE_CONSISTENT = 0x00000, PCILIB_KMEM_TYPE_PAGE = 0x10000, PCILIB_KMEM_TYPE_DMA_S2C_PAGE = 0x10001, - PCILIB_KMEM_TYPE_DMA_C2S_PAGE = 0x10002 + PCILIB_KMEM_TYPE_DMA_C2S_PAGE = 0x10002, + PCILIB_KMEM_TYPE_REGION = 0x20000, + PCILIB_KMEM_TYPE_REGION_S2C = 0x20001, + PCILIB_KMEM_TYPE_REGION_C2S = 0x20002 } pcilib_kmem_type_t; typedef enum { -- cgit v1.2.3 From 57fe574e058bc34c95a71995060ac45f71300ab6 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Fri, 26 Jul 2013 17:02:23 +0200 Subject: Allow setting of TLP size --- apps/xilinx2.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/apps/xilinx2.c b/apps/xilinx2.c index 90a2b6a..30e0f94 100644 --- a/apps/xilinx2.c +++ b/apps/xilinx2.c @@ -19,6 +19,7 @@ #define STATIC_REGION 0x80000000 // to reserve 512 MB at the specified address, add "memmap=512M$2G" to kernel parameters #define BUFFERS 1 #define ITERATIONS 100 +#define TLP_SIZE 64 #define HUGE_PAGE 4096 // number of pages per huge page #define PAGE_SIZE 4096 // other values are not supported in the kernel #define TIMEOUT 100000 @@ -26,7 +27,7 @@ /* IRQs are slow for some reason. REALTIME mode is slower. Adding delays does not really help, otherall we have only 3 checks in average. Check ready seems to be not needed and adds quite much extra time */ -//#define USE_IRQ +#define USE_IRQ //#define CHECK_READY //#define REALTIME //#define ADD_DELAYS @@ -139,8 +140,8 @@ int main() { #endif /* CHECK_RESULT */ WR(0x04, 0) - WR(0x0C, 0x20) - WR(0x10, (HUGE_PAGE * (PAGE_SIZE / 0x80))) + WR(0x0C, TLP_SIZE) + WR(0x10, (HUGE_PAGE * (PAGE_SIZE / (4 * TLP_SIZE)))) WR(0x14, 0x13131313) for (j = 0; j < BUFFERS; j++ ) { @@ -194,6 +195,7 @@ int main() { pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0); for (i = 0; i < (HUGE_PAGE * PAGE_SIZE / 4); i++) { +// printf("%lx ", ptr0[i]); if (ptr0[i] != 0x13131313) break; } if (i < (HUGE_PAGE * PAGE_SIZE / 4)) { -- cgit v1.2.3 From f5b4c23cc79affe851016c7ef7970b3e3489fad3 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Fri, 26 Jul 2013 18:30:01 +0200 Subject: Add missing semicolon in the driver --- driver/kmem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/driver/kmem.c b/driver/kmem.c index f36ff8c..b8ce0d0 100644 --- a/driver/kmem.c +++ b/driver/kmem.c @@ -621,9 +621,9 @@ int pcidriver_mmap_kmem(pcidriver_privdata_t *privdata, struct vm_area_struct *v page_to_pfn(virt_to_page((void*)kmem_entry->cpua))); if ((kmem_entry->type&PCILIB_KMEM_TYPE_MASK) == PCILIB_KMEM_TYPE_REGION) { - mod_info("Mapping address %08lx / Size %08lx\n", - kmem_entry->dma_handle, - (vma_size < kmem_entry->size)?vma_size:kmem_entry->size) + mod_info("Mapping address %08lx / Size %08lx\n", + (unsigned long)kmem_entry->dma_handle, + (vma_size < kmem_entry->size)?vma_size:kmem_entry->size); ret = remap_pfn_range_compat( vma, -- cgit v1.2.3 From 7d1a222eaa8cb0965446ad0e745271c070521e78 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Fri, 26 Jul 2013 18:30:47 +0200 Subject: Support offseted BARs in the xilinx2 test app --- apps/xilinx2.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/apps/xilinx2.c b/apps/xilinx2.c index 30e0f94..757c388 100644 --- a/apps/xilinx2.c +++ b/apps/xilinx2.c @@ -35,8 +35,8 @@ much extra time */ //#define WR(addr, value) { val = value; pcilib_write(pci, BAR, addr, sizeof(val), &val); } //#define RD(addr, value) { pcilib_read(pci, BAR, addr, sizeof(val), &val); value = val; } -#define WR(addr, value) { *(uint32_t*)(bar + addr) = value; } -#define RD(addr, value) { value = *(uint32_t*)(bar + addr); } +#define WR(addr, value) { *(uint32_t*)(bar + addr + offset) = value; } +#define RD(addr, value) { value = *(uint32_t*)(bar + addr + offset); } static void fail(const char *msg, ...) { va_list va; @@ -77,6 +77,9 @@ int main() { void* volatile bar; uintptr_t bus_addr[BUFFERS]; + pcilib_bar_t bar_tmp = BAR; + uintptr_t offset = 0; + pcilib_kmem_flags_t clean_flags = PCILIB_KMEM_FLAG_HARDWARE|PCILIB_KMEM_FLAG_PERSISTENT|PCILIB_KMEM_FLAG_EXCLUSIVE; #ifdef ADD_DELAYS @@ -104,6 +107,8 @@ int main() { fail("map bar"); } + pcilib_detect_address(pci, &bar_tmp, &offset, 1); + // Reset WR(0x00, 1) usleep(1000); @@ -190,7 +195,6 @@ int main() { } gettimeofday(&end, NULL); - #ifdef CHECK_RESULT pcilib_kmem_sync_block(pci, kbuf, PCILIB_KMEM_SYNC_FROMDEVICE, 0); -- cgit v1.2.3 From c57db04f528e671040256d322bb8f21a8d8e9ac1 Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Fri, 26 Jul 2013 19:10:37 +0200 Subject: Read out performance counters for Xilinx DMA with big buffers --- apps/counters.sh | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100755 apps/counters.sh diff --git a/apps/counters.sh b/apps/counters.sh new file mode 100755 index 0000000..e3ba0f5 --- /dev/null +++ b/apps/counters.sh @@ -0,0 +1,117 @@ +#! /bin/bash + +BAR=0 +USE=1 +ITERATIONS=1 +TLP_SIZE=32 +BUFFER_SIZE=8 + +function pci { + PCILIB_PATH=`pwd`/.. + LD_LIBRARY_PATH="$PCILIB_PATH" $PCILIB_PATH/pci $* +} + + +function reset { + pci -b $BAR -w 0 1 + usleep 1000 + pci -b $BAR -w 0 0 + pci -b $BAR -w 4 0 +} + +function read_cfg { +# echo $1 1>&2 + pci -a config -r 0x$1 | awk '{ print $2; }' +} + +function parse_config { + info=0x`pci -b $BAR -r 0 | awk '{ print $2; }'` + model=`printf "%X" $((info>>24))` + if [ $model -eq 14 ]; then + model="Xilinx Virtex-6" + else + model="Xilinx $model" + fi + version=$(((info >> 8) & 0xFF)) + data_width=$((16 * (2 ** ((info >> 16) & 0xF)))) + + echo "$model, build $version, $data_width bits" + + + next=`read_cfg 34 | cut -c 7-8` + + while [ $next -ne 0 ]; do + cap=`read_cfg $next` + capid=`echo $cap | cut -c 7-8` + if [ $capid -eq 10 ]; then + addr=`printf "%X" $((0x$next + 12))` + pcie_link1=`read_cfg $addr` + addr=`printf "%X" $((0x$next + 16))` + pcie_link2=`read_cfg $addr` + + link_speed=$((((0x$pcie_link2 & 0xF0000) >> 16))) + link_width=$((((0x$pcie_link2 & 0x3F00000) >> 20))) + + dev_link_speed=$((((0x$pcie_link1 & 0xF)))) + dev_link_width=$((((0x$pcie_link1 & 0x3F0) >> 4))) + fi + next=`echo $cap | cut -c 5-6` + done + + echo "Link: PCIe gen$link_speed x$link_width" + if [ $link_speed -ne $dev_link_speed -o $link_width -ne $dev_link_width ]; then + echo " * But device capable of gen$dev_link_speed x$dev_link_width" + fi + + info=0x`read_cfg 40` + max_tlp=$((2 ** (5 + ((info & 0xE0) >> 5)))) + echo "TLP: 32 dwords (transfering 32 TLP per request)" + if [ $max_tlp -ne $TLP_SIZE ]; then + echo " * But device is able to transfer TLP up to $max_tlp bytes" + fi + + # 2500 MT/s, but PCIe gen1 and gen2 uses 10 bit encoding + speed=$((link_width * link_speed * 2500 / 10)) +} + +reset +parse_config + +pci --enable-irq +pci --acknowledge-irq + +# TLP size +pci -b $BAR -w 0x0C 0x`echo "obase=16; $TLP_SIZE" | bc` +# TLP count +pci -b $BAR -w 0x10 0x`echo "obase=16; $BUFFER_SIZE * 1024 * 1024 / $TLP_SIZE / 4" | bc` +# Data +pci -b $BAR -w 0x14 0x13131313 + +bus="80000000" +dmaperf=0 +for i in `seq 1 $ITERATIONS`; do + for addr in $bus; do + pci -b $BAR -w 0x08 0x$addr + +#Trigger + pci -b $BAR -w 0x04 0x01 + pci --wait-irq + + status=`pci -b $BAR -r 0x04 | awk '{print $2; }' | cut -c 5-8` + if [ $status != "0101" ]; then + echo "Read failed, invalid status: $status" + fi + + dmaperf=$((dmaperf + 0x`pci -b $BAR -r 0x28 | awk '{print $2}'`)) + reset + done +done + +pci --free-kernel-memory $USE +pci --disable-irq + +echo +# Don't ask me about this formula +echo "Performance reported by FPGA: $(($BUFFER_SIZE * 1024 * 1024 * ITERATIONS * $speed / $dmaperf / 8)) MB/s" + +#pci -b $BAR -r 0 -s 32 -- cgit v1.2.3