From b9a1af8a3b03a5e8575d87c3593a5931cd0e7a8c Mon Sep 17 00:00:00 2001 From: "Suren A. Chilingaryan" Date: Wed, 6 May 2015 05:46:01 +0200 Subject: Add fields reporting consumed buffers and space to the dma_engine_status and provide better ipedma benchmarking --- dma/CMakeLists.txt | 2 +- dma/ipe.c | 143 +++++++++++++++++------------------------------ dma/ipe.h | 2 - dma/ipe_private.h | 8 ++- dma/nwl_engine_buffers.h | 13 ++++- docs/README | 3 + pcilib/dma.h | 1 + 7 files changed, 75 insertions(+), 97 deletions(-) diff --git a/dma/CMakeLists.txt b/dma/CMakeLists.txt index 8a3a8e1..38f13eb 100644 --- a/dma/CMakeLists.txt +++ b/dma/CMakeLists.txt @@ -5,7 +5,7 @@ include_directories( set(HEADERS ${HEADERS} nwl.h nwl_private.h nwl_engine.h nwl_irq.h nwl_loopback.h ipe.h ipe_private.h) -add_library(dma STATIC nwl.c nwl_engine.c nwl_irq.c nwl_loopback.c ipe.c) +add_library(dma STATIC nwl.c nwl_engine.c nwl_irq.c nwl_loopback.c ipe.c ipe_benchmark.c) #set(HEADERS ${HEADERS} ipe.h ipe_private.h) #add_library(dma STATIC ipe.c) diff --git a/dma/ipe.c b/dma/ipe.c index 5ad28e1..8d35a14 100644 --- a/dma/ipe.c +++ b/dma/ipe.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -18,8 +19,6 @@ #include "ipe_private.h" -#define WR(addr, value) { *(uint32_t*)(ctx->base_addr + addr) = value; } -#define RD(addr, value) { value = *(uint32_t*)(ctx->base_addr + addr); } pcilib_dma_context_t *dma_ipe_init(pcilib_t *pcilib, const char *model, const void *arg) { @@ -156,9 +155,6 @@ int dma_ipe_start(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dm ctx->reused = 1; ctx->preserve = 1; - -// usleep(100000); - // Detect the current state of DMA engine #ifdef IPEDMA_BUG_DMARD FILE *f = fopen("/tmp/pcitool_lastread", "r"); @@ -182,13 +178,13 @@ int dma_ipe_start(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dm // Disable DMA WR(IPEDMA_REG_CONTROL, 0x0); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); // Reset DMA engine WR(IPEDMA_REG_RESET, 0x1); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); WR(IPEDMA_REG_RESET, 0x0); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); #ifndef IPEDMA_BUG_DMARD // Verify PCIe link status @@ -226,15 +222,15 @@ int dma_ipe_start(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dm uintptr_t bus_addr_check, bus_addr = pcilib_kmem_get_block_ba(ctx->dmactx.pcilib, pages, i); WR(IPEDMA_REG_PAGE_ADDR, bus_addr); if (bus_addr%4096) printf("Bad address %lu: %lx\n", i, bus_addr); - + RD(IPEDMA_REG_PAGE_ADDR, bus_addr_check); if (bus_addr_check != bus_addr) { pcilib_error("Written (%x) and read (%x) bus addresses does not match\n", bus_addr, bus_addr_check); } - - usleep(1000); + + usleep(IPEDMA_ADD_PAGE_DELAY); } - + // Enable DMA WR(IPEDMA_REG_CONTROL, 0x1); @@ -284,17 +280,17 @@ int dma_ipe_stop(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcilib_dma // Disable DMA WR(IPEDMA_REG_CONTROL, 0); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); // Reset DMA engine WR(IPEDMA_REG_RESET, 0x1); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); WR(IPEDMA_REG_RESET, 0x0); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); // Reseting configured DMA pages WR(IPEDMA_REG_PAGE_COUNT, 0); - usleep(100000); + usleep(IPEDMA_RESET_DELAY); } // Clean buffers @@ -348,6 +344,8 @@ int dma_ipe_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcil status->started = ctx->started; status->ring_size = ctx->ring_size; status->buffer_size = ctx->page_size; + status->written_buffers = 0; + status->written_bytes = 0; // For simplicity, we keep last_read here, and fix in the end status->ring_tail = ctx->last_read; @@ -365,31 +363,45 @@ int dma_ipe_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcil if (n_buffers > ctx->ring_size) n_buffers = ctx->ring_size; - if (buffers) { + if (buffers) memset(buffers, 0, n_buffers * sizeof(pcilib_dma_buffer_status_t)); - if (status->ring_head >= status->ring_tail) { - for (i = status->ring_tail + 1; (i <= status->ring_head)&&(i < n_buffers); i++) { + if (status->ring_head >= status->ring_tail) { + for (i = status->ring_tail + 1; i <= status->ring_head; i++) { + status->written_buffers++; + status->written_bytes += ctx->page_size; + + if ((buffers)&&(i < n_buffers)) { buffers[i].used = 1; buffers[i].size = ctx->page_size; buffers[i].first = 1; buffers[i].last = 1; } - } else { - for (i = 0; (i <= status->ring_head)&&(i < n_buffers); i++) { + } + } else { + for (i = 0; i <= status->ring_head; i++) { + status->written_buffers++; + status->written_bytes += ctx->page_size; + + if ((buffers)&&(i < n_buffers)) { buffers[i].used = 1; buffers[i].size = ctx->page_size; buffers[i].first = 1; buffers[i].last = 1; - } + } + } - for (i = status->ring_tail + 1; (i < status->ring_size)&&(i < n_buffers); i++) { + for (i = status->ring_tail + 1; i < status->ring_size; i++) { + status->written_buffers++; + status->written_bytes += ctx->page_size; + + if ((buffers)&&(i < n_buffers)) { buffers[i].used = 1; buffers[i].size = ctx->page_size; buffers[i].first = 1; buffers[i].last = 1; - } - } + } + } } // We actually keep last_read in the ring_tail, so need to increase @@ -414,11 +426,22 @@ int dma_ipe_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uin volatile uint32_t *empty_detected_ptr; pcilib_dma_flags_t packet_flags = PCILIB_DMA_FLAG_EOP; + size_t nodata_sleep; #ifdef IPEDMA_BUG_DMARD pcilib_register_value_t value; #endif /* IPEDMA_BUG_DMARD */ + switch (sched_getscheduler(0)) { + case SCHED_FIFO: + case SCHED_RR: + nodata_sleep = IPEDMA_NODATA_SLEEP; + break; + default: + pcilib_info_once("Streaming DMA data using non real-time thread (may cause extra CPU load)", errno); + nodata_sleep = 0; + } + size_t cur_read; ipe_dma_t *ctx = (ipe_dma_t*)vctx; @@ -458,7 +481,9 @@ int dma_ipe_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uin gettimeofday(&start, NULL); memcpy(&cur, &start, sizeof(struct timeval)); while (((*last_written_addr_ptr == 0)||(ctx->last_read_addr == (*last_written_addr_ptr)))&&((wait == PCILIB_TIMEOUT_INFINITE)||(((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)) < wait))) { - usleep(10); + if (nodata_sleep) + usleep(nodata_sleep); + #ifdef IPEDMA_SUPPORT_EMPTY_DETECTED if ((ret != PCILIB_STREAMING_REQ_PACKET)&&(*empty_detected_ptr)) break; #endif /* IPEDMA_SUPPORT_EMPTY_DETECTED */ @@ -527,69 +552,3 @@ int dma_ipe_stream_read(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, uin return 0; } - -double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) { - int err = 0; - - ipe_dma_t *ctx = (ipe_dma_t*)vctx; - - int iter; - size_t us = 0; - struct timeval start, cur; - - void *buf; - size_t bytes, rbytes; - - if ((direction == PCILIB_DMA_TO_DEVICE)||(direction == PCILIB_DMA_BIDIRECTIONAL)) return -1.; - - if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return -1.; - - err = dma_ipe_start(vctx, 0, PCILIB_DMA_FLAGS_DEFAULT); - if (err) return err; - - WR(IPEDMA_REG_CONTROL, 0x0); - - err = pcilib_skip_dma(ctx->dmactx.pcilib, 0); - if (err) { - pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine"); - return -1; - } - - if (size%IPEDMA_PAGE_SIZE) size = (1 + size / IPEDMA_PAGE_SIZE) * IPEDMA_PAGE_SIZE; - - // Allocate memory and prepare data - buf = malloc(size); - if (!buf) return -1; - - for (iter = 0; iter < iterations; iter++) { - gettimeofday(&start, NULL); - - // Starting DMA - WR(IPEDMA_REG_CONTROL, 0x1); - - for (bytes = 0; bytes < size; bytes += rbytes) { - err = pcilib_read_dma(ctx->dmactx.pcilib, 0, addr, size - bytes, buf + bytes, &rbytes); - if (err) { - pcilib_error("Can't read data from DMA, error %i", err); - return -1; - } - } - - // Stopping DMA - WR(IPEDMA_REG_CONTROL, 0x0); - if (err) break; - - gettimeofday(&cur, NULL); - us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec)); - - err = pcilib_skip_dma(ctx->dmactx.pcilib, 0); - if (err) { - pcilib_error("Can't start iteration, devices continuously writes unexpected data using DMA engine"); - break; - } - } - - free(buf); - - return err?-1:((1. * size * iterations * 1000000) / (1024. * 1024. * us)); -} diff --git a/dma/ipe.h b/dma/ipe.h index 6e05fe1..44fbc99 100644 --- a/dma/ipe.h +++ b/dma/ipe.h @@ -5,8 +5,6 @@ #include "pcilib.h" #include "version.h" -//#define PCILIB_NWL_MODIFICATION_IPECAMERA 0x100 - pcilib_dma_context_t *dma_ipe_init(pcilib_t *ctx, const char *model, const void *arg); void dma_ipe_free(pcilib_dma_context_t *vctx); diff --git a/dma/ipe_private.h b/dma/ipe_private.h index 82ea73a..1fb4e9e 100644 --- a/dma/ipe_private.h +++ b/dma/ipe_private.h @@ -7,12 +7,15 @@ #define IPEDMA_CORES 1 #define IPEDMA_TLP_SIZE 32 #define IPEDMA_PAGE_SIZE 4096 -#define IPEDMA_DMA_PAGES 16 /**< number of DMA pages in the ring buffer to allocate */ +#define IPEDMA_DMA_PAGES 32 /**< number of DMA pages in the ring buffer to allocate */ #define IPEDMA_DMA_PROGRESS_THRESHOLD 1 /**< how many pages the DMA engine should fill before reporting progress */ #define IPEDMA_DESCRIPTOR_SIZE 128 #define IPEDMA_DESCRIPTOR_ALIGNMENT 64 #define IPEDMA_BUG_LAST_READ /**< We should forbid writting the second last available DMA buffer (the last is forbidden by design) */ +#define IPEDMA_RESET_DELAY 100000 /**< Sleep between accessing DMA control and reset registers */ +#define IPEDMA_ADD_PAGE_DELAY 1000 /**< Delay between submitting successive DMA pages into IPEDMA_REG_PAGE_ADDR register */ +#define IPEDMA_NODATA_SLEEP 10 /**< To keep CPU free */ //#define IPEDMA_BUG_DMARD /**< No register read during DMA transfer */ //#define IPEDMA_DETECT_PACKETS /**< Using empty_deceted flag */ @@ -30,6 +33,9 @@ #define IPEDMA_REG_UPDATE_THRESHOLD 0x60 +#define WR(addr, value) { *(uint32_t*)(ctx->base_addr + addr) = value; } +#define RD(addr, value) { value = *(uint32_t*)(ctx->base_addr + addr); } + typedef struct ipe_dma_s ipe_dma_t; diff --git a/dma/nwl_engine_buffers.h b/dma/nwl_engine_buffers.h index d3af079..ef1c74f 100644 --- a/dma/nwl_engine_buffers.h +++ b/dma/nwl_engine_buffers.h @@ -374,6 +374,8 @@ int dma_nwl_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcil status->ring_size = ectx->ring_size; status->buffer_size = ectx->page_size; status->ring_tail = ectx->tail; + status->written_buffers = 0; + status->written_bytes = 0; if (ectx->desc->direction == PCILIB_DMA_FROM_DEVICE) { size_t pos = 0; @@ -402,7 +404,16 @@ int dma_nwl_get_status(pcilib_dma_context_t *vctx, pcilib_dma_engine_t dma, pcil ring += PCILIB_NWL_DMA_DESCRIPTOR_SIZE; } + } + + for (i = 0; (i < ectx->ring_size)&&(i < n_buffers); i++) { + bstatus = NWL_RING_GET(ring, DMA_BD_BUFL_STATUS_OFFSET); + if (bstatus & DMA_BD_COMP_MASK) { + status->written_buffers++; + if ((bstatus & (DMA_BD_ERROR_MASK)) == 0) + status->written_bytes += bstatus & DMA_BD_BUFL_MASK; + } } - + return 0; } diff --git a/docs/README b/docs/README index 1cbf8ee..647df41 100644 --- a/docs/README +++ b/docs/README @@ -8,4 +8,7 @@ Supported environmental variables IPECAMERA_DEBUG_RAW_PACKETS - Store all raw packets read from DMA grouped in frames (variable may specify directory) IPECAMERA_DEBUG_RAW_FRAMES - Store all raw frames (variable may specify directory) IPECAMERA_DEBUG_HARDWARE - Produce various debugging information about ipecamera operation + + PCILIB_BENCHMARK_HARDWARE - Remove all unnecessary software processing (like copying memcpy) to check hardware performance + PCILIB_BENCHMARK_STREAMING - Emulate streaming mode while benchmarking DMA engines \ No newline at end of file diff --git a/pcilib/dma.h b/pcilib/dma.h index 85eb943..36ead8d 100644 --- a/pcilib/dma.h +++ b/pcilib/dma.h @@ -19,6 +19,7 @@ typedef struct { int started; /**< Informs if the engine is currently started or not */ size_t ring_size, buffer_size; /**< The number of allocated DMA buffers and size of each buffer in bytes */ size_t ring_head, ring_tail; /**< The first and the last buffer containing the data */ + size_t written_buffers, written_bytes; /**< Number of pending buffers and total number of written bytes */ } pcilib_dma_engine_status_t; typedef enum { -- cgit v1.2.3