diff options
-rw-r--r-- | dma/ipe_benchmark.c | 225 | ||||
-rwxr-xr-x | tests/device_info.sh | 92 | ||||
-rwxr-xr-x | tests/new_device.sh | 6 | ||||
-rwxr-xr-x | tests/reload.sh | 25 |
4 files changed, 342 insertions, 6 deletions
diff --git a/dma/ipe_benchmark.c b/dma/ipe_benchmark.c
new file mode 100644
index 0000000..3c10715
--- /dev/null
+++ b/dma/ipe_benchmark.c
@@ -0,0 +1,225 @@
+#define _PCILIB_DMA_IPE_C
+#define _BSD_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sched.h>
+#include <sys/time.h>
+#include <arpa/inet.h>
+
+#include "pci.h"
+#include "pcilib.h"
+#include "error.h"
+#include "tools.h"
+#include "debug.h"
+
+#include "ipe.h"
+#include "ipe_private.h"
+
+
+/* Bookkeeping shared between dma_ipe_skim_dma_custom() and its callback */
+typedef struct {
+    size_t size;                /* bytes requested by the caller */
+    size_t pos;                 /* bytes passed through the callback so far */
+    pcilib_dma_flags_t flags;   /* flags the transfer was requested with */
+} dma_ipe_skim_callback_context_t;
+
+/*
+ * Streaming callback which only counts the incoming bytes, the data itself is
+ * never touched. The return value tells the streaming code how to proceed.
+ */
+static int dma_ipe_skim_callback(void *arg, pcilib_dma_flags_t flags, size_t bufsize, void *buf) {
+    dma_ipe_skim_callback_context_t *ctx = (dma_ipe_skim_callback_context_t*)arg;
+
+    ctx->pos += bufsize;
+
+    // The packet is not complete yet, ask for its next fragment
+    if (!(flags & PCILIB_DMA_FLAG_EOP)) return PCILIB_STREAMING_REQ_FRAGMENT;
+
+    // End of packet: continue only if data is still missing and multiple packets are allowed
+    if ((ctx->pos < ctx->size)&&(ctx->flags&PCILIB_DMA_FLAG_MULTIPACKET)) {
+        if (ctx->flags&PCILIB_DMA_FLAG_WAIT) return PCILIB_STREAMING_WAIT;
+        return PCILIB_STREAMING_CONTINUE;
+    }
+
+    return PCILIB_STREAMING_STOP;
+}
+
+/*
+ * Streams the data through dma_ipe_skim_callback() without copying it. The
+ * signature matches the read_dma pointer of dma_ipe_benchmark(), buf is unused.
+ * If read_bytes is not NULL, it receives the number of bytes streamed.
+ */
+int dma_ipe_skim_dma_custom(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes) {
+    int err;
+
+    dma_ipe_skim_callback_context_t opts = {
+        size, 0, flags
+    };
+
+    err = pcilib_stream_dma(ctx, dma, addr, size, flags, timeout, dma_ipe_skim_callback, &opts);
+    if (read_bytes) *read_bytes = opts.pos;
+    return err;
+}
+
+
+/*
+ * Measures the read throughput of the IPE DMA engine.
+ *
+ * Only reading from the device is supported and the size is rounded up to a
+ * multiple of IPEDMA_PAGE_SIZE. The mode is selected by environment variables:
+ *  PCILIB_BENCHMARK_HARDWARE  - count the data only instead of copying it
+ *  PCILIB_BENCHMARK_STREAMING - keep the DMA running across all iterations
+ *
+ * Returns the speed in MiB/s or a negative value on failure.
+ */
+double dma_ipe_benchmark(pcilib_dma_context_t *vctx, pcilib_dma_engine_addr_t dma, uintptr_t addr, size_t size, size_t iterations, pcilib_dma_direction_t direction) {
+    int err = 0;
+
+    ipe_dma_t *ctx = (ipe_dma_t*)vctx;
+
+    size_t iter;
+    size_t us = 0;
+    struct timeval start, cur;
+
+    char *buf = NULL;
+    size_t bytes, rbytes;
+
+    int (*read_dma)(pcilib_t *ctx, pcilib_dma_engine_t dma, uintptr_t addr, size_t size, pcilib_dma_flags_t flags, pcilib_timeout_t timeout, void *buf, size_t *read_bytes);
+
+    if ((direction == PCILIB_DMA_TO_DEVICE)||(direction == PCILIB_DMA_BIDIRECTIONAL)) return -1.;
+
+    if ((dma != PCILIB_DMA_ENGINE_INVALID)&&(dma > 1)) return -1.;
+
+    if (size%IPEDMA_PAGE_SIZE) size = (1 + size / IPEDMA_PAGE_SIZE) * IPEDMA_PAGE_SIZE;
+
+    // An error code returned as is would be taken for a speed by the caller
+    err = dma_ipe_start(vctx, 0, PCILIB_DMA_FLAGS_DEFAULT);
+    if (err) return -1.;
+
+    if (getenv("PCILIB_BENCHMARK_HARDWARE"))
+        read_dma = dma_ipe_skim_dma_custom;
+    else
+        read_dma = pcilib_read_dma_custom;
+
+    // There is no significant difference and we can remove this when testing phase is over.
+    if (getenv("PCILIB_BENCHMARK_STREAMING")) {
+        size_t dma_buffer_space;
+        pcilib_dma_engine_status_t dma_status;
+
+        if (read_dma == pcilib_read_dma_custom)
+            pcilib_info_once("Benchmarking the DMA streaming (with memcpy)");
+        else
+            pcilib_info_once("Benchmarking the DMA streaming (without memcpy)");
+
+        // Starting DMA
+        WR(IPEDMA_REG_CONTROL, 0x1);
+
+        gettimeofday(&start, NULL);
+        pcilib_calc_deadline(&start, IPEDMA_DMA_TIMEOUT * IPEDMA_DMA_PAGES);
+
+#ifdef IPEDMA_BUG_LAST_READ
+        dma_buffer_space = (IPEDMA_DMA_PAGES - 2) * IPEDMA_PAGE_SIZE;
+#else /* IPEDMA_BUG_LAST_READ */
+        dma_buffer_space = (IPEDMA_DMA_PAGES - 1) * IPEDMA_PAGE_SIZE;
+#endif /* IPEDMA_BUG_LAST_READ */
+
+        // Allocate memory and prepare data
+        buf = malloc(size + dma_buffer_space);
+        if (!buf) goto failed;
+
+        // Wait until all DMA buffers are filled
+        memset(&dma_status, 0, sizeof(dma_status));
+        do {
+            usleep(10 * IPEDMA_NODATA_SLEEP);
+            err = dma_ipe_get_status(vctx, dma, &dma_status, 0, NULL);
+        } while ((!err)&&(dma_status.written_bytes < dma_buffer_space)&&(pcilib_calc_time_to_deadline(&start) > 0));
+
+        if (err) {
+            pcilib_error("Error (%i) getting dma status", err);
+            goto failed;
+        } else if (dma_status.written_bytes < dma_buffer_space) {
+            pcilib_error("Timeout while waiting DMA engine to feel the buffer space completely, only %zu bytes of %zu written", dma_status.written_bytes, dma_buffer_space);
+            goto failed;
+        }
+
+        gettimeofday(&start, NULL);
+        for (iter = 0; iter < iterations; iter++) {
+            for (bytes = 0; bytes < (size + dma_buffer_space); bytes += rbytes) {
+                err = read_dma(ctx->dmactx.pcilib, 0, addr, size + dma_buffer_space - bytes, PCILIB_DMA_FLAG_MULTIPACKET, PCILIB_DMA_TIMEOUT, buf + bytes, &rbytes);
+                if (err) {
+                    pcilib_error("Can't read data from DMA, error %i", err);
+                    goto failed;
+                }
+            }
+            // The pre-filled DMA buffers are drained by the first iteration only
+            dma_buffer_space = 0;
+        }
+
+        gettimeofday(&cur, NULL);
+        us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
+
+        // Stopping DMA
+        WR(IPEDMA_REG_CONTROL, 0x0);
+        pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+    } else {
+        if (read_dma == dma_ipe_skim_dma_custom)
+            pcilib_info_once("Benchmarking the DMA hardware (without memcpy)");
+
+        WR(IPEDMA_REG_CONTROL, 0x0);
+
+        err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+        if (err) {
+            pcilib_error("Can't start benchmark, devices continuously writes unexpected data using DMA engine");
+            return -1.;
+        }
+
+        // Allocate memory and prepare data
+        buf = malloc(size);
+        if (!buf) return -1.;
+
+        for (iter = 0; iter < iterations; iter++) {
+            gettimeofday(&start, NULL);
+
+            // Starting DMA
+            WR(IPEDMA_REG_CONTROL, 0x1);
+
+            for (bytes = 0; bytes < size; bytes += rbytes) {
+                err = read_dma(ctx->dmactx.pcilib, 0, addr, size - bytes, PCILIB_DMA_FLAG_MULTIPACKET, PCILIB_DMA_TIMEOUT, buf + bytes, &rbytes);
+                if (err) {
+                    pcilib_error("Can't read data from DMA, error %i", err);
+                    // Leave the loop only, the DMA still has to be stopped below
+                    break;
+                }
+            }
+
+            // Stopping DMA
+            WR(IPEDMA_REG_CONTROL, 0x0);
+            if (err) break;
+
+            gettimeofday(&cur, NULL);
+            us += ((cur.tv_sec - start.tv_sec)*1000000 + (cur.tv_usec - start.tv_usec));
+
+            err = pcilib_skip_dma(ctx->dmactx.pcilib, 0);
+            if (err) {
+                pcilib_error("Can't start iteration, devices continuously writes unexpected data using DMA engine");
+                break;
+            }
+        }
+    }
+
+    free(buf);
+
+    // Nothing has been timed (e.g. no iterations requested), do not divide by zero
+    if ((err)||(!us)) return -1.;
+
+    return (1. * size * iterations * 1000000) / (1024. * 1024. * us);
+
+failed:
+    // The benchmark is aborted: stop the DMA and release the buffer
+    WR(IPEDMA_REG_CONTROL, 0x0);
+    free(buf);
+    return -1.;
+}
diff --git a/tests/device_info.sh b/tests/device_info.sh
new file mode 100755
index 0000000..21e59db
--- /dev/null
+++ b/tests/device_info.sh
@@ -0,0 +1,92 @@
+#! /bin/bash
+
+BAR=0
+
+# Runs the pci tool found relative to the parent of the current directory
+function pci {
+    PCILIB_PATH=`pwd`/..
+    LD_LIBRARY_PATH="$PCILIB_PATH/pcilib" "$PCILIB_PATH/pcitool/pci" "$@"
+}
+
+
+# Prints the register found in the PCI configuration space at hex offset $1
+function read_cfg {
+    pci -a config -r 0x$1 | awk '{ print $2; }' | sed -e 's/\s*//g' -e '/^\s*$/d'
+}
+
+# Prints the FPGA model, the PCIe link parameters and the payload size
+function parse_config {
+    info=0x`pci -b $BAR -r 0 | awk '{ print $2; }' | sed -e 's/\s*//g' -e '/^\s*$/d'`
+    model=`printf "%X" $((info>>24))`
+    # The model is a hex string, it can't be compared as a number
+    if [ "$model" = "14" ]; then
+        model="Xilinx Virtex-6"
+    else
+        model="Xilinx $model"
+    fi
+    version=$(((info >> 8) & 0xFF))
+    data_width=$((16 * (2 ** ((info >> 16) & 0xF))))
+
+    echo "$model, build $version, $data_width bits"
+
+
+    # Walk through the capability list looking for PCI Express (ID 0x10)
+    next=`read_cfg 34 | cut -c 7-8`
+#    next=`printf "%u" $next`
+
+    while [ $((0x$next)) -ne 0 ]; do
+        cap=`read_cfg $next`
+        capid=`echo $cap | cut -c 7-8`
+        if [ "$capid" = "10" ]; then
+            addr=`printf "%X" $((0x$next + 4))`
+            device_capabilities=`read_cfg $addr`
+
+            addr=`printf "%X" $((0x$next + 8))`
+            device_control=`read_cfg $addr`
+
+            addr=`printf "%X" $((0x$next + 12))`
+            pcie_link1=`read_cfg $addr`
+            addr=`printf "%X" $((0x$next + 16))`
+            pcie_link2=`read_cfg $addr`
+
+            link_speed=$((((0x$pcie_link2 & 0xF0000) >> 16)))
+            link_width=$((((0x$pcie_link2 & 0x3F00000) >> 20)))
+
+            dev_link_speed=$((((0x$pcie_link1 & 0xF))))
+            dev_link_width=$((((0x$pcie_link1 & 0x3F0) >> 4)))
+
+            # Supported size is in Device Capabilities, the one in use in Device Control
+            max_payload=$(((1 << ((0x$device_capabilities & 0x07) + 7))))
+            dev_payload=$(((1 << (((0x$device_control >> 5) & 0x07) + 7))))
+        fi
+        next=`echo $cap | cut -c 5-6`
+    done
+
+    if [ -z "$link_speed" ]; then
+        echo "PCI Express capability is not found" >&2
+        return 1
+    fi
+
+    echo "Link: PCIe gen$link_speed x$link_width"
+    if [ $link_speed -ne $dev_link_speed -o $link_width -ne $dev_link_width ]; then
+        echo " * But device capable of gen$dev_link_speed x$dev_link_width"
+    fi
+
+    echo "Payload: $dev_payload"
+    if [ $dev_payload -ne $max_payload ]; then
+        echo " * But device capable of $max_payload"
+    fi
+
+
+    info=0x`read_cfg 40`
+    max_tlp=$((2 ** (5 + ((info & 0xE0) >> 5))))
+    echo "TLP: 32 dwords (transfering 32 TLP per request)"
+    if [ $max_tlp -ne 32 ]; then
+        echo " * But device is able to transfer TLP up to $max_tlp bytes"
+    fi
+
+    # 2500 MT/s, but PCIe gen1 and gen2 uses 10 bit encoding
+    speed=$((link_width * link_speed * 2500 / 10))
+}
+
+parse_config
diff --git a/tests/new_device.sh b/tests/new_device.sh
deleted file mode 100755
index e3f8f8a..0000000
--- a/tests/new_device.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#! /bin/bash
-
-#Add new device to the driver
-#echo "10ee 6028" > /sys/bus/pci/drivers/pciDriver/new_id
-#Enable bus mastering
-#setpci -s 03:00.0 4.w=0x07
diff --git a/tests/reload.sh b/tests/reload.sh
new file mode 100755
index 0000000..62c6e22
--- /dev/null
+++ b/tests/reload.sh
@@ -0,0 +1,25 @@
+#! /bin/bash
+
+device=`lspci -n | grep -m 1 "10ee:" | awk '{print $1}'`
+if [ -z "$device" ]; then
+    echo "Xilinx device doesn't exist, rescanning..."
+    echo 1 > /sys/bus/pci/rescan
+    exit
+else
+    echo "Xilinx is located at: " $device
+fi
+echo "remove driver"
+rmmod pciDriver
+echo "remove devices"
+echo 1 > /sys/bus/pci/devices/0000\:${device:0:2}\:${device:3:4}/remove
+sleep 1
+echo "rescan"
+echo 1 > /sys/bus/pci/rescan
+sleep 1
+echo "instantiate driver"
+modprobe pciDriver
+# for devices with different ID
+#echo "10ee 6028" > /sys/bus/pci/drivers/pciDriver/new_id
+pci -i
+#echo Enabling bus mastering on device $dev
+#setpci -s $device 4.w=0x07