From 871fab410b76d8823a2a4d2c867923f1bcc9f636 Mon Sep 17 00:00:00 2001 From: Winefred Washington Date: Fri, 23 Dec 2016 11:47:37 -0800 Subject: [PATCH] adding SDAccel driver Change-Id: I4409f468bb2f680086ce8768e5a700fcd2280cd5 --- sdk/SDAccel/HAL/driver/include/xclbin.h | 124 ++ sdk/SDAccel/HAL/driver/include/xclhal.h | 394 +++++ sdk/SDAccel/HAL/driver/include/xclperf.h | 300 ++++ .../xcldma/include/perfmon_parameters.h | 274 +++ .../xcldma/include/xbar_sys_parameters.h | 146 ++ .../HAL/driver/xcldma/include/xdma-ioctl.h | 148 ++ .../HAL/driver/xcldma/user/datamover.h | 182 ++ .../HAL/driver/xcldma/user/memorymanager.cpp | 220 +++ .../HAL/driver/xcldma/user/memorymanager.h | 76 + sdk/SDAccel/HAL/driver/xcldma/user/perf.cpp | 980 +++++++++++ sdk/SDAccel/HAL/driver/xcldma/user/prom.cpp | 445 +++++ sdk/SDAccel/HAL/driver/xcldma/user/shim.cpp | 1250 ++++++++++++++ sdk/SDAccel/HAL/driver/xcldma/user/shim.h | 256 +++ sdk/SDAccel/HAL/driver/xcldma/user/xspi.cpp | 1531 +++++++++++++++++ 14 files changed, 6326 insertions(+) create mode 100644 sdk/SDAccel/HAL/driver/include/xclbin.h create mode 100644 sdk/SDAccel/HAL/driver/include/xclhal.h create mode 100755 sdk/SDAccel/HAL/driver/include/xclperf.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/include/perfmon_parameters.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/include/xbar_sys_parameters.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/include/xdma-ioctl.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/datamover.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.cpp create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.h create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/perf.cpp create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/prom.cpp create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/shim.cpp create mode 100644 sdk/SDAccel/HAL/driver/xcldma/user/shim.h create mode 100755 sdk/SDAccel/HAL/driver/xcldma/user/xspi.cpp diff --git 
a/sdk/SDAccel/HAL/driver/include/xclbin.h b/sdk/SDAccel/HAL/driver/include/xclbin.h new file mode 100644 index 000000000..dcd3c1a67 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/include/xclbin.h @@ -0,0 +1,124 @@ +/** + * Xilinx SDAccel xclbin container definition + * Copyright (C) 2015-2016, Xilinx Inc - All rights reserved + */ + +#ifndef _XCLBIN_H_ +#define _XCLBIN_H_ + +#if defined(__KERNEL__) +#include <linux/types.h> +#elif defined(__cplusplus) +#include <cstdlib> +#include <cstdint> +#else +#include <stdlib.h> +#include <stdint.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * Container format for Xilinx bitstreams, metadata and other + * binary blobs. + * Every segment must be aligned at 8 byte boundary with null byte padding + * between adjacent segments if required. + * For segments which are not present both offset and length must be 0 in + * the header. + * Currently only xclbin0\0 is recognized as file magic. In future if/when file + * format is updated the magic string will be changed to xclbin1\0 and so on. + */ + enum XCLBIN_MODE { + XCLBIN_FLAT, + XCLBIN_PR, + XCLBIN_TANDEM_STAGE2, + XCLBIN_TANDEM_STAGE2_WITH_PR, + XCLBIN_MODE_MAX + }; + + struct xclBin { + char m_magic[8]; /* should be xclbin0\0 */ + uint64_t m_length; /* total size of the xclbin file */ + uint64_t m_timeStamp; /* number of seconds since epoch when xclbin was created */ + uint64_t m_version; /* tool version used to create xclbin */ + unsigned m_mode; /* XCLBIN_MODE */ + char m_nextXclBin[24]; /* Name of next xclbin file in the daisy chain */ + uint64_t m_metadataOffset; /* file offset of embedded metadata */ + uint64_t m_metadataLength; /* size of the embedded metadata */ + uint64_t m_primaryFirmwareOffset; /* file offset of bitstream or emulation archive */ + uint64_t m_primaryFirmwareLength; /* size of the bitstream or emulation archive */ + uint64_t m_secondaryFirmwareOffset; /* file offset of clear bitstream if any */ + uint64_t m_secondaryFirmwareLength; /* size of the clear bitstream */ + uint64_t m_driverOffset; /* 
file offset of embedded device driver if any (currently unused) */ + uint64_t m_driverLength; /* size of the embedded device driver (currently unused) */ + + // Extra debug information for hardware and hardware emulation debug + + uint64_t m_dwarfOffset ; + uint64_t m_dwarfLength ; + uint64_t m_ipiMappingOffset ; + uint64_t m_ipiMappingLength ; + }; + + /* + * XCLBIN1 LAYOUT + * -------------- + * + * ----------------------------------------- + * | Magic | + * ----------------------------------------- + * | Header | + * ----------------------------------------- + * | One or more section headers | + * ----------------------------------------- + * | Matching number of sections with data | + * ----------------------------------------- + * + */ + enum xclBin1SectionKind { + BITSTREAM, + CLEARING_BITSTREAM, + EMBEDDED_METADATA, + FIRMWARE, + DEBUG_DATA + }; + + struct xclBin1SectionHeader { + unsigned m_sectionKind; /* Section type */ + unsigned short m_freq[4]; /* Target frequency for the section if applicable */ + char m_sectionName[16]; /* Examples: "stage2", "clear1", "clear2", "ocl1", "ocl2", "ublaze" */ + unsigned m_customFlagsA; /* Example: Number of Kernels in this region */ + unsigned m_customFlagsB; /* Example: Number of Kernels in this region */ + uint64_t m_sectionOffset; /* File offset of section data */ + uint64_t m_sectionSize; /* Size of section data */ + }; + + struct xclBin1Header { + uint64_t m_length; /* Total size of the xclbin file */ + uint64_t m_timeStamp; /* Number of seconds since epoch when xclbin was created */ + unsigned m_version; /* Tool version used to create xclbin */ + unsigned m_mode; /* XCLBIN_MODE */ + uint64_t m_platformId; /* 64 bit platform ID: vendor-device-subvendor-subdev */ + uint64_t m_featureId; /* 64 bit feature id */ + char m_nextXclBin[16]; /* Name of next xclbin file in the daisy chain */ + char m_debugBin[16]; /* Name of binary with debug information */ + unsigned m_numSections; /* Number of section headers */ + }; + + 
struct xclBin1 { + char m_magic[8]; /* Should be xclbin1\0 */ + uint64_t m_signature[4]; /* File signature for validation of binary */ + struct xclBin1Header m_header; /* Inline header */ + struct xclBin1SectionHeader m_sections[1]; /* One or more section headers follow */ + }; + + +#ifdef __cplusplus +} +#endif + +#endif + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/include/xclhal.h b/sdk/SDAccel/HAL/driver/include/xclhal.h new file mode 100644 index 000000000..ce4c78fe6 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/include/xclhal.h @@ -0,0 +1,394 @@ +/** + * Xilinx SDAccel HAL userspace driver APIs + * Copyright (C) 2015-2016, Xilinx Inc - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#ifndef _XCL_HAL_H_ +#define _XCL_HAL_H_ + +#ifdef __cplusplus +#include <cstdlib> +#include <cstdint> +#else +#include <stdlib.h> +#include <stdint.h> +#endif + +#if defined(_WIN32) +#ifdef XCL_DRIVER_DLL_EXPORT +#define XCL_DRIVER_DLLESPEC __declspec(dllexport) +#else +#define XCL_DRIVER_DLLESPEC __declspec(dllimport) +#endif +#else +#define XCL_DRIVER_DLLESPEC __attribute__((visibility("default"))) +#endif + + +#include "xclperf.h" + +#ifdef __cplusplus +extern "C" { +#endif + + typedef void * xclDeviceHandle; + + struct xclBin; + /** + * Structure used to obtain various bits of information from the device. 
+ */ + + struct xclDeviceInfo { + unsigned mMagic; // = 0X586C0C6C; XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); + char mName[256]; + unsigned short mHALMajorVersion; + unsigned short mHALMinorVersion; + unsigned short mVendorId; + unsigned short mDeviceId; + unsigned mDeviceVersion; + unsigned short mSubsystemId; + unsigned short mSubsystemVendorId; + size_t mDDRSize; // Size of DDR memory + size_t mDataAlignment; // Minimum data alignment requirement for host buffers + size_t mDDRFreeSize; // Total unused/available DDR memory + size_t mMinTransferSize; // Minimum DMA buffer size + float mTemp; + float mVoltage; + float mCurrent; + unsigned mDDRBankCount; + unsigned mOCLFrequency; + unsigned mPCIeLinkWidth; + unsigned mPCIeLinkSpeed; + unsigned short mDMAThreads; + // More properties here + }; + + struct xclDeviceInfo2 { + unsigned mMagic; // = 0X586C0C6C; XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); + char mName[256]; + unsigned short mHALMajorVersion; + unsigned short mHALMinorVersion; + unsigned short mVendorId; + unsigned short mDeviceId; + unsigned short mSubsystemId; + unsigned short mSubsystemVendorId; + unsigned short mDeviceVersion; +// unsigned mDriverVersion; // Enable this after driver unification since it changes the ABI + size_t mDDRSize; // Size of DDR memory + size_t mDataAlignment; // Minimum data alignment requirement for host buffers + size_t mDDRFreeSize; // Total unused/available DDR memory + size_t mMinTransferSize; // Minimum DMA buffer size +// size_t mBRAMSize; // Enable this after driver unification since it changes the ABI + unsigned short mDDRBankCount; + unsigned short mOCLFrequency[4]; + unsigned short mPCIeLinkWidth; + unsigned short mPCIeLinkSpeed; + unsigned short mDMAThreads; + short mOnChipTemp; + short mFanTemp; + unsigned short mVInt; + unsigned short mVAux; + unsigned short mVBram; + float mCurrent; +// unsigned short mCurrent; // Change float to short after driver unification since it changes 
the ABI + unsigned short mNumClocks; + unsigned short mFanSpeed; + bool mMigCalib; + // More properties here + }; + + enum xclMemoryDomains { + XCL_MEM_HOST_RAM = 0x00000000, + XCL_MEM_DEVICE_RAM = 0x00000001, + XCL_MEM_DEVICE_BRAM = 0x00000002, + XCL_MEM_SVM = 0x00000003, + XCL_MEM_CMA = 0x00000004, + XCL_MEM_DEVICE_REG = 0x00000005 + }; + + enum xclDDRFlags { + XCL_DEVICE_RAM_BANK0 = 0, + XCL_DEVICE_RAM_BANK1 = 1, + XCL_DEVICE_RAM_BANK2 = 2, + XCL_DEVICE_RAM_BANK3 = 3 + }; + + enum xclBRAMFlags { + XCL_DEVICE_BRAM0 = 0, + XCL_DEVICE_BRAM1 = 1, + XCL_DEVICE_BRAM2 = 2, + XCL_DEVICE_BRAM3 = 3, + }; + + /** + * Define address spaces on the device AXI bus. The enums are used in xclRead() and xclWrite() + * to pass relative offsets. + */ + + enum xclAddressSpace { + XCL_ADDR_SPACE_DEVICE_FLAT = 0, // Absolute address space + XCL_ADDR_SPACE_DEVICE_RAM = 1, // Address space for the DDR memory + XCL_ADDR_KERNEL_CTRL = 2, // Address space for the OCL Region control port + XCL_ADDR_SPACE_DEVICE_PERFMON = 3, // Address space for the Performance monitors + XCL_ADDR_SPACE_DEVICE_REG = 4, // Address space for device registers. + XCL_ADDR_SPACE_MAX = 8 + }; + + /** + * Defines verbosity levels which are passed to xclOpen during device creation time + */ + + enum xclVerbosityLevel { + XCL_QUIET = 0, + XCL_INFO = 1, + XCL_WARN = 2, + XCL_ERROR = 3 + }; + + enum xclResetKind { + XCL_RESET_KERNEL, + XCL_RESET_FULL + }; + + // VERSION 1.0 APIs + // ---------------- + + /** + * @defgroup devman DEVICE MANAGEMENT APIs + * -------------------------------------- + * APIs to open, close, query and program the device + * @{ + */ + + /** + * Open a device and obtain its handle. 
+ * "deviceIndex" is 0 for first device, 1 for the second device and so on + * "logFileName" is optional and if not NULL should be used to log messages + * "level" specifies the verbosity level for the messages being logged to logFileName + */ + + XCL_DRIVER_DLLESPEC xclDeviceHandle xclOpen(unsigned deviceIndex, const char *logFileName, xclVerbosityLevel level); + + /** + * Close an opened device + */ + + XCL_DRIVER_DLLESPEC void xclClose(xclDeviceHandle handle); + + /** + * Obtain various bits of information from the device + */ + + XCL_DRIVER_DLLESPEC int xclGetDeviceInfo(xclDeviceHandle handle, xclDeviceInfo *info); + + /** + * Obtain various bits of information from the device + */ + + XCL_DRIVER_DLLESPEC int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info); + + /** + * Download bitstream to the device. The bitstream is in xclBin format and stored in xclBinFileName. + * The bitstream may be PR bitstream for devices which support PR and full bitstream for devices + * which require full configuration. + */ + + XCL_DRIVER_DLLESPEC int xclLoadBitstream(xclDeviceHandle handle, const char *xclBinFileName); + + /** + * Download bitstream to the device. The bitstream is passed in memory in xclBin format. The bitstream + * may be PR bitstream for devices which support PR and full bitstream for devices which require full + * configuration. + */ + + XCL_DRIVER_DLLESPEC int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer); + + /** @} */ + + /** + * @defgroup bufman BUFFER MANAGEMENT APIs + * -------------------------------------- + * + * Buffer management APIs are used for managing device memory. The board vendors are expected to + * provide a memory manager with the following 4 APIs. The xclCopyXXX functions will be used by + * runtime to migrate buffers between host and device memory. 
+ * @{ + */ + + /** + * Allocate a buffer on the device DDR and return its address + */ + + XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size); + + /** + * Allocate a buffer on the device DDR bank and return its address + */ + + XCL_DRIVER_DLLESPEC uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, + xclMemoryDomains domain, + unsigned flags); + + /** + * Free a previously allocated buffer on the device DDR + */ + + XCL_DRIVER_DLLESPEC void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf); + + /** + * Copy host buffer contents to previously allocated device memory. "seek" specifies how many bytes to skip + * at the beginning of the destination before copying "size" bytes of host buffer. + */ + + XCL_DRIVER_DLLESPEC size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, + const void *src, size_t size, size_t seek); + + /** + * Copy contents of previously allocated device memory to host buffer. "skip" specifies how many bytes to skip + * from the beginning of the source before copying "size" bytes of device buffer. + */ + + XCL_DRIVER_DLLESPEC size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, + uint64_t src, size_t size, size_t skip); + + /** @} */ + + /** + * @defgroup readwrite DEVICE READ AND WRITE APIs + * ---------------------------------------------- + * + * These functions are used to read and write peripherals sitting on the address map. An implementation + * may use these to implement xclCopyXXX functions. OpenCL runtime will be using the BUFFER MANAGEMENT + * APIs described above to manage OpenCL buffers. It would use xclRead/xclWrite to program and manage + * peripherals on the card. For programming the Kernel, OpenCL runtime uses the kernel control register + * map generated by the OpenCL compiler. 
+ * Note that the offset is wrt the address space + * @{ + */ + + XCL_DRIVER_DLLESPEC size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, + const void *hostBuf, size_t size); + + XCL_DRIVER_DLLESPEC size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, + void *hostbuf, size_t size); + + /** @} */ + + // EXTENSIONS FOR PARTIAL RECONFIG FLOW + // ------------------------------------ + // TODO: Deprecate this. Update the device PROM with new base bitstream + XCL_DRIVER_DLLESPEC int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName); + + // Update the device PROM with new base bitstream(s). + XCL_DRIVER_DLLESPEC int xclUpgradeFirmware2(xclDeviceHandle handle, const char *file1, const char* file2); + + //TODO: Deprecate this. Update the device PROM for XSpi + XCL_DRIVER_DLLESPEC int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index); + + //Test the flash + XCL_DRIVER_DLLESPEC int xclTestXSpi(xclDeviceHandle handle, int slave_index); + + // Boot the FPGA with new bitstream in PROM. This will break the PCIe link and render the device + // unusable till a reboot of the host + XCL_DRIVER_DLLESPEC int xclBootFPGA(xclDeviceHandle handle); + + // NEW APIs in VERSION 1.1 + // ----------------------- + + /** + * @addtogroup devman + * @{ + */ + + /** + * Reset the device. All running kernels will be killed and buffers in DDR will be purged. + * A device would be reset if a user's application dies without waiting for running kernel(s) to finish. 
+ */ + + XCL_DRIVER_DLLESPEC int xclResetDevice(xclDeviceHandle handle, xclResetKind kind); + + /** + * Set the OCL region frequency + */ + + XCL_DRIVER_DLLESPEC int xclReClock(xclDeviceHandle handle, unsigned targetFreqMHz); + + /** + * Set the OCL region frequencies + */ + + XCL_DRIVER_DLLESPEC int xclReClock2(xclDeviceHandle handle, unsigned short region, + const unsigned short *targetFreqMHz); + + /** + * Return a count of devices found in the system + */ + XCL_DRIVER_DLLESPEC unsigned xclProbe(); + + /** + * Get exclusive ownership of the device. The lock is necessary before performing buffer + * migration, register access or bitstream downloads. + */ + XCL_DRIVER_DLLESPEC int xclLockDevice(xclDeviceHandle handle); + + /** @} */ + + /** + * @defgroup perfmon PERFORMANCE MONITORING OPERATIONS + * --------------------------------------------------- + * + * These functions are used to read and write to the performance monitoring infrastructure. + * OpenCL runtime will be using the BUFFER MANAGEMENT APIs described above to manage OpenCL buffers. + * It would use these functions to initialize and sample the performance monitoring on the card. 
+ * Note that the offset is wrt the address space + */ + + XCL_DRIVER_DLLESPEC size_t xclGetDeviceTimestamp(xclDeviceHandle handle); + + XCL_DRIVER_DLLESPEC double xclGetDeviceClockFreqMHz(xclDeviceHandle handle); + + XCL_DRIVER_DLLESPEC double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle); + + XCL_DRIVER_DLLESPEC double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle); + + XCL_DRIVER_DLLESPEC void xclSetOclRegionProfilingNumberSlots(xclDeviceHandle handle, + uint32_t numSlots); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, + xclCounterResults& counterResults); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, + uint32_t startTrigger); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type); + + XCL_DRIVER_DLLESPEC uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type); + + XCL_DRIVER_DLLESPEC size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, + xclTraceResultsVector& traceVector); + + /** @} */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sdk/SDAccel/HAL/driver/include/xclperf.h b/sdk/SDAccel/HAL/driver/include/xclperf.h new file mode 100755 index 000000000..6be7ae9f8 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/include/xclperf.h @@ -0,0 +1,300 @@ +/** + * Xilinx SDAccel HAL userspace driver extension APIs + * Performance Monitoring Exposed Parameters + * Copyright (C) 2015-2016, Xilinx Inc - All rights reserved + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. 
A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#ifndef _XCL_PERF_H_ +#define _XCL_PERF_H_ + +// DSA version (e.g., XCL_PLATFORM=xilinx_adm-pcie-7v3_1ddr_1_1) +// TODO: this will eventually be read from the device using lspci (see CR 870994) +#define DSA_MAJOR_VERSION 1 +#define DSA_MINOR_VERSION 1 + +/************************ APM 0: Monitor MIG Ports ****************************/ + +#define XPAR_AXI_PERF_MON_0_NUMBER_SLOTS 2 + +#if 1 +#define XPAR_AXI_PERF_MON_0_SLOT0_NAME "OCL Region" +#define XPAR_AXI_PERF_MON_0_SLOT1_NAME "Host" +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT 0 +#define XPAR_AXI_PERF_MON_0_HOST_SLOT 1 +#else +// Uncomment for DSA v1.0 +// NOTE: since device profiling didn't work in v1.0, we'll leave this commented +//#define XPAR_AXI_PERF_MON_0_SLOT0_NAME "Host" +//#define XPAR_AXI_PERF_MON_0_SLOT1_NAME "OCL Region" +//#define XPAR_AXI_PERF_MON_0_HOST_SLOT 0 +//#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT 1 +#endif + +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT2 2 +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT3 3 +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT4 4 +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT5 5 +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT6 6 +#define XPAR_AXI_PERF_MON_0_OCL_REGION_SLOT7 7 + +#define XPAR_AXI_PERF_MON_0_SLOT2_NAME "OCL Region, Master 2" +#define XPAR_AXI_PERF_MON_0_SLOT3_NAME "OCL Region, Master 3" +#define XPAR_AXI_PERF_MON_0_SLOT4_NAME "OCL Region, Master 4" +#define XPAR_AXI_PERF_MON_0_SLOT5_NAME "OCL Region, Master 5" +#define XPAR_AXI_PERF_MON_0_SLOT6_NAME "OCL Region, Master 6" +#define XPAR_AXI_PERF_MON_0_SLOT7_NAME "OCL Region, Master 7" + 
+#define XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT1_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT2_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT3_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT4_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT5_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT6_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_0_SLOT7_DATA_WIDTH 512 + +/* Profile */ +#define XPAR_AXI_PERF_MON_0_IS_EVENT_COUNT 1 +#define XPAR_AXI_PERF_MON_0_HAVE_SAMPLED_COUNTERS 1 +#define XPAR_AXI_PERF_MON_0_NUMBER_COUNTERS (XPAR_AXI_PERF_MON_0_NUMBER_SLOTS * XAPM_METRIC_COUNTERS_PER_SLOT) + +/* Trace */ +#define XPAR_AXI_PERF_MON_0_IS_EVENT_LOG 1 +#define XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS 1 +#define XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN 1 +// 2 DDR platform +#define XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS_2DDR 0 +#define XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN_2DDR 1 + +/* AXI Stream FIFOs */ +#define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO 3 +#define XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH 128 +#define XPAR_AXI_PERF_MON_0_TRACE_NUMBER_SAMPLES 4096 +#define MAX_TRACE_NUMBER_SAMPLES 8192 + +#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0 0x010000 +#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_1 0x011000 +#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_2 0x012000 +// CR 877788: the extra 0x80001000 is a bug in Vivado where the AXI4 base address is not set correctly +// TODO: remove it once that bug is fixed! 
+#define XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL (0x2000000000 + 0x80001000) + +/********************* APM 1: Monitor PCIe DMA Masters ************************/ + +#define XPAR_AXI_PERF_MON_1_NUMBER_SLOTS 2 + +#define XPAR_AXI_PERF_MON_1_SLOT0_NAME "DMA AXI4 Master" +#define XPAR_AXI_PERF_MON_1_SLOT1_NAME "DMA AXI4-Lite Master" +#define XPAR_AXI_PERF_MON_1_SLOT2_NAME "Null" +#define XPAR_AXI_PERF_MON_1_SLOT3_NAME "Null" +#define XPAR_AXI_PERF_MON_1_SLOT4_NAME "Null" +#define XPAR_AXI_PERF_MON_1_SLOT5_NAME "Null" +#define XPAR_AXI_PERF_MON_1_SLOT6_NAME "Null" +#define XPAR_AXI_PERF_MON_1_SLOT7_NAME "Null" + +#define XPAR_AXI_PERF_MON_1_SLOT0_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT1_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT2_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT3_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT4_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT5_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT6_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_1_SLOT7_DATA_WIDTH 512 + +/* Profile */ +#define XPAR_AXI_PERF_MON_1_IS_EVENT_COUNT 1 +#define XPAR_AXI_PERF_MON_1_HAVE_SAMPLED_COUNTERS 1 +#define XPAR_AXI_PERF_MON_1_NUMBER_COUNTERS (XPAR_AXI_PERF_MON_1_NUMBER_SLOTS * XAPM_METRIC_COUNTERS_PER_SLOT) +#define XPAR_AXI_PERF_MON_1_SCALE_FACTOR 1 + +/* Trace */ +#define XPAR_AXI_PERF_MON_1_IS_EVENT_LOG 0 +#define XPAR_AXI_PERF_MON_1_SHOW_AXI_IDS 0 +#define XPAR_AXI_PERF_MON_1_SHOW_AXI_LEN 0 + +/* AXI Stream FIFOs */ +#define XPAR_AXI_PERF_MON_1_TRACE_NUMBER_FIFO 0 +#define XPAR_AXI_PERF_MON_1_TRACE_WORD_WIDTH 0 +#define XPAR_AXI_PERF_MON_1_TRACE_NUMBER_SAMPLES 0 + +/************************ APM 2: Monitor OCL Region ***************************/ + +#define XPAR_AXI_PERF_MON_2_NUMBER_SLOTS 1 + +#define XPAR_AXI_PERF_MON_2_SLOT0_NAME "Kernel0" +#define XPAR_AXI_PERF_MON_2_SLOT1_NAME "Kernel1" +#define XPAR_AXI_PERF_MON_2_SLOT2_NAME "Kernel2" +#define XPAR_AXI_PERF_MON_2_SLOT3_NAME "Kernel3" +#define XPAR_AXI_PERF_MON_2_SLOT4_NAME "Kernel4" 
+#define XPAR_AXI_PERF_MON_2_SLOT5_NAME "Kernel5" +#define XPAR_AXI_PERF_MON_2_SLOT6_NAME "Kernel6" +#define XPAR_AXI_PERF_MON_2_SLOT7_NAME "Kernel7" + +#define XPAR_AXI_PERF_MON_2_SLOT0_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT1_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT2_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT3_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT4_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT5_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT6_DATA_WIDTH 512 +#define XPAR_AXI_PERF_MON_2_SLOT7_DATA_WIDTH 512 + +/* Profile */ +#define XPAR_AXI_PERF_MON_2_IS_EVENT_COUNT 0 +#define XPAR_AXI_PERF_MON_2_HAVE_SAMPLED_COUNTERS 0 +#define XPAR_AXI_PERF_MON_2_NUMBER_COUNTERS 0 +#define XPAR_AXI_PERF_MON_2_SCALE_FACTOR 1 + +/* Trace */ +#define XPAR_AXI_PERF_MON_2_IS_EVENT_LOG 1 +#define XPAR_AXI_PERF_MON_2_SHOW_AXI_IDS 0 +#define XPAR_AXI_PERF_MON_2_SHOW_AXI_LEN 0 + +/* AXI Stream FIFOs */ +/* NOTE: number of FIFOs is dependent upon the number of compute units being monitored */ +//#define XPAR_AXI_PERF_MON_2_TRACE_NUMBER_FIFO 2 +#define XPAR_AXI_PERF_MON_2_TRACE_WORD_WIDTH 64 +#define XPAR_AXI_PERF_MON_2_TRACE_NUMBER_SAMPLES 4096 + +#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0 -0x03000 +#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1 -0x02000 +#define XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2 -0x01000 + +/************************ APM Profile Counters ********************************/ + +#define XAPM_MAX_NUMBER_SLOTS 8 +#define XAPM_METRIC_COUNTERS_PER_SLOT 8 + +/* Metric counters per slot */ +#define XAPM_METRIC_WRITE_BYTES 0 +#define XAPM_METRIC_WRITE_TRANX 1 +#define XAPM_METRIC_WRITE_LATENCY 2 +#define XAPM_METRIC_READ_BYTES 3 +#define XAPM_METRIC_READ_TRANX 4 +#define XAPM_METRIC_READ_LATENCY 5 +#define XAPM_METRIC_WRITE_MIN_MAX 6 +#define XAPM_METRIC_READ_MIN_MAX 7 + +#define XAPM_METRIC_COUNT0_NAME "Write Byte Count" +#define XAPM_METRIC_COUNT1_NAME "Write Transaction Count" +#define XAPM_METRIC_COUNT2_NAME "Total Write Latency" +#define 
XAPM_METRIC_COUNT3_NAME "Read Byte Count" +#define XAPM_METRIC_COUNT4_NAME "Read Transaction Count" +#define XAPM_METRIC_COUNT5_NAME "Total Read Latency" +#define XAPM_METRIC_COUNT6_NAME "Min/Max Write Latency" +#define XAPM_METRIC_COUNT7_NAME "Min/Max Read Latency" + +/************************ APM Trace Stream ************************************/ + +/* Bit locations of trace flags */ +#define XAPM_READ_LAST 6 +#define XAPM_READ_FIRST 5 +#define XAPM_READ_ADDR 4 +#define XAPM_RESPONSE 3 +#define XAPM_WRITE_LAST 2 +#define XAPM_WRITE_FIRST 1 +#define XAPM_WRITE_ADDR 0 + +/* Bit locations of external event flags */ +#define XAPM_EXT_START 2 +#define XAPM_EXT_STOP 1 +#define XAPM_EXT_EVENT 0 + +/* Total number of bits per slot */ +#define FLAGS_PER_SLOT 7 +#define EXT_EVENTS_PER_SLOT 3 + +/* Cycles to add to timestamp if overflow occurs */ +#define LOOP_ADD_TIME (1<<16) + +/********************** Definitions: Enums, Structs ***************************/ + +/* Performance monitor type or location */ +enum xclPerfMonType { + XCL_PERF_MON_MEMORY = 0, + XCL_PERF_MON_HOST_INTERFACE = 1, + XCL_PERF_MON_OCL_REGION = 2, + XCL_PERF_MON_TOTAL_PROFILE = 3 +}; + +/* Performance monitor start event */ +enum xclPerfMonStartEvent { + XCL_PERF_MON_START_ADDR = 0, + XCL_PERF_MON_START_FIRST_DATA = 1 +}; + +/* Performance monitor end event */ +enum xclPerfMonEndEvent { + XCL_PERF_MON_END_LAST_DATA = 0, + XCL_PERF_MON_END_RESPONSE = 1 +}; + +enum xclPerfMonCounterType { + XCL_PERF_MON_WRITE_BYTES = 0, + XCL_PERF_MON_WRITE_TRANX = 1, + XCL_PERF_MON_WRITE_LATENCY = 2, + XCL_PERF_MON_READ_BYTES = 3, + XCL_PERF_MON_READ_TRANX = 4, + XCL_PERF_MON_READ_LATENCY = 5 +}; + +/* Performance monitor counter results */ +typedef struct { + //unsigned int NumSlots; + float SampleIntervalUsec; + unsigned int WriteBytes[XAPM_MAX_NUMBER_SLOTS]; + unsigned int WriteTranx[XAPM_MAX_NUMBER_SLOTS]; + unsigned int WriteLatency[XAPM_MAX_NUMBER_SLOTS]; + unsigned short WriteMinLatency[XAPM_MAX_NUMBER_SLOTS]; + 
unsigned short WriteMaxLatency[XAPM_MAX_NUMBER_SLOTS]; + unsigned int ReadBytes[XAPM_MAX_NUMBER_SLOTS]; + unsigned int ReadTranx[XAPM_MAX_NUMBER_SLOTS]; + unsigned int ReadLatency[XAPM_MAX_NUMBER_SLOTS]; + unsigned short ReadMinLatency[XAPM_MAX_NUMBER_SLOTS]; + unsigned short ReadMaxLatency[XAPM_MAX_NUMBER_SLOTS]; +} xclCounterResults; + +/* Performance monitor trace results */ +typedef struct { + unsigned char LogID; /* 0: event flags, 1: host timestamp */ + unsigned char Overflow; + unsigned char WriteStartEvent; + unsigned char WriteEndEvent; + unsigned char ReadStartEvent; + unsigned short Timestamp; + unsigned int HostTimestamp; + unsigned char RID[XAPM_MAX_NUMBER_SLOTS]; + unsigned char ARID[XAPM_MAX_NUMBER_SLOTS]; + unsigned char BID[XAPM_MAX_NUMBER_SLOTS]; + unsigned char AWID[XAPM_MAX_NUMBER_SLOTS]; + unsigned char EventFlags[XAPM_MAX_NUMBER_SLOTS]; + unsigned char ExtEventFlags[XAPM_MAX_NUMBER_SLOTS]; + unsigned char WriteAddrLen[XAPM_MAX_NUMBER_SLOTS]; + unsigned char ReadAddrLen[XAPM_MAX_NUMBER_SLOTS]; + unsigned short WriteBytes[XAPM_MAX_NUMBER_SLOTS]; + unsigned short ReadBytes[XAPM_MAX_NUMBER_SLOTS]; + unsigned short WriteAddrId[XAPM_MAX_NUMBER_SLOTS]; + unsigned short ReadAddrId[XAPM_MAX_NUMBER_SLOTS]; +} xclTraceResults; + +typedef struct { + unsigned int mLength; + //unsigned int mNumSlots; + xclTraceResults mArray[MAX_TRACE_NUMBER_SAMPLES]; +} xclTraceResultsVector; + +#endif + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/include/perfmon_parameters.h b/sdk/SDAccel/HAL/driver/xcldma/include/perfmon_parameters.h new file mode 100644 index 000000000..85be84d10 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/include/perfmon_parameters.h @@ -0,0 +1,274 @@ +/* + * Performance Monitoring Internal Parameters + * Date: January 9, 2015 + * Author: Paul Schumacher + * + * NOTE: partially taken from file xaxipmon_hw.h in v5.0 of APM driver + */ + +#ifndef 
_PERFMON_PARAMETERS_H +#define _PERFMON_PARAMETERS_H + +/************************ AXI Stream FIFOs ************************************/ + +/* Address offsets in core */ +#define AXI_FIFO_RDFR 0x18 +#define AXI_FIFO_RDFD 0x20 +#define AXI_FIFO_RDFD_AXI_FULL 0x1000 +#define AXI_FIFO_RLR 0x24 +#define AXI_FIFO_SRR 0x28 +#define AXI_FIFO_RESET_VALUE 0xA5 + +/************************ APM Constant Definitions ****************************/ + +/* Register offsets of AXIMONITOR in the Device Config */ + +#define XAPM_GCC_HIGH_OFFSET 0x0000 /**< Global Clock Counter 32 to 63 bits */ +#define XAPM_GCC_LOW_OFFSET 0x0004 /**< Global Clock Counter Lower 0-31 bits */ +#define XAPM_SI_HIGH_OFFSET 0x0020 /**< Sample Interval MSB */ +#define XAPM_SI_LOW_OFFSET 0x0024 /**< Sample Interval LSB */ +#define XAPM_SICR_OFFSET 0x0028 /**< Sample Interval Control Register */ +#define XAPM_SR_OFFSET 0x002C /**< Sample Register */ +#define XAPM_GIE_OFFSET 0x0030 /**< Global Interrupt Enable Register */ +#define XAPM_IE_OFFSET 0x0034 /**< Interrupt Enable Register */ +#define XAPM_IS_OFFSET 0x0038 /**< Interrupt Status Register */ + +#define XAPM_MSR0_OFFSET 0x0044 /**< Metric Selector 0 Register */ +#define XAPM_MSR1_OFFSET 0x0048 /**< Metric Selector 1 Register */ +#define XAPM_MSR2_OFFSET 0x004C /**< Metric Selector 2 Register */ + +#define XAPM_MC0_OFFSET 0x0100 /**< Metric Counter 0 Register */ +#define XAPM_INC0_OFFSET 0x0104 /**< Incrementer 0 Register */ +#define XAPM_RANGE0_OFFSET 0x0108 /**< Range 0 Register */ +#define XAPM_MC0LOGEN_OFFSET 0x010C /**< Metric Counter 0 Log Enable Register */ +#define XAPM_MC1_OFFSET 0x0110 /**< Metric Counter 1 Register */ +#define XAPM_INC1_OFFSET 0x0114 /**< Incrementer 1 Register */ +#define XAPM_RANGE1_OFFSET 0x0118 /**< Range 1 Register */ +#define XAPM_MC1LOGEN_OFFSET 0x011C /**< Metric Counter 1 Log Enable Register */ +#define XAPM_MC2_OFFSET 0x0120 /**< Metric Counter 2 Register */ +#define XAPM_INC2_OFFSET 0x0124 /**< Incrementer 2 Register 
*/ +#define XAPM_RANGE2_OFFSET 0x0128 /**< Range 2 Register */ +#define XAPM_MC2LOGEN_OFFSET 0x012C /**< Metric Counter 2 Log Enable Register */ +#define XAPM_MC3_OFFSET 0x0130 /**< Metric Counter 3 Register */ +#define XAPM_INC3_OFFSET 0x0134 /**< Incrementer 3 Register */ +#define XAPM_RANGE3_OFFSET 0x0138 /**< Range 3 Register */ +#define XAPM_MC3LOGEN_OFFSET 0x013C /**< Metric Counter 3 Log Enable Register */ +#define XAPM_MC4_OFFSET 0x0140 /**< Metric Counter 4 Register */ +#define XAPM_INC4_OFFSET 0x0144 /**< Incrementer 4 Register */ +#define XAPM_RANGE4_OFFSET 0x0148 /**< Range 4 Register */ +#define XAPM_MC4LOGEN_OFFSET 0x014C /**< Metric Counter 4 Log Enable Register */ +#define XAPM_MC5_OFFSET 0x0150 /**< Metric Counter 5 Register */ +#define XAPM_INC5_OFFSET 0x0154 /**< Incrementer 5 Register */ +#define XAPM_RANGE5_OFFSET 0x0158 /**< Range 5 Register */ +#define XAPM_MC5LOGEN_OFFSET 0x015C /**< Metric Counter 5 Log Enable Register */ +#define XAPM_MC6_OFFSET 0x0160 /**< Metric Counter 6 Register */ +#define XAPM_INC6_OFFSET 0x0164 /**< Incrementer 6 Register */ +#define XAPM_RANGE6_OFFSET 0x0168 /**< Range 6 Register */ +#define XAPM_MC6LOGEN_OFFSET 0x016C /**< Metric Counter 6 Log Enable Register */ +#define XAPM_MC7_OFFSET 0x0170 /**< Metric Counter 7 Register */ +#define XAPM_INC7_OFFSET 0x0174 /**< Incrementer 7 Register */ +#define XAPM_RANGE7_OFFSET 0x0178 /**< Range 7 Register */ +#define XAPM_MC7LOGEN_OFFSET 0x017C /**< Metric Counter 7 Log Enable Register */ +#define XAPM_MC8_OFFSET 0x0180 /**< Metric Counter 8 Register */ +#define XAPM_INC8_OFFSET 0x0184 /**< Incrementer 8 Register */ +#define XAPM_RANGE8_OFFSET 0x0188 /**< Range 8 Register */ +#define XAPM_MC8LOGEN_OFFSET 0x018C /**< Metric Counter 8 Log Enable Register */ +#define XAPM_MC9_OFFSET 0x0190 /**< Metric Counter 9 Register */ +#define XAPM_INC9_OFFSET 0x0194 /**< Incrementer 9 Register */ +#define XAPM_RANGE9_OFFSET 0x0198 /**< Range 9 Register */ +#define XAPM_MC9LOGEN_OFFSET 
0x019C /**< Metric Counter 9 Log Enable Register */ + +#define XAPM_SMC0_OFFSET 0x0200 /**< Sampled Metric Counter 0 Register */ +#define XAPM_SINC0_OFFSET 0x0204 /**< Sampled Incrementer 0 Register */ +#define XAPM_SMC1_OFFSET 0x0210 /**< Sampled Metric Counter 1 Register */ +#define XAPM_SINC1_OFFSET 0x0214 /**< Sampled Incrementer 1 Register */ +#define XAPM_SMC2_OFFSET 0x0220 /**< Sampled Metric Counter 2 Register */ +#define XAPM_SINC2_OFFSET 0x0224 /**< Sampled Incrementer 2 Register */ +#define XAPM_SMC3_OFFSET 0x0230 /**< Sampled Metric Counter 3 Register */ +#define XAPM_SINC3_OFFSET 0x0234 /**< Sampled Incrementer 3 Register */ +#define XAPM_SMC4_OFFSET 0x0240 /**< Sampled Metric Counter 4 Register */ +#define XAPM_SINC4_OFFSET 0x0244 /**< Sampled Incrementer 4 Register */ +#define XAPM_SMC5_OFFSET 0x0250 /**< Sampled Metric Counter 5 Register */ +#define XAPM_SINC5_OFFSET 0x0254 /**< Sampled Incrementer 5 Register */ +#define XAPM_SMC6_OFFSET 0x0260 /**< Sampled Metric Counter 6 Register */ +#define XAPM_SINC6_OFFSET 0x0264 /**< Sampled Incrementer 6 Register */ +#define XAPM_SMC7_OFFSET 0x0270 /**< Sampled Metric Counter 7 Register */ +#define XAPM_SINC7_OFFSET 0x0274 /**< Sampled Incrementer 7 Register */ +#define XAPM_SMC8_OFFSET 0x0280 /**< Sampled Metric Counter 8 Register */ +#define XAPM_SINC8_OFFSET 0x0284 /**< Sampled Incrementer 8 Register */ +#define XAPM_SMC9_OFFSET 0x0290 /**< Sampled Metric Counter 9 Register */ +#define XAPM_SINC9_OFFSET 0x0294 /**< Sampled Incrementer 9 Register */ + +#define XAPM_MC10_OFFSET 0x01A0 /**< Metric Counter 10 Register */ +#define XAPM_MC11_OFFSET 0x01B0 /**< Metric Counter 11 Register */ +#define XAPM_MC12_OFFSET 0x0500 /**< Metric Counter 12 Register */ +#define XAPM_MC13_OFFSET 0x0510 /**< Metric Counter 13 Register */ +#define XAPM_MC14_OFFSET 0x0520 /**< Metric Counter 14Register */ +#define XAPM_MC15_OFFSET 0x0530 /**< Metric Counter 15 Register */ +#define XAPM_MC16_OFFSET 0x0540 /**< Metric Counter 16 
Register */ +#define XAPM_MC17_OFFSET 0x0550 /**< Metric Counter 17 Register */ +#define XAPM_MC18_OFFSET 0x0560 /**< Metric Counter 18 Register */ +#define XAPM_MC19_OFFSET 0x0570 /**< Metric Counter 19 Register */ +#define XAPM_MC20_OFFSET 0x0580 /**< Metric Counter 20 Register */ +#define XAPM_MC21_OFFSET 0x0590 /**< Metric Counter 21 Register */ +#define XAPM_MC22_OFFSET 0x05A0 /**< Metric Counter 22 Register */ +#define XAPM_MC23_OFFSET 0x05B0 /**< Metric Counter 23 Register */ +#define XAPM_MC24_OFFSET 0x0700 /**< Metric Counter 24 Register */ +#define XAPM_MC25_OFFSET 0x0710 /**< Metric Counter 25 Register */ +#define XAPM_MC26_OFFSET 0x0720 /**< Metric Counter 26 Register */ +#define XAPM_MC27_OFFSET 0x0730 /**< Metric Counter 27 Register */ +#define XAPM_MC28_OFFSET 0x0740 /**< Metric Counter 28 Register */ +#define XAPM_MC29_OFFSET 0x0750 /**< Metric Counter 29 Register */ +#define XAPM_MC30_OFFSET 0x0760 /**< Metric Counter 30 Register */ +#define XAPM_MC31_OFFSET 0x0770 /**< Metric Counter 31 Register */ +#define XAPM_MC32_OFFSET 0x0780 /**< Metric Counter 32 Register */ +#define XAPM_MC33_OFFSET 0x0790 /**< Metric Counter 33 Register */ +#define XAPM_MC34_OFFSET 0x07A0 /**< Metric Counter 34 Register */ +#define XAPM_MC35_OFFSET 0x07B0 /**< Metric Counter 35 Register */ +#define XAPM_MC36_OFFSET 0x0900 /**< Metric Counter 36 Register */ +#define XAPM_MC37_OFFSET 0x0910 /**< Metric Counter 37 Register */ +#define XAPM_MC38_OFFSET 0x0920 /**< Metric Counter 38 Register */ +#define XAPM_MC39_OFFSET 0x0930 /**< Metric Counter 39 Register */ +#define XAPM_MC40_OFFSET 0x0940 /**< Metric Counter 40 Register */ +#define XAPM_MC41_OFFSET 0x0950 /**< Metric Counter 41 Register */ +#define XAPM_MC42_OFFSET 0x0960 /**< Metric Counter 42 Register */ +#define XAPM_MC43_OFFSET 0x0970 /**< Metric Counter 43 Register */ +#define XAPM_MC44_OFFSET 0x0980 /**< Metric Counter 44 Register */ +#define XAPM_MC45_OFFSET 0x0990 /**< Metric Counter 45 Register */ +#define 
XAPM_MC46_OFFSET 0x09A0 /**< Metric Counter 46 Register */ +#define XAPM_MC47_OFFSET 0x09B0 /**< Metric Counter 47 Register */ + +#define XAPM_SMC10_OFFSET 0x02A0 /**< Sampled Metric Counter 10 Register */ +#define XAPM_SMC11_OFFSET 0x02B0 /**< Sampled Metric Counter 11 Register */ +#define XAPM_SMC12_OFFSET 0x0600 /**< Sampled Metric Counter 12 Register */ +#define XAPM_SMC13_OFFSET 0x0610 /**< Sampled Metric Counter 13 Register */ +#define XAPM_SMC14_OFFSET 0x0620 /**< Sampled Metric Counter 14 Register */ +#define XAPM_SMC15_OFFSET 0x0630 /**< Sampled Metric Counter 15 Register */ +#define XAPM_SMC16_OFFSET 0x0640 /**< Sampled Metric Counter 16 Register */ +#define XAPM_SMC17_OFFSET 0x0650 /**< Sampled Metric Counter 17 Register */ +#define XAPM_SMC18_OFFSET 0x0660 /**< Sampled Metric Counter 18 Register */ +#define XAPM_SMC19_OFFSET 0x0670 /**< Sampled Metric Counter 19 Register */ +#define XAPM_SMC20_OFFSET 0x0680 /**< Sampled Metric Counter 20 Register */ +#define XAPM_SMC21_OFFSET 0x0690 /**< Sampled Metric Counter 21 Register */ +#define XAPM_SMC22_OFFSET 0x06A0 /**< Sampled Metric Counter 22 Register */ +#define XAPM_SMC23_OFFSET 0x06B0 /**< Sampled Metric Counter 23 Register */ +#define XAPM_SMC24_OFFSET 0x0800 /**< Sampled Metric Counter 24 Register */ +#define XAPM_SMC25_OFFSET 0x0810 /**< Sampled Metric Counter 25 Register */ +#define XAPM_SMC26_OFFSET 0x0820 /**< Sampled Metric Counter 26 Register */ +#define XAPM_SMC27_OFFSET 0x0830 /**< Sampled Metric Counter 27 Register */ +#define XAPM_SMC28_OFFSET 0x0840 /**< Sampled Metric Counter 28 Register */ +#define XAPM_SMC29_OFFSET 0x0850 /**< Sampled Metric Counter 29 Register */ +#define XAPM_SMC30_OFFSET 0x0860 /**< Sampled Metric Counter 30 Register */ +#define XAPM_SMC31_OFFSET 0x0870 /**< Sampled Metric Counter 31 Register */ +#define XAPM_SMC32_OFFSET 0x0880 /**< Sampled Metric Counter 32 Register */ +#define XAPM_SMC33_OFFSET 0x0890 /**< Sampled Metric Counter 33 Register */ +#define 
XAPM_SMC34_OFFSET 0x08A0 /**< Sampled Metric Counter 34 Register */ +#define XAPM_SMC35_OFFSET 0x08B0 /**< Sampled Metric Counter 35 Register */ +#define XAPM_SMC36_OFFSET 0x0A00 /**< Sampled Metric Counter 36 Register */ +#define XAPM_SMC37_OFFSET 0x0A10 /**< Sampled Metric Counter 37 Register */ +#define XAPM_SMC38_OFFSET 0x0A20 /**< Sampled Metric Counter 38 Register */ +#define XAPM_SMC39_OFFSET 0x0A30 /**< Sampled Metric Counter 39 Register */ +#define XAPM_SMC40_OFFSET 0x0A40 /**< Sampled Metric Counter 40 Register */ +#define XAPM_SMC41_OFFSET 0x0A50 /**< Sampled Metric Counter 41 Register */ +#define XAPM_SMC42_OFFSET 0x0A60 /**< Sampled Metric Counter 42 Register */ +#define XAPM_SMC43_OFFSET 0x0A70 /**< Sampled Metric Counter 43 Register */ +#define XAPM_SMC44_OFFSET 0x0A80 /**< Sampled Metric Counter 44 Register */ +#define XAPM_SMC45_OFFSET 0x0A90 /**< Sampled Metric Counter 45 Register */ +#define XAPM_SMC46_OFFSET 0x0AA0 /**< Sampled Metric Counter 46 Register */ +#define XAPM_SMC47_OFFSET 0x0AB0 /**< Sampled Metric Counter 47 Register */ +/* Sampled metric counters 48-63: In Profile mode, this are min/max latency registers */ +#define XAPM_SMC48_OFFSET 0x0254 /**< Sampled Metric Counter 48 Register */ +#define XAPM_SMC49_OFFSET 0x0258 /**< Sampled Metric Counter 49 Register */ +#define XAPM_SMC50_OFFSET 0x02B4 /**< Sampled Metric Counter 50 Register */ +#define XAPM_SMC51_OFFSET 0x02B8 /**< Sampled Metric Counter 51 Register */ +#define XAPM_SMC52_OFFSET 0x0654 /**< Sampled Metric Counter 52 Register */ +#define XAPM_SMC53_OFFSET 0x0658 /**< Sampled Metric Counter 53 Register */ +#define XAPM_SMC54_OFFSET 0x06B4 /**< Sampled Metric Counter 54 Register */ +#define XAPM_SMC55_OFFSET 0x06B8 /**< Sampled Metric Counter 55 Register */ +#define XAPM_SMC56_OFFSET 0x0854 /**< Sampled Metric Counter 56 Register */ +#define XAPM_SMC57_OFFSET 0x0858 /**< Sampled Metric Counter 57 Register */ +#define XAPM_SMC58_OFFSET 0x08B4 /**< Sampled Metric Counter 58 
Register */ +#define XAPM_SMC59_OFFSET 0x08B8 /**< Sampled Metric Counter 59 Register */ +#define XAPM_SMC60_OFFSET 0x0A54 /**< Sampled Metric Counter 60 Register */ +#define XAPM_SMC61_OFFSET 0x0A58 /**< Sampled Metric Counter 61 Register */ +#define XAPM_SMC62_OFFSET 0x0AB4 /**< Sampled Metric Counter 62 Register */ +#define XAPM_SMC63_OFFSET 0x0AB8 /**< Sampled Metric Counter 63 Register */ + +#define XAPM_CTL_OFFSET 0x0300 /**< Control Register */ +#define XAPM_ID_OFFSET 0x0304 /**< Latency ID Register */ +#define XAPM_IDMASK_OFFSET 0x0308 /**< ID Mask Register */ +#define XAPM_FEC_OFFSET 0x0400 /**< Flag Enable Control Register */ +#define XAPM_SWD_OFFSET 0x0404 /**< Software-written Data Register */ +#define XAPM_ENT_OFFSET 0x0408 /**< Enable Trace Register */ + +/* AXI Monitor Sample Interval Control Register mask(s) */ + +#define XAPM_SICR_MCNTR_RST_MASK 0x00000100 /**< Enable the Metric Counter Reset */ +#define XAPM_SICR_LOAD_MASK 0x00000002 /**< Load the Sample Interval Register Value into the counter */ +#define XAPM_SICR_ENABLE_MASK 0x00000001 /**< Enable the downcounter */ + +/* Interrupt Status/Enable Register Bit Definitions and Masks */ + +#define XAPM_IXR_MC9_OVERFLOW_MASK 0x00001000 /**< Metric Counter 9 Overflow> */ +#define XAPM_IXR_MC8_OVERFLOW_MASK 0x00000800 /**< Metric Counter 8 Overflow> */ +#define XAPM_IXR_MC7_OVERFLOW_MASK 0x00000400 /**< Metric Counter 7 Overflow> */ +#define XAPM_IXR_MC6_OVERFLOW_MASK 0x00000200 /**< Metric Counter 6 Overflow> */ +#define XAPM_IXR_MC5_OVERFLOW_MASK 0x00000100 /**< Metric Counter 5 Overflow> */ +#define XAPM_IXR_MC4_OVERFLOW_MASK 0x00000080 /**< Metric Counter 4 Overflow> */ +#define XAPM_IXR_MC3_OVERFLOW_MASK 0x00000040 /**< Metric Counter 3 Overflow> */ +#define XAPM_IXR_MC2_OVERFLOW_MASK 0x00000020 /**< Metric Counter 2 Overflow> */ +#define XAPM_IXR_MC1_OVERFLOW_MASK 0x00000010 /**< Metric Counter 1 Overflow> */ +#define XAPM_IXR_MC0_OVERFLOW_MASK 0x00000008 /**< Metric Counter 0 Overflow> */ 
+#define XAPM_IXR_FIFO_FULL_MASK 0x00000004 /**< Event Log FIFO full> */ +#define XAPM_IXR_SIC_OVERFLOW_MASK 0x00000002 /**< Sample Interval Counter Overflow> */ +#define XAPM_IXR_GCC_OVERFLOW_MASK 0x00000001 /**< Global Clock Counter Overflow> */ +#define XAPM_IXR_ALL_MASK (XAPM_IXR_SIC_OVERFLOW_MASK | \ + XAPM_IXR_GCC_OVERFLOW_MASK | \ + XAPM_IXR_FIFO_FULL_MASK | \ + XAPM_IXR_MC0_OVERFLOW_MASK | \ + XAPM_IXR_MC1_OVERFLOW_MASK | \ + XAPM_IXR_MC2_OVERFLOW_MASK | \ + XAPM_IXR_MC3_OVERFLOW_MASK | \ + XAPM_IXR_MC4_OVERFLOW_MASK | \ + XAPM_IXR_MC5_OVERFLOW_MASK | \ + XAPM_IXR_MC6_OVERFLOW_MASK | \ + XAPM_IXR_MC7_OVERFLOW_MASK | \ + XAPM_IXR_MC8_OVERFLOW_MASK | \ + XAPM_IXR_MC9_OVERFLOW_MASK) + +/* AXI Monitor Control Register mask(s) */ + +#define XAPM_CR_FIFO_RESET_MASK 0x02000000 /**< FIFO Reset */ +#define XAPM_CR_GCC_RESET_MASK 0x00020000 /**< Global Clk Counter Reset */ +#define XAPM_CR_GCC_ENABLE_MASK 0x00010000 /**< Global Clk Counter Enable */ +#define XAPM_CR_EVTLOG_EXTTRIGGER_MASK 0x00000200 /**< Enable External trigger to start event Log */ +#define XAPM_CR_EVENTLOG_ENABLE_MASK 0x00000100 /**< Event Log Enable */ +#define XAPM_CR_RDLATENCY_END_MASK 0x00000080 /**< Read Latency End point */ +#define XAPM_CR_RDLATENCY_START_MASK 0x00000040 /**< Read Latency Start point */ +#define XAPM_CR_WRLATENCY_END_MASK 0x00000020 /**< Write Latency End point */ +#define XAPM_CR_WRLATENCY_START_MASK 0x00000010 /**< Write Latency Start point */ +#define XAPM_CR_IDFILTER_ENABLE_MASK 0x00000008 /**< ID Filter Enable */ +#define XAPM_CR_MCNTR_EXTTRIGGER_MASK 0x00000004 /**< Enable External trigger to start Metric Counters */ +#define XAPM_CR_MCNTR_RESET_MASK 0x00000002 /**< Metrics Counter Reset */ +#define XAPM_CR_MCNTR_ENABLE_MASK 0x00000001 /**< Metrics Counter Enable */ + +/* AXI Monitor ID Register mask(s) */ + +#define XAPM_ID_RID_MASK 0xFFFF0000 /**< Read ID */ +#define XAPM_ID_WID_MASK 0x0000FFFF /**< Write ID */ + +/* AXI Monitor ID Mask Register mask(s) */ +
+#define XAPM_MASKID_RID_MASK 0xFFFF0000 /**< Read ID Mask */ +#define XAPM_MASKID_WID_MASK 0x0000FFFF /**< Write ID Mask*/ + +/* AXI Monitor Min/Max Register masks and shifts */ + +#define XAPM_MAX_LATENCY_MASK 0xFFFF0000 /**< Max Latency Mask */ +#define XAPM_MIN_LATENCY_MASK 0x0000FFFF /**< Min Latency Mask */ +#define XAPM_MAX_LATENCY_SHIFT 16 /**< Max Latency Shift */ +#define XAPM_MIN_LATENCY_SHIFT 0 /**< Min Latency Shift */ + +#endif + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/include/xbar_sys_parameters.h b/sdk/SDAccel/HAL/driver/xcldma/include/xbar_sys_parameters.h new file mode 100644 index 000000000..3e5ae36a3 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/include/xbar_sys_parameters.h @@ -0,0 +1,146 @@ +// Copyright Xilinx, Inc 2014-2016 +// Author: Sonal Santan +// Register definition for the XDMA + +#ifndef __XDMA_SYS_PARAMETERS_H__ +#define __XDMA_SYS_PARAMETERS_H__ + +#include "perfmon_parameters.h" + +#define XILINX_VENDOR_ID 0x10EE + +//parameters for HWICAP, Flash and APM on PCIe BAR +#define OCL_CTLR_OFFSET 0x000000 +#define HWICAP_OFFSET 0x020000 +#define AXI_GATE_OFFSET 0x030000 +#define AXI_GATE_OFFSET_READ 0x030008 + +#define FEATURE_ID 0x031000 + +#define GENERAL_STATUS 0x032000 + +#define BPI_FLASH_OFFSET 0x040000 + +#define AXI_I2C_OFFSET 0x041000 +#define PERFMON0_OFFSET 0x100000 +#define PERFMON1_OFFSET 0x120000 +#define PERFMON2_OFFSET 0x010000 + +#define OCL_CLKWIZ_OFFSET 0x050000 +#define OCL_CLKWIZ_BASEADDR 0x050000 +#define OCL_CLKWIZ_BASEADDR2 0x051000 + +#define OCL_CLKWIZ_STATUS_OFFSET 0x4 +#define OCL_CLKWIZ_CONFIG_OFFSET(n) (0x200 + 4 * (n)) + +// These are kept only for backwards compatipility. These macros should +// not be used anymore. 
+#define OCL_CLKWIZ_STATUS (OCL_CLKWIZ_BASEADDR + OCL_CLKWIZ_STATUS_OFFSET) +#define OCL_CLKWIZ_CONFIG(n) (OCL_CLKWIZ_BASEADDR + OCL_CLKWIZ_CONFIG_OFFSET(n)) + +#define HWICAP_BAR 0 +#define BPI_FLASH_BAR 0 +#define ACCELERATOR_BAR 0 +#define PERFMON_BAR 0 +#define HWICAP_WRITE_FIFO_SIZE 64 +#define MMAP_SIZE_USER 0x400000 +#define MMAP_SIZE_CTRL 0x8000 +#define DDR_BUFFER_ALIGNMENT 0x40 +#define DMA_HWICAP_BITFILE_BUFFER_SIZE 1024 +#define OCL_CU_CTRL_RANGE 0x1000 + +#define ULTRASCALE_MCAP_CONFIG_BASE 0x340 + +/************************** Constant Definitions ****************************/ + +/* Input frequency */ +#define XDMA_7V3_INPUT_FREQ 100 +#define XDMA_KU3_INPUT_FREQ 100 + +#define XDMA_7V3_CLKWIZ_CONFIG0 0x04000a01 +#define XDMA_KU3_CLKWIZ_CONFIG0 0x04000a01 + +/* Used for parsing bitstream header */ +#define XHI_EVEN_MAGIC_BYTE 0x0f +#define XHI_ODD_MAGIC_BYTE 0xf0 + +/* Extra mode for IDLE */ +#define XHI_OP_IDLE -1 + +#define XHI_BIT_HEADER_FAILURE -1 + +/* The imaginary module length register */ +#define XHI_MLR 15 + +/** + * AXI IIC Bus Interface v2.0 + * http://www.xilinx.com/support/documentation/ip_documentation/axi_iic/v2_0/pg090-axi-iic.pdf + */ +#define AXI_I2C_SOFT_RESET AXI_I2C_OFFSET+0x040 +#define AXI_I2C_CR AXI_I2C_OFFSET+0x100 +#define AXI_I2C_TX_FIFO AXI_I2C_OFFSET+0x108 +#define AXI_I2C_RX_FIFO AXI_I2C_OFFSET+0x10c +#define AXI_I2C_RX_FIFO_PIRQ AXI_I2C_OFFSET+0x120 + + +/** ICAP register definition **/ +#define XHWICAP_GIER HWICAP_OFFSET+0x1c +#define XHWICAP_ISR HWICAP_OFFSET+0x20 +#define XHWICAP_IER HWICAP_OFFSET+0x28 +#define XHWICAP_WF HWICAP_OFFSET+0x100 +#define XHWICAP_RF HWICAP_OFFSET+0x104 +#define XHWICAP_SZ HWICAP_OFFSET+0x108 +#define XHWICAP_CR HWICAP_OFFSET+0x10c +#define XHWICAP_SR HWICAP_OFFSET+0x110 +#define XHWICAP_WFV HWICAP_OFFSET+0x114 +#define XHWICAP_RFO HWICAP_OFFSET+0x118 +#define XHWICAP_ASR HWICAP_OFFSET+0x11c + +/** +* Bitstream header information. 
+*/ +typedef struct { + unsigned int HeaderLength; /* Length of header in 32 bit words */ + unsigned int BitstreamLength; /* Length of bitstream to read in bytes*/ + unsigned char *DesignName; /* Design name read from bitstream header */ + unsigned char *PartName; /* Part name read from bitstream header */ + unsigned char *Date; /* Date read from bitstream header */ + unsigned char *Time; /* Bitstream creation time read from header */ + unsigned int MagicLength; /* Length of the magic numbers in header */ +} XHwIcap_Bit_Header; + +/* + * Flash programming constants + * XAPP 518 + * http://www.xilinx.com/support/documentation/application_notes/xapp518-isp-bpi-prom-virtex-6-pcie.pdf + * Table 1 + */ + +#define START_ADDR_CMD 0x53410000 +#define END_ADDR_CMD 0x45000000 +#define UNLOCK_CMD 0x556E6C6B +#define ERASE_CMD 0x45726173 +#define PROGRAM_CMD 0x50726F67 + +#define READY_STAT 0x00008000 +#define ERASE_STAT 0x00000000 +#define PROGRAM_STAT 0x00000080 + +/* + * Booting FPGA from PROM + * http://www.xilinx.com/support/documentation/user_guides/ug470_7Series_Config.pdf + * Table 7.1 + */ + +#define DUMMY_WORD 0xFFFFFFFF +#define SYNC_WORD 0xAA995566 +#define TYPE1_NOOP 0x20000000 +#define TYPE1_WRITE_WBSTAR 0x30020001 +#define WBSTAR_ADD10 0x00000000 +#define WBSTAR_ADD11 0x01000000 +#define TYPE1_WRITE_CMD 0x30008001 +#define IPROG_CMD 0x0000000F + +#endif + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/include/xdma-ioctl.h b/sdk/SDAccel/HAL/driver/xcldma/include/xdma-ioctl.h new file mode 100644 index 000000000..a4e5b16e5 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/include/xdma-ioctl.h @@ -0,0 +1,148 @@ +#ifndef _XDMA_IOCALLS_POSIX_H_ +#define _XDMA_IOCALLS_POSIX_H_ + +#ifndef _WINDOWS +// TODO: Windows build support +#include +#endif + +/* Use 'x' as magic number */ +#define XDMA_IOC_MAGIC 'x' +/* XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); */ 
+#define XDMA_XCL_MAGIC 0X586C0C6C + +#define OCL_NUM_CLOCKS 2 + +/* + * S means "Set" through a ptr, + * T means "Tell" directly with the argument value + * G means "Get": reply by setting through a pointer + * Q means "Query": response is on the return value + * X means "eXchange": switch G and S atomically + * H means "sHift": switch T and Q atomically + * + * _IO(type,nr) no arguments + * _IOR(type,nr,datatype) read data from driver + * _IOW(type,nr,datatype) write data to driver + * _IOWR(type,nr,datatype) read/write data + * + * _IOC_DIR(nr) returns direction + * _IOC_TYPE(nr) returns magic + * _IOC_NR(nr) returns number + * _IOC_SIZE(nr) returns size + */ + +enum XDMA_IOC_TYPES { + XDMA_IOC_NOP, + XDMA_IOC_INFO, + XDMA_IOC_ICAP_DOWNLOAD, + XDMA_IOC_MCAP_DOWNLOAD, + XDMA_IOC_HOT_RESET, + XDMA_IOC_OCL_RESET, + XDMA_IOC_OCL_FREQ_SCALING, + XDMA_IOC_REBOOT, + XDMA_IOC_INFO2, + XDMA_IOC_OCL_FREQ_SCALING2, + XDMA_IOC_MAX +}; + +/** + * TODO: Change the structs to use linux kernel preferred types like (u)int64_t + * instead of (unsigned) short, etc. 
+ */ + +struct xdma_ioc_base { + unsigned int magic; + unsigned int command; +}; + +struct xdma_ioc_info { + struct xdma_ioc_base base; + unsigned short vendor; + unsigned short device; + unsigned short subsystem_vendor; + unsigned short subsystem_device; + unsigned dma_engine_version; + unsigned driver_version; + unsigned long long feature_id; + unsigned ocl_frequency; + unsigned pcie_link_width; + unsigned pcie_link_speed; +}; + +struct xdma_ioc_info2 { + struct xdma_ioc_base base; + unsigned short vendor; + unsigned short device; + unsigned short subsystem_vendor; + unsigned short subsystem_device; + unsigned dma_engine_version; + unsigned driver_version; + unsigned long long feature_id; + unsigned short ocl_frequency[OCL_NUM_CLOCKS]; + unsigned short pcie_link_width; + unsigned short pcie_link_speed; + unsigned short num_clocks; + int16_t onchip_temp; + int16_t fan_temp; + unsigned short fan_speed; + unsigned short vcc_int; + unsigned short vcc_aux; + unsigned short vcc_bram; + bool mig_calibration; + char reserved[64]; +}; + +struct xdma_ioc_bitstream { + struct xdma_ioc_base base; + struct xclBin *xclbin; +}; + +struct xdma_performance_ioctl +{ + /* IOCTL_XDMA_IOCTL_Vx */ + uint32_t version; + uint32_t transfer_size; + /* measurement */ + uint32_t stopped; + uint32_t iterations; + uint64_t clock_cycle_count; + uint64_t data_cycle_count; + uint64_t pending_count; +}; + +struct xdma_ioc_freqscaling { + struct xdma_ioc_base base; + unsigned ocl_target_freq; +}; + +struct xdma_ioc_freqscaling2 { + struct xdma_ioc_base base; + unsigned ocl_region; + unsigned short ocl_target_freq[OCL_NUM_CLOCKS]; +}; + +#define XDMA_IOCINFO _IOWR(XDMA_IOC_MAGIC,XDMA_IOC_INFO, struct xdma_ioc_info) +#define XDMA_IOCINFO2 _IOWR(XDMA_IOC_MAGIC,XDMA_IOC_INFO2, struct xdma_ioc_info2) +#define XDMA_IOCICAPDOWNLOAD _IOW(XDMA_IOC_MAGIC,XDMA_IOC_ICAP_DOWNLOAD, struct xdma_ioc_bitstream) +#define XDMA_IOCMCAPDOWNLOAD _IOW(XDMA_IOC_MAGIC,XDMA_IOC_MCAP_DOWNLOAD, struct xdma_ioc_bitstream) 
+#define XDMA_IOCHOTRESET _IOW(XDMA_IOC_MAGIC,XDMA_IOC_HOT_RESET, struct xdma_ioc_base) +#define XDMA_IOCOCLRESET _IOW(XDMA_IOC_MAGIC,XDMA_IOC_OCL_RESET, struct xdma_ioc_base) +#define XDMA_IOCFREQSCALING _IOWR(XDMA_IOC_MAGIC,XDMA_IOC_OCL_FREQ_SCALING, struct xdma_ioc_freqscaling) +#define XDMA_IOCFREQSCALING2 _IOWR(XDMA_IOC_MAGIC,XDMA_IOC_OCL_FREQ_SCALING2, struct xdma_ioc_freqscaling2) +#define XDMA_IOCREBOOT _IOW(XDMA_IOC_MAGIC,XDMA_IOC_REBOOT, struct xdma_ioc_base) +// Legacy IOCTL NAME +#define XDMA_IOCRESET (XDMA_IOCHOTRESET) +#define IOCTL_XDMA_PERF_V1 (1) + +/* IOCTL codes */ +#define IOCTL_XDMA_PERF_START _IOW('q', 1, struct xdma_performance_ioctl *) +#define IOCTL_XDMA_PERF_STOP _IOW('q', 2, struct xdma_performance_ioctl *) +#define IOCTL_XDMA_PERF_GET _IOR('q', 3, struct xdma_performance_ioctl *) + +#define IOCTL_XDMA_ADDRMODE_SET _IOW('q', 4, int) +#define IOCTL_XDMA_ADDRMODE_GET _IOR('q', 5, int) + +#define XDMA_ADDRMODE_MEMORY (0) +#define XDMA_ADDRMODE_FIXED (1) +#endif diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/datamover.h b/sdk/SDAccel/HAL/driver/xcldma/user/datamover.h new file mode 100644 index 000000000..706fc8697 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/datamover.h @@ -0,0 +1,182 @@ +#ifndef _XDMA_DATA_MOVER_H_ +#define _XDMA_DATA_MOVER_H_ + +/** + * Copyright (C) 2016 Xilinx, Inc + * Author: Sonal Santan + * XDMA HAL multi-threading safe, multi-channel DMA read/write support + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Work around GCC 4.8 + XDMA BAR implementation bugs +// With -O3 PCIe BAR read/write are not reliable hence force -O2 as max +// optimization level for pcieBarRead() and pcieBarWrite() +#if defined(__GNUC__) && defined(NDEBUG) +#define SHIM_O2 __attribute__ ((optimize("-O2"))) +#else +#define SHIM_O2 +#endif + +namespace xclxdma { + class DMAChannelManager + { + public: + DMAChannelManager(unsigned deviceIndex, unsigned count, std::ios_base::openmode mode) : mCount(count) { + std::string baseName("/dev/xcldma/xcldma"); + baseName += std::to_string(deviceIndex); + assert((mode == std::ios_base::in) || (mode == std::ios_base::out)); + const char *suffix = (mode == std::ios_base::out) ? "_h2c_" : "_c2h_"; + baseName += suffix; + for (mIndex = 0; mIndex < static_cast(mCount); ++mIndex) { + std::string fileName(baseName); + fileName += std::to_string(mIndex); + mChannel.push_back(open(fileName.c_str(), (mode == std::ios_base::out) ? 
O_WRONLY : O_RDONLY)); + } + --mIndex; + } + + ~DMAChannelManager() { + unlock(); + for (unsigned i = 0; i < mCount; i++) { + close(mChannel[i]); + } + } + + bool isGood() const { + for (unsigned i = 0; i < mCount; i++) { + if (mChannel[i] < 0) + return false; + } + return true; + } + + void releaseDMAChannel(int channel) { + std::lock_guard lck(mMtx); + mChannel[++mIndex] = channel; + mCV.notify_one(); + } + + int acquireDMAChannel() { + std::unique_lock lck(mMtx); + while(mIndex < 0) { + mCV.wait(lck); + } + return mChannel[mIndex--]; + } + + bool lock() const { + for (unsigned i = 0; i < mCount; i++) { + if (!flock(mChannel[i], LOCK_EX | LOCK_NB)) + continue; + // Unable to lock channel i, unlock all channels locked so far + for (unsigned j = 0; j < i; j++) { + flock(mChannel[j], LOCK_UN); + } + return false; + } + return true; + } + + void unlock() const { + for (unsigned i = 0; i < mCount; i++) { + flock(mChannel[i], LOCK_UN); + } + } + + unsigned channelCount() const { + return mCount; + } + + private: + std::mutex mMtx; + std::condition_variable mCV; + std::vector mChannel; + const unsigned mCount; + int mIndex; + }; + + class DataMover { + public: + DataMover(unsigned index, unsigned count) : mWrite(index, count, std::ios_base::out), + mRead(index, count, std::ios_base::in) {} + + // TODO: Make pwrite64 and pread64 use RAII for the channel resource + ssize_t pwrite64(const void* buf, size_t count, off64_t offset) { + int fd = mWrite.acquireDMAChannel(); + ssize_t rc = pwrite(fd, buf, count, offset); + mWrite.releaseDMAChannel(fd); + return rc; + } + ssize_t pread64(void* buf, size_t count, off64_t offset) { + int fd = mRead.acquireDMAChannel(); + ssize_t rc = pread(fd, buf, count, offset); + mRead.releaseDMAChannel(fd); + return rc; + } + // Like memset but using pwrite + void pset64(const void* buf, size_t count, off64_t offset, unsigned rep) { + int fd = mWrite.acquireDMAChannel(); + off64_t curr = offset; + while (rep-- > 0) { +#ifndef RDI_COVERITY +# 
pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-result" + pwrite(fd, buf, count, curr); +# pragma GCC diagnostic pop + curr += count; +#endif + } + mWrite.releaseDMAChannel(fd); + } + bool isGood() { + return (mWrite.isGood() && mRead.isGood()); + } + + int lock() { + if (mWrite.lock() && mRead.lock()) + return true; + unlock(); + return false; + } + + void unlock() { + mWrite.unlock(); + mRead.unlock(); + } + + unsigned channelCount() const { + return mWrite.channelCount() + mRead.channelCount(); + } + + private: + DMAChannelManager mWrite; + DMAChannelManager mRead; + }; +} + + +#endif diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.cpp b/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.cpp new file mode 100644 index 000000000..f4cb7fc1d --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.cpp @@ -0,0 +1,220 @@ +/** + * Copyright (C) 2015 Xilinx, Inc + * Author: Sonal Santan + * XDMA HAL Driver layered on top of XDMA kernel driver + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+
+#include "memorymanager.h"
+#include <cassert>
+#include <algorithm>
+
+/*
+ * Define GCC version macro so we can use newer C++11 features
+ * if possible
+ */
+#define GCC_VERSION (__GNUC__ * 10000 \
+                     + __GNUC_MINOR__ * 100 \
+                     + __GNUC_PATCHLEVEL__)
+
+
+/*
+ * Create a manager for the address range [start, start + size).
+ * The whole range starts out as a single free extent. Sizes passed to
+ * alloc() and reserve() are rounded up to a multiple of alignment.
+ */
+xclxdma::MemoryManager::MemoryManager(uint64_t size, uint64_t start,
+                                      unsigned alignment) : mSize(size), mStart(start), mAlignment(alignment),
+                                                            mCoalesceThreshold(4), mFreeSize(0)
+{
+    // alloc() and reserve() compute size % mAlignment, so zero is never valid
+    assert(alignment != 0);
+    assert(start % alignment == 0);
+    mFreeBufferList.push_back(std::make_pair(mStart, mSize));
+    mFreeSize = mSize;
+}
+
+xclxdma::MemoryManager::~MemoryManager()
+{
+}
+
+/*
+ * First-fit allocation. A request of 0 bytes is treated as one alignment
+ * unit; every other request is padded up to a multiple of mAlignment.
+ * Returns the start address of the extent, or mNull when no free extent
+ * is large enough.
+ */
+uint64_t
+xclxdma::MemoryManager::alloc(size_t size)
+{
+    if (size == 0)
+        size = mAlignment;
+
+    uint64_t result = mNull;
+    const size_t mod_size = size % mAlignment;
+    const size_t pad = (mod_size > 0) ? (mAlignment - mod_size) : 0;
+    size += pad;
+
+    std::lock_guard<std::mutex> lock(mMemManagerMutex);
+    for (PairList::iterator i = mFreeBufferList.begin(), e = mFreeBufferList.end(); i != e; ++i) {
+        if (i->second < size)
+            continue;
+        result = i->first;
+        if (i->second > size) {
+            // Resize the existing entry in freelist
+            i->first += size;
+            i->second -= size;
+        }
+        else {
+            // remove the exact match found
+            mFreeBufferList.erase(i);
+        }
+        mBusyBufferList.push_back(std::make_pair(result, size));
+        mFreeSize -= size;
+        break;
+    }
+    return result;
+}
+
+/*
+ * Return the busy extent that starts at buf to the free list. Addresses
+ * that are not the start of a busy extent are silently ignored. Once the
+ * free list grows beyond mCoalesceThreshold entries, neighbouring free
+ * extents are merged.
+ */
+void
+xclxdma::MemoryManager::free(uint64_t buf)
+{
+    std::lock_guard<std::mutex> lock(mMemManagerMutex);
+    PairList::iterator i = find(buf);
+    if (i == mBusyBufferList.end())
+        return;
+    mFreeSize += i->second;
+    mFreeBufferList.push_back(std::make_pair(i->first, i->second));
+    mBusyBufferList.erase(i);
+    if (mFreeBufferList.size() > mCoalesceThreshold) {
+        coalesce();
+    }
+}
+
+
+// Merge free extents that touch each other.
+// Caller should have acquired the mutex lock before calling coalesce();
+void
+xclxdma::MemoryManager::coalesce()
+{
+    // Nothing to merge, and begin() must not be incremented on an empty list
+    if (mFreeBufferList.empty())
+        return;
+
+    // First sort the free buffers and then attempt to coalesce the neighbors
+    mFreeBufferList.sort();
+
+    PairList::iterator curr = mFreeBufferList.begin();
+    PairList::iterator next = curr;
+    ++next;
+    PairList::iterator last = mFreeBufferList.end();
+    while (next != last) {
+        if ((curr->first + curr->second) != next->first) {
+            // Non contiguous blocks
+            curr = next;
+            ++next;
+            continue;
+        }
+        // Coalesce curr and next
+        curr->second += next->second;
+        mFreeBufferList.erase(next);
+        next = curr;
+        ++next;
+    }
+}
+
+// Locate the busy extent whose start address is buf; returns
+// mBusyBufferList.end() when there is none.
+// Caller should have acquired the mutex lock before calling find();
+xclxdma::MemoryManager::PairList::iterator
+xclxdma::MemoryManager::find(uint64_t buf)
+{
+#if GCC_VERSION >= 40800
+    PairList::iterator i = std::find_if(mBusyBufferList.begin(), mBusyBufferList.end(), [&] (const PairList::value_type& s)
+                                        { return s.first == buf; });
+#else
+    PairList::iterator i = mBusyBufferList.begin();
+    PairList::iterator last = mBusyBufferList.end();
+    while(i != last) {
+        if (i->first == buf)
+            break;
+        ++i;
+    }
+#endif
+    return i;
+}
+
+/*
+ * Forget every allocation and reservation and make the whole range
+ * [mStart, mStart + mSize) free again.
+ */
+void
+xclxdma::MemoryManager::reset()
+{
+    std::lock_guard<std::mutex> lock(mMemManagerMutex);
+    mFreeBufferList.clear();
+    mBusyBufferList.clear();
+    mFreeBufferList.push_back(std::make_pair(mStart, mSize));
+    // The whole range is free again, exactly as after construction.
+    // Leaving this at 0 would make the next alloc() underflow the counter.
+    mFreeSize = mSize;
+}
+
+/*
+ * Look up the busy extent that starts at buf. Returns (start, size) of
+ * the extent, or (mNull, mNull) when buf is not the start of a busy extent.
+ */
+std::pair<uint64_t, uint64_t>
+xclxdma::MemoryManager::lookup(uint64_t buf)
+{
+    std::lock_guard<std::mutex> lock(mMemManagerMutex);
+    PairList::iterator i = find(buf);
+    if (i != mBusyBufferList.end())
+        return *i;
+    // Compiler bug -- Some versions of GCC C++11 compiler do not
+    // like mNull directly inside std::make_pair, so capture mNull
+    // in a temporary
+    const uint64_t v = mNull;
+    return std::make_pair(v, v);
+}
+
+
+/*
+ * Mark the fixed range [base, base + size) as busy. size is padded up to a
+ * multiple of mAlignment first. The range must lie completely inside one
+ * free extent; that extent is trimmed, split or removed as needed.
+ * Returns true when the range was reserved, false when it is out of bounds
+ * or not entirely free (in which case nothing is modified).
+ */
+bool
+xclxdma::MemoryManager::reserve(uint64_t base, size_t size)
+{
+    assert(size);
+    if (size > mSize)
+        return false;
+
+    if (base < mStart)
+        return false;
+
+    // A non-empty range cannot start at or beyond the end of the managed range
+    if (base >= (mStart + mSize))
+        return false;
+
+    const size_t mod_size = size % mAlignment;
+    const size_t pad = (mod_size > 0) ? (mAlignment - mod_size) : 0;
+    size += pad;
+
+    std::lock_guard<std::mutex> lock(mMemManagerMutex);
+    for (PairList::iterator i = mFreeBufferList.begin(), e = mFreeBufferList.end(); i != e; ++i) {
+        const uint64_t blockStart = i->first;
+        const uint64_t blockEnd = i->first + i->second;
+        if (blockStart > base)
+            continue;
+        if ((base + size) > blockEnd)
+            continue;
+
+        // Free space left in this extent before and after the reservation
+        const uint64_t head = base - blockStart;
+        const uint64_t tail = blockEnd - (base + size);
+
+        if ((head == 0) && (tail == 0)) {
+            // Exact match
+            mFreeBufferList.erase(i);
+        }
+        else if (head == 0) {
+            // Hole at the end; Resize existing entry
+            i->first = base + size;
+            i->second = tail;
+        }
+        else if (tail == 0) {
+            // Hole in the beginning; Resize existing entry
+            i->second = head;
+        }
+        else {
+            // We have holes on both sides
+            // Resize hole in the beginning
+            i->second = head;
+
+            // Now create an entry for the hole at the end
+            PairList::iterator after = i;
+            ++after;
+            mFreeBufferList.insert(after, std::make_pair(base + size, tail));
+        }
+        mBusyBufferList.push_back(std::make_pair(base, size));
+        mFreeSize -= size;
+        return true;
+    }
+    // No free extent contains the requested range
+    return false;
+}
diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.h b/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.h
new file mode 100644
index 000000000..85661cd73
--- /dev/null
+++ b/sdk/SDAccel/HAL/driver/xcldma/user/memorymanager.h
@@ -0,0 +1,76 @@
+#ifndef _XDMA_MEMORY_MANAGER_H_
+#define _XDMA_MEMORY_MANAGER_H_
+
+/**
+ * Copyright (C) 2015 Xilinx, Inc
+ * Author: Sonal Santan
+ * Simple usermode XDMA DDR memory manager used by HAL
+ * Eventually the common code here will be used by all HAL drivers.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"). You may
+ * not use this file except in compliance with the License.
A copy of the
+ * License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+#include <mutex>
+#include <list>
+#include <utility>
+#include "driver/include/xclhal.h"
+
+namespace xclxdma {
+    /*
+     * Bookkeeping for a contiguous address range of mSize bytes starting at
+     * mStart. Extents are stored as (start address, size in bytes) pairs in
+     * a free list and a busy list. alloc(), free(), reset(), lookup() and
+     * reserve() take mMemManagerMutex; the inline accessors below do not.
+     */
+    class MemoryManager {
+        std::mutex mMemManagerMutex;
+        // Extents currently available for allocation
+        std::list<std::pair<uint64_t, uint64_t> > mFreeBufferList;
+        // Extents handed out by alloc() or claimed by reserve()
+        std::list<std::pair<uint64_t, uint64_t> > mBusyBufferList;
+        const uint64_t mSize;
+        const uint64_t mStart;
+        // Requested sizes are padded up to a multiple of this value
+        const uint64_t mAlignment;
+        // free() coalesces once the free list has more entries than this
+        const unsigned mCoalesceThreshold;
+        // Total number of bytes currently in the free list
+        uint64_t mFreeSize;
+
+        typedef std::list<std::pair<uint64_t, uint64_t> > PairList;
+
+    public:
+        // Sentinel returned by alloc() and lookup() when there is no result
+        static const uint64_t mNull = 0xffffffffffffffffull;
+
+    public:
+        MemoryManager(uint64_t size, uint64_t start, unsigned alignment);
+        ~MemoryManager();
+        // Returns the start address of a new extent, or mNull on failure
+        uint64_t alloc(size_t size);
+        // Releases the busy extent that starts at buf
+        void free(uint64_t buf);
+        // Drops all extents and marks the whole range free
+        void reset();
+        // Returns (start, size) of the busy extent at buf, or (mNull, mNull)
+        std::pair<uint64_t, uint64_t> lookup(uint64_t buf);
+        // Claims the fixed range starting at base
+        bool reserve(uint64_t base, size_t size);
+
+        uint64_t size() const {
+            return mSize;
+        }
+
+        uint64_t start() const {
+            return mStart;
+        }
+
+        // Reads mFreeSize without taking mMemManagerMutex
+        uint64_t freeSize() const {
+            return mFreeSize;
+        }
+
+        // True when either field of buf is the mNull sentinel
+        static bool isNullAlloc(const std::pair<uint64_t, uint64_t>& buf) {
+            return ((buf.first == mNull) || (buf.second == mNull));
+        }
+
+    private:
+        /* Note that these should be called after acquiring mMemManagerMutex */
+        void coalesce();
+        PairList::iterator find(uint64_t buf);
+    };
+}
+
+#endif
diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/perf.cpp b/sdk/SDAccel/HAL/driver/xcldma/user/perf.cpp
new file mode 100644
index 000000000..ffdd10ec6
--- /dev/null
+++ b/sdk/SDAccel/HAL/driver/xcldma/user/perf.cpp
@@ -0,0 +1,980 @@
+/*
+ * Copyright (C) 2015 Xilinx, Inc
+ * Author: Paul Schumacher
+ * Performance Monitoring using PCIe for XDMA HAL Driver
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include "shim.h" +#include "datamover.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifndef _WINDOWS +// TODO: Windows build support +// unistd.h is linux only header file +// it is included for read, write, close, lseek64 +#include +#endif + +#ifdef _WINDOWS +#define __func__ __FUNCTION__ +#endif + +#define FAST_OFFLOAD_MAJOR 2 +#define FAST_OFFLOAD_MINOR 2 + +namespace xclxdma { + // **************** + // Helper functions + // **************** + + bool XDMAShim::isDSAVersion(unsigned majorVersion, unsigned minorVersion, bool onlyThisVersion) { + unsigned checkVersion = (majorVersion << 4) + (minorVersion); + if (onlyThisVersion) + return (mDeviceInfo.mDeviceVersion == checkVersion); + return (mDeviceInfo.mDeviceVersion >= checkVersion); + } + + unsigned XDMAShim::getBankCount() { + return mDeviceInfo.mDDRBankCount; + } + + void XDMAShim::xclSetOclRegionProfilingNumberSlots(uint32_t numSlots) { + mOclRegionProfilingNumberSlots = numSlots; + } + + // Get host timestamp to write to APM + // IMPORTANT NOTE: this *must* be compatible with the method of generating + // timestamps as defined in RTProfile::getTraceTime() + uint64_t XDMAShim::getHostTraceTimeNsec() { + struct timespec now; + int err; + if ((err = clock_gettime(CLOCK_MONOTONIC, &now)) < 0) + return 0; + + return (uint64_t) now.tv_sec * 1000000000UL + (uint64_t) now.tv_nsec; + } + + uint64_t XDMAShim::getPerfMonBaseAddress(xclPerfMonType type) { + if 
(type == XCL_PERF_MON_MEMORY) return PERFMON0_OFFSET; + if (type == XCL_PERF_MON_HOST_INTERFACE) return PERFMON1_OFFSET; + if (type == XCL_PERF_MON_OCL_REGION) return PERFMON2_OFFSET; + return 0; + } + + uint64_t XDMAShim::getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum) { + if (type == XCL_PERF_MON_MEMORY) { + // Only one FIFO in >= v2.2 + if (isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) + return PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0; + + if (fifonum == 0) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0); + if (fifonum == 1) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_1); + if (fifonum == 2) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_2); + return 0; + } + if (type == XCL_PERF_MON_OCL_REGION) { + if (fifonum == 0) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0); + if (fifonum == 1) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1); + if (fifonum == 2) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2); + return 0; + } + return 0; + } + + uint64_t XDMAShim::getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum) { + if (type == XCL_PERF_MON_MEMORY) { + // Use AXI-MM to access trace FIFO + // NOTE: requires compatible change in base platform + if (isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) + return XPAR_AXI_PERF_MON_0_TRACE_OFFSET_AXI_FULL; + + if (fifonum == 0) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_0); + if (fifonum == 1) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_1); + if (fifonum == 2) return (PERFMON0_OFFSET + XPAR_AXI_PERF_MON_0_TRACE_OFFSET_2); + return 0; + } + if (type == XCL_PERF_MON_OCL_REGION) { + if (fifonum == 0) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_0); + if (fifonum == 1) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_1); + if (fifonum == 2) return (PERFMON2_OFFSET + XPAR_AXI_PERF_MON_2_TRACE_OFFSET_2); + return 0; + } + return 
0; + } + + uint32_t XDMAShim::getPerfMonNumberFifos(xclPerfMonType type) { + if (type == XCL_PERF_MON_MEMORY) + return XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO; + if (type == XCL_PERF_MON_HOST_INTERFACE) + return XPAR_AXI_PERF_MON_1_TRACE_NUMBER_FIFO; + if (type == XCL_PERF_MON_OCL_REGION) { + if (mOclRegionProfilingNumberSlots > 4) + return 3; + else + return 2; + } + return 0; + } + + uint32_t XDMAShim::getPerfMonNumberSlots(xclPerfMonType type) { + if (type == XCL_PERF_MON_MEMORY) { + return (getBankCount() + 1); + } + if (type == XCL_PERF_MON_HOST_INTERFACE) { + return XPAR_AXI_PERF_MON_1_NUMBER_SLOTS; + } + if (type == XCL_PERF_MON_OCL_REGION) { + return mOclRegionProfilingNumberSlots; + } + return 1; + } + + uint32_t XDMAShim::getPerfMonNumberSamples(xclPerfMonType type) { + if (type == XCL_PERF_MON_MEMORY) return XPAR_AXI_PERF_MON_0_TRACE_NUMBER_SAMPLES; + if (type == XCL_PERF_MON_HOST_INTERFACE) return XPAR_AXI_PERF_MON_1_TRACE_NUMBER_SAMPLES; + // TODO: get number of samples from metadata + if (type == XCL_PERF_MON_OCL_REGION) return XPAR_AXI_PERF_MON_2_TRACE_NUMBER_SAMPLES; + return 0; + } + + uint32_t XDMAShim::getPerfMonByteScaleFactor(xclPerfMonType type) { + // NOTE: In the NWL DMA base platform, the APM slot data was only 32 bits + // while the MIG interface was 512 bits + //if (type == XCL_PERF_MON_MEMORY && isDSAVersion(1, 0, true)) + // return 16; + return 1; + } + + uint8_t XDMAShim::getPerfMonShowIDS(xclPerfMonType type) { + if (type == XCL_PERF_MON_MEMORY) { + if (isDSAVersion(1, 0, true)) + return 0; + if (getBankCount() > 1) + return XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS_2DDR; + return XPAR_AXI_PERF_MON_0_SHOW_AXI_IDS; + } + if (type == XCL_PERF_MON_HOST_INTERFACE) { + return XPAR_AXI_PERF_MON_1_SHOW_AXI_IDS; + } + // TODO: get show IDs + if (type == XCL_PERF_MON_OCL_REGION) { + return XPAR_AXI_PERF_MON_2_SHOW_AXI_IDS; + } + return 0; + } + + uint8_t XDMAShim::getPerfMonShowLEN(xclPerfMonType type) { + if (type == XCL_PERF_MON_MEMORY) { + if 
(getBankCount() > 1) + return XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN_2DDR; + return XPAR_AXI_PERF_MON_0_SHOW_AXI_LEN; + } + if (type == XCL_PERF_MON_HOST_INTERFACE) { + return XPAR_AXI_PERF_MON_1_SHOW_AXI_LEN; + } + // TODO: get show IDs + if (type == XCL_PERF_MON_OCL_REGION) { + return XPAR_AXI_PERF_MON_2_SHOW_AXI_LEN; + } + return 0; + } + + uint32_t XDMAShim::getPerfMonSlotStartBit(xclPerfMonType type, uint32_t slotnum) { + // NOTE: ID widths also set to 5 in HEAD/data/sdaccel/board_support/alpha_data/common/xclplat/xclplat_ip.tcl + uint32_t bitsPerID = 5; + uint8_t showIDs = getPerfMonShowIDS(type); + uint8_t showLen = getPerfMonShowLEN(type); + uint32_t bitsPerSlot = 10 + (bitsPerID * 4 * showIDs) + (16 * showLen); + return (18 + (bitsPerSlot * slotnum)); + } + + uint32_t XDMAShim::getPerfMonSlotDataWidth(xclPerfMonType type, uint32_t slotnum) { + // TODO: this only supports slot 0 + if (slotnum == 0) return XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH; + if (slotnum == 1) return XPAR_AXI_PERF_MON_0_SLOT1_DATA_WIDTH; + if (slotnum == 2) return XPAR_AXI_PERF_MON_0_SLOT2_DATA_WIDTH; + if (slotnum == 3) return XPAR_AXI_PERF_MON_0_SLOT3_DATA_WIDTH; + if (slotnum == 4) return XPAR_AXI_PERF_MON_0_SLOT4_DATA_WIDTH; + if (slotnum == 5) return XPAR_AXI_PERF_MON_0_SLOT5_DATA_WIDTH; + if (slotnum == 6) return XPAR_AXI_PERF_MON_0_SLOT6_DATA_WIDTH; + if (slotnum == 7) return XPAR_AXI_PERF_MON_0_SLOT7_DATA_WIDTH; + return XPAR_AXI_PERF_MON_0_SLOT0_DATA_WIDTH; + } + + // Get the device clock frequency (in MHz) + double XDMAShim::xclGetDeviceClockFreqMHz() { + unsigned clockFreq = mDeviceInfo.mOCLFrequency[0]; + if (clockFreq == 0) + clockFreq = 200; + + //if (mLogStream.is_open()) + // mLogStream << __func__ << ": clock freq = " << clockFreq << std::endl; + return ((double)clockFreq); + } + + // Get the maximum bandwidth for host reads from the device (in MB/sec) + // NOTE: for now, set to: (256/8 bytes) * 300 MHz = 9600 MBps + double XDMAShim::xclGetReadMaxBandwidthMBps() { + return 
9600.0; + } + + // Get the maximum bandwidth for host writes to the device (in MB/sec) + // NOTE: for now, set to: (256/8 bytes) * 300 MHz = 9600 MBps + double XDMAShim::xclGetWriteMaxBandwidthMBps() { + return 9600.0; + } + + // Convert binary string to decimal + uint32_t XDMAShim::bin2dec(std::string str, int start, int number) { + return bin2dec(str.c_str(), start, number); + } + + // Convert binary char * to decimal + uint32_t XDMAShim::bin2dec(const char* ptr, int start, int number) { + const char* temp_ptr = ptr + start; + uint32_t value = 0; + int i = 0; + + do { + if (*temp_ptr != '0' && *temp_ptr!= '1') + return value; + value <<= 1; + if(*temp_ptr=='1') + value += 1; + i++; + temp_ptr++; + } while (i < number); + + return value; + } + + // Convert decimal to binary string + // NOTE: length of string is always sizeof(uint32_t) * 8 + std::string XDMAShim::dec2bin(uint32_t n) { + char result[(sizeof(uint32_t) * 8) + 1]; + unsigned index = sizeof(uint32_t) * 8; + result[index] = '\0'; + + do { + result[ --index ] = '0' + (n & 1); + } while (n >>= 1); + + for (int i=index-1; i >= 0; --i) + result[i] = '0'; + + return std::string( result ); + } + + // Convert decimal to binary string of length bits + std::string XDMAShim::dec2bin(uint32_t n, unsigned bits) { + char result[bits + 1]; + unsigned index = bits; + result[index] = '\0'; + + do result[ --index ] = '0' + (n & 1); + while (n >>= 1); + + for (int i=index-1; i >= 0; --i) + result[i] = '0'; + + return std::string( result ); + } + + // Reset all APM trace AXI stream FIFOs + size_t XDMAShim::resetFifos(xclPerfMonType type) { + uint64_t resetCoreAddress[] = { + getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_SRR, + getPerfMonFifoBaseAddress(type, 1) + AXI_FIFO_SRR, + getPerfMonFifoBaseAddress(type, 2) + AXI_FIFO_SRR + }; + + uint64_t resetFifoAddress[] = { + getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_RDFR, + getPerfMonFifoBaseAddress(type, 1) + AXI_FIFO_RDFR, + getPerfMonFifoBaseAddress(type, 2) + 
AXI_FIFO_RDFR + }; + + size_t size = 0; + uint32_t regValue = AXI_FIFO_RESET_VALUE; + + for (int f=0; f < XPAR_AXI_PERF_MON_0_TRACE_NUMBER_FIFO; f++) { + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, resetCoreAddress[f], ®Value, 4); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, resetFifoAddress[f], ®Value, 4); + } + + return size; + } + + // ******** + // Counters + // ******** + + // Start device counters performance monitoring + size_t XDMAShim::xclPerfMonStartCounters(xclPerfMonType type) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << type << ", Start device counters..." << std::endl; + } + + size_t size = 0; + uint32_t regValue; + uint64_t baseAddress = getPerfMonBaseAddress(type); + + // 1. Reset APM metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + regValue = regValue | XAPM_CR_MCNTR_RESET_MASK; + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + regValue = regValue & ~(XAPM_CR_MCNTR_RESET_MASK); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + // 2. Start APM metric counters + regValue = regValue | XAPM_CR_MCNTR_ENABLE_MASK; + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + // 3. Specify APM metric counters to _not_ reset after reading + regValue = 0x0; + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SICR_OFFSET, ®Value, 4); + + // 4. 
Read from sample register to ensure total time is read again at end + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SR_OFFSET, ®Value, 4); + + return size; + } + + // Stop both profile and trace performance monitoring + size_t XDMAShim::xclPerfMonStopCounters(xclPerfMonType type) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << type << ", Stop and reset device counters..." << std::endl; + } + + size_t size = 0; + uint32_t regValue; + uint64_t baseAddress = getPerfMonBaseAddress(type); + + // 1. Stop APM metric counters + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + regValue = regValue & ~(XAPM_CR_MCNTR_ENABLE_MASK); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_CTL_OFFSET, ®Value, 4); + + return size; + } + + // Read APM performance counters + size_t XDMAShim::xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() + << ", " << type << ", " << &counterResults + << ", Read device counters..." 
<< std::endl; + } + + // Initialize all values in struct to 0 + memset(&counterResults, 0, sizeof(xclCounterResults)); + + size_t size = 0; + uint32_t scaleFactor = getPerfMonByteScaleFactor(type); + uint64_t baseAddress = getPerfMonBaseAddress(type); + + uint64_t intervalAddress = baseAddress + XAPM_SR_OFFSET; + uint64_t metricAddress[] = { + // Slot 0 + baseAddress + XAPM_SMC0_OFFSET, baseAddress + XAPM_SMC1_OFFSET, + baseAddress + XAPM_SMC2_OFFSET, baseAddress + XAPM_SMC3_OFFSET, + baseAddress + XAPM_SMC4_OFFSET, baseAddress + XAPM_SMC5_OFFSET, + baseAddress + XAPM_SMC48_OFFSET, baseAddress + XAPM_SMC49_OFFSET, + // Slot 1 + baseAddress + XAPM_SMC6_OFFSET, baseAddress + XAPM_SMC7_OFFSET, + baseAddress + XAPM_SMC8_OFFSET, baseAddress + XAPM_SMC9_OFFSET, + baseAddress + XAPM_SMC10_OFFSET, baseAddress + XAPM_SMC11_OFFSET, + baseAddress + XAPM_SMC50_OFFSET, baseAddress + XAPM_SMC51_OFFSET, + // Slot 2 + baseAddress + XAPM_SMC12_OFFSET, baseAddress + XAPM_SMC13_OFFSET, + baseAddress + XAPM_SMC14_OFFSET, baseAddress + XAPM_SMC15_OFFSET, + baseAddress + XAPM_SMC16_OFFSET, baseAddress + XAPM_SMC17_OFFSET, + baseAddress + XAPM_SMC52_OFFSET, baseAddress + XAPM_SMC53_OFFSET, + // Slot 3 + baseAddress + XAPM_SMC18_OFFSET, baseAddress + XAPM_SMC19_OFFSET, + baseAddress + XAPM_SMC20_OFFSET, baseAddress + XAPM_SMC21_OFFSET, + baseAddress + XAPM_SMC22_OFFSET, baseAddress + XAPM_SMC23_OFFSET, + baseAddress + XAPM_SMC54_OFFSET, baseAddress + XAPM_SMC55_OFFSET, + // Slot 4 + baseAddress + XAPM_SMC24_OFFSET, baseAddress + XAPM_SMC25_OFFSET, + baseAddress + XAPM_SMC26_OFFSET, baseAddress + XAPM_SMC27_OFFSET, + baseAddress + XAPM_SMC28_OFFSET, baseAddress + XAPM_SMC29_OFFSET, + baseAddress + XAPM_SMC56_OFFSET, baseAddress + XAPM_SMC57_OFFSET, + // Slot 5 + baseAddress + XAPM_SMC30_OFFSET, baseAddress + XAPM_SMC31_OFFSET, + baseAddress + XAPM_SMC32_OFFSET, baseAddress + XAPM_SMC33_OFFSET, + baseAddress + XAPM_SMC34_OFFSET, baseAddress + XAPM_SMC35_OFFSET, + baseAddress + 
XAPM_SMC58_OFFSET, baseAddress + XAPM_SMC59_OFFSET, + // Slot 6 + baseAddress + XAPM_SMC36_OFFSET, baseAddress + XAPM_SMC37_OFFSET, + baseAddress + XAPM_SMC38_OFFSET, baseAddress + XAPM_SMC39_OFFSET, + baseAddress + XAPM_SMC40_OFFSET, baseAddress + XAPM_SMC41_OFFSET, + baseAddress + XAPM_SMC60_OFFSET, baseAddress + XAPM_SMC61_OFFSET, + // Slot 7 + baseAddress + XAPM_SMC42_OFFSET, baseAddress + XAPM_SMC43_OFFSET, + baseAddress + XAPM_SMC44_OFFSET, baseAddress + XAPM_SMC45_OFFSET, + baseAddress + XAPM_SMC46_OFFSET, baseAddress + XAPM_SMC47_OFFSET, + baseAddress + XAPM_SMC62_OFFSET, baseAddress + XAPM_SMC63_OFFSET + }; + + // Read sample interval register + // NOTE: this also latches the sampled metric counters + uint32_t sampleInterval; + size_t ret = xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, intervalAddress, &sampleInterval, 4); + if (ret < 0) return ret; + counterResults.SampleIntervalUsec = sampleInterval / xclGetDeviceClockFreqMHz(); + + // Read all sampled metric counters + uint32_t countnum = 0; + uint32_t numSlots = getPerfMonNumberSlots(type); + //counterResults.NumSlots = numSlots; + + uint32_t temp[XAPM_METRIC_COUNTERS_PER_SLOT]; + + for (uint32_t s=0; s < numSlots; s++) { + for (int c=0; c < XAPM_METRIC_COUNTERS_PER_SLOT; c++) + size += xclRead(XCL_ADDR_SPACE_DEVICE_PERFMON, metricAddress[countnum++], &temp[c], 4); + + counterResults.WriteBytes[s] = temp[XAPM_METRIC_WRITE_BYTES] * scaleFactor; + counterResults.WriteTranx[s] = temp[XAPM_METRIC_WRITE_TRANX]; + counterResults.WriteLatency[s] = temp[XAPM_METRIC_WRITE_LATENCY]; + counterResults.WriteMinLatency[s] = (temp[XAPM_METRIC_WRITE_MIN_MAX] & XAPM_MIN_LATENCY_MASK) >> XAPM_MIN_LATENCY_SHIFT; + counterResults.WriteMaxLatency[s] = (temp[XAPM_METRIC_WRITE_MIN_MAX] & XAPM_MAX_LATENCY_MASK) >> XAPM_MAX_LATENCY_SHIFT; + + counterResults.ReadBytes[s] = temp[XAPM_METRIC_READ_BYTES] * scaleFactor; + counterResults.ReadTranx[s] = temp[XAPM_METRIC_READ_TRANX]; + counterResults.ReadLatency[s] = 
temp[XAPM_METRIC_READ_LATENCY]; + counterResults.ReadMinLatency[s] = (temp[XAPM_METRIC_READ_MIN_MAX] & XAPM_MIN_LATENCY_MASK) >> XAPM_MIN_LATENCY_SHIFT; + counterResults.ReadMaxLatency[s] = (temp[XAPM_METRIC_READ_MIN_MAX] & XAPM_MAX_LATENCY_MASK) >> XAPM_MAX_LATENCY_SHIFT; + } + + return size; + } + + // ***** + // Trace + // ***** + + // Clock training used in converting device trace timestamps to host domain + size_t XDMAShim::xclPerfMonClockTraining(xclPerfMonType type) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << type << ", Send clock training..." << std::endl; + } + + size_t size = 0; + uint64_t baseAddress = getPerfMonBaseAddress(type); + + // Send host timestamps to target device + // NOTE: this is used for training to interpolate between time domains + for (int i=0; i < 3; i++) { +#if 1 + uint64_t hostTimeNsec = getHostTraceTimeNsec(); + + uint32_t hostTimeHigh = hostTimeNsec >> 32; + uint32_t hostTimeLow = hostTimeNsec & 0xffffffff; +#else + // Test values + uint32_t hostTimeHigh = 0xf00df00d; + uint32_t hostTimeLow = 0xdeadbeef; +#endif + + // Send upper then lower 32 bits of host timestamp to APM SW data register + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SWD_OFFSET, &hostTimeHigh, 4); + size += xclWrite(XCL_ADDR_SPACE_DEVICE_PERFMON, baseAddress + XAPM_SWD_OFFSET, &hostTimeLow, 4); + + if (mLogStream.is_open()) { + mLogStream << " Host timestamp: 0x" << std::hex << hostTimeHigh + << " " << hostTimeLow << std::dec << std::endl; + } + } + + return size; + } + + // Start trace performance monitoring + size_t XDMAShim::xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() + << ", " << type << ", " << startTrigger + << ", Start device tracing..." 
<< std::endl; + } + + size_t size = 0; + uint32_t regValue; + uint64_t ctrlAddress = getPerfMonBaseAddress(type) + XAPM_CTL_OFFSET; + xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? + XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; + + // 1. Reset APM trace stream FIFO + size += xclRead(addressSpace, ctrlAddress, ®Value, 4); + + regValue = regValue | XAPM_CR_FIFO_RESET_MASK; + size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + + regValue = regValue & ~(XAPM_CR_FIFO_RESET_MASK); + size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + + // 2. Start APM event log + regValue = regValue | XAPM_CR_EVENTLOG_ENABLE_MASK; + size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + + // 3. Reset trace FIFOs + size += resetFifos(type); + + // 4. Send host timestamps to target device + size += xclPerfMonClockTraining(type); + + // 5. Disable host monitoring on slot 1 + // TODO: replace check for value of startTrigger (temp way + // of keeping slot 1 enabled in 06_perfmon test) + if ((type == XCL_PERF_MON_MEMORY) && (startTrigger == 0)) { + regValue = 0xFFFFFF0F; + uint64_t enableTraceAddress = getPerfMonBaseAddress(type) + XAPM_ENT_OFFSET; + size += xclWrite(addressSpace, enableTraceAddress, ®Value, 4); + } + + // 6. Write to event trace trigger register + // TODO: add support for triggering in device here + //size += xclWrite(addressSpace, TBD, &startTrigger, 4); + + return size; + } + + // Stop trace performance monitoring + size_t XDMAShim::xclPerfMonStopTrace(xclPerfMonType type) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << type << ", Stop and reset device tracing..." << std::endl; + } + + size_t size = 0; + uint32_t regValue; + uint64_t ctrlAddress = getPerfMonBaseAddress(type) + XAPM_CTL_OFFSET; + xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? + XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; + + // 1. 
Stop APM event log and metric counters + size += xclRead(addressSpace, ctrlAddress, ®Value, 4); + + regValue = regValue & ~(XAPM_CR_EVENTLOG_ENABLE_MASK); + size += xclWrite(addressSpace, ctrlAddress, ®Value, 4); + + size += resetFifos(type); + + return size; + } + + // Get trace word count + uint32_t XDMAShim::xclPerfMonGetTraceCount(xclPerfMonType type) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() + << ", " << type << std::endl; + } + + xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? + XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; + + // Only read first FIFO (and assume the others have the same # words) + // NOTE: we do this for speed improvements + uint32_t fifoCount; + xclRead(addressSpace, getPerfMonFifoBaseAddress(type, 0) + AXI_FIFO_RLR, &fifoCount, 4); + // Read bits 22:0 per AXI-Stream FIFO product guide (PG080, 10/1/14) + uint32_t numBytes = fifoCount & 0x7FFFFF; + + uint32_t numSamples = 0; + if (type == XCL_PERF_MON_MEMORY && isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) + numSamples = numBytes / (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH/8); + else + numSamples = numBytes >> 2; + + if (mLogStream.is_open()) { + mLogStream << " No. of trace samples = " << std::dec << numSamples + << " (fifoCount = 0x" << std::hex << fifoCount << ")" << std::dec << std::endl; + } + + return numSamples; + } + + // Read all values from APM trace AXI stream FIFOs + size_t XDMAShim::xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() + << ", " << type << ", " << &traceVector + << ", Reading device trace stream..." 
<< std::endl; + } + + traceVector.mLength = 0; + + uint32_t numSamples = xclPerfMonGetTraceCount(type); + if (numSamples == 0) + return 0; + + uint64_t fifoReadAddress[] = {0, 0, 0}; + if (type == XCL_PERF_MON_MEMORY && isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) { + fifoReadAddress[0] = getPerfMonFifoReadBaseAddress(type, 0) + AXI_FIFO_RDFD_AXI_FULL; + } + else { + for (int i=0; i < 3; i++) + fifoReadAddress[i] = getPerfMonFifoReadBaseAddress(type, i) + AXI_FIFO_RDFD; + } + + xclAddressSpace addressSpace = (type == XCL_PERF_MON_OCL_REGION) ? + XCL_ADDR_KERNEL_CTRL : XCL_ADDR_SPACE_DEVICE_PERFMON; + uint32_t numSlots = getPerfMonNumberSlots(type); + uint32_t numFifos = getPerfMonNumberFifos(type); + + size_t size = 0; +#ifndef _WINDOWS + // TODO: Windows build support + // runtime array size is not supported + uint32_t temp[numFifos]; + memset(&temp, 0, numFifos*sizeof(uint32_t)); +#else + uint32_t temp[3]; + memset(&temp, 0, 3*sizeof(uint32_t)); +#endif + + // Limit to max number of samples so we don't overrun trace buffer on host + uint32_t maxSamples = getPerfMonNumberSamples(type); + numSamples = (numSamples > maxSamples) ? 
maxSamples : numSamples; + traceVector.mLength = numSamples; + + const uint32_t bytesPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 8); + const uint32_t wordsPerSample = (XPAR_AXI_PERF_MON_0_TRACE_WORD_WIDTH / 32); + //uint32_t numBytes = numSamples * bytesPerSample; + uint32_t numWords = numSamples * wordsPerSample; + + // Create trace buffer on host (requires alignment) + const int BUFFER_BYTES = MAX_TRACE_NUMBER_SAMPLES * bytesPerSample; + const int BUFFER_WORDS = MAX_TRACE_NUMBER_SAMPLES * wordsPerSample; +#ifndef _WINDOWS +// TODO: Windows build support +// alignas is defined in c++11 +#if GCC_VERSION >= 40800 + alignas(AXI_FIFO_RDFD_AXI_FULL) uint32_t hostbuf[BUFFER_WORDS]; +#else + AlignedAllocator alignedBuffer(AXI_FIFO_RDFD_AXI_FULL, BUFFER_WORDS); + uint32_t* hostbuf = alignedBuffer.getBuffer(); +#endif +#else + uint32_t hostbuf[BUFFER_WORDS]; +#endif + + // ****************************** + // Read all words from trace FIFO + // NOTE: DSA Version >= 2.2 + // ****************************** + if (type == XCL_PERF_MON_MEMORY && isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) { + memset((void *)hostbuf, 0, BUFFER_BYTES); + + // Iterate over chunks + // NOTE: AXI limits this to 4K bytes per transfer + uint32_t chunkSizeWords = 256 * wordsPerSample; + if (chunkSizeWords > 1024) chunkSizeWords = 1024; + uint32_t chunkSizeBytes = 4 * chunkSizeWords; + uint32_t words=0; + + // Read trace a chunk of bytes at a time + if (numWords > chunkSizeWords) { + for (; words < (numWords-chunkSizeWords); words += chunkSizeWords) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" + << std::hex << fifoReadAddress[0] << " and writing it to 0x" + << (void *)(hostbuf + words) << std::dec << std::endl; + } + + if (mDataMover->pread64((void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) + return 0; + + size += chunkSizeBytes; + } + } + + // Read remainder of trace not divisible by chunk size 
+ if (words < numWords) { + chunkSizeBytes = 4 * (numWords - words); + + if (mLogStream.is_open()) { + mLogStream << __func__ << ": reading " << chunkSizeBytes << " bytes from 0x" + << std::hex << fifoReadAddress[0] << " and writing it to 0x" + << (void *)(hostbuf + words) << std::dec << std::endl; + } + + if (mDataMover->pread64((void *)(hostbuf + words), chunkSizeBytes, fifoReadAddress[0]) < 0) + return 0; + + size += chunkSizeBytes; + } + + if (mLogStream.is_open()) { + mLogStream << __func__ << ": done reading " << size << " bytes " << std::endl; + } + } + + // ****************************** + // Read & process all trace FIFOs + // ****************************** + for (uint32_t wordnum=0; wordnum < numSamples; wordnum++) { + if (type == XCL_PERF_MON_MEMORY && isDSAVersion(FAST_OFFLOAD_MAJOR, FAST_OFFLOAD_MINOR, false)) { + uint32_t index = wordsPerSample * wordnum; + bool allZeros = true; + for (uint32_t fifonum=0; fifonum < numFifos; fifonum++) { + temp[fifonum] = *(hostbuf + index + fifonum); + allZeros &= (temp[fifonum] == 0); + } + if (allZeros) + continue; + } + else { + // NOTE: Using AXI-Lite so we use the same address with burst length of 1 word + for (uint32_t fifonum=0; fifonum < numFifos; fifonum++) + size += xclRead(addressSpace, fifoReadAddress[fifonum], &temp[fifonum], 4); + } + + xclTraceResults results; + // Assign to all 0s to avoid uninitialized variables + memset(&results, 0, sizeof(xclTraceResults)); + + uint64_t temp64 = ((uint64_t)temp[1] << 32) | temp[0]; + results.LogID = temp64 & 0x1; + results.Timestamp = (temp64 >> 1) & 0xFFFF; + results.Overflow = (temp64 >> 17) & 0x1; + results.ReadStartEvent = XCL_PERF_MON_START_ADDR; + results.WriteStartEvent = XCL_PERF_MON_START_ADDR; + results.WriteEndEvent = XCL_PERF_MON_END_LAST_DATA; + + if (results.LogID != 0) { + results.HostTimestamp = (temp64 >> 18) & 0xFFFFFFFF; + } + else { + for (uint32_t s=0; s < numSlots; s++) { + uint32_t b = getPerfMonSlotStartBit(type, s); + + if (b >= 32) + 
temp64 = ((((uint64_t)temp[2] << 32) | temp[1]) >> (b-32)); + else + temp64 = ((((uint64_t)temp[1] << 32) | temp[0]) >> b); + + results.ExtEventFlags[s] = temp64 & 0x7; + results.EventFlags[s] = (temp64 >> 3) & 0x7F; + + if (getPerfMonShowIDS(type)) { + if (getPerfMonShowLEN(type)) { + results.ReadAddrLen[s] = (temp64 >> 10) & 0xFF; + results.WriteAddrLen[s] = (temp64 >> 18) & 0xFF; + + // TODO: assumes AXI ID width of 5 + results.RID[s] = (temp64 >> 26) & 0x1F; + results.ARID[s] = (temp64 >> 31) & 0x1F; + results.BID[s] = (temp64 >> 36) & 0x1F; + results.AWID[s] = (temp64 >> 41) & 0x1F; + } + else { + // TODO: assumes AXI ID width of 5 + results.RID[s] = (temp64 >> 10) & 0x1F; + results.ARID[s] = (temp64 >> 15) & 0x1F; + results.BID[s] = (temp64 >> 20) & 0x1F; + results.AWID[s] = (temp64 >> 25) & 0x1F; + } + } + else { + if (getPerfMonShowLEN(type)) { + results.ReadAddrLen[s] = (temp64 >> 10) & 0xFF; + results.WriteAddrLen[s] = (temp64 >> 18) & 0xFF; + } + } + + // # bytes = burst length * bytes/burst = (addr len + 1) * bytes/burst + uint32_t dataWidth = getPerfMonSlotDataWidth(type, s); + results.ReadBytes[s] = (results.ReadAddrLen[s] + 1) * (dataWidth/8); + results.WriteBytes[s] = (results.WriteAddrLen[s] + 1) * (dataWidth/8); + } // for slot + } // if-else logID != 0 + + traceVector.mArray[wordnum] = results; + + // Log values (if requested) + if (mLogStream.is_open()) { + mLogStream << " Trace sample " << std::dec << wordnum << ": "; + for (int fifonum=numFifos-1; fifonum >= 0; fifonum--) + mLogStream << dec2bin(temp[fifonum]) << " "; + mLogStream << std::endl; + + if (results.LogID == 1) { + mLogStream << std::hex << " Host Timestamp: " << results.HostTimestamp << std::endl; + } + else { + if (type == XCL_PERF_MON_OCL_REGION) { + mLogStream << " Ext Event flags: "; + for (int slot=numSlots-1; slot >= 0; slot--) + mLogStream << dec2bin(results.ExtEventFlags[slot], 3) << " "; + } + else { + mLogStream << " Event flags: "; + for (int slot=numSlots-1; slot >= 0; 
slot--) + mLogStream << dec2bin(results.EventFlags[slot], 7) << " "; + } + + mLogStream << "(ReadAddrLen[0] = " << (int)(results.ReadAddrLen[0]) + << ", WriteAddrLen[0] = " << (int)(results.WriteAddrLen[0]) + << ", ReadAddrLen[1] = " << (int)(results.ReadAddrLen[1]) + << ", WriteAddrLen[1] = " << (int)(results.WriteAddrLen[1]); + + if (getPerfMonShowIDS(type)) { + mLogStream << ", RID: " << (int)results.RID[0] << ", ARID: " << (int)results.ARID[0] + << ", BID: " << (int)results.BID[0] << ", AWID: " << (int)results.AWID[0]; + } + mLogStream << ")" << std::endl; + } + } + } + + return size; + } + +} // namespace xclxdma + + +size_t xclPerfMonStartCounters(xclDeviceHandle handle, xclPerfMonType type) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonStartCounters(type); +} + + +size_t xclPerfMonStopCounters(xclDeviceHandle handle, xclPerfMonType type) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonStopCounters(type); +} + + +size_t xclPerfMonReadCounters(xclDeviceHandle handle, xclPerfMonType type, xclCounterResults& counterResults) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonReadCounters(type, counterResults); +} + + +size_t xclPerfMonClockTraining(xclDeviceHandle handle, xclPerfMonType type) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonClockTraining(type); +} + + +size_t xclPerfMonStartTrace(xclDeviceHandle handle, xclPerfMonType type, uint32_t startTrigger) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonStartTrace(type, startTrigger); +} + + +size_t xclPerfMonStopTrace(xclDeviceHandle handle, xclPerfMonType type) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + 
return drv->xclPerfMonStopTrace(type); +} + + +uint32_t xclPerfMonGetTraceCount(xclDeviceHandle handle, xclPerfMonType type) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonGetTraceCount(type); +} + + +size_t xclPerfMonReadTrace(xclDeviceHandle handle, xclPerfMonType type, xclTraceResultsVector& traceVector) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclPerfMonReadTrace(type, traceVector); +} + + +double xclGetDeviceClockFreqMHz(xclDeviceHandle handle) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return 0.0; + return drv->xclGetDeviceClockFreqMHz(); +} + + +double xclGetReadMaxBandwidthMBps(xclDeviceHandle handle) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return 0.0; + return drv->xclGetReadMaxBandwidthMBps(); +} + + +double xclGetWriteMaxBandwidthMBps(xclDeviceHandle handle) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return 0.0; + return drv->xclGetWriteMaxBandwidthMBps(); +} + + +size_t xclGetDeviceTimestamp(xclDeviceHandle handle) +{ + return 0; +} + + +void xclSetOclRegionProfilingNumberSlots(xclDeviceHandle handle, uint32_t numSlots) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return; + return drv->xclSetOclRegionProfilingNumberSlots(numSlots); +} + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/prom.cpp b/sdk/SDAccel/HAL/driver/xcldma/user/prom.cpp new file mode 100644 index 000000000..061f25428 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/prom.cpp @@ -0,0 +1,445 @@ +/* + * Copyright (C) 2015-2016 Xilinx, Inc + * In-System Programming of BPI PROM using PCIe + * Based on XAPP518 (v1.3) April 23, 2014 + * Author: Sonal Santan + * + * Licensed under the Apache 
License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "shim.h" +#include "driver/xcldma/include/xdma-ioctl.h" + +#ifdef WINDOWS +#define __func__ __FUNCTION__ +#endif + +namespace xclxdma { + int XDMAShim::freezeAXIGate() { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; + } + unsigned char buf = 0x0; + return pcieBarWrite(HWICAP_BAR, AXI_GATE_OFFSET, &buf, 1); + } + + int XDMAShim::freeAXIGate() { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; + } + // First pulse the OCL RESET. 
This is important for PR with multiple + // clocks as it resets the edge triggered clock converter FIFO +#ifndef _WINDOWS + const timespec interval = {0, 500}; +#endif + unsigned char buf = 0x2; + if (pcieBarWrite(HWICAP_BAR, AXI_GATE_OFFSET, &buf, 1)) + return -1; + buf = 0x0; +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&interval, 0); +#endif + if (pcieBarWrite(HWICAP_BAR, AXI_GATE_OFFSET, &buf, 1)) + return -1; + buf = 0x2; +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&interval, 0); +#endif + if (pcieBarWrite(HWICAP_BAR, AXI_GATE_OFFSET, &buf, 1)) + return -1; + buf = 0x3; +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&interval, 0); +#endif + return pcieBarWrite(HWICAP_BAR, AXI_GATE_OFFSET, &buf, 1); + } + + + int XDMAShim::xclUpgradeFirmware(const char *mcsFile) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << mcsFile << std::endl; + } + + std::cout << "INFO: Reseting hardware\n"; + if (freezeAXIGate() != 0) { + return -1; + } + +#ifndef _WINDOWS +// TODO: Windows build support +// timespec + const timespec req = {0, 5000}; + nanosleep(&req, 0); +#endif + if (freeAXIGate() != 0) { + return -1; + } +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&req, 0); +#endif + + std::string line; + std::ifstream mcsStream(mcsFile); + std::string startAddress; + ELARecord record; + bool endRecordFound = false; + + if(!mcsStream.is_open()) { + std::cout << "ERROR: Cannot open " << mcsFile << ". Check that it exists and is readable." 
<< std::endl; + return -ENOENT; + } + + std::cout << "INFO: Parsing file " << mcsFile << std::endl; + while (!mcsStream.eof() && !endRecordFound) { + std::string line; + std::getline(mcsStream, line); + if (line.size() == 0) { + continue; + } + if (line[0] != ':') { + return -1; + } + const unsigned dataLen = std::stoi(line.substr(1, 2), 0 , 16); + const unsigned address = std::stoi(line.substr(3, 4), 0, 16); + const unsigned recordType = std::stoi(line.substr(7, 2), 0 , 16); + switch (recordType) { + case 0x00: + { + if (dataLen > 16) { + // For xilinx mcs files data length should be 16 for all records + // except for the last one which can be smaller + return -1; + } + if (address != record.mDataCount) { + return -1; + } + if (record.mEndAddress != address) { + return -1; + } + record.mDataCount += dataLen; + record.mEndAddress += dataLen; + break; + } + case 0x01: + { + if (startAddress.size() == 0) { + break; + } + mRecordList.push_back(record); + endRecordFound = true; + break; + } + case 0x02: + { + break; + } + case 0x04: + { + if (address != 0x0) { + return -1; + } + if (dataLen != 2) { + return -1; + } + std::string newAddress = line.substr(9, dataLen * 2); + if (startAddress.size()) { + // Finish the old record + mRecordList.push_back(record); + } + // Start a new record + record.mStartAddress = std::stoi(newAddress, 0 , 16); + record.mDataPos = mcsStream.tellg(); + record.mEndAddress = 0; + record.mDataCount = 0; + startAddress = newAddress; + } + } + } + + mcsStream.seekg(0); + std::cout << "INFO: Found " << mRecordList.size() << " ELA Records\n"; + + return program(mcsStream); + } + + int XDMAShim::prepare(unsigned startAddress, unsigned endAddress) { + startAddress &= 0x00ffffff; // truncate to 24 bits + startAddress >>= 8; // Pick the middle 16 bits + endAddress &= 0x00ffffff; // truncate to 24 bits + + if (waitForReady(READY_STAT)) { + return -1; + } + + std::cout << "INFO: Sending the address range\n"; + // Send start and end address + unsigned 
/**
 * Stream the data bytes of one ELA record to the flash through the BPI BAR.
 *
 * Seeks to record.mDataPos, then reads MCS lines until record.mDataCount
 * bytes have been accounted for. Data bytes are packed into a 64-byte buffer
 * with the byte order reversed inside every 4-byte group; each full buffer is
 * written with pcieBarWrite(), bracketed by waitForReady(PROGRAM_STAT) polls.
 * Any remaining partial buffer is flushed the same way after the loop.
 *
 * @param mcsStream  Open MCS stream, positioned before record.mDataPos.
 * @param record     Record describing the data position and byte count.
 * @return 0 on success, -1 if a wait or a BAR write fails.
 *
 * NOTE(review): the result of std::getline() is not checked and substr()/stoi()
 * throw on a short or non-hex line - confirm callers only pass files already
 * validated by xclUpgradeFirmware().
 * NOTE(review): `index` is unsigned and is decremented by dataLen for every
 * line, including non-data records; a line with dataLen > index would wrap it.
 */
int XDMAShim::program(std::ifstream& mcsStream, const ELARecord& record) {
  if (mLogStream.is_open()) {
    mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl;
  }
#ifndef _WINDOWS
// TODO: Windows build support
// timespec
  const timespec req = {0, 2000};
#endif

  std::cout << "Programming block (" << std::hex << record.mStartAddress << ", " << record.mEndAddress << std::dec << ")" << std::endl;
  // Records are expected to be programmed in file order (stream only moves forward).
  assert(mcsStream.tellg() < record.mDataPos);
  mcsStream.seekg(record.mDataPos, std::ifstream::beg);
  unsigned char buffer[64];
  // Always a multiple of 4 between words: marks the end of the last complete word.
  int bufferIndex = 0;
  for (unsigned index = record.mDataCount; index > 0;) {
    std::string line;
    std::getline(mcsStream, line);
    const unsigned dataLen = std::stoi(line.substr(1, 2), 0 , 16);
    index -= dataLen;
    const unsigned recordType = std::stoi(line.substr(7, 2), 0 , 16);
    if (recordType != 0x00) {
      continue;
    }
    const std::string data = line.substr(9, dataLen * 2);
    // Write in byte swapped order
    for (unsigned i = 0; i < data.length(); i += 2) {
      // At a word boundary: jump to the end of the next word, then fill it
      // backwards so that the 4 bytes land in reversed order.
      if ((bufferIndex % 4) == 0) {
        bufferIndex += 4;
      }
      assert(bufferIndex <= 64);
      unsigned value = std::stoi(data.substr(i, 2), 0, 16);
      buffer[--bufferIndex] = (unsigned char)value;
      // Word just completed: step back up to its end.
      if ((bufferIndex % 4) == 0) {
        bufferIndex += 4;
      }
      // NOTE(review): any bytes left on this line after the buffer fills are
      // skipped; harmless only if line lengths divide 64 - confirm.
      if (bufferIndex == 64) {
        break;
      }
    }

    // A line whose length is not a multiple of 4 bytes trips this assert.
    assert((bufferIndex % 4) == 0);
    assert(bufferIndex <= 64);
    if (bufferIndex == 64) {
      if (waitForReady(PROGRAM_STAT, false)) {
        return -1;
      }
      if (pcieBarWrite(BPI_FLASH_BAR, BPI_FLASH_OFFSET, buffer, 64)) {
        return -1;
      }
      if (waitForReady(PROGRAM_STAT, false)) {
        return -1;
      }
#ifndef _WINDOWS
// TODO: Windows build support
// nanosleep is defined in unistd.h
      nanosleep(&req, 0);
#endif
      bufferIndex = 0;
    }
  }
  // Flush the trailing partial buffer (bufferIndex bytes, a multiple of 4).
  if (bufferIndex) {
    if (waitForReady(PROGRAM_STAT, false)) {
      return -1;
    }
    if (pcieBarWrite(BPI_FLASH_BAR, BPI_FLASH_OFFSET, buffer, bufferIndex)) {
      return -1;
    }
    if (waitForReady(PROGRAM_STAT, false)) {
      return -1;
    }
#ifndef _WINDOWS
// TODO: Windows build support
// nanosleep is defined in unistd.h
    nanosleep(&req, 0);
#endif
  }
  return 0;
}
+#ifndef _WINDOWS +// TODO: Windows build support +// timespec + const timespec req = {0, 1000}; +#endif + int beatCount = 0; + for (ELARecordList::iterator i = mRecordList.begin(), e = mRecordList.end(); i != e; ++i) + { + beatCount++; + if(beatCount%10==0) { + std::cout << "." << std::flush; + } + + if (program(mcsStream, *i)) { + std::cout << "ERROR: Could not program the block\n"; + return -1; + } +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&req, 0); +#endif + } + std::cout << std::endl; + // Now keep writing 0xff till the hardware says ready + if (waitAndFinish(READY_STAT, 0xff)) { + return -1; + } + return status; + } + + int XDMAShim::waitForReady(unsigned code, bool verbose) { + unsigned status = ~code; + long long delay = 0; +#ifndef _WINDOWS +// TODO: Windows build support +// timespec + const timespec req = {0, 5000}; +#endif + if (verbose) { + std::cout << "INFO: Waiting for hardware\n"; + } + while ((status != code) && (delay < 30000000000)) { +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&req, 0); +#endif + if (pcieBarRead(BPI_FLASH_BAR, BPI_FLASH_OFFSET, &status, 4)) { + return -1; + } + delay += 5000; + } + return (status == code) ? 
0 : -1; + } + + int XDMAShim::waitAndFinish(unsigned code, unsigned data, bool verbose) { + unsigned status = ~code; + long long delay = 0; +#ifndef _WINDOWS +// TODO: Windows build support +// timespec + const timespec req = {0, 5000}; +#endif + if (verbose) { + std::cout << "INFO: Finishing up\n"; + } + if (pcieBarRead(BPI_FLASH_BAR, BPI_FLASH_OFFSET, &status, 4)) { + return -1; + } + while ((status != code) && (delay < 30000000000)) { +#ifndef _WINDOWS +// TODO: Windows build support +// nanosleep is defined in unistd.h + nanosleep(&req, 0); +#endif + if (pcieBarWrite(BPI_FLASH_BAR, BPI_FLASH_OFFSET, &data, 4)) { + return -1; + } + if (pcieBarRead(BPI_FLASH_BAR, BPI_FLASH_OFFSET, &status, 4)) { + return -1; + } + delay += 5000; + } + return (status == code) ? 0 : -1; + } + + int XDMAShim::xclBootFPGA() { + xdma_ioc_base base = {0X586C0C6C, XDMA_IOCREBOOT}; + return ioctl(mUserHandle, XDMA_IOCREBOOT, &base); + } +} diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/shim.cpp b/sdk/SDAccel/HAL/driver/xcldma/user/shim.cpp new file mode 100644 index 000000000..1630dcfde --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/shim.cpp @@ -0,0 +1,1250 @@ +/** + * Copyright (C) 2015-2016 Xilinx, Inc + * Author: Sonal Santan + * XDMA HAL Driver layered on top of XDMA kernel driver + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#include "shim.h" +#include "memorymanager.h" +#include "datamover.h" +#include +/* + * Define GCC version macro so we can use newer C++11 features + * if possible + */ +#define GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) + +#include + +#ifndef _WINDOWS +// TODO: Windows build support +// sys/mman.h is linux only header file +// it is included for mmap +#include +#endif + +#ifndef _WINDOWS +// TODO: Windows build support +// unistd.h is linux only header file +// it is included for read, write, close, lseek64 +#include +#endif + +#include +#include + +#ifndef _WINDOWS +// TODO: Windows build support +// sys/ioctl.h is linux only header file +// it is included for ioctl +#include +#endif + +#ifndef _WINDOWS +// TODO: Windows build support +// sys/file.h is linux only header file +// it is included for flock +#include +#endif + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "driver/include/xclbin.h" +#include "driver/xcldma/include/xdma-ioctl.h" + +#ifdef _WINDOWS +#define __func__ __FUNCTION__ +#endif + +#ifdef _WINDOWS +#define MAP_FAILED (void *)-1 +#endif + +#if defined(__PPC64__) +#define OSTAG "-ppc64le" +#else +#define OSTAG "" +#endif + +namespace xclxdma { + const unsigned XDMAShim::TAG = 0X586C0C6C; // XL OpenCL X->58(ASCII), L->6C(ASCII), O->0 C->C L->6C(ASCII); + + xclDeviceInfo2 to_info2(const xclDeviceInfo info) { + xclDeviceInfo2 info2; + std::memset(&info2, 0, sizeof(info2)); + info2.mMagic = info.mMagic; + std::memcpy(info2.mName, info.mName, 256); + info2.mHALMajorVersion = info.mHALMajorVersion; + info2.mHALMinorVersion = info.mHALMinorVersion; + info2.mVendorId = info.mVendorId; + info2.mDeviceId = info.mDeviceId; + info2.mSubsystemId = info.mSubsystemId; + info2.mSubsystemVendorId = info.mSubsystemVendorId; + info2.mDeviceVersion = info.mDeviceVersion; + info2.mDDRSize = info.mDDRSize; + info2.mDataAlignment = info.mDataAlignment; + 
info2.mDDRFreeSize = info.mDDRFreeSize; + info2.mMinTransferSize = info.mMinTransferSize; + info2.mDDRBankCount = info.mDDRBankCount; + info2.mOCLFrequency[0] = info.mOCLFrequency; + info2.mPCIeLinkWidth = info.mPCIeLinkWidth; + info2.mPCIeLinkSpeed = info.mPCIeLinkSpeed; + info2.mDMAThreads = info.mDMAThreads; + return info2; + } + + int XDMAShim::xclLoadBitstream(const char *fileName) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << fileName << std::endl; + } + + if (!mLocked) + return -EPERM; + + std::ifstream stream(fileName); + if (!stream.is_open()) { + return errno; + } + + stream.seekg(0, stream.end); + int length = stream.tellg(); + stream.seekg(0, stream.beg); + char *buffer = new char[length]; + stream.read(buffer, length); + stream.close(); + xclBin *header = (xclBin *)buffer; + if (std::memcmp(header->m_magic, "xclbin0", 8)) { + return -EINVAL; + } + + return xclLoadXclBin(header); + } + + + int XDMAShim::xclLoadXclBin(const xclBin *buffer) + { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buffer << std::endl; + } + + if (!mLocked) + return -EPERM; + +#ifndef _WINDOWS + const unsigned cmd = isUltraScale() ? XDMA_IOCMCAPDOWNLOAD : XDMA_IOCICAPDOWNLOAD; + xdma_ioc_bitstream obj = {{0X586C0C6C, cmd}, const_cast(buffer)}; + int ret = ioctl(mUserHandle, cmd, &obj); + if(0 != ret) + return ret; + + // If it is an XPR DSA, zero out the DDR again as downloading the XCLBIN + // reinitializes the DDR and results in ECC error. + if(isXPR()) { + if (mLogStream.is_open()) { + mLogStream << __func__ << "XPR Device found, zeroing out DDR again.." 
<< std::endl; + } + + if (zeroOutDDR() == false){ + if (mLogStream.is_open()) { + mLogStream << __func__ << "zeroing out DDR failed" << std::endl; + } + return -EIO; + } + } + + return ret; +#endif + } + + size_t XDMAShim::xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << offset << ", " << hostBuf << ", " << size << std::endl; + } +#ifndef _WINDOWS +// TODO: Windows build support +// alignas is defined in c++11 +#if GCC_VERSION >= 40800 + alignas(DDR_BUFFER_ALIGNMENT) char buffer[DDR_BUFFER_ALIGNMENT]; +#else + AlignedAllocator alignedBuffer(DDR_BUFFER_ALIGNMENT, DDR_BUFFER_ALIGNMENT); + char* buffer = alignedBuffer.getBuffer(); +#endif +#else + char buffer[DDR_BUFFER_ALIGNMENT]; +#endif + + const size_t mod_size = offset % DDR_BUFFER_ALIGNMENT; + // Read back one full aligned block starting from preceding aligned address + const uint64_t mod_offset = offset - mod_size; + if (xclRead(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) + return -1; + + // Update the local copy of buffer with user requested data + const size_t copy_size = (size + mod_size > DDR_BUFFER_ALIGNMENT) ? 
DDR_BUFFER_ALIGNMENT - mod_size : size; + std::memcpy(buffer + mod_size, hostBuf, copy_size); + + // Write back the updated aligned block + if (xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) + return -1; + + // Write any remaining blocks over DDR_BUFFER_ALIGNMENT size + if (size + mod_size > DDR_BUFFER_ALIGNMENT) { + size_t write_size = xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset + DDR_BUFFER_ALIGNMENT, + (const char *)hostBuf + copy_size, size - copy_size); + if (write_size != (size - copy_size)) + return -1; + } + return size; + } + + size_t XDMAShim::xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " + << offset << ", " << hostBuf << ", " << size << std::endl; + } + + if (!mLocked) + return -1; + + switch (space) { + case XCL_ADDR_SPACE_DEVICE_RAM: + { + const size_t totalSize = size; + const size_t mod_size1 = offset % DDR_BUFFER_ALIGNMENT; + const size_t mod_size2 = size % DDR_BUFFER_ALIGNMENT; + if (mod_size1) { + // Buffer not aligned at DDR_BUFFER_ALIGNMENT boundary, need to do Read-Modify-Write + return xclReadModifyWrite(offset, hostBuf, size); + } + else if (mod_size2) { + // Buffer not a multiple of DDR_BUFFER_ALIGNMENT, write out the initial block and + // then perform a Read-Modify-Write for the remainder buffer + const size_t blockSize = size - mod_size2; + if (xclWrite(space, offset, hostBuf, blockSize) != blockSize) + return -1; + offset += blockSize; + hostBuf = (const char *)hostBuf + blockSize; + if (xclReadModifyWrite(offset, hostBuf, mod_size2) != mod_size2) + return -1; + return totalSize; + } + + const char *curr = static_cast(hostBuf); + while (size > maxDMASize) { +#ifndef _WINDOWS +// TODO: Windows build support + if (mDataMover->pwrite64(curr,maxDMASize,offset) < 0) + return -1; +#endif + offset += maxDMASize; + curr += 
maxDMASize; + size -= maxDMASize; + } +#ifndef _WINDOWS +// TODO: Windows build support + if (mDataMover->pwrite64(curr,size,offset) < 0) + return -1; +#endif + return totalSize; + } + case XCL_ADDR_SPACE_DEVICE_PERFMON: + { + if (pcieBarWrite(PERFMON_BAR, offset, hostBuf, size) == 0) { + return size; + } + return -1; + } + case XCL_ADDR_KERNEL_CTRL: + { + if (mLogStream.is_open()) { + const unsigned *reg = static_cast(hostBuf); + size_t regSize = size / 4; + if (regSize > 32) + regSize = 32; + for (unsigned i = 0; i < regSize; i++) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", 0x" + << std::hex << offset + i << std::dec << ", 0x" << std::hex << reg[i] << std::dec << std::endl; + + } + } + if (pcieBarWrite(ACCELERATOR_BAR, offset, hostBuf, size) == 0) { + return size; + } + return -1; + } + default: + { + return -1; + } + } + } + + + size_t XDMAShim::xclReadSkipCopy(uint64_t offset, void *hostBuf, size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " + << offset << ", " << hostBuf << ", " << size << std::endl; + } + + const size_t mod_size = offset % DDR_BUFFER_ALIGNMENT; + // Need to do Read-Modify-Read +#ifndef _WINDOWS +// TODO: Windows build support +// alignas is defined in c++11 +#if GCC_VERSION >= 40800 + alignas(DDR_BUFFER_ALIGNMENT) char buffer[DDR_BUFFER_ALIGNMENT]; +#else + AlignedAllocator alignedBuffer(DDR_BUFFER_ALIGNMENT, DDR_BUFFER_ALIGNMENT); + char* buffer = alignedBuffer.getBuffer(); +#endif +#else + char buffer[DDR_BUFFER_ALIGNMENT]; +#endif + + // Read back one full aligned block starting from preceding aligned address + const uint64_t mod_offset = offset - mod_size; + if (xclRead(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset, buffer, DDR_BUFFER_ALIGNMENT) != DDR_BUFFER_ALIGNMENT) + return -1; + + const size_t copy_size = (size + mod_size > DDR_BUFFER_ALIGNMENT) ? 
DDR_BUFFER_ALIGNMENT - mod_size : size; + + // Update the user buffer with partial read + std::memcpy(hostBuf, buffer + mod_size, copy_size); + + // Update the remainder of user buffer + if (size + mod_size > DDR_BUFFER_ALIGNMENT) { + const size_t read_size = xclRead(XCL_ADDR_SPACE_DEVICE_RAM, mod_offset + DDR_BUFFER_ALIGNMENT, + (char *)hostBuf + copy_size, size - copy_size); + if (read_size != (size - copy_size)) + return -1; + } + return size; + } + + size_t XDMAShim::xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", " + << offset << ", " << hostBuf << ", " << size << std::endl; + } + + switch (space) { + case XCL_ADDR_SPACE_DEVICE_RAM: + { + const size_t mod_size1 = offset % DDR_BUFFER_ALIGNMENT; + const size_t mod_size2 = size % DDR_BUFFER_ALIGNMENT; + const size_t totalSize = size; + +// if(!mLocked) +// return -1; + + if (mod_size1) { + // Buffer not aligned at DDR_BUFFER_ALIGNMENT boundary, need to do Read-Skip-Copy + return xclReadSkipCopy(offset, hostBuf, size); + } + else if (mod_size2) { + // Buffer not a multiple of DDR_BUFFER_ALIGNMENT, read the initial block and + // then perform a Read-Skip-Copy for the remainder buffer + const size_t blockSize = size - mod_size2; + if (xclRead(space, offset, hostBuf, blockSize) != blockSize) + return -1; + offset += blockSize; + hostBuf = (char *)hostBuf + blockSize; + if (xclReadSkipCopy(offset, hostBuf, mod_size2) != mod_size2) + return -1; + return totalSize; + } + + char *curr = static_cast(hostBuf); + while (size > maxDMASize) { +#ifndef _WINDOWS +// TODO: Windows build support + if (mDataMover->pread64(curr,maxDMASize,offset) < 0) + return -1; +#endif + offset += maxDMASize; + curr += maxDMASize; + size -= maxDMASize; + } + +#ifndef _WINDOWS +// TODO: Windows build support + if (mDataMover->pread64(curr,size,offset) < 0) + return -1; +#endif + return totalSize; + } + 
case XCL_ADDR_SPACE_DEVICE_PERFMON: + { + if (pcieBarRead(PERFMON_BAR, offset, hostBuf, size) == 0) { + return size; + } + return -1; + } + case XCL_ADDR_KERNEL_CTRL: + { + int result = pcieBarRead(ACCELERATOR_BAR, offset, hostBuf, size); + if (mLogStream.is_open()) { + const unsigned *reg = static_cast(hostBuf); + size_t regSize = size / 4; + if (regSize > 4) + regSize = 4; + for (unsigned i = 0; i < regSize; i++) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << space << ", 0x" + << std::hex << offset + i << std::dec << ", 0x" << std::hex << reg[i] << std::dec << std::endl; + } + } + return !result ? size : 0; + } + default: + { + return -1; + } + } + } + + uint64_t XDMAShim::xclAllocDeviceBuffer(size_t size) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << std::endl; + } + + if (size == 0) + size = DDR_BUFFER_ALIGNMENT; + + uint64_t result = MemoryManager::mNull; + for (auto i : mDDRMemoryManager) { + result = i->alloc(size); + if (result != MemoryManager::mNull) + break; + } + return result; + } + + uint64_t XDMAShim::xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags) + { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << size << ", " + << domain << ", " << flags << std::endl; + } + + if (domain != XCL_MEM_DEVICE_RAM) + return MemoryManager::mNull; + + if (size == 0) + size = DDR_BUFFER_ALIGNMENT; + + if (flags >= mDDRMemoryManager.size()) { + return MemoryManager::mNull; + } + return mDDRMemoryManager[flags]->alloc(size); + } + + void XDMAShim::xclFreeDeviceBuffer(uint64_t buf) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << buf << std::endl; + } + + uint64_t size = 0; + for (auto i : mDDRMemoryManager) { + size += i->size(); + if (buf < size) { + i->free(buf); + } + } + } + + + size_t XDMAShim::xclCopyBufferHost2Device(uint64_t dest, 
const void *src, size_t size, size_t seek) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " + << src << ", " << size << ", " << seek << std::endl; + } + +#ifdef DEBUG + { + // Ensure that this buffer was allocated by memory manager before + const uint64_t v = MemoryManager::mNull; + std::pair buf = std::make_pair(v, v); + uint64_t high = 0; + for (auto i : mDDRMemoryManager) { + high += i->size(); + if (dest < high) { + buf = i->lookup(dest); + break; + } + } + if (MemoryManager::isNullAlloc(buf)) + return -1; + + if (buf.second < (size + seek)) + return -1; + } +#endif + dest += seek; + return xclWrite(XCL_ADDR_SPACE_DEVICE_RAM, dest, src, size); + } + + + size_t XDMAShim::xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << dest << ", " + << src << ", " << size << ", " << skip << std::endl; + } + + +#ifdef DEBUG + { + // Ensure that this buffer was allocated by memory manager before + const uint64_t v = MemoryManager::mNull; + std::pair buf = std::make_pair(v, v); + uint64_t high = 0; + for (auto i : mDDRMemoryManager) { + high += i->size(); + if (src < high) { + buf = i->lookup(src); + break; + } + } + if (MemoryManager::isNullAlloc(buf)) + return -1; + + if (buf.second < (size + skip)) + return -1; + } +#endif + src += skip; + return xclRead(XCL_ADDR_SPACE_DEVICE_RAM, src, dest, size); + } + + + XDMAShim *XDMAShim::handleCheck(void *handle) { + // Sanity checks + if (!handle) + return 0; + if (*(unsigned *)handle != TAG) + return 0; + if (!((XDMAShim *)handle)->isGood()) { + return 0; + } + + return (XDMAShim *)handle; + } + + unsigned XDMAShim::xclProbe() { + char file_name_buf[128]; + unsigned i = 0; + for (i = 0; i < 64; i++) { + std::sprintf((char *)&file_name_buf, "/dev/xcldma/xcldma%d_user", i); +#ifndef _WINDOWS +// TODO: Windows build support +// open, 
close is defined in unistd.h + int fd = open(file_name_buf, O_RDWR); + if (fd < 0) { + return i; + } + close(fd); +#endif + } + return i; + } + + void XDMAShim::initMemoryManager() + { + if (!mDeviceInfo.mDDRBankCount) + return; + const uint64_t bankSize = mDeviceInfo.mDDRSize / mDeviceInfo.mDDRBankCount; + uint64_t start = 0; + for (unsigned i = 0; i < mDeviceInfo.mDDRBankCount; i++) { + mDDRMemoryManager.push_back(new MemoryManager(bankSize, start, DDR_BUFFER_ALIGNMENT)); + start += bankSize; + } + } + + XDMAShim::~XDMAShim() + { +#ifndef _WINDOWS +// TODO: Windows build support +// munmap is defined in sys/mman.h +// close is defined in unistd.h + if (mUserMap != MAP_FAILED) { + munmap(mUserMap, MMAP_SIZE_USER); + } + if (mUserHandle > 0) { + close(mUserHandle); + } + + delete mDataMover; +#endif + for (auto i : mDDRMemoryManager) { + delete i; + } + + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; + mLogStream.close(); + } + } + + XDMAShim::XDMAShim(unsigned index, const char *logfileName, + xclVerbosityLevel verbosity) : mTag(TAG), mBoardNumber(index), + maxDMASize(0xfa0000), + mLocked(false), + mOffsets{0x0, 0x0, 0x0, 0x0}, + mOclRegionProfilingNumberSlots(XPAR_AXI_PERF_MON_2_NUMBER_SLOTS) + { + mDataMover = new DataMover(mBoardNumber, 1 /* 1 channel each dir */); + char file_name_buf[128]; + std::sprintf((char *)&file_name_buf, "/dev/xcldma/xcldma%d_user", mBoardNumber); + mUserHandle = open(file_name_buf, O_RDWR | O_SYNC); + + mUserMap = (char *)mmap(0, MMAP_SIZE_USER, PROT_READ | PROT_WRITE, MAP_SHARED, mUserHandle, 0); + if (mUserMap == MAP_FAILED) { + close(mUserHandle); + mUserHandle = -1; + } + + if (logfileName && (logfileName[0] != '\0')) { + mLogStream.open(logfileName); + mLogStream << "FUNCTION, THREAD ID, ARG..." 
<< std::endl; + mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; + } + + // First try the new info2 method and if that fails fall back to legacy info + if (xclGetDeviceInfo2(&mDeviceInfo)) { + xclDeviceInfo oldInfo; + if (xclGetDeviceInfo(&oldInfo)) { + close(mUserHandle); + mUserHandle = -1; + } + else { + mDeviceInfo = to_info2(oldInfo); + } + } + initMemoryManager(); + } + + bool XDMAShim::isGood() const { + if (!mDataMover) + return false; + if (mUserHandle < 0) + return false; + return mDataMover->isGood(); + // TODO: Add sanity check for card state + } + + + int XDMAShim::pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length) { + const char *mem = 0; + switch (bar_num) { + case 0: + { + if ((length + offset) > MMAP_SIZE_USER) { + return -1; + } + mem = mUserMap; + break; + } + default: + { + return -1; + } + } + + char *qBuf = (char *)buffer; + while (length >= 4) { + *(unsigned *)qBuf = *(unsigned *)(mem + offset); + offset += 4; + qBuf += 4; + length -= 4; + } + while (length) { + *qBuf = *(mem + offset); + offset++; + qBuf++; + length--; + } + +// std::memcpy(buffer, mem + offset, length); + return 0; + } + + int XDMAShim::pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length) { + char *mem = 0; + switch (bar_num) { + case 0: + { + if ((length + offset) > MMAP_SIZE_USER) { + return -1; + } + mem = mUserMap; + break; + } + default: + { + return -1; + } + } + + char *qBuf = (char *)buffer; + while (length >= 4) { + *(unsigned *)(mem + offset) = *(unsigned *)qBuf; + offset += 4; + qBuf += 4; + length -= 4; + } + while (length) { + *(mem + offset) = *qBuf; + offset++; + qBuf++; + length--; + } + +// std::memcpy(mem + offset, buffer, length); + return 0; + } + + bool XDMAShim::zeroOutDDR() + { + // Zero out the DDR so MIG ECC believes we have touched all the bits + // and it does not complain when we try to read back without explicit + // write. 
The latter usually happens as a result of read-modify-write + // TODO: Try and speed this up. + // [1] Possibly move to kernel mode driver. + // [2] Zero out specific buffers when they are allocated + static const unsigned long long BLOCK_SIZE = 0x4000000; + void *buf = 0; + if (posix_memalign(&buf, DDR_BUFFER_ALIGNMENT, BLOCK_SIZE)) + return false; + memset(buf, 0, BLOCK_SIZE); + mDataMover->pset64(buf, BLOCK_SIZE, 0, mDeviceInfo.mDDRSize/BLOCK_SIZE); + free(buf); + return true; + } + + bool XDMAShim::xclLockDevice() + { + if (mDataMover->lock() == false) + return false; + + if (flock(mUserHandle, LOCK_EX | LOCK_NB) == -1) { + mDataMover->unlock(); + return false; + } + mLocked = true; + + return zeroOutDDR(); + } + + std::string XDMAShim::getDSAName(unsigned short deviceId, unsigned short subsystemId) + { + std::string dsa("xilinx:?:?:?"); + const unsigned dsaNum = (deviceId << 16) | subsystemId; + switch(dsaNum) + { + case 0x71380121: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":2.1"; + break; + case 0x71380122: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":2.2"; + break; + case 0x71380123: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":2.3"; + break; + case 0x71380130: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":3.0"; + break; + case 0x71380131: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":3.1"; + break; + case 0x71380132: + dsa = "xilinx:adm-pcie-7v3:1ddr" OSTAG ":3.2"; + break; + case 0x71380221: + dsa = "xilinx:adm-pcie-7v3:2ddr" OSTAG ":2.1"; + break; + case 0x81380121: + dsa = "xilinx:adm-pcie-ku3:1ddr" OSTAG ":2.1"; + break; + case 0x81380122: + dsa = "xilinx:adm-pcie-ku3:1ddr" OSTAG ":2.2"; + break; + case 0x81380130: + dsa = "xilinx:adm-pcie-ku3:1ddr" OSTAG ":3.0"; + break; + case 0x81380221: + dsa = "xilinx:adm-pcie-ku3:2ddr" OSTAG ":2.1"; + break; + case 0x81380222: + dsa = "xilinx:adm-pcie-ku3:2ddr" OSTAG ":2.2"; + break; + case 0x81380230: + dsa = "xilinx:adm-pcie-ku3:2ddr" OSTAG ":3.0"; + break; + case 0x81380231: + dsa = "xilinx:adm-pcie-ku3:2ddr" OSTAG 
":3.1"; + break; + case 0x81380232: + dsa = "xilinx:adm-pcie-ku3:2ddr" OSTAG ":3.2"; + break; + case 0x81381231: + dsa = "xilinx:adm-pcie-ku3:2ddr-40g:3.1"; + break; + case 0x81381232: + dsa = "xilinx:adm-pcie-ku3:2ddr-40g:3.2"; + break; + case 0x81388221: + dsa = "xilinx:adm-pcie-ku3:tandem-2ddr:2.1"; + break; + case 0x81388222: + dsa = "xilinx:adm-pcie-ku3:tandem-2ddr:2.2"; + break; + case 0x81388230: + dsa = "xilinx:adm-pcie-ku3:tandem-2ddr:3.0"; + break; + case 0x81384221: + dsa = "xilinx:adm-pcie-ku3:exp-pr-2ddr:2.1"; + break; + case 0x81384222: + dsa = "xilinx:adm-pcie-ku3:2ddr-xpr:2.2"; + break; + case 0x81384230: + dsa = "xilinx:adm-pcie-ku3:2ddr-xpr:3.0"; + break; + case 0x81384231: + dsa = "xilinx:adm-pcie-ku3:2ddr-xpr:3.1"; + break; + case 0x81384232: + dsa = "xilinx:adm-pcie-ku3:2ddr-xpr:3.2"; + break; + case 0x82380222: + dsa = "xilinx:tul-pcie3-ku115:2ddr:2.2"; + break; + case 0x82380230: + dsa = "xilinx:tul-pcie3-ku115:2ddr:3.0"; + break; + case 0x82380231: + dsa = "xilinx:tul-pcie3-ku115:2ddr:3.1"; + break; + case 0x82380232: + dsa = "xilinx:tul-pcie3-ku115:2ddr:3.2"; + break; + case 0x82384422: + dsa = "xilinx:tul-pcie3-ku115:4ddr-xpr:2.2"; + break; + case 0x82384430: + dsa = "xilinx:tul-pcie3-ku115:4ddr-xpr:3.0"; + break; + case 0x82384431: + dsa = "xilinx:tul-pcie3-ku115:4ddr-xpr:3.1"; + break; + case 0x82384432: + dsa = "xilinx:xil-accel-rd-ku115:4ddr-xpr:3.2"; + break; + case 0x83384431: + dsa = "xilinx:tul-pcie3-vu095:4ddr-xpr:3.1"; + break; + case 0x83384432: + dsa = "xilinx:tul-pcie3-vu095:4ddr-xpr:3.2"; + break; + case 0x84380231: + dsa = "xilinx:adm-pcie-8k5:2ddr:3.1"; + break; + case 0x84380232: + dsa = "xilinx:adm-pcie-8k5:2ddr:3.2"; + break; + case 0x923F4232: + dsa = "xilinx:minotaur-pcie-vu9p:2ddr-xpr:3.2"; + break; + case 0x923F4432: + dsa = "xilinx:minotaur-pcie-vu9p:4ddr-xpr:3.2"; + break; + + default: + break; + } + return dsa; + } + + int XDMAShim::xclGetDeviceInfo2(xclDeviceInfo2 *info) + { + std::memset(info, 0, 
sizeof(xclDeviceInfo2)); + info->mMagic = 0X586C0C6C; + info->mHALMajorVersion = XCLHAL_MAJOR_VER; + info->mHALMajorVersion = XCLHAL_MINOR_VER; + info->mMinTransferSize = DDR_BUFFER_ALIGNMENT; + info->mDMAThreads = mDataMover->channelCount(); +#ifndef _WINDOWS +// TODO: Windows build support +// XDMA_IOCINFO depends on _IOW, which is defined indirectly by +// ioctl is defined in sys/ioctl.h + xdma_ioc_info2 obj = {{0X586C0C6C, XDMA_IOCINFO2}}; + int ret = ioctl(mUserHandle, XDMA_IOCINFO2, &obj); + if (ret) + return ret; + info->mVendorId = obj.vendor; + info->mDeviceId = obj.device; + info->mSubsystemId = obj.subsystem_device; + info->mSubsystemVendorId = obj.subsystem_vendor; + info->mDeviceVersion = obj.subsystem_device & 0x00ff; +#endif + // TUL cards (0x8238) have 4 GB / bank; other cards have 8 GB memory / bank + info->mDDRSize = (info->mDeviceId == 0x8238) ? 0x100000000 : 0x200000000; + info->mDataAlignment = DDR_BUFFER_ALIGNMENT; + info->mNumClocks = obj.num_clocks; + for (int i = 0; i < obj.num_clocks; ++i) { + info->mOCLFrequency[i] = obj.ocl_frequency[i]; + } + info->mPCIeLinkWidth = obj.pcie_link_width; + info->mPCIeLinkSpeed = obj.pcie_link_speed; + info->mDDRBankCount = info->mSubsystemId & 0x0f00; + info->mDDRBankCount >>= 8; + if (info->mDDRBankCount == 0) + info->mDDRBankCount = 1; + + info->mDDRSize *= info->mDDRBankCount; + for (auto i : mDDRMemoryManager) { + info->mDDRFreeSize += i->freeSize(); + } + + const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId); + if (mLogStream.is_open()) + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << deviceName << std::endl; + + std::size_t length = deviceName.copy(info->mName, deviceName.length(),0); + info->mName[length] = '\0'; + + if (mLogStream.is_open()) { + mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion + << ", clock freq=" << std::dec << info->mOCLFrequency[0] + << ", clock freq 2=" << std::dec << 
info->mOCLFrequency[1] << std::endl; + } + + info->mOnChipTemp = obj.onchip_temp; + info->mFanTemp = obj.fan_temp; + info->mVInt = obj.vcc_int; + info->mVAux = obj.vcc_aux; + info->mVBram = obj.vcc_bram; + info->mMigCalib = obj.mig_calibration; + + return 0; + } + + int XDMAShim::xclGetDeviceInfo(xclDeviceInfo *info) + { + std::memset(info, 0, sizeof(xclDeviceInfo)); + info->mMagic = 0X586C0C6C; + info->mHALMajorVersion = XCLHAL_MAJOR_VER; + info->mHALMajorVersion = XCLHAL_MINOR_VER; + info->mMinTransferSize = DDR_BUFFER_ALIGNMENT; + info->mDMAThreads = mDataMover->channelCount(); +#ifndef _WINDOWS +// TODO: Windows build support +// XDMA_IOCINFO depends on _IOW, which is defined indirectly by +// ioctl is defined in sys/ioctl.h + xdma_ioc_info obj = {{0X586C0C6C, XDMA_IOCINFO}}; + int ret = ioctl(mUserHandle, XDMA_IOCINFO, &obj); + if (ret) + return ret; + info->mVendorId = obj.vendor; + info->mDeviceId = obj.device; + info->mSubsystemId = obj.subsystem_device; + info->mSubsystemVendorId = obj.subsystem_vendor; + info->mDeviceVersion = obj.subsystem_device & 0x00ff; +#endif + // TUL cards (0x8238) have 4 GB / bank; other cards have 8 GB memory / bank + info->mDDRSize = (info->mDeviceId == 0x8238) ? 
0x100000000 : 0x200000000; + info->mDataAlignment = DDR_BUFFER_ALIGNMENT; + info->mOCLFrequency = obj.ocl_frequency; + info->mPCIeLinkWidth = obj.pcie_link_width; + info->mPCIeLinkSpeed = obj.pcie_link_speed; + info->mDDRBankCount = info->mSubsystemId & 0x0f00; + info->mDDRBankCount >>= 8; + if (info->mDDRBankCount == 0) + info->mDDRBankCount = 1; + + info->mDDRSize *= info->mDDRBankCount; + for (auto i : mDDRMemoryManager) { + info->mDDRFreeSize += i->freeSize(); + } + + const std::string deviceName = getDSAName(info->mDeviceId, info->mSubsystemId); + if (mLogStream.is_open()) + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << deviceName << std::endl; + + + std::size_t length = deviceName.copy(info->mName, deviceName.length(),0); + info->mName[length] = '\0'; + + if (mLogStream.is_open()) { + mLogStream << __func__ << ": name=" << deviceName << ", version=0x" << std::hex << info->mDeviceVersion + << ", clock freq=" << std::dec << info->mOCLFrequency << std::endl; + } + return 0; + } + + int XDMAShim::resetDevice(xclResetKind kind) { +#ifndef _WINDOWS +// TODO: Windows build support +// XDMA_IOCRESET depends on _IOW, which is defined indirectly by +// ioctl is defined in sys/ioctl.h + for (auto i : mDDRMemoryManager) { + i->reset(); + } + + // Call a new IOCTL to just reset the OCL region + if (kind == XCL_RESET_FULL) { + xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCHOTRESET}; + return ioctl(mUserHandle, XDMA_IOCHOTRESET, &obj); + } + else if (kind == XCL_RESET_KERNEL) { + xdma_ioc_base obj = {0X586C0C6C, XDMA_IOCOCLRESET}; + return ioctl(mUserHandle, XDMA_IOCOCLRESET, &obj); + } + return -EINVAL; +#else + return 0; +#endif + } + + int XDMAShim::xclReClock(unsigned freqMHz) + { + xdma_ioc_freqscaling obj = {{0X586C0C6C, XDMA_IOCFREQSCALING}, freqMHz}; + return ioctl(mUserHandle, XDMA_IOCFREQSCALING, &obj); + } + + int XDMAShim::xclReClock2(unsigned short region, const unsigned short *targetFreqMHz) + { + xdma_ioc_freqscaling2 obj; + 
std::memset(&obj, 0, sizeof(xdma_ioc_freqscaling2)); + obj.base= {0X586C0C6C, XDMA_IOCFREQSCALING2}; + obj.ocl_region = region; + obj.ocl_target_freq[0] = targetFreqMHz[0]; + obj.ocl_target_freq[1] = targetFreqMHz[1]; + return ioctl(mUserHandle, XDMA_IOCFREQSCALING2, &obj); + } +} + + +xclDeviceHandle xclOpen(unsigned index, const char *logfileName, xclVerbosityLevel level) +{ + xclxdma::XDMAShim *handle = new xclxdma::XDMAShim(index, logfileName, level); + if (!xclxdma::XDMAShim::handleCheck(handle)) { + delete handle; + handle = 0; + } + + return (xclDeviceHandle *)handle; +} + +void xclClose(xclDeviceHandle handle) +{ + if (xclxdma::XDMAShim::handleCheck(handle)) { + delete ((xclxdma::XDMAShim *)handle); + } +} + + +int xclGetDeviceInfo(xclDeviceHandle handle, xclDeviceInfo *info) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclGetDeviceInfo(info); +} + +int xclGetDeviceInfo2(xclDeviceHandle handle, xclDeviceInfo2 *info) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclGetDeviceInfo2(info); +} + +int xclLoadBitstream(xclDeviceHandle handle, const char *xclBinFileName) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclLoadBitstream(xclBinFileName); +} + +int xclLoadXclBin(xclDeviceHandle handle, const xclBin *buffer) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclLoadXclBin(buffer); +} + +size_t xclWrite(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclWrite(space, offset, hostBuf, size); +} + +size_t xclRead(xclDeviceHandle handle, xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size) +{ + xclxdma::XDMAShim *drv = 
xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclRead(space, offset, hostBuf, size); +} + + +uint64_t xclAllocDeviceBuffer(xclDeviceHandle handle, size_t size) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclAllocDeviceBuffer(size); +} + + +uint64_t xclAllocDeviceBuffer2(xclDeviceHandle handle, size_t size, xclMemoryDomains domain, + unsigned flags) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclAllocDeviceBuffer2(size, domain, flags); +} + + +void xclFreeDeviceBuffer(xclDeviceHandle handle, uint64_t buf) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return; + return drv->xclFreeDeviceBuffer(buf); +} + + +size_t xclCopyBufferHost2Device(xclDeviceHandle handle, uint64_t dest, const void *src, size_t size, size_t seek) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclCopyBufferHost2Device(dest, src, size, seek); +} + + +size_t xclCopyBufferDevice2Host(xclDeviceHandle handle, void *dest, uint64_t src, size_t size, size_t skip) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclCopyBufferDevice2Host(dest, src, size, skip); +} + + +//This will be deprecated. 
+int xclUpgradeFirmware(xclDeviceHandle handle, const char *fileName) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclUpgradeFirmware(fileName); +} + +int xclUpgradeFirmware2(xclDeviceHandle handle, const char *fileName1, const char* fileName2) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + + if(!fileName2 || std::strlen(fileName2) == 0) + return drv->xclUpgradeFirmware(fileName1); + else + return drv->xclUpgradeFirmware2(fileName1, fileName2); +} + +int xclUpgradeFirmwareXSpi(xclDeviceHandle handle, const char *fileName, int index) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclUpgradeFirmwareXSpi(fileName, index); +} + +int xclTestXSpi(xclDeviceHandle handle, int index) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclTestXSpi(index); +} + +int xclBootFPGA(xclDeviceHandle handle) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclBootFPGA(); +} + +unsigned xclProbe() +{ + return xclxdma::XDMAShim::xclProbe(); +} + + +int xclResetDevice(xclDeviceHandle handle, xclResetKind kind) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->resetDevice(kind); +} + +int xclReClock(xclDeviceHandle handle, unsigned targetFreqMHz) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclReClock(targetFreqMHz); +} + + +int xclReClock2(xclDeviceHandle handle, unsigned short region, const unsigned short *targetFreqMHz) +{ + xclxdma::XDMAShim *drv = xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclReClock2(region, targetFreqMHz); +} + + +int xclLockDevice(xclDeviceHandle handle) +{ + xclxdma::XDMAShim *drv = 
xclxdma::XDMAShim::handleCheck(handle); + if (!drv) + return -1; + return drv->xclLockDevice() ? 0 : -1; +} + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/shim.h b/sdk/SDAccel/HAL/driver/xcldma/user/shim.h new file mode 100644 index 000000000..ae3f820d7 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/shim.h @@ -0,0 +1,256 @@ +#ifndef _XDMA_SHIM_H_ +#define _XDMA_SHIM_H_ + +/** + * Copyright (C) 2015-2016 Xilinx, Inc + * Author: Sonal Santan + * XDMA HAL Driver layered on top of XDMA kernel driver + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ + +#include "driver/include/xclhal.h" +#include "driver/include/xclperf.h" +#include "driver/xcldma/include/xbar_sys_parameters.h" + +#include +#include +#include +#include +#include + +// Work around GCC 4.8 + XDMA BAR implementation bugs +// With -O3 PCIe BAR read/write are not reliable hence force -O2 as max +// optimization level for pcieBarRead() and pcieBarWrite() +#if defined(__GNUC__) && defined(NDEBUG) +#define SHIM_O2 __attribute__ ((optimize("-O2"))) +#else +#define SHIM_O2 +#endif + +namespace xclxdma { + // Memory alignment for DDR and AXI-MM trace access + template class AlignedAllocator { + void *mBuffer; + size_t mCount; + public: + T *getBuffer() { + return (T *)mBuffer; + } + + size_t size() const { + return mCount * sizeof(T); + } + + AlignedAllocator(size_t alignment, size_t count) : mBuffer(0), mCount(count) { + if (posix_memalign(&mBuffer, alignment, count * sizeof(T))) { + mBuffer = 0; + } + } + ~AlignedAllocator() { + if (mBuffer) + free(mBuffer); + } + }; + + class MemoryManager; + class DataMover; + // XDMA Shim + class XDMAShim { + + struct ELARecord { + unsigned mStartAddress; + unsigned mEndAddress; + unsigned mDataCount; + + std::streampos mDataPos; + ELARecord() : mStartAddress(0), mEndAddress(0), + mDataCount(0), mDataPos(0) {} + }; + + typedef std::list ELARecordList; + + typedef std::list > PairList; + + public: + + // Bitstreams + int xclLoadBitstream(const char *fileName); + int xclLoadXclBin(const xclBin *buffer); + int xclUpgradeFirmware(const char *fileName); + int xclUpgradeFirmware2(const char *file1, const char* file2); + int xclUpgradeFirmwareXSpi(const char *fileName, int device_index=0); + int xclTestXSpi(int device_index); + int xclBootFPGA(); + int resetDevice(xclResetKind kind); + int xclReClock(unsigned targetFreqMHz); + int xclReClock2(unsigned short region, const unsigned short *targetFreqMHz); + + // Raw read/write + size_t xclWrite(xclAddressSpace space, uint64_t offset, const void *hostBuf, size_t size); + 
size_t xclRead(xclAddressSpace space, uint64_t offset, void *hostBuf, size_t size); + + // Buffer management + uint64_t xclAllocDeviceBuffer(size_t size); + uint64_t xclAllocDeviceBuffer2(size_t size, xclMemoryDomains domain, unsigned flags); + void xclFreeDeviceBuffer(uint64_t buf); + size_t xclCopyBufferHost2Device(uint64_t dest, const void *src, size_t size, size_t seek); + size_t xclCopyBufferDevice2Host(void *dest, uint64_t src, size_t size, size_t skip); + + // Performance monitoring + // Control + double xclGetDeviceClockFreqMHz(); + double xclGetReadMaxBandwidthMBps(); + double xclGetWriteMaxBandwidthMBps(); + void xclSetOclRegionProfilingNumberSlots(uint32_t numSlots); + size_t xclPerfMonClockTraining(xclPerfMonType type); + // Counters + size_t xclPerfMonStartCounters(xclPerfMonType type); + size_t xclPerfMonStopCounters(xclPerfMonType type); + size_t xclPerfMonReadCounters(xclPerfMonType type, xclCounterResults& counterResults); + // Trace + size_t xclPerfMonStartTrace(xclPerfMonType type, uint32_t startTrigger); + size_t xclPerfMonStopTrace(xclPerfMonType type); + uint32_t xclPerfMonGetTraceCount(xclPerfMonType type); + size_t xclPerfMonReadTrace(xclPerfMonType type, xclTraceResultsVector& traceVector); + + // Sanity checks + int xclGetDeviceInfo(xclDeviceInfo *info); + int xclGetDeviceInfo2(xclDeviceInfo2 *info); + static XDMAShim *handleCheck(void *handle); + static unsigned xclProbe(); + bool xclLockDevice(); + unsigned getTAG() const { + return mTag; + } + bool isGood() const; + + ~XDMAShim(); + XDMAShim(unsigned index, const char *logfileName, xclVerbosityLevel verbosity); + + private: + + size_t xclReadModifyWrite(uint64_t offset, const void *hostBuf, size_t size); + size_t xclReadSkipCopy(uint64_t offset, void *hostBuf, size_t size); + bool zeroOutDDR(); + + bool isXPR() const { + return ((mDeviceInfo.mSubsystemId >> 12) == 4); + } + + bool isMultipleOCLClockSupported() { + unsigned dsaNum = ((mDeviceInfo.mDeviceId << 16) | 
mDeviceInfo.mSubsystemId); + // 0x82384431 : TUL KU115 4ddr 3.1 DSA + return ((dsaNum == 0x82384431) || (dsaNum == 0x82384432))? true : false; + } + + bool isUltraScale() const { + return (mDeviceInfo.mDeviceId & 0x8000); + } + void initMemoryManager(); + + // Core DMA code + SHIM_O2 int pcieBarRead(int bar_num, unsigned long long offset, void* buffer, unsigned long long length); + SHIM_O2 int pcieBarWrite(int bar_num, unsigned long long offset, const void* buffer, unsigned long long length); + int freezeAXIGate(); + int freeAXIGate(); + + // PROM flashing + int prepare(unsigned startAddress, unsigned endAddress); + int program(std::ifstream& mcsStream, const ELARecord& record); + int program(std::ifstream& mcsStream); + int waitForReady(unsigned code, bool verbose = true); + int waitAndFinish(unsigned code, unsigned data, bool verbose = true); + + //XSpi flashing. + bool prepareXSpi(); + int programXSpi(std::ifstream& mcsStream, const ELARecord& record); + int programXSpi(std::ifstream& mcsStream); + bool waitTxEmpty(); + bool isFlashReady(); + //bool windDownWrites(); + bool bulkErase(); + bool sectorErase(unsigned Addr); + bool writeEnable(); +#if 0 + bool dataTransfer(bool read); +#endif + bool readPage(unsigned addr, uint8_t readCmd = 0xff); + bool writePage(unsigned addr, uint8_t writeCmd = 0xff); + unsigned readReg(unsigned offset); + int writeReg(unsigned regOffset, unsigned value); + bool finalTransfer(uint8_t *sendBufPtr, uint8_t *recvBufPtr, int byteCount); + bool getFlashId(); + //All remaining read /write register commands can be issued through this function. 
+ bool readRegister(unsigned commandCode, unsigned bytes); + bool writeRegister(unsigned commandCode, unsigned value, unsigned bytes); + bool select4ByteAddressMode(); + bool deSelect4ByteAddressMode(); + + + // Performance monitoring helper functions + bool isDSAVersion(unsigned majorVersion, unsigned minorVersion, bool onlyThisVersion); + unsigned getBankCount(); + uint64_t getHostTraceTimeNsec(); + uint64_t getPerfMonBaseAddress(xclPerfMonType type); + uint64_t getPerfMonFifoBaseAddress(xclPerfMonType type, uint32_t fifonum); + uint64_t getPerfMonFifoReadBaseAddress(xclPerfMonType type, uint32_t fifonum); + uint32_t getPerfMonNumberSlots(xclPerfMonType type); + uint32_t getPerfMonNumberSamples(xclPerfMonType type); + uint32_t getPerfMonNumberFifos(xclPerfMonType type); + uint32_t getPerfMonByteScaleFactor(xclPerfMonType type); + uint8_t getPerfMonShowIDS(xclPerfMonType type); + uint8_t getPerfMonShowLEN(xclPerfMonType type); + uint32_t getPerfMonSlotStartBit(xclPerfMonType type, uint32_t slotnum); + uint32_t getPerfMonSlotDataWidth(xclPerfMonType type, uint32_t slotnum); + size_t resetFifos(xclPerfMonType type); + uint32_t bin2dec(std::string str, int start, int number); + uint32_t bin2dec(const char * str, int start, int number); + std::string dec2bin(uint32_t n); + std::string dec2bin(uint32_t n, unsigned bits); + static std::string getDSAName(unsigned short deviceId, unsigned short subsystemId); + + private: + // This is a hidden signature of this class and helps in preventing + // user errors when incorrect pointers are passed in as handles. 
+ const unsigned mTag; + const int mBoardNumber; + const size_t maxDMASize; + bool mLocked; + +#ifndef _WINDOWS +// TODO: Windows build support + // mOffsets doesn't seem to be used + // and it caused window compilation error when we try to initialize it + const uint64_t mOffsets[XCL_ADDR_SPACE_MAX]; +#endif + DataMover *mDataMover; + int mUserHandle; + uint32_t mOclRegionProfilingNumberSlots; + + char *mUserMap; + std::ofstream mLogStream; + xclVerbosityLevel mVerbosity; + std::string mBinfile; + ELARecordList mRecordList; + std::vector mDDRMemoryManager; + xclDeviceInfo2 mDeviceInfo; + + public: + static const unsigned TAG; + }; +} + +#endif + +// XSIP watermark, do not delete 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 diff --git a/sdk/SDAccel/HAL/driver/xcldma/user/xspi.cpp b/sdk/SDAccel/HAL/driver/xcldma/user/xspi.cpp new file mode 100755 index 000000000..158050026 --- /dev/null +++ b/sdk/SDAccel/HAL/driver/xcldma/user/xspi.cpp @@ -0,0 +1,1531 @@ +/* + * Copyright (C) 2016 Xilinx, Inc + * Author(s) : Sonal Santan + * : Hem Neema + * + * Licensed under the Apache License, Version 2.0 (the "License"). You may + * not use this file except in compliance with the License. A copy of the + * License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "shim.h" +#include "driver/xcldma/include/xdma-ioctl.h" + +#ifdef WINDOWS +#define __func__ __FUNCTION__ +#endif + +#define FLASH_BASE_ADDRESS BPI_FLASH_OFFSET +#define PAGE_SIZE 256 +static const bool FOUR_BYTE_ADDRESSING = false; + +//testing sizes. 
+#define WRITE_DATA_SIZE 128 +#define READ_DATA_SIZE 128 + + +#define COMMAND_PAGE_PROGRAM 0x02 /* Page Program command */ +#define COMMAND_QUAD_WRITE 0x32 /* Quad Input Fast Program */ +#define COMMAND_EXT_QUAD_WRITE 0x38 /* Extended quad input fast program */ +#define COMMAND_SECTOR_ERASE 0xD8 /* Sector Erase command */ +#define COMMAND_BULK_ERASE 0xC7 /* Bulk Erase command */ +#define COMMAND_RANDOM_READ 0x03 /* Random read command */ +#define COMMAND_DUAL_READ 0x3B /* Dual Output Fast Read */ +#define COMMAND_DUAL_IO_READ 0xBB /* Dual IO Fast Read */ +#define COMMAND_QUAD_READ 0x6B /* Quad Output Fast Read */ +#define COMMAND_QUAD_IO_READ 0xEB /* Quad IO Fast Read */ +#define COMMAND_IDCODE_READ 0x9F /* Read ID Code */ +//read commands +#define COMMAND_STATUSREG_READ 0x05 /* Status read command */ +#define COMMAND_FLAG_STATUSREG_READ 0x70 /* Status flag read command */ +#define COMMAND_NON_VOLATILE_CFGREG_READ 0xB5 /* Non volatile configuration register read command */ +#define COMMAND_VOLATILE_CFGREG_READ 0x85 /* Volatile configuration register read command */ +#define COMMAND_ENH_VOLATILE_CFGREG_READ 0x65 /* Enhanced volatile configuration register read command */ +#define COMMAND_EXTENDED_ADDRESS_REG_READ 0xC8 /* Extended address register read command */ +//write commands +#define COMMAND_STATUSREG_WRITE 0x01 /* Status register write command */ +#define COMMAND_NON_VOLATILE_CFGREG_WRITE 0xB1 /* Non volatile configuration register write command */ +#define COMMAND_VOLATILE_CFGREG_WRITE 0x81 /* Volatile configuration register write command */ +#define COMMAND_ENH_VOLATILE_CFGREG_WRITE 0x61 /* Enhanced volatile configuration register write command */ +#define COMMAND_EXTENDED_ADDRESS_REG_WRITE 0xC5 /* Extended address register write command */ + +#define COMMAND_CLEAR_FLAG_REGISTER 0x50 /* Clear flag register */ + +//4-byte addressing +#define ENTER_FOUR_BYTE_ADDR_MODE 0xB7 /* enter 4-byte address mode */ +#define EXIT_FOUR_BYTE_ADDR_MODE 0xE9 
/* exit 4-byte address mode */ +#define FOUR_BYTE_READ 0x13 /* 4-byte read */ +#define FOUR_BYTE_FAST_READ 0x0C /* 4-byte fast read */ +#define FOUR_BYTE_DUAL_OUTPUT_FAST_READ 0x3C /* 4-byte dual output fast read */ +#define FOUR_BYTE_DUAL_IO_FAST_READ 0xBC /* 4-byte dual input/output fast read */ +#define FOUR_BYTE_QUAD_OUTPUT_FAST_READ 0x6C /* 4-byte quad output fast read */ +#define FOUR_BYTE_QUAD_IO_FAST_READ 0xEC /* 4-byte quad input/output fast read */ +#define FOUR_BYTE_PAGE_PROGRAM 0x12 /* 4-byte page program */ +#define FOUR_BYTE_QUAD_INPUT_FAST_PROGRAM 0x34 /* 4-byte quad input fast program */ +#define FOUR_BYTE_QUAD_INPUT_EXT_FAST_PROGRAM 0x3E /* 4-byte quad input extended fast program */ +#define FOUR_BYTE_SECTOR_ERASE 0xDC /* 4-byte sector erase */ + +static const unsigned READ_WRITE_EXTRA_BYTES = FOUR_BYTE_ADDRESSING ? 5 :4; +static const unsigned SECTOR_ERASE_BYTES = FOUR_BYTE_ADDRESSING ? 5 :4; + + +#define IDCODE_READ_BYTES 5 + +#define DUAL_READ_DUMMY_BYTES 2 +#define QUAD_READ_DUMMY_BYTES 4 +#define DUAL_IO_READ_DUMMY_BYTES 2 +#define QUAD_IO_READ_DUMMY_BYTES 5 + +//#define READ_WRITE_EXTRA_BYTES 4 /* Read/Write extra bytes */ +//#define SECTOR_ERASE_BYTES 4 /* Sector erase extra bytes */ +#define WRITE_ENABLE_BYTES 1 /* Write Enable bytes */ +#define BULK_ERASE_BYTES 1 /* Bulk erase extra bytes */ +#define STATUS_READ_BYTES 2 /* Status read bytes count */ +#define STATUS_WRITE_BYTES 2 /* Status write bytes count */ + + + +#define NUM_SLAVES 2 +#define SLAVE_SELECT_MASK ((1 << NUM_SLAVES) -1) +/* + * Flash not busy mask in the status register of the flash device. + */ +#define FLASH_SR_IS_READY_MASK 0x01 /* Ready mask */ +#define COMMAND_WRITE_ENABLE 0x06 /* Write Enable command */ + +//SPI control reg masks. 
+#define XSP_CR_LOOPBACK_MASK 0x00000001 /**< Local loopback mode */ +#define XSP_CR_ENABLE_MASK 0x00000002 /**< System enable */ +#define XSP_CR_MASTER_MODE_MASK 0x00000004 /**< Enable master mode */ +#define XSP_CR_CLK_POLARITY_MASK 0x00000008 /**< Clock polarity high + or low */ +#define XSP_CR_CLK_PHASE_MASK 0x00000010 /**< Clock phase 0 or 1 */ +#define XSP_CR_TXFIFO_RESET_MASK 0x00000020 /**< Reset transmit FIFO */ +#define XSP_CR_RXFIFO_RESET_MASK 0x00000040 /**< Reset receive FIFO */ +#define XSP_CR_MANUAL_SS_MASK 0x00000080 /**< Manual slave select + assert */ +#define XSP_CR_TRANS_INHIBIT_MASK 0x00000100 /**< Master transaction + inhibit */ + +/** + * LSB/MSB first data format select. The default data format is MSB first. + * The LSB first data format is not available in all versions of the Xilinx Spi + * Device whereas the MSB first data format is supported by all the versions of + * the Xilinx Spi Devices. Please check the HW specification to see if this + * feature is supported or not. + */ +#define XSP_CR_LSB_MSB_FIRST_MASK 0x00000200 + +//End SPI CR masks + +//SPI status reg masks +#define XSP_SR_RX_EMPTY_MASK 0x00000001 /**< Receive Reg/FIFO is empty */ +#define XSP_SR_RX_FULL_MASK 0x00000002 /**< Receive Reg/FIFO is full */ +#define XSP_SR_TX_EMPTY_MASK 0x00000004 /**< Transmit Reg/FIFO is empty */ +#define XSP_SR_TX_FULL_MASK 0x00000008 /**< Transmit Reg/FIFO is full */ +#define XSP_SR_MODE_FAULT_MASK 0x00000010 /**< Mode fault error */ +#define XSP_SR_SLAVE_MODE_MASK 0x00000020 /**< Slave mode select */ + +/* + * The following bits are available only in axi_qspi Status register. 
+ */ +#define XSP_SR_CPOL_CPHA_ERR_MASK 0x00000040 /**< CPOL/CPHA error */ +#define XSP_SR_SLAVE_MODE_ERR_MASK 0x00000080 /**< Slave mode error */ +#define XSP_SR_MSB_ERR_MASK 0x00000100 /**< MSB Error */ +#define XSP_SR_LOOP_BACK_ERR_MASK 0x00000200 /**< Loop back error */ +#define XSP_SR_CMD_ERR_MASK 0x00000400 /**< 'Invalid cmd' error */ + + +//End SPI SR masks + +#define XSP_SRR_OFFSET 0x40 /**< Software Reset register */ +#define XSP_CR_OFFSET 0x60 /**< Control register */ +#define XSP_SR_OFFSET 0x64 /**< Status Register */ +#define XSP_DTR_OFFSET 0x68 /**< Data transmit */ +#define XSP_DRR_OFFSET 0x6C /**< Data receive */ +#define XSP_SSR_OFFSET 0x70 /**< 32-bit slave select */ +#define XSP_TFO_OFFSET 0x74 /**< Tx FIFO occupancy */ +#define XSP_RFO_OFFSET 0x78 /**< Rx FIFO occupancy */ + +#define BYTE1 0 /* Byte 1 position */ +#define BYTE2 1 /* Byte 2 position */ +#define BYTE3 2 /* Byte 3 position */ +#define BYTE4 3 /* Byte 4 position */ +#define BYTE5 4 /* Byte 5 position */ +#define BYTE6 5 /* Byte 6 position */ +#define BYTE7 6 /* Byte 7 position */ +#define BYTE8 7 /* Byte 8 position */ + +/** + * SPI Software Reset Register (SRR) mask. 
+ */ +#define XSP_SRR_RESET_MASK 0x0000000A + + +//---- +#define XSpi_ReadReg(RegOffset) readReg(RegOffset) +#define XSpi_WriteReg(RegOffset, RegisterValue) writeReg(RegOffset, RegisterValue) + +#define XSpi_SetControlReg(Mask) XSpi_WriteReg(XSP_CR_OFFSET, (Mask)) +#define XSpi_GetControlReg() XSpi_ReadReg(XSP_CR_OFFSET) + +#define XSpi_GetStatusReg() XSpi_ReadReg(XSP_SR_OFFSET) + +#define XSpi_SetSlaveSelectReg(Mask) XSpi_WriteReg(XSP_SSR_OFFSET, (Mask)) +#define XSpi_GetSlaveSelectReg() XSpi_ReadReg(XSP_SSR_OFFSET) + +//--- + +static uint8_t WriteBuffer[PAGE_SIZE + READ_WRITE_EXTRA_BYTES]; +static uint8_t ReadBuffer[PAGE_SIZE + READ_WRITE_EXTRA_BYTES + 4]; + +static int slave_index = 0; + +static bool TEST_MODE = false; +static bool TEST_MODE_MCS_ONLY = false; + +static const uint32_t CONTROL_REG_START_STATE + = XSP_CR_TRANS_INHIBIT_MASK | XSP_CR_MANUAL_SS_MASK |XSP_CR_RXFIFO_RESET_MASK + | XSP_CR_TXFIFO_RESET_MASK | XSP_CR_ENABLE_MASK | XSP_CR_MASTER_MODE_MASK ; + +namespace xclxdma +{ + +static void clearReadBuffer(unsigned size) { + for(unsigned i =0; i < size; ++i) { + ReadBuffer[i] = 0; + } +} + +static void clearWriteBuffer(unsigned size) { + for(unsigned i =0; i < size; ++i) { + WriteBuffer[i] = 0; + } +} + +static void clearBuffers() { + clearReadBuffer(PAGE_SIZE + READ_WRITE_EXTRA_BYTES+4); + clearWriteBuffer(PAGE_SIZE + READ_WRITE_EXTRA_BYTES); +} + +static unsigned getSector(unsigned address) { + return (address >> 24) & 0xF; +} + +int XDMAShim::xclTestXSpi(int index) +{ + TEST_MODE = true; + + if(TEST_MODE_MCS_ONLY) { + //just test the mcs. + return 0; + } + + //2 slaves present, set the slave index. + slave_index = index; + + + //print the IP (not of flash) control/status register. + uint32_t ControlReg = XSpi_GetControlReg(); + uint32_t StatusReg = XSpi_GetStatusReg(); + std::cout << "Boot IP Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + + + //Make sure it is ready to receive commands. 
+ ControlReg = XSpi_GetControlReg(); + ControlReg = CONTROL_REG_START_STATE; + + XSpi_SetControlReg(ControlReg); + ControlReg = XSpi_GetControlReg(); + StatusReg = XSpi_GetStatusReg(); + std::cout << "Reset IP Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + +// if(!isFlashReady()) + // return -1; + + //1. Testing idCode reads. + //-- + std::cout << "Testing id code " << std::endl; + if(!getFlashId()) { + std::cout << "Exiting now, as could not get correct idcode" << std::endl; + exit(0); + return -1; + } + + std::cout << "id code successful (please verify the idcode output too" << std::endl; + std::cout << "Now reading various flash registers" << std::endl; + + //2. Testing register reads. + //Using STATUS_READ_BYTES 2 for all, TODO ? + uint8_t Cmd = COMMAND_STATUSREG_READ; + std::cout << "Testing COMMAND_STATUSREG_READ" << std::endl; + readRegister(Cmd, STATUS_READ_BYTES); + + std::cout << "Testing COMMAND_FLAG_STATUSREG_READ" << std::endl; + Cmd = COMMAND_FLAG_STATUSREG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + std::cout << "Testing COMMAND_NON_VOLATILE_CFGREG_READ" << std::endl; + Cmd = COMMAND_NON_VOLATILE_CFGREG_READ; + readRegister(Cmd, 4); + + std::cout << "Testing COMMAND_VOLATILE_CFGREG_READ" << std::endl; + Cmd = COMMAND_VOLATILE_CFGREG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + std::cout << "Testing COMMAND_ENH_VOLATILE_CFGREG_READ" << std::endl; + Cmd = COMMAND_ENH_VOLATILE_CFGREG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + //3. 
Testing simple read and write + std::cout << "Testing read and write of 16 bytes" << std::endl; + + //unsigned baseAddr = 0x007A0000; + unsigned baseAddr = 0; + unsigned Addr = 0; + unsigned AddressBytes = 3; + if(FOUR_BYTE_ADDRESSING) { + AddressBytes = 4; + writeRegister(ENTER_FOUR_BYTE_ADDR_MODE, 0, 0); + }else + writeRegister(EXIT_FOUR_BYTE_ADDR_MODE, 0, 0); + + //Verify 3 or 4 byte addressing, 0th bit == 1 => 4 byte. + std::cout << "Testing COMMAND_FLAG_STATUSREG_READ" << std::endl; + Cmd = COMMAND_FLAG_STATUSREG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + uint8_t WriteCmd = 0xff; + uint8_t ReadCmd = 0xff; + + //Test the higher two sectors - first test erase. + + //First try erasing a sector and reading a + //page (we should get FFFF ...) + for(unsigned sector = 2 ; sector <= 3; sector++) + { + clearBuffers(); + + if(!writeRegister(COMMAND_EXTENDED_ADDRESS_REG_WRITE, sector, 1)) + return false; + + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + //Sector Erase will reset TX and RX FIFO + if(!sectorErase(Addr + baseAddr)) + return false; + + bool ready = isFlashReady(); + if(!ready){ + std::cout << "Unable to get flash ready" << std::endl; + return false; + } + + //try faster read. + if(FOUR_BYTE_ADDRESSING) { + ReadCmd = FOUR_BYTE_QUAD_OUTPUT_FAST_READ; + }else + ReadCmd = COMMAND_QUAD_READ; + + //if(!readPage(Addr, ReadCmd)) + if(!readPage(Addr + baseAddr)) + return false; + } + + clearBuffers(); + //---Erase test done + + + //---Now try writing and reading a page. 
+ //first write 2 pages (using 4 128Mb writes) each to 2 sectors, and then read them + + //Write data + for(unsigned sector = 2 ; sector <= 3; sector++) + { + if(!writeRegister(COMMAND_EXTENDED_ADDRESS_REG_WRITE, sector, 1)) + return false; + + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + for(int j = 0; j < 4; ++j) + { + clearBuffers(); + for(unsigned i = 0; i < WRITE_DATA_SIZE; ++ i) { + WriteBuffer[i+ AddressBytes + 1] = j + sector + i; //some random data. + } + + Addr = baseAddr + WRITE_DATA_SIZE*j; + + if(!writePage(Addr)) { + std::cout << "Write page unsuccessful, returning" << std::endl; + return -1; + } + } + + } + + + clearBuffers(); + + //Read the data back, use 2 reads each of 128 bytes, twice to test 2 pages. + for(unsigned sector = 2 ; sector <= 3; sector++) + { + //Select a sector (sector 2) + if(!writeRegister(COMMAND_EXTENDED_ADDRESS_REG_WRITE, sector, 1)) + return false; + + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + + //This read should be mix of a b c .. and Z Y X ... 
+ for(int j = 0 ; j < 4; ++j) + { + clearBuffers(); + Addr = baseAddr + WRITE_DATA_SIZE*j; + if(!readPage(Addr)) { + std::cout << "Read page unsuccessful, returning" << std::endl; + return -1; + } + } + std::cout << "Done reading sector: " << sector << std::endl; + } + + return 0; +} + +int XDMAShim::xclUpgradeFirmware2(const char *file1, const char* file2) { + int status = 0; + status = xclUpgradeFirmwareXSpi(file1, 0); + if(status) + return status; + clearBuffers(); + mRecordList.clear(); + return xclUpgradeFirmwareXSpi(file2, 1); +} + +int XDMAShim::xclUpgradeFirmwareXSpi(const char *mcsFile, int index) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << ", " << mcsFile << std::endl; + } + + slave_index = index; + + if(!TEST_MODE) { +// std::cout << "INFO: Reseting hardware\n"; +// if (freezeAXIGate() != 0) { +// return -1; +// } +// +// const timespec req = {0, 5000}; +// nanosleep(&req, 0); +// if (freeAXIGate() != 0) { +// return -1; +// } +// nanosleep(&req, 0); + } + + std::string line; + std::ifstream mcsStream(mcsFile); + std::string startAddress; + ELARecord record; + bool endRecordFound = false; + + if(!mcsStream.is_open()) { + std::cout << "ERROR: Cannot open " << mcsFile << ". Check that it exists and is readable." 
<< std::endl; + return -ENOENT; + } + + std::cout << "INFO: Parsing file " << mcsFile << std::endl; + while (!mcsStream.eof() && !endRecordFound) { + std::string line; + std::getline(mcsStream, line); + if (line.size() == 0) { + continue; + } + if (line[0] != ':') { + return -1; + } + const unsigned dataLen = std::stoi(line.substr(1, 2), 0 , 16); + const unsigned address = std::stoi(line.substr(3, 4), 0, 16); + const unsigned recordType = std::stoi(line.substr(7, 2), 0 , 16); + switch (recordType) { + case 0x00: + { + if (dataLen > 16) { + // For xilinx mcs files data length should be 16 for all records + // except for the last one which can be smaller + return -1; + } + if (address != record.mDataCount) { + std::cout << "Address is not contiguous ! " << std::endl; + return -1; + } + if (record.mEndAddress != address) { + return -1; + } + record.mDataCount += dataLen; + record.mEndAddress += dataLen; + break; + } + case 0x01: + { + if (startAddress.size() == 0) { + break; + } + mRecordList.push_back(record); + endRecordFound = true; + break; + } + case 0x02: + { + assert(0); + break; + } + case 0x04: + { + if (address != 0x0) { + return -1; + } + if (dataLen != 2) { + return -1; + } + std::string newAddress = line.substr(9, dataLen * 2); + if (startAddress.size()) { + // Finish the old record + mRecordList.push_back(record); + } + // Start a new record + record.mStartAddress = std::stoi(newAddress, 0 , 16); + record.mDataPos = mcsStream.tellg(); + record.mEndAddress = 0; + record.mDataCount = 0; + startAddress = newAddress; + } + } + } + + mcsStream.seekg(0); + std::cout << "INFO: Found " << mRecordList.size() << " ELA Records" << std::endl; + + return programXSpi(mcsStream); +} + +unsigned XDMAShim::readReg(unsigned RegOffset) { + unsigned value; + if(pcieBarRead(BPI_FLASH_BAR, FLASH_BASE_ADDRESS + RegOffset, &value, 4) != 0) { + assert(0); + std::cout << "read reg ERROR" << std::endl; + } + return value; +} + +int XDMAShim::writeReg(unsigned RegOffset, unsigned 
value) { + int status = pcieBarWrite(BPI_FLASH_BAR, FLASH_BASE_ADDRESS + RegOffset, &value, 4); + if(status != 0) { + assert(0); + std::cout << "write reg ERROR " << std::endl; + } + return status; +} + + +bool XDMAShim::waitTxEmpty() { + long long delay = 0; + const timespec req = {0, 5000}; + while (delay < 30000000000) { + uint32_t StatusReg = XSpi_GetStatusReg(); + if(StatusReg & XSP_SR_TX_EMPTY_MASK ) + return true; + //If not empty, check how many bytes remain. + uint32_t Data = XSpi_ReadReg(XSP_TFO_OFFSET); + std::cout << std::hex << Data << std::dec << std::endl; + nanosleep(&req, 0); + delay += 5000; + } + std::cout << "Unable to get Tx Empty\n"; + return false; +} + +bool XDMAShim::isFlashReady() { + uint32_t StatusReg; + const timespec req = {0, 5000}; + long long delay = 0; + while (delay < 30000000000) { + //StatusReg = XSpi_GetStatusReg(); + WriteBuffer[BYTE1] = COMMAND_STATUSREG_READ; + bool status = finalTransfer(WriteBuffer, ReadBuffer, STATUS_READ_BYTES); + if( !status ) { + return false; + } + //TODO: wait ? + StatusReg = ReadBuffer[1]; + if( (StatusReg & FLASH_SR_IS_READY_MASK) == 0) + return true; + //TODO: Try resetting. Uncomment next line? + //XSpi_WriteReg(XSP_SRR_OFFSET, XSP_SRR_RESET_MASK); + nanosleep(&req, 0); + delay += 5000; + } + std::cout << "Unable to get Flash Ready\n"; + return false; + +#if 0 + uint32_t StatusReg; + const timespec req = {0, 5000}; + long long delay = 0; + while (delay < 30000000000) { + StatusReg = XSpi_GetStatusReg(); + if(StatusReg & FLASH_SR_IS_READY_MASK) + return true; + //Try resetting. 
+ XSpi_WriteReg(XSP_SRR_OFFSET, XSP_SRR_RESET_MASK); + nanosleep(&req, 0); + delay += 5000; + } + std::cout << "Unable to get Flash Ready\n"; + return false; +#endif +} + +bool XDMAShim::sectorErase(unsigned Addr) { + if(!isFlashReady()) + return false; + + if(!writeEnable()) + return false; + + if(TEST_MODE) { + std::cout << "Testing COMMAND_FLAG_STATUSREG_READ" << std::endl; + unsigned Cmd = COMMAND_FLAG_STATUSREG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + } + + uint32_t ControlReg = XSpi_GetControlReg(); + ControlReg |= XSP_CR_RXFIFO_RESET_MASK ; + ControlReg |= XSP_CR_TXFIFO_RESET_MASK; + XSpi_SetControlReg(ControlReg); + + /* + * Prepare the WriteBuffer. + */ + if(!FOUR_BYTE_ADDRESSING) { + WriteBuffer[BYTE1] = COMMAND_SECTOR_ERASE; + WriteBuffer[BYTE2] = (uint8_t) (Addr >> 16); + WriteBuffer[BYTE3] = (uint8_t) (Addr >> 8); + WriteBuffer[BYTE4] = (uint8_t) (Addr); + }else { + WriteBuffer[BYTE1] = FOUR_BYTE_SECTOR_ERASE; + WriteBuffer[BYTE2] = (uint8_t) (Addr >> 24); + WriteBuffer[BYTE3] = (uint8_t) (Addr >> 16); + WriteBuffer[BYTE4] = (uint8_t) (Addr >> 8); + WriteBuffer[BYTE5] = (uint8_t) Addr; + } + + if(!finalTransfer(WriteBuffer, NULL, SECTOR_ERASE_BYTES)) + return false; + + /* + * Wait till the Transfer is complete and check if there are any errors + * in the transaction.. 
+ */ + if(!waitTxEmpty()) + return false; + + return true; +} + +bool XDMAShim::bulkErase() +{ + if(!isFlashReady()) + return false; + + if(!writeEnable()) + return false; + + uint32_t ControlReg = CONTROL_REG_START_STATE; + XSpi_SetControlReg(ControlReg); + + uint32_t testControlReg = XSpi_GetControlReg(); + uint32_t testStatusReg = XSpi_GetStatusReg(); + //2 + WriteBuffer[BYTE1] = COMMAND_BULK_ERASE; + + if(!finalTransfer(WriteBuffer, NULL, BULK_ERASE_BYTES)) + return false; + + return waitTxEmpty(); +} + +bool XDMAShim::writeEnable() { + uint32_t StatusReg = XSpi_GetStatusReg(); + if(StatusReg & XSP_SR_TX_FULL_MASK) { + std::cout << "Tx fifo fill during WriteEnable" << std::endl; + return false; + } + + //1 + uint32_t ControlReg = XSpi_GetControlReg(); + ControlReg |= CONTROL_REG_START_STATE; + XSpi_SetControlReg(ControlReg); + + //2 + WriteBuffer[BYTE1] = COMMAND_WRITE_ENABLE; //0x06 + + if(!finalTransfer(WriteBuffer, NULL, WRITE_ENABLE_BYTES)) + return false; + + return waitTxEmpty(); +} + +bool XDMAShim::getFlashId() +{ + + if(!isFlashReady()) { + std::cout << "Unable to get flash ready " << std::endl; + return false; + } + + bool Status = false; + /* * Prepare the Write Buffer. 
*/ + WriteBuffer[BYTE1] = COMMAND_IDCODE_READ; + + Status = finalTransfer(WriteBuffer, ReadBuffer, IDCODE_READ_BYTES); + if( !Status ) { + return false; + } + + for (int i = 0; i < IDCODE_READ_BYTES; i++) { + std::cout << "Idcode byte[" << i << "] " << std::hex << (int)ReadBuffer[i] << std::endl; + ReadBuffer[i] = 0; + } + + unsigned ffCount = 0; + for (int i = 1; i < IDCODE_READ_BYTES; i++) { + if ((unsigned int)ReadBuffer[i] == 0xff) + ffCount++; + } + + if(ffCount == IDCODE_READ_BYTES -1) + return false; + + return true; +} + + +bool XDMAShim::finalTransfer(uint8_t *SendBufPtr, uint8_t *RecvBufPtr, int ByteCount) +{ + uint32_t ControlReg; + uint32_t StatusReg; + uint32_t Data = 0; + uint8_t DataWidth = 8; + uint32_t SlaveSelectMask = SLAVE_SELECT_MASK; + + uint32_t SlaveSelectReg = 0; + if(slave_index == 0) + SlaveSelectReg = ~0x01; + else if(slave_index == 1) + SlaveSelectReg = ~0x02; + + /* + * Enter a critical section from here to the end of the function since + * state is modified, an interrupt is enabled, and the control register + * is modified (r/m/w). + */ + + ControlReg = XSpi_GetControlReg(); + StatusReg = XSpi_GetStatusReg(); + + if(TEST_MODE) + std::cout << "Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + + + /* + * If configured as a master, be sure there is a slave select bit set + * in the slave select register. If no slaves have been selected, the + * value of the register will equal the mask. When the device is in + * loopback mode, however, no slave selects need be set. + */ + if (ControlReg & XSP_CR_MASTER_MODE_MASK) { + if ((ControlReg & XSP_CR_LOOPBACK_MASK) == 0) { + if (SlaveSelectReg == SlaveSelectMask) { + std::cout << "No slave selected" << std::endl; + return false; + } + } + } + + /* + * Set up buffer pointers. 
+ */ + uint8_t* SendBufferPtr = SendBufPtr; + uint8_t* RecvBufferPtr = RecvBufPtr; + + //int RequestedBytes = ByteCount; + int RemainingBytes = ByteCount; + unsigned int BytesTransferred = 0; + + /* + * Fill the DTR/FIFO with as many bytes as it will take (or as many as + * we have to send). We use the tx full status bit to know if the device + * can take more data. By doing this, the driver does not need to know + * the size of the FIFO or that there even is a FIFO. The downside is + * that the status register must be read each loop iteration. + */ + StatusReg = XSpi_GetStatusReg(); + if((StatusReg & (1<<10)) != 0) { + std::cout << "status reg in error situation " << std::endl; + return false; + } + + while (((StatusReg & XSP_SR_TX_FULL_MASK) == 0) && (RemainingBytes > 0)) { + if (DataWidth == 8) { + Data = *SendBufferPtr; + } else if (DataWidth == 16) { + Data = *(uint16_t *)SendBufferPtr; + } else if (DataWidth == 32){ + Data = *(uint32_t *)SendBufferPtr; + } + + if(pcieBarWrite(BPI_FLASH_BAR, FLASH_BASE_ADDRESS + XSP_DTR_OFFSET, &Data, 4) != 0) + return false; + SendBufferPtr += (DataWidth >> 3); + RemainingBytes -= (DataWidth >> 3); + StatusReg = XSpi_GetStatusReg(); + if((StatusReg & (1<<10)) !=0) { + std::cout << "Write command caused created error" << std::endl; + return false; + } + } + + + /* + * Set the slave select register to select the device on the SPI before + * starting the transfer of data. + */ + XSpi_SetSlaveSelectReg(SlaveSelectReg); + + ControlReg = XSpi_GetControlReg(); + StatusReg = XSpi_GetStatusReg(); + + if(TEST_MODE) + std::cout << "Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + + if((StatusReg & (1<<10)) != 0) { + std::cout << "status reg in error situation: 2 " << std::endl; + return false; + } + + /* + * Start the transfer by no longer inhibiting the transmitter and + * enabling the device. 
For a master, this will in fact start the + * transfer, but for a slave it only prepares the device for a transfer + * that must be initiated by a master. + */ + ControlReg = XSpi_GetControlReg(); + ControlReg &= ~XSP_CR_TRANS_INHIBIT_MASK; + XSpi_SetControlReg(ControlReg); + + if(TEST_MODE) + std::cout << "Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + + + //Data transfer to actual flash has already started happening here. + + { /* Polled mode of operation */ + + // poll the status register to * Transmit/Receive SPI data. + while(ByteCount > 0) + { + + /* + * Wait for the transfer to be done by polling the + * Transmit empty status bit + */ + do { + StatusReg = XSpi_GetStatusReg(); + } while ((StatusReg & XSP_SR_TX_EMPTY_MASK) == 0); + + + //Do masking of slaves at the end as it doesnt make a difference. + //XSpi_SetSlaveSelectReg(SlaveSelectMask); + + /* + * A transmit has just completed. Process received data + * and check for more data to transmit. Always inhibit + * the transmitter while the transmit register/FIFO is + * being filled, or make sure it is stopped if we're + * done. + */ + ControlReg = XSpi_GetControlReg(); + XSpi_SetControlReg(ControlReg | XSP_CR_TRANS_INHIBIT_MASK); + + ControlReg = XSpi_GetControlReg(); + + if(TEST_MODE) + std::cout << "Control/Status " << std::hex << ControlReg << "/" << StatusReg << std::dec << std::endl; + + /* + * First get the data received as a result of the + * transmit that just completed. We get all the data + * available by reading the status register to determine + * when the Receive register/FIFO is empty. Always get + * the received data, but only fill the receive + * buffer if it points to something (the upper layer + * software may not care to receive data). + */ + StatusReg = XSpi_GetStatusReg(); + + while ((StatusReg & XSP_SR_RX_EMPTY_MASK) == 0) + { + //read the data. 
+ if(pcieBarRead(BPI_FLASH_BAR, FLASH_BASE_ADDRESS + XSP_DRR_OFFSET, &Data, 4) != 0) + return false; + + if (DataWidth == 8) { + if(RecvBufferPtr != NULL) { + *RecvBufferPtr++ = (uint8_t)Data; + } + } else if (DataWidth == 16) { + if (RecvBufferPtr != NULL){ + *(uint16_t *)RecvBufferPtr = (uint16_t)Data; + RecvBufferPtr += 2; + } + } else if (DataWidth == 32) { + if (RecvBufferPtr != NULL){ + *(uint32_t *)RecvBufferPtr = Data; + RecvBufferPtr += 4; + } + } + + BytesTransferred += (DataWidth >> 3); + ByteCount -= (DataWidth >> 3); + StatusReg = XSpi_GetStatusReg(); + if((StatusReg & (1<<10)) != 0) { + std::cout << "status reg in error situation " << std::endl; + return false; + } + } + + //If there are still unwritten bytes, then finishing writing (below code) + //and reading (above code) them. + if (RemainingBytes > 0) { + + /* + * Fill the DTR/FIFO with as many bytes as it + * will take (or as many as we have to send). + * We use the Tx full status bit to know if the + * device can take more data. + * By doing this, the driver does not need to + * know the size of the FIFO or that there even + * is a FIFO. + * The downside is that the status must be read + * each loop iteration. + */ + StatusReg = XSpi_GetStatusReg(); + + while(((StatusReg & XSP_SR_TX_FULL_MASK)== 0) && (RemainingBytes > 0)) + { + if (DataWidth == 8) { + Data = *SendBufferPtr; + } else if (DataWidth == 16) { + Data = *(uint16_t *)SendBufferPtr; + } else if (DataWidth == 32) { + Data = *(uint32_t *)SendBufferPtr; + } + + if(pcieBarWrite(BPI_FLASH_BAR, FLASH_BASE_ADDRESS + XSP_DTR_OFFSET, &Data, 4) != 0) + return false; + + SendBufferPtr += (DataWidth >> 3); + RemainingBytes -= (DataWidth >> 3); + StatusReg = XSpi_GetStatusReg(); + if((StatusReg & (1<<10)) != 0) { + std::cout << "status reg in error situation " << std::endl; + return false; + } + } + + //Start the transfer by not inhibiting the transmitter any longer. 
+ ControlReg = XSpi_GetControlReg(); + ControlReg &= ~XSP_CR_TRANS_INHIBIT_MASK; + XSpi_SetControlReg(ControlReg); + } + } + + //Stop the transfer by inhibiting * the transmitter. + ControlReg = XSpi_GetControlReg(); + XSpi_SetControlReg(ControlReg | XSP_CR_TRANS_INHIBIT_MASK); + + /* + * Deassert the slaves on the SPI bus when the transfer is complete, + */ + XSpi_SetSlaveSelectReg(SlaveSelectMask); + } + + return true; +} + + +bool XDMAShim::writePage(unsigned Addr, uint8_t writeCmd) +{ + if(!isFlashReady()) + return false; + + /* + { + //debug + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + uint8_t Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + if(!isFlashReady()) + return false; + }*/ + + if(!writeEnable()) + return false; + + unsigned bkupAddr = Addr; + + //1 : reset Tx and Rx FIFO's + uint32_t ControlReg = CONTROL_REG_START_STATE; +// uint32_t ControlReg = XSpi_GetControlReg(); +// ControlReg |= XSP_CR_RXFIFO_RESET_MASK ; +// ControlReg |= XSP_CR_TXFIFO_RESET_MASK; + XSpi_SetControlReg(ControlReg); + + uint8_t WriteCmd = writeCmd; + //2 + if(!FOUR_BYTE_ADDRESSING) { + if(writeCmd == 0xff) + WriteCmd = COMMAND_QUAD_WRITE; + bkupAddr &= 0x00ffffff; // truncate to 24 bits + //3 byte address mode + //COMMAND_PAGE_PROGRAM gives out all FF's + //COMMAND_EXT_QUAD_WRITE: hangs the system + WriteBuffer[BYTE1] = WriteCmd; + WriteBuffer[BYTE2] = (uint8_t) (bkupAddr >> 16); + WriteBuffer[BYTE3] = (uint8_t) (bkupAddr >> 8); + WriteBuffer[BYTE4] = (uint8_t) bkupAddr; + }else { + if(writeCmd == 0xff) + WriteBuffer[BYTE1] = FOUR_BYTE_QUAD_INPUT_FAST_PROGRAM; + WriteBuffer[BYTE2] = (uint8_t) (bkupAddr >> 24); + WriteBuffer[BYTE3] = (uint8_t) (bkupAddr >> 16); + WriteBuffer[BYTE4] = (uint8_t) (bkupAddr >> 8); + WriteBuffer[BYTE5] = (uint8_t) bkupAddr; + } + + bkupAddr = Addr; + //The data to write is already filled up, so now just write the buffer. 
+ + if(!finalTransfer(WriteBuffer, ReadBuffer, WRITE_DATA_SIZE + READ_WRITE_EXTRA_BYTES)) + return false; + + if(!waitTxEmpty()) + return false; + + + return true; + +} + +bool XDMAShim::readPage(unsigned Addr, uint8_t readCmd) +{ + if(!isFlashReady()) + return false; + + /* + { + //debug + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + uint8_t Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + if(!isFlashReady()) + return false; + clearBuffer(); + }*/ + + unsigned bkupAddr = Addr; + //-- + uint32_t ControlReg = CONTROL_REG_START_STATE; +// uint32_t ControlReg = XSpi_GetControlReg(); +// ControlReg |= XSP_CR_RXFIFO_RESET_MASK ; +// ControlReg |= XSP_CR_TXFIFO_RESET_MASK; + XSpi_SetControlReg(ControlReg); + + //1 : reset TX/RX FIFO's + uint8_t ReadCmd = readCmd; + + //uint8_t ReadCmd = COMMAND_RANDOM_READ; + if(!FOUR_BYTE_ADDRESSING) { + //3 byte addressing mode + if(readCmd == 0xff) + ReadCmd = COMMAND_QUAD_READ; + bkupAddr &= 0x00ffffff; // truncate to 24 bits + //3 byte address mode + WriteBuffer[BYTE1] = ReadCmd; + WriteBuffer[BYTE2] = (uint8_t) (bkupAddr >> 16); + WriteBuffer[BYTE3] = (uint8_t) (bkupAddr >> 8); + WriteBuffer[BYTE4] = (uint8_t) bkupAddr; + }else { + if(readCmd == 0xff) + ReadCmd = FOUR_BYTE_READ; + WriteBuffer[BYTE1] = ReadCmd; + WriteBuffer[BYTE2] = (uint8_t) (bkupAddr >> 24); + WriteBuffer[BYTE3] = (uint8_t) (bkupAddr >> 16); + WriteBuffer[BYTE4] = (uint8_t) (bkupAddr >> 8); + WriteBuffer[BYTE5] = (uint8_t) bkupAddr; + } + + bkupAddr = Addr; + + + unsigned ByteCount = READ_DATA_SIZE; + + if (ReadCmd == COMMAND_DUAL_READ) { + ByteCount += DUAL_READ_DUMMY_BYTES; + } else if (ReadCmd == COMMAND_DUAL_IO_READ) { + ByteCount += DUAL_READ_DUMMY_BYTES; + } else if (ReadCmd == COMMAND_QUAD_IO_READ) { + ByteCount += QUAD_IO_READ_DUMMY_BYTES; + } else if ( (ReadCmd==COMMAND_QUAD_READ) || (ReadCmd==FOUR_BYTE_QUAD_OUTPUT_FAST_READ)) { + ByteCount += QUAD_READ_DUMMY_BYTES; + } + + //Clear the read 
buffer +// for(unsigned int i = 0; i < ByteCount + READ_WRITE_EXTRA_BYTES; ++i) { +// ReadBuffer[i] = 0; +// } + + if(!finalTransfer(WriteBuffer, ReadBuffer, ByteCount + READ_WRITE_EXTRA_BYTES)) + return false; + + if(!waitTxEmpty()) + return false; + + //reset the RXFIFO bit so. + ControlReg = XSpi_GetControlReg(); + ControlReg |= XSP_CR_RXFIFO_RESET_MASK ; + XSpi_SetControlReg(ControlReg); + + return true; + +} + +bool XDMAShim::prepareXSpi() +{ + if(TEST_MODE) + return true; + + + uint32_t tControlReg = XSpi_GetControlReg(); + uint32_t tStatusReg = XSpi_GetStatusReg(); + +#if defined(_debug) + std::cout << "Boot Control/Status " << std::hex << tControlReg << "/" << tStatusReg << std::dec << std::endl; +#endif + + uint32_t ControlReg = CONTROL_REG_START_STATE; + XSpi_SetControlReg(ControlReg); + + tControlReg = XSpi_GetControlReg(); + tStatusReg = XSpi_GetStatusReg(); + +#if defined(_debug) + std::cout << "After setting start state, Control/Status " << std::hex << tControlReg << "/" << tStatusReg << std::dec << std::endl; +#endif + //-- + + if(!getFlashId()) { + std::cout << "Exiting now, as could not get correct idcode" << std::endl; + exit(0); + return false; + } + + //WriteEnable writes CONTROL_REG_START_STATE - that should be enough for initial configuration ? + //if(!writeEnable()) + //return false; + + //Bulk erase the flash. + //if(!bulkErase()) + //return false; + + return true; +} + +int XDMAShim::programXSpi(std::ifstream& mcsStream, const ELARecord& record) { + if (mLogStream.is_open()) { + mLogStream << __func__ << ", " << std::this_thread::get_id() << std::endl; + } + + //TODO: decrease the sleep time. 
+ const timespec req = {0, 20000}; + +#if defined(_debug) + std::cout << "Programming block (" << std::hex << record.mStartAddress << ", " << record.mEndAddress << std::dec << ")" << std::endl; +#endif + + assert(mcsStream.tellg() < record.mDataPos); + mcsStream.seekg(record.mDataPos, std::ifstream::beg); + unsigned char* buffer = &WriteBuffer[READ_WRITE_EXTRA_BYTES]; + int bufferIndex = 0; + int pageIndex = 0; + std::string prevLine(""); + for (unsigned index = record.mDataCount; index > 0;) { + std::string line; + std::getline(mcsStream, line); + if(TEST_MODE) + std::cout << line << std::endl; + const unsigned dataLen = std::stoi(line.substr(1, 2), 0 , 16); + index -= dataLen; + const unsigned recordType = std::stoi(line.substr(7, 2), 0 , 16); + if (recordType != 0x00) { + continue; + } + const std::string data = line.substr(9, dataLen * 2); + // Write in byte swapped order + for (unsigned i = 0; i < data.length(); i += 2) { + unsigned value = std::stoi(data.substr(i, 2), 0, 16); + buffer[bufferIndex++] = (unsigned char)value; + assert(bufferIndex <= WRITE_DATA_SIZE); + +#if 0 + //To enable byte swapping uncomment this. +// if ((bufferIndex % 4) == 0) { +// bufferIndex += 4; +// } +// assert(bufferIndex <= WRITE_DATA_SIZE); +// unsigned value = std::stoi(data.substr(i, 2), 0, 16); +// if(TEST_MODE) +// std::cout << data.substr(i, 2); +// buffer[--bufferIndex] = (unsigned char)value; +// if ((bufferIndex % 4) == 0) { +// bufferIndex += 4; +// } +#endif + if (bufferIndex == WRITE_DATA_SIZE) { + break; + } + } + + if(TEST_MODE) + std::cout << std::endl; + +#if 0 + //Uncomment if byte swapping enabled. 
+ + //account for the last line + //which can have say 14 bytes instead of 16 + if((bufferIndex %4)!= 0) { + while ((bufferIndex %4)!= 0) { + unsigned char fillValue = 0xFF; + buffer[--bufferIndex] = fillValue; + } + bufferIndex += 4; + } + + assert((bufferIndex % 4) == 0); +#endif + + assert(bufferIndex <= WRITE_DATA_SIZE); + if (bufferIndex == WRITE_DATA_SIZE) { +#if defined(_debug) + std::cout << "writing page " << pageIndex << std::endl; +#endif + const unsigned address = std::stoi(line.substr(3, 4), 0, 16); + assert ( (address + dataLen) == (pageIndex +1)*WRITE_DATA_SIZE); + if(TEST_MODE) { + std::cout << (address + dataLen) << " " << (pageIndex +1)*WRITE_DATA_SIZE << std::endl; + std::cout << record.mStartAddress << " " << record.mStartAddress + pageIndex*PAGE_SIZE; + std::cout << " " << address << std::endl; + } else + { + if(!writePage(record.mStartAddress + pageIndex*WRITE_DATA_SIZE)) + return -1; + clearBuffers(); + { + //debug stuff +#if defined(_debug) + if(pageIndex == 0) { + if(!readPage(record.mStartAddress + pageIndex*WRITE_DATA_SIZE)) + return -1; + clearBuffers(); + } +#endif + } + } + pageIndex++; + nanosleep(&req, 0); + bufferIndex = 0; + } + prevLine = line; + + } + if (bufferIndex) { + //Write the last page + if(TEST_MODE) { + std::cout << "writing final page " << pageIndex << std::endl; + std::cout << bufferIndex << std::endl; + std::cout << prevLine << std::endl; + } + + const unsigned address = std::stoi(prevLine.substr(3, 4), 0, 16); + const unsigned dataLen = std::stoi(prevLine.substr(1, 2), 0 , 16); + + if(TEST_MODE) + std::cout << address % WRITE_DATA_SIZE << " " << dataLen << std::endl; + + //assert( (address % WRITE_DATA_SIZE + dataLen) == bufferIndex); + + if(!TEST_MODE) { + + //Fill unused half page to FF + for(unsigned i = bufferIndex; i < WRITE_DATA_SIZE; ++i) { + buffer[i] = 0xff; + } + + if(!writePage(record.mStartAddress + pageIndex*WRITE_DATA_SIZE)) + return -1; + nanosleep(&req, 0); + clearBuffers(); + { + //debug stuff +#if 
defined(_debug) + if(!readPage(record.mStartAddress + pageIndex*WRITE_DATA_SIZE)) + return -1; + clearBuffers(); +#endif + } + } + } + return 0; +} + +int XDMAShim::programXSpi(std::ifstream& mcsStream) +{ +// for (ELARecordList::iterator i = mRecordList.begin(), e = mRecordList.end(); i != e; ++i) { +// i->mStartAddress <<= 16; +// i->mEndAddress += i->mStartAddress; +// // Convert from 2 bytes address to 4 bytes address +// i->mStartAddress /= 2; +// i->mEndAddress /= 2; +// } + + if (!prepareXSpi()) { + std::cout << "ERROR: Unable to prepare the XSpi\n"; + return -1; + } + + //if(!bulkErase()) + //return false; + + const timespec req = {0, 20000}; + nanosleep(&req, 0); + + unsigned current_sector = -1; + std::vector erased_sectors; + erased_sectors.reserve(4); + for(int i =0; i < 4; ++i) + erased_sectors.push_back(false); + + int beatCount = 0; + for (ELARecordList::iterator i = mRecordList.begin(), e = mRecordList.end(); i != e; ++i) + { + beatCount++; + if(beatCount%20==0) { + std::cout << "." << std::flush; + } + + i->mStartAddress <<= 16; + + unsigned sector = getSector(i->mStartAddress); + bool valid_sector = false; + if ( (sector == 0) || (sector == 1) || (sector == 2) || (sector == 3) ) + valid_sector = true; + if(!valid_sector) { + std::cout << "Invalid sector encountered" << std::endl; + return -1; + } + + //Remove the sector determinant half byte. 
+ i->mStartAddress &= 0xFFFFFF; + i->mEndAddress += i->mStartAddress; + + if(TEST_MODE) { + std::cout << "INFO: Start address 0x" << std::hex << mRecordList.front().mStartAddress << std::dec << "\n"; + std::cout << "INFO: End address 0x" << std::hex << mRecordList.back().mEndAddress << std::dec << "\n"; + } + + if(current_sector != sector) { + //Issue sector select + if(!writeRegister(COMMAND_EXTENDED_ADDRESS_REG_WRITE, sector, 1)) + return false; + current_sector = sector; + } + + { + //debug +#if defined(_debug) + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + uint8_t Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + if(!isFlashReady()) + return false; +#endif + } + + //Erase the sector if not already erased. + if(!erased_sectors.at(current_sector)) { + //Use addr 0 to erase the sector. + unsigned Addr = 0; + + //Erase the entire segment. Each segment is 128 Mb (bits). + //Each sector is 64KB (bytes). So total 256 sectors in a segment. 
+ for(int i = 0; i < 256 ; ++i) { + if(!sectorErase(Addr)) { + return false; + } + Addr+= 0x10000; + nanosleep(&req, 0); + } + + Addr = 0; + if(!readPage(Addr)) + return false; + erased_sectors.at(sector)=true; + } + + { + //debug +#if defined(_debug) + std::cout << "Testing COMMAND_EXTENDED_ADDRESS_REG_READ" << std::endl; + uint8_t Cmd = COMMAND_EXTENDED_ADDRESS_REG_READ; + readRegister(Cmd, STATUS_READ_BYTES); + if(!isFlashReady()) + return false; +#endif + } + + bool ready = isFlashReady(); + if(!ready){ + std::cout << "Unable to get flash ready" << std::endl; + return false; + } + + clearBuffers(); + + if (programXSpi(mcsStream, *i)) { + std::cout << "ERROR: Could not programXSpi the block\n"; + return -1; + } + nanosleep(&req, 0); + } + std::cout << std::endl; + return 0; +} + +bool XDMAShim::readRegister(unsigned commandCode, unsigned bytes) { + + if(!isFlashReady()) + return false; + + bool Status = false; + + WriteBuffer[BYTE1] = commandCode; + + Status = finalTransfer(WriteBuffer, ReadBuffer, bytes); + + if( !Status ) { + return false; + } + +#if defined(_debug) + std::cout << "Printing output (with some extra bytes of readRegister cmd)" << std::endl; +#endif + + for(unsigned i = 0; i < 5; ++ i) //Some extra bytes, no harm + { +#if defined(_debug) + std::cout << i << " " << std::hex << (int)ReadBuffer[i] << std::dec << std::endl; +#endif + ReadBuffer[i] = 0; //clear + } + //Reset the FIFO bit. + uint32_t ControlReg = XSpi_GetControlReg(); + ControlReg |= XSP_CR_RXFIFO_RESET_MASK ; + ControlReg |= XSP_CR_TXFIFO_RESET_MASK ; + XSpi_SetControlReg(ControlReg); + + return Status; +} + +//max 16 bits for nonvolative cfg register. +//If extra_bytes == 0, then only the command is sent. 
+bool XDMAShim::writeRegister(unsigned commandCode, unsigned value, unsigned extra_bytes) { + if(!isFlashReady()) + return false; + + if(!writeEnable()) + return false; + + uint32_t ControlReg = XSpi_GetControlReg(); + ControlReg |= XSP_CR_TXFIFO_RESET_MASK; + ControlReg |= XSP_CR_RXFIFO_RESET_MASK; + XSpi_SetControlReg(ControlReg); + + bool Status = false; + + WriteBuffer[BYTE1] = commandCode; + + if(extra_bytes == 0) { + //do nothing + } else if(extra_bytes == 1) + WriteBuffer[BYTE2] = (uint8_t) (value); + else if(extra_bytes == 2) { + WriteBuffer[BYTE2] = (uint8_t) (value >> 8); + WriteBuffer[BYTE3] = (uint8_t) value; + }else { + std::cout << "ERROR: Setting more than 2 bytes" << std::endl; + assert(0); + } + + //+1 for cmd byte. + Status = finalTransfer(WriteBuffer,NULL, extra_bytes+1); + if(!Status) + return false; + + if(!waitTxEmpty()) + return false; + + return Status; +} + + +} //end namespace